Files
RustPython/vm/src/obj/objstr.rs
2019-02-06 17:38:01 +00:00

1106 lines
36 KiB
Rust

use super::super::format::{FormatParseError, FormatPart, FormatString};
use super::super::pyobject::{
PyContext, PyFuncArgs, PyObject, PyObjectPayload, PyObjectRef, PyResult, TypeProtocol,
};
use super::super::vm::VirtualMachine;
use super::objint;
use super::objsequence::PySliceableSequence;
use super::objtype;
use num_traits::ToPrimitive;
use std::hash::{Hash, Hasher};
// rust's builtin to_lowercase isn't sufficient for casefold
extern crate caseless;
extern crate unicode_segmentation;
use self::unicode_segmentation::UnicodeSegmentation;
pub fn init(context: &PyContext) {
let str_type = &context.str_type;
context.set_attr(&str_type, "__add__", context.new_rustfunc(str_add));
context.set_attr(&str_type, "__eq__", context.new_rustfunc(str_eq));
context.set_attr(
&str_type,
"__contains__",
context.new_rustfunc(str_contains),
);
context.set_attr(&str_type, "__getitem__", context.new_rustfunc(str_getitem));
context.set_attr(&str_type, "__gt__", context.new_rustfunc(str_gt));
context.set_attr(&str_type, "__lt__", context.new_rustfunc(str_lt));
context.set_attr(&str_type, "__hash__", context.new_rustfunc(str_hash));
context.set_attr(&str_type, "__len__", context.new_rustfunc(str_len));
context.set_attr(&str_type, "__mul__", context.new_rustfunc(str_mul));
context.set_attr(&str_type, "__new__", context.new_rustfunc(str_new));
context.set_attr(&str_type, "__str__", context.new_rustfunc(str_str));
context.set_attr(&str_type, "__repr__", context.new_rustfunc(str_repr));
context.set_attr(&str_type, "format", context.new_rustfunc(str_format));
context.set_attr(&str_type, "lower", context.new_rustfunc(str_lower));
context.set_attr(&str_type, "casefold", context.new_rustfunc(str_casefold));
context.set_attr(&str_type, "upper", context.new_rustfunc(str_upper));
context.set_attr(
&str_type,
"capitalize",
context.new_rustfunc(str_capitalize),
);
context.set_attr(&str_type, "split", context.new_rustfunc(str_split));
context.set_attr(&str_type, "rsplit", context.new_rustfunc(str_rsplit));
context.set_attr(&str_type, "strip", context.new_rustfunc(str_strip));
context.set_attr(&str_type, "lstrip", context.new_rustfunc(str_lstrip));
context.set_attr(&str_type, "rstrip", context.new_rustfunc(str_rstrip));
context.set_attr(&str_type, "endswith", context.new_rustfunc(str_endswith));
context.set_attr(
&str_type,
"startswith",
context.new_rustfunc(str_startswith),
);
context.set_attr(&str_type, "isalnum", context.new_rustfunc(str_isalnum));
context.set_attr(&str_type, "isnumeric", context.new_rustfunc(str_isnumeric));
context.set_attr(&str_type, "isdigit", context.new_rustfunc(str_isdigit));
context.set_attr(&str_type, "isdecimal", context.new_rustfunc(str_isdecimal));
context.set_attr(&str_type, "title", context.new_rustfunc(str_title));
context.set_attr(&str_type, "swapcase", context.new_rustfunc(str_swapcase));
context.set_attr(&str_type, "isalpha", context.new_rustfunc(str_isalpha));
context.set_attr(&str_type, "replace", context.new_rustfunc(str_replace));
context.set_attr(&str_type, "center", context.new_rustfunc(str_center));
context.set_attr(&str_type, "isspace", context.new_rustfunc(str_isspace));
context.set_attr(&str_type, "isupper", context.new_rustfunc(str_isupper));
context.set_attr(&str_type, "islower", context.new_rustfunc(str_islower));
context.set_attr(&str_type, "isascii", context.new_rustfunc(str_isascii));
context.set_attr(
&str_type,
"splitlines",
context.new_rustfunc(str_splitlines),
);
context.set_attr(&str_type, "join", context.new_rustfunc(str_join));
context.set_attr(&str_type, "find", context.new_rustfunc(str_find));
context.set_attr(&str_type, "rfind", context.new_rustfunc(str_rfind));
context.set_attr(&str_type, "index", context.new_rustfunc(str_index));
context.set_attr(&str_type, "rindex", context.new_rustfunc(str_rindex));
context.set_attr(&str_type, "partition", context.new_rustfunc(str_partition));
context.set_attr(
&str_type,
"rpartition",
context.new_rustfunc(str_rpartition),
);
context.set_attr(&str_type, "istitle", context.new_rustfunc(str_istitle));
context.set_attr(&str_type, "count", context.new_rustfunc(str_count));
context.set_attr(&str_type, "zfill", context.new_rustfunc(str_zfill));
context.set_attr(&str_type, "ljust", context.new_rustfunc(str_ljust));
context.set_attr(&str_type, "rjust", context.new_rustfunc(str_rjust));
context.set_attr(
&str_type,
"expandtabs",
context.new_rustfunc(str_expandtabs),
);
context.set_attr(
&str_type,
"isidentifier",
context.new_rustfunc(str_isidentifier),
);
}
pub fn get_value(obj: &PyObjectRef) -> String {
if let PyObjectPayload::String { value } = &obj.borrow().payload {
value.to_string()
} else {
panic!("Inner error getting str");
}
}
fn str_eq(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(a, Some(vm.ctx.str_type())), (b, None)]
);
let result = if objtype::isinstance(b, &vm.ctx.str_type()) {
get_value(a) == get_value(b)
} else {
false
};
Ok(vm.ctx.new_bool(result))
}
fn str_gt(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [
(zelf, Some(vm.ctx.str_type())),
(other, Some(vm.ctx.str_type()))
]
);
let zelf = get_value(zelf);
let other = get_value(other);
let result = zelf > other;
Ok(vm.ctx.new_bool(result))
}
fn str_lt(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [
(zelf, Some(vm.ctx.str_type())),
(other, Some(vm.ctx.str_type()))
]
);
let zelf = get_value(zelf);
let other = get_value(other);
let result = zelf < other;
Ok(vm.ctx.new_bool(result))
}
fn str_str(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
Ok(s.clone())
}
fn count_char(s: &str, c: char) -> usize {
s.chars().filter(|x| *x == c).count()
}
fn str_repr(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(s);
let quote_char = if count_char(&value, '\'') > count_char(&value, '"') {
'"'
} else {
'\''
};
let mut formatted = String::new();
formatted.push(quote_char);
for c in value.chars() {
if c == quote_char || c == '\\' {
formatted.push('\\');
formatted.push(c);
} else if c == '\n' {
formatted.push('\\');
formatted.push('n');
} else if c == '\t' {
formatted.push('\\');
formatted.push('t');
} else if c == '\r' {
formatted.push('\\');
formatted.push('r');
} else {
formatted.push(c);
}
}
formatted.push(quote_char);
Ok(vm.ctx.new_str(formatted))
}
fn str_add(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (s2, None)]
);
if objtype::isinstance(s2, &vm.ctx.str_type()) {
Ok(vm
.ctx
.new_str(format!("{}{}", get_value(&s), get_value(&s2))))
} else {
Err(vm.new_type_error(format!("Cannot add {} and {}", s.borrow(), s2.borrow())))
}
}
fn str_format(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
if args.args.is_empty() {
return Err(
vm.new_type_error("descriptor 'format' of 'str' object needs an argument".to_string())
);
}
let zelf = &args.args[0];
if !objtype::isinstance(&zelf, &vm.ctx.str_type()) {
let zelf_typ = zelf.typ();
let actual_type = vm.to_pystr(&zelf_typ)?;
return Err(vm.new_type_error(format!(
"descriptor 'format' requires a 'str' object but received a '{}'",
actual_type
)));
}
let format_string_text = get_value(zelf);
match FormatString::from_str(format_string_text.as_str()) {
Ok(format_string) => perform_format(vm, &format_string, &args),
Err(err) => match err {
FormatParseError::UnmatchedBracket => {
Err(vm.new_value_error("expected '}' before end of string".to_string()))
}
_ => Err(vm.new_value_error("Unexpected error parsing format string".to_string())),
},
}
}
fn call_object_format(
vm: &mut VirtualMachine,
argument: PyObjectRef,
format_spec: &str,
) -> PyResult {
let returned_type = vm.ctx.new_str(format_spec.to_string());
let result = vm.call_method(&argument, "__format__", vec![returned_type])?;
if !objtype::isinstance(&result, &vm.ctx.str_type()) {
let result_type = result.typ();
let actual_type = vm.to_pystr(&result_type)?;
return Err(vm.new_type_error(format!("__format__ must return a str, not {}", actual_type)));
}
Ok(result)
}
fn perform_format(
vm: &mut VirtualMachine,
format_string: &FormatString,
arguments: &PyFuncArgs,
) -> PyResult {
let mut final_string = String::new();
if format_string.format_parts.iter().any(FormatPart::is_auto)
&& format_string.format_parts.iter().any(FormatPart::is_index)
{
return Err(vm.new_value_error(
"cannot switch from automatic field numbering to manual field specification"
.to_string(),
));
}
let mut auto_argument_index: usize = 1;
for part in &format_string.format_parts {
let result_string: String = match part {
FormatPart::AutoSpec(format_spec) => {
let result = match arguments.args.get(auto_argument_index) {
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
None => {
return Err(vm.new_index_error("tuple index out of range".to_string()));
}
};
auto_argument_index += 1;
get_value(&result)
}
FormatPart::IndexSpec(index, format_spec) => {
let result = match arguments.args.get(*index + 1) {
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
None => {
return Err(vm.new_index_error("tuple index out of range".to_string()));
}
};
get_value(&result)
}
FormatPart::KeywordSpec(keyword, format_spec) => {
let result = match arguments.get_optional_kwarg(&keyword) {
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
None => {
return Err(vm.new_key_error(format!("'{}'", keyword)));
}
};
get_value(&result)
}
FormatPart::Literal(literal) => literal.clone(),
};
final_string.push_str(&result_string);
}
Ok(vm.ctx.new_str(final_string))
}
fn str_hash(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(zelf, Some(vm.ctx.str_type()))]);
let value = get_value(zelf);
let mut hasher = std::collections::hash_map::DefaultHasher::new();
value.hash(&mut hasher);
let hash = hasher.finish();
Ok(vm.ctx.new_int(hash))
}
fn str_len(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let sv = get_value(s);
Ok(vm.ctx.new_int(sv.chars().count()))
}
fn str_mul(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (s2, None)]
);
if objtype::isinstance(s2, &vm.ctx.int_type()) {
let value1 = get_value(&s);
let value2 = objint::get_value(s2).to_i32().unwrap();
let mut result = String::new();
for _x in 0..value2 {
result.push_str(value1.as_str());
}
Ok(vm.ctx.new_str(result))
} else {
Err(vm.new_type_error(format!(
"Cannot multiply {} and {}",
s.borrow(),
s2.borrow()
)))
}
}
fn str_upper(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s).to_uppercase();
Ok(vm.ctx.new_str(value))
}
fn str_lower(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s).to_lowercase();
Ok(vm.ctx.new_str(value))
}
fn str_capitalize(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
let (first_part, lower_str) = value.split_at(1);
let capitalized = format!("{}{}", first_part.to_uppercase(), lower_str);
Ok(vm.ctx.new_str(capitalized))
}
fn str_rsplit(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type()))],
optional = [
(pat, Some(vm.ctx.str_type())),
(num, Some(vm.ctx.int_type()))
]
);
let value = get_value(&s);
let pat = match pat {
Some(s) => get_value(&s),
None => " ".to_string(),
};
let num_splits = match num {
Some(n) => objint::get_value(&n).to_usize().unwrap(),
None => value.split(&pat).count(),
};
let elements = value
.rsplitn(num_splits + 1, &pat)
.map(|o| vm.ctx.new_str(o.to_string()))
.collect();
Ok(vm.ctx.new_list(elements))
}
fn str_split(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type()))],
optional = [
(pat, Some(vm.ctx.str_type())),
(num, Some(vm.ctx.int_type()))
]
);
let value = get_value(&s);
let pat = match pat {
Some(s) => get_value(&s),
None => " ".to_string(),
};
let num_splits = match num {
Some(n) => objint::get_value(&n).to_usize().unwrap(),
None => value.split(&pat).count(),
};
let elements = value
.splitn(num_splits + 1, &pat)
.map(|o| vm.ctx.new_str(o.to_string()))
.collect();
Ok(vm.ctx.new_list(elements))
}
fn str_strip(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s).trim().to_string();
Ok(vm.ctx.new_str(value))
}
fn str_lstrip(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s).trim_start().to_string();
Ok(vm.ctx.new_str(value))
}
fn str_rstrip(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s).trim_end().to_string();
Ok(vm.ctx.new_str(value))
}
fn str_endswith(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (pat, Some(vm.ctx.str_type()))]
);
let value = get_value(&s);
let pat = get_value(&pat);
Ok(vm.ctx.new_bool(value.ends_with(pat.as_str())))
}
fn str_isidentifier(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
let mut is_identifier: bool = true;
// a string is not an identifier if it has whitespace or starts with a number
if !value.chars().any(|c| c.is_ascii_whitespace())
&& !value.chars().nth(0).unwrap().is_digit(10)
{
for c in value.chars() {
if c != "_".chars().nth(0).unwrap() && !c.is_digit(10) && !c.is_alphabetic() {
is_identifier = false;
}
}
} else {
is_identifier = false;
}
Ok(vm.ctx.new_bool(is_identifier))
}
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
fn str_isspace(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
Ok(vm
.ctx
.new_bool(!value.is_empty() && value.chars().all(|c| c.is_ascii_whitespace())))
}
fn str_isupper(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
Ok(vm.ctx.new_bool(
!value.is_empty()
&& value
.chars()
.filter(|x| !x.is_ascii_whitespace())
.all(|c| c.is_uppercase()),
))
}
fn str_islower(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
Ok(vm.ctx.new_bool(
!value.is_empty()
&& value
.chars()
.filter(|x| !x.is_ascii_whitespace())
.all(|c| c.is_lowercase()),
))
}
// doesn't implement keep new line delimeter just yet
fn str_splitlines(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let elements = get_value(&s)
.split('\n')
.map(|e| vm.ctx.new_str(e.to_string()))
.collect();
Ok(vm.ctx.new_list(elements))
}
fn str_zfill(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (len, Some(vm.ctx.int_type()))]
);
let value = get_value(&s);
let len = objint::get_value(&len).to_usize().unwrap();
let new_str: String;
if len <= value.len() {
new_str = value;
} else {
new_str = format!("{}{}", "0".repeat(len - value.len()), value);
}
Ok(vm.ctx.new_str(new_str))
}
fn str_join(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (iterable, None)]
);
let value = get_value(&s);
let elements: Vec<String> = vm
.extract_elements(iterable)?
.iter()
.map(|w| get_value(&w))
.collect();
let joined = elements.join(&value);
Ok(vm.ctx.new_str(joined))
}
fn str_count(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (sub, Some(vm.ctx.str_type()))],
optional = [
(start, Some(vm.ctx.int_type())),
(end, Some(vm.ctx.int_type()))
]
);
let value = get_value(&s);
let sub = get_value(&sub);
let (start, end) = match get_slice(start, end, value.len()) {
Ok((start, end)) => (start, end),
Err(e) => return Err(vm.new_index_error(e)),
};
let num_occur: usize = value[start..end].matches(&sub).count();
Ok(vm.ctx.new_int(num_occur))
}
fn str_index(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (sub, Some(vm.ctx.str_type()))],
optional = [
(start, Some(vm.ctx.int_type())),
(end, Some(vm.ctx.int_type()))
]
);
let value = get_value(&s);
let sub = get_value(&sub);
let (start, end) = match get_slice(start, end, value.len()) {
Ok((start, end)) => (start, end),
Err(e) => return Err(vm.new_index_error(e)),
};
let ind: usize = match value[start..end + 1].find(&sub) {
Some(num) => num,
None => {
return Err(vm.new_value_error("substring not found".to_string()));
}
};
Ok(vm.ctx.new_int(ind))
}
fn str_find(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (sub, Some(vm.ctx.str_type()))],
optional = [
(start, Some(vm.ctx.int_type())),
(end, Some(vm.ctx.int_type()))
]
);
let value = get_value(&s);
let sub = get_value(&sub);
let (start, end) = match get_slice(start, end, value.len()) {
Ok((start, end)) => (start, end),
Err(e) => return Err(vm.new_index_error(e)),
};
let ind: i128 = match value[start..end + 1].find(&sub) {
Some(num) => num as i128,
None => -1 as i128,
};
Ok(vm.ctx.new_int(ind))
}
// casefold is much more aggresive than lower
fn str_casefold(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
let folded_str: String = caseless::default_case_fold_str(&value);
Ok(vm.ctx.new_str(folded_str))
}
fn str_swapcase(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
let mut swapped_str = String::with_capacity(value.len());
for c in value.chars() {
// to_uppercase returns an iterator, to_ascii_uppercase returns the char
if c.is_lowercase() {
swapped_str.push(c.to_ascii_uppercase());
} else if c.is_uppercase() {
swapped_str.push(c.to_ascii_lowercase());
} else {
swapped_str.push(c);
}
}
Ok(vm.ctx.new_str(swapped_str))
}
fn str_replace(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [
(s, Some(vm.ctx.str_type())),
(old, Some(vm.ctx.str_type())),
(rep, Some(vm.ctx.str_type()))
],
optional = [(n, Some(vm.ctx.int_type()))]
);
let s = get_value(&s);
let old_str = get_value(&old);
let rep_str = get_value(&rep);
let num_rep: usize = match n {
Some(num) => objint::get_value(&num).to_usize().unwrap(),
None => 1,
};
let new_str = s.replacen(&old_str, &rep_str, num_rep);
Ok(vm.ctx.new_str(new_str))
}
fn str_expandtabs(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type()))],
optional = [(size, Some(vm.ctx.int_type()))]
);
let value = get_value(&s);
let tab_stop = match size {
Some(num) => objint::get_value(&num).to_usize().unwrap(),
None => 8 as usize,
};
let mut expanded_str = String::new();
let mut tab_size = tab_stop;
let mut col_count = 0 as usize;
for ch in value.chars() {
// 0x0009 is tab
if ch == 0x0009 as char {
let num_spaces = tab_size - col_count;
col_count += num_spaces;
let expand = " ".repeat(num_spaces);
expanded_str.push_str(&expand);
} else {
expanded_str.push(ch);
col_count += 1;
}
if col_count >= tab_size {
tab_size += tab_stop;
}
}
Ok(vm.ctx.new_str(expanded_str))
}
fn str_rpartition(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (sub, Some(vm.ctx.str_type()))]
);
let value = get_value(&s);
let sub = get_value(&sub);
let mut new_tup = Vec::new();
if value.contains(&sub) {
new_tup = value
.rsplitn(2, &sub)
.map(|s| vm.ctx.new_str(s.to_string()))
.collect();
new_tup.swap(0, 1); // so it's in the right order
new_tup.insert(1, vm.ctx.new_str(sub));
} else {
new_tup.push(vm.ctx.new_str(value));
new_tup.push(vm.ctx.new_str("".to_string()));
new_tup.push(vm.ctx.new_str("".to_string()));
}
Ok(vm.ctx.new_tuple(new_tup))
}
fn str_partition(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (sub, Some(vm.ctx.str_type()))]
);
let value = get_value(&s);
let sub = get_value(&sub);
let mut new_tup = Vec::new();
if value.contains(&sub) {
new_tup = value
.splitn(2, &sub)
.map(|s| vm.ctx.new_str(s.to_string()))
.collect();
new_tup.insert(1, vm.ctx.new_str(sub));
} else {
new_tup.push(vm.ctx.new_str(value));
new_tup.push(vm.ctx.new_str("".to_string()));
new_tup.push(vm.ctx.new_str("".to_string()));
}
Ok(vm.ctx.new_tuple(new_tup))
}
fn str_title(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
Ok(vm.ctx.new_str(make_title(&get_value(&s))))
}
fn str_rjust(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (num, Some(vm.ctx.int_type()))],
optional = [(rep, Some(vm.ctx.str_type()))]
);
let value = get_value(&s);
let num = objint::get_value(&num).to_usize().unwrap();
let rep = match rep {
Some(st) => {
let rep_str = get_value(&st);
if rep_str.len() == 1 {
rep_str
} else {
return Err(vm.new_type_error(
"The fill character must be exactly one character long".to_string(),
));
}
}
None => " ".to_string(),
};
let new_str = format!("{}{}", rep.repeat(num), value);
Ok(vm.ctx.new_str(new_str))
}
fn str_ljust(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (num, Some(vm.ctx.int_type()))],
optional = [(rep, Some(vm.ctx.str_type()))]
);
let value = get_value(&s);
let num = objint::get_value(&num).to_usize().unwrap();
let rep = match rep {
Some(st) => {
let rep_str = get_value(&st);
if rep_str.len() == 1 {
rep_str
} else {
return Err(vm.new_type_error(
"The fill character must be exactly one character long".to_string(),
));
}
}
None => " ".to_string(),
};
let new_str = format!("{}{}", value, rep.repeat(num));
Ok(vm.ctx.new_str(new_str))
}
fn str_istitle(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
let mut is_titled = true;
if value.is_empty() {
is_titled = false;
} else {
for word in value.split(" ") {
if word != make_title(&word) {
is_titled = false;
break;
}
}
}
Ok(vm.ctx.new_bool(is_titled))
}
fn str_center(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (len, Some(vm.ctx.int_type()))],
optional = [(chars, Some(vm.ctx.str_type()))]
);
let value = get_value(&s);
let len = objint::get_value(&len).to_usize().unwrap();
let rep_char = match chars {
Some(c) => get_value(&c),
None => " ".to_string(),
};
let left_buff: usize = (len - value.len()) / 2;
let right_buff = len - value.len() - left_buff;
let new_str = format!(
"{}{}{}",
rep_char.repeat(left_buff),
value,
rep_char.repeat(right_buff)
);
Ok(vm.ctx.new_str(new_str))
}
fn str_startswith(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (pat, Some(vm.ctx.str_type()))]
);
let value = get_value(&s);
let pat = get_value(&pat);
Ok(vm.ctx.new_bool(value.starts_with(pat.as_str())))
}
fn str_contains(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [
(s, Some(vm.ctx.str_type())),
(needle, Some(vm.ctx.str_type()))
]
);
let value = get_value(&s);
let needle = get_value(&needle);
Ok(vm.ctx.new_bool(value.contains(needle.as_str())))
}
fn str_isalnum(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
Ok(vm
.ctx
.new_bool(!value.is_empty() && value.chars().all(|c| c.is_alphanumeric())))
}
fn str_isascii(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
Ok(vm
.ctx
.new_bool(!value.is_empty() && value.chars().all(|c| c.is_ascii())))
}
fn str_rindex(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (sub, Some(vm.ctx.str_type()))],
optional = [
(start, Some(vm.ctx.int_type())),
(end, Some(vm.ctx.int_type()))
]
);
let value = get_value(&s);
let sub = get_value(&sub);
let (start, end) = match get_slice(start, end, value.len()) {
Ok((start, end)) => (start, end),
Err(e) => return Err(vm.new_index_error(e)),
};
let ind: i64 = match value[start..end + 1].rfind(&sub) {
Some(num) => num as i64,
None => {
return Err(vm.new_value_error("substring not found".to_string()));
}
};
Ok(vm.ctx.new_int(ind))
}
fn str_rfind(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (sub, Some(vm.ctx.str_type()))],
optional = [
(start, Some(vm.ctx.int_type())),
(end, Some(vm.ctx.int_type()))
]
);
let value = get_value(&s);
let sub = get_value(&sub);
let (start, end) = match get_slice(start, end, value.len()) {
Ok((start, end)) => (start, end),
Err(e) => return Err(vm.new_index_error(e)),
};
let ind = match value[start..end + 1].rfind(&sub) {
Some(num) => num as i128,
None => -1 as i128,
};
Ok(vm.ctx.new_int(ind))
}
fn str_isnumeric(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
Ok(vm
.ctx
.new_bool(!value.is_empty() && value.chars().all(|c| c.is_numeric())))
}
fn str_isalpha(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
Ok(vm
.ctx
.new_bool(!value.is_empty() && value.chars().all(|c| c.is_alphanumeric())))
}
fn str_isdigit(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
// python's isdigit also checks if exponents are digits, these are the unicodes for exponents
let valid_unicodes: [u16; 10] = [
0x2070, 0x00B9, 0x00B2, 0x00B3, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079,
];
let mut is_digit: bool = true;
if value.is_empty() {
is_digit = false;
} else {
for c in value.chars() {
if !c.is_digit(10) {
// checking if char is exponent
let char_as_uni: u16 = c as u16;
if valid_unicodes.contains(&char_as_uni) {
continue;
} else {
is_digit = false;
break;
}
}
}
}
Ok(vm.ctx.new_bool(is_digit))
}
fn str_isdecimal(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(vm, args, required = [(s, Some(vm.ctx.str_type()))]);
let value = get_value(&s);
let is_decimal = if !value.is_empty() {
value.chars().all(|c| c.is_ascii_digit())
} else {
false
};
Ok(vm.ctx.new_bool(is_decimal))
}
fn str_getitem(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
arg_check!(
vm,
args,
required = [(s, Some(vm.ctx.str_type())), (needle, None)]
);
let value = get_value(&s);
subscript(vm, &value, needle.clone())
}
// TODO: should with following format
// class str(object='')
// class str(object=b'', encoding='utf-8', errors='strict')
fn str_new(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
if args.args.len() == 1 {
return Ok(vm.new_str("".to_string()));
}
if args.args.len() > 2 {
panic!("str expects exactly one parameter");
};
vm.to_str(&args.args[1])
}
impl PySliceableSequence for String {
fn do_slice(&self, start: usize, stop: usize) -> Self {
to_graphemes(self)
.get(start..stop)
.map_or(String::default(), |c| c.join(""))
}
fn do_stepped_slice(&self, start: usize, stop: usize, step: usize) -> Self {
if let Some(s) = to_graphemes(self).get(start..stop) {
return s
.iter()
.cloned()
.step_by(step)
.collect::<Vec<String>>()
.join("");
}
String::default()
}
fn len(&self) -> usize {
to_graphemes(self).len()
}
}
/// Convert a string-able `value` to a vec of graphemes
/// represents the string according to user perceived characters
fn to_graphemes<S: AsRef<str>>(value: S) -> Vec<String> {
UnicodeSegmentation::graphemes(value.as_ref(), true)
.map(String::from)
.collect()
}
pub fn subscript(vm: &mut VirtualMachine, value: &str, b: PyObjectRef) -> PyResult {
if objtype::isinstance(&b, &vm.ctx.int_type()) {
match objint::get_value(&b).to_i32() {
Some(pos) => {
let graphemes = to_graphemes(value);
let idx = graphemes.get_pos(pos);
graphemes
.get(idx)
.map(|c| vm.new_str(c.to_string()))
.ok_or(vm.new_index_error("string index out of range".to_string()))
}
None => {
Err(vm.new_index_error("cannot fit 'int' into an index-sized integer".to_string()))
}
}
} else {
match &(*b.borrow()).payload {
&PyObjectPayload::Slice { .. } => {
Ok(vm.new_str(value.to_string().get_slice_items(&b).to_string()))
}
_ => panic!(
"TypeError: indexing type {:?} with index {:?} is not supported (yet?)",
value, b
),
}
}
}
// help get optional string indices
fn get_slice(
start: Option<&std::rc::Rc<std::cell::RefCell<PyObject>>>,
end: Option<&std::rc::Rc<std::cell::RefCell<PyObject>>>,
len: usize,
) -> Result<(usize, usize), String> {
let start_idx = match start {
Some(int) => objint::get_value(&int).to_usize().unwrap(),
None => 0 as usize,
};
let end_idx = match end {
Some(int) => objint::get_value(&int).to_usize().unwrap(),
None => len - 1,
};
if start_idx >= usize::min_value() && start_idx < end_idx && end_idx < len {
Ok((start_idx, end_idx))
} else {
Err("provided index is not valid".to_string())
}
}
// helper function to title strings
fn make_title(s: &str) -> String {
let mut titled_str = String::new();
let mut capitalize_char: bool = true;
for c in s.chars() {
if c.is_alphabetic() {
if !capitalize_char {
titled_str.push(c);
} else if capitalize_char {
titled_str.push(c.to_ascii_uppercase());
capitalize_char = false;
}
} else {
titled_str.push(c);
capitalize_char = true;
}
}
titled_str
}