Files
RustPython/vm/src/obj/objstr.rs
2019-03-15 21:27:11 -07:00

945 lines
32 KiB
Rust

use std::hash::{Hash, Hasher};
use std::ops::Range;
use std::str::FromStr;
use num_traits::ToPrimitive;
use unicode_segmentation::UnicodeSegmentation;
use crate::format::{FormatParseError, FormatPart, FormatString};
use crate::function::{OptionalArg, PyFuncArgs};
use crate::pyobject::{
IdProtocol, IntoPyObject, PyContext, PyIterable, PyObjectRef, PyRef, PyResult, PyValue,
TryFromObject, TypeProtocol,
};
use crate::vm::VirtualMachine;
use super::objint;
use super::objsequence::PySliceableSequence;
use super::objslice::PySlice;
use super::objtype::{self, PyClassRef};
#[derive(Clone, Debug)]
pub struct PyString {
// TODO: shouldn't be public
pub value: String,
}
pub type PyStringRef = PyRef<PyString>;
impl PyStringRef {
fn add(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<String> {
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
Ok(format!("{}{}", self.value, get_value(&rhs)))
} else {
Err(vm.new_type_error(format!("Cannot add {} and {}", self, rhs)))
}
}
fn eq(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> bool {
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
self.value == get_value(&rhs)
} else {
false
}
}
fn contains(self, needle: PyStringRef, _vm: &mut VirtualMachine) -> bool {
self.value.contains(&needle.value)
}
fn getitem(self, needle: PyObjectRef, vm: &mut VirtualMachine) -> PyResult {
subscript(vm, &self.value, needle)
}
fn gt(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
Ok(self.value > get_value(&rhs))
} else {
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
}
}
fn ge(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
Ok(self.value >= get_value(&rhs))
} else {
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
}
}
fn lt(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
Ok(self.value < get_value(&rhs))
} else {
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
}
}
fn le(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
Ok(self.value <= get_value(&rhs))
} else {
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
}
}
fn hash(self, _vm: &mut VirtualMachine) -> usize {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
self.value.hash(&mut hasher);
hasher.finish() as usize
}
fn len(self, _vm: &mut VirtualMachine) -> usize {
self.value.chars().count()
}
fn mul(self, val: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<String> {
if objtype::isinstance(&val, &vm.ctx.int_type()) {
let value = &self.value;
let multiplier = objint::get_value(&val).to_i32().unwrap();
let mut result = String::new();
for _x in 0..multiplier {
result.push_str(value.as_str());
}
Ok(result)
} else {
Err(vm.new_type_error(format!("Cannot multiply {} and {}", self, val)))
}
}
fn str(self, _vm: &mut VirtualMachine) -> PyStringRef {
self
}
fn repr(self, _vm: &mut VirtualMachine) -> String {
let value = &self.value;
let quote_char = if count_char(value, '\'') > count_char(value, '"') {
'"'
} else {
'\''
};
let mut formatted = String::new();
formatted.push(quote_char);
for c in value.chars() {
if c == quote_char || c == '\\' {
formatted.push('\\');
formatted.push(c);
} else if c == '\n' {
formatted.push('\\');
formatted.push('n');
} else if c == '\t' {
formatted.push('\\');
formatted.push('t');
} else if c == '\r' {
formatted.push('\\');
formatted.push('r');
} else {
formatted.push(c);
}
}
formatted.push(quote_char);
formatted
}
fn lower(self, _vm: &mut VirtualMachine) -> String {
self.value.to_lowercase()
}
// casefold is much more aggressive than lower
fn casefold(self, _vm: &mut VirtualMachine) -> String {
caseless::default_case_fold_str(&self.value)
}
fn upper(self, _vm: &mut VirtualMachine) -> String {
self.value.to_uppercase()
}
fn capitalize(self, _vm: &mut VirtualMachine) -> String {
let (first_part, lower_str) = self.value.split_at(1);
format!("{}{}", first_part.to_uppercase(), lower_str)
}
fn split(
self,
pattern: OptionalArg<Self>,
num: OptionalArg<usize>,
vm: &mut VirtualMachine,
) -> PyObjectRef {
let value = &self.value;
let pattern = match pattern {
OptionalArg::Present(ref s) => &s.value,
OptionalArg::Missing => " ",
};
let num_splits = num
.into_option()
.unwrap_or_else(|| value.split(pattern).count());
let elements = value
.splitn(num_splits + 1, pattern)
.map(|o| vm.ctx.new_str(o.to_string()))
.collect();
vm.ctx.new_list(elements)
}
fn rsplit(
self,
pattern: OptionalArg<Self>,
num: OptionalArg<usize>,
vm: &mut VirtualMachine,
) -> PyObjectRef {
let value = &self.value;
let pattern = match pattern {
OptionalArg::Present(ref s) => &s.value,
OptionalArg::Missing => " ",
};
let num_splits = num
.into_option()
.unwrap_or_else(|| value.split(pattern).count());
let elements = value
.rsplitn(num_splits + 1, pattern)
.map(|o| vm.ctx.new_str(o.to_string()))
.collect();
vm.ctx.new_list(elements)
}
fn strip(self, _vm: &mut VirtualMachine) -> String {
self.value.trim().to_string()
}
fn lstrip(self, _vm: &mut VirtualMachine) -> String {
self.value.trim_start().to_string()
}
fn rstrip(self, _vm: &mut VirtualMachine) -> String {
self.value.trim_end().to_string()
}
fn endswith(
self,
suffix: PyStringRef,
start: OptionalArg<isize>,
end: OptionalArg<isize>,
_vm: &mut VirtualMachine,
) -> bool {
if let Some((start, end)) = adjust_indices(start, end, self.value.len()) {
self.value[start..end].ends_with(&suffix.value)
} else {
false
}
}
fn startswith(
self,
prefix: PyStringRef,
start: OptionalArg<isize>,
end: OptionalArg<isize>,
_vm: &mut VirtualMachine,
) -> bool {
if let Some((start, end)) = adjust_indices(start, end, self.value.len()) {
self.value[start..end].starts_with(&prefix.value)
} else {
false
}
}
fn isalnum(self, _vm: &mut VirtualMachine) -> bool {
!self.value.is_empty() && self.value.chars().all(char::is_alphanumeric)
}
fn isnumeric(self, _vm: &mut VirtualMachine) -> bool {
!self.value.is_empty() && self.value.chars().all(char::is_numeric)
}
fn isdigit(self, _vm: &mut VirtualMachine) -> bool {
// python's isdigit also checks if exponents are digits, these are the unicodes for exponents
let valid_unicodes: [u16; 10] = [
0x2070, 0x00B9, 0x00B2, 0x00B3, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079,
];
if self.value.is_empty() {
false
} else {
self.value
.chars()
.filter(|c| !c.is_digit(10))
.all(|c| valid_unicodes.contains(&(c as u16)))
}
}
fn isdecimal(self, _vm: &mut VirtualMachine) -> bool {
if self.value.is_empty() {
false
} else {
self.value.chars().all(|c| c.is_ascii_digit())
}
}
fn title(self, _vm: &mut VirtualMachine) -> String {
make_title(&self.value)
}
fn swapcase(self, _vm: &mut VirtualMachine) -> String {
let mut swapped_str = String::with_capacity(self.value.len());
for c in self.value.chars() {
// to_uppercase returns an iterator, to_ascii_uppercase returns the char
if c.is_lowercase() {
swapped_str.push(c.to_ascii_uppercase());
} else if c.is_uppercase() {
swapped_str.push(c.to_ascii_lowercase());
} else {
swapped_str.push(c);
}
}
swapped_str
}
fn isalpha(self, _vm: &mut VirtualMachine) -> bool {
!self.value.is_empty() && self.value.chars().all(char::is_alphanumeric)
}
fn replace(
self,
old: Self,
new: Self,
num: OptionalArg<usize>,
_vm: &mut VirtualMachine,
) -> String {
match num.into_option() {
Some(num) => self.value.replacen(&old.value, &new.value, num),
None => self.value.replace(&old.value, &new.value),
}
}
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
fn isspace(self, _vm: &mut VirtualMachine) -> bool {
!self.value.is_empty() && self.value.chars().all(|c| c.is_ascii_whitespace())
}
fn isupper(self, _vm: &mut VirtualMachine) -> bool {
!self.value.is_empty()
&& self
.value
.chars()
.filter(|x| !x.is_ascii_whitespace())
.all(char::is_uppercase)
}
fn islower(self, _vm: &mut VirtualMachine) -> bool {
!self.value.is_empty()
&& self
.value
.chars()
.filter(|x| !x.is_ascii_whitespace())
.all(char::is_lowercase)
}
fn isascii(self, _vm: &mut VirtualMachine) -> bool {
!self.value.is_empty() && self.value.chars().all(|c| c.is_ascii())
}
// doesn't implement keep new line delimiter just yet
fn splitlines(self, vm: &mut VirtualMachine) -> PyObjectRef {
let elements = self
.value
.split('\n')
.map(|e| vm.ctx.new_str(e.to_string()))
.collect();
vm.ctx.new_list(elements)
}
fn join(self, iterable: PyIterable<PyStringRef>, vm: &mut VirtualMachine) -> PyResult<String> {
let mut joined = String::new();
for (idx, elem) in iterable.iter(vm)?.enumerate() {
let elem = elem?;
if idx != 0 {
joined.push_str(&self.value);
}
joined.push_str(&elem.value)
}
Ok(joined)
}
fn find(
self,
sub: Self,
start: OptionalArg<isize>,
end: OptionalArg<isize>,
_vm: &mut VirtualMachine,
) -> isize {
let value = &self.value;
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
match value[start..end].find(&sub.value) {
Some(num) => (start + num) as isize,
None => -1 as isize,
}
} else {
-1 as isize
}
}
fn rfind(
self,
sub: Self,
start: OptionalArg<isize>,
end: OptionalArg<isize>,
_vm: &mut VirtualMachine,
) -> isize {
let value = &self.value;
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
match value[start..end].rfind(&sub.value) {
Some(num) => (start + num) as isize,
None => -1 as isize,
}
} else {
-1 as isize
}
}
fn index(
self,
sub: Self,
start: OptionalArg<isize>,
end: OptionalArg<isize>,
vm: &mut VirtualMachine,
) -> PyResult<usize> {
let value = &self.value;
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
match value[start..end].find(&sub.value) {
Some(num) => Ok(start + num),
None => Err(vm.new_value_error("substring not found".to_string())),
}
} else {
Err(vm.new_value_error("substring not found".to_string()))
}
}
fn rindex(
self,
sub: Self,
start: OptionalArg<isize>,
end: OptionalArg<isize>,
vm: &mut VirtualMachine,
) -> PyResult<usize> {
let value = &self.value;
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
match value[start..end].rfind(&sub.value) {
Some(num) => Ok(start + num),
None => Err(vm.new_value_error("substring not found".to_string())),
}
} else {
Err(vm.new_value_error("substring not found".to_string()))
}
}
fn partition(self, sub: PyStringRef, vm: &mut VirtualMachine) -> PyObjectRef {
let value = &self.value;
let sub = &sub.value;
let mut new_tup = Vec::new();
if value.contains(sub) {
new_tup = value
.splitn(2, sub)
.map(|s| vm.ctx.new_str(s.to_string()))
.collect();
new_tup.insert(1, vm.ctx.new_str(sub.clone()));
} else {
new_tup.push(vm.ctx.new_str(value.clone()));
new_tup.push(vm.ctx.new_str("".to_string()));
new_tup.push(vm.ctx.new_str("".to_string()));
}
vm.ctx.new_tuple(new_tup)
}
fn rpartition(self, sub: PyStringRef, vm: &mut VirtualMachine) -> PyObjectRef {
let value = &self.value;
let sub = &sub.value;
let mut new_tup = Vec::new();
if value.contains(sub) {
new_tup = value
.rsplitn(2, sub)
.map(|s| vm.ctx.new_str(s.to_string()))
.collect();
new_tup.swap(0, 1); // so it's in the right order
new_tup.insert(1, vm.ctx.new_str(sub.clone()));
} else {
new_tup.push(vm.ctx.new_str(value.clone()));
new_tup.push(vm.ctx.new_str("".to_string()));
new_tup.push(vm.ctx.new_str("".to_string()));
}
vm.ctx.new_tuple(new_tup)
}
fn istitle(self, _vm: &mut VirtualMachine) -> bool {
if self.value.is_empty() {
false
} else {
self.value.split(' ').all(|word| word == make_title(word))
}
}
fn count(
self,
sub: Self,
start: OptionalArg<isize>,
end: OptionalArg<isize>,
_vm: &mut VirtualMachine,
) -> usize {
let value = &self.value;
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
self.value[start..end].matches(&sub.value).count()
} else {
0
}
}
fn zfill(self, len: usize, _vm: &mut VirtualMachine) -> String {
let value = &self.value;
if len <= value.len() {
value.to_string()
} else {
format!("{}{}", "0".repeat(len - value.len()), value)
}
}
fn get_fill_char<'a>(rep: &'a OptionalArg<Self>, vm: &mut VirtualMachine) -> PyResult<&'a str> {
let rep_str = match rep {
OptionalArg::Present(ref st) => &st.value,
OptionalArg::Missing => " ",
};
if rep_str.len() == 1 {
Ok(rep_str)
} else {
Err(vm.new_type_error(
"The fill character must be exactly one character long".to_string(),
))
}
}
fn ljust(
self,
len: usize,
rep: OptionalArg<Self>,
vm: &mut VirtualMachine,
) -> PyResult<String> {
let value = &self.value;
let rep_char = PyStringRef::get_fill_char(&rep, vm)?;
Ok(format!("{}{}", value, rep_char.repeat(len)))
}
fn rjust(
self,
len: usize,
rep: OptionalArg<Self>,
vm: &mut VirtualMachine,
) -> PyResult<String> {
let value = &self.value;
let rep_char = PyStringRef::get_fill_char(&rep, vm)?;
Ok(format!("{}{}", rep_char.repeat(len), value))
}
fn center(
self,
len: usize,
rep: OptionalArg<Self>,
vm: &mut VirtualMachine,
) -> PyResult<String> {
let value = &self.value;
let rep_char = PyStringRef::get_fill_char(&rep, vm)?;
let left_buff: usize = (len - value.len()) / 2;
let right_buff = len - value.len() - left_buff;
Ok(format!(
"{}{}{}",
rep_char.repeat(left_buff),
value,
rep_char.repeat(right_buff)
))
}
fn expandtabs(self, tab_stop: OptionalArg<usize>, _vm: &mut VirtualMachine) -> String {
let tab_stop = tab_stop.into_option().unwrap_or(8 as usize);
let mut expanded_str = String::new();
let mut tab_size = tab_stop;
let mut col_count = 0 as usize;
for ch in self.value.chars() {
// 0x0009 is tab
if ch == 0x0009 as char {
let num_spaces = tab_size - col_count;
col_count += num_spaces;
let expand = " ".repeat(num_spaces);
expanded_str.push_str(&expand);
} else {
expanded_str.push(ch);
col_count += 1;
}
if col_count >= tab_size {
tab_size += tab_stop;
}
}
expanded_str
}
fn isidentifier(self, _vm: &mut VirtualMachine) -> bool {
let value = &self.value;
// a string is not an identifier if it has whitespace or starts with a number
if !value.chars().any(|c| c.is_ascii_whitespace())
&& !value.chars().nth(0).unwrap().is_digit(10)
{
for c in value.chars() {
if c != "_".chars().nth(0).unwrap() && !c.is_digit(10) && !c.is_alphabetic() {
return false;
}
}
true
} else {
false
}
}
}
impl PyValue for PyString {
fn class(vm: &mut VirtualMachine) -> PyObjectRef {
vm.ctx.str_type()
}
}
impl IntoPyObject for String {
fn into_pyobject(self, vm: &mut VirtualMachine) -> PyResult {
Ok(vm.ctx.new_str(self))
}
}
#[rustfmt::skip] // to avoid line splitting
pub fn init(context: &PyContext) {
let str_type = &context.str_type;
let str_doc = "str(object='') -> str\n\
str(bytes_or_buffer[, encoding[, errors]]) -> str\n\
\n\
Create a new string object from the given object. If encoding or\n\
errors is specified, then the object must expose a data buffer\n\
that will be decoded using the given encoding and error handler.\n\
Otherwise, returns the result of object.__str__() (if defined)\n\
or repr(object).\n\
encoding defaults to sys.getdefaultencoding().\n\
errors defaults to 'strict'.";
context.set_attr(&str_type, "__add__", context.new_rustfunc(PyStringRef::add));
context.set_attr(&str_type, "__eq__", context.new_rustfunc(PyStringRef::eq));
context.set_attr(&str_type, "__contains__", context.new_rustfunc(PyStringRef::contains));
context.set_attr(&str_type, "__getitem__", context.new_rustfunc(PyStringRef::getitem));
context.set_attr(&str_type, "__gt__", context.new_rustfunc(PyStringRef::gt));
context.set_attr(&str_type, "__ge__", context.new_rustfunc(PyStringRef::ge));
context.set_attr(&str_type, "__lt__", context.new_rustfunc(PyStringRef::lt));
context.set_attr(&str_type, "__le__", context.new_rustfunc(PyStringRef::le));
context.set_attr(&str_type, "__hash__", context.new_rustfunc(PyStringRef::hash));
context.set_attr(&str_type, "__len__", context.new_rustfunc(PyStringRef::len));
context.set_attr(&str_type, "__mul__", context.new_rustfunc(PyStringRef::mul));
context.set_attr(&str_type, "__new__", context.new_rustfunc(str_new));
context.set_attr(&str_type, "__str__", context.new_rustfunc(PyStringRef::str));
context.set_attr(&str_type, "__repr__", context.new_rustfunc(PyStringRef::repr));
context.set_attr(&str_type, "format", context.new_rustfunc(str_format));
context.set_attr(&str_type, "lower", context.new_rustfunc(PyStringRef::lower));
context.set_attr(&str_type, "casefold", context.new_rustfunc(PyStringRef::casefold));
context.set_attr(&str_type, "upper", context.new_rustfunc(PyStringRef::upper));
context.set_attr(&str_type, "capitalize", context.new_rustfunc(PyStringRef::capitalize));
context.set_attr(&str_type, "split", context.new_rustfunc(PyStringRef::split));
context.set_attr(&str_type, "rsplit", context.new_rustfunc(PyStringRef::rsplit));
context.set_attr(&str_type, "strip", context.new_rustfunc(PyStringRef::strip));
context.set_attr(&str_type, "lstrip", context.new_rustfunc(PyStringRef::lstrip));
context.set_attr(&str_type, "rstrip", context.new_rustfunc(PyStringRef::rstrip));
context.set_attr(&str_type, "endswith", context.new_rustfunc(PyStringRef::endswith));
context.set_attr(&str_type, "startswith", context.new_rustfunc(PyStringRef::startswith));
context.set_attr(&str_type, "isalnum", context.new_rustfunc(PyStringRef::isalnum));
context.set_attr(&str_type, "isnumeric", context.new_rustfunc(PyStringRef::isnumeric));
context.set_attr(&str_type, "isdigit", context.new_rustfunc(PyStringRef::isdigit));
context.set_attr(&str_type, "isdecimal", context.new_rustfunc(PyStringRef::isdecimal));
context.set_attr(&str_type, "title", context.new_rustfunc(PyStringRef::title));
context.set_attr(&str_type, "swapcase", context.new_rustfunc(PyStringRef::swapcase));
context.set_attr(&str_type, "isalpha", context.new_rustfunc(PyStringRef::isalpha));
context.set_attr(&str_type, "replace", context.new_rustfunc(PyStringRef::replace));
context.set_attr(&str_type, "isspace", context.new_rustfunc(PyStringRef::isspace));
context.set_attr(&str_type, "isupper", context.new_rustfunc(PyStringRef::isupper));
context.set_attr(&str_type, "islower", context.new_rustfunc(PyStringRef::islower));
context.set_attr(&str_type, "isascii", context.new_rustfunc(PyStringRef::isascii));
context.set_attr(&str_type, "splitlines", context.new_rustfunc(PyStringRef::splitlines));
context.set_attr(&str_type, "join", context.new_rustfunc(PyStringRef::join));
context.set_attr(&str_type, "find", context.new_rustfunc(PyStringRef::find));
context.set_attr(&str_type, "rfind", context.new_rustfunc(PyStringRef::rfind));
context.set_attr(&str_type, "index", context.new_rustfunc(PyStringRef::index));
context.set_attr(&str_type, "rindex", context.new_rustfunc(PyStringRef::rindex));
context.set_attr(&str_type, "partition", context.new_rustfunc(PyStringRef::partition));
context.set_attr(&str_type, "rpartition", context.new_rustfunc(PyStringRef::rpartition));
context.set_attr(&str_type, "istitle", context.new_rustfunc(PyStringRef::istitle));
context.set_attr(&str_type, "count", context.new_rustfunc(PyStringRef::count));
context.set_attr(&str_type, "zfill", context.new_rustfunc(PyStringRef::zfill));
context.set_attr(&str_type, "ljust", context.new_rustfunc(PyStringRef::ljust));
context.set_attr(&str_type, "rjust", context.new_rustfunc(PyStringRef::rjust));
context.set_attr(&str_type, "center", context.new_rustfunc(PyStringRef::center));
context.set_attr(&str_type, "expandtabs", context.new_rustfunc(PyStringRef::expandtabs));
context.set_attr(&str_type, "isidentifier", context.new_rustfunc(PyStringRef::isidentifier));
context.set_attr(&str_type, "__doc__", context.new_str(str_doc.to_string()));
}
pub fn get_value(obj: &PyObjectRef) -> String {
obj.payload::<PyString>().unwrap().value.clone()
}
pub fn borrow_value(obj: &PyObjectRef) -> &str {
&obj.payload::<PyString>().unwrap().value
}
fn count_char(s: &str, c: char) -> usize {
s.chars().filter(|x| *x == c).count()
}
fn str_format(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
if args.args.is_empty() {
return Err(
vm.new_type_error("descriptor 'format' of 'str' object needs an argument".to_string())
);
}
let zelf = &args.args[0];
if !objtype::isinstance(&zelf, &vm.ctx.str_type()) {
let zelf_typ = zelf.typ();
let actual_type = vm.to_pystr(&zelf_typ)?;
return Err(vm.new_type_error(format!(
"descriptor 'format' requires a 'str' object but received a '{}'",
actual_type
)));
}
let format_string_text = get_value(zelf);
match FormatString::from_str(format_string_text.as_str()) {
Ok(format_string) => perform_format(vm, &format_string, &args),
Err(err) => match err {
FormatParseError::UnmatchedBracket => {
Err(vm.new_value_error("expected '}' before end of string".to_string()))
}
_ => Err(vm.new_value_error("Unexpected error parsing format string".to_string())),
},
}
}
fn call_object_format(
vm: &mut VirtualMachine,
argument: PyObjectRef,
format_spec: &str,
) -> PyResult {
let returned_type = vm.ctx.new_str(format_spec.to_string());
let result = vm.call_method(&argument, "__format__", vec![returned_type])?;
if !objtype::isinstance(&result, &vm.ctx.str_type()) {
let result_type = result.typ();
let actual_type = vm.to_pystr(&result_type)?;
return Err(vm.new_type_error(format!("__format__ must return a str, not {}", actual_type)));
}
Ok(result)
}
fn perform_format(
vm: &mut VirtualMachine,
format_string: &FormatString,
arguments: &PyFuncArgs,
) -> PyResult {
let mut final_string = String::new();
if format_string.format_parts.iter().any(FormatPart::is_auto)
&& format_string.format_parts.iter().any(FormatPart::is_index)
{
return Err(vm.new_value_error(
"cannot switch from automatic field numbering to manual field specification"
.to_string(),
));
}
let mut auto_argument_index: usize = 1;
for part in &format_string.format_parts {
let result_string: String = match part {
FormatPart::AutoSpec(format_spec) => {
let result = match arguments.args.get(auto_argument_index) {
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
None => {
return Err(vm.new_index_error("tuple index out of range".to_string()));
}
};
auto_argument_index += 1;
get_value(&result)
}
FormatPart::IndexSpec(index, format_spec) => {
let result = match arguments.args.get(*index + 1) {
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
None => {
return Err(vm.new_index_error("tuple index out of range".to_string()));
}
};
get_value(&result)
}
FormatPart::KeywordSpec(keyword, format_spec) => {
let result = match arguments.get_optional_kwarg(&keyword) {
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
None => {
return Err(vm.new_key_error(format!("'{}'", keyword)));
}
};
get_value(&result)
}
FormatPart::Literal(literal) => literal.clone(),
};
final_string.push_str(&result_string);
}
Ok(vm.ctx.new_str(final_string))
}
// TODO: should with following format
// class str(object='')
// class str(object=b'', encoding='utf-8', errors='strict')
fn str_new(
cls: PyClassRef,
object: OptionalArg<PyObjectRef>,
vm: &mut VirtualMachine,
) -> PyResult<PyStringRef> {
let string = match object {
OptionalArg::Present(ref input) => vm.to_str(input)?,
OptionalArg::Missing => vm.new_str("".to_string()),
};
if string.typ().is(&cls) {
TryFromObject::try_from_object(vm, string)
} else {
let payload = string.payload::<PyString>().unwrap();
PyRef::new_with_type(vm, payload.clone(), cls)
}
}
impl PySliceableSequence for String {
fn do_slice(&self, range: Range<usize>) -> Self {
to_graphemes(self)
.get(range)
.map_or(String::default(), |c| c.join(""))
}
fn do_slice_reverse(&self, range: Range<usize>) -> Self {
to_graphemes(self)
.get_mut(range)
.map_or(String::default(), |slice| {
slice.reverse();
slice.join("")
})
}
fn do_stepped_slice(&self, range: Range<usize>, step: usize) -> Self {
if let Some(s) = to_graphemes(self).get(range) {
return s
.iter()
.cloned()
.step_by(step)
.collect::<Vec<String>>()
.join("");
}
String::default()
}
fn do_stepped_slice_reverse(&self, range: Range<usize>, step: usize) -> Self {
if let Some(s) = to_graphemes(self).get(range) {
return s
.iter()
.rev()
.cloned()
.step_by(step)
.collect::<Vec<String>>()
.join("");
}
String::default()
}
fn empty() -> Self {
String::default()
}
fn len(&self) -> usize {
to_graphemes(self).len()
}
fn is_empty(&self) -> bool {
self.is_empty()
}
}
/// Convert a string-able `value` to a vec of graphemes
/// represents the string according to user perceived characters
fn to_graphemes<S: AsRef<str>>(value: S) -> Vec<String> {
UnicodeSegmentation::graphemes(value.as_ref(), true)
.map(String::from)
.collect()
}
pub fn subscript(vm: &mut VirtualMachine, value: &str, b: PyObjectRef) -> PyResult {
if objtype::isinstance(&b, &vm.ctx.int_type()) {
match objint::get_value(&b).to_i32() {
Some(pos) => {
let graphemes = to_graphemes(value);
if let Some(idx) = graphemes.get_pos(pos) {
Ok(vm.new_str(graphemes[idx].to_string()))
} else {
Err(vm.new_index_error("string index out of range".to_string()))
}
}
None => {
Err(vm.new_index_error("cannot fit 'int' into an index-sized integer".to_string()))
}
}
} else if b.payload::<PySlice>().is_some() {
let string = value.to_string().get_slice_items(vm, &b)?;
Ok(vm.new_str(string))
} else {
panic!(
"TypeError: indexing type {:?} with index {:?} is not supported (yet?)",
value, b
)
}
}
// help get optional string indices
fn adjust_indices(
start: OptionalArg<isize>,
end: OptionalArg<isize>,
len: usize,
) -> Option<(usize, usize)> {
let mut start = start.into_option().unwrap_or(0);
let mut end = end.into_option().unwrap_or(len as isize);
if end > len as isize {
end = len as isize;
} else if end < 0 {
end += len as isize;
if end < 0 {
end = 0;
}
}
if start < 0 {
start += len as isize;
if start < 0 {
start = 0;
}
}
if start > end {
None
} else {
Some((start as usize, end as usize))
}
}
// helper function to title strings
fn make_title(s: &str) -> String {
let mut titled_str = String::new();
let mut capitalize_char: bool = true;
for c in s.chars() {
if c.is_alphabetic() {
if !capitalize_char {
titled_str.push(c);
} else if capitalize_char {
titled_str.push(c.to_ascii_uppercase());
capitalize_char = false;
}
} else {
titled_str.push(c);
capitalize_char = true;
}
}
titled_str
}