mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-09 22:49:57 +09:00
945 lines
32 KiB
Rust
945 lines
32 KiB
Rust
use std::hash::{Hash, Hasher};
|
|
use std::ops::Range;
|
|
use std::str::FromStr;
|
|
|
|
use num_traits::ToPrimitive;
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
use crate::format::{FormatParseError, FormatPart, FormatString};
|
|
use crate::function::{OptionalArg, PyFuncArgs};
|
|
use crate::pyobject::{
|
|
IdProtocol, IntoPyObject, PyContext, PyIterable, PyObjectRef, PyRef, PyResult, PyValue,
|
|
TryFromObject, TypeProtocol,
|
|
};
|
|
use crate::vm::VirtualMachine;
|
|
|
|
use super::objint;
|
|
use super::objsequence::PySliceableSequence;
|
|
use super::objslice::PySlice;
|
|
use super::objtype::{self, PyClassRef};
|
|
|
|
/// Payload of a Python `str` object: the owned Rust `String` that backs it.
#[derive(Debug, Clone)]
pub struct PyString {
    // TODO: shouldn't be public
    pub value: String,
}
|
|
|
|
/// Alias for a `PyRef` whose payload type is `PyString`.
pub type PyStringRef = PyRef<PyString>;
|
|
|
|
impl PyStringRef {
|
|
fn add(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<String> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(format!("{}{}", self.value, get_value(&rhs)))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot add {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
fn eq(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> bool {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
self.value == get_value(&rhs)
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// `str.__contains__`: whether `needle` occurs as a substring of this string.
fn contains(self, needle: PyStringRef, _vm: &mut VirtualMachine) -> bool {
    let haystack: &str = &self.value;
    haystack.contains(needle.value.as_str())
}
|
|
|
|
fn getitem(self, needle: PyObjectRef, vm: &mut VirtualMachine) -> PyResult {
|
|
subscript(vm, &self.value, needle)
|
|
}
|
|
|
|
fn gt(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value > get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
fn ge(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value >= get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
fn lt(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value < get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
fn le(self, rhs: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value <= get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
fn hash(self, _vm: &mut VirtualMachine) -> usize {
|
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
|
self.value.hash(&mut hasher);
|
|
hasher.finish() as usize
|
|
}
|
|
|
|
/// `str.__len__`: number of `char`s (Unicode scalar values) in the string.
fn len(self, _vm: &mut VirtualMachine) -> usize {
    let text: &str = &self.value;
    text.chars().count()
}
|
|
|
|
fn mul(self, val: PyObjectRef, vm: &mut VirtualMachine) -> PyResult<String> {
|
|
if objtype::isinstance(&val, &vm.ctx.int_type()) {
|
|
let value = &self.value;
|
|
let multiplier = objint::get_value(&val).to_i32().unwrap();
|
|
let mut result = String::new();
|
|
for _x in 0..multiplier {
|
|
result.push_str(value.as_str());
|
|
}
|
|
Ok(result)
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot multiply {} and {}", self, val)))
|
|
}
|
|
}
|
|
|
|
/// `str.__str__`: a string is its own string representation, so the
/// receiver is handed back unchanged.
fn str(self, _vm: &mut VirtualMachine) -> PyStringRef {
    let this = self;
    this
}
|
|
|
|
fn repr(self, _vm: &mut VirtualMachine) -> String {
|
|
let value = &self.value;
|
|
let quote_char = if count_char(value, '\'') > count_char(value, '"') {
|
|
'"'
|
|
} else {
|
|
'\''
|
|
};
|
|
let mut formatted = String::new();
|
|
formatted.push(quote_char);
|
|
for c in value.chars() {
|
|
if c == quote_char || c == '\\' {
|
|
formatted.push('\\');
|
|
formatted.push(c);
|
|
} else if c == '\n' {
|
|
formatted.push('\\');
|
|
formatted.push('n');
|
|
} else if c == '\t' {
|
|
formatted.push('\\');
|
|
formatted.push('t');
|
|
} else if c == '\r' {
|
|
formatted.push('\\');
|
|
formatted.push('r');
|
|
} else {
|
|
formatted.push(c);
|
|
}
|
|
}
|
|
formatted.push(quote_char);
|
|
formatted
|
|
}
|
|
|
|
/// `str.lower`: lowercased copy of the string (via `str::to_lowercase`).
fn lower(self, _vm: &mut VirtualMachine) -> String {
    let text: &str = &self.value;
    text.to_lowercase()
}
|
|
|
|
// casefold is much more aggressive than lower
|
|
fn casefold(self, _vm: &mut VirtualMachine) -> String {
|
|
caseless::default_case_fold_str(&self.value)
|
|
}
|
|
|
|
/// `str.upper`: uppercased copy of the string (via `str::to_uppercase`).
fn upper(self, _vm: &mut VirtualMachine) -> String {
    let text: &str = &self.value;
    text.to_uppercase()
}
|
|
|
|
/// `str.capitalize`: first character uppercased, the remainder lowercased.
///
/// Works on `char`s rather than byte offsets, so an empty string or a
/// multi-byte first character no longer panics (the previous `split_at(1)`
/// did), and the tail is lowercased as Python specifies.
fn capitalize(self, _vm: &mut VirtualMachine) -> String {
    let mut chars = self.value.chars();
    match chars.next() {
        None => String::new(),
        Some(first) => {
            let mut capitalized = String::with_capacity(self.value.len());
            capitalized.extend(first.to_uppercase());
            // `as_str` is whatever remains after the first character.
            capitalized.push_str(&chars.as_str().to_lowercase());
            capitalized
        }
    }
}
|
|
|
|
fn split(
|
|
self,
|
|
pattern: OptionalArg<Self>,
|
|
num: OptionalArg<usize>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyObjectRef {
|
|
let value = &self.value;
|
|
let pattern = match pattern {
|
|
OptionalArg::Present(ref s) => &s.value,
|
|
OptionalArg::Missing => " ",
|
|
};
|
|
let num_splits = num
|
|
.into_option()
|
|
.unwrap_or_else(|| value.split(pattern).count());
|
|
let elements = value
|
|
.splitn(num_splits + 1, pattern)
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect();
|
|
vm.ctx.new_list(elements)
|
|
}
|
|
|
|
fn rsplit(
|
|
self,
|
|
pattern: OptionalArg<Self>,
|
|
num: OptionalArg<usize>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyObjectRef {
|
|
let value = &self.value;
|
|
let pattern = match pattern {
|
|
OptionalArg::Present(ref s) => &s.value,
|
|
OptionalArg::Missing => " ",
|
|
};
|
|
let num_splits = num
|
|
.into_option()
|
|
.unwrap_or_else(|| value.split(pattern).count());
|
|
let elements = value
|
|
.rsplitn(num_splits + 1, pattern)
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect();
|
|
vm.ctx.new_list(elements)
|
|
}
|
|
|
|
/// `str.strip`: copy with leading and trailing whitespace removed.
fn strip(self, _vm: &mut VirtualMachine) -> String {
    String::from(self.value.trim())
}
|
|
|
|
/// `str.lstrip`: copy with leading whitespace removed.
fn lstrip(self, _vm: &mut VirtualMachine) -> String {
    String::from(self.value.trim_start())
}
|
|
|
|
/// `str.rstrip`: copy with trailing whitespace removed.
fn rstrip(self, _vm: &mut VirtualMachine) -> String {
    String::from(self.value.trim_end())
}
|
|
|
|
fn endswith(
|
|
self,
|
|
suffix: PyStringRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &mut VirtualMachine,
|
|
) -> bool {
|
|
if let Some((start, end)) = adjust_indices(start, end, self.value.len()) {
|
|
self.value[start..end].ends_with(&suffix.value)
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
fn startswith(
|
|
self,
|
|
prefix: PyStringRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &mut VirtualMachine,
|
|
) -> bool {
|
|
if let Some((start, end)) = adjust_indices(start, end, self.value.len()) {
|
|
self.value[start..end].starts_with(&prefix.value)
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// `str.isalnum`: non-empty and every character is alphanumeric.
fn isalnum(self, _vm: &mut VirtualMachine) -> bool {
    let text = &self.value;
    if text.is_empty() {
        return false;
    }
    text.chars().all(|c| c.is_alphanumeric())
}
|
|
|
|
/// `str.isnumeric`: non-empty and every character is numeric.
fn isnumeric(self, _vm: &mut VirtualMachine) -> bool {
    let text = &self.value;
    if text.is_empty() {
        return false;
    }
    text.chars().all(|c| c.is_numeric())
}
|
|
|
|
/// `str.isdigit`: non-empty and every character is an ASCII digit or one of
/// the superscript digits ⁰–⁹.
///
/// Characters are compared as full `char`s; the previous `c as u16` cast
/// truncated code points above U+FFFF and could accept unrelated characters.
fn isdigit(self, _vm: &mut VirtualMachine) -> bool {
    // python's isdigit also accepts exponent (superscript) digits
    const SUPERSCRIPT_DIGITS: [char; 10] = [
        '\u{2070}', '\u{00B9}', '\u{00B2}', '\u{00B3}', '\u{2074}',
        '\u{2075}', '\u{2076}', '\u{2077}', '\u{2078}', '\u{2079}',
    ];

    !self.value.is_empty()
        && self
            .value
            .chars()
            .all(|c| c.is_ascii_digit() || SUPERSCRIPT_DIGITS.contains(&c))
}
|
|
|
|
/// `str.isdecimal`: non-empty and every character is an ASCII digit.
fn isdecimal(self, _vm: &mut VirtualMachine) -> bool {
    let text = &self.value;
    !text.is_empty() && text.chars().all(|c| c.is_ascii_digit())
}
|
|
|
|
fn title(self, _vm: &mut VirtualMachine) -> String {
|
|
make_title(&self.value)
|
|
}
|
|
|
|
/// `str.swapcase`: lowercase characters become uppercase and vice versa;
/// everything else is copied unchanged.
///
/// Uses the Unicode-aware `to_uppercase`/`to_lowercase` so non-ASCII cased
/// letters are swapped too (the ASCII-only variants left them untouched).
fn swapcase(self, _vm: &mut VirtualMachine) -> String {
    let mut swapped_str = String::with_capacity(self.value.len());
    for c in self.value.chars() {
        // to_uppercase/to_lowercase return iterators because one char may
        // map to several; `extend` appends them all.
        if c.is_lowercase() {
            swapped_str.extend(c.to_uppercase());
        } else if c.is_uppercase() {
            swapped_str.extend(c.to_lowercase());
        } else {
            swapped_str.push(c);
        }
    }
    swapped_str
}
|
|
|
|
/// `str.isalpha`: non-empty and every character is alphabetic.
///
/// Digits are rejected; the earlier `is_alphanumeric` check made this
/// method behave like `isalnum`.
fn isalpha(self, _vm: &mut VirtualMachine) -> bool {
    !self.value.is_empty() && self.value.chars().all(char::is_alphabetic)
}
|
|
|
|
fn replace(
|
|
self,
|
|
old: Self,
|
|
new: Self,
|
|
num: OptionalArg<usize>,
|
|
_vm: &mut VirtualMachine,
|
|
) -> String {
|
|
match num.into_option() {
|
|
Some(num) => self.value.replacen(&old.value, &new.value, num),
|
|
None => self.value.replace(&old.value, &new.value),
|
|
}
|
|
}
|
|
|
|
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
|
|
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
|
|
/// `str.isspace`: non-empty and every character is ASCII whitespace.
fn isspace(self, _vm: &mut VirtualMachine) -> bool {
    let text = &self.value;
    if text.is_empty() {
        return false;
    }
    text.chars().all(|c| c.is_ascii_whitespace())
}
|
|
|
|
/// `str.isupper`: non-empty and every character that is not ASCII
/// whitespace is uppercase.
fn isupper(self, _vm: &mut VirtualMachine) -> bool {
    let text = &self.value;
    if text.is_empty() {
        return false;
    }
    text.chars()
        .filter(|c| !c.is_ascii_whitespace())
        .all(|c| c.is_uppercase())
}
|
|
|
|
/// `str.islower`: non-empty and every character that is not ASCII
/// whitespace is lowercase.
fn islower(self, _vm: &mut VirtualMachine) -> bool {
    let text = &self.value;
    if text.is_empty() {
        return false;
    }
    text.chars()
        .filter(|c| !c.is_ascii_whitespace())
        .all(|c| c.is_lowercase())
}
|
|
|
|
/// `str.isascii`: true when the string is empty or consists solely of ASCII
/// characters, matching Python's definition (the empty string is ASCII).
fn isascii(self, _vm: &mut VirtualMachine) -> bool {
    self.value.is_ascii()
}
|
|
|
|
// doesn't implement keep new line delimiter just yet
|
|
/// `str.splitlines`: splits the string on `'\n'` and returns the pieces as a
/// Python list. Only `'\n'` is treated as a line boundary.
fn splitlines(self, vm: &mut VirtualMachine) -> PyObjectRef {
    let mut lines = Vec::new();
    for line in self.value.split('\n') {
        lines.push(vm.ctx.new_str(line.to_string()));
    }
    vm.ctx.new_list(lines)
}
|
|
|
|
/// `str.join`: concatenates the strings produced by `iterable`, inserting
/// this string between consecutive items.
///
/// Any error raised while obtaining the iterator or one of its items is
/// propagated.
fn join(self, iterable: PyIterable<PyStringRef>, vm: &mut VirtualMachine) -> PyResult<String> {
    let mut pieces: Vec<String> = Vec::new();
    for item in iterable.iter(vm)? {
        pieces.push(item?.value.clone());
    }
    Ok(pieces.join(self.value.as_str()))
}
|
|
|
|
fn find(
|
|
self,
|
|
sub: Self,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &mut VirtualMachine,
|
|
) -> isize {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].find(&sub.value) {
|
|
Some(num) => (start + num) as isize,
|
|
None => -1 as isize,
|
|
}
|
|
} else {
|
|
-1 as isize
|
|
}
|
|
}
|
|
|
|
fn rfind(
|
|
self,
|
|
sub: Self,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &mut VirtualMachine,
|
|
) -> isize {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].rfind(&sub.value) {
|
|
Some(num) => (start + num) as isize,
|
|
None => -1 as isize,
|
|
}
|
|
} else {
|
|
-1 as isize
|
|
}
|
|
}
|
|
|
|
fn index(
|
|
self,
|
|
sub: Self,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyResult<usize> {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].find(&sub.value) {
|
|
Some(num) => Ok(start + num),
|
|
None => Err(vm.new_value_error("substring not found".to_string())),
|
|
}
|
|
} else {
|
|
Err(vm.new_value_error("substring not found".to_string()))
|
|
}
|
|
}
|
|
|
|
fn rindex(
|
|
self,
|
|
sub: Self,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyResult<usize> {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].rfind(&sub.value) {
|
|
Some(num) => Ok(start + num),
|
|
None => Err(vm.new_value_error("substring not found".to_string())),
|
|
}
|
|
} else {
|
|
Err(vm.new_value_error("substring not found".to_string()))
|
|
}
|
|
}
|
|
|
|
/// `str.partition`: splits around the first occurrence of `sub` and returns
/// the tuple `(before, sub, after)`; when `sub` does not occur the result is
/// `(string, "", "")`.
fn partition(self, sub: PyStringRef, vm: &mut VirtualMachine) -> PyObjectRef {
    let text = &self.value;
    let sep = &sub.value;
    let pieces = match text.find(sep.as_str()) {
        Some(at) => vec![
            vm.ctx.new_str(text[..at].to_string()),
            vm.ctx.new_str(sep.clone()),
            vm.ctx.new_str(text[at + sep.len()..].to_string()),
        ],
        None => vec![
            vm.ctx.new_str(text.clone()),
            vm.ctx.new_str("".to_string()),
            vm.ctx.new_str("".to_string()),
        ],
    };
    vm.ctx.new_tuple(pieces)
}
|
|
|
|
/// `str.rpartition`: splits around the last occurrence of `sub` and returns
/// the tuple `(before, sub, after)`.
///
/// When `sub` does not occur the result is `("", "", string)` — the whole
/// string goes in the last slot, as Python specifies (it was previously
/// placed first, mirroring `partition`).
fn rpartition(self, sub: PyStringRef, vm: &mut VirtualMachine) -> PyObjectRef {
    let value = &self.value;
    let sub = &sub.value;
    let new_tup = match value.rfind(sub.as_str()) {
        Some(at) => vec![
            vm.ctx.new_str(value[..at].to_string()),
            vm.ctx.new_str(sub.clone()),
            vm.ctx.new_str(value[at + sub.len()..].to_string()),
        ],
        None => vec![
            vm.ctx.new_str("".to_string()),
            vm.ctx.new_str("".to_string()),
            vm.ctx.new_str(value.clone()),
        ],
    };
    vm.ctx.new_tuple(new_tup)
}
|
|
|
|
fn istitle(self, _vm: &mut VirtualMachine) -> bool {
|
|
if self.value.is_empty() {
|
|
false
|
|
} else {
|
|
self.value.split(' ').all(|word| word == make_title(word))
|
|
}
|
|
}
|
|
|
|
fn count(
|
|
self,
|
|
sub: Self,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &mut VirtualMachine,
|
|
) -> usize {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
self.value[start..end].matches(&sub.value).count()
|
|
} else {
|
|
0
|
|
}
|
|
}
|
|
|
|
/// `str.zfill`: left-pads the string with `'0'` until it is `len` characters
/// long.
///
/// A leading `+` or `-` stays in front of the zeros (`"-42"` → `"-0042"`),
/// as in Python. Width is measured in characters rather than bytes so
/// non-ASCII text is padded correctly.
fn zfill(self, len: usize, _vm: &mut VirtualMachine) -> String {
    let value = &self.value;
    let char_len = value.chars().count();
    if len <= char_len {
        return value.to_string();
    }
    let padding = "0".repeat(len - char_len);
    match value.chars().next() {
        // The sign is ASCII, hence exactly one byte wide.
        Some(sign) if sign == '+' || sign == '-' => {
            format!("{}{}{}", sign, padding, &value[1..])
        }
        _ => format!("{}{}", padding, value),
    }
}
|
|
|
|
fn get_fill_char<'a>(rep: &'a OptionalArg<Self>, vm: &mut VirtualMachine) -> PyResult<&'a str> {
|
|
let rep_str = match rep {
|
|
OptionalArg::Present(ref st) => &st.value,
|
|
OptionalArg::Missing => " ",
|
|
};
|
|
if rep_str.len() == 1 {
|
|
Ok(rep_str)
|
|
} else {
|
|
Err(vm.new_type_error(
|
|
"The fill character must be exactly one character long".to_string(),
|
|
))
|
|
}
|
|
}
|
|
|
|
fn ljust(
|
|
self,
|
|
len: usize,
|
|
rep: OptionalArg<Self>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyResult<String> {
|
|
let value = &self.value;
|
|
let rep_char = PyStringRef::get_fill_char(&rep, vm)?;
|
|
Ok(format!("{}{}", value, rep_char.repeat(len)))
|
|
}
|
|
|
|
fn rjust(
|
|
self,
|
|
len: usize,
|
|
rep: OptionalArg<Self>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyResult<String> {
|
|
let value = &self.value;
|
|
let rep_char = PyStringRef::get_fill_char(&rep, vm)?;
|
|
Ok(format!("{}{}", rep_char.repeat(len), value))
|
|
}
|
|
|
|
fn center(
|
|
self,
|
|
len: usize,
|
|
rep: OptionalArg<Self>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyResult<String> {
|
|
let value = &self.value;
|
|
let rep_char = PyStringRef::get_fill_char(&rep, vm)?;
|
|
let left_buff: usize = (len - value.len()) / 2;
|
|
let right_buff = len - value.len() - left_buff;
|
|
Ok(format!(
|
|
"{}{}{}",
|
|
rep_char.repeat(left_buff),
|
|
value,
|
|
rep_char.repeat(right_buff)
|
|
))
|
|
}
|
|
|
|
/// `str.expandtabs`: replaces each tab with the spaces needed to reach the
/// next multiple of `tab_stop` columns (default 8).
///
/// The column counter restarts after `'\n'` or `'\r'`, and a tab size of 0
/// simply removes tabs (it previously underflowed and panicked).
fn expandtabs(self, tab_stop: OptionalArg<usize>, _vm: &mut VirtualMachine) -> String {
    let tab_stop = tab_stop.into_option().unwrap_or(8);
    let mut expanded_str = String::with_capacity(self.value.len());
    let mut column: usize = 0;
    for ch in self.value.chars() {
        match ch {
            '\t' => {
                if tab_stop > 0 {
                    let num_spaces = tab_stop - column % tab_stop;
                    expanded_str.push_str(&" ".repeat(num_spaces));
                    column += num_spaces;
                }
            }
            '\n' | '\r' => {
                expanded_str.push(ch);
                column = 0;
            }
            _ => {
                expanded_str.push(ch);
                column += 1;
            }
        }
    }
    expanded_str
}
|
|
|
|
/// `str.isidentifier`: true when the string is non-empty, starts with `_` or
/// an alphabetic character, and continues with only `_`, decimal digits or
/// alphabetic characters.
///
/// The empty string yields `false`; it previously panicked on
/// `nth(0).unwrap()`.
fn isidentifier(self, _vm: &mut VirtualMachine) -> bool {
    let mut chars = self.value.chars();
    let starts_ok = match chars.next() {
        Some(first) => first == '_' || first.is_alphabetic(),
        None => false,
    };
    // Whitespace is rejected implicitly: it is neither `_`, digit nor letter.
    starts_ok && chars.all(|c| c == '_' || c.is_digit(10) || c.is_alphabetic())
}
|
|
}
|
|
|
|
impl PyValue for PyString {
|
|
fn class(vm: &mut VirtualMachine) -> PyObjectRef {
|
|
vm.ctx.str_type()
|
|
}
|
|
}
|
|
|
|
impl IntoPyObject for String {
|
|
fn into_pyobject(self, vm: &mut VirtualMachine) -> PyResult {
|
|
Ok(vm.ctx.new_str(self))
|
|
}
|
|
}
|
|
|
|
#[rustfmt::skip] // to avoid line splitting
|
|
pub fn init(context: &PyContext) {
|
|
let str_type = &context.str_type;
|
|
let str_doc = "str(object='') -> str\n\
|
|
str(bytes_or_buffer[, encoding[, errors]]) -> str\n\
|
|
\n\
|
|
Create a new string object from the given object. If encoding or\n\
|
|
errors is specified, then the object must expose a data buffer\n\
|
|
that will be decoded using the given encoding and error handler.\n\
|
|
Otherwise, returns the result of object.__str__() (if defined)\n\
|
|
or repr(object).\n\
|
|
encoding defaults to sys.getdefaultencoding().\n\
|
|
errors defaults to 'strict'.";
|
|
context.set_attr(&str_type, "__add__", context.new_rustfunc(PyStringRef::add));
|
|
context.set_attr(&str_type, "__eq__", context.new_rustfunc(PyStringRef::eq));
|
|
context.set_attr(&str_type, "__contains__", context.new_rustfunc(PyStringRef::contains));
|
|
context.set_attr(&str_type, "__getitem__", context.new_rustfunc(PyStringRef::getitem));
|
|
context.set_attr(&str_type, "__gt__", context.new_rustfunc(PyStringRef::gt));
|
|
context.set_attr(&str_type, "__ge__", context.new_rustfunc(PyStringRef::ge));
|
|
context.set_attr(&str_type, "__lt__", context.new_rustfunc(PyStringRef::lt));
|
|
context.set_attr(&str_type, "__le__", context.new_rustfunc(PyStringRef::le));
|
|
context.set_attr(&str_type, "__hash__", context.new_rustfunc(PyStringRef::hash));
|
|
context.set_attr(&str_type, "__len__", context.new_rustfunc(PyStringRef::len));
|
|
context.set_attr(&str_type, "__mul__", context.new_rustfunc(PyStringRef::mul));
|
|
context.set_attr(&str_type, "__new__", context.new_rustfunc(str_new));
|
|
context.set_attr(&str_type, "__str__", context.new_rustfunc(PyStringRef::str));
|
|
context.set_attr(&str_type, "__repr__", context.new_rustfunc(PyStringRef::repr));
|
|
context.set_attr(&str_type, "format", context.new_rustfunc(str_format));
|
|
context.set_attr(&str_type, "lower", context.new_rustfunc(PyStringRef::lower));
|
|
context.set_attr(&str_type, "casefold", context.new_rustfunc(PyStringRef::casefold));
|
|
context.set_attr(&str_type, "upper", context.new_rustfunc(PyStringRef::upper));
|
|
context.set_attr(&str_type, "capitalize", context.new_rustfunc(PyStringRef::capitalize));
|
|
context.set_attr(&str_type, "split", context.new_rustfunc(PyStringRef::split));
|
|
context.set_attr(&str_type, "rsplit", context.new_rustfunc(PyStringRef::rsplit));
|
|
context.set_attr(&str_type, "strip", context.new_rustfunc(PyStringRef::strip));
|
|
context.set_attr(&str_type, "lstrip", context.new_rustfunc(PyStringRef::lstrip));
|
|
context.set_attr(&str_type, "rstrip", context.new_rustfunc(PyStringRef::rstrip));
|
|
context.set_attr(&str_type, "endswith", context.new_rustfunc(PyStringRef::endswith));
|
|
context.set_attr(&str_type, "startswith", context.new_rustfunc(PyStringRef::startswith));
|
|
context.set_attr(&str_type, "isalnum", context.new_rustfunc(PyStringRef::isalnum));
|
|
context.set_attr(&str_type, "isnumeric", context.new_rustfunc(PyStringRef::isnumeric));
|
|
context.set_attr(&str_type, "isdigit", context.new_rustfunc(PyStringRef::isdigit));
|
|
context.set_attr(&str_type, "isdecimal", context.new_rustfunc(PyStringRef::isdecimal));
|
|
context.set_attr(&str_type, "title", context.new_rustfunc(PyStringRef::title));
|
|
context.set_attr(&str_type, "swapcase", context.new_rustfunc(PyStringRef::swapcase));
|
|
context.set_attr(&str_type, "isalpha", context.new_rustfunc(PyStringRef::isalpha));
|
|
context.set_attr(&str_type, "replace", context.new_rustfunc(PyStringRef::replace));
|
|
context.set_attr(&str_type, "isspace", context.new_rustfunc(PyStringRef::isspace));
|
|
context.set_attr(&str_type, "isupper", context.new_rustfunc(PyStringRef::isupper));
|
|
context.set_attr(&str_type, "islower", context.new_rustfunc(PyStringRef::islower));
|
|
context.set_attr(&str_type, "isascii", context.new_rustfunc(PyStringRef::isascii));
|
|
context.set_attr(&str_type, "splitlines", context.new_rustfunc(PyStringRef::splitlines));
|
|
context.set_attr(&str_type, "join", context.new_rustfunc(PyStringRef::join));
|
|
context.set_attr(&str_type, "find", context.new_rustfunc(PyStringRef::find));
|
|
context.set_attr(&str_type, "rfind", context.new_rustfunc(PyStringRef::rfind));
|
|
context.set_attr(&str_type, "index", context.new_rustfunc(PyStringRef::index));
|
|
context.set_attr(&str_type, "rindex", context.new_rustfunc(PyStringRef::rindex));
|
|
context.set_attr(&str_type, "partition", context.new_rustfunc(PyStringRef::partition));
|
|
context.set_attr(&str_type, "rpartition", context.new_rustfunc(PyStringRef::rpartition));
|
|
context.set_attr(&str_type, "istitle", context.new_rustfunc(PyStringRef::istitle));
|
|
context.set_attr(&str_type, "count", context.new_rustfunc(PyStringRef::count));
|
|
context.set_attr(&str_type, "zfill", context.new_rustfunc(PyStringRef::zfill));
|
|
context.set_attr(&str_type, "ljust", context.new_rustfunc(PyStringRef::ljust));
|
|
context.set_attr(&str_type, "rjust", context.new_rustfunc(PyStringRef::rjust));
|
|
context.set_attr(&str_type, "center", context.new_rustfunc(PyStringRef::center));
|
|
context.set_attr(&str_type, "expandtabs", context.new_rustfunc(PyStringRef::expandtabs));
|
|
context.set_attr(&str_type, "isidentifier", context.new_rustfunc(PyStringRef::isidentifier));
|
|
context.set_attr(&str_type, "__doc__", context.new_str(str_doc.to_string()));
|
|
}
|
|
|
|
pub fn get_value(obj: &PyObjectRef) -> String {
|
|
obj.payload::<PyString>().unwrap().value.clone()
|
|
}
|
|
|
|
pub fn borrow_value(obj: &PyObjectRef) -> &str {
|
|
&obj.payload::<PyString>().unwrap().value
|
|
}
|
|
|
|
/// Counts how many times the character `c` occurs in `s`.
fn count_char(s: &str, c: char) -> usize {
    s.matches(c).count()
}
|
|
|
|
fn str_format(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
|
|
if args.args.is_empty() {
|
|
return Err(
|
|
vm.new_type_error("descriptor 'format' of 'str' object needs an argument".to_string())
|
|
);
|
|
}
|
|
|
|
let zelf = &args.args[0];
|
|
if !objtype::isinstance(&zelf, &vm.ctx.str_type()) {
|
|
let zelf_typ = zelf.typ();
|
|
let actual_type = vm.to_pystr(&zelf_typ)?;
|
|
return Err(vm.new_type_error(format!(
|
|
"descriptor 'format' requires a 'str' object but received a '{}'",
|
|
actual_type
|
|
)));
|
|
}
|
|
let format_string_text = get_value(zelf);
|
|
match FormatString::from_str(format_string_text.as_str()) {
|
|
Ok(format_string) => perform_format(vm, &format_string, &args),
|
|
Err(err) => match err {
|
|
FormatParseError::UnmatchedBracket => {
|
|
Err(vm.new_value_error("expected '}' before end of string".to_string()))
|
|
}
|
|
_ => Err(vm.new_value_error("Unexpected error parsing format string".to_string())),
|
|
},
|
|
}
|
|
}
|
|
|
|
fn call_object_format(
|
|
vm: &mut VirtualMachine,
|
|
argument: PyObjectRef,
|
|
format_spec: &str,
|
|
) -> PyResult {
|
|
let returned_type = vm.ctx.new_str(format_spec.to_string());
|
|
let result = vm.call_method(&argument, "__format__", vec![returned_type])?;
|
|
if !objtype::isinstance(&result, &vm.ctx.str_type()) {
|
|
let result_type = result.typ();
|
|
let actual_type = vm.to_pystr(&result_type)?;
|
|
return Err(vm.new_type_error(format!("__format__ must return a str, not {}", actual_type)));
|
|
}
|
|
Ok(result)
|
|
}
|
|
|
|
fn perform_format(
|
|
vm: &mut VirtualMachine,
|
|
format_string: &FormatString,
|
|
arguments: &PyFuncArgs,
|
|
) -> PyResult {
|
|
let mut final_string = String::new();
|
|
if format_string.format_parts.iter().any(FormatPart::is_auto)
|
|
&& format_string.format_parts.iter().any(FormatPart::is_index)
|
|
{
|
|
return Err(vm.new_value_error(
|
|
"cannot switch from automatic field numbering to manual field specification"
|
|
.to_string(),
|
|
));
|
|
}
|
|
let mut auto_argument_index: usize = 1;
|
|
for part in &format_string.format_parts {
|
|
let result_string: String = match part {
|
|
FormatPart::AutoSpec(format_spec) => {
|
|
let result = match arguments.args.get(auto_argument_index) {
|
|
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
|
|
None => {
|
|
return Err(vm.new_index_error("tuple index out of range".to_string()));
|
|
}
|
|
};
|
|
auto_argument_index += 1;
|
|
get_value(&result)
|
|
}
|
|
FormatPart::IndexSpec(index, format_spec) => {
|
|
let result = match arguments.args.get(*index + 1) {
|
|
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
|
|
None => {
|
|
return Err(vm.new_index_error("tuple index out of range".to_string()));
|
|
}
|
|
};
|
|
get_value(&result)
|
|
}
|
|
FormatPart::KeywordSpec(keyword, format_spec) => {
|
|
let result = match arguments.get_optional_kwarg(&keyword) {
|
|
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
|
|
None => {
|
|
return Err(vm.new_key_error(format!("'{}'", keyword)));
|
|
}
|
|
};
|
|
get_value(&result)
|
|
}
|
|
FormatPart::Literal(literal) => literal.clone(),
|
|
};
|
|
final_string.push_str(&result_string);
|
|
}
|
|
Ok(vm.ctx.new_str(final_string))
|
|
}
|
|
|
|
// TODO: should with following format
|
|
// class str(object='')
|
|
// class str(object=b'', encoding='utf-8', errors='strict')
|
|
fn str_new(
|
|
cls: PyClassRef,
|
|
object: OptionalArg<PyObjectRef>,
|
|
vm: &mut VirtualMachine,
|
|
) -> PyResult<PyStringRef> {
|
|
let string = match object {
|
|
OptionalArg::Present(ref input) => vm.to_str(input)?,
|
|
OptionalArg::Missing => vm.new_str("".to_string()),
|
|
};
|
|
if string.typ().is(&cls) {
|
|
TryFromObject::try_from_object(vm, string)
|
|
} else {
|
|
let payload = string.payload::<PyString>().unwrap();
|
|
PyRef::new_with_type(vm, payload.clone(), cls)
|
|
}
|
|
}
|
|
|
|
impl PySliceableSequence for String {
|
|
fn do_slice(&self, range: Range<usize>) -> Self {
|
|
to_graphemes(self)
|
|
.get(range)
|
|
.map_or(String::default(), |c| c.join(""))
|
|
}
|
|
|
|
fn do_slice_reverse(&self, range: Range<usize>) -> Self {
|
|
to_graphemes(self)
|
|
.get_mut(range)
|
|
.map_or(String::default(), |slice| {
|
|
slice.reverse();
|
|
slice.join("")
|
|
})
|
|
}
|
|
|
|
fn do_stepped_slice(&self, range: Range<usize>, step: usize) -> Self {
|
|
if let Some(s) = to_graphemes(self).get(range) {
|
|
return s
|
|
.iter()
|
|
.cloned()
|
|
.step_by(step)
|
|
.collect::<Vec<String>>()
|
|
.join("");
|
|
}
|
|
String::default()
|
|
}
|
|
|
|
fn do_stepped_slice_reverse(&self, range: Range<usize>, step: usize) -> Self {
|
|
if let Some(s) = to_graphemes(self).get(range) {
|
|
return s
|
|
.iter()
|
|
.rev()
|
|
.cloned()
|
|
.step_by(step)
|
|
.collect::<Vec<String>>()
|
|
.join("");
|
|
}
|
|
String::default()
|
|
}
|
|
|
|
fn empty() -> Self {
|
|
String::default()
|
|
}
|
|
|
|
fn len(&self) -> usize {
|
|
to_graphemes(self).len()
|
|
}
|
|
|
|
fn is_empty(&self) -> bool {
|
|
self.is_empty()
|
|
}
|
|
}
|
|
|
|
/// Convert a string-able `value` to a vec of graphemes
|
|
/// represents the string according to user perceived characters
|
|
fn to_graphemes<S: AsRef<str>>(value: S) -> Vec<String> {
|
|
UnicodeSegmentation::graphemes(value.as_ref(), true)
|
|
.map(String::from)
|
|
.collect()
|
|
}
|
|
|
|
pub fn subscript(vm: &mut VirtualMachine, value: &str, b: PyObjectRef) -> PyResult {
|
|
if objtype::isinstance(&b, &vm.ctx.int_type()) {
|
|
match objint::get_value(&b).to_i32() {
|
|
Some(pos) => {
|
|
let graphemes = to_graphemes(value);
|
|
if let Some(idx) = graphemes.get_pos(pos) {
|
|
Ok(vm.new_str(graphemes[idx].to_string()))
|
|
} else {
|
|
Err(vm.new_index_error("string index out of range".to_string()))
|
|
}
|
|
}
|
|
None => {
|
|
Err(vm.new_index_error("cannot fit 'int' into an index-sized integer".to_string()))
|
|
}
|
|
}
|
|
} else if b.payload::<PySlice>().is_some() {
|
|
let string = value.to_string().get_slice_items(vm, &b)?;
|
|
Ok(vm.new_str(string))
|
|
} else {
|
|
panic!(
|
|
"TypeError: indexing type {:?} with index {:?} is not supported (yet?)",
|
|
value, b
|
|
)
|
|
}
|
|
}
|
|
|
|
// help get optional string indices
|
|
fn adjust_indices(
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
len: usize,
|
|
) -> Option<(usize, usize)> {
|
|
let mut start = start.into_option().unwrap_or(0);
|
|
let mut end = end.into_option().unwrap_or(len as isize);
|
|
if end > len as isize {
|
|
end = len as isize;
|
|
} else if end < 0 {
|
|
end += len as isize;
|
|
if end < 0 {
|
|
end = 0;
|
|
}
|
|
}
|
|
if start < 0 {
|
|
start += len as isize;
|
|
if start < 0 {
|
|
start = 0;
|
|
}
|
|
}
|
|
if start > end {
|
|
None
|
|
} else {
|
|
Some((start as usize, end as usize))
|
|
}
|
|
}
|
|
|
|
/// Title-cases `s`: the first letter of every run of alphabetic characters is
/// uppercased and the remaining letters of the run are lowercased.
///
/// Any non-alphabetic character is copied as-is and starts a new word.
/// Case conversion is Unicode-aware, so non-ASCII letters are handled too.
fn make_title(s: &str) -> String {
    let mut titled_str = String::with_capacity(s.len());
    let mut at_word_start = true;
    for c in s.chars() {
        if c.is_alphabetic() {
            if at_word_start {
                titled_str.extend(c.to_uppercase());
            } else {
                titled_str.extend(c.to_lowercase());
            }
            at_word_start = false;
        } else {
            titled_str.push(c);
            at_word_start = true;
        }
    }
    titled_str
}
|