mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
1716 lines
56 KiB
Rust
1716 lines
56 KiB
Rust
extern crate unicode_categories;
|
|
extern crate unicode_xid;
|
|
|
|
use std::cell::Cell;
|
|
use std::char;
|
|
use std::fmt;
|
|
use std::mem::size_of;
|
|
use std::ops::Range;
|
|
use std::str::FromStr;
|
|
use std::string::ToString;
|
|
|
|
use num_traits::ToPrimitive;
|
|
use unicode_casing::CharExt;
|
|
use unicode_categories::UnicodeCategories;
|
|
use unicode_xid::UnicodeXID;
|
|
|
|
use super::objbytes::PyBytes;
|
|
use super::objdict::PyDict;
|
|
use super::objfloat;
|
|
use super::objint::{self, PyInt};
|
|
use super::objiter;
|
|
use super::objnone::PyNone;
|
|
use super::objsequence::PySliceableSequence;
|
|
use super::objslice::PySlice;
|
|
use super::objtuple;
|
|
use super::objtype::{self, PyClassRef};
|
|
use crate::cformat::{
|
|
CFormatPart, CFormatPreconversor, CFormatQuantity, CFormatSpec, CFormatString, CFormatType,
|
|
CNumberType,
|
|
};
|
|
use crate::format::{FormatParseError, FormatPart, FormatPreconversor, FormatString};
|
|
use crate::function::{single_or_tuple_any, OptionalArg, PyFuncArgs};
|
|
use crate::pyhash;
|
|
use crate::pyobject::{
|
|
IdProtocol, IntoPyObject, ItemProtocol, PyClassImpl, PyContext, PyIterable, PyObjectRef, PyRef,
|
|
PyResult, PyValue, TryFromObject, TryIntoRef, TypeProtocol,
|
|
};
|
|
use crate::vm::VirtualMachine;
|
|
|
|
/// str(object='') -> str
|
|
/// str(bytes_or_buffer[, encoding[, errors]]) -> str
|
|
///
|
|
/// Create a new string object from the given object. If encoding or
|
|
/// errors is specified, then the object must expose a data buffer
|
|
/// that will be decoded using the given encoding and error handler.
|
|
/// Otherwise, returns the result of object.__str__() (if defined)
|
|
/// or repr(object).
|
|
/// encoding defaults to sys.getdefaultencoding().
|
|
/// errors defaults to 'strict'."
|
|
#[pyclass(name = "str")]
|
|
#[derive(Clone, Debug)]
|
|
pub struct PyString {
|
|
value: String,
|
|
hash: Cell<Option<pyhash::PyHash>>,
|
|
}
|
|
|
|
impl PyString {
|
|
#[inline]
|
|
pub fn as_str(&self) -> &str {
|
|
&self.value
|
|
}
|
|
}
|
|
|
|
impl From<&str> for PyString {
|
|
fn from(s: &str) -> PyString {
|
|
s.to_string().into()
|
|
}
|
|
}
|
|
|
|
impl From<String> for PyString {
|
|
fn from(s: String) -> PyString {
|
|
PyString {
|
|
value: s,
|
|
hash: Cell::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub type PyStringRef = PyRef<PyString>;
|
|
|
|
impl fmt::Display for PyString {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
fmt::Display::fmt(&self.value, f)
|
|
}
|
|
}
|
|
|
|
impl TryIntoRef<PyString> for String {
|
|
fn try_into_ref(self, vm: &VirtualMachine) -> PyResult<PyRef<PyString>> {
|
|
Ok(PyString::from(self).into_ref(vm))
|
|
}
|
|
}
|
|
|
|
impl TryIntoRef<PyString> for &str {
|
|
fn try_into_ref(self, vm: &VirtualMachine) -> PyResult<PyRef<PyString>> {
|
|
Ok(PyString::from(self).into_ref(vm))
|
|
}
|
|
}
|
|
|
|
#[pyclass]
|
|
#[derive(Debug)]
|
|
pub struct PyStringIterator {
|
|
pub string: PyStringRef,
|
|
position: Cell<usize>,
|
|
}
|
|
|
|
impl PyValue for PyStringIterator {
|
|
fn class(vm: &VirtualMachine) -> PyClassRef {
|
|
vm.ctx.striterator_type()
|
|
}
|
|
}
|
|
|
|
#[pyimpl]
|
|
impl PyStringIterator {
|
|
#[pymethod(name = "__next__")]
|
|
fn next(&self, vm: &VirtualMachine) -> PyResult {
|
|
let pos = self.position.get();
|
|
|
|
if pos < self.string.value.chars().count() {
|
|
self.position.set(self.position.get() + 1);
|
|
|
|
#[allow(clippy::range_plus_one)]
|
|
let value = self.string.value.do_slice(pos..pos + 1);
|
|
|
|
value.into_pyobject(vm)
|
|
} else {
|
|
Err(objiter::new_stop_iteration(vm))
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__iter__")]
|
|
fn iter(zelf: PyRef<Self>, _vm: &VirtualMachine) -> PyRef<Self> {
|
|
zelf
|
|
}
|
|
}
|
|
|
|
#[pyclass]
|
|
#[derive(Debug)]
|
|
pub struct PyStringReverseIterator {
|
|
pub position: Cell<usize>,
|
|
pub string: PyStringRef,
|
|
}
|
|
|
|
impl PyValue for PyStringReverseIterator {
|
|
fn class(vm: &VirtualMachine) -> PyClassRef {
|
|
vm.ctx.strreverseiterator_type()
|
|
}
|
|
}
|
|
|
|
#[pyimpl]
|
|
impl PyStringReverseIterator {
|
|
#[pymethod(name = "__next__")]
|
|
fn next(&self, vm: &VirtualMachine) -> PyResult {
|
|
if self.position.get() > 0 {
|
|
let position: usize = self.position.get() - 1;
|
|
|
|
#[allow(clippy::range_plus_one)]
|
|
let value = self.string.value.do_slice(position..position + 1);
|
|
|
|
self.position.set(position);
|
|
value.into_pyobject(vm)
|
|
} else {
|
|
Err(objiter::new_stop_iteration(vm))
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__iter__")]
|
|
fn iter(zelf: PyRef<Self>, _vm: &VirtualMachine) -> PyRef<Self> {
|
|
zelf
|
|
}
|
|
}
|
|
|
|
#[pyimpl]
|
|
impl PyString {
|
|
// TODO: should with following format
|
|
// class str(object='')
|
|
// class str(object=b'', encoding='utf-8', errors='strict')
|
|
#[pyslot(new)]
|
|
fn tp_new(
|
|
cls: PyClassRef,
|
|
object: OptionalArg<PyObjectRef>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<PyStringRef> {
|
|
let string = match object {
|
|
OptionalArg::Present(ref input) => vm.to_str(input)?.into_object(),
|
|
OptionalArg::Missing => vm.new_str("".to_string()),
|
|
};
|
|
if string.class().is(&cls) {
|
|
TryFromObject::try_from_object(vm, string)
|
|
} else {
|
|
let payload = string.payload::<PyString>().unwrap();
|
|
payload.clone().into_ref_with_type(vm, cls)
|
|
}
|
|
}
|
|
#[pymethod(name = "__add__")]
|
|
fn add(&self, rhs: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(format!("{}{}", self.value, get_value(&rhs)))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot add {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__bool__")]
|
|
fn bool(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty()
|
|
}
|
|
|
|
#[pymethod(name = "__eq__")]
|
|
fn eq(&self, rhs: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
vm.new_bool(self.value == get_value(&rhs))
|
|
} else {
|
|
vm.ctx.not_implemented()
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__ne__")]
|
|
fn ne(&self, rhs: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
vm.new_bool(self.value != get_value(&rhs))
|
|
} else {
|
|
vm.ctx.not_implemented()
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__contains__")]
|
|
fn contains(&self, needle: PyStringRef, _vm: &VirtualMachine) -> bool {
|
|
self.value.contains(&needle.value)
|
|
}
|
|
|
|
#[pymethod(name = "__getitem__")]
|
|
fn getitem(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult {
|
|
subscript(vm, &self.value, needle)
|
|
}
|
|
|
|
#[pymethod(name = "__gt__")]
|
|
fn gt(&self, rhs: PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value > get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__ge__")]
|
|
fn ge(&self, rhs: PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value >= get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__lt__")]
|
|
fn lt(&self, rhs: PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value < get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__le__")]
|
|
fn le(&self, rhs: PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {
|
|
if objtype::isinstance(&rhs, &vm.ctx.str_type()) {
|
|
Ok(self.value <= get_value(&rhs))
|
|
} else {
|
|
Err(vm.new_type_error(format!("Cannot compare {} and {}", self, rhs)))
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__hash__")]
|
|
fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
|
|
match self.hash.get() {
|
|
Some(hash) => hash,
|
|
None => {
|
|
let hash = pyhash::hash_value(&self.value);
|
|
self.hash.set(Some(hash));
|
|
hash
|
|
}
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__len__")]
|
|
fn len(&self, _vm: &VirtualMachine) -> usize {
|
|
self.value.chars().count()
|
|
}
|
|
|
|
#[pymethod(name = "__sizeof__")]
|
|
fn sizeof(&self, _vm: &VirtualMachine) -> usize {
|
|
size_of::<Self>() + self.value.capacity() * size_of::<u8>()
|
|
}
|
|
|
|
#[pymethod(name = "__mul__")]
|
|
fn mul(&self, multiplier: isize, vm: &VirtualMachine) -> PyResult<String> {
|
|
multiplier
|
|
.max(0)
|
|
.to_usize()
|
|
.map(|multiplier| self.value.repeat(multiplier))
|
|
.ok_or_else(|| {
|
|
vm.new_overflow_error("cannot fit 'int' into an index-sized integer".to_string())
|
|
})
|
|
}
|
|
|
|
#[pymethod(name = "__rmul__")]
|
|
fn rmul(&self, val: isize, vm: &VirtualMachine) -> PyResult<String> {
|
|
self.mul(val, vm)
|
|
}
|
|
|
|
#[pymethod(name = "__str__")]
|
|
fn str(zelf: PyRef<Self>, _vm: &VirtualMachine) -> PyStringRef {
|
|
zelf
|
|
}
|
|
|
|
#[pymethod(name = "__repr__")]
|
|
fn repr(&self, _vm: &VirtualMachine) -> String {
|
|
let value = &self.value;
|
|
let quote_char = if count_char(value, '\'') > count_char(value, '"') {
|
|
'"'
|
|
} else {
|
|
'\''
|
|
};
|
|
let mut formatted = String::with_capacity(value.len());
|
|
formatted.push(quote_char);
|
|
for c in value.chars() {
|
|
if c == quote_char || c == '\\' {
|
|
formatted.push('\\');
|
|
formatted.push(c);
|
|
} else if c == '\n' {
|
|
formatted.push_str("\\n")
|
|
} else if c == '\t' {
|
|
formatted.push_str("\\t");
|
|
} else if c == '\r' {
|
|
formatted.push_str("\\r");
|
|
} else if c < ' ' || c as u32 == 0x7F {
|
|
formatted.push_str(&format!("\\x{:02x}", c as u32));
|
|
} else if c.is_ascii() {
|
|
formatted.push(c);
|
|
} else if c.is_other() || c.is_separator() {
|
|
// According to python following categories aren't printable:
|
|
// * Cc (Other, Control)
|
|
// * Cf (Other, Format)
|
|
// * Cs (Other, Surrogate)
|
|
// * Co (Other, Private Use)
|
|
// * Cn (Other, Not Assigned)
|
|
// * Zl Separator, Line ('\u2028', LINE SEPARATOR)
|
|
// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
|
|
// * Zs (Separator, Space) other than ASCII space('\x20').
|
|
let code = c as u32;
|
|
let escaped = if code < 0xff {
|
|
format!("\\U{:02x}", code)
|
|
} else if code < 0xffff {
|
|
format!("\\U{:04x}", code)
|
|
} else {
|
|
format!("\\U{:08x}", code)
|
|
};
|
|
formatted.push_str(&escaped);
|
|
} else {
|
|
formatted.push(c)
|
|
}
|
|
}
|
|
formatted.push(quote_char);
|
|
formatted
|
|
}
|
|
|
|
#[pymethod]
|
|
fn lower(&self, _vm: &VirtualMachine) -> String {
|
|
self.value.to_lowercase()
|
|
}
|
|
|
|
// casefold is much more aggressive than lower
|
|
#[pymethod]
|
|
fn casefold(&self, _vm: &VirtualMachine) -> String {
|
|
caseless::default_case_fold_str(&self.value)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn upper(&self, _vm: &VirtualMachine) -> String {
|
|
self.value.to_uppercase()
|
|
}
|
|
|
|
#[pymethod]
|
|
fn capitalize(&self, _vm: &VirtualMachine) -> String {
|
|
let (first_part, lower_str) = self.value.split_at(1);
|
|
format!("{}{}", first_part.to_uppercase(), lower_str)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn split(&self, args: SplitArgs, vm: &VirtualMachine) -> PyObjectRef {
|
|
let value = &self.value;
|
|
let pattern = args.sep.as_ref().map(|s| s.as_str());
|
|
let num_splits = args.maxsplit;
|
|
let elements: Vec<_> = match (pattern, num_splits.is_negative()) {
|
|
(Some(pattern), true) => value
|
|
.split(pattern)
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
(Some(pattern), false) => value
|
|
.splitn(num_splits as usize + 1, pattern)
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
(None, true) => value
|
|
.split(|c: char| c.is_ascii_whitespace())
|
|
.filter(|s| !s.is_empty())
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
(None, false) => value
|
|
.splitn(num_splits as usize + 1, |c: char| c.is_ascii_whitespace())
|
|
.filter(|s| !s.is_empty())
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
};
|
|
vm.ctx.new_list(elements)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn rsplit(&self, args: SplitArgs, vm: &VirtualMachine) -> PyObjectRef {
|
|
let value = &self.value;
|
|
let pattern = args.sep.as_ref().map(|s| s.as_str());
|
|
let num_splits = args.maxsplit;
|
|
let mut elements: Vec<_> = match (pattern, num_splits.is_negative()) {
|
|
(Some(pattern), true) => value
|
|
.rsplit(pattern)
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
(Some(pattern), false) => value
|
|
.rsplitn(num_splits as usize + 1, pattern)
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
(None, true) => value
|
|
.rsplit(|c: char| c.is_ascii_whitespace())
|
|
.filter(|s| !s.is_empty())
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
(None, false) => value
|
|
.rsplitn(num_splits as usize + 1, |c: char| c.is_ascii_whitespace())
|
|
.filter(|s| !s.is_empty())
|
|
.map(|o| vm.ctx.new_str(o.to_string()))
|
|
.collect(),
|
|
};
|
|
// Unlike Python rsplit, Rust rsplitn returns an iterator that
|
|
// starts from the end of the string.
|
|
elements.reverse();
|
|
vm.ctx.new_list(elements)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn strip(&self, chars: OptionalArg<PyStringRef>, _vm: &VirtualMachine) -> String {
|
|
let chars = match chars {
|
|
OptionalArg::Present(ref chars) => &chars.value,
|
|
OptionalArg::Missing => return self.value.trim().to_string(),
|
|
};
|
|
self.value.trim_matches(|c| chars.contains(c)).to_string()
|
|
}
|
|
|
|
#[pymethod]
|
|
fn lstrip(&self, chars: OptionalArg<PyStringRef>, _vm: &VirtualMachine) -> String {
|
|
let chars = match chars {
|
|
OptionalArg::Present(ref chars) => &chars.value,
|
|
OptionalArg::Missing => return self.value.trim_start().to_string(),
|
|
};
|
|
self.value
|
|
.trim_start_matches(|c| chars.contains(c))
|
|
.to_string()
|
|
}
|
|
|
|
#[pymethod]
|
|
fn rstrip(&self, chars: OptionalArg<PyStringRef>, _vm: &VirtualMachine) -> String {
|
|
let chars = match chars {
|
|
OptionalArg::Present(ref chars) => &chars.value,
|
|
OptionalArg::Missing => return self.value.trim_end().to_string(),
|
|
};
|
|
self.value
|
|
.trim_end_matches(|c| chars.contains(c))
|
|
.to_string()
|
|
}
|
|
|
|
#[pymethod]
|
|
fn endswith(
|
|
&self,
|
|
suffix: PyObjectRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<bool> {
|
|
if let Some((start, end)) = adjust_indices(start, end, self.value.len()) {
|
|
let value = &self.value[start..end];
|
|
single_or_tuple_any(
|
|
suffix,
|
|
|s: PyStringRef| Ok(value.ends_with(&s.value)),
|
|
|o| {
|
|
format!(
|
|
"endswith first arg must be str or a tuple of str, not {}",
|
|
o.class(),
|
|
)
|
|
},
|
|
vm,
|
|
)
|
|
} else {
|
|
Ok(false)
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn startswith(
|
|
&self,
|
|
prefix: PyObjectRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<bool> {
|
|
if let Some((start, end)) = adjust_indices(start, end, self.value.len()) {
|
|
let value = &self.value[start..end];
|
|
single_or_tuple_any(
|
|
prefix,
|
|
|s: PyStringRef| Ok(value.starts_with(&s.value)),
|
|
|o| {
|
|
format!(
|
|
"startswith first arg must be str or a tuple of str, not {}",
|
|
o.class(),
|
|
)
|
|
},
|
|
vm,
|
|
)
|
|
} else {
|
|
Ok(false)
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isalnum(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty() && self.value.chars().all(char::is_alphanumeric)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isnumeric(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty() && self.value.chars().all(char::is_numeric)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isdigit(&self, _vm: &VirtualMachine) -> bool {
|
|
// python's isdigit also checks if exponents are digits, these are the unicodes for exponents
|
|
let valid_unicodes: [u16; 10] = [
|
|
0x2070, 0x00B9, 0x00B2, 0x00B3, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079,
|
|
];
|
|
|
|
if self.value.is_empty() {
|
|
false
|
|
} else {
|
|
self.value
|
|
.chars()
|
|
.filter(|c| !c.is_digit(10))
|
|
.all(|c| valid_unicodes.contains(&(c as u16)))
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isdecimal(&self, _vm: &VirtualMachine) -> bool {
|
|
if self.value.is_empty() {
|
|
false
|
|
} else {
|
|
self.value.chars().all(|c| c.is_ascii_digit())
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__mod__")]
|
|
fn modulo(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult {
|
|
let format_string_text = &self.value;
|
|
let format_string = CFormatString::from_str(format_string_text)
|
|
.map_err(|err| vm.new_value_error(err.to_string()))?;
|
|
do_cformat(vm, format_string, values.clone())
|
|
}
|
|
|
|
#[pymethod(name = "__rmod__")]
|
|
fn rmod(&self, _values: PyObjectRef, vm: &VirtualMachine) -> PyResult {
|
|
Ok(vm.ctx.not_implemented())
|
|
}
|
|
|
|
#[pymethod]
|
|
fn format(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult {
|
|
if args.args.is_empty() {
|
|
return Err(vm.new_type_error(
|
|
"descriptor 'format' of 'str' object needs an argument".to_string(),
|
|
));
|
|
}
|
|
|
|
let zelf = &args.args[0];
|
|
if !objtype::isinstance(&zelf, &vm.ctx.str_type()) {
|
|
let zelf_typ = zelf.class();
|
|
let actual_type = vm.to_pystr(&zelf_typ)?;
|
|
return Err(vm.new_type_error(format!(
|
|
"descriptor 'format' requires a 'str' object but received a '{}'",
|
|
actual_type
|
|
)));
|
|
}
|
|
let format_string_text = get_value(zelf);
|
|
match FormatString::from_str(format_string_text.as_str()) {
|
|
Ok(format_string) => perform_format(vm, &format_string, &args),
|
|
Err(err) => match err {
|
|
FormatParseError::UnmatchedBracket => {
|
|
Err(vm.new_value_error("expected '}' before end of string".to_string()))
|
|
}
|
|
_ => Err(vm.new_value_error("Unexpected error parsing format string".to_string())),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Return a titlecased version of the string where words start with an
|
|
/// uppercase character and the remaining characters are lowercase.
|
|
#[pymethod]
|
|
fn title(&self, _vm: &VirtualMachine) -> String {
|
|
let mut title = String::with_capacity(self.value.len());
|
|
let mut previous_is_cased = false;
|
|
for c in self.value.chars() {
|
|
if c.is_lowercase() {
|
|
if !previous_is_cased {
|
|
title.extend(c.to_titlecase());
|
|
} else {
|
|
title.push(c);
|
|
}
|
|
previous_is_cased = true;
|
|
} else if c.is_uppercase() || c.is_titlecase() {
|
|
if previous_is_cased {
|
|
title.extend(c.to_lowercase());
|
|
} else {
|
|
title.push(c);
|
|
}
|
|
previous_is_cased = true;
|
|
} else {
|
|
previous_is_cased = false;
|
|
title.push(c);
|
|
}
|
|
}
|
|
title
|
|
}
|
|
|
|
#[pymethod]
|
|
fn swapcase(&self, _vm: &VirtualMachine) -> String {
|
|
let mut swapped_str = String::with_capacity(self.value.len());
|
|
for c in self.value.chars() {
|
|
// to_uppercase returns an iterator, to_ascii_uppercase returns the char
|
|
if c.is_lowercase() {
|
|
swapped_str.push(c.to_ascii_uppercase());
|
|
} else if c.is_uppercase() {
|
|
swapped_str.push(c.to_ascii_lowercase());
|
|
} else {
|
|
swapped_str.push(c);
|
|
}
|
|
}
|
|
swapped_str
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isalpha(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty() && self.value.chars().all(char::is_alphanumeric)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn replace(
|
|
&self,
|
|
old: PyStringRef,
|
|
new: PyStringRef,
|
|
num: OptionalArg<usize>,
|
|
_vm: &VirtualMachine,
|
|
) -> String {
|
|
match num.into_option() {
|
|
Some(num) => self.value.replacen(&old.value, &new.value, num),
|
|
None => self.value.replace(&old.value, &new.value),
|
|
}
|
|
}
|
|
|
|
/// Return true if all characters in the string are printable or the string is empty,
|
|
/// false otherwise. Nonprintable characters are those characters defined in the
|
|
/// Unicode character database as `Other` or `Separator`,
|
|
/// excepting the ASCII space (0x20) which is considered printable.
|
|
///
|
|
/// All characters except those characters defined in the Unicode character
|
|
/// database as following categories are considered printable.
|
|
/// * Cc (Other, Control)
|
|
/// * Cf (Other, Format)
|
|
/// * Cs (Other, Surrogate)
|
|
/// * Co (Other, Private Use)
|
|
/// * Cn (Other, Not Assigned)
|
|
/// * Zl Separator, Line ('\u2028', LINE SEPARATOR)
|
|
/// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
|
|
/// * Zs (Separator, Space) other than ASCII space('\x20').
|
|
#[pymethod]
|
|
fn isprintable(&self, _vm: &VirtualMachine) -> bool {
|
|
self.value.chars().all(|c| match c {
|
|
'\u{0020}' => true,
|
|
_ => !(c.is_other_control() | c.is_separator()),
|
|
})
|
|
}
|
|
|
|
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
|
|
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
|
|
#[pymethod]
|
|
fn isspace(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty() && self.value.chars().all(|c| c.is_ascii_whitespace())
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isupper(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty()
|
|
&& self
|
|
.value
|
|
.chars()
|
|
.filter(|x| !x.is_ascii_whitespace())
|
|
.all(char::is_uppercase)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn islower(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty()
|
|
&& self
|
|
.value
|
|
.chars()
|
|
.filter(|x| !x.is_ascii_whitespace())
|
|
.all(char::is_lowercase)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isascii(&self, _vm: &VirtualMachine) -> bool {
|
|
!self.value.is_empty() && self.value.chars().all(|c| c.is_ascii())
|
|
}
|
|
|
|
// doesn't implement keep new line delimiter just yet
|
|
#[pymethod]
|
|
fn splitlines(&self, vm: &VirtualMachine) -> PyObjectRef {
|
|
let elements = self
|
|
.value
|
|
.split('\n')
|
|
.map(|e| vm.ctx.new_str(e.to_string()))
|
|
.collect();
|
|
vm.ctx.new_list(elements)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn join(&self, iterable: PyIterable<PyStringRef>, vm: &VirtualMachine) -> PyResult<String> {
|
|
let mut joined = String::new();
|
|
|
|
for (idx, elem) in iterable.iter(vm)?.enumerate() {
|
|
let elem = elem?;
|
|
if idx != 0 {
|
|
joined.push_str(&self.value);
|
|
}
|
|
joined.push_str(&elem.value)
|
|
}
|
|
|
|
Ok(joined)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn find(
|
|
&self,
|
|
sub: PyStringRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &VirtualMachine,
|
|
) -> isize {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].find(&sub.value) {
|
|
Some(num) => (start + num) as isize,
|
|
None => -1 as isize,
|
|
}
|
|
} else {
|
|
-1 as isize
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn rfind(
|
|
&self,
|
|
sub: PyStringRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &VirtualMachine,
|
|
) -> isize {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].rfind(&sub.value) {
|
|
Some(num) => (start + num) as isize,
|
|
None => -1 as isize,
|
|
}
|
|
} else {
|
|
-1 as isize
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn index(
|
|
&self,
|
|
sub: PyStringRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<usize> {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].find(&sub.value) {
|
|
Some(num) => Ok(start + num),
|
|
None => Err(vm.new_value_error("substring not found".to_string())),
|
|
}
|
|
} else {
|
|
Err(vm.new_value_error("substring not found".to_string()))
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn rindex(
|
|
&self,
|
|
sub: PyStringRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<usize> {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
match value[start..end].rfind(&sub.value) {
|
|
Some(num) => Ok(start + num),
|
|
None => Err(vm.new_value_error("substring not found".to_string())),
|
|
}
|
|
} else {
|
|
Err(vm.new_value_error("substring not found".to_string()))
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn partition(&self, sub: PyStringRef, vm: &VirtualMachine) -> PyObjectRef {
|
|
let value = &self.value;
|
|
let sub = &sub.value;
|
|
let mut new_tup = Vec::new();
|
|
if value.contains(sub) {
|
|
new_tup = value
|
|
.splitn(2, sub)
|
|
.map(|s| vm.ctx.new_str(s.to_string()))
|
|
.collect();
|
|
new_tup.insert(1, vm.ctx.new_str(sub.clone()));
|
|
} else {
|
|
new_tup.push(vm.ctx.new_str(value.clone()));
|
|
new_tup.push(vm.ctx.new_str("".to_string()));
|
|
new_tup.push(vm.ctx.new_str("".to_string()));
|
|
}
|
|
vm.ctx.new_tuple(new_tup)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn rpartition(&self, sub: PyStringRef, vm: &VirtualMachine) -> PyObjectRef {
|
|
let value = &self.value;
|
|
let sub = &sub.value;
|
|
let mut new_tup = Vec::new();
|
|
if value.contains(sub) {
|
|
new_tup = value
|
|
.rsplitn(2, sub)
|
|
.map(|s| vm.ctx.new_str(s.to_string()))
|
|
.collect();
|
|
new_tup.swap(0, 1); // so it's in the right order
|
|
new_tup.insert(1, vm.ctx.new_str(sub.clone()));
|
|
} else {
|
|
new_tup.push(vm.ctx.new_str("".to_string()));
|
|
new_tup.push(vm.ctx.new_str("".to_string()));
|
|
new_tup.push(vm.ctx.new_str(value.clone()));
|
|
}
|
|
vm.ctx.new_tuple(new_tup)
|
|
}
|
|
|
|
/// Return `true` if the sequence is ASCII titlecase and the sequence is not
|
|
/// empty, `false` otherwise.
|
|
#[pymethod]
|
|
fn istitle(&self, _vm: &VirtualMachine) -> bool {
|
|
if self.value.is_empty() {
|
|
return false;
|
|
}
|
|
|
|
let mut cased = false;
|
|
let mut previous_is_cased = false;
|
|
for c in self.value.chars() {
|
|
if c.is_uppercase() || c.is_titlecase() {
|
|
if previous_is_cased {
|
|
return false;
|
|
}
|
|
previous_is_cased = true;
|
|
cased = true;
|
|
} else if c.is_lowercase() {
|
|
if !previous_is_cased {
|
|
return false;
|
|
}
|
|
previous_is_cased = true;
|
|
cased = true;
|
|
} else {
|
|
previous_is_cased = false;
|
|
}
|
|
}
|
|
cased
|
|
}
|
|
|
|
#[pymethod]
|
|
fn count(
|
|
&self,
|
|
sub: PyStringRef,
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
_vm: &VirtualMachine,
|
|
) -> usize {
|
|
let value = &self.value;
|
|
if let Some((start, end)) = adjust_indices(start, end, value.len()) {
|
|
self.value[start..end].matches(&sub.value).count()
|
|
} else {
|
|
0
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn zfill(&self, len: usize, _vm: &VirtualMachine) -> String {
|
|
let value = &self.value;
|
|
if len <= value.len() {
|
|
value.to_string()
|
|
} else {
|
|
format!("{}{}", "0".repeat(len - value.len()), value)
|
|
}
|
|
}
|
|
|
|
fn get_fill_char<'a>(
|
|
rep: &'a OptionalArg<PyStringRef>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<&'a str> {
|
|
let rep_str = match rep {
|
|
OptionalArg::Present(ref st) => &st.value,
|
|
OptionalArg::Missing => " ",
|
|
};
|
|
if rep_str.len() == 1 {
|
|
Ok(rep_str)
|
|
} else {
|
|
Err(vm.new_type_error(
|
|
"The fill character must be exactly one character long".to_string(),
|
|
))
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn ljust(
|
|
&self,
|
|
len: usize,
|
|
rep: OptionalArg<PyStringRef>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<String> {
|
|
let value = &self.value;
|
|
let rep_char = Self::get_fill_char(&rep, vm)?;
|
|
if len <= value.len() {
|
|
Ok(value.to_string())
|
|
} else {
|
|
Ok(format!("{}{}", value, rep_char.repeat(len - value.len())))
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn rjust(
|
|
&self,
|
|
len: usize,
|
|
rep: OptionalArg<PyStringRef>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<String> {
|
|
let value = &self.value;
|
|
let rep_char = Self::get_fill_char(&rep, vm)?;
|
|
if len <= value.len() {
|
|
Ok(value.to_string())
|
|
} else {
|
|
Ok(format!("{}{}", rep_char.repeat(len - value.len()), value))
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn center(
|
|
&self,
|
|
len: usize,
|
|
rep: OptionalArg<PyStringRef>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult<String> {
|
|
let value = &self.value;
|
|
let rep_char = Self::get_fill_char(&rep, vm)?;
|
|
let value_len = self.value.chars().count();
|
|
|
|
if len <= value_len {
|
|
return Ok(value.to_string());
|
|
}
|
|
let diff: usize = len - value_len;
|
|
let mut left_buff: usize = diff / 2;
|
|
let mut right_buff: usize = left_buff;
|
|
|
|
if diff % 2 != 0 && value_len % 2 == 0 {
|
|
left_buff += 1
|
|
}
|
|
|
|
if diff % 2 != 0 && value_len % 2 != 0 {
|
|
right_buff += 1
|
|
}
|
|
Ok(format!(
|
|
"{}{}{}",
|
|
rep_char.repeat(left_buff),
|
|
value,
|
|
rep_char.repeat(right_buff)
|
|
))
|
|
}
|
|
|
|
#[pymethod]
|
|
fn expandtabs(&self, tab_stop: OptionalArg<usize>, _vm: &VirtualMachine) -> String {
|
|
let tab_stop = tab_stop.into_option().unwrap_or(8 as usize);
|
|
let mut expanded_str = String::with_capacity(self.value.len());
|
|
let mut tab_size = tab_stop;
|
|
let mut col_count = 0 as usize;
|
|
for ch in self.value.chars() {
|
|
// 0x0009 is tab
|
|
if ch == 0x0009 as char {
|
|
let num_spaces = tab_size - col_count;
|
|
col_count += num_spaces;
|
|
let expand = " ".repeat(num_spaces);
|
|
expanded_str.push_str(&expand);
|
|
} else {
|
|
expanded_str.push(ch);
|
|
col_count += 1;
|
|
}
|
|
if col_count >= tab_size {
|
|
tab_size += tab_stop;
|
|
}
|
|
}
|
|
expanded_str
|
|
}
|
|
|
|
#[pymethod]
|
|
fn isidentifier(&self, _vm: &VirtualMachine) -> bool {
|
|
let mut chars = self.value.chars();
|
|
let is_identifier_start = match chars.next() {
|
|
Some('_') => true,
|
|
Some(c) => UnicodeXID::is_xid_start(c),
|
|
None => false,
|
|
};
|
|
// a string is not an identifier if it has whitespace or starts with a number
|
|
is_identifier_start && chars.all(UnicodeXID::is_xid_continue)
|
|
}
|
|
|
|
// https://docs.python.org/3/library/stdtypes.html#str.translate
|
|
#[pymethod]
|
|
fn translate(&self, table: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
|
|
vm.get_method_or_type_error(table.clone(), "__getitem__", || {
|
|
format!("'{}' object is not subscriptable", table.class().name)
|
|
})?;
|
|
|
|
let mut translated = String::new();
|
|
for c in self.value.chars() {
|
|
match table.get_item(&(c as u32).into_pyobject(vm)?, vm) {
|
|
Ok(value) => {
|
|
if let Some(text) = value.payload::<PyString>() {
|
|
translated.push_str(&text.value);
|
|
} else if let Some(bigint) = value.payload::<PyInt>() {
|
|
match bigint.as_bigint().to_u32().and_then(std::char::from_u32) {
|
|
Some(ch) => translated.push(ch as char),
|
|
None => {
|
|
return Err(vm.new_value_error(
|
|
"character mapping must be in range(0x110000)".to_owned(),
|
|
));
|
|
}
|
|
}
|
|
} else if value.payload::<PyNone>().is_some() {
|
|
// Do Nothing
|
|
} else {
|
|
return Err(vm.new_type_error(
|
|
"character mapping must return integer, None or str".to_owned(),
|
|
));
|
|
}
|
|
}
|
|
_ => translated.push(c),
|
|
}
|
|
}
|
|
Ok(translated)
|
|
}
|
|
|
|
#[pymethod]
|
|
fn maketrans(
|
|
dict_or_str: PyObjectRef,
|
|
to_str: OptionalArg<PyStringRef>,
|
|
none_str: OptionalArg<PyStringRef>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult {
|
|
let new_dict = vm.context().new_dict();
|
|
if let OptionalArg::Present(to_str) = to_str {
|
|
match dict_or_str.downcast::<PyString>() {
|
|
Ok(from_str) => {
|
|
if to_str.len(vm) == from_str.len(vm) {
|
|
for (c1, c2) in from_str.value.chars().zip(to_str.value.chars()) {
|
|
new_dict.set_item(&vm.new_int(c1 as u32), vm.new_int(c2 as u32), vm)?;
|
|
}
|
|
if let OptionalArg::Present(none_str) = none_str {
|
|
for c in none_str.value.chars() {
|
|
new_dict.set_item(&vm.new_int(c as u32), vm.get_none(), vm)?;
|
|
}
|
|
}
|
|
new_dict.into_pyobject(vm)
|
|
} else {
|
|
Err(vm.new_value_error(
|
|
"the first two maketrans arguments must have equal length".to_owned(),
|
|
))
|
|
}
|
|
}
|
|
_ => Err(vm.new_type_error(
|
|
"first maketrans argument must be a string if there is a second argument"
|
|
.to_owned(),
|
|
)),
|
|
}
|
|
} else {
|
|
// dict_str must be a dict
|
|
match dict_or_str.downcast::<PyDict>() {
|
|
Ok(dict) => {
|
|
for (key, val) in dict {
|
|
if let Some(num) = key.payload::<PyInt>() {
|
|
new_dict.set_item(
|
|
&num.as_bigint().to_i32().into_pyobject(vm)?,
|
|
val,
|
|
vm,
|
|
)?;
|
|
} else if let Some(string) = key.payload::<PyString>() {
|
|
if string.len(vm) == 1 {
|
|
let num_value = string.value.chars().next().unwrap() as u32;
|
|
new_dict.set_item(&num_value.into_pyobject(vm)?, val, vm)?;
|
|
} else {
|
|
return Err(vm.new_value_error(
|
|
"string keys in translate table must be of length 1".to_owned(),
|
|
));
|
|
}
|
|
}
|
|
}
|
|
new_dict.into_pyobject(vm)
|
|
}
|
|
_ => Err(vm.new_value_error(
|
|
"if you give only one argument to maketrans it must be a dict".to_owned(),
|
|
)),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[pymethod]
|
|
fn encode(
|
|
&self,
|
|
encoding: OptionalArg<PyObjectRef>,
|
|
_errors: OptionalArg<PyObjectRef>,
|
|
vm: &VirtualMachine,
|
|
) -> PyResult {
|
|
let encoding = encoding.map_or_else(
|
|
|| Ok("utf-8".to_string()),
|
|
|v| {
|
|
if objtype::isinstance(&v, &vm.ctx.str_type()) {
|
|
Ok(get_value(&v))
|
|
} else {
|
|
Err(vm.new_type_error(format!(
|
|
"encode() argument 1 must be str, not {}",
|
|
v.class().name
|
|
)))
|
|
}
|
|
},
|
|
)?;
|
|
|
|
let encoded = PyBytes::from_string(&self.value, &encoding, vm)?;
|
|
Ok(encoded.into_pyobject(vm)?)
|
|
}
|
|
|
|
#[pymethod(name = "__iter__")]
|
|
fn iter(zelf: PyRef<Self>, _vm: &VirtualMachine) -> PyStringIterator {
|
|
PyStringIterator {
|
|
position: Cell::new(0),
|
|
string: zelf,
|
|
}
|
|
}
|
|
|
|
#[pymethod(name = "__reversed__")]
|
|
fn reversed(zelf: PyRef<Self>, _vm: &VirtualMachine) -> PyStringReverseIterator {
|
|
let begin = zelf.value.chars().count();
|
|
|
|
PyStringReverseIterator {
|
|
position: Cell::new(begin),
|
|
string: zelf,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl PyValue for PyString {
|
|
fn class(vm: &VirtualMachine) -> PyClassRef {
|
|
vm.ctx.str_type()
|
|
}
|
|
}
|
|
|
|
impl IntoPyObject for String {
|
|
fn into_pyobject(self, vm: &VirtualMachine) -> PyResult {
|
|
Ok(vm.ctx.new_str(self))
|
|
}
|
|
}
|
|
|
|
impl IntoPyObject for &str {
|
|
fn into_pyobject(self, vm: &VirtualMachine) -> PyResult {
|
|
Ok(vm.ctx.new_str(self.to_string()))
|
|
}
|
|
}
|
|
|
|
impl IntoPyObject for &String {
|
|
fn into_pyobject(self, vm: &VirtualMachine) -> PyResult {
|
|
Ok(vm.ctx.new_str(self.clone()))
|
|
}
|
|
}
|
|
|
|
#[derive(FromArgs)]
|
|
struct SplitArgs {
|
|
#[pyarg(positional_or_keyword, default = "None")]
|
|
sep: Option<PyStringRef>,
|
|
#[pyarg(positional_or_keyword, default = "-1")]
|
|
maxsplit: isize,
|
|
}
|
|
|
|
pub fn init(ctx: &PyContext) {
|
|
PyString::extend_class(ctx, &ctx.types.str_type);
|
|
|
|
PyStringIterator::extend_class(ctx, &ctx.types.striterator_type);
|
|
PyStringReverseIterator::extend_class(ctx, &ctx.types.strreverseiterator_type);
|
|
}
|
|
|
|
pub fn get_value(obj: &PyObjectRef) -> String {
|
|
obj.payload::<PyString>().unwrap().value.clone()
|
|
}
|
|
|
|
pub fn borrow_value(obj: &PyObjectRef) -> &str {
|
|
&obj.payload::<PyString>().unwrap().value
|
|
}
|
|
|
|
fn count_char(s: &str, c: char) -> usize {
|
|
s.chars().filter(|x| *x == c).count()
|
|
}
|
|
|
|
fn call_getitem(vm: &VirtualMachine, container: &PyObjectRef, key: &PyObjectRef) -> PyResult {
|
|
vm.call_method(container, "__getitem__", vec![key.clone()])
|
|
}
|
|
|
|
fn call_object_format(vm: &VirtualMachine, argument: PyObjectRef, format_spec: &str) -> PyResult {
|
|
let (preconversor, new_format_spec) = FormatPreconversor::parse_and_consume(format_spec);
|
|
let argument = match preconversor {
|
|
Some(FormatPreconversor::Str) => vm.call_method(&argument, "__str__", vec![])?,
|
|
Some(FormatPreconversor::Repr) => vm.call_method(&argument, "__repr__", vec![])?,
|
|
Some(FormatPreconversor::Ascii) => vm.call_method(&argument, "__repr__", vec![])?,
|
|
None => argument,
|
|
};
|
|
let returned_type = vm.ctx.new_str(new_format_spec.to_string());
|
|
|
|
let result = vm.call_method(&argument, "__format__", vec![returned_type])?;
|
|
if !objtype::isinstance(&result, &vm.ctx.str_type()) {
|
|
let result_type = result.class();
|
|
let actual_type = vm.to_pystr(&result_type)?;
|
|
return Err(vm.new_type_error(format!("__format__ must return a str, not {}", actual_type)));
|
|
}
|
|
Ok(result)
|
|
}
|
|
|
|
fn do_cformat_specifier(
|
|
vm: &VirtualMachine,
|
|
format_spec: &mut CFormatSpec,
|
|
obj: PyObjectRef,
|
|
) -> PyResult<String> {
|
|
use CNumberType::*;
|
|
// do the formatting by type
|
|
let format_type = &format_spec.format_type;
|
|
|
|
match format_type {
|
|
CFormatType::String(preconversor) => {
|
|
let result = match preconversor {
|
|
CFormatPreconversor::Str => vm.call_method(&obj.clone(), "__str__", vec![])?,
|
|
CFormatPreconversor::Repr => vm.call_method(&obj.clone(), "__repr__", vec![])?,
|
|
CFormatPreconversor::Ascii => vm.call_method(&obj.clone(), "__repr__", vec![])?,
|
|
};
|
|
Ok(format_spec.format_string(get_value(&result)))
|
|
}
|
|
CFormatType::Number(_) => {
|
|
if !objtype::isinstance(&obj, &vm.ctx.int_type()) {
|
|
let required_type_string = match format_type {
|
|
CFormatType::Number(Decimal) => "a number",
|
|
CFormatType::Number(_) => "an integer",
|
|
_ => unreachable!(),
|
|
};
|
|
return Err(vm.new_type_error(format!(
|
|
"%{} format: {} is required, not {}",
|
|
format_spec.format_char,
|
|
required_type_string,
|
|
obj.class()
|
|
)));
|
|
}
|
|
Ok(format_spec.format_number(objint::get_value(&obj)))
|
|
}
|
|
CFormatType::Float(_) => if objtype::isinstance(&obj, &vm.ctx.float_type()) {
|
|
format_spec.format_float(objfloat::get_value(&obj))
|
|
} else if objtype::isinstance(&obj, &vm.ctx.int_type()) {
|
|
format_spec.format_float(objint::get_value(&obj).to_f64().unwrap())
|
|
} else {
|
|
let required_type_string = "an floating point or integer";
|
|
return Err(vm.new_type_error(format!(
|
|
"%{} format: {} is required, not {}",
|
|
format_spec.format_char,
|
|
required_type_string,
|
|
obj.class()
|
|
)));
|
|
}
|
|
.map_err(|e| vm.new_not_implemented_error(e)),
|
|
CFormatType::Character => {
|
|
let char_string = {
|
|
if objtype::isinstance(&obj, &vm.ctx.int_type()) {
|
|
// BigInt truncation is fine in this case because only the unicode range is relevant
|
|
match objint::get_value(&obj).to_u32().and_then(char::from_u32) {
|
|
Some(value) => Ok(value.to_string()),
|
|
None => {
|
|
Err(vm.new_overflow_error("%c arg not in range(0x110000)".to_string()))
|
|
}
|
|
}
|
|
} else if objtype::isinstance(&obj, &vm.ctx.str_type()) {
|
|
let s: String = get_value(&obj);
|
|
let num_chars = s.chars().count();
|
|
if num_chars != 1 {
|
|
Err(vm.new_type_error("%c requires int or char".to_string()))
|
|
} else {
|
|
Ok(s.chars().next().unwrap().to_string())
|
|
}
|
|
} else {
|
|
// TODO re-arrange this block so this error is only created once
|
|
Err(vm.new_type_error("%c requires int or char".to_string()))
|
|
}
|
|
}?;
|
|
format_spec.precision = Some(CFormatQuantity::Amount(1));
|
|
Ok(format_spec.format_string(char_string))
|
|
}
|
|
}
|
|
}
|
|
|
|
fn try_update_quantity_from_tuple(
|
|
vm: &VirtualMachine,
|
|
elements: &mut dyn Iterator<Item = PyObjectRef>,
|
|
q: &mut Option<CFormatQuantity>,
|
|
mut tuple_index: usize,
|
|
) -> PyResult<usize> {
|
|
match q {
|
|
Some(CFormatQuantity::FromValuesTuple) => {
|
|
match elements.next() {
|
|
Some(width_obj) => {
|
|
tuple_index += 1;
|
|
if !objtype::isinstance(&width_obj, &vm.ctx.int_type()) {
|
|
Err(vm.new_type_error("* wants int".to_string()))
|
|
} else {
|
|
// TODO: handle errors when truncating BigInt to usize
|
|
*q = Some(CFormatQuantity::Amount(
|
|
objint::get_value(&width_obj).to_usize().unwrap(),
|
|
));
|
|
Ok(tuple_index)
|
|
}
|
|
}
|
|
None => {
|
|
Err(vm.new_type_error("not enough arguments for format string".to_string()))
|
|
}
|
|
}
|
|
}
|
|
_ => Ok(tuple_index),
|
|
}
|
|
}
|
|
|
|
fn do_cformat(
|
|
vm: &VirtualMachine,
|
|
mut format_string: CFormatString,
|
|
values_obj: PyObjectRef,
|
|
) -> PyResult {
|
|
let mut final_string = String::new();
|
|
let num_specifiers = format_string
|
|
.format_parts
|
|
.iter()
|
|
.filter(|(_, part)| CFormatPart::is_specifier(part))
|
|
.count();
|
|
let mapping_required = format_string
|
|
.format_parts
|
|
.iter()
|
|
.any(|(_, part)| CFormatPart::has_key(part))
|
|
&& format_string
|
|
.format_parts
|
|
.iter()
|
|
.filter(|(_, part)| CFormatPart::is_specifier(part))
|
|
.all(|(_, part)| CFormatPart::has_key(part));
|
|
|
|
let values = if mapping_required {
|
|
if !objtype::isinstance(&values_obj, &vm.ctx.dict_type()) {
|
|
return Err(vm.new_type_error("format requires a mapping".to_string()));
|
|
}
|
|
values_obj.clone()
|
|
} else {
|
|
// check for only literal parts, in which case only dict or empty tuple is allowed
|
|
if num_specifiers == 0
|
|
&& !(objtype::isinstance(&values_obj, &vm.ctx.types.tuple_type)
|
|
&& objtuple::get_value(&values_obj).is_empty())
|
|
&& !objtype::isinstance(&values_obj, &vm.ctx.types.dict_type)
|
|
{
|
|
return Err(vm.new_type_error(
|
|
"not all arguments converted during string formatting".to_string(),
|
|
));
|
|
}
|
|
|
|
// convert `values_obj` to a new tuple if it's not a tuple
|
|
if !objtype::isinstance(&values_obj, &vm.ctx.tuple_type()) {
|
|
vm.ctx.new_tuple(vec![values_obj.clone()])
|
|
} else {
|
|
values_obj.clone()
|
|
}
|
|
};
|
|
|
|
let mut tuple_index: usize = 0;
|
|
for (_, part) in &mut format_string.format_parts {
|
|
let result_string: String = match part {
|
|
CFormatPart::Spec(format_spec) => {
|
|
// try to get the object
|
|
let obj: PyObjectRef = match &format_spec.mapping_key {
|
|
Some(key) => {
|
|
// TODO: change the KeyError message to match the one in cpython
|
|
call_getitem(vm, &values, &vm.ctx.new_str(key.to_string()))?
|
|
}
|
|
None => {
|
|
let mut elements =
|
|
objtuple::get_value(&values).into_iter().skip(tuple_index);
|
|
|
|
tuple_index = try_update_quantity_from_tuple(
|
|
vm,
|
|
&mut elements,
|
|
&mut format_spec.min_field_width,
|
|
tuple_index,
|
|
)?;
|
|
tuple_index = try_update_quantity_from_tuple(
|
|
vm,
|
|
&mut elements,
|
|
&mut format_spec.precision,
|
|
tuple_index,
|
|
)?;
|
|
|
|
let obj = match elements.next() {
|
|
Some(obj) => Ok(obj),
|
|
None => Err(vm.new_type_error(
|
|
"not enough arguments for format string".to_string(),
|
|
)),
|
|
}?;
|
|
tuple_index += 1;
|
|
|
|
obj
|
|
}
|
|
};
|
|
do_cformat_specifier(vm, format_spec, obj)
|
|
}
|
|
CFormatPart::Literal(literal) => Ok(literal.clone()),
|
|
}?;
|
|
final_string.push_str(&result_string);
|
|
}
|
|
|
|
// check that all arguments were converted
|
|
if (!mapping_required
|
|
&& objtuple::get_value(&values)
|
|
.into_iter()
|
|
.nth(tuple_index)
|
|
.is_some())
|
|
&& !objtype::isinstance(&values_obj, &vm.ctx.types.dict_type)
|
|
{
|
|
return Err(
|
|
vm.new_type_error("not all arguments converted during string formatting".to_string())
|
|
);
|
|
}
|
|
Ok(vm.ctx.new_str(final_string))
|
|
}
|
|
|
|
fn perform_format(
|
|
vm: &VirtualMachine,
|
|
format_string: &FormatString,
|
|
arguments: &PyFuncArgs,
|
|
) -> PyResult {
|
|
let mut final_string = String::new();
|
|
if format_string.format_parts.iter().any(FormatPart::is_auto)
|
|
&& format_string.format_parts.iter().any(FormatPart::is_index)
|
|
{
|
|
return Err(vm.new_value_error(
|
|
"cannot switch from automatic field numbering to manual field specification"
|
|
.to_string(),
|
|
));
|
|
}
|
|
let mut auto_argument_index: usize = 1;
|
|
for part in &format_string.format_parts {
|
|
let result_string: String = match part {
|
|
FormatPart::AutoSpec(format_spec) => {
|
|
let result = match arguments.args.get(auto_argument_index) {
|
|
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
|
|
None => {
|
|
return Err(vm.new_index_error("tuple index out of range".to_string()));
|
|
}
|
|
};
|
|
auto_argument_index += 1;
|
|
get_value(&result)
|
|
}
|
|
FormatPart::IndexSpec(index, format_spec) => {
|
|
let result = match arguments.args.get(*index + 1) {
|
|
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
|
|
None => {
|
|
return Err(vm.new_index_error("tuple index out of range".to_string()));
|
|
}
|
|
};
|
|
get_value(&result)
|
|
}
|
|
FormatPart::KeywordSpec(keyword, format_spec) => {
|
|
let result = match arguments.get_optional_kwarg(&keyword) {
|
|
Some(argument) => call_object_format(vm, argument.clone(), &format_spec)?,
|
|
None => {
|
|
return Err(vm.new_key_error(vm.new_str(keyword.to_string())));
|
|
}
|
|
};
|
|
get_value(&result)
|
|
}
|
|
FormatPart::Literal(literal) => literal.clone(),
|
|
};
|
|
final_string.push_str(&result_string);
|
|
}
|
|
Ok(vm.ctx.new_str(final_string))
|
|
}
|
|
|
|
impl PySliceableSequence for String {
|
|
type Sliced = String;
|
|
|
|
fn do_slice(&self, range: Range<usize>) -> Self::Sliced {
|
|
self.chars()
|
|
.skip(range.start)
|
|
.take(range.end - range.start)
|
|
.collect()
|
|
}
|
|
|
|
fn do_slice_reverse(&self, range: Range<usize>) -> Self::Sliced {
|
|
let count = self.chars().count();
|
|
|
|
self.chars()
|
|
.rev()
|
|
.skip(count - range.end)
|
|
.take(range.end - range.start)
|
|
.collect()
|
|
}
|
|
|
|
fn do_stepped_slice(&self, range: Range<usize>, step: usize) -> Self::Sliced {
|
|
self.chars()
|
|
.skip(range.start)
|
|
.take(range.end - range.start)
|
|
.step_by(step)
|
|
.collect()
|
|
}
|
|
|
|
fn do_stepped_slice_reverse(&self, range: Range<usize>, step: usize) -> Self::Sliced {
|
|
let count = self.chars().count();
|
|
|
|
self.chars()
|
|
.rev()
|
|
.skip(count - range.end)
|
|
.take(range.end - range.start)
|
|
.step_by(step)
|
|
.collect()
|
|
}
|
|
|
|
fn empty() -> Self::Sliced {
|
|
String::default()
|
|
}
|
|
|
|
fn len(&self) -> usize {
|
|
self.chars().count()
|
|
}
|
|
|
|
fn is_empty(&self) -> bool {
|
|
self.is_empty()
|
|
}
|
|
}
|
|
|
|
pub fn subscript(vm: &VirtualMachine, value: &str, b: PyObjectRef) -> PyResult {
|
|
if objtype::isinstance(&b, &vm.ctx.int_type()) {
|
|
match objint::get_value(&b).to_isize() {
|
|
Some(pos) => {
|
|
let index: usize = if pos.is_negative() {
|
|
(value.chars().count() as isize + pos) as usize
|
|
} else {
|
|
pos.abs() as usize
|
|
};
|
|
|
|
if let Some(character) = value.chars().nth(index) {
|
|
Ok(vm.new_str(character.to_string()))
|
|
} else {
|
|
Err(vm.new_index_error("string index out of range".to_string()))
|
|
}
|
|
}
|
|
None => {
|
|
Err(vm.new_index_error("cannot fit 'int' into an index-sized integer".to_string()))
|
|
}
|
|
}
|
|
} else if b.payload::<PySlice>().is_some() {
|
|
let string = value.to_string().get_slice_items(vm, &b)?;
|
|
Ok(vm.new_str(string))
|
|
} else {
|
|
Err(vm.new_type_error(format!(
|
|
"indexing type {:?} with index {:?} is not supported",
|
|
value, b
|
|
)))
|
|
}
|
|
}
|
|
|
|
// help get optional string indices
|
|
fn adjust_indices(
|
|
start: OptionalArg<isize>,
|
|
end: OptionalArg<isize>,
|
|
len: usize,
|
|
) -> Option<(usize, usize)> {
|
|
let mut start = start.into_option().unwrap_or(0);
|
|
let mut end = end.into_option().unwrap_or(len as isize);
|
|
if end > len as isize {
|
|
end = len as isize;
|
|
} else if end < 0 {
|
|
end += len as isize;
|
|
if end < 0 {
|
|
end = 0;
|
|
}
|
|
}
|
|
if start < 0 {
|
|
start += len as isize;
|
|
if start < 0 {
|
|
start = 0;
|
|
}
|
|
}
|
|
if start > end {
|
|
None
|
|
} else {
|
|
Some((start as usize, end as usize))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn str_title() {
|
|
let vm: VirtualMachine = Default::default();
|
|
|
|
let tests = vec![
|
|
(" Hello ", " hello "),
|
|
("Hello ", "hello "),
|
|
("Hello ", "Hello "),
|
|
("Format This As Title String", "fOrMaT thIs aS titLe String"),
|
|
("Format,This-As*Title;String", "fOrMaT,thIs-aS*titLe;String"),
|
|
("Getint", "getInt"),
|
|
("Greek Ωppercases ...", "greek ωppercases ..."),
|
|
("Greek ῼitlecases ...", "greek ῳitlecases ..."),
|
|
];
|
|
for (title, input) in tests {
|
|
assert_eq!(PyString::from(input).title(&vm).as_str(), title);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn str_istitle() {
|
|
let vm: VirtualMachine = Default::default();
|
|
|
|
let pos = vec![
|
|
"A",
|
|
"A Titlecased Line",
|
|
"A\nTitlecased Line",
|
|
"A Titlecased, Line",
|
|
"Greek Ωppercases ...",
|
|
"Greek ῼitlecases ...",
|
|
];
|
|
|
|
for s in pos {
|
|
assert!(PyString::from(s).istitle(&vm));
|
|
}
|
|
|
|
let neg = vec![
|
|
"",
|
|
"a",
|
|
"\n",
|
|
"Not a capitalized String",
|
|
"Not\ta Titlecase String",
|
|
"Not--a Titlecase String",
|
|
"NOT",
|
|
];
|
|
for s in neg {
|
|
assert!(!PyString::from(s).istitle(&vm));
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn str_maketrans_and_translate() {
|
|
let vm: VirtualMachine = Default::default();
|
|
|
|
let table = vm.context().new_dict();
|
|
table
|
|
.set_item("a", vm.new_str("🎅".to_owned()), &vm)
|
|
.unwrap();
|
|
table.set_item("b", vm.get_none(), &vm).unwrap();
|
|
table
|
|
.set_item("c", vm.new_str("xda".to_owned()), &vm)
|
|
.unwrap();
|
|
let translated = PyString::maketrans(
|
|
table.into_object(),
|
|
OptionalArg::Missing,
|
|
OptionalArg::Missing,
|
|
&vm,
|
|
)
|
|
.unwrap();
|
|
let text = PyString::from("abc");
|
|
let translated = text.translate(translated, &vm).unwrap();
|
|
assert_eq!(translated, "🎅xda".to_owned());
|
|
let translated = text.translate(vm.new_int(3), &vm);
|
|
assert_eq!(translated.unwrap_err().class().name, "TypeError".to_owned());
|
|
}
|
|
}
|