use crate::builtins::int::PyIntRef; use crate::cformat::CFormatString; use crate::function::{single_or_tuple_any, OptionalOption}; use crate::pyobject::{ BorrowValue, PyIterator, PyObjectRef, PyResult, TryFromObject, TypeProtocol, }; use crate::vm::VirtualMachine; use num_traits::{cast::ToPrimitive, sign::Signed}; use std::str::FromStr; #[derive(FromArgs)] pub struct SplitArgs<'s, T: TryFromObject + AnyStrWrapper<'s>> { #[pyarg(any, default)] sep: Option, #[pyarg(any, default = "-1")] maxsplit: isize, _phantom: std::marker::PhantomData<&'s ()>, } impl<'s, T: TryFromObject + AnyStrWrapper<'s>> SplitArgs<'s, T> { pub fn get_value(self, vm: &VirtualMachine) -> PyResult<(Option, isize)> { let sep = if let Some(s) = self.sep { let sep = s.as_ref(); if sep.is_empty() { return Err(vm.new_value_error("empty separator".to_owned())); } Some(s) } else { None }; Ok((sep, self.maxsplit)) } } #[derive(FromArgs)] pub struct SplitLinesArgs { #[pyarg(any, default = "false")] pub keepends: bool, } #[derive(FromArgs)] pub struct ExpandTabsArgs { #[pyarg(any, default = "8")] tabsize: isize, } impl ExpandTabsArgs { pub fn tabsize(&self) -> usize { self.tabsize.to_usize().unwrap_or(0) } } #[derive(FromArgs)] pub struct StartsEndsWithArgs { #[pyarg(positional)] affix: PyObjectRef, #[pyarg(positional, default)] start: Option, #[pyarg(positional, default)] end: Option, } impl StartsEndsWithArgs { fn get_value(self, len: usize) -> (PyObjectRef, std::ops::Range) { let range = adjust_indices(self.start, self.end, len); (self.affix, range) } } fn saturate_to_isize(py_int: PyIntRef) -> isize { let big = py_int.borrow_value(); big.to_isize().unwrap_or_else(|| { if big.is_negative() { std::isize::MIN } else { std::isize::MAX } }) } // help get optional string indices pub fn adjust_indices( start: Option, end: Option, len: usize, ) -> std::ops::Range { let mut start = start.map_or(0, saturate_to_isize); let mut end = end.map_or(len as isize, saturate_to_isize); if end > len as isize { end = len as isize; } else if end < 0 { end += len as isize; if end < 0 { end = 0; } } if start < 0 { start += len as isize; if start < 0 { start = 0; } } start as usize..end as usize } pub trait StringRange { fn is_normal(&self) -> bool; } impl StringRange for std::ops::Range { fn is_normal(&self) -> bool { self.start <= self.end } } pub trait AnyStrWrapper<'s> { type Str: ?Sized + AnyStr<'s>; fn as_ref(&self) -> &Self::Str; } pub trait AnyStrContainer where S: ?Sized, { fn new() -> Self; fn with_capacity(capacity: usize) -> Self; fn push_str(&mut self, s: &S); } // TODO: GATs for `'s` once stabilized pub trait AnyStr<'s>: 's { type Char: Copy; type Container: AnyStrContainer + Extend; type CharIter: Iterator + 's; type ElementIter: Iterator + 's; fn element_bytes_len(c: Self::Char) -> usize; fn to_container(&self) -> Self::Container; fn as_bytes(&self) -> &[u8]; fn as_utf8_str(&self) -> Result<&str, std::str::Utf8Error>; fn chars(&'s self) -> Self::CharIter; fn elements(&'s self) -> Self::ElementIter; fn get_bytes(&self, range: std::ops::Range) -> &Self; // FIXME: get_chars is expensive for str fn get_chars(&self, range: std::ops::Range) -> &Self; fn bytes_len(&self) -> usize; // fn chars_len(&self) -> usize; // cannot access to cache here fn is_empty(&self) -> bool; fn py_add(&self, other: &Self) -> Self::Container { let mut new = Self::Container::with_capacity(self.bytes_len() + other.bytes_len()); new.push_str(self); new.push_str(other); new } fn py_split( &self, args: SplitArgs<'s, T>, vm: &VirtualMachine, split: SP, splitn: SN, splitw: SW, ) -> PyResult> where T: TryFromObject + AnyStrWrapper<'s, Str = Self>, SP: Fn(&Self, &Self, &VirtualMachine) -> Vec, SN: Fn(&Self, &Self, usize, &VirtualMachine) -> Vec, SW: Fn(&Self, isize, &VirtualMachine) -> Vec, { let (sep, maxsplit) = args.get_value(vm)?; let splited = if let Some(pattern) = sep { if maxsplit < 0 { split(self, pattern.as_ref(), vm) } else { splitn(self, pattern.as_ref(), (maxsplit + 1) as usize, vm) } } else { splitw(self, maxsplit, vm) }; Ok(splited) } fn py_split_whitespace(&self, maxsplit: isize, convert: F) -> Vec where F: Fn(&Self) -> PyObjectRef; fn py_rsplit_whitespace(&self, maxsplit: isize, convert: F) -> Vec where F: Fn(&Self) -> PyObjectRef; #[inline] fn py_startsendswith( &self, args: StartsEndsWithArgs, func_name: &str, py_type_name: &str, func: F, vm: &VirtualMachine, ) -> PyResult where T: TryFromObject, F: Fn(&Self, &T) -> bool, { let (affix, range) = args.get_value(self.bytes_len()); if range.is_normal() { let value = self.get_bytes(range); single_or_tuple_any( affix, &|s: &T| Ok(func(value, s)), &|o| { format!( "{} first arg must be {} or a tuple of {}, not {}", func_name, py_type_name, py_type_name, o.class(), ) }, vm, ) } else { Ok(false) } } #[inline] fn py_strip<'a, S, FC, FD>( &'a self, chars: OptionalOption, func_chars: FC, func_default: FD, ) -> &'a Self where S: AnyStrWrapper<'s, Str = Self>, FC: Fn(&'a Self, &Self) -> &'a Self, FD: Fn(&'a Self) -> &'a Self, { let chars = chars.flatten(); match chars { Some(chars) => func_chars(self, chars.as_ref()), None => func_default(self), } } #[inline] fn py_find(&self, needle: &Self, range: std::ops::Range, find: F) -> Option where F: Fn(&Self, &Self) -> Option, { if range.is_normal() { let start = range.start; let index = find(self.get_chars(range), &needle)?; Some(start + index) } else { None } } #[inline] fn py_count(&self, needle: &Self, range: std::ops::Range, count: F) -> usize where F: Fn(&Self, &Self) -> usize, { if range.is_normal() { count(self.get_chars(range), &needle) } else { 0 } } fn py_pad(&self, left: usize, right: usize, fillchar: Self::Char) -> Self::Container { let mut u = Self::Container::with_capacity( (left + right) * Self::element_bytes_len(fillchar) + self.bytes_len(), ); u.extend(std::iter::repeat(fillchar).take(left)); u.push_str(self); u.extend(std::iter::repeat(fillchar).take(right)); u } fn py_center(&self, width: usize, fillchar: Self::Char, len: usize) -> Self::Container { let marg = width - len; let left = marg / 2 + (marg & width & 1); self.py_pad(left, marg - left, fillchar) } fn py_ljust(&self, width: usize, fillchar: Self::Char, len: usize) -> Self::Container { self.py_pad(0, width - len, fillchar) } fn py_rjust(&self, width: usize, fillchar: Self::Char, len: usize) -> Self::Container { self.py_pad(width - len, 0, fillchar) } fn py_join<'a>( &self, mut iter: PyIterator<'a, impl AnyStrWrapper<'s, Str = Self> + TryFromObject>, ) -> PyResult { let mut joined = if let Some(elem) = iter.next() { elem?.as_ref().to_container() } else { return Ok(Self::Container::new()); }; for elem in iter { let elem = elem?; joined.push_str(self); joined.push_str(elem.as_ref()); } Ok(joined) } fn py_partition<'a, F, S>( &'a self, sub: &Self, split: F, vm: &VirtualMachine, ) -> PyResult<(Self::Container, bool, Self::Container)> where F: Fn() -> S, S: std::iter::Iterator, { if sub.is_empty() { return Err(vm.new_value_error("empty separator".to_owned())); } let mut sp = split(); let front = sp.next().unwrap().to_container(); let (has_mid, back) = if let Some(back) = sp.next() { (true, back.to_container()) } else { (false, Self::Container::new()) }; Ok((front, has_mid, back)) } fn py_removeprefix(&self, prefix: &Self, prefix_len: usize, is_prefix: FC) -> &Self where FC: Fn(&Self, &Self) -> bool, { //if self.py_starts_with(prefix) { if is_prefix(&self, prefix) { self.get_bytes(prefix_len..self.bytes_len()) } else { &self } } fn py_removesuffix(&self, suffix: &Self, suffix_len: usize, is_suffix: FC) -> &Self where FC: Fn(&Self, &Self) -> bool, { if is_suffix(&self, suffix) { self.get_bytes(0..self.bytes_len() - suffix_len) } else { &self } } fn py_splitlines(&self, options: SplitLinesArgs, into_wrapper: FW) -> Vec where FW: Fn(&Self) -> W, { let keep = if options.keepends { 1 } else { 0 }; let mut elements = Vec::new(); let mut last_i = 0; let mut enumerated = self.as_bytes().iter().enumerate().peekable(); while let Some((i, ch)) = enumerated.next() { let (end_len, i_diff) = match *ch { b'\n' => (keep, 1), b'\r' => { let is_rn = enumerated.peek().map_or(false, |(_, ch)| **ch == b'\n'); if is_rn { let _ = enumerated.next(); (keep + keep, 2) } else { (keep, 1) } } _ => { continue; } }; let range = last_i..i + end_len; last_i = i + i_diff; elements.push(into_wrapper(self.get_bytes(range))); } if last_i != self.bytes_len() { elements.push(into_wrapper(self.get_bytes(last_i..self.bytes_len()))); } elements } fn py_zfill(&self, width: isize) -> Vec { let width = width.to_usize().unwrap_or(0); rustpython_common::str::zfill(self.as_bytes(), width) } fn py_iscase(&'s self, is_case: F, is_opposite: G) -> bool where F: Fn(char) -> bool, G: Fn(char) -> bool, { // Unified form of CPython functions: // _Py_bytes_islower // Py_bytes_isupper // unicode_islower_impl // unicode_isupper_impl let mut cased = false; for c in self.chars() { if is_opposite(c) { return false; } else if !cased && is_case(c) { cased = true } } cased } fn py_cformat(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult { let format_string = self.as_utf8_str().unwrap(); CFormatString::from_str(format_string) .map_err(|err| vm.new_value_error(err.to_string()))? .format(vm, values) } }