Merge pull request #1887 from youknowone/pystr-pad

Fix str.center & move pad methods into pystr
This commit is contained in:
Jeong YunWon
2020-04-29 19:15:39 +09:00
committed by GitHub
5 changed files with 166 additions and 146 deletions

View File

@@ -896,8 +896,6 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('ß'.swapcase(), 'SS')
self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300')
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_center(self):
string_tests.CommonTest.test_center(self)
self.assertEqual('x'.center(2, '\U0010FFFF'),
@@ -960,8 +958,6 @@ class UnicodeTest(string_tests.CommonTest,
self.assertNotIn(delim * 2, fill)
self.assertIn(delim * 2, fill + delim * 2)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_issue18183(self):
'\U00010000\U00100000'.lower()
'\U00010000\U00100000'.casefold()

View File

@@ -1,6 +1,5 @@
use bstr::ByteSlice;
use num_bigint::{BigInt, ToBigInt};
use num_integer::Integer;
use num_traits::{One, Signed, ToPrimitive, Zero};
use std::convert::TryFrom;
use std::ops::Range;
@@ -179,25 +178,17 @@ impl ByteInnerFindOptions {
#[derive(FromArgs)]
pub struct ByteInnerPaddingOptions {
#[pyarg(positional_only, optional = false)]
width: PyIntRef,
width: isize,
#[pyarg(positional_only, optional = true)]
fillbyte: OptionalArg<PyObjectRef>,
fillchar: OptionalArg<PyObjectRef>,
}
impl ByteInnerPaddingOptions {
fn get_value(self, fn_name: &str, len: usize, vm: &VirtualMachine) -> PyResult<(u8, usize)> {
let fillbyte = if let OptionalArg::Present(v) = &self.fillbyte {
match try_as_byte(&v) {
Some(x) => {
if x.len() == 1 {
x[0]
} else {
return Err(vm.new_type_error(format!(
"{}() argument 2 must be a byte string of length 1, not {}",
fn_name, &v
)));
}
}
None => {
fn get_value(self, fn_name: &str, vm: &VirtualMachine) -> PyResult<(isize, u8)> {
let fillchar = if let OptionalArg::Present(v) = self.fillchar {
match try_as_byte(v.clone()) {
Some(x) if x.len() == 1 => x[0],
_ => {
return Err(vm.new_type_error(format!(
"{}() argument 2 must be a byte string of length 1, not {}",
fn_name, &v
@@ -208,20 +199,7 @@ impl ByteInnerPaddingOptions {
b' ' // default is space
};
// <0 = no change
let width = if let Some(x) = self.width.as_bigint().to_usize() {
if x <= len {
0
} else {
x
}
} else {
0
};
let diff: usize = if width != 0 { width - len } else { 0 };
Ok((fillbyte, diff))
Ok((self.width, fillchar))
}
}
@@ -724,30 +702,27 @@ impl PyByteInner {
.collect::<Vec<u8>>())
}
#[inline]
fn pad(
&self,
options: ByteInnerPaddingOptions,
pad: fn(&[u8], usize, u8) -> Vec<u8>,
vm: &VirtualMachine,
) -> PyResult<Vec<u8>> {
let (width, fillchar) = options.get_value("center", vm)?;
Ok(if self.len() as isize >= width {
Vec::from(&self.elements[..])
} else {
pad(&self.elements, width as usize, fillchar)
})
}
pub fn center(
&self,
options: ByteInnerPaddingOptions,
vm: &VirtualMachine,
) -> PyResult<Vec<u8>> {
let (fillbyte, diff) = options.get_value("center", self.len(), vm)?;
let mut ln: usize = diff / 2;
let mut rn: usize = ln;
if diff.is_odd() && self.len() % 2 == 0 {
ln += 1
}
if diff.is_odd() && self.len() % 2 != 0 {
rn += 1
}
// merge all
let mut res = vec![fillbyte; ln];
res.extend_from_slice(&self.elements[..]);
res.extend_from_slice(&vec![fillbyte; rn][..]);
Ok(res)
self.pad(options, PyCommonString::<u8>::py_center, vm)
}
pub fn ljust(
@@ -755,14 +730,7 @@ impl PyByteInner {
options: ByteInnerPaddingOptions,
vm: &VirtualMachine,
) -> PyResult<Vec<u8>> {
let (fillbyte, diff) = options.get_value("ljust", self.len(), vm)?;
// merge all
let mut res = vec![];
res.extend_from_slice(&self.elements[..]);
res.extend_from_slice(&vec![fillbyte; diff][..]);
Ok(res)
self.pad(options, PyCommonString::<u8>::py_ljust, vm)
}
pub fn rjust(
@@ -770,13 +738,7 @@ impl PyByteInner {
options: ByteInnerPaddingOptions,
vm: &VirtualMachine,
) -> PyResult<Vec<u8>> {
let (fillbyte, diff) = options.get_value("rjust", self.len(), vm)?;
// merge all
let mut res = vec![fillbyte; diff];
res.extend_from_slice(&self.elements[..]);
Ok(res)
self.pad(options, PyCommonString::<u8>::py_rjust, vm)
}
pub fn count(&self, options: ByteInnerFindOptions, vm: &VirtualMachine) -> PyResult<usize> {
@@ -1130,7 +1092,9 @@ impl PyByteInner {
{
return Err(vm.new_overflow_error("replace bytes is too long".to_owned()));
}
let result_len = self.elements.len() + count * (to.len() - from.len());
let result_len = (self.elements.len() as isize
+ count as isize * (to.len() as isize - from.len() as isize))
as usize;
let mut result = Vec::with_capacity(result_len);
let mut last_end = 0;
@@ -1259,8 +1223,8 @@ impl PyByteInner {
}
}
pub fn try_as_byte(obj: &PyObjectRef) -> Option<Vec<u8>> {
match_class!(match obj.clone() {
pub fn try_as_byte(obj: PyObjectRef) -> Option<Vec<u8>> {
match_class!(match obj {
i @ PyBytes => Some(i.get_value().to_vec()),
j @ PyByteArray => Some(j.borrow_value().elements.to_vec()),
_ => None,
@@ -1350,7 +1314,17 @@ impl PyCommonStringWrapper<[u8]> for PyByteInner {
const ASCII_WHITESPACES: [u8; 6] = [0x20, 0x09, 0x0a, 0x0c, 0x0d, 0x0b];
impl PyCommonString<u8> for [u8] {
fn get_slice(&self, range: std::ops::Range<usize>) -> &Self {
type Container = Vec<u8>;
fn with_capacity(capacity: usize) -> Self::Container {
Vec::with_capacity(capacity)
}
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
&self[range]
}
fn get_chars<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
&self[range]
}
@@ -1358,7 +1332,11 @@ impl PyCommonString<u8> for [u8] {
Self::is_empty(self)
}
fn len(&self) -> usize {
fn bytes_len(&self) -> usize {
Self::len(self)
}
fn chars_len(&self) -> usize {
Self::len(self)
}
@@ -1407,4 +1385,12 @@ impl PyCommonString<u8> for [u8] {
}
splited
}
fn py_pad(&self, left: usize, right: usize, fill: u8) -> Self::Container {
let mut u = Vec::with_capacity(left + self.len() + right);
u.extend(std::iter::repeat(fill).take(left));
u.extend_from_slice(self);
u.extend(std::iter::repeat(fill).take(right));
u
}
}

View File

@@ -18,7 +18,7 @@ pub type PyMemoryViewRef = PyRef<PyMemoryView>;
#[pyimpl]
impl PyMemoryView {
pub fn try_value(&self) -> Option<Vec<u8>> {
try_as_byte(&self.obj_ref)
try_as_byte(self.obj_ref.clone())
}
#[pyslot]

View File

@@ -305,6 +305,7 @@ impl PyString {
}
#[pymethod(name = "__len__")]
#[inline]
fn len(&self) -> usize {
self.value.chars().count()
}
@@ -965,85 +966,69 @@ impl PyString {
}
}
fn get_fill_char<'a>(
rep: &'a OptionalArg<PyStringRef>,
vm: &VirtualMachine,
) -> PyResult<&'a str> {
let rep_str = match rep {
OptionalArg::Present(ref st) => &st.value,
OptionalArg::Missing => " ",
};
if rep_str.len() == 1 {
Ok(rep_str)
} else {
Err(vm
.new_type_error("The fill character must be exactly one character long".to_owned()))
fn get_fill_char(fillchar: OptionalArg<PyStringRef>, vm: &VirtualMachine) -> PyResult<char> {
match fillchar {
OptionalArg::Present(ref s) => {
let mut chars = s.value.chars();
let first = chars.next();
let second = chars.next();
if first.is_some() && second.is_none() {
// safeness is guaranteed by upper first.is_some()
Ok(first.unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }))
} else {
Err(vm.new_type_error(
"The fill character must be exactly one character long".to_owned(),
))
}
}
OptionalArg::Missing => Ok(' '),
}
}
#[pymethod]
fn ljust(
#[inline]
fn pad(
&self,
len: usize,
rep: OptionalArg<PyStringRef>,
width: isize,
fillchar: OptionalArg<PyStringRef>,
pad: fn(&str, usize, char) -> String,
vm: &VirtualMachine,
) -> PyResult<String> {
let value = &self.value;
let rep_char = Self::get_fill_char(&rep, vm)?;
if len <= value.len() {
Ok(value.to_owned())
let fillchar = Self::get_fill_char(fillchar, vm)?;
Ok(if self.len() as isize >= width {
String::from(&self.value)
} else {
Ok(format!("{}{}", value, rep_char.repeat(len - value.len())))
}
}
#[pymethod]
fn rjust(
&self,
len: usize,
rep: OptionalArg<PyStringRef>,
vm: &VirtualMachine,
) -> PyResult<String> {
let value = &self.value;
let rep_char = Self::get_fill_char(&rep, vm)?;
if len <= value.len() {
Ok(value.to_owned())
} else {
Ok(format!("{}{}", rep_char.repeat(len - value.len()), value))
}
pad(&self.value, width as usize, fillchar)
})
}
#[pymethod]
fn center(
&self,
len: usize,
rep: OptionalArg<PyStringRef>,
width: isize,
fillchar: OptionalArg<PyStringRef>,
vm: &VirtualMachine,
) -> PyResult<String> {
let value = &self.value;
let rep_char = Self::get_fill_char(&rep, vm)?;
let value_len = self.value.chars().count();
self.pad(width, fillchar, PyCommonString::<char>::py_center, vm)
}
if len <= value_len {
return Ok(value.to_owned());
}
let diff: usize = len - value_len;
let mut left_buff: usize = diff / 2;
let mut right_buff: usize = left_buff;
#[pymethod]
fn ljust(
&self,
width: isize,
fillchar: OptionalArg<PyStringRef>,
vm: &VirtualMachine,
) -> PyResult<String> {
self.pad(width, fillchar, PyCommonString::<char>::py_ljust, vm)
}
if diff % 2 != 0 && value_len % 2 == 0 {
left_buff += 1
}
if diff % 2 != 0 && value_len % 2 != 0 {
right_buff += 1
}
Ok(format!(
"{}{}{}",
rep_char.repeat(left_buff),
value,
rep_char.repeat(right_buff)
))
#[pymethod]
fn rjust(
&self,
width: isize,
fillchar: OptionalArg<PyStringRef>,
vm: &VirtualMachine,
) -> PyResult<String> {
self.pad(width, fillchar, PyCommonString::<char>::py_rjust, vm)
}
#[pymethod]
@@ -1750,18 +1735,41 @@ impl PyCommonStringWrapper<str> for PyStringRef {
}
impl PyCommonString<char> for str {
fn get_slice(&self, range: std::ops::Range<usize>) -> &Self {
type Container = String;
fn with_capacity(capacity: usize) -> Self::Container {
String::with_capacity(capacity)
}
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
&self[range]
}
fn get_chars<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
let mut chars = self.chars();
for _ in 0..range.start {
let _ = chars.next();
}
let start = chars.as_str();
for _ in range {
let _ = chars.next();
}
let end = chars.as_str();
&start[..start.len() - end.len()]
}
fn is_empty(&self) -> bool {
Self::is_empty(self)
}
fn len(&self) -> usize {
fn bytes_len(&self) -> usize {
Self::len(self)
}
fn chars_len(&self) -> usize {
self.chars().count()
}
fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
where
F: Fn(&Self) -> PyObjectRef,
@@ -1813,4 +1821,12 @@ impl PyCommonString<char> for str {
}
splited
}
fn py_pad(&self, left: usize, right: usize, fill: char) -> Self::Container {
let mut u = String::with_capacity((left + right) * fill.len_utf8() + self.len());
u.extend(std::iter::repeat(fill).take(left));
u.push_str(self);
u.extend(std::iter::repeat(fill).take(right));
u
}
}

View File

@@ -114,8 +114,14 @@ where
}
pub trait PyCommonString<E> {
fn get_slice(&self, range: std::ops::Range<usize>) -> &Self;
fn len(&self) -> usize;
type Container;
fn with_capacity(capacity: usize) -> Self::Container;
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self;
// FIXME: get_chars is expensive for str
fn get_chars<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self;
fn bytes_len(&self) -> usize;
fn chars_len(&self) -> usize;
fn is_empty(&self) -> bool;
fn py_split<T, SP, SN, SW, R>(
@@ -164,9 +170,9 @@ pub trait PyCommonString<E> {
T: TryFromObject,
F: Fn(&Self, &T) -> bool,
{
let (affix, range) = args.get_value(self.len());
let (affix, range) = args.get_value(self.bytes_len());
if range.is_normal() {
let value = self.get_slice(range);
let value = self.get_bytes(range);
single_or_tuple_any(
affix,
|s: &T| Ok(func(value, s)),
@@ -212,7 +218,7 @@ pub trait PyCommonString<E> {
{
if range.is_normal() {
let start = range.start;
if let Some(index) = find(self.get_slice(range), &needle) {
if let Some(index) = find(self.get_chars(range), &needle) {
return Some(start + index);
}
}
@@ -225,9 +231,25 @@ pub trait PyCommonString<E> {
F: Fn(&Self, &Self) -> usize,
{
if range.is_normal() {
count(self.get_slice(range), &needle)
count(self.get_chars(range), &needle)
} else {
0
}
}
fn py_pad(&self, left: usize, right: usize, fillchar: E) -> Self::Container;
fn py_center(&self, width: usize, fillchar: E) -> Self::Container {
let marg = width - self.chars_len();
let left = marg / 2 + (marg & width & 1);
self.py_pad(left, marg - left, fillchar)
}
fn py_ljust(&self, width: usize, fillchar: E) -> Self::Container {
self.py_pad(0, width - self.chars_len(), fillchar)
}
fn py_rjust(&self, width: usize, fillchar: E) -> Self::Container {
self.py_pad(width - self.chars_len(), 0, fillchar)
}
}