pystr::py_splitlines

This commit is contained in:
Jeong YunWon
2020-07-19 03:02:06 +09:00
parent 62277faa44
commit 5cfdf1db2c
5 changed files with 56 additions and 71 deletions

View File

@@ -451,13 +451,10 @@ impl PyByteArray {
// `splitlines` for `PyByteArray`: splits the underlying byte value into lines and
// returns them as a list object whose items are built with `vm.ctx.new_bytearray`.
//
// NOTE(review): this hunk is rendered without +/- markers, so two versions of
// the body are interleaved. Going by the hunk header (13 lines before, 10
// after), the `as_bytes` binding with its `.iter().map(..).collect()` chain and
// `Ok(vm.ctx.new_list(as_bytes))` look like the removed lines, and the `lines`
// binding with the two-argument `.splitlines(options, |x| ..)` call looks like
// the replacement — confirm against the real diff.
#[pymethod(name = "splitlines")]
fn splitlines(&self, options: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyResult {
let as_bytes = self
let lines = self
.borrow_value()
.splitlines(options)
.iter()
// Each borrowed line slice is copied (`to_vec`) into a new bytearray object.
.map(|x| vm.ctx.new_bytearray(x.to_vec()))
.collect::<Vec<PyObjectRef>>();
Ok(vm.ctx.new_list(as_bytes))
// Two-argument form: the closure converts each line slice into a bytearray
// object, so no intermediate collection of slices is needed at this call site.
.splitlines(options, |x| vm.ctx.new_bytearray(x.to_vec()));
Ok(vm.ctx.new_list(lines))
}
#[pymethod(name = "zfill")]

View File

@@ -961,36 +961,11 @@ impl PyByteInner {
res
}
// NOTE(review): two versions of `splitlines` are interleaved in this hunk,
// which is rendered without +/- markers. Going by the hunk header (36 lines
// before, 11 after), the first definition looks like the removed one and the
// generic definition further down like its replacement — confirm against the
// real diff.
//
// First version: scans `self.elements` and returns one borrowed sub-slice per
// line. A line ends at `\n`, at a lone `\r`, or at `\r\n`.
pub fn splitlines(&self, options: pystr::SplitLinesArgs) -> Vec<&[u8]> {
// 1 when terminators are kept in the output, 0 when they are dropped.
let keep = if options.keepends { 1 } else { 0 };
let value = &self.elements;
let mut elements = Vec::new();
// Index at which the line currently being scanned starts.
let mut last_i = 0;
// Peekable so that a `\r` can look ahead for a following `\n`.
let mut chars = value.iter().enumerate().peekable();
while let Some((i, ch)) = chars.next() {
// `end_len`: how many terminator bytes to include in the emitted slice.
// `i_diff`: how many bytes the terminator occupies in the input.
let (end_len, i_diff) = match ch {
b'\n' => (keep, 1),
b'\r' => {
let is_rn = chars.peek().map_or(false, |(_, ch)| *ch == &b'\n');
if is_rn {
// Consume the `\n` so `\r\n` counts as a single terminator.
let _ = chars.next();
(keep + keep, 2)
} else {
(keep, 1)
}
}
_ => {
// Not a terminator: keep scanning.
continue;
}
};
let range = last_i..i + end_len;
last_i = i + i_diff;
elements.push(&value[range]);
}
// Emit whatever follows the last terminator, if anything.
if last_i != value.len() {
elements.push(&value[last_i..]);
}
elements
// Second version: delegates to `py_splitlines` on `self.elements`, handing it
// `into_wrapper` to convert each line slice, and returns the resulting `Vec<W>`.
pub fn splitlines<FW, W>(&self, options: pystr::SplitLinesArgs, into_wrapper: FW) -> Vec<W>
where
FW: Fn(&[u8]) -> W,
{
self.elements.py_splitlines(options, into_wrapper)
}
pub fn zfill(&self, width: isize) -> Vec<u8> {
@@ -1319,6 +1294,10 @@ impl PyCommonString<u8> for [u8] {
Vec::with_capacity(capacity)
}
/// Returns the slice itself: a byte slice is already its own byte view.
fn as_bytes(&self) -> &[u8] {
self
}
/// Returns the sub-slice covering `range`.
///
/// Uses plain slice indexing, so an out-of-bounds `range` panics.
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
&self[range]
}

View File

@@ -418,13 +418,10 @@ impl PyBytes {
// `splitlines` for `PyBytes`: splits `self.inner` into lines and returns them as
// a list object whose items are built with `vm.ctx.new_bytes`.
//
// NOTE(review): this hunk is rendered without +/- markers, so two versions of
// the body are interleaved. Going by the hunk header (13 lines before, 10
// after), the `as_bytes` binding with its `.iter().map(..).collect()` chain and
// `Ok(vm.ctx.new_list(as_bytes))` look like the removed lines, and the `lines`
// binding with the two-argument `.splitlines(options, |x| ..)` call looks like
// the replacement — confirm against the real diff.
#[pymethod(name = "splitlines")]
fn splitlines(&self, options: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyResult {
let as_bytes = self
let lines = self
.inner
.splitlines(options)
.iter()
// Each borrowed line slice is copied (`to_vec`) into a new bytes object.
.map(|x| vm.ctx.new_bytes(x.to_vec()))
.collect::<Vec<PyObjectRef>>();
Ok(vm.ctx.new_list(as_bytes))
// Two-argument form: the closure converts each line slice into a bytes object.
.splitlines(options, |x| vm.ctx.new_bytes(x.to_vec()));
Ok(vm.ctx.new_list(lines))
}
#[pymethod(name = "zfill")]

View File

@@ -803,35 +803,8 @@ impl PyString {
// `splitlines` for `PyString`: splits `self.value` into lines and returns them as
// a list object of new string objects.
//
// NOTE(review): this hunk is rendered without +/- markers, so two versions of
// the body are interleaved. Going by the hunk header (35 lines before, 8
// after), the hand-written scanning loop looks like the removed code and the
// trailing `self.value.py_splitlines(..)` call like its replacement — confirm
// against the real diff.
#[pymethod]
fn splitlines(&self, args: pystr::SplitLinesArgs, vm: &VirtualMachine) -> PyObjectRef {
// 1 when terminators are kept in the output, 0 when they are dropped.
let keep = if args.keepends { 1 } else { 0 };
let value = &self.value;
let mut elements = Vec::new();
// Position at which the line currently being scanned starts.
let mut last_i = 0;
// NOTE(review): `chars().enumerate()` counts characters, yet the positions are
// used below to slice `value`. If `value` is a `String`/`str` (its type is not
// visible here), slicing expects byte offsets, so input containing multi-byte
// characters would presumably be cut at the wrong place or panic — verify.
let mut chars = value.chars().enumerate().peekable();
while let Some((i, ch)) = chars.next() {
// `end_len`: how many terminator characters to include in the emitted slice.
// `i_diff`: how many positions the terminator occupies in the input.
let (end_len, i_diff) = match ch {
'\n' => (keep, 1),
'\r' => {
let is_rn = chars.peek().map_or(false, |(_, ch)| *ch == '\n');
if is_rn {
// Consume the `\n` so `\r\n` counts as a single terminator.
let _ = chars.next();
(keep + keep, 2)
} else {
(keep, 1)
}
}
_ => {
// Not a terminator: keep scanning.
continue;
}
};
let range = last_i..i + end_len;
last_i = i + i_diff;
elements.push(vm.ctx.new_str(&value[range]));
}
// Emit whatever follows the last terminator, if anything.
if last_i != value.len() {
elements.push(vm.ctx.new_str(&value[last_i..]));
}
vm.ctx.new_list(elements)
// Delegating form: `py_splitlines` does the scanning; the closure copies each
// line (`to_owned`) into a new string object.
vm.ctx
.new_list(self.value.py_splitlines(args, |s| vm.new_str(s.to_owned())))
}
#[pymethod]
@@ -1760,6 +1733,10 @@ impl PyCommonString<char> for str {
String::with_capacity(capacity)
}
/// Returns the bytes that make up this string slice.
fn as_bytes(&self) -> &[u8] {
    // Spelled with the type path so it is obvious that this calls the inherent
    // `str::as_bytes` rather than recursing into this trait method.
    str::as_bytes(self)
}
/// Returns the sub-slice of the string covering `range`.
///
/// `range` is applied with plain `str` indexing, i.e. as byte offsets; Rust
/// panics if either end is out of bounds or not on a character boundary.
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self {
&self[range]
}

View File

@@ -129,6 +129,7 @@ pub trait PyCommonString<E> {
/// Owned buffer type associated with the implementor; the visible impls build
/// it with `Vec::with_capacity` (for `[u8]`) and `String::with_capacity` (for `str`).
type Container;
/// Creates an empty `Container` with room for `capacity` elements.
fn with_capacity(capacity: usize) -> Self::Container;
/// Returns the value viewed as raw bytes.
fn as_bytes(&self) -> &[u8];
/// Returns the part of the value covered by `range`; the visible impls index
/// `self` directly with it, and `py_splitlines` passes offsets into `as_bytes()`.
fn get_bytes<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self;
// FIXME: get_chars is expensive for str
/// NOTE(review): presumably like `get_bytes` but with `range` counted in
/// characters — no implementation is visible here, confirm.
fn get_chars<'a>(&'a self, range: std::ops::Range<usize>) -> &'a Self;
@@ -297,4 +298,38 @@ pub trait PyCommonString<E> {
&self
}
}
/// Splits the value into lines and returns each line converted by `into_wrapper`.
///
/// The scan runs over `as_bytes()`. A line ends at `\n`, at a lone `\r`, or at
/// the two-byte sequence `\r\n`. With `options.keepends` set the terminator
/// stays attached to its line; otherwise it is dropped. Whatever follows the
/// last terminator is emitted as a final line, unless the scan already ended
/// exactly at `bytes_len()`.
///
/// NOTE(review): only these byte-level terminators are recognised. CPython's
/// `str.splitlines` also splits on further boundaries (e.g. `\x0b`, `\x0c`,
/// `\u2028`) — confirm whether the `str` implementor should handle them.
fn py_splitlines<FW, W>(&self, options: SplitLinesArgs, into_wrapper: FW) -> Vec<W>
where
    FW: Fn(&Self) -> W,
{
    let keepends = options.keepends;
    let bytes = self.as_bytes();
    let mut lines = Vec::new();
    // Offset at which the line currently being scanned starts.
    let mut line_start = 0;
    let mut cursor = 0;
    while cursor < bytes.len() {
        // Width of the terminator at `cursor`; ordinary bytes are skipped.
        let terminator_len = match bytes[cursor] {
            b'\r' if bytes.get(cursor + 1) == Some(&b'\n') => 2,
            b'\n' | b'\r' => 1,
            _ => {
                cursor += 1;
                continue;
            }
        };
        // The emitted slice stops before the terminator unless it is kept.
        let line_end = if keepends {
            cursor + terminator_len
        } else {
            cursor
        };
        lines.push(into_wrapper(self.get_bytes(line_start..line_end)));
        cursor += terminator_len;
        line_start = cursor;
    }
    let total = self.bytes_len();
    if line_start != total {
        lines.push(into_wrapper(self.get_bytes(line_start..total)));
    }
    lines
}
}