From de7f07b354c0497d2d358cd7deee2cc04eb83afa Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Sun, 3 Jan 2021 19:44:40 +0200 Subject: [PATCH] impl Pattern.sub --- Lib/test/test_re.py | 2 - extra_tests/snippets/stdlib_re.py | 4 +- vm/src/builtins/dict.rs | 15 ---- vm/src/stdlib/sre.rs | 135 +++++++++++++++++++++++++----- vm/src/stdlib/sre/interp.rs | 24 +++++- 5 files changed, 137 insertions(+), 43 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 37cbc6f79..1d9a2f549 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2272,8 +2272,6 @@ class PatternReprTests(unittest.TestCase): self.assertEqual(r[:30], "re.compile('Very long long lon") self.assertEqual(r[-16:], ", re.IGNORECASE)") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_flags_repr(self): self.assertEqual(repr(re.I), "re.IGNORECASE") self.assertEqual(repr(re.I|re.S|re.X), diff --git a/extra_tests/snippets/stdlib_re.py b/extra_tests/snippets/stdlib_re.py index ed9c12002..87e1a27c9 100644 --- a/extra_tests/snippets/stdlib_re.py +++ b/extra_tests/snippets/stdlib_re.py @@ -15,9 +15,9 @@ assert mo.end() == 5 assert re.escape('python.exe') == 'python\\.exe' p = re.compile('ab') -# s = p.sub('x', 'abcabca') +s = p.sub('x', 'abcabca') # print(s) -# assert s == 'xcxca' +assert s == 'xcxca' idpattern = r'([_a-z][_a-z0-9]*)' diff --git a/vm/src/builtins/dict.rs b/vm/src/builtins/dict.rs index 6bb1b44ac..2e09b6811 100644 --- a/vm/src/builtins/dict.rs +++ b/vm/src/builtins/dict.rs @@ -860,18 +860,3 @@ impl PyMapping { self.dict } } - -impl TryFromObject for std::collections::HashMap -where - K: TryFromObject + std::hash::Hash + Eq, - V: TryFromObject, -{ - fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { - let mapping = PyMapping::try_from_object(vm, obj)?; - mapping - .into_dict() - .into_iter() - .map(|(k, v)| Ok((K::try_from_object(vm, k)?, V::try_from_object(vm, v)?))) - .collect() - } -} diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index 078ac1019..a569f2749 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -12,13 +12,10 @@ mod _sre { use super::constants::SreFlag; use super::interp::{self, lower_ascii, lower_unicode, upper_unicode, State}; use crate::builtins::tuple::PyTupleRef; - use crate::builtins::{PyDictRef, PyStrRef, PyTypeRef}; + use crate::builtins::{PyDictRef, PyList, PyStr, PyStrRef, PyTypeRef}; use crate::function::{Args, OptionalArg}; - use crate::pyobject::{ - Either, IntoPyObject, PyCallable, PyObjectRef, PyRef, PyResult, PyValue, StaticType, - }; + use crate::pyobject::{Either, IntoPyObject, PyCallable, PyIterable, PyObjectRef, PyRef, PyResult, PyValue, StaticType}; use crate::VirtualMachine; - use std::collections::HashMap; use std::convert::TryFrom; #[pyattr] @@ -173,19 +170,13 @@ mod _sre { string_args: StringArgs, vm: &VirtualMachine, ) -> Option> { - // TODO: optimize by op info and skip prefix - let start = string_args.pos; - for i in start..string_args.endpos { - if let Some(m) = interp::pymatch( - string_args.string.clone(), - i, - string_args.endpos, - zelf.clone(), - ) { - return Some(m.into_ref(vm)); - } - } - None + interp::search( + string_args.string, + string_args.pos, + string_args.endpos, + zelf, + ) + .map(|x| x.into_ref(vm)) } #[pymethod] fn findall(&self, string_args: StringArgs) -> Option { @@ -200,9 +191,14 @@ mod _sre { None } #[pymethod] - fn sub(&self, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("".to_owned())) + fn sub(zelf: PyRef, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult { + Self::subx(zelf, sub_args, false, vm) } + #[pymethod] + fn subn(zelf: PyRef, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult { + Self::subx(zelf, sub_args, true, vm) + } + #[pyproperty] fn flags(&self) -> u16 { self.flags.bits() @@ -211,9 +207,100 @@ mod _sre { fn groupindex(&self) -> PyDictRef { self.groupindex.clone() } + #[pyproperty] + fn groups(&self) -> usize { + self.groups + } + #[pyproperty] + fn pattern(&self) -> PyObjectRef { + self.pattern.clone() + } - fn subx(&self, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult { - Err(vm.new_not_implemented_error("".to_owned())) + fn subx( + zelf: PyRef, + sub_args: SubArgs, + subn: bool, + vm: &VirtualMachine, + ) -> PyResult { + let filter: PyObjectRef = match sub_args.repl { + Either::A(callable) => callable.into_object(), + Either::B(s) => { + if s.borrow_value().contains('\\') { + // handle non-literal strings ; hand it over to the template compiler + let re = vm.import("re", &[], 0)?; + let func = vm.get_attribute(re, "_subx")?; + vm.invoke(&func, (zelf.clone(), s))? + } else { + s.into_object() + } + } + }; + + let mut sublist: Vec = Vec::new(); + + let mut n = 0; + let mut last_pos = 0; + while sub_args.count == 0 || n < sub_args.count { + let m = match interp::search( + sub_args.string.clone(), + last_pos, + std::usize::MAX, + zelf.clone(), + ) { + Some(m) => m, + None => { + break; + } + }; + let start = m.regs[0].0 as usize; + if last_pos < start { + /* get segment before this match */ + sublist.push( + m.string + .borrow_value() + .chars() + .take(start) + .skip(last_pos) + .collect::() + .into_pyobject(vm), + ); + } + + last_pos = m.regs[0].1 as usize; + if last_pos == start { + last_pos += 1; + } + + if vm.is_callable(&filter) { + let ret = vm.invoke(&filter, (m.into_ref(vm),))?; + sublist.push(ret); + } else { + sublist.push(filter.clone()); + } + + n += 1; + } + + /* get segment following last match */ + sublist.push( + sub_args + .string + .borrow_value() + .chars() + .skip(last_pos) + .collect::() + .into_pyobject(vm), + ); + + let list = PyList::from(sublist).into_object(vm); + let s = vm.ctx.new_str(""); + let ret = vm.call_method(&s, "join", (list,))?; + + Ok(if subn { + (ret, n).into_pyobject(vm) + } else { + ret + }) } } @@ -343,7 +430,9 @@ mod _sre { #[pymethod(magic)] fn getitem(&self, index: isize, vm: &VirtualMachine) -> Option { - self.get_index(index, vm).ok().and_then(|i| self.get_slice(i)) + self.get_index(index, vm) + .ok() + .and_then(|i| self.get_slice(i)) } #[pymethod] diff --git a/vm/src/stdlib/sre/interp.rs b/vm/src/stdlib/sre/interp.rs index ca5efd9e4..98844ee9c 100644 --- a/vm/src/stdlib/sre/interp.rs +++ b/vm/src/stdlib/sre/interp.rs @@ -142,6 +142,27 @@ pub(crate) fn pymatch( } } +pub(crate) fn search( + string: PyStrRef, + start: usize, + end: usize, + pattern: PyRef, +) -> Option { + // TODO: optimize by op info and skip prefix + let end = std::cmp::min(end, string.char_len()); + for i in start..end { + if let Some(m) = pymatch( + string.clone(), + i, + end, + pattern.clone(), + ) { + return Some(m); + } + } + None +} + #[derive(Debug, Copy, Clone)] struct MatchContext { string_position: usize, @@ -750,7 +771,8 @@ fn charset(set: &[u32], c: char) -> bool { let (_, blockindices, _) = unsafe { set.align_to::() }; let blocks = &set[64..]; let block = blockindices[block_index as usize]; - if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize] & (1u32 << (ch & 31)) + if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize] + & (1u32 << (ch & 31)) != 0 { return ok;