impl Pattern.sub

This commit is contained in:
Kangzhi Shi
2021-01-03 19:44:40 +02:00
parent 99aaaf7dba
commit de7f07b354
5 changed files with 137 additions and 43 deletions

View File

@@ -2272,8 +2272,6 @@ class PatternReprTests(unittest.TestCase):
self.assertEqual(r[:30], "re.compile('Very long long lon")
self.assertEqual(r[-16:], ", re.IGNORECASE)")
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_flags_repr(self):
self.assertEqual(repr(re.I), "re.IGNORECASE")
self.assertEqual(repr(re.I|re.S|re.X),

View File

@@ -15,9 +15,9 @@ assert mo.end() == 5
assert re.escape('python.exe') == 'python\\.exe'
p = re.compile('ab')
# s = p.sub('x', 'abcabca')
s = p.sub('x', 'abcabca')
# print(s)
# assert s == 'xcxca'
assert s == 'xcxca'
idpattern = r'([_a-z][_a-z0-9]*)'

View File

@@ -860,18 +860,3 @@ impl PyMapping {
self.dict
}
}
/// Converts a Python mapping object into a Rust `HashMap`, converting every
/// key to `K` and every value to `V` via their own `TryFromObject` impls.
impl<K, V> TryFromObject for std::collections::HashMap<K, V>
where
    K: TryFromObject + std::hash::Hash + Eq,
    V: TryFromObject,
{
    /// Views `obj` as a `PyMapping`, then converts its dict entry by entry.
    ///
    /// The first key or value that fails to convert aborts the whole
    /// conversion and its error is returned.
    fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
        let entries = PyMapping::try_from_object(vm, obj)?.into_dict().into_iter();
        let mut converted = std::collections::HashMap::new();
        for (raw_key, raw_value) in entries {
            // Key is converted before the value, so a bad key is reported first.
            let key = K::try_from_object(vm, raw_key)?;
            let value = V::try_from_object(vm, raw_value)?;
            converted.insert(key, value);
        }
        Ok(converted)
    }
}

View File

@@ -12,13 +12,10 @@ mod _sre {
use super::constants::SreFlag;
use super::interp::{self, lower_ascii, lower_unicode, upper_unicode, State};
use crate::builtins::tuple::PyTupleRef;
use crate::builtins::{PyDictRef, PyStrRef, PyTypeRef};
use crate::builtins::{PyDictRef, PyList, PyStr, PyStrRef, PyTypeRef};
use crate::function::{Args, OptionalArg};
use crate::pyobject::{
Either, IntoPyObject, PyCallable, PyObjectRef, PyRef, PyResult, PyValue, StaticType,
};
use crate::pyobject::{Either, IntoPyObject, PyCallable, PyIterable, PyObjectRef, PyRef, PyResult, PyValue, StaticType};
use crate::VirtualMachine;
use std::collections::HashMap;
use std::convert::TryFrom;
#[pyattr]
@@ -173,19 +170,13 @@ mod _sre {
string_args: StringArgs,
vm: &VirtualMachine,
) -> Option<PyRef<Match>> {
// TODO: optimize by op info and skip prefix
let start = string_args.pos;
for i in start..string_args.endpos {
if let Some(m) = interp::pymatch(
string_args.string.clone(),
i,
string_args.endpos,
zelf.clone(),
) {
return Some(m.into_ref(vm));
}
}
None
interp::search(
string_args.string,
string_args.pos,
string_args.endpos,
zelf,
)
.map(|x| x.into_ref(vm))
}
#[pymethod]
fn findall(&self, string_args: StringArgs) -> Option<PyObjectRef> {
@@ -200,9 +191,14 @@ mod _sre {
None
}
#[pymethod]
fn sub(&self, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult<PyStrRef> {
Err(vm.new_not_implemented_error("".to_owned()))
fn sub(zelf: PyRef<Pattern>, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult {
Self::subx(zelf, sub_args, false, vm)
}
// Python-visible `subn` method.
// Delegates to `subx` with the `subn` flag set to `true`, which makes `subx`
// return a `(result, substitution_count)` tuple rather than only the result.
#[pymethod]
fn subn(zelf: PyRef<Pattern>, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult {
Self::subx(zelf, sub_args, true, vm)
}
#[pyproperty]
fn flags(&self) -> u16 {
self.flags.bits()
@@ -211,9 +207,100 @@ mod _sre {
// Returns a clone of the `groupindex` dict reference stored on this pattern.
fn groupindex(&self) -> PyDictRef {
self.groupindex.clone()
}
// Exposes the stored `groups` value of this pattern as a property.
// NOTE(review): presumably the number of capturing groups — confirm against
// where `Pattern` is constructed.
#[pyproperty]
fn groups(&self) -> usize {
self.groups
}
// Exposes the stored `pattern` object as a property; the returned value is a
// clone of the object reference held by this `Pattern`.
#[pyproperty]
fn pattern(&self) -> PyObjectRef {
self.pattern.clone()
}
fn subx(&self, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult<PyStrRef> {
Err(vm.new_not_implemented_error("".to_owned()))
/// Shared implementation behind `sub` and `subn`.
///
/// Repeatedly searches `sub_args.string` for `zelf`, copying the text between
/// matches and the replacement for each match into a list that is finally
/// joined with `"".join(...)`.
///
/// * `sub_args.repl` is either a callable, which is invoked with each match
///   object, or a string. A string containing a backslash is first handed to
///   Python's `re._subx` template compiler; any other string is inserted
///   literally.
/// * `sub_args.count` limits the number of substitutions; `0` means no limit.
/// * `subn` selects the return shape: `true` returns `(result, n)` where `n`
///   is the number of substitutions made, `false` returns only the result.
///
/// Errors raised while importing `re`, compiling the template, calling the
/// replacement callable or joining the pieces are propagated.
fn subx(
    zelf: PyRef<Pattern>,
    sub_args: SubArgs,
    subn: bool,
    vm: &VirtualMachine,
) -> PyResult {
    let filter: PyObjectRef = match sub_args.repl {
        Either::A(callable) => callable.into_object(),
        Either::B(s) => {
            if s.borrow_value().contains('\\') {
                // handle non-literal strings; hand it over to the template compiler
                let re = vm.import("re", &[], 0)?;
                let func = vm.get_attribute(re, "_subx")?;
                vm.invoke(&func, (zelf.clone(), s))?
            } else {
                s.into_object()
            }
        }
    };
    // The replacement object never changes inside the loop, so test it once.
    let filter_is_callable = vm.is_callable(&filter);

    let mut sublist: Vec<PyObjectRef> = Vec::new();
    let mut n = 0;
    // End (in characters) of the input that has already been copied to `sublist`.
    let mut last_pos = 0;
    // Character position at which the next search begins. It is tracked
    // separately from `last_pos`: after an empty match the search must step
    // forward by one to make progress, but the character stepped over still
    // has to be copied to the output.
    let mut search_pos = 0;

    while sub_args.count == 0 || n < sub_args.count {
        let m = match interp::search(
            sub_args.string.clone(),
            search_pos,
            std::usize::MAX,
            zelf.clone(),
        ) {
            Some(m) => m,
            None => break,
        };

        let start = m.regs[0].0 as usize;
        let end = m.regs[0].1 as usize;

        if last_pos < start {
            /* get segment before this match */
            sublist.push(
                m.string
                    .borrow_value()
                    .chars()
                    .skip(last_pos)
                    .take(start - last_pos)
                    .collect::<String>()
                    .into_pyobject(vm),
            );
        }

        last_pos = end;
        search_pos = if end == start { end + 1 } else { end };

        if filter_is_callable {
            let ret = vm.invoke(&filter, (m.into_ref(vm),))?;
            sublist.push(ret);
        } else {
            sublist.push(filter.clone());
        }

        n += 1;
    }

    /* get segment following last match */
    sublist.push(
        sub_args
            .string
            .borrow_value()
            .chars()
            .skip(last_pos)
            .collect::<String>()
            .into_pyobject(vm),
    );

    // Joining through Python's `str.join` keeps the usual error when a
    // replacement callable returns a non-string object.
    let list = PyList::from(sublist).into_object(vm);
    let s = vm.ctx.new_str("");
    let ret = vm.call_method(&s, "join", (list,))?;

    Ok(if subn {
        (ret, n).into_pyobject(vm)
    } else {
        ret
    })
}
}
@@ -343,7 +430,9 @@ mod _sre {
#[pymethod(magic)]
fn getitem(&self, index: isize, vm: &VirtualMachine) -> Option<String> {
self.get_index(index, vm).ok().and_then(|i| self.get_slice(i))
self.get_index(index, vm)
.ok()
.and_then(|i| self.get_slice(i))
}
#[pymethod]

View File

@@ -142,6 +142,27 @@ pub(crate) fn pymatch(
}
}
/// Scans `string` for the first position at which `pattern` matches.
///
/// `end` is clamped to the length of `string` in characters. Every candidate
/// position from `start` up to and including the clamped `end` is tried with
/// `pymatch`, and the first successful match is returned.
///
/// The end position itself is a candidate so that a pattern able to match the
/// empty string (an empty pattern, `$`, `x*`, ...) can still match at the very
/// end of the input or against an empty string.
///
/// Returns `None` when no candidate position matches, including when `start`
/// lies past the clamped `end`.
pub(crate) fn search(
    string: PyStrRef,
    start: usize,
    end: usize,
    pattern: PyRef<Pattern>,
) -> Option<Match> {
    // TODO: optimize by op info and skip prefix
    let end = std::cmp::min(end, string.char_len());
    (start..=end).find_map(|i| pymatch(string.clone(), i, end, pattern.clone()))
}
#[derive(Debug, Copy, Clone)]
struct MatchContext {
string_position: usize,
@@ -750,7 +771,8 @@ fn charset(set: &[u32], c: char) -> bool {
let (_, blockindices, _) = unsafe { set.align_to::<u8>() };
let blocks = &set[64..];
let block = blockindices[block_index as usize];
if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize] & (1u32 << (ch & 31))
if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize]
& (1u32 << (ch & 31))
!= 0
{
return ok;