fix sre zerowidth search

This commit is contained in:
Kangzhi Shi
2021-04-20 10:19:52 +02:00
parent 8d5588df67
commit 7ab4e9573c
4 changed files with 57 additions and 100 deletions

4
Cargo.lock generated
View File

@@ -2335,9 +2335,7 @@ dependencies = [
[[package]]
name = "sre-engine"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4953873f7ab71f618648bc796ade9b85ea593d837e535941d28fb8269777c265"
version = "0.1.2"
dependencies = [
"bitflags",
"num_enum",

View File

@@ -1802,8 +1802,6 @@ class ReTests(unittest.TestCase):
)
self.assertRegex(repr(second), pattern)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_zerowidth(self):
# Issues 852532, 1647489, 3262, 25054.
self.assertEqual(re.split(r"\b", "a::bc"), ['', 'a', '::', 'bc', ''])

View File

@@ -93,7 +93,7 @@ libz-sys = { version = "1.0", optional = true }
# RustPython crates implementing functionality based on CPython
mt19937 = "2.0"
sre-engine = "0.1.0"
sre-engine = "0.1.2"
# to work on sre-engine locally
# sre-engine = { path = "../../sre-engine" }

View File

@@ -201,13 +201,9 @@ mod _sre {
vm,
|mut state| {
state = state.pymatch();
if state.has_matched != Some(true) {
Ok(None)
} else {
Ok(Some(
Match::new(&state, zelf.clone(), string_args.string).into_ref(vm),
))
}
Ok(state
.has_matched
.then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm)))
},
)
}
@@ -226,13 +222,9 @@ mod _sre {
|mut state| {
state.match_all = true;
state = state.pymatch();
if state.has_matched != Some(true) {
Ok(None)
} else {
Ok(Some(
Match::new(&state, zelf.clone(), string_args.string).into_ref(vm),
))
}
Ok(state
.has_matched
.then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm)))
},
)
}
@@ -250,14 +242,9 @@ mod _sre {
vm,
|mut state| {
state = state.search();
if state.has_matched != Some(true) {
Ok(None)
} else {
Ok(Some(
Match::new(&state, zelf.clone(), string_args.string).into_ref(vm),
))
}
Ok(state
.has_matched
.then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm)))
},
)
}
@@ -276,11 +263,8 @@ mod _sre {
|mut state| {
let mut matchlist: Vec<PyObjectRef> = Vec::new();
while state.start <= state.end {
state.reset();
state = state.search();
if state.has_matched != Some(true) {
if !state.has_matched {
break;
}
@@ -296,11 +280,9 @@ mod _sre {
matchlist.push(item);
if state.string_position == state.start {
state.start += 1;
} else {
state.start = state.string_position;
}
state.must_advance = state.string_position == state.start;
state.start = state.string_position;
state.reset();
}
Ok(PyList::from(matchlist).into_ref(vm))
},
@@ -317,7 +299,8 @@ mod _sre {
pattern: zelf,
string: string_args.string,
start: AtomicCell::new(string_args.pos),
end: AtomicCell::new(string_args.endpos),
end: string_args.endpos,
must_advance: AtomicCell::new(false),
}
.into_ref(vm);
let search = vm.get_method(scanner.into_object(), "search").unwrap()?;
@@ -336,7 +319,8 @@ mod _sre {
pattern: zelf,
string: string_args.string,
start: AtomicCell::new(string_args.pos),
end: AtomicCell::new(string_args.endpos),
end: string_args.endpos,
must_advance: AtomicCell::new(false),
}
.into_ref(vm)
}
@@ -367,10 +351,8 @@ mod _sre {
let mut n = 0;
let mut last = 0;
while split_args.maxsplit == 0 || n < split_args.maxsplit {
state.reset();
state = state.search();
if state.has_matched != Some(true) {
if !state.has_matched {
break;
}
@@ -388,13 +370,10 @@ mod _sre {
}
n += 1;
state.must_advance = state.string_position == state.start;
last = state.string_position;
if state.start == state.string_position {
state.start += 1;
} else {
state.start = state.string_position;
}
state.start = state.string_position;
state.reset();
}
// get segment following last match (even if empty)
@@ -458,10 +437,8 @@ mod _sre {
let mut n = 0;
let mut last_pos = 0;
while count == 0 || n < count {
state.reset();
state = state.search();
if state.has_matched != Some(true) {
if !state.has_matched {
break;
}
@@ -470,25 +447,20 @@ mod _sre {
sublist.push(slice_drive(&state.string, last_pos, state.start, vm));
}
if !(last_pos == state.start && last_pos == state.string_position && n > 1) {
// the above ignores empty matches on latest position
if is_callable {
let m = Match::new(&state, zelf.clone(), string.clone());
let ret = vm.invoke(&filter, (m.into_ref(vm),))?;
sublist.push(ret);
} else {
sublist.push(filter.clone());
}
last_pos = state.string_position;
n += 1;
}
if state.string_position == state.start {
state.start += 1;
if is_callable {
let m = Match::new(&state, zelf.clone(), string.clone());
let ret = vm.invoke(&filter, (m.into_ref(vm),))?;
sublist.push(ret);
} else {
state.start = state.string_position;
sublist.push(filter.clone());
}
last_pos = state.string_position;
n += 1;
state.must_advance = state.string_position == state.start;
state.start = state.string_position;
state.reset();
}
/* get segment following last match */
@@ -764,7 +736,8 @@ mod _sre {
pattern: PyRef<Pattern>,
string: PyObjectRef,
start: AtomicCell<usize>,
end: AtomicCell<usize>,
end: usize,
must_advance: AtomicCell<bool>,
}
impl PyValue for SreScanner {
fn class(_vm: &VirtualMachine) -> &PyTypeRef {
@@ -784,56 +757,44 @@ mod _sre {
self.pattern.with_state(
self.string.clone(),
self.start.load(),
self.end.load(),
self.end,
vm,
|mut state| {
state.must_advance = self.must_advance.load();
state = state.pymatch();
if state.has_matched != Some(true) {
Ok(None)
} else {
if state.start == state.string_position {
self.start.store(state.string_position + 1);
} else {
self.start.store(state.string_position);
}
Ok(Some(
Match::new(&state, self.pattern.clone(), self.string.clone())
.into_ref(vm),
))
}
self.must_advance
.store(state.start == state.string_position);
self.start.store(state.string_position);
Ok(state.has_matched.then(|| {
Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm)
}))
},
)
}
#[pymethod]
fn search(&self, vm: &VirtualMachine) -> PyResult<Option<PyRef<Match>>> {
if self.start.load() > self.end.load() {
if self.start.load() > self.end {
return Ok(None);
}
self.pattern.with_state(
self.string.clone(),
self.start.load(),
self.end.load(),
self.end,
vm,
|mut state| {
state.reset();
state.must_advance = self.must_advance.load();
state = state.search();
self.end.store(state.end);
if state.has_matched == Some(true) {
if state.start == state.string_position {
self.start.store(state.string_position + 1);
} else {
self.start.store(state.string_position);
}
Ok(Some(
Match::new(&state, self.pattern.clone(), self.string.clone())
.into_ref(vm),
))
} else {
Ok(None)
}
self.must_advance
.store(state.string_position == state.start);
self.start.store(state.string_position);
Ok(state.has_matched.then(|| {
Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm)
}))
},
)
}