diff --git a/Cargo.lock b/Cargo.lock index ca156ee7a..e9ef09e9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2335,9 +2335,7 @@ dependencies = [ [[package]] name = "sre-engine" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4953873f7ab71f618648bc796ade9b85ea593d837e535941d28fb8269777c265" +version = "0.1.2" dependencies = [ "bitflags", "num_enum", diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 43bac6264..539ae1c73 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1802,8 +1802,6 @@ class ReTests(unittest.TestCase): ) self.assertRegex(repr(second), pattern) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_zerowidth(self): # Issues 852532, 1647489, 3262, 25054. self.assertEqual(re.split(r"\b", "a::bc"), ['', 'a', '::', 'bc', '']) diff --git a/vm/Cargo.toml b/vm/Cargo.toml index d435ba606..0335576bf 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -93,7 +93,7 @@ libz-sys = { version = "1.0", optional = true } # RustPython crates implementing functionality based on CPython mt19937 = "2.0" -sre-engine = "0.1.0" +sre-engine = "0.1.2" # to work on sre-engine locally # sre-engine = { path = "../../sre-engine" } diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index 0ba265c68..e15c89416 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -201,13 +201,9 @@ mod _sre { vm, |mut state| { state = state.pymatch(); - if state.has_matched != Some(true) { - Ok(None) - } else { - Ok(Some( - Match::new(&state, zelf.clone(), string_args.string).into_ref(vm), - )) - } + Ok(state + .has_matched + .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) }, ) } @@ -226,13 +222,9 @@ mod _sre { |mut state| { state.match_all = true; state = state.pymatch(); - if state.has_matched != Some(true) { - Ok(None) - } else { - Ok(Some( - Match::new(&state, zelf.clone(), string_args.string).into_ref(vm), - )) - } + Ok(state + .has_matched + .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) }, ) } @@ -250,14 +242,9 @@ mod _sre { vm, |mut state| { state = state.search(); - - if state.has_matched != Some(true) { - Ok(None) - } else { - Ok(Some( - Match::new(&state, zelf.clone(), string_args.string).into_ref(vm), - )) - } + Ok(state + .has_matched + .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) }, ) } @@ -276,11 +263,8 @@ mod _sre { |mut state| { let mut matchlist: Vec = Vec::new(); while state.start <= state.end { - state.reset(); - state = state.search(); - - if state.has_matched != Some(true) { + if !state.has_matched { break; } @@ -296,11 +280,9 @@ mod _sre { matchlist.push(item); - if state.string_position == state.start { - state.start += 1; - } else { - state.start = state.string_position; - } + state.must_advance = state.string_position == state.start; + state.start = state.string_position; + state.reset(); } Ok(PyList::from(matchlist).into_ref(vm)) }, @@ -317,7 +299,8 @@ mod _sre { pattern: zelf, string: string_args.string, start: AtomicCell::new(string_args.pos), - end: AtomicCell::new(string_args.endpos), + end: string_args.endpos, + must_advance: AtomicCell::new(false), } .into_ref(vm); let search = vm.get_method(scanner.into_object(), "search").unwrap()?; @@ -336,7 +319,8 @@ mod _sre { pattern: zelf, string: string_args.string, start: AtomicCell::new(string_args.pos), - end: AtomicCell::new(string_args.endpos), + end: string_args.endpos, + must_advance: AtomicCell::new(false), } .into_ref(vm) } @@ -367,10 +351,8 @@ mod _sre { let mut n = 0; let mut last = 0; while split_args.maxsplit == 0 || n < split_args.maxsplit { - state.reset(); state = state.search(); - - if state.has_matched != Some(true) { + if !state.has_matched { break; } @@ -388,13 +370,10 @@ mod _sre { } n += 1; + state.must_advance = state.string_position == state.start; last = state.string_position; - - if state.start == state.string_position { - state.start += 1; - } else { - state.start = state.string_position; - } + state.start = state.string_position; + state.reset(); } // get segment following last match (even if empty) @@ -458,10 +437,8 @@ mod _sre { let mut n = 0; let mut last_pos = 0; while count == 0 || n < count { - state.reset(); state = state.search(); - - if state.has_matched != Some(true) { + if !state.has_matched { break; } @@ -470,25 +447,20 @@ mod _sre { sublist.push(slice_drive(&state.string, last_pos, state.start, vm)); } - if !(last_pos == state.start && last_pos == state.string_position && n > 1) { - // the above ignores empty matches on latest position - if is_callable { - let m = Match::new(&state, zelf.clone(), string.clone()); - let ret = vm.invoke(&filter, (m.into_ref(vm),))?; - sublist.push(ret); - } else { - sublist.push(filter.clone()); - } - - last_pos = state.string_position; - n += 1; - } - - if state.string_position == state.start { - state.start += 1; + if is_callable { + let m = Match::new(&state, zelf.clone(), string.clone()); + let ret = vm.invoke(&filter, (m.into_ref(vm),))?; + sublist.push(ret); } else { - state.start = state.string_position; + sublist.push(filter.clone()); } + + last_pos = state.string_position; + n += 1; + + state.must_advance = state.string_position == state.start; + state.start = state.string_position; + state.reset(); } /* get segment following last match */ @@ -764,7 +736,8 @@ mod _sre { pattern: PyRef, string: PyObjectRef, start: AtomicCell, - end: AtomicCell, + end: usize, + must_advance: AtomicCell, } impl PyValue for SreScanner { fn class(_vm: &VirtualMachine) -> &PyTypeRef { @@ -784,56 +757,44 @@ mod _sre { self.pattern.with_state( self.string.clone(), self.start.load(), - self.end.load(), + self.end, vm, |mut state| { + state.must_advance = self.must_advance.load(); state = state.pymatch(); - if state.has_matched != Some(true) { - Ok(None) - } else { - if state.start == state.string_position { - self.start.store(state.string_position + 1); - } else { - self.start.store(state.string_position); - } - Ok(Some( - Match::new(&state, self.pattern.clone(), self.string.clone()) - .into_ref(vm), - )) - } + self.must_advance + .store(state.start == state.string_position); + self.start.store(state.string_position); + + Ok(state.has_matched.then(|| { + Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm) + })) }, ) } #[pymethod] fn search(&self, vm: &VirtualMachine) -> PyResult>> { - if self.start.load() > self.end.load() { + if self.start.load() > self.end { return Ok(None); } self.pattern.with_state( self.string.clone(), self.start.load(), - self.end.load(), + self.end, vm, |mut state| { - state.reset(); + state.must_advance = self.must_advance.load(); state = state.search(); - self.end.store(state.end); - if state.has_matched == Some(true) { - if state.start == state.string_position { - self.start.store(state.string_position + 1); - } else { - self.start.store(state.string_position); - } - Ok(Some( - Match::new(&state, self.pattern.clone(), self.string.clone()) - .into_ref(vm), - )) - } else { - Ok(None) - } + self.must_advance + .store(state.string_position == state.start); + self.start.store(state.string_position); + + Ok(state.has_matched.then(|| { + Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm) + })) }, ) }