diff --git a/Cargo.lock b/Cargo.lock index 735093e94..b75ef7391 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2120,12 +2120,13 @@ dependencies = [ [[package]] name = "sre-engine" -version = "0.3.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a461901c54df500e011f7c2531529d8a1b0e47a10aea74acf4a5d2b40d75be4" +checksum = "a490c5c46c35dba9a6f5e7ee8e4d67e775eb2d2da0f115750b8d10e1c1ac2d28" dependencies = [ "bitflags", "num_enum", + "optional", ] [[package]] diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 0081670f5..7e15ab842 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -73,9 +73,10 @@ memoffset = "0.6.5" optional = "0.5.0" # RustPython crates implementing functionality based on CPython -sre-engine = "0.3.0" +sre-engine = "0.4.1" # to work on sre-engine locally or git version -# sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/12/head" } +# sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/14/head" } +# sre-engine = { git = "https://github.com/RustPython/sre-engine" } # sre-engine = { path = "../../sre-engine" } ## unicode stuff diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index bab27d051..ecca7b4a4 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -22,7 +22,7 @@ mod _sre { use num_traits::ToPrimitive; use sre_engine::{ constants::SreFlag, - engine::{lower_ascii, lower_unicode, upper_unicode, State, StrDrive}, + engine::{lower_ascii, lower_unicode, upper_unicode, Request, SearchIter, State, StrDrive}, }; #[pyattr] @@ -53,8 +53,8 @@ mod _sre { trait SreStr: StrDrive { fn slice(&self, start: usize, end: usize, vm: &VirtualMachine) -> PyObjectRef; - fn create_state(self, pattern: &Pattern, start: usize, end: usize) -> State { - State::new(self, start, end, pattern.flags, &pattern.code) + fn create_request(self, pattern: &Pattern, start: usize, end: usize) -> Request { + Request::new(self, start, end, &pattern.code, false) } } @@ -181,8 +181,9 @@ mod _sre { endpos, } = string_args; with_sre_str!(zelf, &string.clone(), vm, |x| { - let mut state = x.create_state(&zelf, pos, endpos); - state.pymatch(); + let req = x.create_request(&zelf, pos, endpos); + let mut state = State::default(); + state.pymatch(req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string).into_ref(vm))) @@ -196,9 +197,10 @@ mod _sre { vm: &VirtualMachine, ) -> PyResult>> { with_sre_str!(zelf, &string_args.string.clone(), vm, |x| { - let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); - state.match_all = true; - state.pymatch(); + let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); + req.match_all = true; + let mut state = State::default(); + state.pymatch(req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) @@ -212,8 +214,9 @@ mod _sre { vm: &VirtualMachine, ) -> PyResult>> { with_sre_str!(zelf, &string_args.string.clone(), vm, |x| { - let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); - state.search(); + let req = x.create_request(&zelf, string_args.pos, string_args.endpos); + let mut state = State::default(); + state.search(req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) @@ -226,19 +229,17 @@ mod _sre { string_args: StringArgs, vm: &VirtualMachine, ) -> PyResult> { - with_sre_str!(zelf, &string_args.string, vm, |x| { - let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); + with_sre_str!(zelf, &string_args.string, vm, |s| { + let req = s.create_request(&zelf, string_args.pos, string_args.endpos); + let state = State::default(); let mut matchlist: Vec = Vec::new(); - while state.start <= state.end { - state.search(); - if !state.has_matched { - break; - } + let mut iter = SearchIter { req, state }; - let m = Match::new(&state, zelf.clone(), string_args.string.clone()); + while iter.next().is_some() { + let m = Match::new(&iter.state, zelf.clone(), string_args.string.clone()); let item = if zelf.groups == 0 || zelf.groups == 1 { - m.get_slice(zelf.groups, state.string, vm) + m.get_slice(zelf.groups, s, vm) .unwrap_or_else(|| vm.ctx.none()) } else { m.groups(OptionalArg::Present(vm.ctx.new_str(ascii!("")).into()), vm)? @@ -246,11 +247,8 @@ mod _sre { }; matchlist.push(item); - - state.must_advance = state.string_position == state.start; - state.start = state.string_position; - state.reset(); } + Ok(matchlist) }) } @@ -306,40 +304,32 @@ mod _sre { split_args: SplitArgs, vm: &VirtualMachine, ) -> PyResult> { - with_sre_str!(zelf, &split_args.string, vm, |x| { - let mut state = x.create_state(&zelf, 0, usize::MAX); + with_sre_str!(zelf, &split_args.string, vm, |s| { + let req = s.create_request(&zelf, 0, usize::MAX); + let state = State::default(); let mut splitlist: Vec = Vec::new(); - + let mut iter = SearchIter { req, state }; let mut n = 0; let mut last = 0; - while split_args.maxsplit == 0 || n < split_args.maxsplit { - state.search(); - if !state.has_matched { - break; - } + while (split_args.maxsplit == 0 || n < split_args.maxsplit) && iter.next().is_some() + { /* get segment before this match */ - splitlist.push(state.string.slice(last, state.start, vm)); + splitlist.push(s.slice(last, iter.state.start, vm)); - let m = Match::new(&state, zelf.clone(), split_args.string.clone()); + let m = Match::new(&iter.state, zelf.clone(), split_args.string.clone()); // add groups (if any) for i in 1..=zelf.groups { - splitlist.push( - m.get_slice(i, state.string, vm) - .unwrap_or_else(|| vm.ctx.none()), - ); + splitlist.push(m.get_slice(i, s, vm).unwrap_or_else(|| vm.ctx.none())); } n += 1; - state.must_advance = state.string_position == state.start; - last = state.string_position; - state.start = state.string_position; - state.reset(); + last = iter.state.string_position; } // get segment following last match (even if empty) - splitlist.push(state.string.slice(last, state.string.count(), vm)); + splitlist.push(req.string.slice(last, s.count(), vm)); Ok(splitlist) }) @@ -438,39 +428,33 @@ mod _sre { }; with_sre_str!(zelf, &string, vm, |s| { - let mut state = s.create_state(&zelf, 0, usize::MAX); + let req = s.create_request(&zelf, 0, usize::MAX); + let state = State::default(); let mut sublist: Vec = Vec::new(); + let mut iter = SearchIter { req, state }; let mut n = 0; let mut last_pos = 0; - while count == 0 || n < count { - state.search(); - if !state.has_matched { - break; - } - if last_pos < state.start { + while (count == 0 || n < count) && iter.next().is_some() { + if last_pos < iter.state.start { /* get segment before this match */ - sublist.push(state.string.slice(last_pos, state.start, vm)); + sublist.push(s.slice(last_pos, iter.state.start, vm)); } if is_callable { - let m = Match::new(&state, zelf.clone(), string.clone()); + let m = Match::new(&iter.state, zelf.clone(), string.clone()); let ret = vm.invoke(&filter, (m.into_ref(vm),))?; sublist.push(ret); } else { sublist.push(filter.clone()); } - last_pos = state.string_position; + last_pos = iter.state.string_position; n += 1; - - state.must_advance = state.string_position == state.start; - state.start = state.string_position; - state.reset(); } /* get segment following last match */ - sublist.push(state.string.slice(last_pos, state.end, vm)); + sublist.push(s.slice(last_pos, iter.req.end, vm)); let list = PyList::from(sublist).into_pyobject(vm); @@ -550,10 +534,10 @@ mod _sre { for group in 0..pattern.groups { let mark_index = 2 * group; if mark_index + 1 < state.marks.len() { - if let (Some(start), Some(end)) = - (state.marks[mark_index], state.marks[mark_index + 1]) - { - regs.push((start as isize, end as isize)); + let start = state.marks[mark_index]; + let end = state.marks[mark_index + 1]; + if start.is_some() && end.is_some() { + regs.push((start.unpack() as isize, end.unpack() as isize)); continue; } } @@ -563,8 +547,8 @@ mod _sre { string, pattern, pos: state.start, - endpos: state.end, - lastindex: state.lastindex, + endpos: state.string_position, + lastindex: state.marks.last_index(), regs, } } @@ -802,12 +786,13 @@ mod _sre { #[pymethod(name = "match")] fn pymatch(&self, vm: &VirtualMachine) -> PyResult>> { with_sre_str!(self.pattern, &self.string.clone(), vm, |s| { - let mut state = s.create_state(&self.pattern, self.start.load(), self.end); - state.must_advance = self.must_advance.load(); - state.pymatch(); + let mut req = s.create_request(&self.pattern, self.start.load(), self.end); + let mut state = State::default(); + req.must_advance = self.must_advance.load(); + state.pymatch(req); self.must_advance - .store(state.start == state.string_position); + .store(state.string_position == state.start); self.start.store(state.string_position); Ok(state.has_matched.then(|| { @@ -822,10 +807,11 @@ mod _sre { return Ok(None); } with_sre_str!(self.pattern, &self.string.clone(), vm, |s| { - let mut state = s.create_state(&self.pattern, self.start.load(), self.end); - state.must_advance = self.must_advance.load(); + let mut req = s.create_request(&self.pattern, self.start.load(), self.end); + let mut state = State::default(); + req.must_advance = self.must_advance.load(); - state.search(); + state.search(req); self.must_advance .store(state.string_position == state.start);