From ed30c5d84d23d3bdef85ae61d3638c876a70e0f6 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Tue, 9 Aug 2022 22:35:27 +0200 Subject: [PATCH 1/2] refactor to update sre-engine to 0.4.0 --- Cargo.lock | 6 +-- vm/Cargo.toml | 4 +- vm/src/stdlib/sre.rs | 104 +++++++++++++++++++++++-------------------- 3 files changed, 60 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae0f9c81f..500c6bc50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2120,12 +2120,12 @@ dependencies = [ [[package]] name = "sre-engine" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a461901c54df500e011f7c2531529d8a1b0e47a10aea74acf4a5d2b40d75be4" +version = "0.4.1" +source = "git+https://github.com/RustPython/sre-engine?rev=refs/pull/14/head#3a745cc494eea84a8b6fa2053fbbadccf181370d" dependencies = [ "bitflags", "num_enum", + "optional", ] [[package]] diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 832114ed7..6e0b1396a 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -74,9 +74,9 @@ memoffset = "0.6.5" optional = "0.5.0" # RustPython crates implementing functionality based on CPython -sre-engine = "0.3.0" +# sre-engine = "0.3.0" # to work on sre-engine locally or git version -# sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/12/head" } +sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/14/head" } # sre-engine = { path = "../../sre-engine" } ## unicode stuff diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index bab27d051..49477ccc8 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -22,7 +22,7 @@ mod _sre { use num_traits::ToPrimitive; use sre_engine::{ constants::SreFlag, - engine::{lower_ascii, lower_unicode, upper_unicode, State, StrDrive}, + engine::{lower_ascii, lower_unicode, upper_unicode, Request, State, StrDrive}, }; #[pyattr] @@ -53,8 +53,8 @@ mod _sre { trait SreStr: StrDrive { fn slice(&self, start: usize, end: usize, vm: &VirtualMachine) -> PyObjectRef; - fn create_state(self, pattern: &Pattern, start: usize, end: usize) -> State { - State::new(self, start, end, pattern.flags, &pattern.code) + fn create_request(self, pattern: &Pattern, start: usize, end: usize) -> Request { + Request::new(self, start, end, &pattern.code, false) } } @@ -181,8 +181,9 @@ mod _sre { endpos, } = string_args; with_sre_str!(zelf, &string.clone(), vm, |x| { - let mut state = x.create_state(&zelf, pos, endpos); - state.pymatch(); + let mut req = x.create_request(&zelf, pos, endpos); + let mut state = State::default(); + state.pymatch(&mut req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string).into_ref(vm))) @@ -196,9 +197,10 @@ mod _sre { vm: &VirtualMachine, ) -> PyResult>> { with_sre_str!(zelf, &string_args.string.clone(), vm, |x| { - let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); - state.match_all = true; - state.pymatch(); + let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); + req.match_all = true; + let mut state = State::default(); + state.pymatch(&mut req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) @@ -212,8 +214,9 @@ mod _sre { vm: &VirtualMachine, ) -> PyResult>> { with_sre_str!(zelf, &string_args.string.clone(), vm, |x| { - let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); - state.search(); + let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); + let mut state = State::default(); + state.search(&mut req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) @@ -227,10 +230,11 @@ mod _sre { vm: &VirtualMachine, ) -> PyResult> { with_sre_str!(zelf, &string_args.string, vm, |x| { - let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); + let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); + let mut state = State::default(); let mut matchlist: Vec = Vec::new(); - while state.start <= state.end { - state.search(); + while req.start <= req.end { + state.search(&mut req); if !state.has_matched { break; } @@ -238,7 +242,7 @@ mod _sre { let m = Match::new(&state, zelf.clone(), string_args.string.clone()); let item = if zelf.groups == 0 || zelf.groups == 1 { - m.get_slice(zelf.groups, state.string, vm) + m.get_slice(zelf.groups, req.string, vm) .unwrap_or_else(|| vm.ctx.none()) } else { m.groups(OptionalArg::Present(vm.ctx.new_str(ascii!("")).into()), vm)? @@ -247,9 +251,9 @@ mod _sre { matchlist.push(item); - state.must_advance = state.string_position == state.start; - state.start = state.string_position; - state.reset(); + req.must_advance = state.string_position == req.start; + req.start = state.string_position; + state.reset(req.start); } Ok(matchlist) }) @@ -307,39 +311,40 @@ mod _sre { vm: &VirtualMachine, ) -> PyResult> { with_sre_str!(zelf, &split_args.string, vm, |x| { - let mut state = x.create_state(&zelf, 0, usize::MAX); + let mut req = x.create_request(&zelf, 0, usize::MAX); + let mut state = State::default(); let mut splitlist: Vec = Vec::new(); let mut n = 0; let mut last = 0; while split_args.maxsplit == 0 || n < split_args.maxsplit { - state.search(); + state.search(&mut req); if !state.has_matched { break; } /* get segment before this match */ - splitlist.push(state.string.slice(last, state.start, vm)); + splitlist.push(req.string.slice(last, req.start, vm)); let m = Match::new(&state, zelf.clone(), split_args.string.clone()); // add groups (if any) for i in 1..=zelf.groups { splitlist.push( - m.get_slice(i, state.string, vm) + m.get_slice(i, req.string, vm) .unwrap_or_else(|| vm.ctx.none()), ); } n += 1; - state.must_advance = state.string_position == state.start; + req.must_advance = state.string_position == req.start; last = state.string_position; - state.start = state.string_position; - state.reset(); + req.start = state.string_position; + state.reset(req.start); } // get segment following last match (even if empty) - splitlist.push(state.string.slice(last, state.string.count(), vm)); + splitlist.push(req.string.slice(last, req.string.count(), vm)); Ok(splitlist) }) @@ -438,19 +443,20 @@ mod _sre { }; with_sre_str!(zelf, &string, vm, |s| { - let mut state = s.create_state(&zelf, 0, usize::MAX); + let mut req = s.create_request(&zelf, 0, usize::MAX); + let mut state = State::default(); let mut sublist: Vec = Vec::new(); let mut n = 0; let mut last_pos = 0; while count == 0 || n < count { - state.search(); + state.search(&mut req); if !state.has_matched { break; } - if last_pos < state.start { + if last_pos < req.start { /* get segment before this match */ - sublist.push(state.string.slice(last_pos, state.start, vm)); + sublist.push(req.string.slice(last_pos, req.start, vm)); } if is_callable { @@ -464,13 +470,13 @@ mod _sre { last_pos = state.string_position; n += 1; - state.must_advance = state.string_position == state.start; - state.start = state.string_position; - state.reset(); + req.must_advance = state.string_position == req.start; + req.start = state.string_position; + state.reset(req.start); } /* get segment following last match */ - sublist.push(state.string.slice(last_pos, state.end, vm)); + sublist.push(req.string.slice(last_pos, req.end, vm)); let list = PyList::from(sublist).into_pyobject(vm); @@ -550,10 +556,10 @@ mod _sre { for group in 0..pattern.groups { let mark_index = 2 * group; if mark_index + 1 < state.marks.len() { - if let (Some(start), Some(end)) = - (state.marks[mark_index], state.marks[mark_index + 1]) - { - regs.push((start as isize, end as isize)); + let start = state.marks[mark_index]; + let end = state.marks[mark_index + 1]; + if start.is_some() && end.is_some() { + regs.push((start.unpack() as isize, end.unpack() as isize)); continue; } } @@ -563,8 +569,8 @@ mod _sre { string, pattern, pos: state.start, - endpos: state.end, - lastindex: state.lastindex, + endpos: state.string_position, + lastindex: state.marks.last_index(), regs, } } @@ -802,12 +808,12 @@ mod _sre { #[pymethod(name = "match")] fn pymatch(&self, vm: &VirtualMachine) -> PyResult>> { with_sre_str!(self.pattern, &self.string.clone(), vm, |s| { - let mut state = s.create_state(&self.pattern, self.start.load(), self.end); - state.must_advance = self.must_advance.load(); - state.pymatch(); + let mut req = s.create_request(&self.pattern, self.start.load(), self.end); + let mut state = State::default(); + req.must_advance = self.must_advance.load(); + state.pymatch(&mut req); - self.must_advance - .store(state.start == state.string_position); + self.must_advance.store(req.start == state.string_position); self.start.store(state.string_position); Ok(state.has_matched.then(|| { @@ -822,13 +828,13 @@ mod _sre { return Ok(None); } with_sre_str!(self.pattern, &self.string.clone(), vm, |s| { - let mut state = s.create_state(&self.pattern, self.start.load(), self.end); - state.must_advance = self.must_advance.load(); + let mut req = s.create_request(&self.pattern, self.start.load(), self.end); + let mut state = State::default(); + req.must_advance = self.must_advance.load(); - state.search(); + state.search(&mut req); - self.must_advance - .store(state.string_position == state.start); + self.must_advance.store(state.string_position == req.start); self.start.store(state.string_position); Ok(state.has_matched.then(|| { From b5fa915708a2908e51348473d3258d238212051f Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Mon, 15 Aug 2022 21:42:06 +0200 Subject: [PATCH 2/2] refactor to update sre-engine to 0.4.1 --- Cargo.lock | 3 +- vm/Cargo.toml | 5 ++- vm/src/stdlib/sre.rs | 100 +++++++++++++++++-------------------------- 3 files changed, 45 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 500c6bc50..9c9e5e2d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2121,7 +2121,8 @@ dependencies = [ [[package]] name = "sre-engine" version = "0.4.1" -source = "git+https://github.com/RustPython/sre-engine?rev=refs/pull/14/head#3a745cc494eea84a8b6fa2053fbbadccf181370d" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a490c5c46c35dba9a6f5e7ee8e4d67e775eb2d2da0f115750b8d10e1c1ac2d28" dependencies = [ "bitflags", "num_enum", diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 6e0b1396a..960c535ec 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -74,9 +74,10 @@ memoffset = "0.6.5" optional = "0.5.0" # RustPython crates implementing functionality based on CPython -# sre-engine = "0.3.0" +sre-engine = "0.4.1" # to work on sre-engine locally or git version -sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/14/head" } +# sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/14/head" } +# sre-engine = { git = "https://github.com/RustPython/sre-engine" } # sre-engine = { path = "../../sre-engine" } ## unicode stuff diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index 49477ccc8..ecca7b4a4 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -22,7 +22,7 @@ mod _sre { use num_traits::ToPrimitive; use sre_engine::{ constants::SreFlag, - engine::{lower_ascii, lower_unicode, upper_unicode, Request, State, StrDrive}, + engine::{lower_ascii, lower_unicode, upper_unicode, Request, SearchIter, State, StrDrive}, }; #[pyattr] @@ -181,9 +181,9 @@ mod _sre { endpos, } = string_args; with_sre_str!(zelf, &string.clone(), vm, |x| { - let mut req = x.create_request(&zelf, pos, endpos); + let req = x.create_request(&zelf, pos, endpos); let mut state = State::default(); - state.pymatch(&mut req); + state.pymatch(req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string).into_ref(vm))) @@ -200,7 +200,7 @@ mod _sre { let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); req.match_all = true; let mut state = State::default(); - state.pymatch(&mut req); + state.pymatch(req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) @@ -214,9 +214,9 @@ mod _sre { vm: &VirtualMachine, ) -> PyResult>> { with_sre_str!(zelf, &string_args.string.clone(), vm, |x| { - let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); + let req = x.create_request(&zelf, string_args.pos, string_args.endpos); let mut state = State::default(); - state.search(&mut req); + state.search(req); Ok(state .has_matched .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) @@ -229,20 +229,17 @@ mod _sre { string_args: StringArgs, vm: &VirtualMachine, ) -> PyResult> { - with_sre_str!(zelf, &string_args.string, vm, |x| { - let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); - let mut state = State::default(); + with_sre_str!(zelf, &string_args.string, vm, |s| { + let req = s.create_request(&zelf, string_args.pos, string_args.endpos); + let state = State::default(); let mut matchlist: Vec = Vec::new(); - while req.start <= req.end { - state.search(&mut req); - if !state.has_matched { - break; - } + let mut iter = SearchIter { req, state }; - let m = Match::new(&state, zelf.clone(), string_args.string.clone()); + while iter.next().is_some() { + let m = Match::new(&iter.state, zelf.clone(), string_args.string.clone()); let item = if zelf.groups == 0 || zelf.groups == 1 { - m.get_slice(zelf.groups, req.string, vm) + m.get_slice(zelf.groups, s, vm) .unwrap_or_else(|| vm.ctx.none()) } else { m.groups(OptionalArg::Present(vm.ctx.new_str(ascii!("")).into()), vm)? @@ -250,11 +247,8 @@ mod _sre { }; matchlist.push(item); - - req.must_advance = state.string_position == req.start; - req.start = state.string_position; - state.reset(req.start); } + Ok(matchlist) }) } @@ -310,41 +304,32 @@ mod _sre { split_args: SplitArgs, vm: &VirtualMachine, ) -> PyResult> { - with_sre_str!(zelf, &split_args.string, vm, |x| { - let mut req = x.create_request(&zelf, 0, usize::MAX); - let mut state = State::default(); + with_sre_str!(zelf, &split_args.string, vm, |s| { + let req = s.create_request(&zelf, 0, usize::MAX); + let state = State::default(); let mut splitlist: Vec = Vec::new(); - + let mut iter = SearchIter { req, state }; let mut n = 0; let mut last = 0; - while split_args.maxsplit == 0 || n < split_args.maxsplit { - state.search(&mut req); - if !state.has_matched { - break; - } + while (split_args.maxsplit == 0 || n < split_args.maxsplit) && iter.next().is_some() + { /* get segment before this match */ - splitlist.push(req.string.slice(last, req.start, vm)); + splitlist.push(s.slice(last, iter.state.start, vm)); - let m = Match::new(&state, zelf.clone(), split_args.string.clone()); + let m = Match::new(&iter.state, zelf.clone(), split_args.string.clone()); // add groups (if any) for i in 1..=zelf.groups { - splitlist.push( - m.get_slice(i, req.string, vm) - .unwrap_or_else(|| vm.ctx.none()), - ); + splitlist.push(m.get_slice(i, s, vm).unwrap_or_else(|| vm.ctx.none())); } n += 1; - req.must_advance = state.string_position == req.start; - last = state.string_position; - req.start = state.string_position; - state.reset(req.start); + last = iter.state.string_position; } // get segment following last match (even if empty) - splitlist.push(req.string.slice(last, req.string.count(), vm)); + splitlist.push(req.string.slice(last, s.count(), vm)); Ok(splitlist) }) @@ -443,40 +428,33 @@ mod _sre { }; with_sre_str!(zelf, &string, vm, |s| { - let mut req = s.create_request(&zelf, 0, usize::MAX); - let mut state = State::default(); + let req = s.create_request(&zelf, 0, usize::MAX); + let state = State::default(); let mut sublist: Vec = Vec::new(); + let mut iter = SearchIter { req, state }; let mut n = 0; let mut last_pos = 0; - while count == 0 || n < count { - state.search(&mut req); - if !state.has_matched { - break; - } - if last_pos < req.start { + while (count == 0 || n < count) && iter.next().is_some() { + if last_pos < iter.state.start { /* get segment before this match */ - sublist.push(req.string.slice(last_pos, req.start, vm)); + sublist.push(s.slice(last_pos, iter.state.start, vm)); } if is_callable { - let m = Match::new(&state, zelf.clone(), string.clone()); + let m = Match::new(&iter.state, zelf.clone(), string.clone()); let ret = vm.invoke(&filter, (m.into_ref(vm),))?; sublist.push(ret); } else { sublist.push(filter.clone()); } - last_pos = state.string_position; + last_pos = iter.state.string_position; n += 1; - - req.must_advance = state.string_position == req.start; - req.start = state.string_position; - state.reset(req.start); } /* get segment following last match */ - sublist.push(req.string.slice(last_pos, req.end, vm)); + sublist.push(s.slice(last_pos, iter.req.end, vm)); let list = PyList::from(sublist).into_pyobject(vm); @@ -811,9 +789,10 @@ mod _sre { let mut req = s.create_request(&self.pattern, self.start.load(), self.end); let mut state = State::default(); req.must_advance = self.must_advance.load(); - state.pymatch(&mut req); + state.pymatch(req); - self.must_advance.store(req.start == state.string_position); + self.must_advance + .store(state.string_position == state.start); self.start.store(state.string_position); Ok(state.has_matched.then(|| { @@ -832,9 +811,10 @@ mod _sre { let mut state = State::default(); req.must_advance = self.must_advance.load(); - state.search(&mut req); + state.search(req); - self.must_advance.store(state.string_position == req.start); + self.must_advance + .store(state.string_position == state.start); self.start.store(state.string_position); Ok(state.has_matched.then(|| {