diff --git a/Cargo.lock b/Cargo.lock index 8c882475b..189094dad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2111,9 +2111,9 @@ dependencies = [ [[package]] name = "sre-engine" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1401a990298acbcda1893ef1e449895fabec80f94c2633ee64b1046c84712b24" +checksum = "2a461901c54df500e011f7c2531529d8a1b0e47a10aea74acf4a5d2b40d75be4" dependencies = [ "bitflags", "num_enum", diff --git a/vm/Cargo.toml b/vm/Cargo.toml index da4ad14cb..48641a8db 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -72,9 +72,9 @@ memoffset = "0.6.5" optional = "0.5.0" # RustPython crates implementing functionality based on CPython -sre-engine = "0.2.1" +sre-engine = "0.3.0" # to work on sre-engine locally or git version -# sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/11/head" } +# sre-engine = { git = "https://github.com/RustPython/sre-engine", rev = "refs/pull/12/head" } # sre-engine = { path = "../../sre-engine" } ## unicode stuff diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index 251302944..bab27d051 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -50,21 +50,27 @@ mod _sre { lower_unicode(ch as u32) as i32 } - fn slice_drive( - this: &StrDrive<'_>, - start: usize, - end: usize, - vm: &VirtualMachine, - ) -> PyObjectRef { - match this { - StrDrive::Str(s) => vm - .ctx - .new_str(s.chars().take(end).skip(start).collect::()) - .into(), - StrDrive::Bytes(b) => vm - .ctx - .new_bytes(b.iter().take(end).skip(start).cloned().collect()) - .into(), + trait SreStr: StrDrive { + fn slice(&self, start: usize, end: usize, vm: &VirtualMachine) -> PyObjectRef; + + fn create_state(self, pattern: &Pattern, start: usize, end: usize) -> State { + State::new(self, start, end, pattern.flags, &pattern.code) + } + } + + impl SreStr for &[u8] { + fn slice(&self, start: usize, end: usize, vm: &VirtualMachine) -> PyObjectRef { + vm.ctx + .new_bytes(self.iter().take(end).skip(start).cloned().collect()) + .into() + } + } + + impl SreStr for &str { + fn slice(&self, start: usize, end: usize, vm: &VirtualMachine) -> PyObjectRef { + vm.ctx + .new_str(self.chars().take(end).skip(start).collect::()) + .into() } } @@ -134,37 +140,33 @@ mod _sre { pub isbytes: bool, } + macro_rules! with_sre_str { + ($pattern:expr, $string:expr, $vm:expr, $f:expr) => { + if $pattern.isbytes { + Pattern::with_bytes($string, $vm, $f) + } else { + Pattern::with_str($string, $vm, $f) + } + }; + } + #[pyclass(with(Hashable, Comparable))] impl Pattern { - fn with_str_drive PyResult>( - &self, - string: PyObjectRef, - vm: &VirtualMachine, - f: F, - ) -> PyResult { - if self.isbytes { - let buffer = PyBuffer::try_from_borrowed_object(vm, &string)?; - buffer.contiguous_or_collect(|bytes| f(StrDrive::Bytes(bytes))) - } else { - let s = string - .payload::() - .ok_or_else(|| vm.new_type_error("expected string".to_owned()))?; - f(StrDrive::Str(s.as_str())) - } + fn with_str(string: &PyObject, vm: &VirtualMachine, f: F) -> PyResult + where + F: FnOnce(&str) -> PyResult, + { + string + .payload::() + .ok_or_else(|| vm.new_type_error("expected string".to_owned())) + .and_then(|x| f(x.as_str())) } - fn with_state PyResult>( - &self, - string: PyObjectRef, - start: usize, - end: usize, - vm: &VirtualMachine, - f: F, - ) -> PyResult { - self.with_str_drive(string, vm, |str_drive| { - let state = State::new(str_drive, start, end, self.flags, &self.code); - f(state) - }) + fn with_bytes(string: &PyObject, vm: &VirtualMachine, f: F) -> PyResult + where + F: FnOnce(&[u8]) -> PyResult, + { + PyBuffer::try_from_borrowed_object(vm, string).and_then(|x| x.contiguous_or_collect(f)) } #[pymethod(name = "match")] @@ -173,18 +175,18 @@ mod _sre { string_args: StringArgs, vm: &VirtualMachine, ) -> PyResult>> { - zelf.with_state( - string_args.string.clone(), - string_args.pos, - string_args.endpos, - vm, - |mut state| { - state = state.pymatch(); - Ok(state - .has_matched - .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) - }, - ) + let StringArgs { + string, + pos, + endpos, + } = string_args; + with_sre_str!(zelf, &string.clone(), vm, |x| { + let mut state = x.create_state(&zelf, pos, endpos); + state.pymatch(); + Ok(state + .has_matched + .then(|| Match::new(&state, zelf.clone(), string).into_ref(vm))) + }) } #[pymethod] @@ -193,19 +195,14 @@ mod _sre { string_args: StringArgs, vm: &VirtualMachine, ) -> PyResult>> { - zelf.with_state( - string_args.string.clone(), - string_args.pos, - string_args.endpos, - vm, - |mut state| { - state.match_all = true; - state = state.pymatch(); - Ok(state - .has_matched - .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) - }, - ) + with_sre_str!(zelf, &string_args.string.clone(), vm, |x| { + let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); + state.match_all = true; + state.pymatch(); + Ok(state + .has_matched + .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) + }) } #[pymethod] @@ -214,18 +211,13 @@ mod _sre { string_args: StringArgs, vm: &VirtualMachine, ) -> PyResult>> { - zelf.with_state( - string_args.string.clone(), - string_args.pos, - string_args.endpos, - vm, - |mut state| { - state = state.search(); - Ok(state - .has_matched - .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) - }, - ) + with_sre_str!(zelf, &string_args.string.clone(), vm, |x| { + let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); + state.search(); + Ok(state + .has_matched + .then(|| Match::new(&state, zelf.clone(), string_args.string).into_ref(vm))) + }) } #[pymethod] @@ -234,38 +226,33 @@ mod _sre { string_args: StringArgs, vm: &VirtualMachine, ) -> PyResult> { - zelf.with_state( - string_args.string.clone(), - string_args.pos, - string_args.endpos, - vm, - |mut state| { - let mut matchlist: Vec = Vec::new(); - while state.start <= state.end { - state = state.search(); - if !state.has_matched { - break; - } - - let m = Match::new(&state, zelf.clone(), string_args.string.clone()); - - let item = if zelf.groups == 0 || zelf.groups == 1 { - m.get_slice(zelf.groups, state.string, vm) - .unwrap_or_else(|| vm.ctx.none()) - } else { - m.groups(OptionalArg::Present(vm.ctx.new_str(ascii!("")).into()), vm)? - .into() - }; - - matchlist.push(item); - - state.must_advance = state.string_position == state.start; - state.start = state.string_position; - state.reset(); + with_sre_str!(zelf, &string_args.string, vm, |x| { + let mut state = x.create_state(&zelf, string_args.pos, string_args.endpos); + let mut matchlist: Vec = Vec::new(); + while state.start <= state.end { + state.search(); + if !state.has_matched { + break; } - Ok(matchlist) - }, - ) + + let m = Match::new(&state, zelf.clone(), string_args.string.clone()); + + let item = if zelf.groups == 0 || zelf.groups == 1 { + m.get_slice(zelf.groups, state.string, vm) + .unwrap_or_else(|| vm.ctx.none()) + } else { + m.groups(OptionalArg::Present(vm.ctx.new_str(ascii!("")).into()), vm)? + .into() + }; + + matchlist.push(item); + + state.must_advance = state.string_position == state.start; + state.start = state.string_position; + state.reset(); + } + Ok(matchlist) + }) } #[pymethod] @@ -319,19 +306,20 @@ mod _sre { split_args: SplitArgs, vm: &VirtualMachine, ) -> PyResult> { - zelf.with_state(split_args.string.clone(), 0, usize::MAX, vm, |mut state| { + with_sre_str!(zelf, &split_args.string, vm, |x| { + let mut state = x.create_state(&zelf, 0, usize::MAX); let mut splitlist: Vec = Vec::new(); let mut n = 0; let mut last = 0; while split_args.maxsplit == 0 || n < split_args.maxsplit { - state = state.search(); + state.search(); if !state.has_matched { break; } /* get segment before this match */ - splitlist.push(slice_drive(&state.string, last, state.start, vm)); + splitlist.push(state.string.slice(last, state.start, vm)); let m = Match::new(&state, zelf.clone(), split_args.string.clone()); @@ -351,7 +339,7 @@ mod _sre { } // get segment following last match (even if empty) - splitlist.push(slice_drive(&state.string, last, state.string.count(), vm)); + splitlist.push(state.string.slice(last, state.string.count(), vm)); Ok(splitlist) }) @@ -434,12 +422,11 @@ mod _sre { let (is_callable, filter) = if vm.is_callable(&repl) { (true, repl) } else { - let is_template = zelf.with_str_drive::<_, _>(repl.clone(), vm, |str_drive| { - Ok(match str_drive { - StrDrive::Str(s) => s.contains('\\'), - StrDrive::Bytes(b) => b.contains(&b'\\'), - }) - })?; + let is_template = if zelf.isbytes { + Self::with_bytes(&repl, vm, |x| Ok(x.contains(&b'\\'))) + } else { + Self::with_str(&repl, vm, |x| Ok(x.contains('\\'))) + }?; if is_template { let re = vm.import("re", None, 0)?; let func = re.get_attr("_subx", vm)?; @@ -450,19 +437,20 @@ mod _sre { } }; - zelf.with_state(string.clone(), 0, usize::MAX, vm, |mut state| { + with_sre_str!(zelf, &string, vm, |s| { + let mut state = s.create_state(&zelf, 0, usize::MAX); let mut sublist: Vec = Vec::new(); let mut n = 0; let mut last_pos = 0; while count == 0 || n < count { - state = state.search(); + state.search(); if !state.has_matched { break; } if last_pos < state.start { /* get segment before this match */ - sublist.push(slice_drive(&state.string, last_pos, state.start, vm)); + sublist.push(state.string.slice(last_pos, state.start, vm)); } if is_callable { @@ -482,7 +470,7 @@ mod _sre { } /* get segment following last match */ - sublist.push(slice_drive(&state.string, last_pos, state.end, vm)); + sublist.push(state.string.slice(last_pos, state.end, vm)); let list = PyList::from(sublist).into_pyobject(vm); @@ -553,7 +541,11 @@ mod _sre { #[pyclass(with(AsMapping))] impl Match { - pub(crate) fn new(state: &State, pattern: PyRef, string: PyObjectRef) -> Self { + pub(crate) fn new( + state: &State, + pattern: PyRef, + string: PyObjectRef, + ) -> Self { let mut regs = vec![(state.start as isize, state.string_position as isize)]; for group in 0..pattern.groups { let mark_index = 2 * group; @@ -645,30 +637,29 @@ mod _sre { #[pymethod] fn group(&self, args: PosArgs, vm: &VirtualMachine) -> PyResult { - self.pattern - .with_str_drive(self.string.clone(), vm, |str_drive| { - let args = args.into_vec(); - if args.is_empty() { - return Ok(self.get_slice(0, str_drive, vm).unwrap().to_pyobject(vm)); - } - let mut v: Vec = args - .into_iter() - .map(|x| { - self.get_index(x, vm) - .ok_or_else(|| vm.new_index_error("no such group".to_owned())) - .map(|index| { - self.get_slice(index, str_drive, vm) - .map(|x| x.to_pyobject(vm)) - .unwrap_or_else(|| vm.ctx.none()) - }) - }) - .try_collect()?; - if v.len() == 1 { - Ok(v.pop().unwrap()) - } else { - Ok(vm.ctx.new_tuple(v).into()) - } - }) + with_sre_str!(self.pattern, &self.string, vm, |str_drive| { + let args = args.into_vec(); + if args.is_empty() { + return Ok(self.get_slice(0, str_drive, vm).unwrap().to_pyobject(vm)); + } + let mut v: Vec = args + .into_iter() + .map(|x| { + self.get_index(x, vm) + .ok_or_else(|| vm.new_index_error("no such group".to_owned())) + .map(|index| { + self.get_slice(index, str_drive, vm) + .map(|x| x.to_pyobject(vm)) + .unwrap_or_else(|| vm.ctx.none()) + }) + }) + .try_collect()?; + if v.len() == 1 { + Ok(v.pop().unwrap()) + } else { + Ok(vm.ctx.new_tuple(v).into()) + } + }) } #[pymethod(magic)] @@ -677,13 +668,12 @@ mod _sre { group: PyObjectRef, vm: &VirtualMachine, ) -> PyResult> { - self.pattern - .with_str_drive(self.string.clone(), vm, |str_drive| { - let i = self - .get_index(group, vm) - .ok_or_else(|| vm.new_index_error("no such group".to_owned()))?; - Ok(self.get_slice(i, str_drive, vm)) - }) + with_sre_str!(self.pattern, &self.string, vm, |str_drive| { + let i = self + .get_index(group, vm) + .ok_or_else(|| vm.new_index_error("no such group".to_owned()))?; + Ok(self.get_slice(i, str_drive, vm)) + }) } #[pymethod] @@ -694,17 +684,16 @@ mod _sre { ) -> PyResult { let default = default.unwrap_or_else(|| vm.ctx.none()); - self.pattern - .with_str_drive(self.string.clone(), vm, |str_drive| { - let v: Vec = (1..self.regs.len()) - .map(|i| { - self.get_slice(i, str_drive, vm) - .map(|s| s.to_pyobject(vm)) - .unwrap_or_else(|| default.clone()) - }) - .collect(); - Ok(PyTuple::new_ref(v, &vm.ctx)) - }) + with_sre_str!(self.pattern, &self.string, vm, |str_drive| { + let v: Vec = (1..self.regs.len()) + .map(|i| { + self.get_slice(i, str_drive, vm) + .map(|s| s.to_pyobject(vm)) + .unwrap_or_else(|| default.clone()) + }) + .collect(); + Ok(PyTuple::new_ref(v, &vm.ctx)) + }) } #[pymethod] @@ -715,33 +704,31 @@ mod _sre { ) -> PyResult { let default = default.unwrap_or_else(|| vm.ctx.none()); - self.pattern - .with_str_drive(self.string.clone(), vm, |str_drive| { - let dict = vm.ctx.new_dict(); + with_sre_str!(self.pattern, &self.string, vm, |str_drive| { + let dict = vm.ctx.new_dict(); - for (key, index) in self.pattern.groupindex.clone() { - let value = self - .get_index(index, vm) - .and_then(|x| self.get_slice(x, str_drive, vm)) - .map(|x| x.to_pyobject(vm)) - .unwrap_or_else(|| default.clone()); - dict.set_item(&*key, value, vm)?; - } - Ok(dict) - }) + for (key, index) in self.pattern.groupindex.clone() { + let value = self + .get_index(index, vm) + .and_then(|x| self.get_slice(x, str_drive, vm)) + .map(|x| x.to_pyobject(vm)) + .unwrap_or_else(|| default.clone()); + dict.set_item(&*key, value, vm)?; + } + Ok(dict) + }) } #[pymethod(magic)] fn repr(&self, vm: &VirtualMachine) -> PyResult { - self.pattern - .with_str_drive(self.string.clone(), vm, |str_drive| { - Ok(format!( - "", - self.regs[0].0, - self.regs[0].1, - self.get_slice(0, str_drive, vm).unwrap().repr(vm)? - )) - }) + with_sre_str!(self.pattern, &self.string, vm, |str_drive| { + Ok(format!( + "", + self.regs[0].0, + self.regs[0].1, + self.get_slice(0, str_drive, vm).unwrap().repr(vm)? + )) + }) } fn get_index(&self, group: PyObjectRef, vm: &VirtualMachine) -> Option { @@ -763,17 +750,17 @@ mod _sre { } } - fn get_slice( + fn get_slice( &self, index: usize, - str_drive: StrDrive, + str_drive: S, vm: &VirtualMachine, ) -> Option { let (start, end) = self.regs[index]; if start < 0 || end < 0 { return None; } - Some(slice_drive(&str_drive, start as usize, end as usize, vm)) + Some(str_drive.slice(start as usize, end as usize, vm)) } #[pyclassmethod(magic)] @@ -814,24 +801,19 @@ mod _sre { #[pymethod(name = "match")] fn pymatch(&self, vm: &VirtualMachine) -> PyResult>> { - self.pattern.with_state( - self.string.clone(), - self.start.load(), - self.end, - vm, - |mut state| { - state.must_advance = self.must_advance.load(); - state = state.pymatch(); + with_sre_str!(self.pattern, &self.string.clone(), vm, |s| { + let mut state = s.create_state(&self.pattern, self.start.load(), self.end); + state.must_advance = self.must_advance.load(); + state.pymatch(); - self.must_advance - .store(state.start == state.string_position); - self.start.store(state.string_position); + self.must_advance + .store(state.start == state.string_position); + self.start.store(state.string_position); - Ok(state.has_matched.then(|| { - Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm) - })) - }, - ) + Ok(state.has_matched.then(|| { + Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm) + })) + }) } #[pymethod] @@ -839,24 +821,20 @@ mod _sre { if self.start.load() > self.end { return Ok(None); } - self.pattern.with_state( - self.string.clone(), - self.start.load(), - self.end, - vm, - |mut state| { - state.must_advance = self.must_advance.load(); - state = state.search(); + with_sre_str!(self.pattern, &self.string.clone(), vm, |s| { + let mut state = s.create_state(&self.pattern, self.start.load(), self.end); + state.must_advance = self.must_advance.load(); - self.must_advance - .store(state.string_position == state.start); - self.start.store(state.string_position); + state.search(); - Ok(state.has_matched.then(|| { - Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm) - })) - }, - ) + self.must_advance + .store(state.string_position == state.start); + self.start.store(state.string_position); + + Ok(state.has_matched.then(|| { + Match::new(&state, self.pattern.clone(), self.string.clone()).into_ref(vm) + })) + }) } } }