From ef7c296001a2ebfe17430d10f5293a2bfeef4fb3 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Wed, 6 Jan 2021 10:30:57 +0200 Subject: [PATCH] impl Match.groupdict --- Lib/test/test_re.py | 4 +- extra_tests/snippets/stdlib_re.py | 1 + vm/src/stdlib/sre.rs | 125 ++++++++++++++++++------------ vm/src/stdlib/sre/interp.rs | 58 -------------- 4 files changed, 78 insertions(+), 110 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 74a39d90e..33fd4f0a3 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -255,7 +255,7 @@ class ReTests(unittest.TestCase): pat = '(?:%s)(?(200)z|t)' % pat self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) - @unittest.skip('TODO: RUSTPYTHON: named group index') + # @unittest.skip('TODO: RUSTPYTHON: named group index') def test_symbolic_refs(self): self.checkTemplateError('(?Px)', r'\g, unterminated name', 3) @@ -594,7 +594,7 @@ class ReTests(unittest.TestCase): # TODO: RUSTPYTHON # @unittest.expectedFailure - @unittest.skip('TODO: RUSTPYTHON: named group index') + # @unittest.skip('TODO: RUSTPYTHON: named group index') def test_expand(self): self.assertEqual(re.match("(?Pfirst) (?Psecond)", "first second") diff --git a/extra_tests/snippets/stdlib_re.py b/extra_tests/snippets/stdlib_re.py index 87e1a27c9..073f9b097 100644 --- a/extra_tests/snippets/stdlib_re.py +++ b/extra_tests/snippets/stdlib_re.py @@ -56,3 +56,4 @@ assert re.compile('(a+)').match('aaa').groups() == ('aaa',) # test Match object method assert re.compile('(a)(bc)').match('abc')[1] == 'a' +assert re.compile('a(b)(?Pc)d').match('abcd').groupdict() == {'a': 'c'} diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index a569f2749..dff761f8b 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -6,15 +6,18 @@ pub(crate) use _sre::make_module; #[pymodule] mod _sre { use itertools::Itertools; + use num_traits::ToPrimitive; use rustpython_common::borrow::BorrowValue; - use rustpython_common::lock::OnceCell; use super::constants::SreFlag; use super::interp::{self, lower_ascii, lower_unicode, upper_unicode, State}; use crate::builtins::tuple::PyTupleRef; - use crate::builtins::{PyDictRef, PyList, PyStr, PyStrRef, PyTypeRef}; + use crate::builtins::{PyDictRef, PyInt, PyList, PyStrRef, PyTypeRef}; use crate::function::{Args, OptionalArg}; - use crate::pyobject::{Either, IntoPyObject, PyCallable, PyIterable, PyObjectRef, PyRef, PyResult, PyValue, StaticType}; + use crate::pyobject::{ + Either, IntoPyObject, ItemProtocol, PyCallable, PyObjectRef, PyRef, PyResult, PyValue, + StaticType, + }; use crate::VirtualMachine; use std::convert::TryFrom; @@ -314,7 +317,6 @@ mod _sre { endpos: usize, lastindex: isize, regs: Vec<(isize, isize)>, - regs_pytuple: OnceCell, // lastgroup } impl PyValue for Match { @@ -346,7 +348,6 @@ mod _sre { endpos: state.end, lastindex: state.lastindex, regs, - regs_pytuple: OnceCell::new(), } } @@ -376,29 +377,33 @@ mod _sre { } #[pyproperty] fn regs(&self, vm: &VirtualMachine) -> PyTupleRef { - self.regs_pytuple - .get_or_init(|| { - PyTupleRef::with_elements( - self.regs.iter().map(|&x| x.into_pyobject(vm)).collect(), - &vm.ctx, - ) - }) - .clone() + PyTupleRef::with_elements( + self.regs.iter().map(|&x| x.into_pyobject(vm)).collect(), + &vm.ctx, + ) } #[pymethod] - fn start(&self, group: OptionalArg, vm: &VirtualMachine) -> PyResult { - self.get_index(group.unwrap_or(0), vm) - .map(|x| self.regs[x].0) + fn start(&self, group: OptionalArg, vm: &VirtualMachine) -> PyResult { + self.span(group, vm).map(|x| x.0) } #[pymethod] - fn end(&self, group: OptionalArg, vm: &VirtualMachine) -> PyResult { - self.get_index(group.unwrap_or(0), vm) - .map(|x| self.regs[x].1) + fn end(&self, group: OptionalArg, vm: &VirtualMachine) -> PyResult { + self.span(group, vm).map(|x| x.1) } #[pymethod] - fn span(&self, group: OptionalArg, vm: &VirtualMachine) -> PyResult<(isize, isize)> { - self.get_index(group.unwrap_or(0), vm).map(|x| self.regs[x]) + fn span( + &self, + group: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult<(isize, isize)> { + let index = match group { + OptionalArg::Present(group) => self + .get_index(group, vm) + .ok_or_else(|| vm.new_index_error("no such group".to_owned()))?, + OptionalArg::Missing => 0, + }; + Ok(self.regs[index]) } #[pymethod] @@ -409,16 +414,17 @@ mod _sre { } #[pymethod] - fn group(&self, args: Args, vm: &VirtualMachine) -> PyResult { - let mut args = args.into_vec(); + fn group(&self, args: Args, vm: &VirtualMachine) -> PyResult { + let args = args.into_vec(); if args.is_empty() { - args.push(0); + return Ok(self.get_slice(0).unwrap().into_pyobject(vm)); } let mut v: Vec = args - .iter() - .map(|&x| { + .into_iter() + .map(|x| { self.get_index(x, vm) - .map(|i| self.get_slice(i).unwrap().into_pyobject(vm)) + .ok_or_else(|| vm.new_index_error("no such group".to_owned())) + .map(|index| self.get_slice(index).unwrap().into_pyobject(vm)) }) .try_collect()?; if v.len() == 1 { @@ -429,22 +435,16 @@ mod _sre { } #[pymethod(magic)] - fn getitem(&self, index: isize, vm: &VirtualMachine) -> Option { - self.get_index(index, vm) - .ok() - .and_then(|i| self.get_slice(i)) + fn getitem(&self, group: PyObjectRef, vm: &VirtualMachine) -> Option { + self.get_index(group, vm).and_then(|i| self.get_slice(i)) } #[pymethod] - fn groups( - zelf: PyRef, - default: OptionalArg, - vm: &VirtualMachine, - ) -> PyTupleRef { - let default = default.unwrap_or(vm.ctx.none()); - let v: Vec = (1..zelf.regs.len()) + fn groups(&self, default: OptionalArg, vm: &VirtualMachine) -> PyTupleRef { + let default = default.unwrap_or_else(|| vm.ctx.none()); + let v: Vec = (1..self.regs.len()) .map(|i| { - zelf.get_slice(i) + self.get_slice(i) .map(|s| s.into_pyobject(vm)) .unwrap_or_else(|| default.clone()) }) @@ -452,27 +452,52 @@ mod _sre { PyTupleRef::with_elements(v, &vm.ctx) } + #[pymethod] + fn groupdict(&self, default: OptionalArg, vm: &VirtualMachine) -> PyResult { + let default = default.unwrap_or_else(|| vm.ctx.none()); + let dict = vm.ctx.new_dict(); + for (key, index) in self.pattern.groupindex.clone() { + let value = self + .get_index(index, vm) + .and_then(|x| self.get_slice(x)) + .map(|x| x.into_pyobject(vm)) + .unwrap_or_else(|| default.clone()); + dict.set_item(key, value, vm)?; + } + Ok(dict) + } + #[pymethod(magic)] - fn repr(zelf: PyRef) -> String { + fn repr(&self) -> String { format!( "", - zelf.regs[0].0, - zelf.regs[0].1, - zelf.get_slice(0).unwrap() + self.regs[0].0, + self.regs[0].1, + self.get_slice(0).unwrap() ) } - fn get_index(&self, group: isize, vm: &VirtualMachine) -> PyResult { - // TODO: support key, value index - if group >= 0 && group as usize <= self.pattern.groups { - Ok(group as usize) + fn get_index(&self, group: PyObjectRef, vm: &VirtualMachine) -> Option { + let i = match group.downcast::() { + Ok(i) => i, + Err(group) => self + .pattern + .groupindex + .get_item_option(group, vm) + .ok()?? + .downcast::() + .ok()?, + }; + let i = i.borrow_value().to_isize()?; + if i >= 0 && i as usize <= self.pattern.groups { + Some(i as usize) } else { - Err(vm.new_index_error("no such group".to_owned())) + None } } - fn get_slice(&self, group: usize) -> Option { - let (start, end) = self.regs[group]; + fn get_slice(&self, index: usize) -> Option { + let (start, end) = self.regs[index]; if start < 0 || end < 0 { return None; } diff --git a/vm/src/stdlib/sre/interp.rs b/vm/src/stdlib/sre/interp.rs index a3263eddc..c118984b3 100644 --- a/vm/src/stdlib/sre/interp.rs +++ b/vm/src/stdlib/sre/interp.rs @@ -1260,64 +1260,6 @@ impl OpcodeExecutor for OpMinUntil { } _ => unreachable!(), } - // match self.jump_id { - // 0 => { - // drive.state.string_position = drive.ctx().string_position; - // let repeat = match drive.state.repeat_stack.last_mut() { - // Some(repeat) => repeat, - // None => { - // todo!("Internal re error: MAX_UNTIL without REPEAT."); - // } - // }; - // self.count = repeat.count + 1; - - // if self.count < repeat.mincount as isize { - // // not enough matches - // repeat.count = self.count; - // drive.push_new_context(4); - // self.jump_id = 1; - // return Some(()); - // } - - // // see if the tail matches - // drive.state.marks_push(); - // drive.push_new_context(1); - // self.jump_id = 2; - // Some(()) - // } - // 1 => { - // let child_ctx = drive.state.popped_context.unwrap(); - // drive.ctx_mut().has_matched = child_ctx.has_matched; - // if drive.ctx().has_matched != Some(true) { - // drive.state.string_position = drive.ctx().string_position; - // let repeat = drive.state.repeat_stack.last_mut().unwrap(); - // repeat.count = self.count - 1; - // } - // None - // } - // 2 => { - // let child_ctx = drive.state.popped_context.unwrap(); - // if child_ctx.has_matched == Some(true) { - // drive.state.repeat_stack.pop(); - // drive.ctx_mut().has_matched = Some(true); - // return None; - // } - // drive.state.string_position = drive.ctx().string_position; - // drive.state.marks_pop(); - - // // match more until tail matches - // let repeat = drive.state.repeat_stack.last_mut().unwrap(); - // if self.count >= repeat.maxcount as isize && repeat.maxcount != MAXREPEAT { - // drive.ctx_mut().has_matched = Some(false); - // return None; - // } - // repeat.count = self.count; - // drive.push_new_context(4); - // self.jump_id = 1; - // Some(()) - // } - // _ => unreachable!(), - // } } }