From 3f2640971a88647f25ee981a3fda215ca30a1dec Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Sat, 26 Dec 2020 16:59:30 +0200 Subject: [PATCH] create _sre.Match --- vm/src/stdlib/sre.rs | 84 ++++++++++++++++++++++++++++++------- vm/src/stdlib/sre/interp.rs | 68 ++++++++++++++++++++---------- 2 files changed, 114 insertions(+), 38 deletions(-) diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index f2ff1ff87..ccae4f1b0 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -10,11 +10,14 @@ mod _sre { constants::SreFlag, interp::{self, State}, }; - use crate::builtins::{PyStrRef, PyTypeRef}; use crate::byteslike::PyBytesLike; use crate::common::borrow::BorrowValue; use crate::pyobject::{Either, PyCallable, PyObjectRef, PyResult, PyValue, StaticType}; use crate::VirtualMachine; + use crate::{ + builtins::{PyStrRef, PyTypeRef}, + pyobject::PyRef, + }; use std::collections::HashMap; use std::convert::TryFrom; @@ -121,13 +124,13 @@ mod _sre { #[pyattr] #[pyclass(name = "Pattern")] #[derive(Debug)] - struct Pattern { - pattern: PyObjectRef, - flags: SreFlag, - code: Vec, - groups: usize, - groupindex: HashMap, - indexgroup: Vec>, + pub(crate) struct Pattern { + pub pattern: PyObjectRef, + pub flags: SreFlag, + pub code: Vec, + pub groups: usize, + pub groupindex: HashMap, + pub indexgroup: Vec>, } impl PyValue for Pattern { @@ -139,16 +142,10 @@ mod _sre { #[pyimpl] impl Pattern { #[pymethod(name = "match")] - fn pymatch(&self, string_args: StringArgs) -> Option { + fn pymatch(&self, string_args: StringArgs, vm: &VirtualMachine) -> Option> { let start = string_args.pos; let end = string_args.endpos; - let flags = self.flags; - let pattern_codes = self.code.clone(); - let string = string_args.string.borrow_value(); - let mut state = State::new(string, start, end, flags, pattern_codes); - dbg!(&state); - dbg!(interp::pymatch(state)); - None + interp::pymatch(string_args.string, start, end, &self).map(|x| x.into_ref(vm)) } #[pymethod] fn fullmatch(&self, string_args: StringArgs) -> Option { @@ -179,4 +176,59 @@ mod _sre { self.flags.bits() } } + + #[pyattr] + #[pyclass(name = "Match")] + #[derive(Debug)] + pub(crate) struct Match { + string: PyStrRef, + pattern: PyObjectRef, + start: usize, + end: usize, + lastindex: isize, + // regs + // lastgroup + } + impl PyValue for Match { + fn class(vm: &VirtualMachine) -> &PyTypeRef { + Self::static_type() + } + } + + #[pyimpl] + impl Match { + pub(crate) fn new(state: &State, pattern: PyObjectRef, string: PyStrRef) -> Self { + Self { + string, + pattern, + start: state.start, + end: state.end, + lastindex: state.lastindex, + } + } + #[pyproperty] + fn pos(&self) -> usize { + self.start + } + #[pyproperty] + fn endpos(&self) -> usize { + self.end + } + #[pyproperty] + fn lastindex(&self) -> isize { + self.lastindex + } + #[pyproperty] + fn lastgroup(&self) -> Option { + None + } + #[pyproperty] + fn re(&self) -> PyObjectRef { + self.pattern.clone() + } + #[pyproperty] + fn string(&self) -> PyStrRef { + self.string.clone() + } + } } diff --git a/vm/src/stdlib/sre/interp.rs b/vm/src/stdlib/sre/interp.rs index 8e0968cbf..71d89cc0e 100644 --- a/vm/src/stdlib/sre/interp.rs +++ b/vm/src/stdlib/sre/interp.rs @@ -1,21 +1,24 @@ // good luck to those that follow; here be dragons -use super::_sre::MAXREPEAT; +use rustpython_common::borrow::BorrowValue; + +use super::_sre::{Match, Pattern, MAXREPEAT}; use super::constants::{SreAtCode, SreCatCode, SreFlag, SreOpcode}; +use crate::builtins::PyStrRef; use std::collections::HashMap; use std::convert::TryFrom; #[derive(Debug)] -pub struct State<'a> { +pub(crate) struct State<'a> { string: &'a str, // chars count string_len: usize, - start: usize, - end: usize, + pub start: usize, + pub end: usize, flags: SreFlag, - pattern_codes: Vec, + pattern_codes: &'a [u32], marks: Vec>, - lastindex: isize, + pub lastindex: isize, marks_stack: Vec<(Vec>, isize)>, context_stack: Vec, repeat: Option, @@ -28,7 +31,7 @@ impl<'a> State<'a> { start: usize, end: usize, flags: SreFlag, - pattern_codes: Vec, + pattern_codes: &'a [u32], ) -> Self { let string_len = string.chars().count(); let end = std::cmp::min(end, string_len); @@ -75,20 +78,36 @@ impl<'a> State<'a> { } } fn marks_push(&mut self) { - self.marks_stack.push(self.marks.clone(), self.lastindex); + self.marks_stack.push((self.marks.clone(), self.lastindex)); } fn marks_pop(&mut self) { - (self.marks, self.lastindex) = self.marks_stack.pop().unwrap(); + let (marks, lastindex) = self.marks_stack.pop().unwrap(); + self.marks = marks; + self.lastindex = lastindex; } fn marks_pop_keep(&mut self) { - (self.marks, self.lastindex) = self.marks_stack.last().unwrap(); + let (marks, lastindex) = self.marks_stack.last().unwrap().clone(); + self.marks = marks; + self.lastindex = lastindex; } fn marks_pop_discard(&mut self) { self.marks_stack.pop(); } } -pub(crate) fn pymatch(mut state: State) -> bool { +pub(crate) fn pymatch( + string: PyStrRef, + start: usize, + end: usize, + pattern: &Pattern, +) -> Option { + let mut state = State::new( + string.borrow_value(), + start, + end, + pattern.flags.clone(), + &pattern.code, + ); let ctx = MatchContext { string_position: state.start, string_offset: state @@ -117,7 +136,12 @@ pub(crate) fn pymatch(mut state: State) -> bool { state.context_stack.pop(); } } - has_matched.unwrap_or(false) + + if has_matched == None || has_matched == Some(false) { + return None; + } + + Some(Match::new(&state, pattern.pattern.clone(), string.clone())) } #[derive(Debug, Copy, Clone)] @@ -635,7 +659,7 @@ fn charset(set: &[u32], c: char) -> bool { } fn count(stack_drive: &StackDrive, maxcount: usize) -> usize { - let drive = WrapDrive::drive(stack_drive.ctx().clone(), stack_drive); + let mut drive = WrapDrive::drive(stack_drive.ctx().clone(), stack_drive); let maxcount = std::cmp::min(maxcount, drive.remaining_chars()); let opcode = match SreOpcode::try_from(drive.peek_code(1)) { Ok(code) => code, @@ -660,28 +684,28 @@ fn count(stack_drive: &StackDrive, maxcount: usize) -> usize { } } SreOpcode::LITERAL => { - general_count_literal(drive, |code, c| code == c as u32); + general_count_literal(&mut drive, |code, c| code == c as u32); } SreOpcode::NOT_LITERAL => { - general_count_literal(drive, |code, c| code != c as u32); + general_count_literal(&mut drive, |code, c| code != c as u32); } SreOpcode::LITERAL_IGNORE => { - general_count_literal(drive, |code, c| code == lower_ascii(c) as u32); + general_count_literal(&mut drive, |code, c| code == lower_ascii(c) as u32); } SreOpcode::NOT_LITERAL_IGNORE => { - general_count_literal(drive, |code, c| code != lower_ascii(c) as u32); + general_count_literal(&mut drive, |code, c| code != lower_ascii(c) as u32); } SreOpcode::LITERAL_LOC_IGNORE => { - general_count_literal(drive, |code, c| char_loc_ignore(code, c)); + general_count_literal(&mut drive, |code, c| char_loc_ignore(code, c)); } SreOpcode::NOT_LITERAL_LOC_IGNORE => { - general_count_literal(drive, |code, c| !char_loc_ignore(code, c)); + general_count_literal(&mut drive, |code, c| !char_loc_ignore(code, c)); } SreOpcode::LITERAL_UNI_IGNORE => { - general_count_literal(drive, |code, c| code == lower_unicode(c) as u32); + general_count_literal(&mut drive, |code, c| code == lower_unicode(c) as u32); } SreOpcode::NOT_LITERAL_UNI_IGNORE => { - general_count_literal(drive, |code, c| code != lower_unicode(c) as u32); + general_count_literal(&mut drive, |code, c| code != lower_unicode(c) as u32); } _ => { panic!("TODO: Not Implemented."); @@ -691,7 +715,7 @@ fn count(stack_drive: &StackDrive, maxcount: usize) -> usize { drive.ctx().string_position - stack_drive.ctx().string_position } -fn general_count_literal bool>(drive: &mut WrapDrive, f: F) { +fn general_count_literal bool>(drive: &mut WrapDrive, mut f: F) { let ch = drive.peek_code(1); while !drive.at_end() && f(ch, drive.peek_char()) { drive.skip_char(1);