From 78485fd8df79444283d3cb978654500da0ce0590 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Sat, 26 Dec 2020 16:59:30 +0200 Subject: [PATCH] create _sre.Match --- interp.rs | 68 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/interp.rs b/interp.rs index 8e0968cbf..71d89cc0e 100644 --- a/interp.rs +++ b/interp.rs @@ -1,21 +1,24 @@ // good luck to those that follow; here be dragons -use super::_sre::MAXREPEAT; +use rustpython_common::borrow::BorrowValue; + +use super::_sre::{Match, Pattern, MAXREPEAT}; use super::constants::{SreAtCode, SreCatCode, SreFlag, SreOpcode}; +use crate::builtins::PyStrRef; use std::collections::HashMap; use std::convert::TryFrom; #[derive(Debug)] -pub struct State<'a> { +pub(crate) struct State<'a> { string: &'a str, // chars count string_len: usize, - start: usize, - end: usize, + pub start: usize, + pub end: usize, flags: SreFlag, - pattern_codes: Vec, + pattern_codes: &'a [u32], marks: Vec>, - lastindex: isize, + pub lastindex: isize, marks_stack: Vec<(Vec>, isize)>, context_stack: Vec, repeat: Option, @@ -28,7 +31,7 @@ impl<'a> State<'a> { start: usize, end: usize, flags: SreFlag, - pattern_codes: Vec, + pattern_codes: &'a [u32], ) -> Self { let string_len = string.chars().count(); let end = std::cmp::min(end, string_len); @@ -75,20 +78,36 @@ impl<'a> State<'a> { } } fn marks_push(&mut self) { - self.marks_stack.push(self.marks.clone(), self.lastindex); + self.marks_stack.push((self.marks.clone(), self.lastindex)); } fn marks_pop(&mut self) { - (self.marks, self.lastindex) = self.marks_stack.pop().unwrap(); + let (marks, lastindex) = self.marks_stack.pop().unwrap(); + self.marks = marks; + self.lastindex = lastindex; } fn marks_pop_keep(&mut self) { - (self.marks, self.lastindex) = self.marks_stack.last().unwrap(); + let (marks, lastindex) = self.marks_stack.last().unwrap().clone(); + self.marks = marks; + self.lastindex = lastindex; } fn marks_pop_discard(&mut self) { self.marks_stack.pop(); } } -pub(crate) fn pymatch(mut state: State) -> bool { +pub(crate) fn pymatch( + string: PyStrRef, + start: usize, + end: usize, + pattern: &Pattern, +) -> Option { + let mut state = State::new( + string.borrow_value(), + start, + end, + pattern.flags.clone(), + &pattern.code, + ); let ctx = MatchContext { string_position: state.start, string_offset: state @@ -117,7 +136,12 @@ pub(crate) fn pymatch(mut state: State) -> bool { state.context_stack.pop(); } } - has_matched.unwrap_or(false) + + if has_matched == None || has_matched == Some(false) { + return None; + } + + Some(Match::new(&state, pattern.pattern.clone(), string.clone())) } #[derive(Debug, Copy, Clone)] @@ -635,7 +659,7 @@ fn charset(set: &[u32], c: char) -> bool { } fn count(stack_drive: &StackDrive, maxcount: usize) -> usize { - let drive = WrapDrive::drive(stack_drive.ctx().clone(), stack_drive); + let mut drive = WrapDrive::drive(stack_drive.ctx().clone(), stack_drive); let maxcount = std::cmp::min(maxcount, drive.remaining_chars()); let opcode = match SreOpcode::try_from(drive.peek_code(1)) { Ok(code) => code, @@ -660,28 +684,28 @@ fn count(stack_drive: &StackDrive, maxcount: usize) -> usize { } } SreOpcode::LITERAL => { - general_count_literal(drive, |code, c| code == c as u32); + general_count_literal(&mut drive, |code, c| code == c as u32); } SreOpcode::NOT_LITERAL => { - general_count_literal(drive, |code, c| code != c as u32); + general_count_literal(&mut drive, |code, c| code != c as u32); } SreOpcode::LITERAL_IGNORE => { - general_count_literal(drive, |code, c| code == lower_ascii(c) as u32); + general_count_literal(&mut drive, |code, c| code == lower_ascii(c) as u32); } SreOpcode::NOT_LITERAL_IGNORE => { - general_count_literal(drive, |code, c| code != lower_ascii(c) as u32); + general_count_literal(&mut drive, |code, c| code != lower_ascii(c) as u32); } SreOpcode::LITERAL_LOC_IGNORE => { - general_count_literal(drive, |code, c| char_loc_ignore(code, c)); + general_count_literal(&mut drive, |code, c| char_loc_ignore(code, c)); } SreOpcode::NOT_LITERAL_LOC_IGNORE => { - general_count_literal(drive, |code, c| !char_loc_ignore(code, c)); + general_count_literal(&mut drive, |code, c| !char_loc_ignore(code, c)); } SreOpcode::LITERAL_UNI_IGNORE => { - general_count_literal(drive, |code, c| code == lower_unicode(c) as u32); + general_count_literal(&mut drive, |code, c| code == lower_unicode(c) as u32); } SreOpcode::NOT_LITERAL_UNI_IGNORE => { - general_count_literal(drive, |code, c| code != lower_unicode(c) as u32); + general_count_literal(&mut drive, |code, c| code != lower_unicode(c) as u32); } _ => { panic!("TODO: Not Implemented."); @@ -691,7 +715,7 @@ fn count(stack_drive: &StackDrive, maxcount: usize) -> usize { drive.ctx().string_position - stack_drive.ctx().string_position } -fn general_count_literal bool>(drive: &mut WrapDrive, f: F) { +fn general_count_literal bool>(drive: &mut WrapDrive, mut f: F) { let ch = drive.peek_code(1); while !drive.at_end() && f(ch, drive.peek_char()) { drive.skip_char(1);