mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
create _sre.Match
This commit is contained in:
@@ -10,11 +10,14 @@ mod _sre {
|
||||
constants::SreFlag,
|
||||
interp::{self, State},
|
||||
};
|
||||
use crate::builtins::{PyStrRef, PyTypeRef};
|
||||
use crate::byteslike::PyBytesLike;
|
||||
use crate::common::borrow::BorrowValue;
|
||||
use crate::pyobject::{Either, PyCallable, PyObjectRef, PyResult, PyValue, StaticType};
|
||||
use crate::VirtualMachine;
|
||||
use crate::{
|
||||
builtins::{PyStrRef, PyTypeRef},
|
||||
pyobject::PyRef,
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
@@ -121,13 +124,13 @@ mod _sre {
|
||||
#[pyattr]
|
||||
#[pyclass(name = "Pattern")]
|
||||
#[derive(Debug)]
|
||||
struct Pattern {
|
||||
pattern: PyObjectRef,
|
||||
flags: SreFlag,
|
||||
code: Vec<u32>,
|
||||
groups: usize,
|
||||
groupindex: HashMap<String, usize>,
|
||||
indexgroup: Vec<Option<String>>,
|
||||
pub(crate) struct Pattern {
|
||||
pub pattern: PyObjectRef,
|
||||
pub flags: SreFlag,
|
||||
pub code: Vec<u32>,
|
||||
pub groups: usize,
|
||||
pub groupindex: HashMap<String, usize>,
|
||||
pub indexgroup: Vec<Option<String>>,
|
||||
}
|
||||
|
||||
impl PyValue for Pattern {
|
||||
@@ -139,16 +142,10 @@ mod _sre {
|
||||
#[pyimpl]
|
||||
impl Pattern {
|
||||
#[pymethod(name = "match")]
|
||||
fn pymatch(&self, string_args: StringArgs) -> Option<PyObjectRef> {
|
||||
fn pymatch(&self, string_args: StringArgs, vm: &VirtualMachine) -> Option<PyRef<Match>> {
|
||||
let start = string_args.pos;
|
||||
let end = string_args.endpos;
|
||||
let flags = self.flags;
|
||||
let pattern_codes = self.code.clone();
|
||||
let string = string_args.string.borrow_value();
|
||||
let mut state = State::new(string, start, end, flags, pattern_codes);
|
||||
dbg!(&state);
|
||||
dbg!(interp::pymatch(state));
|
||||
None
|
||||
interp::pymatch(string_args.string, start, end, &self).map(|x| x.into_ref(vm))
|
||||
}
|
||||
#[pymethod]
|
||||
fn fullmatch(&self, string_args: StringArgs) -> Option<PyObjectRef> {
|
||||
@@ -179,4 +176,59 @@ mod _sre {
|
||||
self.flags.bits()
|
||||
}
|
||||
}
|
||||
|
||||
#[pyattr]
|
||||
#[pyclass(name = "Match")]
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Match {
|
||||
string: PyStrRef,
|
||||
pattern: PyObjectRef,
|
||||
start: usize,
|
||||
end: usize,
|
||||
lastindex: isize,
|
||||
// regs
|
||||
// lastgroup
|
||||
}
|
||||
impl PyValue for Match {
|
||||
fn class(vm: &VirtualMachine) -> &PyTypeRef {
|
||||
Self::static_type()
|
||||
}
|
||||
}
|
||||
|
||||
#[pyimpl]
|
||||
impl Match {
|
||||
pub(crate) fn new(state: &State, pattern: PyObjectRef, string: PyStrRef) -> Self {
|
||||
Self {
|
||||
string,
|
||||
pattern,
|
||||
start: state.start,
|
||||
end: state.end,
|
||||
lastindex: state.lastindex,
|
||||
}
|
||||
}
|
||||
#[pyproperty]
|
||||
fn pos(&self) -> usize {
|
||||
self.start
|
||||
}
|
||||
#[pyproperty]
|
||||
fn endpos(&self) -> usize {
|
||||
self.end
|
||||
}
|
||||
#[pyproperty]
|
||||
fn lastindex(&self) -> isize {
|
||||
self.lastindex
|
||||
}
|
||||
#[pyproperty]
|
||||
fn lastgroup(&self) -> Option<String> {
|
||||
None
|
||||
}
|
||||
#[pyproperty]
|
||||
fn re(&self) -> PyObjectRef {
|
||||
self.pattern.clone()
|
||||
}
|
||||
#[pyproperty]
|
||||
fn string(&self) -> PyStrRef {
|
||||
self.string.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +1,24 @@
|
||||
// good luck to those that follow; here be dragons
|
||||
|
||||
use super::_sre::MAXREPEAT;
|
||||
use rustpython_common::borrow::BorrowValue;
|
||||
|
||||
use super::_sre::{Match, Pattern, MAXREPEAT};
|
||||
use super::constants::{SreAtCode, SreCatCode, SreFlag, SreOpcode};
|
||||
use crate::builtins::PyStrRef;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct State<'a> {
|
||||
pub(crate) struct State<'a> {
|
||||
string: &'a str,
|
||||
// chars count
|
||||
string_len: usize,
|
||||
start: usize,
|
||||
end: usize,
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
flags: SreFlag,
|
||||
pattern_codes: Vec<u32>,
|
||||
pattern_codes: &'a [u32],
|
||||
marks: Vec<Option<usize>>,
|
||||
lastindex: isize,
|
||||
pub lastindex: isize,
|
||||
marks_stack: Vec<(Vec<Option<usize>>, isize)>,
|
||||
context_stack: Vec<MatchContext>,
|
||||
repeat: Option<usize>,
|
||||
@@ -28,7 +31,7 @@ impl<'a> State<'a> {
|
||||
start: usize,
|
||||
end: usize,
|
||||
flags: SreFlag,
|
||||
pattern_codes: Vec<u32>,
|
||||
pattern_codes: &'a [u32],
|
||||
) -> Self {
|
||||
let string_len = string.chars().count();
|
||||
let end = std::cmp::min(end, string_len);
|
||||
@@ -75,20 +78,36 @@ impl<'a> State<'a> {
|
||||
}
|
||||
}
|
||||
fn marks_push(&mut self) {
|
||||
self.marks_stack.push(self.marks.clone(), self.lastindex);
|
||||
self.marks_stack.push((self.marks.clone(), self.lastindex));
|
||||
}
|
||||
fn marks_pop(&mut self) {
|
||||
(self.marks, self.lastindex) = self.marks_stack.pop().unwrap();
|
||||
let (marks, lastindex) = self.marks_stack.pop().unwrap();
|
||||
self.marks = marks;
|
||||
self.lastindex = lastindex;
|
||||
}
|
||||
fn marks_pop_keep(&mut self) {
|
||||
(self.marks, self.lastindex) = self.marks_stack.last().unwrap();
|
||||
let (marks, lastindex) = self.marks_stack.last().unwrap().clone();
|
||||
self.marks = marks;
|
||||
self.lastindex = lastindex;
|
||||
}
|
||||
fn marks_pop_discard(&mut self) {
|
||||
self.marks_stack.pop();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn pymatch(mut state: State) -> bool {
|
||||
pub(crate) fn pymatch(
|
||||
string: PyStrRef,
|
||||
start: usize,
|
||||
end: usize,
|
||||
pattern: &Pattern,
|
||||
) -> Option<Match> {
|
||||
let mut state = State::new(
|
||||
string.borrow_value(),
|
||||
start,
|
||||
end,
|
||||
pattern.flags.clone(),
|
||||
&pattern.code,
|
||||
);
|
||||
let ctx = MatchContext {
|
||||
string_position: state.start,
|
||||
string_offset: state
|
||||
@@ -117,7 +136,12 @@ pub(crate) fn pymatch(mut state: State) -> bool {
|
||||
state.context_stack.pop();
|
||||
}
|
||||
}
|
||||
has_matched.unwrap_or(false)
|
||||
|
||||
if has_matched == None || has_matched == Some(false) {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Match::new(&state, pattern.pattern.clone(), string.clone()))
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
@@ -635,7 +659,7 @@ fn charset(set: &[u32], c: char) -> bool {
|
||||
}
|
||||
|
||||
fn count(stack_drive: &StackDrive, maxcount: usize) -> usize {
|
||||
let drive = WrapDrive::drive(stack_drive.ctx().clone(), stack_drive);
|
||||
let mut drive = WrapDrive::drive(stack_drive.ctx().clone(), stack_drive);
|
||||
let maxcount = std::cmp::min(maxcount, drive.remaining_chars());
|
||||
let opcode = match SreOpcode::try_from(drive.peek_code(1)) {
|
||||
Ok(code) => code,
|
||||
@@ -660,28 +684,28 @@ fn count(stack_drive: &StackDrive, maxcount: usize) -> usize {
|
||||
}
|
||||
}
|
||||
SreOpcode::LITERAL => {
|
||||
general_count_literal(drive, |code, c| code == c as u32);
|
||||
general_count_literal(&mut drive, |code, c| code == c as u32);
|
||||
}
|
||||
SreOpcode::NOT_LITERAL => {
|
||||
general_count_literal(drive, |code, c| code != c as u32);
|
||||
general_count_literal(&mut drive, |code, c| code != c as u32);
|
||||
}
|
||||
SreOpcode::LITERAL_IGNORE => {
|
||||
general_count_literal(drive, |code, c| code == lower_ascii(c) as u32);
|
||||
general_count_literal(&mut drive, |code, c| code == lower_ascii(c) as u32);
|
||||
}
|
||||
SreOpcode::NOT_LITERAL_IGNORE => {
|
||||
general_count_literal(drive, |code, c| code != lower_ascii(c) as u32);
|
||||
general_count_literal(&mut drive, |code, c| code != lower_ascii(c) as u32);
|
||||
}
|
||||
SreOpcode::LITERAL_LOC_IGNORE => {
|
||||
general_count_literal(drive, |code, c| char_loc_ignore(code, c));
|
||||
general_count_literal(&mut drive, |code, c| char_loc_ignore(code, c));
|
||||
}
|
||||
SreOpcode::NOT_LITERAL_LOC_IGNORE => {
|
||||
general_count_literal(drive, |code, c| !char_loc_ignore(code, c));
|
||||
general_count_literal(&mut drive, |code, c| !char_loc_ignore(code, c));
|
||||
}
|
||||
SreOpcode::LITERAL_UNI_IGNORE => {
|
||||
general_count_literal(drive, |code, c| code == lower_unicode(c) as u32);
|
||||
general_count_literal(&mut drive, |code, c| code == lower_unicode(c) as u32);
|
||||
}
|
||||
SreOpcode::NOT_LITERAL_UNI_IGNORE => {
|
||||
general_count_literal(drive, |code, c| code != lower_unicode(c) as u32);
|
||||
general_count_literal(&mut drive, |code, c| code != lower_unicode(c) as u32);
|
||||
}
|
||||
_ => {
|
||||
panic!("TODO: Not Implemented.");
|
||||
@@ -691,7 +715,7 @@ fn count(stack_drive: &StackDrive, maxcount: usize) -> usize {
|
||||
drive.ctx().string_position - stack_drive.ctx().string_position
|
||||
}
|
||||
|
||||
fn general_count_literal<F: FnMut(u32, char) -> bool>(drive: &mut WrapDrive, f: F) {
|
||||
fn general_count_literal<F: FnMut(u32, char) -> bool>(drive: &mut WrapDrive, mut f: F) {
|
||||
let ch = drive.peek_code(1);
|
||||
while !drive.at_end() && f(ch, drive.peek_char()) {
|
||||
drive.skip_char(1);
|
||||
|
||||
Reference in New Issue
Block a user