mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
WIP - native _sre
This commit is contained in:
114
constants.rs
Normal file
114
constants.rs
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Secret Labs' Regular Expression Engine
|
||||
*
|
||||
* regular expression matching engine
|
||||
*
|
||||
* NOTE: This file is generated by sre_constants.py. If you need
|
||||
* to change anything in here, edit sre_constants.py and run it.
|
||||
*
|
||||
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||
*
|
||||
* See the _sre.c file for information on usage and redistribution.
|
||||
*/
|
||||
|
||||
use bitflags::bitflags;
|
||||
|
||||
/// Magic number identifying the SRE constants revision this file was written against.
///
/// NOTE(review): presumably must equal the `MAGIC` value of the `sre_constants.py`
/// this file says it is generated from -- confirm against the Python side.
pub const SRE_MAGIC: usize = 20140917;
|
||||
#[derive(num_enum::TryFromPrimitive, Debug)]
|
||||
#[repr(u32)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum SreOpcode {
|
||||
FAILURE = 0,
|
||||
SUCCESS = 1,
|
||||
ANY = 2,
|
||||
ANY_ALL = 3,
|
||||
ASSERT = 4,
|
||||
ASSERT_NOT = 5,
|
||||
AT = 6,
|
||||
BRANCH = 7,
|
||||
CALL = 8,
|
||||
CATEGORY = 9,
|
||||
CHARSET = 10,
|
||||
BIGCHARSET = 11,
|
||||
GROUPREF = 12,
|
||||
GROUPREF_EXISTS = 13,
|
||||
GROUPREF_IGNORE = 14,
|
||||
IN = 15,
|
||||
IN_IGNORE = 16,
|
||||
INFO = 17,
|
||||
JUMP = 18,
|
||||
LITERAL = 19,
|
||||
LITERAL_IGNORE = 20,
|
||||
MARK = 21,
|
||||
MAX_UNTIL = 22,
|
||||
MIN_UNTIL = 23,
|
||||
NOT_LITERAL = 24,
|
||||
NOT_LITERAL_IGNORE = 25,
|
||||
NEGATE = 26,
|
||||
RANGE = 27,
|
||||
REPEAT = 28,
|
||||
REPEAT_ONE = 29,
|
||||
SUBPATTERN = 30,
|
||||
MIN_REPEAT_ONE = 31,
|
||||
RANGE_IGNORE = 32,
|
||||
}
|
||||
#[derive(num_enum::TryFromPrimitive, Debug)]
|
||||
#[repr(u32)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum SreAtCode {
|
||||
BEGINNING = 0,
|
||||
BEGINNING_LINE = 1,
|
||||
BEGINNING_STRING = 2,
|
||||
BOUNDARY = 3,
|
||||
NON_BOUNDARY = 4,
|
||||
END = 5,
|
||||
END_LINE = 6,
|
||||
END_STRING = 7,
|
||||
LOC_BOUNDARY = 8,
|
||||
LOC_NON_BOUNDARY = 9,
|
||||
UNI_BOUNDARY = 10,
|
||||
UNI_NON_BOUNDARY = 11,
|
||||
}
|
||||
#[derive(num_enum::TryFromPrimitive, Debug)]
|
||||
#[repr(u32)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum SreCatCode {
|
||||
DIGIT = 0,
|
||||
NOT_DIGIT = 1,
|
||||
SPACE = 2,
|
||||
NOT_SPACE = 3,
|
||||
WORD = 4,
|
||||
NOT_WORD = 5,
|
||||
LINEBREAK = 6,
|
||||
NOT_LINEBREAK = 7,
|
||||
LOC_WORD = 8,
|
||||
LOC_NOT_WORD = 9,
|
||||
UNI_DIGIT = 10,
|
||||
UNI_NOT_DIGIT = 11,
|
||||
UNI_SPACE = 12,
|
||||
UNI_NOT_SPACE = 13,
|
||||
UNI_WORD = 14,
|
||||
UNI_NOT_WORD = 15,
|
||||
UNI_LINEBREAK = 16,
|
||||
UNI_NOT_LINEBREAK = 17,
|
||||
}
|
||||
bitflags! {
    /// Flag set stored in a `u16`; every constant occupies exactly one bit
    /// (bits 0 through 8). The matcher `State` in `interp.rs` carries one of
    /// these in its `flags` field.
    pub struct SreFlag: u16 {
        const TEMPLATE = 1 << 0;
        const IGNORECASE = 1 << 1;
        const LOCALE = 1 << 2;
        const MULTILINE = 1 << 3;
        const DOTALL = 1 << 4;
        const UNICODE = 1 << 5;
        const VERBOSE = 1 << 6;
        const DEBUG = 1 << 7;
        const ASCII = 1 << 8;
    }
}
|
||||
bitflags! {
    /// Flag set stored in a `u32`; every constant occupies exactly one bit
    /// (bits 0 through 2).
    ///
    /// NOTE(review): presumably the flag word carried by an
    /// `SreOpcode::INFO` block -- confirm once that opcode is decoded.
    pub struct SreInfo: u32 {
        const PREFIX = 1 << 0;
        const LITERAL = 1 << 1;
        const CHARSET = 1 << 2;
    }
}
|
||||
126
interp.rs
Normal file
126
interp.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
// good luck to those that follow; here be dragons
|
||||
|
||||
use crate::builtins::PyStrRef;
|
||||
|
||||
use super::constants::{SreFlag, SreOpcode};
|
||||
|
||||
use std::convert::TryFrom;
|
||||
use std::{iter, slice};
|
||||
|
||||
pub struct State {
|
||||
start: usize,
|
||||
s_pos: usize,
|
||||
end: usize,
|
||||
pos: usize,
|
||||
flags: SreFlag,
|
||||
marks: Vec<usize>,
|
||||
lastindex: isize,
|
||||
marks_stack: Vec<usize>,
|
||||
context_stack: Vec<MatchContext>,
|
||||
repeat: Option<usize>,
|
||||
s: PyStrRef,
|
||||
}
|
||||
|
||||
// struct State1<'a> {
|
||||
// state: &'a mut State,
|
||||
// }
|
||||
|
||||
/// One entry of `State::context_stack`: a pair of offsets.
///
/// NOTE(review): nothing in this file reads these fields yet; presumably
/// `s_pos` indexes the subject string and `code_pos` the pattern code --
/// confirm when the interpreter loop lands.
struct MatchContext {
    /// Offset paired with `code_pos` (presumably into the string).
    s_pos: usize,
    /// Offset paired with `s_pos` (presumably into the code words).
    code_pos: usize,
}
|
||||
|
||||
// struct Context<'a> {
|
||||
// context_stack: &mut Vec<MatchContext>,
|
||||
// }
|
||||
|
||||
impl State {
|
||||
pub fn new(s: PyStrRef, start: usize, end: usize, flags: SreFlag) -> Self {
|
||||
let end = std::cmp::min(end, s.char_len());
|
||||
Self {
|
||||
start,
|
||||
s_pos: start,
|
||||
end,
|
||||
pos: start,
|
||||
flags,
|
||||
marks: Vec::new(),
|
||||
lastindex: -1,
|
||||
marks_stack: Vec::new(),
|
||||
context_stack: Vec::new(),
|
||||
repeat: None,
|
||||
s,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// struct OpcodeDispatcher {
|
||||
// executing_contexts: HashMap<usize, Rc<State>>,
|
||||
// }
|
||||
|
||||
/// Error yielded by `parse_ops` when a code word cannot be decoded into an `Op`.
///
/// It carries no payload. `Debug` is derived so that
/// `Result<_, BadSreCode>` supports `unwrap()`/`expect()`, and the type
/// implements `Display` and `std::error::Error` so it can be propagated with
/// `?` into boxed or wrapped error types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BadSreCode;

impl std::fmt::Display for BadSreCode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("invalid SRE code")
    }
}

impl std::error::Error for BadSreCode {}
|
||||
|
||||
pub fn parse_ops(code: &[u32]) -> impl Iterator<Item = Result<Op, BadSreCode>> + '_ {
|
||||
let mut it = code.iter().copied();
|
||||
std::iter::from_fn(move || -> Option<Option<Op>> {
|
||||
let op = it.next()?;
|
||||
let op = SreOpcode::try_from(op)
|
||||
.ok()
|
||||
.and_then(|op| extract_code(op, &mut it));
|
||||
Some(op)
|
||||
})
|
||||
.map(|x| x.ok_or(BadSreCode))
|
||||
}
|
||||
|
||||
/// Concrete type of `code.iter().copied()` for a `&[u32]`: the by-value word
/// cursor that `parse_ops` creates and passes to `extract_code`.
type It<'a> = iter::Copied<slice::Iter<'a, u32>>;
|
||||
fn extract_code(op: SreOpcode, it: &mut It) -> Option<Op> {
|
||||
let skip = |it: &mut It| {
|
||||
let skip = it.next()? as usize;
|
||||
if skip > it.len() {
|
||||
None
|
||||
} else {
|
||||
Some(skip)
|
||||
}
|
||||
};
|
||||
match op {
|
||||
SreOpcode::FAILURE => {}
|
||||
SreOpcode::SUCCESS => {}
|
||||
SreOpcode::ANY => {}
|
||||
SreOpcode::ANY_ALL => {}
|
||||
SreOpcode::ASSERT => {}
|
||||
SreOpcode::ASSERT_NOT => {}
|
||||
SreOpcode::AT => {}
|
||||
SreOpcode::BRANCH => {}
|
||||
SreOpcode::CALL => {}
|
||||
SreOpcode::CATEGORY => {}
|
||||
SreOpcode::CHARSET => {}
|
||||
SreOpcode::BIGCHARSET => {}
|
||||
SreOpcode::GROUPREF => {}
|
||||
SreOpcode::GROUPREF_EXISTS => {}
|
||||
SreOpcode::GROUPREF_IGNORE => {}
|
||||
SreOpcode::IN => {}
|
||||
SreOpcode::IN_IGNORE => {}
|
||||
SreOpcode::INFO => {
|
||||
// let skip = it.next()?;
|
||||
}
|
||||
SreOpcode::JUMP => {}
|
||||
SreOpcode::LITERAL => {}
|
||||
SreOpcode::LITERAL_IGNORE => {}
|
||||
SreOpcode::MARK => {}
|
||||
SreOpcode::MAX_UNTIL => {}
|
||||
SreOpcode::MIN_UNTIL => {}
|
||||
SreOpcode::NOT_LITERAL => {}
|
||||
SreOpcode::NOT_LITERAL_IGNORE => {}
|
||||
SreOpcode::NEGATE => {}
|
||||
SreOpcode::RANGE => {}
|
||||
SreOpcode::REPEAT => {}
|
||||
SreOpcode::REPEAT_ONE => {}
|
||||
SreOpcode::SUBPATTERN => {}
|
||||
SreOpcode::MIN_REPEAT_ONE => {}
|
||||
SreOpcode::RANGE_IGNORE => {}
|
||||
}
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// A decoded SRE instruction, as yielded by `parse_ops`.
///
/// Work in progress: only a placeholder variant exists so far. The common
/// traits are derived so decoded programs can be printed, cloned and
/// compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Op {
    /// Placeholder with no fields yet.
    /// NOTE(review): presumably the decoded form of `SreOpcode::INFO` -- confirm.
    Info {},
}
|
||||
Reference in New Issue
Block a user