impl Match.groups()

This commit is contained in:
Kangzhi Shi
2021-01-01 11:34:40 +02:00
parent d95a3b7791
commit 080b417f1f
3 changed files with 99 additions and 181 deletions

View File

@@ -21,8 +21,8 @@ p = re.compile('ab')
# One capture group: an underscore or lowercase ASCII letter followed by any
# run of underscores, lowercase ASCII letters, or digits.
idpattern = r'([_a-z][_a-z0-9]*)'
# mo = re.search(idpattern, '7382 _boe0+2')
# print(mo)
# Search a string that begins with digits, so the pattern cannot match at index 0.
mo = re.search(idpattern, '7382 _boe0+2')
print(mo)
# TODO:
# assert mo.group(0) == '_boe0'
@@ -45,3 +45,7 @@ assert re.compile('a(b)(cd)').match('abcd').group(0, 1, 2) == ('abcd', 'b', 'cd'
# test op repeat
# A repeated group matching twice; only the truthiness of the match is checked.
assert re.compile('(ab)+').match('abab')
# Groups 1 and 2 are plain captures; group 3 sits under `*`, matches three
# times, and is expected to report a single 'cd'.
assert re.compile('(a)(b)(cd)*').match('abcdcdcd').group(0, 1, 2, 3) == ('abcdcdcd', 'a', 'b', 'cd')
# The repeated group is empty, so each iteration consumes no input; the overall
# match must still complete and cover the whole string.
assert re.compile('ab()+cd').match('abcd').group() == 'abcd'
# Repeat outside the group: groups() is expected to hold one single-character capture.
assert re.compile('(a)+').match('aaa').groups() == ('a',)
# Repeat inside the group: the one capture is expected to span the whole run.
assert re.compile('(a+)').match('aaa').groups() == ('aaa',)

View File

@@ -309,6 +309,7 @@ mod _sre {
// Returns the `regs` entry selected by `group`; an omitted argument selects group 0.
// An error reported by `get_index` for the requested group is passed through as-is.
fn span(&self, group: OptionalArg<isize>, vm: &VirtualMachine) -> PyResult<(isize, isize)> {
    let requested = group.unwrap_or(0);
    let slot = self.get_index(requested, vm)?;
    Ok(self.regs[slot])
}
#[pymethod]
fn group(&self, args: Args<isize>, vm: &VirtualMachine) -> PyResult {
let mut args = args.into_vec();
@@ -328,6 +329,24 @@ mod _sre {
Ok(vm.ctx.new_tuple(v))
}
}
// Builds a tuple with one element per entry of `regs` after the first
// (index 0 is skipped). Each element is the converted result of `get_slice`
// for that index; when `get_slice` yields nothing, `default` is used instead,
// and `default` itself falls back to `None` when the caller omits it.
#[pymethod]
fn groups(
    zelf: PyRef<Match>,
    default: OptionalArg<PyObjectRef>,
    vm: &VirtualMachine,
) -> PyTupleRef {
    let fallback = default.unwrap_or(vm.ctx.none());
    let reg_count = zelf.regs.len();
    let mut elements: Vec<PyObjectRef> = Vec::with_capacity(reg_count.saturating_sub(1));
    for index in 1..reg_count {
        let element = match zelf.get_slice(index) {
            Some(slice) => slice.into_pyobject(vm),
            // No slice for this index: hand out another reference to the fallback.
            None => fallback.clone(),
        };
        elements.push(element);
    }
    PyTupleRef::with_elements(elements, &vm.ctx)
}
#[pymethod(magic)]
fn repr(zelf: PyRef<Match>) -> String {
format!(

View File

@@ -1070,7 +1070,6 @@ impl OpMinRepeatOne {
}
}
// Everything is stored in RepeatContext
struct OpMaxUntil {
jump_id: usize,
count: isize,
@@ -1088,188 +1087,84 @@ impl Default for OpMaxUntil {
impl OpcodeExecutor for OpMaxUntil {
    /// Runs one step of this opcode, resuming at the state stored in `jump_id`.
    ///
    /// Returns `Some(())` right after pushing a child context; the following
    /// step reads that child's outcome from `drive.state.popped_context`.
    /// Returns `None` once the outcome has been written to the current
    /// context's `has_matched`.
    ///
    /// Each jump id has exactly one arm. Listing the same literal twice would
    /// leave the later arm unreachable, so the step bodies live inline here.
    fn next(&mut self, drive: &mut StackDrive) -> Option<()> {
        match self.jump_id {
            // Entry: decide whether the repeat body must run again, may run
            // again, or whether only the tail is left to check.
            0 => {
                let RepeatContext {
                    count,
                    code_position,
                    last_position,
                } = *drive.repeat_ctx();
                drive.ctx_mut().code_position = code_position;
                let mincount = drive.peek_code(2) as usize;
                let maxcount = drive.peek_code(3) as usize;
                drive.state.string_position = drive.ctx().string_position;
                self.count = count + 1;

                if (self.count as usize) < mincount {
                    // not enough matches
                    drive.repeat_ctx_mut().count = self.count;
                    drive.push_new_context(4);
                    self.jump_id = 1;
                    return Some(());
                }

                // Compare the incremented count against the upper bound, the
                // same value the lower-bound test above uses. The stored
                // `count` is one behind, and casting a negative stored count
                // to `usize` would make a bounded maximum never allow an
                // iteration. The position test skips another iteration when
                // the position equals the recorded `last_position`.
                if ((self.count as usize) < maxcount || maxcount == MAXREPEAT)
                    && drive.state.string_position != last_position
                {
                    // we may have enough matches, if we can match another item, do so
                    drive.repeat_ctx_mut().count = self.count;
                    drive.state.marks_push();
                    // Remember the previous value so state 2 can put it back.
                    self.save_last_position = last_position;
                    drive.repeat_ctx_mut().last_position = drive.state.string_position;
                    drive.push_new_context(4);
                    self.jump_id = 2;
                    return Some(());
                }

                // No further iteration: go straight to the tail check.
                self.jump_id = 3;
                self.next(drive)
            }
            // Outcome of a required iteration: adopt the child's result and,
            // unless it matched, undo the count bump and restore the position.
            1 => {
                let child_ctx = drive.state.popped_context.unwrap();
                drive.ctx_mut().has_matched = child_ctx.has_matched;
                if drive.ctx().has_matched != Some(true) {
                    drive.repeat_ctx_mut().count = self.count - 1;
                    drive.state.string_position = drive.ctx().string_position;
                }
                None
            }
            // Outcome of an optional iteration.
            2 => {
                drive.repeat_ctx_mut().last_position = self.save_last_position;
                let child_ctx = drive.state.popped_context.unwrap();
                if child_ctx.has_matched == Some(true) {
                    // Success: the marks saved in state 0 are no longer needed.
                    drive.state.marks_pop_discard();
                    drive.ctx_mut().has_matched = Some(true);
                    return None;
                }
                // Failure: pop the saved marks, undo the count bump, restore
                // the position, then try the tail instead.
                drive.state.marks_pop();
                drive.repeat_ctx_mut().count = self.count - 1;
                drive.state.string_position = drive.ctx().string_position;
                self.jump_id = 3;
                self.next(drive)
            }
            3 => {
                // cannot match more repeated items here. make sure the tail matches
                drive.skip_code(drive.peek_code(1) as usize + 1);
                drive.push_new_context(1);
                self.jump_id = 4;
                Some(())
            }
            // Outcome of the tail check: adopt the child's result and restore
            // the position unless it matched.
            4 => {
                let child_ctx = drive.state.popped_context.unwrap();
                drive.ctx_mut().has_matched = child_ctx.has_matched;
                if drive.ctx().has_matched != Some(true) {
                    drive.state.string_position = drive.ctx().string_position;
                }
                None
            }
            _ => unreachable!(),
        }
    }
}
// Per-state step functions for `OpMaxUntil`, one per `jump_id` value.
// Each returns `Some(())` after pushing a child context and `None` once the
// current context's `has_matched` has been written.
impl OpMaxUntil {
    // State 0 (entry): decide whether the repeat body must run again, may run
    // again, or whether only the tail is left to check.
    fn _0(&mut self, drive: &mut StackDrive) -> Option<()> {
        let RepeatContext {
            count,
            code_position,
            last_position,
        } = *drive.repeat_ctx();
        drive.ctx_mut().code_position = code_position;
        let mincount = drive.peek_code(2) as usize;
        let maxcount = drive.peek_code(3) as usize;
        drive.state.string_position = drive.ctx().string_position;
        self.count = count + 1;

        if (self.count as usize) < mincount {
            // not enough matches
            drive.repeat_ctx_mut().count = self.count;
            drive.push_new_context(4);
            self.jump_id = 1;
            return Some(());
        }

        // Compare the incremented count against the upper bound, the same
        // value the lower-bound test above uses. The stored `count` is one
        // behind, and casting a negative stored count to `usize` would make a
        // bounded maximum never allow an iteration. The position test skips
        // another iteration when the position equals the recorded
        // `last_position`.
        if ((self.count as usize) < maxcount || maxcount == MAXREPEAT)
            && drive.state.string_position != last_position
        {
            // we may have enough matches, if we can match another item, do so
            drive.repeat_ctx_mut().count = self.count;
            drive.state.marks_push();
            // Remember the previous value so state 2 can put it back.
            self.save_last_position = last_position;
            drive.repeat_ctx_mut().last_position = drive.state.string_position;
            drive.push_new_context(4);
            self.jump_id = 2;
            return Some(());
        }

        // No further iteration: go straight to the tail check.
        self.jump_id = 3;
        self.next(drive)
    }

    // State 1: outcome of a required iteration. Adopts the child's result and,
    // unless it matched, undoes the count bump and restores the position.
    fn _1(&mut self, drive: &mut StackDrive) -> Option<()> {
        let child_ctx = drive.state.popped_context.unwrap();
        drive.ctx_mut().has_matched = child_ctx.has_matched;
        if drive.ctx().has_matched != Some(true) {
            drive.repeat_ctx_mut().count = self.count - 1;
            drive.state.string_position = drive.ctx().string_position;
        }
        None
    }

    // State 2: outcome of an optional iteration. On success the marks saved in
    // state 0 are discarded; on failure they are popped, the count bump is
    // undone, the position is restored, and the tail is tried instead.
    fn _2(&mut self, drive: &mut StackDrive) -> Option<()> {
        drive.repeat_ctx_mut().last_position = self.save_last_position;
        let child_ctx = drive.state.popped_context.unwrap();
        if child_ctx.has_matched == Some(true) {
            drive.state.marks_pop_discard();
            drive.ctx_mut().has_matched = Some(true);
            return None;
        }
        drive.state.marks_pop();
        drive.repeat_ctx_mut().count = self.count - 1;
        drive.state.string_position = drive.ctx().string_position;
        self.jump_id = 3;
        self.next(drive)
    }

    // State 3: start the tail check in a child context.
    fn _3(&mut self, drive: &mut StackDrive) -> Option<()> {
        // cannot match more repeated items here. make sure the tail matches
        drive.skip_code(drive.peek_code(1) as usize + 1);
        drive.push_new_context(1);
        self.jump_id = 4;
        Some(())
    }

    // State 4: outcome of the tail check. Adopts the child's result and
    // restores the position unless it matched.
    fn _4(&mut self, drive: &mut StackDrive) -> Option<()> {
        let child_ctx = drive.state.popped_context.unwrap();
        drive.ctx_mut().has_matched = child_ctx.has_matched;
        if drive.ctx().has_matched != Some(true) {
            drive.state.string_position = drive.ctx().string_position;
        }
        None
    }
}
// struct OpMaxUntil {
// jump_id: usize,
// count: isize,
// save_last_position: isize,
// }
// impl Default for OpMaxUntil {
// fn default() -> Self {
// Self {
// jump_id: 0,
// count: 0,
// save_last_position: -1,
// }
// }
// }
// impl OpcodeExecutor for OpMaxUntil {
// fn next(&mut self, drive: &mut StackDrive) -> Option<()> {
// match self.jump_id {
// 0 => {
// drive.state.string_position = drive.ctx().string_position;
// let repeat = match drive.state.repeat_stack.last_mut() {
// Some(repeat) => repeat,
// None => {
// panic!("Internal re error: MAX_UNTIL without REPEAT.");
// }
// };
// self.count = repeat.count + 1;
// if self.count < repeat.mincount as isize {
// // not enough matches
// repeat.count = self.count;
// drive.push_new_context(4);
// self.jump_id = 1;
// return Some(());
// }
// if (self.count < repeat.maxcount as isize || repeat.maxcount == MAXREPEAT)
// && (drive.state.string_position as isize != repeat.last_position)
// {
// // we may have enough matches, if we can match another item, do so
// repeat.count = self.count;
// self.save_last_position = repeat.last_position;
// repeat.last_position = drive.state.string_position as isize;
// drive.state.marks_push();
// drive.push_new_context(4);
// self.jump_id = 2;
// return Some(());
// }
// drive.push_new_context(1);
// self.jump_id = 3;
// Some(())
// }
// 1 => {
// let child_ctx = drive.state.popped_context.unwrap();
// drive.ctx_mut().has_matched = child_ctx.has_matched;
// if drive.ctx().has_matched != Some(true) {
// drive.state.string_position = drive.ctx().string_position;
// let repeat = drive.state.repeat_stack.last_mut().unwrap();
// repeat.count = self.count - 1;
// }
// None
// }
// 2 => {
// let repeat = drive.state.repeat_stack.last_mut().unwrap();
// repeat.last_position = drive.state.string_position as isize;
// let child_ctx = drive.state.popped_context.unwrap();
// if child_ctx.has_matched == Some(true) {
// drive.state.marks_pop_discard();
// drive.ctx_mut().has_matched = Some(true);
// return None;
// }
// repeat.count = self.count - 1;
// drive.state.marks_pop();
// drive.state.string_position = drive.ctx().string_position;
// drive.push_new_context(1);
// self.jump_id = 3;
// Some(())
// }
// 3 => {
// // cannot match more repeated items here. make sure the tail matches
// let child_ctx = drive.state.popped_context.unwrap();
// drive.ctx_mut().has_matched = child_ctx.has_matched;
// if drive.ctx().has_matched != Some(true) {
// drive.state.string_position = drive.ctx().string_position;
// } else {
// drive.state.repeat_stack.pop();
// }
// None
// }
// _ => unreachable!(),
// }
// }
// }
struct OpMinUntil {
jump_id: usize,