From 5e543b4524979aefd104e3550fe2d737f251e324 Mon Sep 17 00:00:00 2001
From: Kangzhi Shi
Date: Thu, 7 Jan 2021 12:03:38 +0200
Subject: [PATCH] impl Pattern.split

---
 vm/src/stdlib/sre.rs | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs
index e5150b827a..c26d7875d0 100644
--- a/vm/src/stdlib/sre.rs
+++ b/vm/src/stdlib/sre.rs
@@ -120,6 +120,15 @@ mod _sre {
         count: usize,
     }

+    /// Arguments of `Pattern.split`: the string to split and `maxsplit` (defaults to 0).
+    #[derive(FromArgs)]
+    struct SplitArgs {
+        #[pyarg(any)]
+        string: PyStrRef,
+        #[pyarg(any, default = "0")]
+        maxsplit: isize,
+    }
+
     #[pyattr]
     #[pyclass(name = "Pattern")]
     #[derive(Debug)]
@@ -265,6 +274,87 @@ mod _sre {
             Self::subx(zelf, sub_args, true, vm)
         }

+        /// Split `string` at every non-empty match of this pattern.
+        ///
+        /// The returned list holds, in order: the text before each match, then
+        /// that match's capture groups (a group for which `get_slice` yields
+        /// nothing falls back to the default value), and finally the text after
+        /// the last match, which is pushed even when it is empty.
+        ///
+        /// `maxsplit == 0` means no limit; a positive value caps the number of
+        /// splits. A negative value never enters the loop, so the whole string
+        /// comes back as the only element.
+        ///
+        /// Empty matches do not split the string; the scan steps one character
+        /// past them and continues.
+        #[pymethod]
+        fn split(zelf: PyRef<Self>, split_args: SplitArgs, vm: &VirtualMachine) -> PyListRef {
+            let mut splitlist = Vec::new();
+
+            let mut n = 0;
+            // start of the segment that has not been emitted yet
+            let mut last_pos = 0;
+            // where the next search begins; tracked separately from `last_pos` so
+            // that stepping over an empty match cannot drop text from the output
+            let mut search_pos = 0;
+            while split_args.maxsplit == 0 || n < split_args.maxsplit {
+                let m = match interp::search(
+                    split_args.string.clone(),
+                    search_pos,
+                    std::usize::MAX,
+                    zelf.clone(),
+                ) {
+                    Some(m) => m,
+                    None => break,
+                };
+                let start = m.regs[0].0 as usize;
+                let end = m.regs[0].1 as usize;
+                if start == end {
+                    // Empty match: stop at the end of the searched range, otherwise
+                    // skip one character and search again.
+                    // NOTE(review): assumes `m.endpos` is the search end clamped to
+                    // the string length -- confirm against `interp::search`.
+                    if end >= m.endpos {
+                        break;
+                    }
+                    search_pos = end + 1;
+                    continue;
+                }
+
+                // get segment before this match
+                splitlist.push(
+                    m.string
+                        .borrow_value()
+                        .chars()
+                        .take(start)
+                        .skip(last_pos)
+                        .collect::<String>()
+                        .into_pyobject(vm),
+                );
+
+                // add groups (if any)
+                for i in 1..=zelf.groups {
+                    splitlist.push(m.get_slice(i).unwrap_or_default().into_pyobject(vm));
+                }
+                n += 1;
+                last_pos = end;
+                search_pos = end;
+            }
+
+            // get segment following last match (even if empty)
+            splitlist.push(
+                split_args
+                    .string
+                    .borrow_value()
+                    .chars()
+                    .skip(last_pos)
+                    .collect::<String>()
+                    .into_pyobject(vm),
+            );
+
+            PyList::from(splitlist).into_ref(vm)
+        }
+
         #[pyproperty]
         fn flags(&self) -> u16 {
             self.flags.bits()