From 86435b8a4b44d0b79109ace6acd66cbfcebaac66 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Thu, 22 Apr 2021 17:15:03 +0200 Subject: [PATCH] add benchmark --- benches/benches.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++ generate_tests.py | 5 +- 2 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 benches/benches.rs diff --git a/benches/benches.rs b/benches/benches.rs new file mode 100644 index 000000000..b86a59296 --- /dev/null +++ b/benches/benches.rs @@ -0,0 +1,112 @@ +#![feature(test)] + +extern crate test; +use test::Bencher; + +use sre_engine::constants::SreFlag; +use sre_engine::engine; +pub struct Pattern { + pub code: &'static [u32], + pub flags: SreFlag, +} + +impl Pattern { + pub fn state<'a>( + &self, + string: impl Into>, + range: std::ops::Range, + ) -> engine::State<'a> { + engine::State::new(string.into(), range.start, range.end, self.flags, self.code) + } +} +#[bench] +fn benchmarks(b: &mut Bencher) { + // # test common prefix + // pattern p1 = re.compile('Python|Perl') # , 'Perl'), # Alternation + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p1 = Pattern { code: &[15, 8, 1, 4, 6, 1, 1, 80, 0, 17, 80, 7, 13, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 16, 11, 9, 17, 101, 17, 114, 17, 108, 16, 2, 0, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern p2 = re.compile('(Python|Perl)') #, 'Perl'), # Grouped alternation + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p2 = Pattern { code: &[15, 8, 1, 4, 6, 1, 0, 80, 0, 18, 0, 17, 80, 7, 13, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 16, 11, 9, 17, 101, 17, 114, 17, 108, 16, 2, 0, 18, 1, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('Python|Perl|Tcl') #, 'Perl'), # Alternation + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p3 = Pattern { code: &[15, 9, 4, 3, 6, 17, 80, 17, 84, 0, 7, 15, 17, 80, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 16, 22, 11, 17, 80, 17, 101, 17, 114, 17, 108, 16, 11, 9, 17, 84, 17, 99, 17, 108, 16, 2, 0, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('(Python|Perl|Tcl)') #, 'Perl'), # Grouped alternation + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p4 = Pattern { code: &[15, 9, 4, 3, 6, 17, 80, 17, 84, 0, 18, 0, 7, 15, 17, 80, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 16, 22, 11, 17, 80, 17, 101, 17, 114, 17, 108, 16, 11, 9, 17, 84, 17, 99, 17, 108, 16, 2, 0, 18, 1, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('(Python)\\1') #, 'PythonPython'), # Backreference + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p5 = Pattern { code: &[15, 18, 1, 12, 12, 6, 0, 80, 121, 116, 104, 111, 110, 0, 0, 0, 0, 0, 0, 18, 0, 17, 80, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 18, 1, 12, 0, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('([0a-z][a-z0-9]*,)+') #, 'a5,b7,c9,'), # Disable the fastmap optimization + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p6 = Pattern { code: &[15, 4, 0, 2, 4294967295, 24, 31, 1, 4294967295, 18, 0, 14, 7, 17, 48, 23, 97, 122, 0, 25, 13, 0, 4294967295, 14, 8, 23, 97, 122, 23, 48, 57, 0, 1, 17, 44, 18, 1, 19, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('([a-z][a-z0-9]*,)+') #, 'a5,b7,c9,'), # A few sets + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p7 = Pattern { code: &[15, 4, 0, 2, 4294967295, 24, 29, 1, 4294967295, 18, 0, 14, 5, 23, 97, 122, 0, 25, 13, 0, 4294967295, 14, 8, 23, 97, 122, 23, 48, 57, 0, 1, 17, 44, 18, 1, 19, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('Python') #, 'Python'), # Simple text literal + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p8 = Pattern { code: &[15, 18, 3, 6, 6, 6, 6, 80, 121, 116, 104, 111, 110, 0, 0, 0, 0, 0, 0, 17, 80, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('.*Python') #, 'Python'), # Bad text literal + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p9 = Pattern { code: &[15, 4, 0, 6, 4294967295, 25, 5, 0, 4294967295, 2, 1, 17, 80, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('.*Python.*') #, 'Python'), # Worse text literal + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p10 = Pattern { code: &[15, 4, 0, 6, 4294967295, 25, 5, 0, 4294967295, 2, 1, 17, 80, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 25, 5, 0, 4294967295, 2, 1, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + // pattern pn = re.compile('.*(Python)') #, 'Python'), # Bad text literal with grouping + // START GENERATED by generate_tests.py + #[rustfmt::skip] let p11 = Pattern { code: &[15, 4, 0, 6, 4294967295, 25, 5, 0, 4294967295, 2, 1, 18, 0, 17, 80, 17, 121, 17, 116, 17, 104, 17, 111, 17, 110, 18, 1, 1], flags: SreFlag::from_bits_truncate(32) }; + // END GENERATED + + let tests = [ + (p1, "Perl"), + (p2, "Perl"), + (p3, "Perl"), + (p4, "Perl"), + (p5, "PythonPython"), + (p6, "a5,b7,c9,"), + (p7, "a5,b7,c9,"), + (p8, "Python"), + (p9, "Python"), + (p10, "Python"), + (p11, "Python"), + ]; + + b.iter(move || { + for (p, s) in &tests { + let mut state = p.state(s.clone(), 0..usize::MAX); + state = state.search(); + assert!(state.has_matched); + state = p.state(s.clone(), 0..usize::MAX); + state = state.pymatch(); + assert!(state.has_matched); + state = p.state(s.clone(), 0..usize::MAX); + state.match_all = true; + state = state.pymatch(); + assert!(state.has_matched); + let s2 = format!("{}{}{}", " ".repeat(10000), s, " ".repeat(10000)); + state = p.state(s2.as_str(), 0..usize::MAX); + state = state.search(); + assert!(state.has_matched); + state = p.state(s2.as_str(), 10000..usize::MAX); + state = state.pymatch(); + assert!(state.has_matched); + state = p.state(s2.as_str(), 10000..10000 + s.len()); + state = state.pymatch(); + assert!(state.has_matched); + state = p.state(s2.as_str(), 10000..10000 + s.len()); + state.match_all = true; + state = state.pymatch(); + assert!(state.has_matched); + } + }) +} diff --git a/generate_tests.py b/generate_tests.py index 7af1d2f0c..b432720cd 100644 --- a/generate_tests.py +++ b/generate_tests.py @@ -5,6 +5,7 @@ import sre_constants import sre_compile import sre_parse import json +from itertools import chain m = re.search(r"const SRE_MAGIC: usize = (\d+);", open("src/constants.rs").read()) sre_engine_magic = int(m.group(1)) @@ -38,8 +39,8 @@ def replace_compiled(m): {indent}#[rustfmt::skip] let {varname} = {pattern}; {indent}// END GENERATED''' -with os.scandir("tests") as d: - for f in d: +with os.scandir("tests") as t, os.scandir("benches") as b: + for f in chain(t, b): path = Path(f.path) if path.suffix == ".rs": replaced = pattern_pattern.sub(replace_compiled, path.read_text())