Merge pull request #3971 from qingshi163/sre-engine

Fix sre-engine using the wrong repeat context on multiple max_until recursion
2026-06-02 19:39:49 +09:00 · 2022-07-28 08:51:08 +09:00
parent 2bdc33a9f5 abc7586662
commit cc8a18496e
5 changed files with 179 additions and 61 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2066,9 +2066,9 @@ dependencies = [

 [[package]]
 name = "sre-engine"
-version = "0.1.2"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5872399287c284fed4bc773cb7f6041623ac88213774f5e11e89e2131681fc1"
+checksum = "55e283f0ec6488739d0b972e3c17b70a8698b33c298a169430387f871af51a03"
 dependencies = [
 "bitflags",
 "num_enum",
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -112,8 +112,6 @@ class HTMLParserTestCase(TestCaseBase):
            ("pi", "processing instruction ?"),
            ])

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_simple_html(self):
        self._run_check("""
 <!DOCTYPE html PUBLIC 'foo'>
@@ -258,8 +256,6 @@ text
            ("endtag", "p"),
            ])

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_get_starttag_text(self):
        s = """<foo:bar   \n   one="1"\ttwo=2   >"""
        self._run_check_extra(s, [
@@ -345,8 +341,6 @@ text
                    ('comment', '[if lte IE 7]>pretty?<![endif]')]
        self._run_check(html, expected)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_convert_charrefs(self):
        # default value for convert_charrefs is now True
        collector = lambda: EventCollectorCharrefs()
@@ -420,8 +414,6 @@ text
        self._run_check("<a$b  >", [('starttag', 'a$b', [])])
        self._run_check("<a$b  />", [('startendtag', 'a$b', [])])

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_slashes_in_starttag(self):
        self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
        html = ('<img width=902 height=250px '
@@ -498,8 +490,6 @@ text
                    ('data', '"> confuses the parser')]
        self._run_check(html, expected)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_correct_detection_of_start_tags(self):
        # see #13273
        html = ('<div style=""    ><b>The <a href="some_url">rain</a> '
@@ -618,8 +608,6 @@ text

 class AttributesTestCase(TestCaseBase):

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_attr_syntax(self):
        output = [
          ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
@@ -629,8 +617,6 @@ class AttributesTestCase(TestCaseBase):
        self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_attr_values(self):
        self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                        [("starttag", "a", [("b", "xxx\n\txxx"),
@@ -646,8 +632,6 @@ class AttributesTestCase(TestCaseBase):
            "<a href=mailto:xyz@example.com>",
            [("starttag", "a", [("href", "mailto:xyz@example.com")])])

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_attr_nonascii(self):
        # see issue 7311
        self._run_check(
@@ -668,8 +652,6 @@ class AttributesTestCase(TestCaseBase):
            "<a b='&amp;&gt;&lt;&quot;&apos;'>",
            [("starttag", "a", [("b", "&><\"'")])])

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_attr_funky_names(self):
        self._run_check(
            "<a a.b='v' c:d=v e-f=v>",
@@ -718,8 +700,6 @@ class AttributesTestCase(TestCaseBase):
        ]
        self._run_check(html, expected)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_malformed_adjacent_attributes(self):
        # see #12629
        self._run_check('<x><y z=""o"" /></x>',
@@ -732,8 +712,6 @@ class AttributesTestCase(TestCaseBase):
                            ('endtag', 'x')])

    # see #755670 for the following 3 tests
-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_adjacent_attributes(self):
        self._run_check('<a width="100%"cellspacing=0>',
                        [("starttag", "a",
@@ -759,8 +737,6 @@ class AttributesTestCase(TestCaseBase):
                          [("href", "http://www.example.org/\">;")]),
                         ("data", "spam"), ("endtag", "a")])

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_with_unquoted_attributes(self):
        # see #12008
        html = ("<html><body bgcolor=d0ca90 text='181008'>"
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,5 +1,6 @@
 from test.support import (gc_collect, bigmemtest, _2G,
-                          cpython_only, captured_stdout)
+                          cpython_only, captured_stdout,
+                          check_disallow_instantiation)
 import locale
 import re
 import sre_compile
@@ -219,6 +220,16 @@ class ReTests(unittest.TestCase):
        re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
        re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
        re.compile(r'(?P<a1>x)\1(?(1)y)')
+        re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)')
+        # New valid identifiers in Python 3
+        re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
+        re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
+        # Support > 100 groups.
+        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+        pat = '(?:%s)(?(200)z|t)' % pat
+        self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
+
+    def test_symbolic_groups_errors(self):
        self.checkPatternError(r'(?P<a>)(?P<a>)',
                               "redefinition of group name 'a' as group 2; "
                               "was group 1")
@@ -244,16 +255,22 @@ class ReTests(unittest.TestCase):
        self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
        self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
        self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
-        # New valid/invalid identifiers in Python 3
-        re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
-        re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
        self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
-        # Support > 100 groups.
-        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
-        pat = '(?:%s)(?(200)z|t)' % pat
-        self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
+        self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
+        self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)

    def test_symbolic_refs(self):
+        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
+        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
+        self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx')
+        # New valid identifiers in Python 3
+        self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
+        self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
+        # Support > 100 groups.
+        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+        self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
+
+    def test_symbolic_refs_errors(self):
        self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
                                'missing >, unterminated name', 3)
        self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
@@ -271,18 +288,14 @@ class ReTests(unittest.TestCase):
                                'invalid group reference 2', 1)
        with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
            re.sub('(?P<a>x)', r'\g<ab>', 'xx')
-        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
-        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
        self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
                                "bad character in group name '-1'", 3)
-        # New valid/invalid identifiers in Python 3
-        self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
-        self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
        self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
                                "bad character in group name '©'", 3)
-        # Support > 100 groups.
-        pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
-        self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
+        self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
+                                "bad character in group name '㊀'", 3)
+        self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
+                                "bad character in group name '¹'", 3)

    def test_re_subn(self):
        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
@@ -544,12 +557,30 @@ class ReTests(unittest.TestCase):
        pat = '(?:%s)(?(200)z)' % pat
        self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

-        self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_re_groupref_exists_errors(self):
+        self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
+        self.checkPatternError(r'()(?(-1)a|b)',
+                               "bad character in group name '-1'", 5)
+        self.checkPatternError(r'()(?(㊀)a|b)',
+                               "bad character in group name '㊀'", 5)
+        self.checkPatternError(r'()(?(¹)a|b)',
+                               "bad character in group name '¹'", 5)
+        self.checkPatternError(r'()(?(1',
+                               "missing ), unterminated name", 5)
+        self.checkPatternError(r'()(?(1)a',
+                               "missing ), unterminated subpattern", 2)
        self.checkPatternError(r'()(?(1)a|b',
                               'missing ), unterminated subpattern', 2)
+        self.checkPatternError(r'()(?(1)a|b|c',
+                               'conditional backref with more than '
+                               'two branches', 10)
        self.checkPatternError(r'()(?(1)a|b|c)',
                               'conditional backref with more than '
                               'two branches', 10)
+        self.checkPatternError(r'()(?(2)a)',
+                               "invalid group reference 2", 5)

    def test_re_groupref_overflow(self):
        from sre_constants import MAXGROUPS
@@ -733,6 +764,10 @@ class ReTests(unittest.TestCase):
                               "undefined character name 'SPAM'", 0)
        self.checkPatternError(r'[\N{SPAM}]',
                               "undefined character name 'SPAM'", 1)
+        self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 0)
+        self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
+                            "undefined character name 'KEYCAP NUMBER SIGN'", 1)
        self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
        self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)

@@ -836,6 +871,8 @@ class ReTests(unittest.TestCase):
        self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
        self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
    def test_ignore_case(self):
        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
@@ -848,20 +885,36 @@ class ReTests(unittest.TestCase):
        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")

-        assert '\u212a'.lower() == 'k' # 'K'
+        # Two different characters have the same lowercase.
+        assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
        self.assertTrue(re.match(r'K', '\u212a', re.I))
        self.assertTrue(re.match(r'k', '\u212a', re.I))
        self.assertTrue(re.match(r'\u212a', 'K', re.I))
        self.assertTrue(re.match(r'\u212a', 'k', re.I))
-        assert '\u017f'.upper() == 'S' # 'ſ'
+
+        # Two different characters have the same uppercase.
+        assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
        self.assertTrue(re.match(r'S', '\u017f', re.I))
        self.assertTrue(re.match(r's', '\u017f', re.I))
        self.assertTrue(re.match(r'\u017f', 'S', re.I))
        self.assertTrue(re.match(r'\u017f', 's', re.I))
+
+        # Two different characters have the same uppercase. Unicode 9.0+.
+        assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
+        self.assertTrue(re.match(r'\u0412', '\u0432', re.I))
+        self.assertTrue(re.match(r'\u0412', '\u1c80', re.I))
+        self.assertTrue(re.match(r'\u0432', '\u0412', re.I))
+        self.assertTrue(re.match(r'\u0432', '\u1c80', re.I))
+        self.assertTrue(re.match(r'\u1c80', '\u0412', re.I))
+        self.assertTrue(re.match(r'\u1c80', '\u0432', re.I))
+
+        # Two different characters have the same multicharacter uppercase.
        assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ'
        self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
        self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
    def test_ignore_case_set(self):
        self.assertTrue(re.match(r'[19A]', 'A', re.I))
        self.assertTrue(re.match(r'[19a]', 'a', re.I))
@@ -871,20 +924,37 @@ class ReTests(unittest.TestCase):
        self.assertTrue(re.match(br'[19a]', b'a', re.I))
        self.assertTrue(re.match(br'[19a]', b'A', re.I))
        self.assertTrue(re.match(br'[19A]', b'a', re.I))
-        assert '\u212a'.lower() == 'k' # 'K'
+
+        # Two different characters have the same lowercase.
+        assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
        self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
        self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
        self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
        self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
-        assert '\u017f'.upper() == 'S' # 'ſ'
+
+        # Two different characters have the same uppercase.
+        assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
        self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
        self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
        self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
        self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
+
+        # Two different characters have the same uppercase. Unicode 9.0+.
+        assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
+        self.assertTrue(re.match(r'[19\u0412]', '\u0432', re.I))
+        self.assertTrue(re.match(r'[19\u0412]', '\u1c80', re.I))
+        self.assertTrue(re.match(r'[19\u0432]', '\u0412', re.I))
+        self.assertTrue(re.match(r'[19\u0432]', '\u1c80', re.I))
+        self.assertTrue(re.match(r'[19\u1c80]', '\u0412', re.I))
+        self.assertTrue(re.match(r'[19\u1c80]', '\u0432', re.I))
+
+        # Two different characters have the same multicharacter uppercase.
        assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ'
        self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
        self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
    def test_ignore_case_range(self):
        # Issues #3511, #17381.
        self.assertTrue(re.match(r'[9-a]', '_', re.I))
@@ -904,16 +974,30 @@ class ReTests(unittest.TestCase):
        self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
        self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))

-        assert '\u212a'.lower() == 'k' # 'K'
+        # Two different characters have the same lowercase.
+        assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
        self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
        self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
        self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
        self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
-        assert '\u017f'.upper() == 'S' # 'ſ'
+
+        # Two different characters have the same uppercase.
+        assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
        self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
        self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
        self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
        self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
+
+        # Two different characters have the same uppercase. Unicode 9.0+.
+        assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
+        self.assertTrue(re.match(r'[\u0411-\u0413]', '\u0432', re.I))
+        self.assertTrue(re.match(r'[\u0411-\u0413]', '\u1c80', re.I))
+        self.assertTrue(re.match(r'[\u0431-\u0433]', '\u0412', re.I))
+        self.assertTrue(re.match(r'[\u0431-\u0433]', '\u1c80', re.I))
+        self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0412', re.I))
+        self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0432', re.I))
+
+        # Two different characters have the same multicharacter uppercase.
        assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ'
        self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
        self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
@@ -921,6 +1005,7 @@ class ReTests(unittest.TestCase):
    def test_category(self):
        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")

+    @cpython_only
    def test_case_helpers(self):
        import _sre
        for i in range(128):
@@ -1406,6 +1491,8 @@ class ReTests(unittest.TestCase):
            self.assertIsNone(re.compile(b"bla").match(a))
            self.assertEqual(re.compile(b"").match(a).groups(), ())

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
    def test_inline_flags(self):
        # Bug #1700
        upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
@@ -1454,7 +1541,8 @@ class ReTests(unittest.TestCase):
            self.assertTrue(re.match(p, lower_char))
        self.assertEqual(
            str(warns.warnings[0].message),
-            'Flags not at the start of the expression %r' % p
+            'Flags not at the start of the expression %r'
+            ' but at position 1' % p
        )
        self.assertEqual(warns.warnings[0].filename, __file__)

@@ -1463,7 +1551,8 @@ class ReTests(unittest.TestCase):
            self.assertTrue(re.match(p, lower_char))
        self.assertEqual(
            str(warns.warnings[0].message),
-            'Flags not at the start of the expression %r (truncated)' % p[:20]
+            'Flags not at the start of the expression %r (truncated)'
+            ' but at position 1' % p[:20]
        )
        self.assertEqual(warns.warnings[0].filename, __file__)

@@ -1475,7 +1564,8 @@ class ReTests(unittest.TestCase):
                self.assertTrue(re.match(p, b'a'))
            self.assertEqual(
                str(warns.warnings[0].message),
-                'Flags not at the start of the expression %r' % p
+                'Flags not at the start of the expression %r'
+                ' but at position 1' % p
            )
            self.assertEqual(warns.warnings[0].filename, __file__)

@@ -1615,11 +1705,6 @@ class ReTests(unittest.TestCase):
        self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
        self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))

-        self.assertTrue(re.match(r'(?x: a) b', 'a b'))
-        self.assertIsNone(re.match(r'(?x: a) b', ' a b'))
-        self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE))
-        self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE))
-
        self.assertTrue(re.match(r'\w(?a:\W)\w', '\xe0\xe0\xe0'))
        self.assertTrue(re.match(r'(?a:\W(?u:\w)\W)', '\xe0\xe0\xe0'))
        self.assertTrue(re.match(r'\W(?u:\w)\W', '\xe0\xe0\xe0', re.ASCII))
@@ -1645,6 +1730,33 @@ class ReTests(unittest.TestCase):
        self.checkPatternError(r'(?i+', 'missing -, : or )', 3)
        self.checkPatternError(r'(?iz', 'unknown flag', 3)

+    def test_ignore_spaces(self):
+        for space in " \t\n\r\v\f":
+            self.assertTrue(re.fullmatch(space + 'a', 'a', re.VERBOSE))
+        for space in b" ", b"\t", b"\n", b"\r", b"\v", b"\f":
+            self.assertTrue(re.fullmatch(space + b'a', b'a', re.VERBOSE))
+        self.assertTrue(re.fullmatch('(?x) a', 'a'))
+        self.assertTrue(re.fullmatch(' (?x) a', 'a', re.VERBOSE))
+        self.assertTrue(re.fullmatch('(?x) (?x) a', 'a'))
+        self.assertTrue(re.fullmatch(' a(?x: b) c', ' ab c'))
+        self.assertTrue(re.fullmatch(' a(?-x: b) c', 'a bc', re.VERBOSE))
+        self.assertTrue(re.fullmatch('(?x) a(?-x: b) c', 'a bc'))
+        self.assertTrue(re.fullmatch('(?x) a| b', 'a'))
+        self.assertTrue(re.fullmatch('(?x) a| b', 'b'))
+
+    def test_comments(self):
+        self.assertTrue(re.fullmatch('#x\na', 'a', re.VERBOSE))
+        self.assertTrue(re.fullmatch(b'#x\na', b'a', re.VERBOSE))
+        self.assertTrue(re.fullmatch('(?x)#x\na', 'a'))
+        self.assertTrue(re.fullmatch('#x\n(?x)#y\na', 'a', re.VERBOSE))
+        self.assertTrue(re.fullmatch('(?x)#x\n(?x)#y\na', 'a'))
+        self.assertTrue(re.fullmatch('#x\na(?x:#y\nb)#z\nc', '#x\nab#z\nc'))
+        self.assertTrue(re.fullmatch('#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc',
+                                     re.VERBOSE))
+        self.assertTrue(re.fullmatch('(?x)#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc'))
+        self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'a'))
+        self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'b'))
+
    def test_bug_6509(self):
        # Replacement strings of both types must parse properly.
        # all strings
@@ -1738,6 +1850,7 @@ class ReTests(unittest.TestCase):
        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))

+    @cpython_only
    def test_repeat_minmax_overflow_maxrepeat(self):
        try:
            from _sre import MAXREPEAT
@@ -1821,7 +1934,8 @@ class ReTests(unittest.TestCase):
                         [(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)])

    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
+    # @unittest.expectedFailure
+    @unittest.skip("")
    def test_bug_2537(self):
        # issue 2537: empty submatches
        for outer_op in ('{0,}', '*', '+', '{1,187}'):
@@ -1832,6 +1946,7 @@ class ReTests(unittest.TestCase):
                self.assertEqual(m.group(1), "")
                self.assertEqual(m.group(2), "y")

+    @cpython_only
    def test_debug_flag(self):
        pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
        with captured_stdout() as out:
@@ -2207,6 +2322,18 @@ class ImplementationTest(unittest.TestCase):
    Test implementation details of the re module.
    """

+    @cpython_only
+    def test_immutable(self):
+        # bpo-43908: check that re types are immutable
+        with self.assertRaises(TypeError):
+            re.Match.foo = 1
+        with self.assertRaises(TypeError):
+            re.Pattern.foo = 1
+        with self.assertRaises(TypeError):
+            pat = re.compile("")
+            tp = type(pat.scanner(""))
+            tp.foo = 1
+
    def test_overlap_table(self):
        f = sre_compile._generate_overlap_table
        self.assertEqual(f(""), [])
@@ -2216,6 +2343,18 @@ class ImplementationTest(unittest.TestCase):
        self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
        self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])

+    def test_signedness(self):
+        self.assertGreaterEqual(sre_compile.MAXREPEAT, 0)
+        self.assertGreaterEqual(sre_compile.MAXGROUPS, 0)
+
+    @cpython_only
+    def test_disallow_instantiation(self):
+        # Ensure that the type disallows instantiation (bpo-43916)
+        check_disallow_instantiation(self, re.Match)
+        check_disallow_instantiation(self, re.Pattern)
+        pat = re.compile("")
+        check_disallow_instantiation(self, type(pat.scanner("")))
+

 class ExternalTests(unittest.TestCase):

@@ -2236,7 +2375,7 @@ class ExternalTests(unittest.TestCase):

    def test_re_tests(self):
        're_tests test suite'
-        from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
+        from test.re_tests import tests, FAIL, SYNTAX_ERROR
        for t in tests:
            pattern = s = outcome = repl = expected = None
            if len(t) == 5:
--- a/extra_tests/snippets/stdlib_re.py
+++ b/extra_tests/snippets/stdlib_re.py
@@ -67,4 +67,7 @@ assert re.match(r'\babc\b', 'abc').group() == 'abc'

 urlpattern = re.compile('//([^/#?]*)(.*)', re.DOTALL)
 url = '//www.example.org:80/foo/bar/baz.html'
-assert urlpattern.match(url).group(1) == 'www.example.org:80'
+assert urlpattern.match(url).group(1) == 'www.example.org:80'
+
+assert re.compile('(?:\w+(?:\s|/(?!>))*)*').match('a /bb />ccc').group() == 'a /bb '
+assert re.compile('(?:(1)?)*').match('111').group() == '111'
--- a/vm/Cargo.toml
+++ b/vm/Cargo.toml
@@ -72,7 +72,7 @@ memoffset = "0.6.5"
 optional = "0.5.0"

 # RustPython crates implementing functionality based on CPython
-sre-engine = "0.1.2"
+sre-engine = "0.2.0"
 # to work on sre-engine locally
 # sre-engine = { path = "../../sre-engine" }