Merge pull request #3971 from qingshi163/sre-engine

Fix sre-engine using the wrong repeat context during multiple max_until recursion
This commit is contained in:
Jeong YunWon
2022-07-28 08:51:08 +09:00
committed by GitHub
5 changed files with 179 additions and 61 deletions

4
Cargo.lock generated
View File

@@ -2066,9 +2066,9 @@ dependencies = [
[[package]]
name = "sre-engine"
version = "0.1.2"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5872399287c284fed4bc773cb7f6041623ac88213774f5e11e89e2131681fc1"
checksum = "55e283f0ec6488739d0b972e3c17b70a8698b33c298a169430387f871af51a03"
dependencies = [
"bitflags",
"num_enum",

View File

@@ -112,8 +112,6 @@ class HTMLParserTestCase(TestCaseBase):
("pi", "processing instruction ?"),
])
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_simple_html(self):
self._run_check("""
<!DOCTYPE html PUBLIC 'foo'>
@@ -258,8 +256,6 @@ text
("endtag", "p"),
])
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_get_starttag_text(self):
s = """<foo:bar \n one="1"\ttwo=2 >"""
self._run_check_extra(s, [
@@ -345,8 +341,6 @@ text
('comment', '[if lte IE 7]>pretty?<![endif]')]
self._run_check(html, expected)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_convert_charrefs(self):
# default value for convert_charrefs is now True
collector = lambda: EventCollectorCharrefs()
@@ -420,8 +414,6 @@ text
self._run_check("<a$b >", [('starttag', 'a$b', [])])
self._run_check("<a$b />", [('startendtag', 'a$b', [])])
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_slashes_in_starttag(self):
self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
html = ('<img width=902 height=250px '
@@ -498,8 +490,6 @@ text
('data', '"> confuses the parser')]
self._run_check(html, expected)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_correct_detection_of_start_tags(self):
# see #13273
html = ('<div style="" ><b>The <a href="some_url">rain</a> '
@@ -618,8 +608,6 @@ text
class AttributesTestCase(TestCaseBase):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_attr_syntax(self):
output = [
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
@@ -629,8 +617,6 @@ class AttributesTestCase(TestCaseBase):
self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_attr_values(self):
self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
[("starttag", "a", [("b", "xxx\n\txxx"),
@@ -646,8 +632,6 @@ class AttributesTestCase(TestCaseBase):
"<a href=mailto:xyz@example.com>",
[("starttag", "a", [("href", "mailto:xyz@example.com")])])
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_attr_nonascii(self):
# see issue 7311
self._run_check(
@@ -668,8 +652,6 @@ class AttributesTestCase(TestCaseBase):
"<a b='&amp;&gt;&lt;&quot;&apos;'>",
[("starttag", "a", [("b", "&><\"'")])])
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_attr_funky_names(self):
self._run_check(
"<a a.b='v' c:d=v e-f=v>",
@@ -718,8 +700,6 @@ class AttributesTestCase(TestCaseBase):
]
self._run_check(html, expected)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_malformed_adjacent_attributes(self):
# see #12629
self._run_check('<x><y z=""o"" /></x>',
@@ -732,8 +712,6 @@ class AttributesTestCase(TestCaseBase):
('endtag', 'x')])
# see #755670 for the following 3 tests
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_adjacent_attributes(self):
self._run_check('<a width="100%"cellspacing=0>',
[("starttag", "a",
@@ -759,8 +737,6 @@ class AttributesTestCase(TestCaseBase):
[("href", "http://www.example.org/\">;")]),
("data", "spam"), ("endtag", "a")])
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_with_unquoted_attributes(self):
# see #12008
html = ("<html><body bgcolor=d0ca90 text='181008'>"

205
Lib/test/test_re.py vendored
View File

@@ -1,5 +1,6 @@
from test.support import (gc_collect, bigmemtest, _2G,
cpython_only, captured_stdout)
cpython_only, captured_stdout,
check_disallow_instantiation)
import locale
import re
import sre_compile
@@ -219,6 +220,16 @@ class ReTests(unittest.TestCase):
re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
re.compile(r'(?P<a1>x)\1(?(1)y)')
re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)')
# New valid identifiers in Python 3
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
pat = '(?:%s)(?(200)z|t)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_symbolic_groups_errors(self):
self.checkPatternError(r'(?P<a>)(?P<a>)',
"redefinition of group name 'a' as group 2; "
"was group 1")
@@ -244,16 +255,22 @@ class ReTests(unittest.TestCase):
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
# New valid/invalid identifiers in Python 3
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
pat = '(?:%s)(?(200)z|t)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)
def test_symbolic_refs(self):
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx')
# New valid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
def test_symbolic_refs_errors(self):
self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
'missing >, unterminated name', 3)
self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
@@ -271,18 +288,14 @@ class ReTests(unittest.TestCase):
'invalid group reference 2', 1)
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
"bad character in group name '-1'", 3)
# New valid/invalid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
"bad character in group name '©'", 3)
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
"bad character in group name '㊀'", 3)
self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
"bad character in group name '¹'", 3)
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
@@ -544,12 +557,30 @@ class ReTests(unittest.TestCase):
pat = '(?:%s)(?(200)z)' % pat
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_re_groupref_exists_errors(self):
self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
self.checkPatternError(r'()(?(-1)a|b)',
"bad character in group name '-1'", 5)
self.checkPatternError(r'()(?(㊀)a|b)',
"bad character in group name '㊀'", 5)
self.checkPatternError(r'()(?(¹)a|b)',
"bad character in group name '¹'", 5)
self.checkPatternError(r'()(?(1',
"missing ), unterminated name", 5)
self.checkPatternError(r'()(?(1)a',
"missing ), unterminated subpattern", 2)
self.checkPatternError(r'()(?(1)a|b',
'missing ), unterminated subpattern', 2)
self.checkPatternError(r'()(?(1)a|b|c',
'conditional backref with more than '
'two branches', 10)
self.checkPatternError(r'()(?(1)a|b|c)',
'conditional backref with more than '
'two branches', 10)
self.checkPatternError(r'()(?(2)a)',
"invalid group reference 2", 5)
def test_re_groupref_overflow(self):
from sre_constants import MAXGROUPS
@@ -733,6 +764,10 @@ class ReTests(unittest.TestCase):
"undefined character name 'SPAM'", 0)
self.checkPatternError(r'[\N{SPAM}]',
"undefined character name 'SPAM'", 1)
self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
"undefined character name 'KEYCAP NUMBER SIGN'", 0)
self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
"undefined character name 'KEYCAP NUMBER SIGN'", 1)
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
@@ -836,6 +871,8 @@ class ReTests(unittest.TestCase):
self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_ignore_case(self):
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
@@ -848,20 +885,36 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
assert '\u212a'.lower() == 'k' # 'K'
# Two different characters have the same lowercase.
assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
self.assertTrue(re.match(r'K', '\u212a', re.I))
self.assertTrue(re.match(r'k', '\u212a', re.I))
self.assertTrue(re.match(r'\u212a', 'K', re.I))
self.assertTrue(re.match(r'\u212a', 'k', re.I))
assert '\u017f'.upper() == 'S' # 'ſ'
# Two different characters have the same uppercase.
assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
self.assertTrue(re.match(r'S', '\u017f', re.I))
self.assertTrue(re.match(r's', '\u017f', re.I))
self.assertTrue(re.match(r'\u017f', 'S', re.I))
self.assertTrue(re.match(r'\u017f', 's', re.I))
# Two different characters have the same uppercase. Unicode 9.0+.
assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
self.assertTrue(re.match(r'\u0412', '\u0432', re.I))
self.assertTrue(re.match(r'\u0412', '\u1c80', re.I))
self.assertTrue(re.match(r'\u0432', '\u0412', re.I))
self.assertTrue(re.match(r'\u0432', '\u1c80', re.I))
self.assertTrue(re.match(r'\u1c80', '\u0412', re.I))
self.assertTrue(re.match(r'\u1c80', '\u0432', re.I))
# Two different characters have the same multicharacter uppercase.
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_ignore_case_set(self):
self.assertTrue(re.match(r'[19A]', 'A', re.I))
self.assertTrue(re.match(r'[19a]', 'a', re.I))
@@ -871,20 +924,37 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(br'[19a]', b'a', re.I))
self.assertTrue(re.match(br'[19a]', b'A', re.I))
self.assertTrue(re.match(br'[19A]', b'a', re.I))
assert '\u212a'.lower() == 'k' # 'K'
# Two different characters have the same lowercase.
assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
assert '\u017f'.upper() == 'S' # 'ſ'
# Two different characters have the same uppercase.
assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
# Two different characters have the same uppercase. Unicode 9.0+.
assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
self.assertTrue(re.match(r'[19\u0412]', '\u0432', re.I))
self.assertTrue(re.match(r'[19\u0412]', '\u1c80', re.I))
self.assertTrue(re.match(r'[19\u0432]', '\u0412', re.I))
self.assertTrue(re.match(r'[19\u0432]', '\u1c80', re.I))
self.assertTrue(re.match(r'[19\u1c80]', '\u0412', re.I))
self.assertTrue(re.match(r'[19\u1c80]', '\u0432', re.I))
# Two different characters have the same multicharacter uppercase.
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_ignore_case_range(self):
# Issues #3511, #17381.
self.assertTrue(re.match(r'[9-a]', '_', re.I))
@@ -904,16 +974,30 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
assert '\u212a'.lower() == 'k' # 'K'
# Two different characters have the same lowercase.
assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
assert '\u017f'.upper() == 'S' # 'ſ'
# Two different characters have the same uppercase.
assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
# Two different characters have the same uppercase. Unicode 9.0+.
assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
self.assertTrue(re.match(r'[\u0411-\u0413]', '\u0432', re.I))
self.assertTrue(re.match(r'[\u0411-\u0413]', '\u1c80', re.I))
self.assertTrue(re.match(r'[\u0431-\u0433]', '\u0412', re.I))
self.assertTrue(re.match(r'[\u0431-\u0433]', '\u1c80', re.I))
self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0412', re.I))
self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0432', re.I))
# Two different characters have the same multicharacter uppercase.
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
@@ -921,6 +1005,7 @@ class ReTests(unittest.TestCase):
def test_category(self):
self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
@cpython_only
def test_case_helpers(self):
import _sre
for i in range(128):
@@ -1406,6 +1491,8 @@ class ReTests(unittest.TestCase):
self.assertIsNone(re.compile(b"bla").match(a))
self.assertEqual(re.compile(b"").match(a).groups(), ())
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_inline_flags(self):
# Bug #1700
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
@@ -1454,7 +1541,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(p, lower_char))
self.assertEqual(
str(warns.warnings[0].message),
'Flags not at the start of the expression %r' % p
'Flags not at the start of the expression %r'
' but at position 1' % p
)
self.assertEqual(warns.warnings[0].filename, __file__)
@@ -1463,7 +1551,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(p, lower_char))
self.assertEqual(
str(warns.warnings[0].message),
'Flags not at the start of the expression %r (truncated)' % p[:20]
'Flags not at the start of the expression %r (truncated)'
' but at position 1' % p[:20]
)
self.assertEqual(warns.warnings[0].filename, __file__)
@@ -1475,7 +1564,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(p, b'a'))
self.assertEqual(
str(warns.warnings[0].message),
'Flags not at the start of the expression %r' % p
'Flags not at the start of the expression %r'
' but at position 1' % p
)
self.assertEqual(warns.warnings[0].filename, __file__)
@@ -1615,11 +1705,6 @@ class ReTests(unittest.TestCase):
self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))
self.assertTrue(re.match(r'(?x: a) b', 'a b'))
self.assertIsNone(re.match(r'(?x: a) b', ' a b'))
self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE))
self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE))
self.assertTrue(re.match(r'\w(?a:\W)\w', '\xe0\xe0\xe0'))
self.assertTrue(re.match(r'(?a:\W(?u:\w)\W)', '\xe0\xe0\xe0'))
self.assertTrue(re.match(r'\W(?u:\w)\W', '\xe0\xe0\xe0', re.ASCII))
@@ -1645,6 +1730,33 @@ class ReTests(unittest.TestCase):
self.checkPatternError(r'(?i+', 'missing -, : or )', 3)
self.checkPatternError(r'(?iz', 'unknown flag', 3)
def test_ignore_spaces(self):
for space in " \t\n\r\v\f":
self.assertTrue(re.fullmatch(space + 'a', 'a', re.VERBOSE))
for space in b" ", b"\t", b"\n", b"\r", b"\v", b"\f":
self.assertTrue(re.fullmatch(space + b'a', b'a', re.VERBOSE))
self.assertTrue(re.fullmatch('(?x) a', 'a'))
self.assertTrue(re.fullmatch(' (?x) a', 'a', re.VERBOSE))
self.assertTrue(re.fullmatch('(?x) (?x) a', 'a'))
self.assertTrue(re.fullmatch(' a(?x: b) c', ' ab c'))
self.assertTrue(re.fullmatch(' a(?-x: b) c', 'a bc', re.VERBOSE))
self.assertTrue(re.fullmatch('(?x) a(?-x: b) c', 'a bc'))
self.assertTrue(re.fullmatch('(?x) a| b', 'a'))
self.assertTrue(re.fullmatch('(?x) a| b', 'b'))
def test_comments(self):
self.assertTrue(re.fullmatch('#x\na', 'a', re.VERBOSE))
self.assertTrue(re.fullmatch(b'#x\na', b'a', re.VERBOSE))
self.assertTrue(re.fullmatch('(?x)#x\na', 'a'))
self.assertTrue(re.fullmatch('#x\n(?x)#y\na', 'a', re.VERBOSE))
self.assertTrue(re.fullmatch('(?x)#x\n(?x)#y\na', 'a'))
self.assertTrue(re.fullmatch('#x\na(?x:#y\nb)#z\nc', '#x\nab#z\nc'))
self.assertTrue(re.fullmatch('#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc',
re.VERBOSE))
self.assertTrue(re.fullmatch('(?x)#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc'))
self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'a'))
self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'b'))
def test_bug_6509(self):
# Replacement strings of both types must parse properly.
# all strings
@@ -1738,6 +1850,7 @@ class ReTests(unittest.TestCase):
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
try:
from _sre import MAXREPEAT
@@ -1821,7 +1934,8 @@ class ReTests(unittest.TestCase):
[(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)])
# TODO: RUSTPYTHON
@unittest.expectedFailure
# @unittest.expectedFailure
@unittest.skip("")
def test_bug_2537(self):
# issue 2537: empty submatches
for outer_op in ('{0,}', '*', '+', '{1,187}'):
@@ -1832,6 +1946,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(1), "")
self.assertEqual(m.group(2), "y")
@cpython_only
def test_debug_flag(self):
pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
with captured_stdout() as out:
@@ -2207,6 +2322,18 @@ class ImplementationTest(unittest.TestCase):
Test implementation details of the re module.
"""
@cpython_only
def test_immutable(self):
# bpo-43908: check that re types are immutable
with self.assertRaises(TypeError):
re.Match.foo = 1
with self.assertRaises(TypeError):
re.Pattern.foo = 1
with self.assertRaises(TypeError):
pat = re.compile("")
tp = type(pat.scanner(""))
tp.foo = 1
def test_overlap_table(self):
f = sre_compile._generate_overlap_table
self.assertEqual(f(""), [])
@@ -2216,6 +2343,18 @@ class ImplementationTest(unittest.TestCase):
self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
def test_signedness(self):
self.assertGreaterEqual(sre_compile.MAXREPEAT, 0)
self.assertGreaterEqual(sre_compile.MAXGROUPS, 0)
@cpython_only
def test_disallow_instantiation(self):
# Ensure that the type disallows instantiation (bpo-43916)
check_disallow_instantiation(self, re.Match)
check_disallow_instantiation(self, re.Pattern)
pat = re.compile("")
check_disallow_instantiation(self, type(pat.scanner("")))
class ExternalTests(unittest.TestCase):
@@ -2236,7 +2375,7 @@ class ExternalTests(unittest.TestCase):
def test_re_tests(self):
're_tests test suite'
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
from test.re_tests import tests, FAIL, SYNTAX_ERROR
for t in tests:
pattern = s = outcome = repl = expected = None
if len(t) == 5:

View File

@@ -67,4 +67,7 @@ assert re.match(r'\babc\b', 'abc').group() == 'abc'
urlpattern = re.compile('//([^/#?]*)(.*)', re.DOTALL)
url = '//www.example.org:80/foo/bar/baz.html'
assert urlpattern.match(url).group(1) == 'www.example.org:80'
assert urlpattern.match(url).group(1) == 'www.example.org:80'
assert re.compile('(?:\w+(?:\s|/(?!>))*)*').match('a /bb />ccc').group() == 'a /bb '
assert re.compile('(?:(1)?)*').match('111').group() == '111'

View File

@@ -72,7 +72,7 @@ memoffset = "0.6.5"
optional = "0.5.0"
# RustPython crates implementing functionality based on CPython
sre-engine = "0.1.2"
sre-engine = "0.2.0"
# to work on sre-engine locally
# sre-engine = { path = "../../sre-engine" }