mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Merge pull request #3971 from qingshi163/sre-engine
Fix sre-engine using the wrong repeat context with multiple max_until recursion
This commit is contained in:
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -2066,9 +2066,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sre-engine"
|
||||
version = "0.1.2"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5872399287c284fed4bc773cb7f6041623ac88213774f5e11e89e2131681fc1"
|
||||
checksum = "55e283f0ec6488739d0b972e3c17b70a8698b33c298a169430387f871af51a03"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"num_enum",
|
||||
|
||||
24
Lib/test/test_htmlparser.py
vendored
24
Lib/test/test_htmlparser.py
vendored
@@ -112,8 +112,6 @@ class HTMLParserTestCase(TestCaseBase):
|
||||
("pi", "processing instruction ?"),
|
||||
])
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_simple_html(self):
|
||||
self._run_check("""
|
||||
<!DOCTYPE html PUBLIC 'foo'>
|
||||
@@ -258,8 +256,6 @@ text
|
||||
("endtag", "p"),
|
||||
])
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_get_starttag_text(self):
|
||||
s = """<foo:bar \n one="1"\ttwo=2 >"""
|
||||
self._run_check_extra(s, [
|
||||
@@ -345,8 +341,6 @@ text
|
||||
('comment', '[if lte IE 7]>pretty?<![endif]')]
|
||||
self._run_check(html, expected)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_convert_charrefs(self):
|
||||
# default value for convert_charrefs is now True
|
||||
collector = lambda: EventCollectorCharrefs()
|
||||
@@ -420,8 +414,6 @@ text
|
||||
self._run_check("<a$b >", [('starttag', 'a$b', [])])
|
||||
self._run_check("<a$b />", [('startendtag', 'a$b', [])])
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_slashes_in_starttag(self):
|
||||
self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
|
||||
html = ('<img width=902 height=250px '
|
||||
@@ -498,8 +490,6 @@ text
|
||||
('data', '"> confuses the parser')]
|
||||
self._run_check(html, expected)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_correct_detection_of_start_tags(self):
|
||||
# see #13273
|
||||
html = ('<div style="" ><b>The <a href="some_url">rain</a> '
|
||||
@@ -618,8 +608,6 @@ text
|
||||
|
||||
class AttributesTestCase(TestCaseBase):
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_attr_syntax(self):
|
||||
output = [
|
||||
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
|
||||
@@ -629,8 +617,6 @@ class AttributesTestCase(TestCaseBase):
|
||||
self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
|
||||
self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_attr_values(self):
|
||||
self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
|
||||
[("starttag", "a", [("b", "xxx\n\txxx"),
|
||||
@@ -646,8 +632,6 @@ class AttributesTestCase(TestCaseBase):
|
||||
"<a href=mailto:xyz@example.com>",
|
||||
[("starttag", "a", [("href", "mailto:xyz@example.com")])])
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_attr_nonascii(self):
|
||||
# see issue 7311
|
||||
self._run_check(
|
||||
@@ -668,8 +652,6 @@ class AttributesTestCase(TestCaseBase):
|
||||
"<a b='&><"''>",
|
||||
[("starttag", "a", [("b", "&><\"'")])])
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_attr_funky_names(self):
|
||||
self._run_check(
|
||||
"<a a.b='v' c:d=v e-f=v>",
|
||||
@@ -718,8 +700,6 @@ class AttributesTestCase(TestCaseBase):
|
||||
]
|
||||
self._run_check(html, expected)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_malformed_adjacent_attributes(self):
|
||||
# see #12629
|
||||
self._run_check('<x><y z=""o"" /></x>',
|
||||
@@ -732,8 +712,6 @@ class AttributesTestCase(TestCaseBase):
|
||||
('endtag', 'x')])
|
||||
|
||||
# see #755670 for the following 3 tests
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_adjacent_attributes(self):
|
||||
self._run_check('<a width="100%"cellspacing=0>',
|
||||
[("starttag", "a",
|
||||
@@ -759,8 +737,6 @@ class AttributesTestCase(TestCaseBase):
|
||||
[("href", "http://www.example.org/\">;")]),
|
||||
("data", "spam"), ("endtag", "a")])
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_with_unquoted_attributes(self):
|
||||
# see #12008
|
||||
html = ("<html><body bgcolor=d0ca90 text='181008'>"
|
||||
|
||||
205
Lib/test/test_re.py
vendored
205
Lib/test/test_re.py
vendored
@@ -1,5 +1,6 @@
|
||||
from test.support import (gc_collect, bigmemtest, _2G,
|
||||
cpython_only, captured_stdout)
|
||||
cpython_only, captured_stdout,
|
||||
check_disallow_instantiation)
|
||||
import locale
|
||||
import re
|
||||
import sre_compile
|
||||
@@ -219,6 +220,16 @@ class ReTests(unittest.TestCase):
|
||||
re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
|
||||
re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
|
||||
re.compile(r'(?P<a1>x)\1(?(1)y)')
|
||||
re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)')
|
||||
# New valid identifiers in Python 3
|
||||
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
|
||||
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
|
||||
# Support > 100 groups.
|
||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||
pat = '(?:%s)(?(200)z|t)' % pat
|
||||
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
||||
|
||||
def test_symbolic_groups_errors(self):
|
||||
self.checkPatternError(r'(?P<a>)(?P<a>)',
|
||||
"redefinition of group name 'a' as group 2; "
|
||||
"was group 1")
|
||||
@@ -244,16 +255,22 @@ class ReTests(unittest.TestCase):
|
||||
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
|
||||
self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
|
||||
self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
|
||||
# New valid/invalid identifiers in Python 3
|
||||
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
|
||||
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
|
||||
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
|
||||
# Support > 100 groups.
|
||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||
pat = '(?:%s)(?(200)z|t)' % pat
|
||||
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
||||
self.checkPatternError('(?P=©)', "bad character in group name '©'", 4)
|
||||
self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3)
|
||||
|
||||
def test_symbolic_refs(self):
|
||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
|
||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
|
||||
self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx')
|
||||
# New valid identifiers in Python 3
|
||||
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
|
||||
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
|
||||
# Support > 100 groups.
|
||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
|
||||
|
||||
def test_symbolic_refs_errors(self):
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
|
||||
'missing >, unterminated name', 3)
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
|
||||
@@ -271,18 +288,14 @@ class ReTests(unittest.TestCase):
|
||||
'invalid group reference 2', 1)
|
||||
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
|
||||
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
|
||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
|
||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
|
||||
"bad character in group name '-1'", 3)
|
||||
# New valid/invalid identifiers in Python 3
|
||||
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
|
||||
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
|
||||
"bad character in group name '©'", 3)
|
||||
# Support > 100 groups.
|
||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||
self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx',
|
||||
"bad character in group name '㊀'", 3)
|
||||
self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx',
|
||||
"bad character in group name '¹'", 3)
|
||||
|
||||
def test_re_subn(self):
|
||||
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
|
||||
@@ -544,12 +557,30 @@ class ReTests(unittest.TestCase):
|
||||
pat = '(?:%s)(?(200)z)' % pat
|
||||
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
||||
|
||||
self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_re_groupref_exists_errors(self):
|
||||
self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10)
|
||||
self.checkPatternError(r'()(?(-1)a|b)',
|
||||
"bad character in group name '-1'", 5)
|
||||
self.checkPatternError(r'()(?(㊀)a|b)',
|
||||
"bad character in group name '㊀'", 5)
|
||||
self.checkPatternError(r'()(?(¹)a|b)',
|
||||
"bad character in group name '¹'", 5)
|
||||
self.checkPatternError(r'()(?(1',
|
||||
"missing ), unterminated name", 5)
|
||||
self.checkPatternError(r'()(?(1)a',
|
||||
"missing ), unterminated subpattern", 2)
|
||||
self.checkPatternError(r'()(?(1)a|b',
|
||||
'missing ), unterminated subpattern', 2)
|
||||
self.checkPatternError(r'()(?(1)a|b|c',
|
||||
'conditional backref with more than '
|
||||
'two branches', 10)
|
||||
self.checkPatternError(r'()(?(1)a|b|c)',
|
||||
'conditional backref with more than '
|
||||
'two branches', 10)
|
||||
self.checkPatternError(r'()(?(2)a)',
|
||||
"invalid group reference 2", 5)
|
||||
|
||||
def test_re_groupref_overflow(self):
|
||||
from sre_constants import MAXGROUPS
|
||||
@@ -733,6 +764,10 @@ class ReTests(unittest.TestCase):
|
||||
"undefined character name 'SPAM'", 0)
|
||||
self.checkPatternError(r'[\N{SPAM}]',
|
||||
"undefined character name 'SPAM'", 1)
|
||||
self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
|
||||
"undefined character name 'KEYCAP NUMBER SIGN'", 0)
|
||||
self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
|
||||
"undefined character name 'KEYCAP NUMBER SIGN'", 1)
|
||||
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
|
||||
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
|
||||
|
||||
@@ -836,6 +871,8 @@ class ReTests(unittest.TestCase):
|
||||
self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
|
||||
self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_ignore_case(self):
|
||||
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
|
||||
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
|
||||
@@ -848,20 +885,36 @@ class ReTests(unittest.TestCase):
|
||||
self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
|
||||
self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
|
||||
|
||||
assert '\u212a'.lower() == 'k' # 'K'
|
||||
# Two different characters have the same lowercase.
|
||||
assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
|
||||
self.assertTrue(re.match(r'K', '\u212a', re.I))
|
||||
self.assertTrue(re.match(r'k', '\u212a', re.I))
|
||||
self.assertTrue(re.match(r'\u212a', 'K', re.I))
|
||||
self.assertTrue(re.match(r'\u212a', 'k', re.I))
|
||||
assert '\u017f'.upper() == 'S' # 'ſ'
|
||||
|
||||
# Two different characters have the same uppercase.
|
||||
assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
|
||||
self.assertTrue(re.match(r'S', '\u017f', re.I))
|
||||
self.assertTrue(re.match(r's', '\u017f', re.I))
|
||||
self.assertTrue(re.match(r'\u017f', 'S', re.I))
|
||||
self.assertTrue(re.match(r'\u017f', 's', re.I))
|
||||
|
||||
# Two different characters have the same uppercase. Unicode 9.0+.
|
||||
assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
|
||||
self.assertTrue(re.match(r'\u0412', '\u0432', re.I))
|
||||
self.assertTrue(re.match(r'\u0412', '\u1c80', re.I))
|
||||
self.assertTrue(re.match(r'\u0432', '\u0412', re.I))
|
||||
self.assertTrue(re.match(r'\u0432', '\u1c80', re.I))
|
||||
self.assertTrue(re.match(r'\u1c80', '\u0412', re.I))
|
||||
self.assertTrue(re.match(r'\u1c80', '\u0432', re.I))
|
||||
|
||||
# Two different characters have the same multicharacter uppercase.
|
||||
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
|
||||
self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
|
||||
self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_ignore_case_set(self):
|
||||
self.assertTrue(re.match(r'[19A]', 'A', re.I))
|
||||
self.assertTrue(re.match(r'[19a]', 'a', re.I))
|
||||
@@ -871,20 +924,37 @@ class ReTests(unittest.TestCase):
|
||||
self.assertTrue(re.match(br'[19a]', b'a', re.I))
|
||||
self.assertTrue(re.match(br'[19a]', b'A', re.I))
|
||||
self.assertTrue(re.match(br'[19A]', b'a', re.I))
|
||||
assert '\u212a'.lower() == 'k' # 'K'
|
||||
|
||||
# Two different characters have the same lowercase.
|
||||
assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
|
||||
self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
|
||||
self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
|
||||
self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
|
||||
self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
|
||||
assert '\u017f'.upper() == 'S' # 'ſ'
|
||||
|
||||
# Two different characters have the same uppercase.
|
||||
assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
|
||||
self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
|
||||
self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
|
||||
self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
|
||||
self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
|
||||
|
||||
# Two different characters have the same uppercase. Unicode 9.0+.
|
||||
assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
|
||||
self.assertTrue(re.match(r'[19\u0412]', '\u0432', re.I))
|
||||
self.assertTrue(re.match(r'[19\u0412]', '\u1c80', re.I))
|
||||
self.assertTrue(re.match(r'[19\u0432]', '\u0412', re.I))
|
||||
self.assertTrue(re.match(r'[19\u0432]', '\u1c80', re.I))
|
||||
self.assertTrue(re.match(r'[19\u1c80]', '\u0412', re.I))
|
||||
self.assertTrue(re.match(r'[19\u1c80]', '\u0432', re.I))
|
||||
|
||||
# Two different characters have the same multicharacter uppercase.
|
||||
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
|
||||
self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
|
||||
self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_ignore_case_range(self):
|
||||
# Issues #3511, #17381.
|
||||
self.assertTrue(re.match(r'[9-a]', '_', re.I))
|
||||
@@ -904,16 +974,30 @@ class ReTests(unittest.TestCase):
|
||||
self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
|
||||
self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))
|
||||
|
||||
assert '\u212a'.lower() == 'k' # 'K'
|
||||
# Two different characters have the same lowercase.
|
||||
assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K'
|
||||
self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
|
||||
self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
|
||||
self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
|
||||
self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
|
||||
assert '\u017f'.upper() == 'S' # 'ſ'
|
||||
|
||||
# Two different characters have the same uppercase.
|
||||
assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ'
|
||||
self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
|
||||
self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
|
||||
self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
|
||||
self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
|
||||
|
||||
# Two different characters have the same uppercase. Unicode 9.0+.
|
||||
assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В'
|
||||
self.assertTrue(re.match(r'[\u0411-\u0413]', '\u0432', re.I))
|
||||
self.assertTrue(re.match(r'[\u0411-\u0413]', '\u1c80', re.I))
|
||||
self.assertTrue(re.match(r'[\u0431-\u0433]', '\u0412', re.I))
|
||||
self.assertTrue(re.match(r'[\u0431-\u0433]', '\u1c80', re.I))
|
||||
self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0412', re.I))
|
||||
self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0432', re.I))
|
||||
|
||||
# Two different characters have the same multicharacter uppercase.
|
||||
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st'
|
||||
self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
|
||||
self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
|
||||
@@ -921,6 +1005,7 @@ class ReTests(unittest.TestCase):
|
||||
def test_category(self):
|
||||
self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
|
||||
|
||||
@cpython_only
|
||||
def test_case_helpers(self):
|
||||
import _sre
|
||||
for i in range(128):
|
||||
@@ -1406,6 +1491,8 @@ class ReTests(unittest.TestCase):
|
||||
self.assertIsNone(re.compile(b"bla").match(a))
|
||||
self.assertEqual(re.compile(b"").match(a).groups(), ())
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_inline_flags(self):
|
||||
# Bug #1700
|
||||
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
|
||||
@@ -1454,7 +1541,8 @@ class ReTests(unittest.TestCase):
|
||||
self.assertTrue(re.match(p, lower_char))
|
||||
self.assertEqual(
|
||||
str(warns.warnings[0].message),
|
||||
'Flags not at the start of the expression %r' % p
|
||||
'Flags not at the start of the expression %r'
|
||||
' but at position 1' % p
|
||||
)
|
||||
self.assertEqual(warns.warnings[0].filename, __file__)
|
||||
|
||||
@@ -1463,7 +1551,8 @@ class ReTests(unittest.TestCase):
|
||||
self.assertTrue(re.match(p, lower_char))
|
||||
self.assertEqual(
|
||||
str(warns.warnings[0].message),
|
||||
'Flags not at the start of the expression %r (truncated)' % p[:20]
|
||||
'Flags not at the start of the expression %r (truncated)'
|
||||
' but at position 1' % p[:20]
|
||||
)
|
||||
self.assertEqual(warns.warnings[0].filename, __file__)
|
||||
|
||||
@@ -1475,7 +1564,8 @@ class ReTests(unittest.TestCase):
|
||||
self.assertTrue(re.match(p, b'a'))
|
||||
self.assertEqual(
|
||||
str(warns.warnings[0].message),
|
||||
'Flags not at the start of the expression %r' % p
|
||||
'Flags not at the start of the expression %r'
|
||||
' but at position 1' % p
|
||||
)
|
||||
self.assertEqual(warns.warnings[0].filename, __file__)
|
||||
|
||||
@@ -1615,11 +1705,6 @@ class ReTests(unittest.TestCase):
|
||||
self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
|
||||
self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))
|
||||
|
||||
self.assertTrue(re.match(r'(?x: a) b', 'a b'))
|
||||
self.assertIsNone(re.match(r'(?x: a) b', ' a b'))
|
||||
self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE))
|
||||
self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE))
|
||||
|
||||
self.assertTrue(re.match(r'\w(?a:\W)\w', '\xe0\xe0\xe0'))
|
||||
self.assertTrue(re.match(r'(?a:\W(?u:\w)\W)', '\xe0\xe0\xe0'))
|
||||
self.assertTrue(re.match(r'\W(?u:\w)\W', '\xe0\xe0\xe0', re.ASCII))
|
||||
@@ -1645,6 +1730,33 @@ class ReTests(unittest.TestCase):
|
||||
self.checkPatternError(r'(?i+', 'missing -, : or )', 3)
|
||||
self.checkPatternError(r'(?iz', 'unknown flag', 3)
|
||||
|
||||
def test_ignore_spaces(self):
|
||||
for space in " \t\n\r\v\f":
|
||||
self.assertTrue(re.fullmatch(space + 'a', 'a', re.VERBOSE))
|
||||
for space in b" ", b"\t", b"\n", b"\r", b"\v", b"\f":
|
||||
self.assertTrue(re.fullmatch(space + b'a', b'a', re.VERBOSE))
|
||||
self.assertTrue(re.fullmatch('(?x) a', 'a'))
|
||||
self.assertTrue(re.fullmatch(' (?x) a', 'a', re.VERBOSE))
|
||||
self.assertTrue(re.fullmatch('(?x) (?x) a', 'a'))
|
||||
self.assertTrue(re.fullmatch(' a(?x: b) c', ' ab c'))
|
||||
self.assertTrue(re.fullmatch(' a(?-x: b) c', 'a bc', re.VERBOSE))
|
||||
self.assertTrue(re.fullmatch('(?x) a(?-x: b) c', 'a bc'))
|
||||
self.assertTrue(re.fullmatch('(?x) a| b', 'a'))
|
||||
self.assertTrue(re.fullmatch('(?x) a| b', 'b'))
|
||||
|
||||
def test_comments(self):
|
||||
self.assertTrue(re.fullmatch('#x\na', 'a', re.VERBOSE))
|
||||
self.assertTrue(re.fullmatch(b'#x\na', b'a', re.VERBOSE))
|
||||
self.assertTrue(re.fullmatch('(?x)#x\na', 'a'))
|
||||
self.assertTrue(re.fullmatch('#x\n(?x)#y\na', 'a', re.VERBOSE))
|
||||
self.assertTrue(re.fullmatch('(?x)#x\n(?x)#y\na', 'a'))
|
||||
self.assertTrue(re.fullmatch('#x\na(?x:#y\nb)#z\nc', '#x\nab#z\nc'))
|
||||
self.assertTrue(re.fullmatch('#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc',
|
||||
re.VERBOSE))
|
||||
self.assertTrue(re.fullmatch('(?x)#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc'))
|
||||
self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'a'))
|
||||
self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'b'))
|
||||
|
||||
def test_bug_6509(self):
|
||||
# Replacement strings of both types must parse properly.
|
||||
# all strings
|
||||
@@ -1738,6 +1850,7 @@ class ReTests(unittest.TestCase):
|
||||
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
|
||||
self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
|
||||
|
||||
@cpython_only
|
||||
def test_repeat_minmax_overflow_maxrepeat(self):
|
||||
try:
|
||||
from _sre import MAXREPEAT
|
||||
@@ -1821,7 +1934,8 @@ class ReTests(unittest.TestCase):
|
||||
[(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)])
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
# @unittest.expectedFailure
|
||||
@unittest.skip("")
|
||||
def test_bug_2537(self):
|
||||
# issue 2537: empty submatches
|
||||
for outer_op in ('{0,}', '*', '+', '{1,187}'):
|
||||
@@ -1832,6 +1946,7 @@ class ReTests(unittest.TestCase):
|
||||
self.assertEqual(m.group(1), "")
|
||||
self.assertEqual(m.group(2), "y")
|
||||
|
||||
@cpython_only
|
||||
def test_debug_flag(self):
|
||||
pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
|
||||
with captured_stdout() as out:
|
||||
@@ -2207,6 +2322,18 @@ class ImplementationTest(unittest.TestCase):
|
||||
Test implementation details of the re module.
|
||||
"""
|
||||
|
||||
@cpython_only
|
||||
def test_immutable(self):
|
||||
# bpo-43908: check that re types are immutable
|
||||
with self.assertRaises(TypeError):
|
||||
re.Match.foo = 1
|
||||
with self.assertRaises(TypeError):
|
||||
re.Pattern.foo = 1
|
||||
with self.assertRaises(TypeError):
|
||||
pat = re.compile("")
|
||||
tp = type(pat.scanner(""))
|
||||
tp.foo = 1
|
||||
|
||||
def test_overlap_table(self):
|
||||
f = sre_compile._generate_overlap_table
|
||||
self.assertEqual(f(""), [])
|
||||
@@ -2216,6 +2343,18 @@ class ImplementationTest(unittest.TestCase):
|
||||
self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
|
||||
self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
|
||||
|
||||
def test_signedness(self):
|
||||
self.assertGreaterEqual(sre_compile.MAXREPEAT, 0)
|
||||
self.assertGreaterEqual(sre_compile.MAXGROUPS, 0)
|
||||
|
||||
@cpython_only
|
||||
def test_disallow_instantiation(self):
|
||||
# Ensure that the type disallows instantiation (bpo-43916)
|
||||
check_disallow_instantiation(self, re.Match)
|
||||
check_disallow_instantiation(self, re.Pattern)
|
||||
pat = re.compile("")
|
||||
check_disallow_instantiation(self, type(pat.scanner("")))
|
||||
|
||||
|
||||
class ExternalTests(unittest.TestCase):
|
||||
|
||||
@@ -2236,7 +2375,7 @@ class ExternalTests(unittest.TestCase):
|
||||
|
||||
def test_re_tests(self):
|
||||
're_tests test suite'
|
||||
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
||||
from test.re_tests import tests, FAIL, SYNTAX_ERROR
|
||||
for t in tests:
|
||||
pattern = s = outcome = repl = expected = None
|
||||
if len(t) == 5:
|
||||
|
||||
@@ -67,4 +67,7 @@ assert re.match(r'\babc\b', 'abc').group() == 'abc'
|
||||
|
||||
urlpattern = re.compile('//([^/#?]*)(.*)', re.DOTALL)
|
||||
url = '//www.example.org:80/foo/bar/baz.html'
|
||||
assert urlpattern.match(url).group(1) == 'www.example.org:80'
|
||||
assert urlpattern.match(url).group(1) == 'www.example.org:80'
|
||||
|
||||
assert re.compile('(?:\w+(?:\s|/(?!>))*)*').match('a /bb />ccc').group() == 'a /bb '
|
||||
assert re.compile('(?:(1)?)*').match('111').group() == '111'
|
||||
@@ -72,7 +72,7 @@ memoffset = "0.6.5"
|
||||
optional = "0.5.0"
|
||||
|
||||
# RustPython crates implementing functionality based on CPython
|
||||
sre-engine = "0.1.2"
|
||||
sre-engine = "0.2.0"
|
||||
# to work on sre-engine locally
|
||||
# sre-engine = { path = "../../sre-engine" }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user