mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Update fnmatch from CPython 3.10
This commit is contained in:
committed by
Jeong Yunwon
parent
2154d12d96
commit
3d4fe934a1
104
Lib/fnmatch.py
vendored
104
Lib/fnmatch.py
vendored
@@ -9,16 +9,19 @@ expression. They cache the compiled regular expressions for speed.
|
||||
The function translate(PATTERN) returns a regular expression
|
||||
corresponding to PATTERN. (It does not compile it.)
|
||||
"""
|
||||
try:
|
||||
import os
|
||||
except ImportError:
|
||||
import _dummy_os as os
|
||||
import os
|
||||
import posixpath
|
||||
import re
|
||||
import functools
|
||||
|
||||
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
|
||||
|
||||
# Build a thread-safe incrementing counter to help create unique regexp group
|
||||
# names across calls.
|
||||
from itertools import count
|
||||
_nextgroupnum = count().__next__
|
||||
del count
|
||||
|
||||
def fnmatch(name, pat):
|
||||
"""Test whether FILENAME matches PATTERN.
|
||||
|
||||
@@ -49,7 +52,7 @@ def _compile_pattern(pat):
|
||||
return re.compile(res).match
|
||||
|
||||
def filter(names, pat):
|
||||
"""Return the subset of the list NAMES that match PAT."""
|
||||
"""Construct a list from those elements of the iterable NAMES that match PAT."""
|
||||
result = []
|
||||
pat = os.path.normcase(pat)
|
||||
match = _compile_pattern(pat)
|
||||
@@ -80,15 +83,19 @@ def translate(pat):
|
||||
There is no way to quote meta-characters.
|
||||
"""
|
||||
|
||||
STAR = object()
|
||||
res = []
|
||||
add = res.append
|
||||
i, n = 0, len(pat)
|
||||
res = ''
|
||||
while i < n:
|
||||
c = pat[i]
|
||||
i = i+1
|
||||
if c == '*':
|
||||
res = res + '.*'
|
||||
# compress consecutive `*` into one
|
||||
if (not res) or res[-1] is not STAR:
|
||||
add(STAR)
|
||||
elif c == '?':
|
||||
res = res + '.'
|
||||
add('.')
|
||||
elif c == '[':
|
||||
j = i
|
||||
if j < n and pat[j] == '!':
|
||||
@@ -98,10 +105,10 @@ def translate(pat):
|
||||
while j < n and pat[j] != ']':
|
||||
j = j+1
|
||||
if j >= n:
|
||||
res = res + '\\['
|
||||
add('\\[')
|
||||
else:
|
||||
stuff = pat[i:j]
|
||||
if '--' not in stuff:
|
||||
if '-' not in stuff:
|
||||
stuff = stuff.replace('\\', r'\\')
|
||||
else:
|
||||
chunks = []
|
||||
@@ -113,7 +120,16 @@ def translate(pat):
|
||||
chunks.append(pat[i:k])
|
||||
i = k+1
|
||||
k = k+3
|
||||
chunks.append(pat[i:j])
|
||||
chunk = pat[i:j]
|
||||
if chunk:
|
||||
chunks.append(chunk)
|
||||
else:
|
||||
chunks[-1] += '-'
|
||||
# Remove empty ranges -- invalid in RE.
|
||||
for k in range(len(chunks)-1, 0, -1):
|
||||
if chunks[k-1][-1] > chunks[k][0]:
|
||||
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
|
||||
del chunks[k]
|
||||
# Escape backslashes and hyphens for set difference (--).
|
||||
# Hyphens that create ranges shouldn't be escaped.
|
||||
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
|
||||
@@ -121,11 +137,63 @@ def translate(pat):
|
||||
# Escape set operations (&&, ~~ and ||).
|
||||
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
|
||||
i = j+1
|
||||
if stuff[0] == '!':
|
||||
stuff = '^' + stuff[1:]
|
||||
elif stuff[0] in ('^', '['):
|
||||
stuff = '\\' + stuff
|
||||
res = '%s[%s]' % (res, stuff)
|
||||
if not stuff:
|
||||
# Empty range: never match.
|
||||
add('(?!)')
|
||||
elif stuff == '!':
|
||||
# Negated empty range: match any character.
|
||||
add('.')
|
||||
else:
|
||||
if stuff[0] == '!':
|
||||
stuff = '^' + stuff[1:]
|
||||
elif stuff[0] in ('^', '['):
|
||||
stuff = '\\' + stuff
|
||||
add(f'[{stuff}]')
|
||||
else:
|
||||
res = res + re.escape(c)
|
||||
return r'(?s:%s)\Z' % res
|
||||
add(re.escape(c))
|
||||
assert i == n
|
||||
|
||||
# Deal with STARs.
|
||||
inp = res
|
||||
res = []
|
||||
add = res.append
|
||||
i, n = 0, len(inp)
|
||||
# Fixed pieces at the start?
|
||||
while i < n and inp[i] is not STAR:
|
||||
add(inp[i])
|
||||
i += 1
|
||||
# Now deal with STAR fixed STAR fixed ...
|
||||
# For an interior `STAR fixed` pairing, we want to do a minimal
|
||||
# .*? match followed by `fixed`, with no possibility of backtracking.
|
||||
# We can't spell that directly, but can trick it into working by matching
|
||||
# .*?fixed
|
||||
# in a lookahead assertion, save the matched part in a group, then
|
||||
# consume that group via a backreference. If the overall match fails,
|
||||
# the lookahead assertion won't try alternatives. So the translation is:
|
||||
# (?=(?P<name>.*?fixed))(?P=name)
|
||||
# Group names are created as needed: g0, g1, g2, ...
|
||||
# The numbers are obtained from _nextgroupnum() to ensure they're unique
|
||||
# across calls and across threads. This is because people rely on the
|
||||
# undocumented ability to join multiple translate() results together via
|
||||
# "|" to build large regexps matching "one of many" shell patterns.
|
||||
while i < n:
|
||||
assert inp[i] is STAR
|
||||
i += 1
|
||||
if i == n:
|
||||
add(".*")
|
||||
break
|
||||
assert inp[i] is not STAR
|
||||
fixed = []
|
||||
while i < n and inp[i] is not STAR:
|
||||
fixed.append(inp[i])
|
||||
i += 1
|
||||
fixed = "".join(fixed)
|
||||
if i == n:
|
||||
add(".*")
|
||||
add(fixed)
|
||||
else:
|
||||
groupnum = _nextgroupnum()
|
||||
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
|
||||
assert i == n
|
||||
res = "".join(res)
|
||||
return fr'(?s:{res})\Z'
|
||||
|
||||
151
Lib/test/test_fnmatch.py
vendored
151
Lib/test/test_fnmatch.py
vendored
@@ -2,6 +2,7 @@
|
||||
|
||||
import unittest
|
||||
import os
|
||||
import string
|
||||
import warnings
|
||||
|
||||
from fnmatch import fnmatch, fnmatchcase, translate, filter
|
||||
@@ -45,6 +46,13 @@ class FnmatchTestCase(unittest.TestCase):
|
||||
check('\nfoo', 'foo*', False)
|
||||
check('\n', '*')
|
||||
|
||||
def test_slow_fnmatch(self):
|
||||
check = self.check_match
|
||||
check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
|
||||
# The next "takes forever" if the regexp translation is
|
||||
# straightforward. See bpo-40480.
|
||||
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
|
||||
|
||||
def test_mix_bytes_str(self):
|
||||
self.assertRaises(TypeError, fnmatch, 'test', b'*')
|
||||
self.assertRaises(TypeError, fnmatch, b'test', '*')
|
||||
@@ -89,6 +97,119 @@ class FnmatchTestCase(unittest.TestCase):
|
||||
check('usr/bin', 'usr\\bin', normsep)
|
||||
check('usr\\bin', 'usr\\bin')
|
||||
|
||||
def test_char_set(self):
|
||||
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
|
||||
check = self.check_match
|
||||
tescases = string.ascii_lowercase + string.digits + string.punctuation
|
||||
for c in tescases:
|
||||
check(c, '[az]', c in 'az')
|
||||
check(c, '[!az]', c not in 'az')
|
||||
# Case insensitive.
|
||||
for c in tescases:
|
||||
check(c, '[AZ]', (c in 'az') and ignorecase)
|
||||
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
|
||||
for c in string.ascii_uppercase:
|
||||
check(c, '[az]', (c in 'AZ') and ignorecase)
|
||||
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
|
||||
# Repeated same character.
|
||||
for c in tescases:
|
||||
check(c, '[aa]', c == 'a')
|
||||
# Special cases.
|
||||
for c in tescases:
|
||||
check(c, '[^az]', c in '^az')
|
||||
check(c, '[[az]', c in '[az')
|
||||
check(c, r'[!]]', c != ']')
|
||||
check('[', '[')
|
||||
check('[]', '[]')
|
||||
check('[!', '[!')
|
||||
check('[!]', '[!]')
|
||||
|
||||
def test_range(self):
|
||||
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
|
||||
normsep = os.path.normcase('\\') == os.path.normcase('/')
|
||||
check = self.check_match
|
||||
tescases = string.ascii_lowercase + string.digits + string.punctuation
|
||||
for c in tescases:
|
||||
check(c, '[b-d]', c in 'bcd')
|
||||
check(c, '[!b-d]', c not in 'bcd')
|
||||
check(c, '[b-dx-z]', c in 'bcdxyz')
|
||||
check(c, '[!b-dx-z]', c not in 'bcdxyz')
|
||||
# Case insensitive.
|
||||
for c in tescases:
|
||||
check(c, '[B-D]', (c in 'bcd') and ignorecase)
|
||||
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
|
||||
for c in string.ascii_uppercase:
|
||||
check(c, '[b-d]', (c in 'BCD') and ignorecase)
|
||||
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
|
||||
# Upper bound == lower bound.
|
||||
for c in tescases:
|
||||
check(c, '[b-b]', c == 'b')
|
||||
# Special cases.
|
||||
for c in tescases:
|
||||
check(c, '[!-#]', c not in '-#')
|
||||
check(c, '[!--.]', c not in '-.')
|
||||
check(c, '[^-`]', c in '^_`')
|
||||
if not (normsep and c == '/'):
|
||||
check(c, '[[-^]', c in r'[\]^')
|
||||
check(c, r'[\-^]', c in r'\]^')
|
||||
check(c, '[b-]', c in '-b')
|
||||
check(c, '[!b-]', c not in '-b')
|
||||
check(c, '[-b]', c in '-b')
|
||||
check(c, '[!-b]', c not in '-b')
|
||||
check(c, '[-]', c in '-')
|
||||
check(c, '[!-]', c not in '-')
|
||||
# Upper bound is less than lower bound: error in RE.
|
||||
for c in tescases:
|
||||
check(c, '[d-b]', False)
|
||||
check(c, '[!d-b]', True)
|
||||
check(c, '[d-bx-z]', c in 'xyz')
|
||||
check(c, '[!d-bx-z]', c not in 'xyz')
|
||||
check(c, '[d-b^-`]', c in '^_`')
|
||||
if not (normsep and c == '/'):
|
||||
check(c, '[d-b[-^]', c in r'[\]^')
|
||||
|
||||
def test_sep_in_char_set(self):
|
||||
normsep = os.path.normcase('\\') == os.path.normcase('/')
|
||||
check = self.check_match
|
||||
check('/', r'[/]')
|
||||
check('\\', r'[\]')
|
||||
check('/', r'[\]', normsep)
|
||||
check('\\', r'[/]', normsep)
|
||||
check('[/]', r'[/]', False)
|
||||
check(r'[\\]', r'[/]', False)
|
||||
check('\\', r'[\t]')
|
||||
check('/', r'[\t]', normsep)
|
||||
check('t', r'[\t]')
|
||||
check('\t', r'[\t]', False)
|
||||
|
||||
def test_sep_in_range(self):
|
||||
normsep = os.path.normcase('\\') == os.path.normcase('/')
|
||||
check = self.check_match
|
||||
check('a/b', 'a[.-0]b', not normsep)
|
||||
check('a\\b', 'a[.-0]b', False)
|
||||
check('a\\b', 'a[Z-^]b', not normsep)
|
||||
check('a/b', 'a[Z-^]b', False)
|
||||
|
||||
check('a/b', 'a[/-0]b', not normsep)
|
||||
check(r'a\b', 'a[/-0]b', False)
|
||||
check('a[/-0]b', 'a[/-0]b', False)
|
||||
check(r'a[\-0]b', 'a[/-0]b', False)
|
||||
|
||||
check('a/b', 'a[.-/]b')
|
||||
check(r'a\b', 'a[.-/]b', normsep)
|
||||
check('a[.-/]b', 'a[.-/]b', False)
|
||||
check(r'a[.-\]b', 'a[.-/]b', False)
|
||||
|
||||
check(r'a\b', r'a[\-^]b')
|
||||
check('a/b', r'a[\-^]b', normsep)
|
||||
check(r'a[\-^]b', r'a[\-^]b', False)
|
||||
check('a[/-^]b', r'a[\-^]b', False)
|
||||
|
||||
check(r'a\b', r'a[Z-\]b', not normsep)
|
||||
check('a/b', r'a[Z-\]b', False)
|
||||
check(r'a[Z-\]b', r'a[Z-\]b', False)
|
||||
check('a[Z-/]b', r'a[Z-\]b', False)
|
||||
|
||||
def test_warnings(self):
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('error', Warning)
|
||||
@@ -104,6 +225,7 @@ class FnmatchTestCase(unittest.TestCase):
|
||||
class TranslateTestCase(unittest.TestCase):
|
||||
|
||||
def test_translate(self):
|
||||
import re
|
||||
self.assertEqual(translate('*'), r'(?s:.*)\Z')
|
||||
self.assertEqual(translate('?'), r'(?s:.)\Z')
|
||||
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
|
||||
@@ -112,7 +234,34 @@ class TranslateTestCase(unittest.TestCase):
|
||||
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
|
||||
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
|
||||
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
|
||||
|
||||
# from the docs
|
||||
self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
|
||||
# squash consecutive stars
|
||||
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
|
||||
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
|
||||
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
|
||||
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
|
||||
# fancy translation to prevent exponential-time match failure
|
||||
t = translate('**a*a****a')
|
||||
digits = re.findall(r'\d+', t)
|
||||
self.assertEqual(len(digits), 4)
|
||||
self.assertEqual(digits[0], digits[1])
|
||||
self.assertEqual(digits[2], digits[3])
|
||||
g1 = f"g{digits[0]}" # e.g., group name "g4"
|
||||
g2 = f"g{digits[2]}" # e.g., group name "g5"
|
||||
self.assertEqual(t,
|
||||
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
|
||||
# and try pasting multiple translate results - it's an undocumented
|
||||
# feature that this works; all the pain of generating unique group
|
||||
# names across calls exists to support this
|
||||
r1 = translate('**a**a**a*')
|
||||
r2 = translate('**b**b**b*')
|
||||
r3 = translate('*c*c*c*')
|
||||
fatre = "|".join([r1, r2, r3])
|
||||
self.assertTrue(re.match(fatre, 'abaccad'))
|
||||
self.assertTrue(re.match(fatre, 'abxbcab'))
|
||||
self.assertTrue(re.match(fatre, 'cbabcaxc'))
|
||||
self.assertFalse(re.match(fatre, 'dabccbad'))
|
||||
|
||||
class FilterTestCase(unittest.TestCase):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user