Update fnmatch from CPython 3.10

This commit is contained in:
CPython Developers
2022-08-16 09:05:29 +09:00
committed by Jeong Yunwon
parent 2154d12d96
commit 3d4fe934a1
2 changed files with 236 additions and 19 deletions

104
Lib/fnmatch.py vendored
View File

@@ -9,16 +9,19 @@ expression. They cache the compiled regular expressions for speed.
The function translate(PATTERN) returns a regular expression
corresponding to PATTERN. (It does not compile it.)
"""
try:
import os
except ImportError:
import _dummy_os as os
import os
import posixpath
import re
import functools
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
# Build a thread-safe incrementing counter to help create unique regexp group
# names across calls.
from itertools import count
_nextgroupnum = count().__next__
del count
def fnmatch(name, pat):
"""Test whether FILENAME matches PATTERN.
@@ -49,7 +52,7 @@ def _compile_pattern(pat):
return re.compile(res).match
def filter(names, pat):
"""Return the subset of the list NAMES that match PAT."""
"""Construct a list from those elements of the iterable NAMES that match PAT."""
result = []
pat = os.path.normcase(pat)
match = _compile_pattern(pat)
@@ -80,15 +83,19 @@ def translate(pat):
There is no way to quote meta-characters.
"""
STAR = object()
res = []
add = res.append
i, n = 0, len(pat)
res = ''
while i < n:
c = pat[i]
i = i+1
if c == '*':
res = res + '.*'
# compress consecutive `*` into one
if (not res) or res[-1] is not STAR:
add(STAR)
elif c == '?':
res = res + '.'
add('.')
elif c == '[':
j = i
if j < n and pat[j] == '!':
@@ -98,10 +105,10 @@ def translate(pat):
while j < n and pat[j] != ']':
j = j+1
if j >= n:
res = res + '\\['
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
@@ -113,7 +120,16 @@ def translate(pat):
chunks.append(pat[i:k])
i = k+1
k = k+3
chunks.append(pat[i:j])
chunk = pat[i:j]
if chunk:
chunks.append(chunk)
else:
chunks[-1] += '-'
# Remove empty ranges -- invalid in RE.
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
@@ -121,11 +137,63 @@ def translate(pat):
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
res = '%s[%s]' % (res, stuff)
if not stuff:
# Empty range: never match.
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
res = res + re.escape(c)
return r'(?s:%s)\Z' % res
add(re.escape(c))
assert i == n
# Deal with STARs.
inp = res
res = []
add = res.append
i, n = 0, len(inp)
# Fixed pieces at the start?
while i < n and inp[i] is not STAR:
add(inp[i])
i += 1
# Now deal with STAR fixed STAR fixed ...
# For an interior `STAR fixed` pairing, we want to do a minimal
# .*? match followed by `fixed`, with no possibility of backtracking.
# We can't spell that directly, but can trick it into working by matching
# .*?fixed
# in a lookahead assertion, save the matched part in a group, then
# consume that group via a backreference. If the overall match fails,
# the lookahead assertion won't try alternatives. So the translation is:
# (?=(?P<name>.*?fixed))(?P=name)
# Group names are created as needed: g0, g1, g2, ...
# The numbers are obtained from _nextgroupnum() to ensure they're unique
# across calls and across threads. This is because people rely on the
# undocumented ability to join multiple translate() results together via
# "|" to build large regexps matching "one of many" shell patterns.
while i < n:
assert inp[i] is STAR
i += 1
if i == n:
add(".*")
break
assert inp[i] is not STAR
fixed = []
while i < n and inp[i] is not STAR:
fixed.append(inp[i])
i += 1
fixed = "".join(fixed)
if i == n:
add(".*")
add(fixed)
else:
groupnum = _nextgroupnum()
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
assert i == n
res = "".join(res)
return fr'(?s:{res})\Z'

View File

@@ -2,6 +2,7 @@
import unittest
import os
import string
import warnings
from fnmatch import fnmatch, fnmatchcase, translate, filter
@@ -45,6 +46,13 @@ class FnmatchTestCase(unittest.TestCase):
check('\nfoo', 'foo*', False)
check('\n', '*')
def test_slow_fnmatch(self):
check = self.check_match
check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
# The next "takes forever" if the regexp translation is
# straightforward. See bpo-40480.
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
def test_mix_bytes_str(self):
self.assertRaises(TypeError, fnmatch, 'test', b'*')
self.assertRaises(TypeError, fnmatch, b'test', '*')
@@ -89,6 +97,119 @@ class FnmatchTestCase(unittest.TestCase):
check('usr/bin', 'usr\\bin', normsep)
check('usr\\bin', 'usr\\bin')
def test_char_set(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
check(c, '[az]', c in 'az')
check(c, '[!az]', c not in 'az')
# Case insensitive.
for c in tescases:
check(c, '[AZ]', (c in 'az') and ignorecase)
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
for c in string.ascii_uppercase:
check(c, '[az]', (c in 'AZ') and ignorecase)
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
# Repeated same character.
for c in tescases:
check(c, '[aa]', c == 'a')
# Special cases.
for c in tescases:
check(c, '[^az]', c in '^az')
check(c, '[[az]', c in '[az')
check(c, r'[!]]', c != ']')
check('[', '[')
check('[]', '[]')
check('[!', '[!')
check('[!]', '[!]')
def test_range(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
check(c, '[b-d]', c in 'bcd')
check(c, '[!b-d]', c not in 'bcd')
check(c, '[b-dx-z]', c in 'bcdxyz')
check(c, '[!b-dx-z]', c not in 'bcdxyz')
# Case insensitive.
for c in tescases:
check(c, '[B-D]', (c in 'bcd') and ignorecase)
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
for c in string.ascii_uppercase:
check(c, '[b-d]', (c in 'BCD') and ignorecase)
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
# Upper bound == lower bound.
for c in tescases:
check(c, '[b-b]', c == 'b')
# Special cases.
for c in tescases:
check(c, '[!-#]', c not in '-#')
check(c, '[!--.]', c not in '-.')
check(c, '[^-`]', c in '^_`')
if not (normsep and c == '/'):
check(c, '[[-^]', c in r'[\]^')
check(c, r'[\-^]', c in r'\]^')
check(c, '[b-]', c in '-b')
check(c, '[!b-]', c not in '-b')
check(c, '[-b]', c in '-b')
check(c, '[!-b]', c not in '-b')
check(c, '[-]', c in '-')
check(c, '[!-]', c not in '-')
# Upper bound is less that lower bound: error in RE.
for c in tescases:
check(c, '[d-b]', False)
check(c, '[!d-b]', True)
check(c, '[d-bx-z]', c in 'xyz')
check(c, '[!d-bx-z]', c not in 'xyz')
check(c, '[d-b^-`]', c in '^_`')
if not (normsep and c == '/'):
check(c, '[d-b[-^]', c in r'[\]^')
def test_sep_in_char_set(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('/', r'[/]')
check('\\', r'[\]')
check('/', r'[\]', normsep)
check('\\', r'[/]', normsep)
check('[/]', r'[/]', False)
check(r'[\\]', r'[/]', False)
check('\\', r'[\t]')
check('/', r'[\t]', normsep)
check('t', r'[\t]')
check('\t', r'[\t]', False)
def test_sep_in_range(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('a/b', 'a[.-0]b', not normsep)
check('a\\b', 'a[.-0]b', False)
check('a\\b', 'a[Z-^]b', not normsep)
check('a/b', 'a[Z-^]b', False)
check('a/b', 'a[/-0]b', not normsep)
check(r'a\b', 'a[/-0]b', False)
check('a[/-0]b', 'a[/-0]b', False)
check(r'a[\-0]b', 'a[/-0]b', False)
check('a/b', 'a[.-/]b')
check(r'a\b', 'a[.-/]b', normsep)
check('a[.-/]b', 'a[.-/]b', False)
check(r'a[.-\]b', 'a[.-/]b', False)
check(r'a\b', r'a[\-^]b')
check('a/b', r'a[\-^]b', normsep)
check(r'a[\-^]b', r'a[\-^]b', False)
check('a[/-^]b', r'a[\-^]b', False)
check(r'a\b', r'a[Z-\]b', not normsep)
check('a/b', r'a[Z-\]b', False)
check(r'a[Z-\]b', r'a[Z-\]b', False)
check('a[Z-/]b', r'a[Z-\]b', False)
def test_warnings(self):
with warnings.catch_warnings():
warnings.simplefilter('error', Warning)
@@ -104,6 +225,7 @@ class FnmatchTestCase(unittest.TestCase):
class TranslateTestCase(unittest.TestCase):
def test_translate(self):
import re
self.assertEqual(translate('*'), r'(?s:.*)\Z')
self.assertEqual(translate('?'), r'(?s:.)\Z')
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
@@ -112,7 +234,34 @@ class TranslateTestCase(unittest.TestCase):
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
# from the docs
self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
# squash consecutive stars
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
# fancy translation to prevent exponential-time match failure
t = translate('**a*a****a')
digits = re.findall(r'\d+', t)
self.assertEqual(len(digits), 4)
self.assertEqual(digits[0], digits[1])
self.assertEqual(digits[2], digits[3])
g1 = f"g{digits[0]}" # e.g., group name "g4"
g2 = f"g{digits[2]}" # e.g., group name "g5"
self.assertEqual(t,
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
# and try pasting multiple translate results - it's an undocumented
# feature that this works; all the pain of generating unique group
# names across calls exists to support this
r1 = translate('**a**a**a*')
r2 = translate('**b**b**b*')
r3 = translate('*c*c*c*')
fatre = "|".join([r1, r2, r3])
self.assertTrue(re.match(fatre, 'abaccad'))
self.assertTrue(re.match(fatre, 'abxbcab'))
self.assertTrue(re.match(fatre, 'cbabcaxc'))
self.assertFalse(re.match(fatre, 'dabccbad'))
class FilterTestCase(unittest.TestCase):