diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index e37ad45196..8e010c79c1 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -46,7 +46,7 @@ def read_code(stream): if magic != importlib.util.MAGIC_NUMBER: return None - stream.read(8) # Skip timestamp and size + stream.read(12) # Skip rest of the header return marshal.load(stream) @@ -119,6 +119,9 @@ def iter_modules(path=None, prefix=''): """ if path is None: importers = iter_importers() + elif isinstance(path, str): + raise ValueError("path must be None or list of paths to look for " + "modules in") else: importers = map(get_importer, path) @@ -201,7 +204,8 @@ class ImpImporter: def __init__(self, path=None): global imp - warnings.warn("This emulation is deprecated, use 'importlib' instead", + warnings.warn("This emulation is deprecated and slated for removal " + "in Python 3.12; use 'importlib' instead", DeprecationWarning) _import_imp() self.path = path @@ -268,7 +272,8 @@ class ImpLoader: code = source = None def __init__(self, fullname, file, filename, etc): - warnings.warn("This emulation is deprecated, use 'importlib' instead", + warnings.warn("This emulation is deprecated and slated for removal in " + "Python 3.12; use 'importlib' instead", DeprecationWarning) _import_imp() self.file = file @@ -632,3 +637,79 @@ def get_data(package, resource): parts.insert(0, os.path.dirname(mod.__file__)) resource_name = os.path.join(*parts) return loader.get_data(resource_name) + + +_NAME_PATTERN = None + +def resolve_name(name): + """ + Resolve a name to an object. + + It is expected that `name` will be a string in one of the following + formats, where W is shorthand for a valid Python identifier and dot stands + for a literal period in these pseudo-regexes: + + W(.W)* + W(.W)*:(W(.W)*)? + + The first form is intended for backward compatibility only. It assumes that + some part of the dotted name is a package, and the rest is an object + somewhere within that package, possibly nested inside other objects. + Because the place where the package stops and the object hierarchy starts + can't be inferred by inspection, repeated attempts to import must be done + with this form. + + In the second form, the caller makes the division point clear through the + provision of a single colon: the dotted name to the left of the colon is a + package to be imported, and the dotted name to the right is the object + hierarchy within that package. Only one import is needed in this form. If + it ends with the colon, then a module object is returned. + + The function will return an object (which might be a module), or raise one + of the following exceptions: + + ValueError - if `name` isn't in a recognised format + ImportError - if an import failed when it shouldn't have + AttributeError - if a failure occurred when traversing the object hierarchy + within the imported package to get to the desired object. + """ + global _NAME_PATTERN + if _NAME_PATTERN is None: + # Lazy import to speedup Python startup time + import re + dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*' + _NAME_PATTERN = re.compile(f'^(?P{dotted_words})' + f'(?P:(?P{dotted_words})?)?$', + re.UNICODE) + + m = _NAME_PATTERN.match(name) + if not m: + raise ValueError(f'invalid format: {name!r}') + gd = m.groupdict() + if gd.get('cln'): + # there is a colon - a one-step import is all that's needed + mod = importlib.import_module(gd['pkg']) + parts = gd.get('obj') + parts = parts.split('.') if parts else [] + else: + # no colon - have to iterate to find the package boundary + parts = name.split('.') + modname = parts.pop(0) + # first part *must* be a module/package. + mod = importlib.import_module(modname) + while parts: + p = parts[0] + s = f'{modname}.{p}' + try: + mod = importlib.import_module(s) + parts.pop(0) + modname = s + except ImportError: + break + # if we reach this point, mod is the module, already imported, and + # parts is the list of parts in the object hierarchy to be traversed, or + # an empty list if just the module is wanted. + result = mod + for p in parts: + result = getattr(result, p) + return result diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 795a50a828..3a10ec8fc3 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -1,4 +1,3 @@ -from test.support import run_unittest from test.support.import_helper import unload, CleanImport from test.support.warnings_helper import check_warnings import unittest @@ -178,8 +177,6 @@ class PkgutilTests(unittest.TestCase): continue del sys.modules[pkg] - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_walk_packages_raises_on_string_or_bytes_input(self): str_input = 'test_dir' @@ -190,6 +187,99 @@ class PkgutilTests(unittest.TestCase): with self.assertRaises((TypeError, ValueError)): list(pkgutil.walk_packages(bytes_input)) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_name_resolution(self): + import logging + import logging.handlers + + success_cases = ( + ('os', os), + ('os.path', os.path), + ('os.path:pathsep', os.path.pathsep), + ('logging', logging), + ('logging:', logging), + ('logging.handlers', logging.handlers), + ('logging.handlers:', logging.handlers), + ('logging.handlers:SysLogHandler', logging.handlers.SysLogHandler), + ('logging.handlers.SysLogHandler', logging.handlers.SysLogHandler), + ('logging.handlers:SysLogHandler.LOG_ALERT', + logging.handlers.SysLogHandler.LOG_ALERT), + ('logging.handlers.SysLogHandler.LOG_ALERT', + logging.handlers.SysLogHandler.LOG_ALERT), + ('builtins.int', int), + ('builtins:int', int), + ('builtins.int.from_bytes', int.from_bytes), + ('builtins:int.from_bytes', int.from_bytes), + ('builtins.ZeroDivisionError', ZeroDivisionError), + ('builtins:ZeroDivisionError', ZeroDivisionError), + ('os:path', os.path), + ) + + failure_cases = ( + (None, TypeError), + (1, TypeError), + (2.0, TypeError), + (True, TypeError), + ('', ValueError), + ('?abc', ValueError), + ('abc/foo', ValueError), + ('foo', ImportError), + ('os.foo', AttributeError), + ('os.foo:', ImportError), + ('os.pth:pathsep', ImportError), + ('logging.handlers:NoSuchHandler', AttributeError), + ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError), + ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError), + ('ZeroDivisionError', ImportError), + ('os.path.9abc', ValueError), + ('9abc', ValueError), + ) + + # add some Unicode package names to the mix. + + unicode_words = ('\u0935\u092e\u0938', + '\xe9', '\xc8', + '\uc548\ub155\ud558\uc138\uc694', + '\u3055\u3088\u306a\u3089', + '\u3042\u308a\u304c\u3068\u3046', + '\u0425\u043e\u0440\u043e\u0448\u043e', + '\u0441\u043f\u0430\u0441\u0438\u0431\u043e', + '\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868') + + for uw in unicode_words: + d = os.path.join(self.dirname, uw) + try: + os.makedirs(d, exist_ok=True) + except UnicodeEncodeError: + # When filesystem encoding cannot encode uw: skip this test + continue + # make an empty __init__.py file + f = os.path.join(d, '__init__.py') + with open(f, 'w') as f: + f.write('') + f.flush() + # now import the package we just created; clearing the caches is + # needed, otherwise the newly created package isn't found + importlib.invalidate_caches() + mod = importlib.import_module(uw) + success_cases += (uw, mod), + if len(uw) > 1: + failure_cases += (uw[:-1], ImportError), + + # add an example with a Unicode digit at the start + failure_cases += ('\u0966\u0935\u092e\u0938', ValueError), + + for s, expected in success_cases: + with self.subTest(s=s): + o = pkgutil.resolve_name(s) + self.assertEqual(o, expected) + + for s, exc in failure_cases: + with self.subTest(s=s): + with self.assertRaises(exc): + pkgutil.resolve_name(s) + class PkgutilPEP302Tests(unittest.TestCase): @@ -409,7 +499,8 @@ class ImportlibMigrationTests(unittest.TestCase): def check_deprecated(self): return check_warnings( - ("This emulation is deprecated, use 'importlib' instead", + ("This emulation is deprecated and slated for removal in " + "Python 3.12; use 'importlib' instead", DeprecationWarning)) def test_importer_deprecated(self): @@ -490,9 +581,7 @@ class ImportlibMigrationTests(unittest.TestCase): self.assertEqual(len(w.warnings), 0) -def test_main(): - run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests, - NestedNamespacePackageTest, ImportlibMigrationTests) +def tearDownModule(): # this is necessary if test is run repeated (like when finding leaks) import zipimport import importlib @@ -501,4 +590,4 @@ def test_main(): if __name__ == '__main__': - test_main() + unittest.main()