def resolve_name(name, package):
    """Resolve a relative module name to an absolute one.

    Names that do not start with a dot are returned unchanged.  Otherwise
    the number of leading dots is counted and handed, together with the
    remainder of the name and *package*, to ``_resolve_name``.

    Raises ValueError if *name* is relative and *package* is empty/None.
    """
    if not name.startswith('.'):
        return name
    elif not package:
        raise ValueError(f'no package specified for {repr(name)} '
                         '(required for relative module names)')
    # Number of leading dots == relative import level.
    level = len(name) - len(name.lstrip('.'))
    return _resolve_name(name[level:], package, level)


def _spec_of_loaded_module(module, name):
    """Return ``module.__spec__`` for an entry already in sys.modules.

    *name* is only used in error messages.  Returns None when the
    sys.modules entry itself is None; raises ValueError when the module has
    no ``__spec__`` attribute or when that attribute is None.
    """
    if module is None:
        return None
    try:
        spec = module.__spec__
    except AttributeError:
        raise ValueError('{}.__spec__ is not set'.format(name)) from None
    if spec is None:
        raise ValueError('{}.__spec__ is None'.format(name))
    return spec


def _find_spec_from_path(name, path=None):
    """Return the spec for the specified module.

    First, sys.modules is checked to see if the module was already imported.
    If so, then sys.modules[name].__spec__ is returned.  If that happens to
    be set to None, then ValueError is raised.  If the module is not in
    sys.modules, then ``_find_spec`` is called with *name* and *path* and
    its result is returned.

    Dotted names do not have their parent packages implicitly imported.  You
    will most likely need to explicitly import all parent packages in the
    proper order for a submodule to get the correct spec.

    """
    if name not in sys.modules:
        return _find_spec(name, path)
    return _spec_of_loaded_module(sys.modules[name], name)


def find_spec(name, package=None):
    """Return the spec for the specified module.

    First, sys.modules is checked to see if the module was already imported.
    If so, then sys.modules[fullname].__spec__ is returned.  If that happens
    to be set to None, then ValueError is raised.  If the module is not in
    sys.modules, ``_find_spec`` is called with the resolved name and the
    parent package's ``__path__`` (or None for a top-level name).

    If the name is for a submodule (contains a dot), the parent module is
    automatically imported.  ModuleNotFoundError is raised if that parent
    has no ``__path__`` attribute.

    Relative module names (with leading dots) are resolved against
    *package* via resolve_name().

    """
    fullname = resolve_name(name, package) if name.startswith('.') else name
    if fullname in sys.modules:
        # Report the resolved name: for a relative *name* such as '.mod' the
        # unresolved spelling does not identify the sys.modules entry.
        return _spec_of_loaded_module(sys.modules[fullname], fullname)

    parent_name = fullname.rpartition('.')[0]
    if parent_name:
        parent = __import__(parent_name, fromlist=['__path__'])
        try:
            parent_path = parent.__path__
        except AttributeError as e:
            raise ModuleNotFoundError(
                f"__path__ attribute not found on {parent_name!r} "
                f"while trying to find {fullname!r}", name=fullname) from e
    else:
        parent_path = None
    return _find_spec(fullname, parent_path)
class _TempModule(object):
    """Temporarily replace a module in sys.modules with an empty namespace"""
    def __init__(self, mod_name):
        self.mod_name = mod_name
        self.module = types.ModuleType(mod_name)
        # Holds the displaced sys.modules entry, if there was one.
        self._saved_module = []

    def __enter__(self):
        mod_name = self.mod_name
        try:
            self._saved_module.append(sys.modules[mod_name])
        except KeyError:
            # Nothing was registered under this name; __exit__ will delete.
            pass
        sys.modules[mod_name] = self.module
        return self

    def __exit__(self, *args):
        if self._saved_module:
            sys.modules[self.mod_name] = self._saved_module[0]
        else:
            del sys.modules[self.mod_name]
        self._saved_module = []


class _ModifiedArgv0(object):
    """Temporarily replace sys.argv[0] with *value*.

    The manager is not re-entrant (nested use raises RuntimeError) but may
    be reused once the previous ``with`` block has finished.
    """
    def __init__(self, value):
        self.value = value
        # The sentinel marks "no argv[0] currently saved".
        self._saved_value = self._sentinel = object()

    def __enter__(self):
        if self._saved_value is not self._sentinel:
            raise RuntimeError("Already preserving saved value")
        self._saved_value = sys.argv[0]
        sys.argv[0] = self.value

    def __exit__(self, *args):
        sys.argv[0] = self._saved_value
        # Reset the saved slot (not self.value) so the configured value
        # survives and the manager can be entered again.
        self._saved_value = self._sentinel
def _run_code(code, run_globals, init_globals=None,
              mod_name=None, mod_spec=None,
              pkg_name=None, script_name=None):
    """Execute *code* in *run_globals* after seeding the module dunders.

    *init_globals*, when given, is merged in first.  The loader, file name
    and cache path come from *mod_spec* when one is supplied; otherwise the
    file name is *script_name* and the other two are None.  Returns
    *run_globals*.
    """
    if init_globals is not None:
        run_globals.update(init_globals)
    if mod_spec is not None:
        loader = mod_spec.loader
        fname = mod_spec.origin
        cached = mod_spec.cached
        if pkg_name is None:
            pkg_name = mod_spec.parent
    else:
        loader = None
        fname = script_name
        cached = None
    run_globals.update({
        "__name__": mod_name,
        "__file__": fname,
        "__cached__": cached,
        "__doc__": None,
        "__loader__": loader,
        "__package__": pkg_name,
        "__spec__": mod_spec,
    })
    exec(code, run_globals)
    return run_globals


def _run_module_code(code, init_globals=None,
                     mod_name=None, mod_spec=None,
                     pkg_name=None, script_name=None):
    """Run *code* in a fresh temporary module with sys.argv[0] replaced.

    Returns a copy of the temporary module's globals.
    """
    if mod_spec is None:
        argv0 = script_name
    else:
        argv0 = mod_spec.origin
    with _TempModule(mod_name) as scratch:
        with _ModifiedArgv0(argv0):
            namespace = scratch.module.__dict__
            _run_code(code, namespace, init_globals,
                      mod_name, mod_spec, pkg_name, script_name)
    # Snapshot the namespace: the temporary module's dict may be cleared
    # once the module itself goes away.
    return namespace.copy()


def _get_module_details(mod_name, error=ImportError):
    """Return ``(mod_name, spec, code)`` for the module to execute.

    Failures are reported by raising *error*.  When the spec has
    submodule search locations, the lookup recurses into its
    ``__main__`` submodule.
    """
    if mod_name.startswith("."):
        raise error("Relative module names not supported")
    pkg_name = mod_name.rpartition(".")[0]
    if pkg_name:
        # Import the parent first so its initialization errors propagate
        # instead of being swallowed by the find_spec() handler below.
        try:
            __import__(pkg_name)
        except ImportError as e:
            # Only a missing parent (or higher ancestor) is tolerated here;
            # find_spec() below will report it.  Anything else re-raises.
            missing = e.name
            parent_missing = missing is not None and (
                missing == pkg_name or pkg_name.startswith(missing + "."))
            if not parent_missing:
                raise
        # Warn when the target is already imported under its normal name.
        existing = sys.modules.get(mod_name)
        if existing is not None and not hasattr(existing, "__path__"):
            from warnings import warn
            msg = "{mod_name!r} found in sys.modules after import of " \
                "package {pkg_name!r}, but prior to execution of " \
                "{mod_name!r}; this may result in unpredictable " \
                "behaviour".format(mod_name=mod_name, pkg_name=pkg_name)
            warn(RuntimeWarning(msg))

    try:
        # FIXME replace with importlib.util.find_spec() once we can import importlib
        spec = __importlib_util.find_spec(mod_name)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # Normalise the assorted lookup failures into the caller's *error*.
        msg = "Error while finding module specification for {!r} ({}: {})"
        raise error(msg.format(mod_name, type(ex).__name__, ex)) from ex
    if spec is None:
        raise error("No module named %s" % mod_name)

    if spec.submodule_search_locations is not None:
        if mod_name == "__main__" or mod_name.endswith(".__main__"):
            raise error("Cannot use package as __main__ module")
        try:
            pkg_main_name = mod_name + ".__main__"
            return _get_module_details(pkg_main_name, error)
        except error as e:
            if mod_name not in sys.modules:
                raise  # No module loaded; being a package is irrelevant
            raise error(("%s; %r is a package and cannot " +
                         "be directly executed") % (e, mod_name))

    loader = spec.loader
    if loader is None:
        raise error("%r is a namespace package and cannot be executed"
                    % mod_name)
    try:
        code = loader.get_code(mod_name)
    except ImportError as e:
        raise error(format(e)) from e
    if code is None:
        raise error("No code object available for %s" % mod_name)
    return mod_name, spec, code
class _Error(Exception):
    """Error that _run_module_as_main() should report without a traceback"""

# XXX ncoghlan: Should this be documented and made public?
# (Current thoughts: don't repeat the mistake that lead to its
# creation when run_module() no longer met the needs of
# mainmodule.c, but couldn't be changed because it was public)
def _run_module_as_main(mod_name, alter_argv=True):
    """Runs the designated module in the __main__ namespace

       Note that the executed module will have full access to the
       __main__ namespace. If this is not desirable, the run_module()
       function should be used to run the module code in a fresh namespace.

       At the very least, these variables in __main__ will be overwritten:
           __name__
           __file__
           __cached__
           __loader__
           __package__

       Lookup failures are reported via sys.exit() with a message prefixed
       by sys.executable rather than with a traceback.
    """
    try:
        if alter_argv or mod_name != "__main__": # i.e. -m switch
            mod_name, mod_spec, code = _get_module_details(mod_name, _Error)
        else:          # i.e. directory or zipfile execution
            mod_name, mod_spec, code = _get_main_module_details(_Error)
    except _Error as exc:
        msg = "%s: %s" % (sys.executable, exc)
        sys.exit(msg)
    main_globals = sys.modules["__main__"].__dict__
    if alter_argv:
        sys.argv[0] = mod_spec.origin
    return _run_code(code, main_globals, None,
                     "__main__", mod_spec)

def run_module(mod_name, init_globals=None,
               run_name=None, alter_sys=False):
    """Execute a module's code without importing it

       Returns the resulting top level namespace dictionary.  *run_name*
       defaults to the located module's name.  With *alter_sys* true the
       code runs via _run_module_code(); otherwise it runs in a fresh dict
       and sys is left alone.
    """
    mod_name, mod_spec, code = _get_module_details(mod_name)
    if run_name is None:
        run_name = mod_name
    if alter_sys:
        return _run_module_code(code, init_globals, run_name, mod_spec)
    else:
        # Leave the sys module alone
        return _run_code(code, {}, init_globals, run_name, mod_spec)

def _get_main_module_details(error=ImportError):
    # Helper that gives a nicer error message when attempting to
    # execute a zipfile or directory by invoking __main__.py
    # Also moves the standard __main__ out of the way so that the
    # preexisting __loader__ entry doesn't cause issues
    main_name = "__main__"
    saved_main = sys.modules[main_name]
    del sys.modules[main_name]
    try:
        return _get_module_details(main_name)
    except ImportError as exc:
        if main_name in str(exc):
            raise error("can't find %r module in %r" %
                              (main_name, sys.path[0])) from exc
        raise
    finally:
        # Always put the real __main__ back, whatever happened above.
        sys.modules[main_name] = saved_main


def _get_code_from_file(run_name, fname):
    """Return ``(code, fname)`` for the file at *fname*.

    NOTE(review): read_code is expected to come from pkgutil; that import
    is still commented out at the top of the file (see the FIXME there), so
    calling this raises NameError until it is restored.
    """
    # Check for a compiled file first
    with open(fname, "rb") as f:
        code = read_code(f)
    if code is None:
        # That didn't work, so try it as normal source code
        with open(fname, "rb") as f:
            code = compile(f.read(), fname, 'exec')
    return code, fname

def run_path(path_name, init_globals=None, run_name=None):
    """Execute code located at the specified filesystem location

       Returns the resulting top level namespace dictionary

       The file path may refer directly to a Python script (i.e.
       one that could be directly executed with execfile) or else
       it may refer to a zipfile or directory containing a top
       level __main__.py script.

       *run_name* defaults to "<run_path>".

       NOTE(review): get_importer is expected to come from pkgutil; that
       import is still commented out at the top of the file.
    """
    if run_name is None:
        # Documented default module name for code run via run_path().
        run_name = "<run_path>"
    pkg_name = run_name.rpartition(".")[0]
    importer = get_importer(path_name)
    # Trying to avoid importing imp so as to not consume the deprecation warning.
    is_NullImporter = False
    if type(importer).__module__ == 'imp':
        if type(importer).__name__ == 'NullImporter':
            is_NullImporter = True
    if isinstance(importer, type(None)) or is_NullImporter:
        # Not a valid sys.path entry, so run the code directly
        # execfile() doesn't help as we want to allow compiled files
        code, fname = _get_code_from_file(run_name, path_name)
        return _run_module_code(code, init_globals, run_name,
                                pkg_name=pkg_name, script_name=fname)
    else:
        # Finder is defined for path, so add it to
        # the start of sys.path
        sys.path.insert(0, path_name)
        try:
            # Here's where things are a little different from the run_module
            # case. There, we only had to replace the module in sys while the
            # code was running and doing so was somewhat optional. Here, we
            # have no choice and we have to remove it even while we read the
            # code. If we don't do this, a __loader__ attribute in the
            # existing __main__ module may prevent location of the new module.
            mod_name, mod_spec, code = _get_main_module_details()
            with _TempModule(run_name) as temp_module, \
                 _ModifiedArgv0(path_name):
                mod_globals = temp_module.module.__dict__
                return _run_code(code, mod_globals, init_globals,
                                    run_name, mod_spec, pkg_name).copy()
        finally:
            try:
                sys.path.remove(path_name)
            except ValueError:
                # The executed code already removed the entry itself.
                pass


if __name__ == "__main__":
    # Run the module specified as the next command line argument
    if len(sys.argv) < 2:
        print("No module specified for execution", file=sys.stderr)
    else:
        del sys.argv[0] # Make the requested module sys.argv[0]
        _run_module_as_main(sys.argv[0])
module: &str) -> PyResult<()> { debug!("Running module {}", module); - let importlib = vm.import("_frozen_importlib", &vm.ctx.new_tuple(vec![]), 0)?; - let find_spec = vm.get_attribute(importlib, "_find_spec")?; - let spec = vm.invoke( - find_spec, - vec![vm.ctx.new_str(module.to_owned()), vm.get_none()], - )?; - if !vm.is_none(&spec) { - let origin = vm.get_attribute(spec, "origin")?; - let sys_path = vm.get_attribute(vm.sys_module.clone(), "argv")?; - sys_path.set_item(0, origin, vm)?; - } - vm.import(module, &vm.ctx.new_tuple(vec![]), 0)?; + let main_module = vm.ctx.new_module("__main__", vm.ctx.new_dict()); + vm.get_attribute(vm.sys_module.clone(), "modules")? + .set_item("__main__", main_module, vm)?; + let runpy = vm.import("runpy", &vm.ctx.new_tuple(vec![]), 0)?; + let run_module_as_main = vm.get_attribute(runpy, "_run_module_as_main")?; + vm.invoke(run_module_as_main, vec![vm.new_str(module.to_owned())])?; Ok(()) } diff --git a/vm/src/stdlib/re.rs b/vm/src/stdlib/re.rs index 94ea2c27b..b44f5701d 100644 --- a/vm/src/stdlib/re.rs +++ b/vm/src/stdlib/re.rs @@ -14,21 +14,15 @@ use crate::obj::objstr::{PyString, PyStringRef}; use crate::obj::objtype::PyClassRef; use crate::pyobject::{PyClassImpl, PyObjectRef, PyResult, PyValue, TryFromObject}; use crate::vm::VirtualMachine; -use num_traits::ToPrimitive; +use num_traits::{Signed, ToPrimitive}; -// #[derive(Debug)] #[pyclass(name = "Pattern")] +#[derive(Debug)] struct PyPattern { regex: Regex, pattern: String, } -impl fmt::Debug for PyPattern { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Pattern()") - } -} - const IGNORECASE: usize = 2; const LOCALE: usize = 4; const MULTILINE: usize = 8; @@ -143,6 +137,18 @@ fn re_findall( do_findall(vm, ®ex, string) } +fn re_split( + pattern: PyStringRef, + string: PyStringRef, + maxsplit: OptionalArg, + flags: OptionalArg, + vm: &VirtualMachine, +) -> PyResult { + let flags = extract_flags(flags); + let regex = make_regex(vm, pattern.as_str(), flags)?; 
+ do_split(vm, ®ex, string, maxsplit.into_option()) +} + fn do_sub( vm: &VirtualMachine, pattern: &PyPattern, @@ -150,15 +156,12 @@ fn do_sub( search_text: PyStringRef, limit: usize, ) -> PyResult { - let out = pattern - .regex - .replacen( - search_text.as_str().as_bytes(), - limit, - repl.as_str().as_bytes(), - ) - .into_owned(); - let out = unsafe { String::from_utf8_unchecked(out) }; + let out = pattern.regex.replacen( + search_text.as_str().as_bytes(), + limit, + repl.as_str().as_bytes(), + ); + let out = String::from_utf8_lossy(&out).into_owned(); Ok(vm.new_str(out)) } @@ -208,6 +211,53 @@ fn do_findall(vm: &VirtualMachine, pattern: &PyPattern, search_text: PyStringRef Ok(vm.ctx.new_list(out)) } +fn do_split( + vm: &VirtualMachine, + pattern: &PyPattern, + search_text: PyStringRef, + maxsplit: Option, +) -> PyResult { + if maxsplit + .as_ref() + .map_or(false, |i| i.as_bigint().is_negative()) + { + return Ok(vm.ctx.new_list(vec![search_text.into_object()])); + } + let maxsplit = maxsplit + .map(|i| usize::try_from_object(vm, i.into_object())) + .transpose()? 
+ .unwrap_or(0); + let text = search_text.as_str().as_bytes(); + // essentially Regex::split, but it outputs captures as well + let mut output = Vec::new(); + let mut last = 0; + let mut n = 0; + for captures in pattern.regex.captures_iter(text) { + let full = captures.get(0).unwrap(); + let matched = &text[last..full.start()]; + last = full.end(); + output.push(Some(matched)); + for m in captures.iter().skip(1) { + output.push(m.map(|m| m.as_bytes())); + } + n += 1; + if maxsplit != 0 && n >= maxsplit { + break; + } + } + if last < text.len() { + output.push(Some(&text[last..])); + } + let split = output + .into_iter() + .map(|v| { + v.map(|v| vm.new_str(String::from_utf8_lossy(v).into_owned())) + .unwrap_or_else(|| vm.get_none()) + }) + .collect(); + Ok(vm.ctx.new_list(split)) +} + fn make_regex(vm: &VirtualMachine, pattern: &str, flags: PyRegexFlags) -> PyResult { let unicode = if flags.unicode && flags.ascii { return Err(vm.new_value_error("ASCII and UNICODE flags are incompatible".to_string())); @@ -280,11 +330,8 @@ impl PyPattern { fn sub(&self, repl: PyStringRef, text: PyStringRef, vm: &VirtualMachine) -> PyResult { let replaced_text = self .regex - .replace_all(text.value.as_bytes(), repl.as_str().as_bytes()) - .into_owned(); - // safe because both the search and replace arguments ^ are unicode strings temporarily - // converted to bytes - let replaced_text = unsafe { String::from_utf8_unchecked(replaced_text) }; + .replace_all(text.value.as_bytes(), repl.as_str().as_bytes()); + let replaced_text = String::from_utf8_lossy(&replaced_text).into_owned(); Ok(vm.ctx.new_str(replaced_text)) } @@ -299,13 +346,13 @@ impl PyPattern { } #[pymethod] - fn split(&self, text: PyStringRef, vm: &VirtualMachine) -> PyObjectRef { - let split = self - .regex - .split(text.as_str().as_bytes()) - .map(|v| vm.new_str(String::from_utf8_lossy(v).into_owned())) - .collect(); - vm.ctx.new_list(split) + fn split( + &self, + search_text: PyStringRef, + maxsplit: OptionalArg, + vm: 
&VirtualMachine, + ) -> PyResult { + do_split(vm, self, search_text, maxsplit.into_option()) } } @@ -407,6 +454,7 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef { "search" => ctx.new_rustfunc(re_search), "sub" => ctx.new_rustfunc(re_sub), "findall" => ctx.new_rustfunc(re_findall), + "split" => ctx.new_rustfunc(re_split), "IGNORECASE" => ctx.new_int(IGNORECASE), "I" => ctx.new_int(IGNORECASE), "LOCALE" => ctx.new_int(LOCALE),