Merge pull request #1221 from RustPython/coolreader18/runpy

Use runpy to run modules with -m
This commit is contained in:
Windel Bouwman
2019-08-09 21:12:55 +02:00
committed by GitHub
4 changed files with 480 additions and 43 deletions

97
Lib/__importlib_util.py Normal file
View File

@@ -0,0 +1,97 @@
"""Utility code for constructing importers, etc."""
from _frozen_importlib import _resolve_name
from _frozen_importlib import _find_spec
import sys
def resolve_name(name, package):
    """Resolve a relative module name to an absolute one.

    A name without a leading dot is already absolute and is returned
    unchanged.  A relative name needs a non-empty ``package`` to be resolved
    against; ValueError is raised when none is given.
    """
    if not name.startswith('.'):
        return name
    if not package:
        raise ValueError(f'no package specified for {repr(name)} '
                         '(required for relative module names)')
    # Each leading dot is one level of relative import.
    remainder = name.lstrip('.')
    level = len(name) - len(remainder)
    return _resolve_name(remainder, package, level)
def _find_spec_from_path(name, path=None):
"""Return the spec for the specified module.
First, sys.modules is checked to see if the module was already imported. If
so, then sys.modules[name].__spec__ is returned. If that happens to be
set to None, then ValueError is raised. If the module is not in
sys.modules, then sys.meta_path is searched for a suitable spec with the
value of 'path' given to the finders. None is returned if no spec could
be found.
Dotted names do not have their parent packages implicitly imported. You will
most likely need to explicitly import all parent packages in the proper
order for a submodule to get the correct spec.
"""
if name not in sys.modules:
return _find_spec(name, path)
else:
module = sys.modules[name]
if module is None:
return None
try:
spec = module.__spec__
except AttributeError:
raise ValueError('{}.__spec__ is not set'.format(name)) from None
else:
if spec is None:
raise ValueError('{}.__spec__ is None'.format(name))
return spec
def find_spec(name, package=None):
    """Return the spec for the specified module.

    A module already present in sys.modules answers with its ``__spec__``;
    ValueError is raised if that attribute is missing or None, and None is
    returned if the sys.modules entry itself is None.  Otherwise the spec is
    looked up with ``_find_spec``.

    If the name is for a submodule (contains a dot), the parent module is
    imported first and its ``__path__`` is used for the search.

    The name and package arguments work the same as
    importlib.import_module(): relative module names (with leading dots) are
    resolved against ``package``.
    """
    fullname = resolve_name(name, package) if name.startswith('.') else name

    if fullname in sys.modules:
        module = sys.modules[fullname]
        if module is None:
            return None
        try:
            spec = module.__spec__
        except AttributeError:
            raise ValueError('{}.__spec__ is not set'.format(name)) from None
        if spec is None:
            raise ValueError('{}.__spec__ is None'.format(name))
        return spec

    # Not imported yet: search along the parent's __path__, if any.
    parent_path = None
    parent_name = fullname.rpartition('.')[0]
    if parent_name:
        parent = __import__(parent_name, fromlist=['__path__'])
        try:
            parent_path = parent.__path__
        except AttributeError as e:
            raise ModuleNotFoundError(
                f"__path__ attribute not found on {parent_name!r} "
                f"while trying to find {fullname!r}", name=fullname) from e
    return _find_spec(fullname, parent_path)

298
Lib/runpy.py Normal file
View File

@@ -0,0 +1,298 @@
"""runpy.py - locating and running Python code using the module namespace
Provides support for locating and running Python scripts using the Python
module namespace instead of the native filesystem.
This allows Python code to play nicely with non-filesystem based PEP 302
importers when locating support scripts as well as when importing modules.
"""
# Written by Nick Coghlan <ncoghlan at gmail.com>
# to implement PEP 338 (Executing Modules as Scripts)
import sys
import __importlib_util
# FIXME replace above with below once we can import importlib
# import importlib.machinery # importlib first so we can test #15386 via -m
# import importlib.util
import types
# FIXME uncomment line below once we can import pkgutil
# from pkgutil import read_code, get_importer
__all__ = [
"run_module", "run_path",
]
class _TempModule(object):
"""Temporarily replace a module in sys.modules with an empty namespace"""
def __init__(self, mod_name):
self.mod_name = mod_name
self.module = types.ModuleType(mod_name)
self._saved_module = []
def __enter__(self):
mod_name = self.mod_name
try:
self._saved_module.append(sys.modules[mod_name])
except KeyError:
pass
sys.modules[mod_name] = self.module
return self
def __exit__(self, *args):
if self._saved_module:
sys.modules[self.mod_name] = self._saved_module[0]
else:
del sys.modules[self.mod_name]
self._saved_module = []
class _ModifiedArgv0(object):
def __init__(self, value):
self.value = value
self._saved_value = self._sentinel = object()
def __enter__(self):
if self._saved_value is not self._sentinel:
raise RuntimeError("Already preserving saved value")
self._saved_value = sys.argv[0]
sys.argv[0] = self.value
def __exit__(self, *args):
self.value = self._sentinel
sys.argv[0] = self._saved_value
# TODO: Replace these helpers with importlib._bootstrap_external functions.
def _run_code(code, run_globals, init_globals=None,
mod_name=None, mod_spec=None,
pkg_name=None, script_name=None):
"""Helper to run code in nominated namespace"""
if init_globals is not None:
run_globals.update(init_globals)
if mod_spec is None:
loader = None
fname = script_name
cached = None
else:
loader = mod_spec.loader
fname = mod_spec.origin
cached = mod_spec.cached
if pkg_name is None:
pkg_name = mod_spec.parent
run_globals.update(__name__ = mod_name,
__file__ = fname,
__cached__ = cached,
__doc__ = None,
__loader__ = loader,
__package__ = pkg_name,
__spec__ = mod_spec)
exec(code, run_globals)
return run_globals
def _run_module_code(code, init_globals=None,
                     mod_name=None, mod_spec=None,
                     pkg_name=None, script_name=None):
    """Run ``code`` in a temporary module with sys.modules and sys.argv[0] adjusted.

    A fresh module is registered under ``mod_name`` and ``sys.argv[0]`` is
    set to the spec's origin (or ``script_name`` when there is no spec) for
    the duration of the run.  Returns a copy of the resulting namespace.
    """
    if mod_spec is None:
        argv0 = script_name
    else:
        argv0 = mod_spec.origin
    with _TempModule(mod_name) as temp_module, _ModifiedArgv0(argv0):
        namespace = temp_module.module.__dict__
        _run_code(code, namespace, init_globals,
                  mod_name, mod_spec, pkg_name, script_name)
    # Hand back a copy: the temporary module's own globals may be cleared
    # once the module object goes away.
    return namespace.copy()
# Helper to get the full name, spec and code for a module
def _get_module_details(mod_name, error=ImportError):
if mod_name.startswith("."):
raise error("Relative module names not supported")
pkg_name, _, _ = mod_name.rpartition(".")
if pkg_name:
# Try importing the parent to avoid catching initialization errors
try:
__import__(pkg_name)
except ImportError as e:
# If the parent or higher ancestor package is missing, let the
# error be raised by find_spec() below and then be caught. But do
# not allow other errors to be caught.
if e.name is None or (e.name != pkg_name and
not pkg_name.startswith(e.name + ".")):
raise
# Warn if the module has already been imported under its normal name
existing = sys.modules.get(mod_name)
if existing is not None and not hasattr(existing, "__path__"):
from warnings import warn
msg = "{mod_name!r} found in sys.modules after import of " \
"package {pkg_name!r}, but prior to execution of " \
"{mod_name!r}; this may result in unpredictable " \
"behaviour".format(mod_name=mod_name, pkg_name=pkg_name)
warn(RuntimeWarning(msg))
try:
# FIXME replace with importlib.util.find_spec() once we can import importlib
spec = __importlib_util.find_spec(mod_name)
except (ImportError, AttributeError, TypeError, ValueError) as ex:
# This hack fixes an impedance mismatch between pkgutil and
# importlib, where the latter raises other errors for cases where
# pkgutil previously raised ImportError
msg = "Error while finding module specification for {!r} ({}: {})"
raise error(msg.format(mod_name, type(ex).__name__, ex)) from ex
if spec is None:
raise error("No module named %s" % mod_name)
if spec.submodule_search_locations is not None:
if mod_name == "__main__" or mod_name.endswith(".__main__"):
raise error("Cannot use package as __main__ module")
try:
pkg_main_name = mod_name + ".__main__"
return _get_module_details(pkg_main_name, error)
except error as e:
if mod_name not in sys.modules:
raise # No module loaded; being a package is irrelevant
raise error(("%s; %r is a package and cannot " +
"be directly executed") %(e, mod_name))
loader = spec.loader
if loader is None:
raise error("%r is a namespace package and cannot be executed"
% mod_name)
try:
code = loader.get_code(mod_name)
except ImportError as e:
raise error(format(e)) from e
if code is None:
raise error("No code object available for %s" % mod_name)
return mod_name, spec, code
class _Error(Exception):
"""Error that _run_module_as_main() should report without a traceback"""
# XXX ncoghlan: Should this be documented and made public?
# (Current thoughts: don't repeat the mistake that lead to its
# creation when run_module() no longer met the needs of
# mainmodule.c, but couldn't be changed because it was public)
def _run_module_as_main(mod_name, alter_argv=True):
    """Run the designated module in the existing __main__ namespace.

    The executed code shares the real __main__ module's globals; use
    run_module() instead when a fresh namespace is wanted.  At the very
    least, these names in __main__ are overwritten:
        __name__
        __file__
        __cached__
        __loader__
        __package__

    When the module cannot be located, the interpreter exits with a message
    of the form "<executable>: <reason>" rather than a traceback.  With
    ``alter_argv`` true, ``sys.argv[0]`` is set to the module's origin.
    """
    # alter_argv true or any name other than "__main__": the -m switch.
    # Otherwise: directory or zipfile execution.
    via_main_lookup = not alter_argv and mod_name == "__main__"
    try:
        if via_main_lookup:
            details = _get_main_module_details(_Error)
        else:
            details = _get_module_details(mod_name, _Error)
        mod_name, mod_spec, code = details
    except _Error as exc:
        sys.exit("%s: %s" % (sys.executable, exc))
    main_globals = sys.modules["__main__"].__dict__
    if alter_argv:
        sys.argv[0] = mod_spec.origin
    return _run_code(code, main_globals, None,
                     "__main__", mod_spec)
def run_module(mod_name, init_globals=None,
               run_name=None, alter_sys=False):
    """Execute a module's code without importing it.

    ``run_name`` becomes the executed code's __name__ and defaults to the
    resolved module name.  With ``alter_sys`` true, the code runs inside a
    temporary sys.modules entry with sys.argv[0] adjusted; otherwise sys is
    left untouched.

    Returns the resulting top level namespace dictionary.
    """
    mod_name, mod_spec, code = _get_module_details(mod_name)
    if run_name is None:
        run_name = mod_name
    if not alter_sys:
        # Leave the sys module alone
        return _run_code(code, {}, init_globals, run_name, mod_spec)
    return _run_module_code(code, init_globals, run_name, mod_spec)
def _get_main_module_details(error=ImportError):
    """Look up ``__main__`` for directory/zipfile execution.

    The existing ``__main__`` entry is taken out of sys.modules for the
    duration of the lookup so that its preexisting __loader__ doesn't cause
    issues, and is always put back afterwards.  An ImportError that mentions
    ``__main__`` is re-raised as ``error`` with a nicer message naming
    ``sys.path[0]``; other ImportErrors propagate unchanged.
    """
    main_name = "__main__"
    # Move the standard __main__ out of the way (KeyError if there is none).
    displaced_main = sys.modules.pop(main_name)
    try:
        return _get_module_details(main_name)
    except ImportError as exc:
        if main_name not in str(exc):
            raise
        raise error("can't find %r module in %r" %
                    (main_name, sys.path[0])) from exc
    finally:
        sys.modules[main_name] = displaced_main
def _get_code_from_file(run_name, fname):
# Check for a compiled file first
with open(fname, "rb") as f:
code = read_code(f)
if code is None:
# That didn't work, so try it as normal source code
with open(fname, "rb") as f:
code = compile(f.read(), fname, 'exec')
return code, fname
def run_path(path_name, init_globals=None, run_name=None):
    """Execute code located at the specified filesystem location.

    Returns the resulting top level namespace dictionary.

    The file path may refer directly to a Python script (i.e.
    one that could be directly executed with execfile) or else
    it may refer to a zipfile or directory containing a top
    level __main__.py script.

    ``run_name`` becomes the executed code's __name__ and defaults to
    "<run_path>"; its parent portion (text before the last dot) is used as
    the package name.
    """
    # Imported here because the module-level pkgutil import is still
    # disabled; without this the name ``get_importer`` is never bound and
    # the call below fails with NameError.
    from pkgutil import get_importer
    if run_name is None:
        run_name = "<run_path>"
    pkg_name = run_name.rpartition(".")[0]
    importer = get_importer(path_name)
    # Trying to avoid importing imp so as to not consume the deprecation warning.
    is_NullImporter = False
    if type(importer).__module__ == 'imp':
        if type(importer).__name__ == 'NullImporter':
            is_NullImporter = True
    if isinstance(importer, type(None)) or is_NullImporter:
        # Not a valid sys.path entry, so run the code directly
        # execfile() doesn't help as we want to allow compiled files
        code, fname = _get_code_from_file(run_name, path_name)
        return _run_module_code(code, init_globals, run_name,
                                pkg_name=pkg_name, script_name=fname)
    else:
        # Finder is defined for path, so add it to
        # the start of sys.path
        sys.path.insert(0, path_name)
        try:
            # Here's where things are a little different from the run_module
            # case. There, we only had to replace the module in sys while the
            # code was running and doing so was somewhat optional. Here, we
            # have no choice and we have to remove it even while we read the
            # code. If we don't do this, a __loader__ attribute in the
            # existing __main__ module may prevent location of the new module.
            mod_name, mod_spec, code = _get_main_module_details()
            with _TempModule(run_name) as temp_module, \
                 _ModifiedArgv0(path_name):
                mod_globals = temp_module.module.__dict__
                return _run_code(code, mod_globals, init_globals,
                                 run_name, mod_spec, pkg_name).copy()
        finally:
            # Best-effort cleanup: the executed code may already have
            # removed the entry itself.
            try:
                sys.path.remove(path_name)
            except ValueError:
                pass
if __name__ == "__main__":
    # Script entry point: the first command line argument names the module
    # to execute.
    if len(sys.argv) >= 2:
        # Drop this script's own entry so the requested module becomes
        # sys.argv[0].
        sys.argv.pop(0)
        _run_module_as_main(sys.argv[0])
    else:
        print("No module specified for execution", file=sys.stderr)

View File

@@ -51,7 +51,7 @@ fn parse_arguments<'a>(app: App<'a, '_>) -> ArgMatches<'a> {
.version(crate_version!())
.author(crate_authors!())
.about("Rust implementation of the Python language")
.usage("rustpython [OPTIONS] [-c CMD | -m MODULE | FILE | -] [PYARGS]...")
.usage("rustpython [OPTIONS] [-c CMD | -m MODULE | FILE] [PYARGS]...")
.arg(
Arg::with_name("script")
.required(false)
@@ -204,7 +204,7 @@ fn create_settings(matches: &ArgMatches) -> PySettings {
let argv = if let Some(script) = matches.values_of("script") {
script.map(ToOwned::to_owned).collect()
} else if let Some(module) = matches.values_of("m") {
std::iter::once("PLACEHOLEDER".to_owned())
std::iter::once("PLACEHOLDER".to_owned())
.chain(module.skip(1).map(ToOwned::to_owned))
.collect()
} else if let Some(cmd) = matches.values_of("c") {
@@ -353,18 +353,12 @@ fn run_command(vm: &VirtualMachine, source: String) -> PyResult<()> {
fn run_module(vm: &VirtualMachine, module: &str) -> PyResult<()> {
debug!("Running module {}", module);
let importlib = vm.import("_frozen_importlib", &vm.ctx.new_tuple(vec![]), 0)?;
let find_spec = vm.get_attribute(importlib, "_find_spec")?;
let spec = vm.invoke(
find_spec,
vec![vm.ctx.new_str(module.to_owned()), vm.get_none()],
)?;
if !vm.is_none(&spec) {
let origin = vm.get_attribute(spec, "origin")?;
let sys_path = vm.get_attribute(vm.sys_module.clone(), "argv")?;
sys_path.set_item(0, origin, vm)?;
}
vm.import(module, &vm.ctx.new_tuple(vec![]), 0)?;
let main_module = vm.ctx.new_module("__main__", vm.ctx.new_dict());
vm.get_attribute(vm.sys_module.clone(), "modules")?
.set_item("__main__", main_module, vm)?;
let runpy = vm.import("runpy", &vm.ctx.new_tuple(vec![]), 0)?;
let run_module_as_main = vm.get_attribute(runpy, "_run_module_as_main")?;
vm.invoke(run_module_as_main, vec![vm.new_str(module.to_owned())])?;
Ok(())
}

View File

@@ -14,21 +14,15 @@ use crate::obj::objstr::{PyString, PyStringRef};
use crate::obj::objtype::PyClassRef;
use crate::pyobject::{PyClassImpl, PyObjectRef, PyResult, PyValue, TryFromObject};
use crate::vm::VirtualMachine;
use num_traits::ToPrimitive;
use num_traits::{Signed, ToPrimitive};
// #[derive(Debug)]
#[pyclass(name = "Pattern")]
#[derive(Debug)]
struct PyPattern {
regex: Regex,
pattern: String,
}
impl fmt::Debug for PyPattern {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Pattern()")
}
}
const IGNORECASE: usize = 2;
const LOCALE: usize = 4;
const MULTILINE: usize = 8;
@@ -143,6 +137,18 @@ fn re_findall(
do_findall(vm, &regex, string)
}
/// Module-level `split`: compiles `pattern` with the given `flags` and
/// delegates to `do_split`, forwarding `maxsplit` when it was supplied.
///
/// Returns the error from `make_regex` if the pattern cannot be compiled.
fn re_split(
    pattern: PyStringRef,
    string: PyStringRef,
    maxsplit: OptionalArg<PyIntRef>,
    flags: OptionalArg<PyIntRef>,
    vm: &VirtualMachine,
) -> PyResult {
    let compiled = make_regex(vm, pattern.as_str(), extract_flags(flags))?;
    let limit = maxsplit.into_option();
    do_split(vm, &compiled, string, limit)
}
fn do_sub(
vm: &VirtualMachine,
pattern: &PyPattern,
@@ -150,15 +156,12 @@ fn do_sub(
search_text: PyStringRef,
limit: usize,
) -> PyResult {
let out = pattern
.regex
.replacen(
search_text.as_str().as_bytes(),
limit,
repl.as_str().as_bytes(),
)
.into_owned();
let out = unsafe { String::from_utf8_unchecked(out) };
let out = pattern.regex.replacen(
search_text.as_str().as_bytes(),
limit,
repl.as_str().as_bytes(),
);
let out = String::from_utf8_lossy(&out).into_owned();
Ok(vm.new_str(out))
}
@@ -208,6 +211,53 @@ fn do_findall(vm: &VirtualMachine, pattern: &PyPattern, search_text: PyStringRef
Ok(vm.ctx.new_list(out))
}
/// Splits `search_text` on every match of `pattern` and returns a list.
///
/// The list holds the text between matches and, after each such piece, one
/// entry per capture group of the match (`None` for a group that did not
/// participate).  The text after the last match is always appended, even
/// when it is empty, so a separator at the very end of the input produces a
/// trailing empty string and an empty input produces `[""]`.
///
/// `maxsplit` limits the number of splits: absent or `0` means no limit,
/// and a negative value performs no split at all, returning the whole text
/// as the only element.
///
/// # Errors
///
/// Returns an error if `maxsplit` cannot be converted to `usize`.
fn do_split(
    vm: &VirtualMachine,
    pattern: &PyPattern,
    search_text: PyStringRef,
    maxsplit: Option<PyIntRef>,
) -> PyResult {
    if maxsplit
        .as_ref()
        .map_or(false, |i| i.as_bigint().is_negative())
    {
        return Ok(vm.ctx.new_list(vec![search_text.into_object()]));
    }
    let maxsplit = maxsplit
        .map(|i| usize::try_from_object(vm, i.into_object()))
        .transpose()?
        .unwrap_or(0);
    let text = search_text.as_str().as_bytes();
    // essentially Regex::split, but it outputs captures as well
    let mut output = Vec::new();
    let mut last = 0;
    let mut n = 0;
    for captures in pattern.regex.captures_iter(text) {
        let full = captures
            .get(0)
            .expect("capture group 0 is always the whole match");
        output.push(Some(&text[last..full.start()]));
        last = full.end();
        output.extend(captures.iter().skip(1).map(|m| m.map(|m| m.as_bytes())));
        n += 1;
        if maxsplit != 0 && n >= maxsplit {
            break;
        }
    }
    // Always emit the remainder: dropping it when empty would lose the
    // trailing empty field that both Regex::split and Python's re.split
    // produce (e.g. splitting "a," on "," must give ["a", ""]).
    output.push(Some(&text[last..]));
    let split = output
        .into_iter()
        .map(|v| {
            v.map(|v| vm.new_str(String::from_utf8_lossy(v).into_owned()))
                .unwrap_or_else(|| vm.get_none())
        })
        .collect();
    Ok(vm.ctx.new_list(split))
}
fn make_regex(vm: &VirtualMachine, pattern: &str, flags: PyRegexFlags) -> PyResult<PyPattern> {
let unicode = if flags.unicode && flags.ascii {
return Err(vm.new_value_error("ASCII and UNICODE flags are incompatible".to_string()));
@@ -280,11 +330,8 @@ impl PyPattern {
fn sub(&self, repl: PyStringRef, text: PyStringRef, vm: &VirtualMachine) -> PyResult {
let replaced_text = self
.regex
.replace_all(text.value.as_bytes(), repl.as_str().as_bytes())
.into_owned();
// safe because both the search and replace arguments ^ are unicode strings temporarily
// converted to bytes
let replaced_text = unsafe { String::from_utf8_unchecked(replaced_text) };
.replace_all(text.value.as_bytes(), repl.as_str().as_bytes());
let replaced_text = String::from_utf8_lossy(&replaced_text).into_owned();
Ok(vm.ctx.new_str(replaced_text))
}
@@ -299,13 +346,13 @@ impl PyPattern {
}
#[pymethod]
fn split(&self, text: PyStringRef, vm: &VirtualMachine) -> PyObjectRef {
let split = self
.regex
.split(text.as_str().as_bytes())
.map(|v| vm.new_str(String::from_utf8_lossy(v).into_owned()))
.collect();
vm.ctx.new_list(split)
fn split(
&self,
search_text: PyStringRef,
maxsplit: OptionalArg<PyIntRef>,
vm: &VirtualMachine,
) -> PyResult {
do_split(vm, self, search_text, maxsplit.into_option())
}
}
@@ -407,6 +454,7 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
"search" => ctx.new_rustfunc(re_search),
"sub" => ctx.new_rustfunc(re_sub),
"findall" => ctx.new_rustfunc(re_findall),
"split" => ctx.new_rustfunc(re_split),
"IGNORECASE" => ctx.new_int(IGNORECASE),
"I" => ctx.new_int(IGNORECASE),
"LOCALE" => ctx.new_int(LOCALE),