Merge pull request #1318 from youknowone/ast-module

ast module
This commit is contained in:
Windel Bouwman
2019-10-12 12:51:26 +02:00
committed by GitHub
11 changed files with 477 additions and 59 deletions

331
Lib/ast.py vendored Normal file
View File

@@ -0,0 +1,331 @@
"""
ast
~~~
The `ast` module helps Python applications to process trees of the Python
abstract syntax grammar. The abstract syntax itself might change with
each Python release; this module helps to find out programmatically what
the current grammar looks like and allows modifications of it.
An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
a flag to the `compile()` builtin function or by using the `parse()`
function from this module. The result will be a tree of objects whose
classes all inherit from `ast.AST`.
A modified abstract syntax tree can be compiled into a Python code object
using the built-in `compile()` function.
Additionally various helper functions are provided that make working with
the trees simpler. The main intention of the helper functions and this
module in general is to provide an easy to use interface for libraries
that work tightly with the python syntax (template engines for example).
:copyright: Copyright 2008 by Armin Ronacher.
:license: Python License.
"""
from _ast import *
def parse(source, filename='<unknown>', mode='exec'):
    """
    Turn *source* into an AST node rather than a code object.

    This is shorthand for ``compile(source, filename, mode, PyCF_ONLY_AST)``;
    *filename* and *mode* are forwarded to ``compile()`` unchanged.
    """
    ast_only_flag = PyCF_ONLY_AST
    return compile(source, filename, mode, ast_only_flag)
def literal_eval(node_or_string):
    """
    Evaluate a literal-only expression without running arbitrary code.

    *node_or_string* is either source text (parsed in ``'eval'`` mode) or an
    expression node.  Only these literal structures are accepted: strings,
    bytes, numbers, tuples, lists, dicts, sets, booleans, and None.
    Anything else raises ValueError.
    """
    target = node_or_string
    if isinstance(target, str):
        target = parse(target, mode='eval')
    if isinstance(target, Expression):
        target = target.body

    def _number(node):
        # A bare numeric literal, in either Constant or legacy Num form.
        if isinstance(node, Constant):
            if isinstance(node.value, (int, float, complex)):
                return node.value
        elif isinstance(node, Num):
            return node.n
        raise ValueError('malformed node or string: ' + repr(node))

    def _signed_number(node):
        # A numeric literal optionally prefixed with unary + or -.
        if not (isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub))):
            return _number(node)
        magnitude = _number(node.operand)
        return +magnitude if isinstance(node.op, UAdd) else -magnitude

    def _evaluate(node):
        if isinstance(node, Constant):
            return node.value
        if isinstance(node, (Str, Bytes)):
            return node.s
        if isinstance(node, Num):
            return node.n
        if isinstance(node, Tuple):
            return tuple(_evaluate(elt) for elt in node.elts)
        if isinstance(node, List):
            return [_evaluate(elt) for elt in node.elts]
        if isinstance(node, Set):
            return {_evaluate(elt) for elt in node.elts}
        if isinstance(node, Dict):
            # Both sides stay lazy so keys and values are converted pairwise.
            keys = map(_evaluate, node.keys)
            values = map(_evaluate, node.values)
            return dict(zip(keys, values))
        if isinstance(node, NameConstant):
            return node.value
        if isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)):
            # Only "<real> +/- <complex>" is allowed, i.e. a complex literal.
            real_part = _signed_number(node.left)
            imag_part = _number(node.right)
            if isinstance(real_part, (int, float)) and isinstance(imag_part, complex):
                if isinstance(node.op, Add):
                    return real_part + imag_part
                return real_part - imag_part
        return _signed_number(node)

    return _evaluate(target)
def dump(node, annotate_fields=True, include_attributes=False):
    """
    Render the tree rooted at *node* as a string, mainly for debugging.

    With *annotate_fields* true (the default) every field is written as
    ``name=value``; pass False to get positional values only.  Node
    attributes (the names listed in ``_attributes``) are appended only when
    *include_attributes* is true.  Raises TypeError if *node* is not an AST
    instance.
    """
    if not isinstance(node, AST):
        raise TypeError('expected AST, got %r' % node.__class__.__name__)

    def _render(item):
        if isinstance(item, AST):
            pairs = [(name, _render(value)) for name, value in iter_fields(item)]
            if annotate_fields:
                parts = ['%s=%s' % pair for pair in pairs]
            else:
                parts = [text for _, text in pairs]
            out = '%s(%s' % (item.__class__.__name__, ', '.join(parts))
            if include_attributes and item._attributes:
                # Separate attributes from fields only if any field was written.
                out += ', ' if pairs else ' '
                out += ', '.join(
                    '%s=%s' % (attr, _render(getattr(item, attr)))
                    for attr in item._attributes)
            return out + ')'
        if isinstance(item, list):
            return '[%s]' % ', '.join(_render(elem) for elem in item)
        return repr(item)

    return _render(node)
def copy_location(new_node, old_node):
    """
    Transfer ``lineno`` and ``col_offset`` from *old_node* onto *new_node*.

    An attribute is copied only when both nodes list it in ``_attributes``
    and *old_node* actually has it set.  Returns *new_node*.
    """
    for name in ('lineno', 'col_offset'):
        if name not in old_node._attributes:
            continue
        if name not in new_node._attributes:
            continue
        if not hasattr(old_node, name):
            continue
        setattr(new_node, name, getattr(old_node, name))
    return new_node
def fix_missing_locations(node):
    """
    Fill in absent ``lineno`` / ``col_offset`` attributes throughout a tree.

    compile() expects these attributes on every node that supports them,
    which is tedious for generated nodes.  Starting at *node*, each node
    that lacks one inherits the value from its parent; the root falls back
    to line 1, column 0.  Values already present are kept and become the
    inherited values for that node's children.  Returns *node*.
    """
    def _fill(current, line, column):
        supported = current._attributes
        if 'lineno' in supported:
            if hasattr(current, 'lineno'):
                line = current.lineno
            else:
                current.lineno = line
        if 'col_offset' in supported:
            if hasattr(current, 'col_offset'):
                column = current.col_offset
            else:
                current.col_offset = column
        for child in iter_child_nodes(current):
            _fill(child, line, column)

    _fill(node, 1, 0)
    return node
def increment_lineno(node, n=1):
    """
    Shift the line number of every node in the tree under *node* by *n*.

    Handy for "moving" code to another place in a file.  Nodes that support
    ``lineno`` but have none set are treated as being on line 0.  Returns
    *node*.
    """
    positioned = (child for child in walk(node)
                  if 'lineno' in child._attributes)
    for child in positioned:
        current_line = getattr(child, 'lineno', 0)
        child.lineno = current_line + n
    return node
def iter_fields(node):
    """
    Generate ``(fieldname, value)`` pairs for the fields of *node*.

    Names come from ``node._fields``; a name whose attribute is not set on
    *node* is silently skipped.
    """
    for name in node._fields:
        try:
            value = getattr(node, name)
        except AttributeError:
            continue
        yield name, value
def iter_child_nodes(node):
    """
    Generate the direct children of *node*.

    A child is any field value that is itself a node, plus every node found
    inside a field whose value is a list.
    """
    for _, value in iter_fields(node):
        if isinstance(value, list):
            yield from (item for item in value if isinstance(item, AST))
        elif isinstance(value, AST):
            yield value
def get_docstring(node, clean=True):
    """
    Fetch the docstring of a module, class or (async) function node.

    Returns None when the body does not start with a string expression.
    Raises TypeError for node types that cannot carry a docstring.  When
    *clean* is true the text is passed through ``inspect.cleandoc`` before
    being returned.
    """
    if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)):
        raise TypeError("%r can't have docstrings" % node.__class__.__name__)
    body = node.body
    if not body or not isinstance(body[0], Expr):
        return None
    first = body[0].value
    if isinstance(first, Str):
        text = first.s
    elif isinstance(first, Constant) and isinstance(first.value, str):
        text = first.value
    else:
        return None
    if not clean:
        return text
    import inspect
    return inspect.cleandoc(text)
def walk(node):
    """
    Generate *node* and every node beneath it, in no guaranteed order.

    Suited to in-place modification where the surrounding context of each
    node does not matter.
    """
    from collections import deque
    pending = deque((node,))
    while pending:
        current = pending.popleft()
        pending.extend(iter_child_nodes(current))
        yield current
class NodeVisitor(object):
    """
    Base class for read-only traversal of an abstract syntax tree.

    Subclass it and add methods named ``visit_<ClassName>``; `visit` looks
    up the method matching the node's class and returns whatever it returns.
    When no such method exists, `generic_visit` is called instead, which
    simply visits every child node.  Override `visit` to change the
    dispatch rule.

    Do not use this class to modify nodes while traversing; use
    `NodeTransformer` for that.
    """

    def visit(self, node):
        """Dispatch *node* to its ``visit_<ClassName>`` handler."""
        handler_name = 'visit_' + node.__class__.__name__
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(node)

    def generic_visit(self, node):
        """Fallback handler: visit every child node of *node*."""
        for _, value in iter_fields(node):
            children = value if isinstance(value, list) else [value]
            for child in children:
                if isinstance(child, AST):
                    self.visit(child)
class NodeTransformer(NodeVisitor):
    """
    A :class:`NodeVisitor` that rewrites the tree as it walks it.

    The value returned by each visitor method decides what happens to the
    visited node: ``None`` removes it from its location, any other value
    replaces it (returning the original node leaves it in place).

    Example: rewrite every name lookup (``foo``) into ``data['foo']``::

        class RewriteName(NodeTransformer):
            def visit_Name(self, node):
                return copy_location(Subscript(
                    value=Name(id='data', ctx=Load()),
                    slice=Index(value=Str(s=node.id)),
                    ctx=node.ctx
                ), node)

    If the node being handled has children, either transform them yourself
    or call :meth:`generic_visit` on the node first.

    For nodes held in a list field (all statement nodes, for instance) the
    visitor may return a list of nodes instead of a single node.

    Typical usage::

        node = YourTransformer().visit(node)
    """

    def generic_visit(self, node):
        for name, current in iter_fields(node):
            if isinstance(current, list):
                kept = []
                for item in current:
                    if not isinstance(item, AST):
                        # Non-node entries pass through untouched.
                        kept.append(item)
                        continue
                    result = self.visit(item)
                    if result is None:
                        continue
                    if isinstance(result, AST):
                        kept.append(result)
                    else:
                        # A visitor returned several nodes; splice them in.
                        kept.extend(result)
                # Mutate in place so the node keeps the same list object.
                current[:] = kept
            elif isinstance(current, AST):
                replacement = self.visit(current)
                if replacement is None:
                    delattr(node, name)
                else:
                    setattr(node, name, replacement)
        return node

View File

@@ -6,6 +6,7 @@
//! https://github.com/micropython/micropython/blob/master/py/compile.c
use crate::error::{CompileError, CompileErrorType};
pub use crate::mode::Mode;
use crate::output_stream::{CodeObjectStream, OutputStream};
use crate::peephole::PeepholeOptimizer;
use crate::symboltable::{
@@ -105,36 +106,6 @@ pub fn compile_program_single(
})
}
#[derive(Clone, Copy)]
pub enum Mode {
Exec,
Eval,
Single,
}
impl std::str::FromStr for Mode {
type Err = ModeParseError;
fn from_str(s: &str) -> Result<Self, ModeParseError> {
match s {
"exec" => Ok(Mode::Exec),
"eval" => Ok(Mode::Eval),
"single" => Ok(Mode::Single),
_ => Err(ModeParseError { _priv: () }),
}
}
}
#[derive(Debug)]
pub struct ModeParseError {
_priv: (),
}
impl std::fmt::Display for ModeParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, r#"mode should be "exec", "eval", or "single""#)
}
}
impl<O> Default for Compiler<O>
where
O: OutputStream,

View File

@@ -8,6 +8,7 @@ extern crate log;
pub mod compile;
pub mod error;
pub mod mode;
pub(crate) mod output_stream;
pub mod peephole;
pub mod symboltable;

40
compiler/src/mode.rs Normal file
View File

@@ -0,0 +1,40 @@
use rustpython_parser::parser;
/// Compilation mode, selected by the strings `"exec"`, `"eval"` and
/// `"single"` when parsed through `FromStr`.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that modes can be logged
/// and compared in assertions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Mode {
    /// Selected by `"exec"`.
    Exec,
    /// Selected by `"eval"`.
    Eval,
    /// Selected by `"single"`.
    Single,
}
impl std::str::FromStr for Mode {
    type Err = ModeParseError;

    /// Maps `"exec"`, `"eval"` and `"single"` to the corresponding variant;
    /// any other string yields a `ModeParseError`.
    fn from_str(s: &str) -> Result<Self, ModeParseError> {
        let mode = match s {
            "exec" => Mode::Exec,
            "eval" => Mode::Eval,
            "single" => Mode::Single,
            _ => return Err(ModeParseError { _priv: () }),
        };
        Ok(mode)
    }
}
impl Mode {
pub fn to_parser_mode(self) -> parser::Mode {
match self {
Self::Exec | Self::Single => parser::Mode::Program,
Self::Eval => parser::Mode::Statement,
}
}
}
/// Error produced when a string does not name a known mode.
///
/// The private unit field prevents construction outside this module.
#[derive(Debug)]
pub struct ModeParseError {
    _priv: (),
}

impl std::fmt::Display for ModeParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(r#"mode should be "exec", "eval", or "single""#)
    }
}

// Implementing `Error` lets callers box this as `dyn Error` or convert it
// with `?` into generic error types.
impl std::error::Error for ModeParseError {}

View File

@@ -11,6 +11,7 @@ mod fstring;
mod function;
pub mod lexer;
pub mod location;
pub mod mode;
pub mod parser;
lalrpop_mod!(
#[allow(clippy::all)]

27
parser/src/mode.rs Normal file
View File

@@ -0,0 +1,27 @@
/// Parser entry mode.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that modes can be logged
/// and compared in assertions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Mode {
    /// Selected by the strings `"exec"` and `"single"`.
    Program,
    /// Selected by the string `"eval"`.
    Statement,
}
impl std::str::FromStr for Mode {
    type Err = ModeParseError;

    /// Maps `"exec"` and `"single"` to `Mode::Program` and `"eval"` to
    /// `Mode::Statement`; any other string yields a `ModeParseError`.
    fn from_str(s: &str) -> Result<Self, ModeParseError> {
        let mode = match s {
            "exec" | "single" => Mode::Program,
            "eval" => Mode::Statement,
            _ => return Err(ModeParseError { _priv: () }),
        };
        Ok(mode)
    }
}
/// Error produced when a string does not name a known mode.
///
/// The private unit field prevents construction outside this module.
#[derive(Debug)]
pub struct ModeParseError {
    _priv: (),
}

impl std::fmt::Display for ModeParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(r#"mode should be "exec", "eval", or "single""#)
    }
}

// Implementing `Error` lets callers box this as `dyn Error` or convert it
// with `?` into generic error types.
impl std::error::Error for ModeParseError {}

View File

@@ -3,6 +3,7 @@ use std::iter;
use crate::ast;
use crate::error::ParseError;
use crate::lexer;
pub use crate::mode::Mode;
use crate::python;
use crate::token;
@@ -73,6 +74,20 @@ pub fn parse_expression(source: &str) -> Result<ast::Expression, ParseError> {
do_lalr_parsing!(source, Expression, StartExpression)
}
/// Parses `source` with the entry point selected by `mode` and wraps the
/// result in the matching `ast::Top` variant: `Mode::Program` goes through
/// `parse_program`, `Mode::Statement` through `parse_statement`.
pub fn parse(source: &str, mode: Mode) -> Result<ast::Top, ParseError> {
    let top = match mode {
        Mode::Program => ast::Top::Program(parse_program(source)?),
        Mode::Statement => ast::Top::Statement(parse_statement(source)?),
    };
    Ok(top)
}
#[cfg(test)]
mod tests {
use super::ast;

View File

@@ -29,10 +29,10 @@ use crate::pyobject::{
TryFromObject, TypeProtocol,
};
use crate::scope::Scope;
use crate::vm::VirtualMachine;
use crate::stdlib::ast;
#[cfg(not(target_arch = "wasm32"))]
use crate::stdlib::io::io_open;
use crate::vm::VirtualMachine;
fn builtin_abs(x: PyObjectRef, vm: &VirtualMachine) -> PyResult {
let method = vm.get_method_or_type_error(x.clone(), "__abs__", || {
@@ -124,22 +124,41 @@ struct CompileArgs {
optimize: OptionalArg<PyIntRef>,
}
#[cfg(feature = "rustpython-compiler")]
fn builtin_compile(args: CompileArgs, vm: &VirtualMachine) -> PyResult<PyCodeRef> {
fn builtin_compile(args: CompileArgs, vm: &VirtualMachine) -> PyResult {
// TODO: compile::compile should probably get bytes
let source = match &args.source {
Either::A(string) => string.as_str(),
Either::B(bytes) => str::from_utf8(bytes).unwrap(),
};
let mode = args
.mode
.as_str()
.parse::<compile::Mode>()
.map_err(|err| vm.new_value_error(err.to_string()))?;
let mode_str = args.mode.as_str();
vm.compile(source, mode, args.filename.as_str().to_string())
.map_err(|err| vm.new_syntax_error(&err))
let flags = args
.flags
.map_or(Ok(0), |v| i32::try_from_object(vm, v.into_object()))?;
if (flags & ast::PY_COMPILE_FLAG_AST_ONLY).is_zero() {
#[cfg(feature = "rustpython-compiler")]
{
let mode = mode_str
.parse::<compile::Mode>()
.map_err(|err| vm.new_value_error(err.to_string()))?;
vm.compile(&source, mode, args.filename.as_str().to_string())
.map(|o| o.into_object())
.map_err(|err| vm.new_syntax_error(&err))
}
#[cfg(not(feature = "rustpython-compiler"))]
{
Err(vm.new_value_error("PyCF_ONLY_AST flag is required without compiler support"))
}
} else {
use rustpython_parser::parser;
let mode = mode_str
.parse::<parser::Mode>()
.map_err(|err| vm.new_value_error(err.to_string()))?;
ast::parse(&vm, &source, mode)
}
}
fn builtin_delattr(obj: PyObjectRef, attr: PyStringRef, vm: &VirtualMachine) -> PyResult<()> {
@@ -748,7 +767,6 @@ pub fn make_module(vm: &VirtualMachine, module: PyObjectRef) {
#[cfg(feature = "rustpython-compiler")]
{
extend_module!(vm, module, {
"compile" => ctx.new_rustfunc(builtin_compile),
"eval" => ctx.new_rustfunc(builtin_eval),
"exec" => ctx.new_rustfunc(builtin_exec),
});
@@ -771,6 +789,7 @@ pub fn make_module(vm: &VirtualMachine, module: PyObjectRef) {
"callable" => ctx.new_rustfunc(builtin_callable),
"chr" => ctx.new_rustfunc(builtin_chr),
"classmethod" => ctx.classmethod_type(),
"compile" => ctx.new_rustfunc(builtin_compile),
"complex" => ctx.complex_type(),
"delattr" => ctx.new_rustfunc(builtin_delattr),
"dict" => ctx.dict_type(),

View File

@@ -381,6 +381,16 @@ impl<T> OptionalArg<T> {
}
}
/// Applies `f` to the contained value when the argument is `Present`;
/// returns `default` when it is `Missing`.
pub fn map_or<U, F>(self, default: U, f: F) -> U
where
    F: FnOnce(T) -> U,
{
    if let Present(value) = self {
        f(value)
    } else {
        default
    }
}
pub fn map_or_else<U, D, F>(self, default: D, f: F) -> U
where
D: FnOnce() -> U,

View File

@@ -7,10 +7,9 @@ use std::ops::Deref;
use num_complex::Complex64;
use rustpython_parser::{ast, parser};
use rustpython_parser::{ast, mode::Mode, parser};
use crate::obj::objlist::PyListRef;
use crate::obj::objstr::PyStringRef;
use crate::obj::objtype::PyClassRef;
use crate::pyobject::{PyObjectRef, PyRef, PyResult, PyValue};
use crate::vm::VirtualMachine;
@@ -19,9 +18,12 @@ use crate::vm::VirtualMachine;
struct AstNode;
type AstNodeRef = PyRef<AstNode>;
const MODULE_NAME: &str = "_ast";
pub const PY_COMPILE_FLAG_AST_ONLY: i32 = 0x0400;
impl PyValue for AstNode {
fn class(vm: &VirtualMachine) -> PyClassRef {
vm.class("ast", "AST")
vm.class(MODULE_NAME, "AST")
}
}
@@ -48,14 +50,17 @@ macro_rules! node {
}
}
fn program_to_ast(vm: &VirtualMachine, program: &ast::Program) -> PyResult<AstNodeRef> {
let py_body = statements_to_ast(vm, &program.statements)?;
Ok(node!(vm, Module, { body => py_body }))
fn top_to_ast(vm: &VirtualMachine, top: &ast::Top) -> PyResult<PyListRef> {
match top {
ast::Top::Program(program) => statements_to_ast(vm, &program.statements),
ast::Top::Statement(statements) => statements_to_ast(vm, statements),
ast::Top::Expression(_) => unimplemented!("top_to_ast unimplemented ast::Top::Expression"),
}
}
// Create a node class instance
fn create_node(vm: &VirtualMachine, name: &str) -> PyResult<AstNodeRef> {
AstNode.into_ref_with_type(vm, vm.class("ast", name))
AstNode.into_ref_with_type(vm, vm.class(MODULE_NAME, name))
}
fn statements_to_ast(vm: &VirtualMachine, statements: &[ast::Statement]) -> PyResult<PyListRef> {
@@ -630,20 +635,17 @@ fn string_to_ast(vm: &VirtualMachine, string: &ast::StringGroup) -> PyResult<Ast
Ok(string)
}
fn ast_parse(source: PyStringRef, vm: &VirtualMachine) -> PyResult<AstNodeRef> {
let internal_ast = parser::parse_program(source.as_str())
.map_err(|err| vm.new_value_error(format!("{}", err)))?;
// source.clone();
program_to_ast(&vm, &internal_ast)
pub(crate) fn parse(vm: &VirtualMachine, source: &str, mode: Mode) -> PyResult {
let ast = parser::parse(source, mode).map_err(|err| vm.new_value_error(format!("{}", err)))?;
let py_body = top_to_ast(vm, &ast)?;
Ok(node!(vm, Module, { body => py_body }).into_object())
}
#[allow(clippy::cognitive_complexity)]
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
let ctx = &vm.ctx;
let ast_base = py_class!(ctx, "AST", ctx.object(), {});
py_module!(vm, "ast", {
"parse" => ctx.new_rustfunc(ast_parse),
py_module!(vm, MODULE_NAME, {
"AST" => ast_base.clone(),
// TODO: There's got to be a better way!
"alias" => py_class!(ctx, "alias", ast_base.clone(), {}),
@@ -709,5 +711,6 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
"withitem" => py_class!(ctx, "withitem", ast_base.clone(), {}),
"Yield" => py_class!(ctx, "Yield", ast_base.clone(), {}),
"YieldFrom" => py_class!(ctx, "YieldFrom", ast_base.clone(), {}),
"PyCF_ONLY_AST" => ctx.new_int(PY_COMPILE_FLAG_AST_ONLY),
})
}

View File

@@ -1,6 +1,6 @@
pub mod array;
#[cfg(feature = "rustpython-parser")]
mod ast;
pub(crate) mod ast;
mod binascii;
mod codecs;
mod collections;
@@ -86,7 +86,7 @@ pub fn get_module_inits() -> HashMap<String, StdlibInitFunc> {
#[cfg(feature = "rustpython-parser")]
{
modules.insert(
"ast".to_string(),
"_ast".to_string(),
Box::new(ast::make_module) as StdlibInitFunc,
);
modules.insert("keyword".to_string(), Box::new(keyword::make_module));