Merge pull request #72 from RustPython/exceptions

Add row and column info to lexer
This commit is contained in:
Windel Bouwman
2018-08-22 19:07:52 +02:00
committed by GitHub
6 changed files with 442 additions and 229 deletions

View File

@@ -2,12 +2,9 @@
* Implement abstract syntax tree nodes for the python language.
*/
pub use super::lexer::Location;
/*
#[derive(Debug)]
pub struct Location {
pub row: i32,
pub column: i32,
}
#[derive(Debug)]
pub struct Node {
@@ -17,7 +14,7 @@ pub struct Node {
#[derive(Debug, PartialEq)]
pub struct Program {
pub statements: Vec<Statement>,
pub statements: Vec<LocatedStatement>,
}
#[derive(Debug, PartialEq)]
@@ -28,6 +25,14 @@ pub struct SingleImport {
pub alias: Option<String>,
}
/// Wraps an AST node of type `T` together with a source `Location`.
///
/// Equality (`PartialEq`) compares both the location and the wrapped node.
#[derive(Debug, PartialEq)]
pub struct Located<T> {
/// Source position associated with `node`.
pub location: Location,
/// The wrapped AST node itself.
pub node: T,
}
/// A `Statement` annotated with its source location.
pub type LocatedStatement = Located<Statement>;
#[derive(Debug, PartialEq)]
pub enum Statement {
Break,
@@ -60,27 +65,27 @@ pub enum Statement {
},
If {
test: Expression,
body: Vec<Statement>,
orelse: Option<Vec<Statement>>,
body: Vec<LocatedStatement>,
orelse: Option<Vec<LocatedStatement>>,
},
While {
test: Expression,
body: Vec<Statement>,
orelse: Option<Vec<Statement>>,
body: Vec<LocatedStatement>,
orelse: Option<Vec<LocatedStatement>>,
},
With {
items: Expression,
body: Vec<Statement>,
body: Vec<LocatedStatement>,
},
For {
target: Vec<Expression>,
iter: Vec<Expression>,
body: Vec<Statement>,
orelse: Option<Vec<Statement>>,
body: Vec<LocatedStatement>,
orelse: Option<Vec<LocatedStatement>>,
},
ClassDef {
name: String,
body: Vec<Statement>,
body: Vec<LocatedStatement>,
args: Vec<String>,
// TODO: docstring: String,
},
@@ -88,7 +93,7 @@ pub enum Statement {
name: String,
args: Vec<String>,
// docstring: String,
body: Vec<Statement>,
body: Vec<LocatedStatement>,
},
}

View File

@@ -10,7 +10,7 @@ pub struct Lexer<'input> {
pending: Vec<Spanned<Tok>>,
chr0: Option<char>,
chr1: Option<char>,
location: usize,
location: Location,
}
#[derive(Debug)]
@@ -18,7 +18,22 @@ pub enum LexicalError {
StringError,
}
pub type Spanned<Tok> = Result<(usize, Tok, usize), LexicalError>;
/// A position in the source text, expressed as a row and a column.
///
/// The lexer in this module starts counting at row 1, column 1.
/// `Location::default()` yields row 0, column 0.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Location {
    row: usize,
    column: usize,
}

impl Location {
    /// Creates a location from an explicit `row` and `column`.
    pub fn new(row: usize, column: usize) -> Self {
        Location { row, column }
    }

    /// Returns the row of this location.
    ///
    /// The fields are private, so this accessor is how code outside this
    /// module reads the row.
    pub fn row(&self) -> usize {
        self.row
    }

    /// Returns the column of this location.
    pub fn column(&self) -> usize {
        self.column
    }
}
pub type Spanned<Tok> = Result<(Location, Tok, Location), LexicalError>;
impl<'input> Lexer<'input> {
pub fn new(input: &'input str) -> Self {
@@ -29,22 +44,25 @@ impl<'input> Lexer<'input> {
indentation_stack: vec![0],
pending: Vec::new(),
chr0: None,
location: 0,
location: Location::new(0, 0),
chr1: None,
};
lxr.next_char();
lxr.next_char();
// Start at top row (=1) left column (=1)
lxr.location.row = 1;
lxr.location.column = 1;
lxr
}
// Lexer helper functions:
fn lex_identifier(&mut self) -> Spanned<Tok> {
let mut name = String::new();
let start_pos = self.location;
let start_pos = self.get_pos();
while self.is_char() {
name.push(self.next_char().unwrap());
}
let end_pos = self.location;
let end_pos = self.get_pos();
let mut keywords: HashMap<String, Tok> = HashMap::new();
@@ -95,7 +113,7 @@ impl<'input> Lexer<'input> {
fn lex_number(&mut self) -> Spanned<Tok> {
let mut value_text = String::new();
let start_pos = self.location;
let start_pos = self.get_pos();
while self.is_number() {
value_text.push(self.next_char().unwrap());
}
@@ -108,7 +126,7 @@ impl<'input> Lexer<'input> {
}
}
let end_pos = self.location;
let end_pos = self.get_pos();
let value = value_text;
@@ -122,9 +140,11 @@ impl<'input> Lexer<'input> {
self.next_char();
match self.chr0 {
Some('\n') => {
self.new_line();
return;
}
Some('\r') => {
self.new_line();
return;
}
Some(_) => {}
@@ -136,7 +156,7 @@ impl<'input> Lexer<'input> {
fn lex_string(&mut self) -> Spanned<Tok> {
let quote_char = self.next_char().unwrap();
let mut string_content = String::new();
let start_pos = self.location;
let start_pos = self.get_pos();
// If the next two characters are also the quote character, then we have a triple-quoted
// string; consume those two characters and ensure that we require a triple-quote to close
@@ -215,7 +235,7 @@ impl<'input> Lexer<'input> {
}
}
}
let end_pos = self.location;
let end_pos = self.get_pos();
return Ok((
start_pos,
@@ -245,13 +265,19 @@ impl<'input> Lexer<'input> {
let nxt = self.chars.next();
self.chr0 = self.chr1;
self.chr1 = nxt.map(|x| x.1);
self.location = match nxt {
Some(p) => p.0,
None => 99999,
};
self.location.column += 1;
c
}
fn get_pos(&self) -> Location {
self.location.clone()
}
/// Moves the tracked location to the beginning of the next row: the row is
/// incremented by one and the column is reset to 1.
fn new_line(&mut self) {
    let pos = &mut self.location;
    pos.column = 1;
    pos.row += 1;
}
fn inner_next(&mut self) -> Option<Spanned<Tok>> {
if !self.pending.is_empty() {
return Some(self.pending.remove(0));
@@ -283,12 +309,14 @@ impl<'input> Lexer<'input> {
self.next_char();
}
self.at_begin_of_line = true;
self.new_line();
continue 'top_loop;
}
Some('\n') => {
// Empty line!
self.next_char();
self.at_begin_of_line = true;
self.new_line();
continue 'top_loop;
}
_ => {
@@ -305,14 +333,18 @@ impl<'input> Lexer<'input> {
} else if col > current_indentation {
// New indentation level:
self.indentation_stack.push(col);
return Some(Ok((0, Tok::Indent, 0)));
let tok_start = self.get_pos();
let tok_end = tok_start.clone();
return Some(Ok((tok_start, Tok::Indent, tok_end)));
} else if col < current_indentation {
// One or more dedentations
// Pop off other levels until col is found:
while col < *self.indentation_stack.last().unwrap() {
self.indentation_stack.pop().unwrap();
self.pending.push(Ok((0, Tok::Dedent, 0)));
let tok_start = self.get_pos();
let tok_end = tok_start.clone();
self.pending.push(Ok((tok_start, Tok::Dedent, tok_end)));
}
if col != *self.indentation_stack.last().unwrap() {
@@ -339,284 +371,333 @@ impl<'input> Lexer<'input> {
return Some(self.lex_string());
}
Some('=') => {
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((self.location, Tok::EqEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::EqEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Equal, tok_end)));
}
_ => return Some(Ok((self.location, Tok::Equal, self.location + 1))),
}
}
Some('+') => {
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((self.location, Tok::PlusEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::PlusEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Plus, tok_end)));
}
_ => return Some(Ok((self.location, Tok::Plus, self.location + 1))),
}
}
Some('*') => {
let tok_start = self.location;
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((tok_start, Tok::StarEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::StarEqual, tok_end)));
}
Some('*') => {
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((
tok_start,
Tok::DoubleStarEqual,
self.location + 1,
)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::DoubleStarEqual, tok_end)));
}
_ => {
return Some(Ok((tok_start, Tok::DoubleStar, self.location + 1)))
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::DoubleStar, tok_end)));
}
}
}
_ => return Some(Ok((tok_start, Tok::Star, self.location + 1))),
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Star, tok_end)));
}
}
}
Some('/') => {
let tok_start = self.location;
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((tok_start, Tok::SlashEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::SlashEqual, tok_end)));
}
Some('/') => {
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((
tok_start,
Tok::DoubleSlashEqual,
self.location + 1,
)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::DoubleSlashEqual, tok_end)));
}
_ => {
return Some(Ok((
tok_start,
Tok::DoubleSlash,
self.location + 1,
)))
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::DoubleSlash, tok_end)));
}
}
}
_ => return Some(Ok((tok_start, Tok::Slash, self.location + 1))),
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Slash, tok_end)));
}
}
}
Some('%') => {
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((self.location, Tok::PercentEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::PercentEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Percent, tok_end)));
}
_ => return Some(Ok((self.location, Tok::Percent, self.location + 1))),
}
}
Some('|') => {
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((self.location, Tok::VbarEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::VbarEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Vbar, tok_end)));
}
_ => return Some(Ok((self.location, Tok::Vbar, self.location + 1))),
}
}
Some('^') => {
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((
self.location,
Tok::CircumflexEqual,
self.location + 1,
)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::CircumflexEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::CircumFlex, tok_end)));
}
_ => return Some(Ok((self.location, Tok::CircumFlex, self.location + 1))),
}
}
Some('&') => {
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((self.location, Tok::AmperEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::AmperEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Amper, tok_end)));
}
_ => return Some(Ok((self.location, Tok::Amper, self.location + 1))),
}
}
Some('-') => {
let tok_start = self.location;
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((tok_start, Tok::MinusEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::MinusEqual, tok_end)));
}
Some('>') => {
self.next_char();
return Some(Ok((tok_start, Tok::Rarrow, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Rarrow, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Minus, tok_end)));
}
_ => return Some(Ok((tok_start, Tok::Minus, self.location + 1))),
}
}
Some('@') => {
let tok_start = self.location;
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((tok_start, Tok::AtEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::AtEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::At, tok_end)));
}
_ => return Some(Ok((tok_start, Tok::At, self.location + 1))),
}
}
Some('!') => {
let tok_start = self.location;
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('=') => {
self.next_char();
return Some(Ok((tok_start, Tok::NotEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::NotEqual, tok_end)));
}
_ => panic!("Invalid token '!'"),
}
}
Some('~') => {
self.next_char();
return Some(Ok((0, Tok::Tilde, 0)));
return Some(self.eat_single_char(Tok::Tilde));
}
Some('(') => {
self.next_char();
let result = self.eat_single_char(Tok::Lpar);
self.nesting += 1;
return Some(Ok((0, Tok::Lpar, 0)));
return Some(result);
}
Some(')') => {
self.next_char();
let result = self.eat_single_char(Tok::Rpar);
self.nesting -= 1;
return Some(Ok((0, Tok::Rpar, 0)));
return Some(result);
}
Some('[') => {
self.next_char();
let result = self.eat_single_char(Tok::Lsqb);
self.nesting += 1;
return Some(Ok((0, Tok::Lsqb, 0)));
return Some(result);
}
Some(']') => {
self.next_char();
let result = self.eat_single_char(Tok::Rsqb);
self.nesting -= 1;
return Some(Ok((self.location, Tok::Rsqb, self.location + 1)));
return Some(result);
}
Some('{') => {
self.next_char();
let result = self.eat_single_char(Tok::Lbrace);
self.nesting += 1;
return Some(Ok((0, Tok::Lbrace, 0)));
return Some(result);
}
Some('}') => {
self.next_char();
let result = self.eat_single_char(Tok::Rbrace);
self.nesting -= 1;
return Some(Ok((self.location, Tok::Rbrace, self.location + 1)));
return Some(result);
}
Some(':') => {
self.next_char();
return Some(Ok((self.location, Tok::Colon, self.location + 1)));
return Some(self.eat_single_char(Tok::Colon));
}
Some(';') => {
self.next_char();
return Some(Ok((self.location, Tok::Semi, self.location + 1)));
return Some(self.eat_single_char(Tok::Semi));
}
Some('<') => {
let tok_start = self.location;
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('<') => {
self.next_char();
match self.chr0 {
Some('=') => {
return Some(Ok((
tok_start,
Tok::LeftShiftEqual,
self.location + 1,
)))
self.next_char();
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::LeftShiftEqual, tok_end)));
}
_ => {
return Some(Ok((tok_start, Tok::LeftShift, self.location + 1)))
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::LeftShift, tok_end)));
}
}
}
Some('=') => {
self.next_char();
return Some(Ok((tok_start, Tok::LessEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::LessEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Less, tok_end)));
}
_ => return Some(Ok((tok_start, Tok::Less, self.location + 1))),
}
}
Some('>') => {
let tok_start = self.location;
let tok_start = self.get_pos();
self.next_char();
match self.chr0 {
Some('>') => {
self.next_char();
match self.chr0 {
Some('=') => {
return Some(Ok((
tok_start,
Tok::RightShiftEqual,
self.location + 1,
)))
self.next_char();
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::RightShiftEqual, tok_end)));
}
_ => {
return Some(Ok((tok_start, Tok::RightShift, self.location + 1)))
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::RightShift, tok_end)));
}
}
}
Some('=') => {
self.next_char();
return Some(Ok((tok_start, Tok::GreaterEqual, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::GreaterEqual, tok_end)));
}
_ => {
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Greater, tok_end)));
}
_ => return Some(Ok((tok_start, Tok::Greater, self.location + 1))),
}
}
Some(',') => {
let tok_start = self.get_pos();
self.next_char();
return Some(Ok((self.location, Tok::Comma, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Comma, tok_end)));
}
Some('.') => {
let tok_start = self.get_pos();
self.next_char();
return Some(Ok((self.location, Tok::Dot, self.location + 1)));
let tok_end = self.get_pos();
return Some(Ok((tok_start, Tok::Dot, tok_end)));
}
Some('\r') => {
let tok_start = self.get_pos();
self.next_char();
let tok_end = self.get_pos();
self.new_line();
// Depending on the nesting level, we emit newline or not:
if self.nesting == 0 {
self.at_begin_of_line = true;
return Some(Ok((self.location, Tok::Newline, self.location + 1)));
return Some(Ok((tok_start, Tok::Newline, tok_end)));
} else {
continue;
}
}
Some('\n') => {
let tok_start = self.get_pos();
self.next_char();
let tok_end = self.get_pos();
self.new_line();
// Depending on the nesting level, we emit newline or not:
if self.nesting == 0 {
self.at_begin_of_line = true;
return Some(Ok((self.location, Tok::Newline, self.location + 1)));
return Some(Ok((tok_start, Tok::Newline, tok_end)));
} else {
continue;
}
@@ -634,6 +715,13 @@ impl<'input> Lexer<'input> {
}
}
}
/// Consumes one character and returns `ty` as a spanned token.
///
/// The span starts at the location before the character is consumed and ends
/// at the location reported right after consuming it.
fn eat_single_char(&mut self, ty: Tok) -> Spanned<Tok> {
    let begin = self.get_pos();
    self.next_char();
    // Tuple fields are evaluated left to right, so the end position is read
    // only after `next_char` has advanced the lexer.
    Ok((begin, ty, self.get_pos()))
}
}
/* Implement iterator pattern for the get_tok function.

View File

@@ -52,7 +52,7 @@ pub fn parse_program(source: &String) -> Result<ast::Program, String> {
}
}
pub fn parse_statement(source: &String) -> Result<ast::Statement, String> {
pub fn parse_statement(source: &String) -> Result<ast::LocatedStatement, String> {
let lxr = lexer::Lexer::new(&source);
match python::StatementParser::new().parse(lxr) {
Err(why) => Err(String::from(format!("{:?}", why))),
@@ -88,14 +88,17 @@ mod tests {
assert_eq!(
parse_ast,
ast::Program {
statements: vec![ast::Statement::Expression {
expression: ast::Expression::Call {
function: Box::new(ast::Expression::Identifier {
name: String::from("print"),
}),
args: vec![ast::Expression::String {
value: String::from("Hello world"),
},],
statements: vec![ast::LocatedStatement {
location: ast::Location::new(1, 1),
node: ast::Statement::Expression {
expression: ast::Expression::Call {
function: Box::new(ast::Expression::Identifier {
name: String::from("print"),
}),
args: vec![ast::Expression::String {
value: String::from("Hello world"),
},],
},
},
},],
}
@@ -109,19 +112,22 @@ mod tests {
assert_eq!(
parse_ast,
ast::Program {
statements: vec![ast::Statement::Expression {
expression: ast::Expression::Call {
function: Box::new(ast::Expression::Identifier {
name: String::from("print"),
}),
args: vec![
ast::Expression::String {
value: String::from("Hello world"),
},
ast::Expression::Number {
value: ast::Number::Integer { value: 2 },
},
],
statements: vec![ast::LocatedStatement {
location: ast::Location::new(1, 1),
node: ast::Statement::Expression {
expression: ast::Expression::Call {
function: Box::new(ast::Expression::Identifier {
name: String::from("print"),
}),
args: vec![
ast::Expression::String {
value: String::from("Hello world"),
},
ast::Expression::Number {
value: ast::Number::Integer { value: 2 },
},
],
},
},
},],
}
@@ -134,30 +140,45 @@ mod tests {
let parse_ast = parse_statement(&source).unwrap();
assert_eq!(
parse_ast,
ast::Statement::If {
test: ast::Expression::Number {
value: ast::Number::Integer { value: 1 },
},
body: vec![ast::Statement::Expression {
expression: ast::Expression::Number {
value: ast::Number::Integer { value: 10 },
},
},],
orelse: Some(vec![ast::Statement::If {
ast::LocatedStatement {
location: ast::Location::new(1, 1),
node: ast::Statement::If {
test: ast::Expression::Number {
value: ast::Number::Integer { value: 2 },
value: ast::Number::Integer { value: 1 },
},
body: vec![ast::Statement::Expression {
expression: ast::Expression::Number {
value: ast::Number::Integer { value: 20 },
body: vec![ast::LocatedStatement {
location: ast::Location::new(1, 7),
node: ast::Statement::Expression {
expression: ast::Expression::Number {
value: ast::Number::Integer { value: 10 },
}
},
},],
orelse: Some(vec![ast::Statement::Expression {
expression: ast::Expression::Number {
value: ast::Number::Integer { value: 30 },
},
orelse: Some(vec![ast::LocatedStatement {
location: ast::Location::new(2, 1),
node: ast::Statement::If {
test: ast::Expression::Number {
value: ast::Number::Integer { value: 2 },
},
body: vec![ast::LocatedStatement {
location: ast::Location::new(2, 9),
node: ast::Statement::Expression {
expression: ast::Expression::Number {
value: ast::Number::Integer { value: 20 },
},
},
},],
orelse: Some(vec![ast::LocatedStatement {
location: ast::Location::new(3, 7),
node: ast::Statement::Expression {
expression: ast::Expression::Number {
value: ast::Number::Integer { value: 30 },
},
},
},]),
}
},]),
},]),
}
}
);
}
@@ -168,18 +189,21 @@ mod tests {
let parse_ast = parse_statement(&source);
assert_eq!(
parse_ast,
Ok(ast::Statement::Expression {
expression: ast::Expression::Lambda {
args: vec![String::from("x"), String::from("y")],
body: Box::new(ast::Expression::Binop {
a: Box::new(ast::Expression::Identifier {
name: String::from("x"),
}),
op: ast::Operator::Mult,
b: Box::new(ast::Expression::Identifier {
name: String::from("y"),
Ok(ast::LocatedStatement {
location: ast::Location::new(1, 1),
node: ast::Statement::Expression {
expression: ast::Expression::Lambda {
args: vec![String::from("x"), String::from("y")],
body: Box::new(ast::Expression::Binop {
a: Box::new(ast::Expression::Identifier {
name: String::from("x"),
}),
op: ast::Operator::Mult,
b: Box::new(ast::Expression::Identifier {
name: String::from("y"),
})
})
})
}
}
})
)
@@ -190,14 +214,23 @@ mod tests {
let source = String::from("class Foo(A, B):\n def __init__(self):\n pass\n");
assert_eq!(
parse_statement(&source),
Ok(ast::Statement::ClassDef {
name: String::from("Foo"),
args: vec![String::from("A"), String::from("B")],
body: vec![ast::Statement::FunctionDef {
name: String::from("__init__"),
args: vec![String::from("self")],
body: vec![ast::Statement::Pass],
}],
Ok(ast::LocatedStatement {
location: ast::Location::new(1, 1),
node: ast::Statement::ClassDef {
name: String::from("Foo"),
args: vec![String::from("A"), String::from("B")],
body: vec![ast::LocatedStatement {
location: ast::Location::new(2, 2),
node: ast::Statement::FunctionDef {
name: String::from("__init__"),
args: vec![String::from("self")],
body: vec![ast::LocatedStatement {
location: ast::Location::new(3, 3),
node: ast::Statement::Pass,
}],
}
}],
}
})
)
}

View File

@@ -11,37 +11,42 @@ pub Program: ast::Program = {
};
// A file line either has a declaration, or an empty newline:
FileLine: Option<ast::Statement> = {
FileLine: Option<ast::LocatedStatement> = {
<s:Statement> => Some(s),
"\n" => None,
};
Suite: Vec<ast::Statement> = {
Suite: Vec<ast::LocatedStatement> = {
<s:SimpleStatement> => vec![s],
"\n" indent <s:Statement+> dedent => s,
};
pub Statement: ast::Statement = {
pub Statement: ast::LocatedStatement = {
SimpleStatement,
CompoundStatement,
};
SimpleStatement: ast::Statement = {
SimpleStatement: ast::LocatedStatement = {
<s:SmallStatement> "\n" => s,
<s:SmallStatement> ";" => s,
};
SmallStatement: ast::Statement = {
SmallStatement: ast::LocatedStatement = {
// <e:Expression> => ast::Statement::Expression { expression: e },
ExpressionStatement,
"pass" => ast::Statement::Pass,
<loc:@L> "pass" => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::Pass,
}
},
FlowStatement,
ImportStatement,
AssertStatement,
};
ExpressionStatement: ast::Statement = {
<e:TestList> <e2:AssignSuffix*> => {
ExpressionStatement: ast::LocatedStatement = {
<loc:@L> <e:TestList> <e2:AssignSuffix*> => {
//match e2 {
// None => ast::Statement::Expression { expression: e },
// Some(e3) => ast::Statement::Expression { expression: e },
@@ -52,21 +57,30 @@ ExpressionStatement: ast::Statement = {
let rhs = e2.into_iter().next().unwrap();
// ast::Expression::Tuple { elements: e2.into_iter().next().unwrap()
let v = rhs.into_iter().next().unwrap();
let lhs = ast::Statement::Assign { targets: e, value: v };
let lhs = ast::LocatedStatement {
location: loc.clone(),
node: ast::Statement::Assign { targets: e, value: v },
};
lhs
} else {
if e.len() > 1 {
panic!("Not good?");
// ast::Statement::Expression { expression: e[0] }
} else {
ast::Statement::Expression { expression: e.into_iter().next().unwrap() }
ast::LocatedStatement {
location: loc.clone(),
node: ast::Statement::Expression { expression: e.into_iter().next().unwrap() },
}
}
}
},
<e1:Test> <op:AugAssign> <e2:TestList> => {
<loc:@L> <e1:Test> <op:AugAssign> <e2:TestList> => {
// TODO: this works in most cases:
let rhs = e2.into_iter().next().unwrap();
ast::Statement::AugAssign { target: e1, op: op, value: rhs }
ast::LocatedStatement {
location: loc,
node: ast::Statement::AugAssign { target: e1, op: op, value: rhs },
}
},
};
@@ -90,17 +104,34 @@ AugAssign: ast::Operator = {
"//=" => ast::Operator::FloorDiv,
};
FlowStatement: ast::Statement = {
"break" => ast::Statement::Break,
"continue" => ast::Statement::Continue,
"return" <t:TestList?> => ast::Statement::Return { value: t},
FlowStatement: ast::LocatedStatement = {
<loc:@L> "break" => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::Break,
}
},
<loc:@L> "continue" => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::Continue,
}
},
<loc:@L> "return" <t:TestList?> => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::Return { value: t},
}
},
// raise
// yield
};
ImportStatement: ast::Statement = {
"import" <i: Comma<ImportPart<<DottedName>>>> => {
ast::Statement::Import {
ImportStatement: ast::LocatedStatement = {
<loc:@L> "import" <i: Comma<ImportPart<<DottedName>>>> => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::Import {
import_parts: i
.iter()
.map(|(n, a)|
@@ -110,10 +141,13 @@ ImportStatement: ast::Statement = {
alias: a.clone()
})
.collect()
},
}
},
"from" <n:DottedName> "import" <i: Comma<ImportPart<Identifier>>> => {
ast::Statement::Import {
<loc:@L> "from" <n:DottedName> "import" <i: Comma<ImportPart<Identifier>>> => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::Import {
import_parts: i
.iter()
.map(|(i, a)|
@@ -123,6 +157,7 @@ ImportStatement: ast::Statement = {
alias: a.clone()
})
.collect()
},
}
},
};
@@ -136,17 +171,22 @@ DottedName: String = {
<n:name> => n,
};
AssertStatement: ast::Statement = {
"assert" <t:Test> <m: ("," Test)?> => ast::Statement::Assert {
test: t,
msg: match m {
Some(e) => Some(e.1),
None => None,
AssertStatement: ast::LocatedStatement = {
<loc:@L> "assert" <t:Test> <m: ("," Test)?> => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::Assert {
test: t,
msg: match m {
Some(e) => Some(e.1),
None => None,
}
}
}
},
};
CompoundStatement: ast::Statement = {
CompoundStatement: ast::LocatedStatement = {
IfStatement,
WhileStatement,
ForStatement,
@@ -155,8 +195,8 @@ CompoundStatement: ast::Statement = {
ClassDef,
};
IfStatement: ast::Statement = {
"if" <t:Test> ":" <s1:Suite> <s2:("elif" Test ":" Suite)*> <s3:("else" ":" Suite)?> => {
IfStatement: ast::LocatedStatement = {
<loc:@L> "if" <t:Test> ":" <s1:Suite> <s2:(@L "elif" Test ":" Suite)*> <s3:("else" ":" Suite)?> => {
// Determine last else:
let mut last = match s3 {
Some(s) => Some(s.2),
@@ -165,39 +205,62 @@ IfStatement: ast::Statement = {
// handle elif:
for i in s2.into_iter().rev() {
last = Some(vec![ast::Statement::If { test: i.1, body: i.3, orelse: last }]);
let x = ast::LocatedStatement {
location: i.0,
node: ast::Statement::If { test: i.2, body: i.4, orelse: last },
};
last = Some(vec![x]);
}
ast::Statement::If { test: t, body: s1, orelse: last }
ast::LocatedStatement {
location: loc,
node: ast::Statement::If { test: t, body: s1, orelse: last }
}
},
};
WhileStatement: ast::Statement = {
"while" <e:Test> ":" <s:Suite> <s2:("else" ":" Suite)?> => {
WhileStatement: ast::LocatedStatement = {
<loc:@L> "while" <e:Test> ":" <s:Suite> <s2:("else" ":" Suite)?> => {
let or_else = match s2 {
Some(s) => Some(s.2),
None => None,
};
ast::Statement::While { test: e, body: s, orelse: or_else }
ast::LocatedStatement {
location: loc,
node: ast::Statement::While { test: e, body: s, orelse: or_else },
}
},
};
ForStatement: ast::Statement = {
"for" <e:ExpressionList> "in" <t:TestList> ":" <s:Suite> <s2:("else" ":" Suite)?> => {
ForStatement: ast::LocatedStatement = {
<loc:@L> "for" <e:ExpressionList> "in" <t:TestList> ":" <s:Suite> <s2:("else" ":" Suite)?> => {
let or_else = match s2 {
Some(s) => Some(s.2),
None => None,
};
ast::Statement::For { target: e, iter: t, body: s, orelse: or_else }
ast::LocatedStatement {
location: loc,
node: ast::Statement::For { target: e, iter: t, body: s, orelse: or_else },
}
},
};
WithStatement: ast::Statement = {
"with" <t:Test> "as" <_e:Expression> ":" <s:Suite> => ast::Statement::With { items: t, body: s },
WithStatement: ast::LocatedStatement = {
<loc:@L> "with" <t:Test> "as" <_e:Expression> ":" <s:Suite> => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::With { items: t, body: s },
}
},
};
FuncDef: ast::Statement = {
"def" <i:Identifier> <a:Parameters> ":" <s:Suite> => ast::Statement::FunctionDef { name: i, args: a, body: s },
FuncDef: ast::LocatedStatement = {
<loc:@L> "def" <i:Identifier> <a:Parameters> ":" <s:Suite> => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::FunctionDef { name: i, args: a, body: s }
}
},
};
Parameters: Vec<String> = {
@@ -208,11 +271,17 @@ TypedArgsList: Vec<String> = {
<a: Comma<Identifier>> => a,
};
ClassDef: ast::Statement = {
"class" <n:Identifier> <a:Parameters?> ":" <s:Suite> => ast::Statement::ClassDef {
name: n,
args: a.unwrap_or(vec![]),
body: s},
ClassDef: ast::LocatedStatement = {
<loc:@L> "class" <n:Identifier> <a:Parameters?> ":" <s:Suite> => {
ast::LocatedStatement {
location: loc,
node: ast::Statement::ClassDef {
name: n,
args: a.unwrap_or(vec![]),
body: s
},
}
},
};
Test: ast::Expression = {
@@ -419,7 +488,7 @@ Identifier: String = <s:name> => s;
// Hook external lexer:
extern {
type Location = usize;
type Location = lexer::Location;
type Error = lexer::LexicalError;
enum lexer::Tok {

View File

@@ -10,6 +10,9 @@ let call_function = 0x64;
/*
* Primitive instruction type, which can be encoded and decoded.
*/
extern crate rustpython_parser;
use self::rustpython_parser::ast;
use std::collections::HashMap;
use std::fmt;
@@ -17,6 +20,7 @@ use std::fmt;
pub struct CodeObject {
pub instructions: Vec<Instruction>,
pub label_map: HashMap<Label, usize>,
pub locations: Vec<ast::Location>,
pub arg_names: Vec<String>,
}
@@ -25,6 +29,7 @@ impl CodeObject {
CodeObject {
instructions: Vec::new(),
label_map: HashMap::new(),
locations: Vec::new(),
arg_names: arg_names,
}
}
@@ -172,6 +177,7 @@ impl fmt::Debug for CodeObject {
let inst_str = self
.instructions
.iter()
.zip(self.locations.iter())
.enumerate()
.map(|(i, inst)| format!("Inst {}: {:?}", i, inst))
.collect::<Vec<_>>()

View File

@@ -12,6 +12,7 @@ use super::vm::VirtualMachine;
struct Compiler {
code_object_stack: Vec<CodeObject>,
nxt_label: usize,
current_source_location: ast::Location,
}
pub fn compile(
@@ -30,7 +31,7 @@ pub fn compile(
},
Mode::Eval => match parser::parse_statement(source) {
Ok(statement) => {
if let &ast::Statement::Expression { ref expression } = &statement {
if let &ast::Statement::Expression { ref expression } = &statement.node {
compiler.compile_expression(expression);
compiler.emit(Instruction::ReturnValue);
} else {
@@ -42,7 +43,7 @@ pub fn compile(
Mode::Single => match parser::parse_program(source) {
Ok(ast) => {
for statement in ast.statements {
if let &ast::Statement::Expression { ref expression } = &statement {
if let &ast::Statement::Expression { ref expression } = &statement.node {
compiler.compile_expression(expression);
compiler.emit(Instruction::PrintExpr);
} else {
@@ -59,6 +60,7 @@ pub fn compile(
};
let code = compiler.pop_code_object();
trace!("Compilation completed: {:?}", code);
Ok(PyObject::new(
PyObjectKind::Code { code: code },
vm.get_type(),
@@ -78,6 +80,7 @@ impl Compiler {
Compiler {
code_object_stack: Vec::new(),
nxt_label: 0,
current_source_location: ast::Location::default(),
}
}
@@ -101,15 +104,17 @@ impl Compiler {
self.emit(Instruction::ReturnValue);
}
fn compile_statements(&mut self, statements: &Vec<ast::Statement>) {
fn compile_statements(&mut self, statements: &Vec<ast::LocatedStatement>) {
for statement in statements {
self.compile_statement(statement)
}
}
fn compile_statement(&mut self, statement: &ast::Statement) {
fn compile_statement(&mut self, statement: &ast::LocatedStatement) {
trace!("Compiling {:?}", statement);
match statement {
self.set_source_location(&statement.location);
match &statement.node {
ast::Statement::Import { import_parts } => {
for ast::SingleImport {
module,
@@ -607,6 +612,9 @@ impl Compiler {
// Low level helper functions:
/// Appends `instruction` to the current code object and records the
/// compiler's current source location alongside it, so the `instructions`
/// and `locations` vectors grow in lockstep.
fn emit(&mut self, instruction: Instruction) {
    // Snapshot the location up front; it is stored next to the instruction.
    // TODO: insert source filename
    let origin = self.current_source_location.clone();
    self.current_code_object().instructions.push(instruction);
    self.current_code_object().locations.push(origin);
}
fn current_code_object(&mut self) -> &mut CodeObject {
@@ -626,4 +634,8 @@ impl Compiler {
// assert!(label not in self.label_map)
self.current_code_object().label_map.insert(label, position);
}
/// Remembers `location` as the source position attributed to instructions
/// emitted from now on.
fn set_source_location(&mut self, location: &ast::Location) {
    self.current_source_location.clone_from(location);
}
}