diff --git a/parser/src/ast.rs b/parser/src/ast.rs index a84d9ac2c..7757726f0 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -2,12 +2,9 @@ * Implement abstract syntax tree nodes for the python language. */ +pub use super::lexer::Location; /* #[derive(Debug)] -pub struct Location { - pub row: i32, - pub column: i32, -} #[derive(Debug)] pub struct Node { @@ -17,7 +14,7 @@ pub struct Node { #[derive(Debug, PartialEq)] pub struct Program { - pub statements: Vec, + pub statements: Vec, } #[derive(Debug, PartialEq)] @@ -28,6 +25,14 @@ pub struct SingleImport { pub alias: Option, } +#[derive(Debug, PartialEq)] +pub struct Located { + pub location: Location, + pub node: T, +} + +pub type LocatedStatement = Located; + #[derive(Debug, PartialEq)] pub enum Statement { Break, @@ -60,27 +65,27 @@ pub enum Statement { }, If { test: Expression, - body: Vec, - orelse: Option>, + body: Vec, + orelse: Option>, }, While { test: Expression, - body: Vec, - orelse: Option>, + body: Vec, + orelse: Option>, }, With { items: Expression, - body: Vec, + body: Vec, }, For { target: Vec, iter: Vec, - body: Vec, - orelse: Option>, + body: Vec, + orelse: Option>, }, ClassDef { name: String, - body: Vec, + body: Vec, args: Vec, // TODO: docstring: String, }, @@ -88,7 +93,7 @@ pub enum Statement { name: String, args: Vec, // docstring: String, - body: Vec, + body: Vec, }, } diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 259d46a82..684593a39 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -10,7 +10,7 @@ pub struct Lexer<'input> { pending: Vec>, chr0: Option, chr1: Option, - location: usize, + location: Location, } #[derive(Debug)] @@ -18,7 +18,22 @@ pub enum LexicalError { StringError, } -pub type Spanned = Result<(usize, Tok, usize), LexicalError>; +#[derive(Clone, Debug, Default, PartialEq)] +pub struct Location { + row: usize, + column: usize, +} + +impl Location { + pub fn new(row: usize, column: usize) -> Self { + Location { + row: row, + column: column, + } + } +} + +pub type Spanned = Result<(Location, Tok, Location), LexicalError>; impl<'input> Lexer<'input> { pub fn new(input: &'input str) -> Self { @@ -29,22 +44,25 @@ impl<'input> Lexer<'input> { indentation_stack: vec![0], pending: Vec::new(), chr0: None, - location: 0, + location: Location::new(0, 0), chr1: None, }; lxr.next_char(); lxr.next_char(); + // Start at top row (=1) left column (=1) + lxr.location.row = 1; + lxr.location.column = 1; lxr } // Lexer helper functions: fn lex_identifier(&mut self) -> Spanned { let mut name = String::new(); - let start_pos = self.location; + let start_pos = self.get_pos(); while self.is_char() { name.push(self.next_char().unwrap()); } - let end_pos = self.location; + let end_pos = self.get_pos(); let mut keywords: HashMap = HashMap::new(); @@ -95,7 +113,7 @@ impl<'input> Lexer<'input> { fn lex_number(&mut self) -> Spanned { let mut value_text = String::new(); - let start_pos = self.location; + let start_pos = self.get_pos(); while self.is_number() { value_text.push(self.next_char().unwrap()); } @@ -108,7 +126,7 @@ impl<'input> Lexer<'input> { } } - let end_pos = self.location; + let end_pos = self.get_pos(); let value = value_text; @@ -122,9 +140,11 @@ impl<'input> Lexer<'input> { self.next_char(); match self.chr0 { Some('\n') => { + self.new_line(); return; } Some('\r') => { + self.new_line(); return; } Some(_) => {} @@ -136,7 +156,7 @@ impl<'input> Lexer<'input> { fn lex_string(&mut self) -> Spanned { let quote_char = self.next_char().unwrap(); let mut string_content = String::new(); - let start_pos = self.location; + let start_pos = self.get_pos(); // If the next two characters are also the quote character, then we have a triple-quoted // string; consume those two characters and ensure that we require a triple-quote to close @@ -215,7 +235,7 @@ impl<'input> Lexer<'input> { } } } - let end_pos = self.location; + let end_pos = self.get_pos(); return Ok(( start_pos, @@ -245,13 +265,19 @@ impl<'input> Lexer<'input> { let nxt = self.chars.next(); self.chr0 = self.chr1; self.chr1 = nxt.map(|x| x.1); - self.location = match nxt { - Some(p) => p.0, - None => 99999, - }; + self.location.column += 1; c } + fn get_pos(&self) -> Location { + self.location.clone() + } + + fn new_line(&mut self) { + self.location.row += 1; + self.location.column = 1; + } + fn inner_next(&mut self) -> Option> { if !self.pending.is_empty() { return Some(self.pending.remove(0)); @@ -283,12 +309,14 @@ impl<'input> Lexer<'input> { self.next_char(); } self.at_begin_of_line = true; + self.new_line(); continue 'top_loop; } Some('\n') => { // Empty line! self.next_char(); self.at_begin_of_line = true; + self.new_line(); continue 'top_loop; } _ => { @@ -305,14 +333,18 @@ impl<'input> Lexer<'input> { } else if col > current_indentation { // New indentation level: self.indentation_stack.push(col); - return Some(Ok((0, Tok::Indent, 0))); + let tok_start = self.get_pos(); + let tok_end = tok_start.clone(); + return Some(Ok((tok_start, Tok::Indent, tok_end))); } else if col < current_indentation { // One or more dedentations // Pop off other levels until col is found: while col < *self.indentation_stack.last().unwrap() { self.indentation_stack.pop().unwrap(); - self.pending.push(Ok((0, Tok::Dedent, 0))); + let tok_start = self.get_pos(); + let tok_end = tok_start.clone(); + self.pending.push(Ok((tok_start, Tok::Dedent, tok_end))); } if col != *self.indentation_stack.last().unwrap() { @@ -339,284 +371,333 @@ impl<'input> Lexer<'input> { return Some(self.lex_string()); } Some('=') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::EqEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::EqEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Equal, tok_end))); } - _ => return Some(Ok((self.location, Tok::Equal, self.location + 1))), } } Some('+') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::PlusEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::PlusEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Plus, tok_end))); } - _ => return Some(Ok((self.location, Tok::Plus, self.location + 1))), } } Some('*') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::StarEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::StarEqual, tok_end))); } Some('*') => { self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok(( - tok_start, - Tok::DoubleStarEqual, - self.location + 1, - ))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::DoubleStarEqual, tok_end))); } _ => { - return Some(Ok((tok_start, Tok::DoubleStar, self.location + 1))) + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::DoubleStar, tok_end))); } } } - _ => return Some(Ok((tok_start, Tok::Star, self.location + 1))), + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Star, tok_end))); + } } } Some('/') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::SlashEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::SlashEqual, tok_end))); } Some('/') => { self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok(( - tok_start, - Tok::DoubleSlashEqual, - self.location + 1, - ))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::DoubleSlashEqual, tok_end))); } _ => { - return Some(Ok(( - tok_start, - Tok::DoubleSlash, - self.location + 1, - ))) + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::DoubleSlash, tok_end))); } } } - _ => return Some(Ok((tok_start, Tok::Slash, self.location + 1))), + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Slash, tok_end))); + } } } Some('%') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::PercentEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::PercentEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Percent, tok_end))); } - _ => return Some(Ok((self.location, Tok::Percent, self.location + 1))), } } Some('|') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::VbarEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::VbarEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Vbar, tok_end))); } - _ => return Some(Ok((self.location, Tok::Vbar, self.location + 1))), } } Some('^') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok(( - self.location, - Tok::CircumflexEqual, - self.location + 1, - ))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::CircumflexEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::CircumFlex, tok_end))); } - _ => return Some(Ok((self.location, Tok::CircumFlex, self.location + 1))), } } Some('&') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::AmperEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::AmperEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Amper, tok_end))); } - _ => return Some(Ok((self.location, Tok::Amper, self.location + 1))), } } Some('-') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::MinusEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::MinusEqual, tok_end))); } Some('>') => { self.next_char(); - return Some(Ok((tok_start, Tok::Rarrow, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Rarrow, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Minus, tok_end))); } - _ => return Some(Ok((tok_start, Tok::Minus, self.location + 1))), } } Some('@') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::AtEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::AtEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::At, tok_end))); } - _ => return Some(Ok((tok_start, Tok::At, self.location + 1))), } } Some('!') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::NotEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::NotEqual, tok_end))); } _ => panic!("Invalid token '!'"), } } Some('~') => { - self.next_char(); - return Some(Ok((0, Tok::Tilde, 0))); + return Some(self.eat_single_char(Tok::Tilde)); } Some('(') => { - self.next_char(); + let result = self.eat_single_char(Tok::Lpar); self.nesting += 1; - return Some(Ok((0, Tok::Lpar, 0))); + return Some(result); } Some(')') => { - self.next_char(); + let result = self.eat_single_char(Tok::Rpar); self.nesting -= 1; - return Some(Ok((0, Tok::Rpar, 0))); + return Some(result); } Some('[') => { - self.next_char(); + let result = self.eat_single_char(Tok::Lsqb); self.nesting += 1; - return Some(Ok((0, Tok::Lsqb, 0))); + return Some(result); } Some(']') => { - self.next_char(); + let result = self.eat_single_char(Tok::Rsqb); self.nesting -= 1; - return Some(Ok((self.location, Tok::Rsqb, self.location + 1))); + return Some(result); } Some('{') => { - self.next_char(); + let result = self.eat_single_char(Tok::Lbrace); self.nesting += 1; - return Some(Ok((0, Tok::Lbrace, 0))); + return Some(result); } Some('}') => { - self.next_char(); + let result = self.eat_single_char(Tok::Rbrace); self.nesting -= 1; - return Some(Ok((self.location, Tok::Rbrace, self.location + 1))); + return Some(result); } Some(':') => { - self.next_char(); - return Some(Ok((self.location, Tok::Colon, self.location + 1))); + return Some(self.eat_single_char(Tok::Colon)); } Some(';') => { - self.next_char(); - return Some(Ok((self.location, Tok::Semi, self.location + 1))); + return Some(self.eat_single_char(Tok::Semi)); } Some('<') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('<') => { self.next_char(); match self.chr0 { Some('=') => { - return Some(Ok(( - tok_start, - Tok::LeftShiftEqual, - self.location + 1, - ))) + self.next_char(); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::LeftShiftEqual, tok_end))); } _ => { - return Some(Ok((tok_start, Tok::LeftShift, self.location + 1))) + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::LeftShift, tok_end))); } } } Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::LessEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::LessEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Less, tok_end))); } - _ => return Some(Ok((tok_start, Tok::Less, self.location + 1))), } } Some('>') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('>') => { self.next_char(); match self.chr0 { Some('=') => { - return Some(Ok(( - tok_start, - Tok::RightShiftEqual, - self.location + 1, - ))) + self.next_char(); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::RightShiftEqual, tok_end))); } _ => { - return Some(Ok((tok_start, Tok::RightShift, self.location + 1))) + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::RightShift, tok_end))); } } } Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::GreaterEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::GreaterEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Greater, tok_end))); } - _ => return Some(Ok((tok_start, Tok::Greater, self.location + 1))), } } Some(',') => { + let tok_start = self.get_pos(); self.next_char(); - return Some(Ok((self.location, Tok::Comma, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Comma, tok_end))); } Some('.') => { + let tok_start = self.get_pos(); self.next_char(); - return Some(Ok((self.location, Tok::Dot, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Dot, tok_end))); } Some('\r') => { + let tok_start = self.get_pos(); self.next_char(); + let tok_end = self.get_pos(); + self.new_line(); // Depending on the nesting level, we emit newline or not: if self.nesting == 0 { self.at_begin_of_line = true; - return Some(Ok((self.location, Tok::Newline, self.location + 1))); + return Some(Ok((tok_start, Tok::Newline, tok_end))); } else { continue; } } Some('\n') => { + let tok_start = self.get_pos(); self.next_char(); + let tok_end = self.get_pos(); + self.new_line(); // Depending on the nesting level, we emit newline or not: if self.nesting == 0 { self.at_begin_of_line = true; - return Some(Ok((self.location, Tok::Newline, self.location + 1))); + return Some(Ok((tok_start, Tok::Newline, tok_end))); } else { continue; } @@ -634,6 +715,13 @@ impl<'input> Lexer<'input> { } } } + + fn eat_single_char(&mut self, ty: Tok) -> Spanned { + let tok_start = self.get_pos(); + self.next_char(); + let tok_end = self.get_pos(); + Ok((tok_start, ty, tok_end)) + } } /* Implement iterator pattern for the get_tok function. diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 333e91997..1b199b022 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -52,7 +52,7 @@ pub fn parse_program(source: &String) -> Result { } } -pub fn parse_statement(source: &String) -> Result { +pub fn parse_statement(source: &String) -> Result { let lxr = lexer::Lexer::new(&source); match python::StatementParser::new().parse(lxr) { Err(why) => Err(String::from(format!("{:?}", why))), @@ -88,14 +88,17 @@ mod tests { assert_eq!( parse_ast, ast::Program { - statements: vec![ast::Statement::Expression { - expression: ast::Expression::Call { - function: Box::new(ast::Expression::Identifier { - name: String::from("print"), - }), - args: vec![ast::Expression::String { - value: String::from("Hello world"), - },], + statements: vec![ast::LocatedStatement { + location: ast::Location::new(1, 1), + node: ast::Statement::Expression { + expression: ast::Expression::Call { + function: Box::new(ast::Expression::Identifier { + name: String::from("print"), + }), + args: vec![ast::Expression::String { + value: String::from("Hello world"), + },], + }, }, },], } @@ -109,19 +112,22 @@ mod tests { assert_eq!( parse_ast, ast::Program { - statements: vec![ast::Statement::Expression { - expression: ast::Expression::Call { - function: Box::new(ast::Expression::Identifier { - name: String::from("print"), - }), - args: vec![ - ast::Expression::String { - value: String::from("Hello world"), - }, - ast::Expression::Number { - value: ast::Number::Integer { value: 2 }, - }, - ], + statements: vec![ast::LocatedStatement { + location: ast::Location::new(1, 1), + node: ast::Statement::Expression { + expression: ast::Expression::Call { + function: Box::new(ast::Expression::Identifier { + name: String::from("print"), + }), + args: vec![ + ast::Expression::String { + value: String::from("Hello world"), + }, + ast::Expression::Number { + value: ast::Number::Integer { value: 2 }, + }, + ], + }, }, },], } @@ -134,30 +140,45 @@ mod tests { let parse_ast = parse_statement(&source).unwrap(); assert_eq!( parse_ast, - ast::Statement::If { - test: ast::Expression::Number { - value: ast::Number::Integer { value: 1 }, - }, - body: vec![ast::Statement::Expression { - expression: ast::Expression::Number { - value: ast::Number::Integer { value: 10 }, - }, - },], - orelse: Some(vec![ast::Statement::If { + ast::LocatedStatement { + location: ast::Location::new(1, 1), + node: ast::Statement::If { test: ast::Expression::Number { - value: ast::Number::Integer { value: 2 }, + value: ast::Number::Integer { value: 1 }, }, - body: vec![ast::Statement::Expression { - expression: ast::Expression::Number { - value: ast::Number::Integer { value: 20 }, + body: vec![ast::LocatedStatement { + location: ast::Location::new(1, 7), + node: ast::Statement::Expression { + expression: ast::Expression::Number { + value: ast::Number::Integer { value: 10 }, + } }, },], - orelse: Some(vec![ast::Statement::Expression { - expression: ast::Expression::Number { - value: ast::Number::Integer { value: 30 }, - }, + orelse: Some(vec![ast::LocatedStatement { + location: ast::Location::new(2, 1), + node: ast::Statement::If { + test: ast::Expression::Number { + value: ast::Number::Integer { value: 2 }, + }, + body: vec![ast::LocatedStatement { + location: ast::Location::new(2, 9), + node: ast::Statement::Expression { + expression: ast::Expression::Number { + value: ast::Number::Integer { value: 20 }, + }, + }, + },], + orelse: Some(vec![ast::LocatedStatement { + location: ast::Location::new(3, 7), + node: ast::Statement::Expression { + expression: ast::Expression::Number { + value: ast::Number::Integer { value: 30 }, + }, + }, + },]), + } },]), - },]), + } } ); } @@ -168,18 +189,21 @@ mod tests { let parse_ast = parse_statement(&source); assert_eq!( parse_ast, - Ok(ast::Statement::Expression { - expression: ast::Expression::Lambda { - args: vec![String::from("x"), String::from("y")], - body: Box::new(ast::Expression::Binop { - a: Box::new(ast::Expression::Identifier { - name: String::from("x"), - }), - op: ast::Operator::Mult, - b: Box::new(ast::Expression::Identifier { - name: String::from("y"), + Ok(ast::LocatedStatement { + location: ast::Location::new(1, 1), + node: ast::Statement::Expression { + expression: ast::Expression::Lambda { + args: vec![String::from("x"), String::from("y")], + body: Box::new(ast::Expression::Binop { + a: Box::new(ast::Expression::Identifier { + name: String::from("x"), + }), + op: ast::Operator::Mult, + b: Box::new(ast::Expression::Identifier { + name: String::from("y"), + }) }) - }) + } } }) ) @@ -190,14 +214,23 @@ mod tests { let source = String::from("class Foo(A, B):\n def __init__(self):\n pass\n"); assert_eq!( parse_statement(&source), - Ok(ast::Statement::ClassDef { - name: String::from("Foo"), - args: vec![String::from("A"), String::from("B")], - body: vec![ast::Statement::FunctionDef { - name: String::from("__init__"), - args: vec![String::from("self")], - body: vec![ast::Statement::Pass], - }], + Ok(ast::LocatedStatement { + location: ast::Location::new(1, 1), + node: ast::Statement::ClassDef { + name: String::from("Foo"), + args: vec![String::from("A"), String::from("B")], + body: vec![ast::LocatedStatement { + location: ast::Location::new(2, 2), + node: ast::Statement::FunctionDef { + name: String::from("__init__"), + args: vec![String::from("self")], + body: vec![ast::LocatedStatement { + location: ast::Location::new(3, 3), + node: ast::Statement::Pass, + }], + } + }], + } }) ) } diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index 5ffce2ddd..538b93d4a 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -11,37 +11,42 @@ pub Program: ast::Program = { }; // A file line either has a declaration, or an empty newline: -FileLine: Option = { +FileLine: Option = { => Some(s), "\n" => None, }; -Suite: Vec = { +Suite: Vec = { => vec![s], "\n" indent dedent => s, }; -pub Statement: ast::Statement = { +pub Statement: ast::LocatedStatement = { SimpleStatement, CompoundStatement, }; -SimpleStatement: ast::Statement = { +SimpleStatement: ast::LocatedStatement = { "\n" => s, ";" => s, }; -SmallStatement: ast::Statement = { +SmallStatement: ast::LocatedStatement = { // => ast::Statement::Expression { expression: e }, ExpressionStatement, - "pass" => ast::Statement::Pass, + "pass" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::Pass, + } + }, FlowStatement, ImportStatement, AssertStatement, }; -ExpressionStatement: ast::Statement = { - => { +ExpressionStatement: ast::LocatedStatement = { + => { //match e2 { // None => ast::Statement::Expression { expression: e }, // Some(e3) => ast::Statement::Expression { expression: e }, @@ -52,21 +57,30 @@ ExpressionStatement: ast::Statement = { let rhs = e2.into_iter().next().unwrap(); // ast::Expression::Tuple { elements: e2.into_iter().next().unwrap() let v = rhs.into_iter().next().unwrap(); - let lhs = ast::Statement::Assign { targets: e, value: v }; + let lhs = ast::LocatedStatement { + location: loc.clone(), + node: ast::Statement::Assign { targets: e, value: v }, + }; lhs } else { if e.len() > 1 { panic!("Not good?"); // ast::Statement::Expression { expression: e[0] } } else { - ast::Statement::Expression { expression: e.into_iter().next().unwrap() } + ast::LocatedStatement { + location: loc.clone(), + node: ast::Statement::Expression { expression: e.into_iter().next().unwrap() }, + } } } }, - => { + => { // TODO: this works in most cases: let rhs = e2.into_iter().next().unwrap(); - ast::Statement::AugAssign { target: e1, op: op, value: rhs } + ast::LocatedStatement { + location: loc, + node: ast::Statement::AugAssign { target: e1, op: op, value: rhs }, + } }, }; @@ -90,17 +104,34 @@ AugAssign: ast::Operator = { "//=" => ast::Operator::FloorDiv, }; -FlowStatement: ast::Statement = { - "break" => ast::Statement::Break, - "continue" => ast::Statement::Continue, - "return" => ast::Statement::Return { value: t}, +FlowStatement: ast::LocatedStatement = { + "break" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::Break, + } + }, + "continue" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::Continue, + } + }, + "return" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::Return { value: t}, + } + }, // raise // yield }; -ImportStatement: ast::Statement = { - "import" >>> => { - ast::Statement::Import { +ImportStatement: ast::LocatedStatement = { + "import" >>> => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::Import { import_parts: i .iter() .map(|(n, a)| @@ -110,10 +141,13 @@ ImportStatement: ast::Statement = { alias: a.clone() }) .collect() + }, } }, - "from" "import" >> => { - ast::Statement::Import { + "from" "import" >> => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::Import { import_parts: i .iter() .map(|(i, a)| @@ -123,6 +157,7 @@ ImportStatement: ast::Statement = { alias: a.clone() }) .collect() + }, } }, }; @@ -136,17 +171,22 @@ DottedName: String = { => n, }; -AssertStatement: ast::Statement = { - "assert" => ast::Statement::Assert { - test: t, - msg: match m { - Some(e) => Some(e.1), - None => None, +AssertStatement: ast::LocatedStatement = { + "assert" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::Assert { + test: t, + msg: match m { + Some(e) => Some(e.1), + None => None, + } + } } }, }; -CompoundStatement: ast::Statement = { +CompoundStatement: ast::LocatedStatement = { IfStatement, WhileStatement, ForStatement, @@ -155,8 +195,8 @@ CompoundStatement: ast::Statement = { ClassDef, }; -IfStatement: ast::Statement = { - "if" ":" => { +IfStatement: ast::LocatedStatement = { + "if" ":" => { // Determine last else: let mut last = match s3 { Some(s) => Some(s.2), @@ -165,39 +205,62 @@ IfStatement: ast::Statement = { // handle elif: for i in s2.into_iter().rev() { - last = Some(vec![ast::Statement::If { test: i.1, body: i.3, orelse: last }]); + let x = ast::LocatedStatement { + location: i.0, + node: ast::Statement::If { test: i.2, body: i.4, orelse: last }, + }; + last = Some(vec![x]); } - ast::Statement::If { test: t, body: s1, orelse: last } + ast::LocatedStatement { + location: loc, + node: ast::Statement::If { test: t, body: s1, orelse: last } + } }, }; -WhileStatement: ast::Statement = { - "while" ":" => { +WhileStatement: ast::LocatedStatement = { + "while" ":" => { let or_else = match s2 { Some(s) => Some(s.2), None => None, }; - ast::Statement::While { test: e, body: s, orelse: or_else } + ast::LocatedStatement { + location: loc, + node: ast::Statement::While { test: e, body: s, orelse: or_else }, + } }, }; -ForStatement: ast::Statement = { - "for" "in" ":" => { +ForStatement: ast::LocatedStatement = { + "for" "in" ":" => { let or_else = match s2 { Some(s) => Some(s.2), None => None, }; - ast::Statement::For { target: e, iter: t, body: s, orelse: or_else } + ast::LocatedStatement { + location: loc, + node: ast::Statement::For { target: e, iter: t, body: s, orelse: or_else }, + } }, }; -WithStatement: ast::Statement = { - "with" "as" <_e:Expression> ":" => ast::Statement::With { items: t, body: s }, +WithStatement: ast::LocatedStatement = { + "with" "as" <_e:Expression> ":" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::With { items: t, body: s }, + } + }, }; -FuncDef: ast::Statement = { - "def" ":" => ast::Statement::FunctionDef { name: i, args: a, body: s }, +FuncDef: ast::LocatedStatement = { + "def" ":" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::FunctionDef { name: i, args: a, body: s } + } + }, }; Parameters: Vec = { @@ -208,11 +271,17 @@ TypedArgsList: Vec = { > => a, }; -ClassDef: ast::Statement = { - "class" ":" => ast::Statement::ClassDef { - name: n, - args: a.unwrap_or(vec![]), - body: s}, +ClassDef: ast::LocatedStatement = { + "class" ":" => { + ast::LocatedStatement { + location: loc, + node: ast::Statement::ClassDef { + name: n, + args: a.unwrap_or(vec![]), + body: s + }, + } + }, }; Test: ast::Expression = { @@ -419,7 +488,7 @@ Identifier: String = => s; // Hook external lexer: extern { - type Location = usize; + type Location = lexer::Location; type Error = lexer::LexicalError; enum lexer::Tok { diff --git a/vm/src/bytecode.rs b/vm/src/bytecode.rs index 3e889b4a4..cfa54e0b6 100644 --- a/vm/src/bytecode.rs +++ b/vm/src/bytecode.rs @@ -10,6 +10,9 @@ let call_function = 0x64; /* * Primitive instruction type, which can be encoded and decoded. */ +extern crate rustpython_parser; + +use self::rustpython_parser::ast; use std::collections::HashMap; use std::fmt; @@ -17,6 +20,7 @@ use std::fmt; pub struct CodeObject { pub instructions: Vec, pub label_map: HashMap, + pub locations: Vec, pub arg_names: Vec, } @@ -25,6 +29,7 @@ impl CodeObject { CodeObject { instructions: Vec::new(), label_map: HashMap::new(), + locations: Vec::new(), arg_names: arg_names, } } @@ -172,6 +177,7 @@ impl fmt::Debug for CodeObject { let inst_str = self .instructions .iter() + .zip(self.locations.iter()) .enumerate() .map(|(i, inst)| format!("Inst {}: {:?}", i, inst)) .collect::>() diff --git a/vm/src/compile.rs b/vm/src/compile.rs index 58c3ad23b..9963409f0 100644 --- a/vm/src/compile.rs +++ b/vm/src/compile.rs @@ -12,6 +12,7 @@ use super::vm::VirtualMachine; struct Compiler { code_object_stack: Vec, nxt_label: usize, + current_source_location: ast::Location, } pub fn compile( @@ -30,7 +31,7 @@ pub fn compile( }, Mode::Eval => match parser::parse_statement(source) { Ok(statement) => { - if let &ast::Statement::Expression { ref expression } = &statement { + if let &ast::Statement::Expression { ref expression } = &statement.node { compiler.compile_expression(expression); compiler.emit(Instruction::ReturnValue); } else { @@ -42,7 +43,7 @@ pub fn compile( Mode::Single => match parser::parse_program(source) { Ok(ast) => { for statement in ast.statements { - if let &ast::Statement::Expression { ref expression } = &statement { + if let &ast::Statement::Expression { ref expression } = &statement.node { compiler.compile_expression(expression); compiler.emit(Instruction::PrintExpr); } else { @@ -59,6 +60,7 @@ pub fn compile( }; let code = compiler.pop_code_object(); + trace!("Compilation completed: {:?}", code); Ok(PyObject::new( PyObjectKind::Code { code: code }, vm.get_type(), @@ -78,6 +80,7 @@ impl Compiler { Compiler { code_object_stack: Vec::new(), nxt_label: 0, + current_source_location: ast::Location::default(), } } @@ -101,15 +104,17 @@ impl Compiler { self.emit(Instruction::ReturnValue); } - fn compile_statements(&mut self, statements: &Vec) { + fn compile_statements(&mut self, statements: &Vec) { for statement in statements { self.compile_statement(statement) } } - fn compile_statement(&mut self, statement: &ast::Statement) { + fn compile_statement(&mut self, statement: &ast::LocatedStatement) { trace!("Compiling {:?}", statement); - match statement { + self.set_source_location(&statement.location); + + match &statement.node { ast::Statement::Import { import_parts } => { for ast::SingleImport { module, @@ -607,6 +612,9 @@ impl Compiler { // Low level helper functions: fn emit(&mut self, instruction: Instruction) { self.current_code_object().instructions.push(instruction); + // TODO: insert source filename + let location = self.current_source_location.clone(); + self.current_code_object().locations.push(location); } fn current_code_object(&mut self) -> &mut CodeObject { @@ -626,4 +634,8 @@ impl Compiler { // assert!(label not in self.label_map) self.current_code_object().label_map.insert(label, position); } + + fn set_source_location(&mut self, location: &ast::Location) { + self.current_source_location = location.clone(); + } }