From 3d9b2360e39d33647e1b6de7979bbba4c999028f Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 22 Aug 2018 12:14:37 +0200 Subject: [PATCH 1/6] Add row and column info to lexer --- parser/src/lexer.rs | 255 ++++++++++++++++++++++++++------------ parser/src/python.lalrpop | 2 +- 2 files changed, 177 insertions(+), 80 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 259d46a82..315312dad 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -10,7 +10,7 @@ pub struct Lexer<'input> { pending: Vec>, chr0: Option, chr1: Option, - location: usize, + location: Location, } #[derive(Debug)] @@ -18,7 +18,21 @@ pub enum LexicalError { StringError, } -pub type Spanned = Result<(usize, Tok, usize), LexicalError>; +#[derive(Clone, Debug, Default)] +pub struct Location { + row: usize, + column: usize, +} + +impl Location { + pub fn new(row: usize, column: usize) -> Self { + Location { + row: row, column: column, + } + } +} + +pub type Spanned = Result<(Location, Tok, Location), LexicalError>; impl<'input> Lexer<'input> { pub fn new(input: &'input str) -> Self { @@ -29,7 +43,7 @@ impl<'input> Lexer<'input> { indentation_stack: vec![0], pending: Vec::new(), chr0: None, - location: 0, + location: Location::new(0, 0), chr1: None, }; lxr.next_char(); @@ -40,11 +54,11 @@ impl<'input> Lexer<'input> { // Lexer helper functions: fn lex_identifier(&mut self) -> Spanned { let mut name = String::new(); - let start_pos = self.location; + let start_pos = self.get_pos(); while self.is_char() { name.push(self.next_char().unwrap()); } - let end_pos = self.location; + let end_pos = self.get_pos(); let mut keywords: HashMap = HashMap::new(); @@ -95,7 +109,7 @@ impl<'input> Lexer<'input> { fn lex_number(&mut self) -> Spanned { let mut value_text = String::new(); - let start_pos = self.location; + let start_pos = self.get_pos(); while self.is_number() { value_text.push(self.next_char().unwrap()); } @@ -108,7 +122,7 @@ impl<'input> Lexer<'input> { } } - let end_pos = self.location; + let end_pos = self.get_pos(); let value = value_text; @@ -136,7 +150,7 @@ impl<'input> Lexer<'input> { fn lex_string(&mut self) -> Spanned { let quote_char = self.next_char().unwrap(); let mut string_content = String::new(); - let start_pos = self.location; + let start_pos = self.get_pos(); // If the next two characters are also the quote character, then we have a triple-quoted // string; consume those two characters and ensure that we require a triple-quote to close @@ -215,7 +229,7 @@ impl<'input> Lexer<'input> { } } } - let end_pos = self.location; + let end_pos = self.get_pos(); return Ok(( start_pos, @@ -245,13 +259,14 @@ impl<'input> Lexer<'input> { let nxt = self.chars.next(); self.chr0 = self.chr1; self.chr1 = nxt.map(|x| x.1); - self.location = match nxt { - Some(p) => p.0, - None => 99999, - }; + self.location.column += 1; c } + fn get_pos(&self) -> Location { + self.location.clone() + } + fn inner_next(&mut self) -> Option> { if !self.pending.is_empty() { return Some(self.pending.remove(0)); @@ -305,14 +320,18 @@ impl<'input> Lexer<'input> { } else if col > current_indentation { // New indentation level: self.indentation_stack.push(col); - return Some(Ok((0, Tok::Indent, 0))); + let tok_start = self.get_pos(); + let tok_end = tok_start.clone(); + return Some(Ok((tok_start, Tok::Indent, tok_end))); } else if col < current_indentation { // One or more dedentations // Pop off other levels until col is found: while col < *self.indentation_stack.last().unwrap() { self.indentation_stack.pop().unwrap(); - self.pending.push(Ok((0, Tok::Dedent, 0))); + let tok_start = self.get_pos(); + let tok_end = tok_start.clone(); + self.pending.push(Ok((tok_start, Tok::Dedent, tok_end))); } if col != *self.indentation_stack.last().unwrap() { @@ -339,284 +358,355 @@ impl<'input> Lexer<'input> { return Some(self.lex_string()); } Some('=') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::EqEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::EqEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Equal, tok_end))); } - _ => return Some(Ok((self.location, Tok::Equal, self.location + 1))), } } Some('+') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::PlusEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::PlusEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Plus, tok_end))); } - _ => return Some(Ok((self.location, Tok::Plus, self.location + 1))), } } Some('*') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::StarEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::StarEqual, tok_end))); } Some('*') => { self.next_char(); match self.chr0 { Some('=') => { self.next_char(); + let tok_end = self.get_pos(); return Some(Ok(( tok_start, Tok::DoubleStarEqual, - self.location + 1, + tok_end, ))); } _ => { - return Some(Ok((tok_start, Tok::DoubleStar, self.location + 1))) + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::DoubleStar, tok_end))) } } } - _ => return Some(Ok((tok_start, Tok::Star, self.location + 1))), + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Star, tok_end))); + } } } Some('/') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::SlashEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::SlashEqual, tok_end))); } Some('/') => { self.next_char(); match self.chr0 { Some('=') => { self.next_char(); + let tok_end = self.get_pos(); return Some(Ok(( tok_start, Tok::DoubleSlashEqual, - self.location + 1, + tok_end, ))); } _ => { + let tok_end = self.get_pos(); return Some(Ok(( tok_start, Tok::DoubleSlash, - self.location + 1, + tok_end, ))) } } } - _ => return Some(Ok((tok_start, Tok::Slash, self.location + 1))), + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Slash, tok_end))); + } } } Some('%') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::PercentEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::PercentEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Percent, tok_end))); } - _ => return Some(Ok((self.location, Tok::Percent, self.location + 1))), } } Some('|') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::VbarEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::VbarEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Vbar, tok_end))); } - _ => return Some(Ok((self.location, Tok::Vbar, self.location + 1))), } } Some('^') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); + let tok_end = self.get_pos(); return Some(Ok(( - self.location, + tok_start, Tok::CircumflexEqual, - self.location + 1, + tok_end, ))); } - _ => return Some(Ok((self.location, Tok::CircumFlex, self.location + 1))), + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::CircumFlex, tok_end))); + } } } Some('&') => { + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((self.location, Tok::AmperEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::AmperEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Amper, tok_end))); } - _ => return Some(Ok((self.location, Tok::Amper, self.location + 1))), } } Some('-') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::MinusEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::MinusEqual, tok_end))); } Some('>') => { self.next_char(); - return Some(Ok((tok_start, Tok::Rarrow, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Rarrow, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Minus, tok_end))); } - _ => return Some(Ok((tok_start, Tok::Minus, self.location + 1))), } } Some('@') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::AtEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::AtEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::At, tok_end))); } - _ => return Some(Ok((tok_start, Tok::At, self.location + 1))), } } Some('!') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::NotEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::NotEqual, tok_end))); } _ => panic!("Invalid token '!'"), } } Some('~') => { - self.next_char(); - return Some(Ok((0, Tok::Tilde, 0))); + return Some(self.eat_single_char(Tok::Tilde)); } Some('(') => { - self.next_char(); + let result = self.eat_single_char(Tok::Lpar); self.nesting += 1; - return Some(Ok((0, Tok::Lpar, 0))); + return Some(result); } Some(')') => { - self.next_char(); + let result = self.eat_single_char(Tok::Rpar); self.nesting -= 1; - return Some(Ok((0, Tok::Rpar, 0))); + return Some(result); } Some('[') => { - self.next_char(); + let result = self.eat_single_char(Tok::Lsqb); self.nesting += 1; - return Some(Ok((0, Tok::Lsqb, 0))); + return Some(result); } Some(']') => { - self.next_char(); + let result = self.eat_single_char(Tok::Rsqb); self.nesting -= 1; - return Some(Ok((self.location, Tok::Rsqb, self.location + 1))); + return Some(result); } Some('{') => { - self.next_char(); + let result = self.eat_single_char(Tok::Lbrace); self.nesting += 1; - return Some(Ok((0, Tok::Lbrace, 0))); + return Some(result); } Some('}') => { - self.next_char(); + let result = self.eat_single_char(Tok::Rbrace); self.nesting -= 1; - return Some(Ok((self.location, Tok::Rbrace, self.location + 1))); + return Some(result); } Some(':') => { - self.next_char(); - return Some(Ok((self.location, Tok::Colon, self.location + 1))); + return Some(self.eat_single_char(Tok::Colon)); } Some(';') => { - self.next_char(); - return Some(Ok((self.location, Tok::Semi, self.location + 1))); + return Some(self.eat_single_char(Tok::Semi)) } Some('<') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('<') => { self.next_char(); match self.chr0 { Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); return Some(Ok(( tok_start, Tok::LeftShiftEqual, - self.location + 1, + tok_end, ))) } _ => { - return Some(Ok((tok_start, Tok::LeftShift, self.location + 1))) + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::LeftShift, tok_end))) } } } Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::LessEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::LessEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Less, tok_end))); } - _ => return Some(Ok((tok_start, Tok::Less, self.location + 1))), } } Some('>') => { - let tok_start = self.location; + let tok_start = self.get_pos(); self.next_char(); match self.chr0 { Some('>') => { self.next_char(); match self.chr0 { Some('=') => { + self.next_char(); + let tok_end = self.get_pos(); return Some(Ok(( tok_start, Tok::RightShiftEqual, - self.location + 1, + tok_end, ))) } _ => { - return Some(Ok((tok_start, Tok::RightShift, self.location + 1))) + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::RightShift, tok_end))) } } } Some('=') => { self.next_char(); - return Some(Ok((tok_start, Tok::GreaterEqual, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::GreaterEqual, tok_end))); + } + _ => { + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Greater, tok_end))) } - _ => return Some(Ok((tok_start, Tok::Greater, self.location + 1))), } } Some(',') => { + let tok_start = self.get_pos(); self.next_char(); - return Some(Ok((self.location, Tok::Comma, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Comma, tok_end))); } Some('.') => { + let tok_start = self.get_pos(); self.next_char(); - return Some(Ok((self.location, Tok::Dot, self.location + 1))); + let tok_end = self.get_pos(); + return Some(Ok((tok_start, Tok::Dot, tok_end))); } Some('\r') => { + let tok_start = self.get_pos(); self.next_char(); + let tok_end = self.get_pos(); // Depending on the nesting level, we emit newline or not: if self.nesting == 0 { self.at_begin_of_line = true; - return Some(Ok((self.location, Tok::Newline, self.location + 1))); + return Some(Ok((tok_start, Tok::Newline, tok_end))); } else { continue; } } Some('\n') => { + let tok_start = self.get_pos(); self.next_char(); + let tok_end = self.get_pos(); // Depending on the nesting level, we emit newline or not: if self.nesting == 0 { self.at_begin_of_line = true; - return Some(Ok((self.location, Tok::Newline, self.location + 1))); + return Some(Ok((tok_start, Tok::Newline, tok_end))); } else { continue; } @@ -634,6 +724,13 @@ impl<'input> Lexer<'input> { } } } + + fn eat_single_char(&mut self, ty: Tok) -> Spanned { + let tok_start = self.get_pos(); + self.next_char(); + let tok_end = self.get_pos(); + Ok((tok_start, ty, tok_end)) + } } /* Implement iterator pattern for the get_tok function. diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index 5ffce2ddd..c36638aab 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -419,7 +419,7 @@ Identifier: String = => s; // Hook external lexer: extern { - type Location = usize; + type Location = lexer::Location; type Error = lexer::LexicalError; enum lexer::Tok { From f6edf876fd3d7562337a2bbe53b1e359ced2c87c Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 22 Aug 2018 13:08:07 +0200 Subject: [PATCH 2/6] Some formatting --- parser/src/lexer.rs | 49 ++++++++++++--------------------------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 315312dad..a78a2fdc0 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -27,7 +27,8 @@ pub struct Location { impl Location { pub fn new(row: usize, column: usize) -> Self { Location { - row: row, column: column, + row: row, + column: column, } } } @@ -402,15 +403,11 @@ impl<'input> Lexer<'input> { Some('=') => { self.next_char(); let tok_end = self.get_pos(); - return Some(Ok(( - tok_start, - Tok::DoubleStarEqual, - tok_end, - ))); + return Some(Ok((tok_start, Tok::DoubleStarEqual, tok_end))); } _ => { let tok_end = self.get_pos(); - return Some(Ok((tok_start, Tok::DoubleStar, tok_end))) + return Some(Ok((tok_start, Tok::DoubleStar, tok_end))); } } } @@ -435,19 +432,11 @@ impl<'input> Lexer<'input> { Some('=') => { self.next_char(); let tok_end = self.get_pos(); - return Some(Ok(( - tok_start, - Tok::DoubleSlashEqual, - tok_end, - ))); + return Some(Ok((tok_start, Tok::DoubleSlashEqual, tok_end))); } _ => { let tok_end = self.get_pos(); - return Some(Ok(( - tok_start, - Tok::DoubleSlash, - tok_end, - ))) + return Some(Ok((tok_start, Tok::DoubleSlash, tok_end))); } } } @@ -494,11 +483,7 @@ impl<'input> Lexer<'input> { Some('=') => { self.next_char(); let tok_end = self.get_pos(); - return Some(Ok(( - tok_start, - Tok::CircumflexEqual, - tok_end, - ))); + return Some(Ok((tok_start, Tok::CircumflexEqual, tok_end))); } _ => { let tok_end = self.get_pos(); @@ -605,7 +590,7 @@ impl<'input> Lexer<'input> { return Some(self.eat_single_char(Tok::Colon)); } Some(';') => { - return Some(self.eat_single_char(Tok::Semi)) + return Some(self.eat_single_char(Tok::Semi)); } Some('<') => { let tok_start = self.get_pos(); @@ -617,15 +602,11 @@ impl<'input> Lexer<'input> { Some('=') => { self.next_char(); let tok_end = self.get_pos(); - return Some(Ok(( - tok_start, - Tok::LeftShiftEqual, - tok_end, - ))) + return Some(Ok((tok_start, Tok::LeftShiftEqual, tok_end))); } _ => { let tok_end = self.get_pos(); - return Some(Ok((tok_start, Tok::LeftShift, tok_end))) + return Some(Ok((tok_start, Tok::LeftShift, tok_end))); } } } @@ -650,15 +631,11 @@ impl<'input> Lexer<'input> { Some('=') => { self.next_char(); let tok_end = self.get_pos(); - return Some(Ok(( - tok_start, - Tok::RightShiftEqual, - tok_end, - ))) + return Some(Ok((tok_start, Tok::RightShiftEqual, tok_end))); } _ => { let tok_end = self.get_pos(); - return Some(Ok((tok_start, Tok::RightShift, tok_end))) + return Some(Ok((tok_start, Tok::RightShift, tok_end))); } } } @@ -669,7 +646,7 @@ impl<'input> Lexer<'input> { } _ => { let tok_end = self.get_pos(); - return Some(Ok((tok_start, Tok::Greater, tok_end))) + return Some(Ok((tok_start, Tok::Greater, tok_end))); } } } From 4897e107140fe002972b03eec4d70cf8495e5686 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 22 Aug 2018 16:18:47 +0200 Subject: [PATCH 3/6] Add location to statement ast type --- parser/src/ast.rs | 13 ++-- parser/src/lexer.rs | 16 +++- parser/src/parser.rs | 151 +++++++++++++++++++++++--------------- parser/src/python.lalrpop | 131 +++++++++++++++++++++++++-------- vm/src/compile.rs | 38 +++++----- 5 files changed, 234 insertions(+), 115 deletions(-) diff --git a/parser/src/ast.rs b/parser/src/ast.rs index a84d9ac2c..c1f952298 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -2,12 +2,9 @@ * Implement abstract syntax tree nodes for the python language. */ +pub use super::lexer::Location; /* #[derive(Debug)] -pub struct Location { - pub row: i32, - pub column: i32, -} #[derive(Debug)] pub struct Node { @@ -29,7 +26,13 @@ pub struct SingleImport { } #[derive(Debug, PartialEq)] -pub enum Statement { +pub struct Statement { + pub location: Location, + pub statement: StatementType, +} + +#[derive(Debug, PartialEq)] +pub enum StatementType { Break, Continue, Return { diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index a78a2fdc0..684593a39 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -18,7 +18,7 @@ pub enum LexicalError { StringError, } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct Location { row: usize, column: usize, @@ -49,6 +49,9 @@ impl<'input> Lexer<'input> { }; lxr.next_char(); lxr.next_char(); + // Start at top row (=1) left column (=1) + lxr.location.row = 1; + lxr.location.column = 1; lxr } @@ -137,9 +140,11 @@ impl<'input> Lexer<'input> { self.next_char(); match self.chr0 { Some('\n') => { + self.new_line(); return; } Some('\r') => { + self.new_line(); return; } Some(_) => {} @@ -268,6 +273,11 @@ impl<'input> Lexer<'input> { self.location.clone() } + fn new_line(&mut self) { + self.location.row += 1; + self.location.column = 1; + } + fn inner_next(&mut self) -> Option> { if !self.pending.is_empty() { return Some(self.pending.remove(0)); @@ -299,12 +309,14 @@ impl<'input> Lexer<'input> { self.next_char(); } self.at_begin_of_line = true; + self.new_line(); continue 'top_loop; } Some('\n') => { // Empty line! self.next_char(); self.at_begin_of_line = true; + self.new_line(); continue 'top_loop; } _ => { @@ -666,6 +678,7 @@ impl<'input> Lexer<'input> { let tok_start = self.get_pos(); self.next_char(); let tok_end = self.get_pos(); + self.new_line(); // Depending on the nesting level, we emit newline or not: if self.nesting == 0 { @@ -679,6 +692,7 @@ impl<'input> Lexer<'input> { let tok_start = self.get_pos(); self.next_char(); let tok_end = self.get_pos(); + self.new_line(); // Depending on the nesting level, we emit newline or not: if self.nesting == 0 { diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 333e91997..550d87b95 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -88,14 +88,17 @@ mod tests { assert_eq!( parse_ast, ast::Program { - statements: vec![ast::Statement::Expression { - expression: ast::Expression::Call { - function: Box::new(ast::Expression::Identifier { - name: String::from("print"), - }), - args: vec![ast::Expression::String { - value: String::from("Hello world"), - },], + statements: vec![ast::Statement { + location: ast::Location::new(1, 1), + statement: ast::StatementType::Expression { + expression: ast::Expression::Call { + function: Box::new(ast::Expression::Identifier { + name: String::from("print"), + }), + args: vec![ast::Expression::String { + value: String::from("Hello world"), + },], + }, }, },], } @@ -109,19 +112,22 @@ mod tests { assert_eq!( parse_ast, ast::Program { - statements: vec![ast::Statement::Expression { - expression: ast::Expression::Call { - function: Box::new(ast::Expression::Identifier { - name: String::from("print"), - }), - args: vec![ - ast::Expression::String { - value: String::from("Hello world"), - }, - ast::Expression::Number { - value: ast::Number::Integer { value: 2 }, - }, - ], + statements: vec![ast::Statement { + location: ast::Location::new(1, 1), + statement: ast::StatementType::Expression { + expression: ast::Expression::Call { + function: Box::new(ast::Expression::Identifier { + name: String::from("print"), + }), + args: vec![ + ast::Expression::String { + value: String::from("Hello world"), + }, + ast::Expression::Number { + value: ast::Number::Integer { value: 2 }, + }, + ], + }, }, },], } @@ -134,30 +140,45 @@ mod tests { let parse_ast = parse_statement(&source).unwrap(); assert_eq!( parse_ast, - ast::Statement::If { - test: ast::Expression::Number { - value: ast::Number::Integer { value: 1 }, - }, - body: vec![ast::Statement::Expression { - expression: ast::Expression::Number { - value: ast::Number::Integer { value: 10 }, - }, - },], - orelse: Some(vec![ast::Statement::If { + ast::Statement { + location: ast::Location::new(1, 1), + statement: ast::StatementType::If { test: ast::Expression::Number { - value: ast::Number::Integer { value: 2 }, + value: ast::Number::Integer { value: 1 }, }, - body: vec![ast::Statement::Expression { - expression: ast::Expression::Number { - value: ast::Number::Integer { value: 20 }, + body: vec![ast::Statement { + location: ast::Location::new(1, 7), + statement: ast::StatementType::Expression { + expression: ast::Expression::Number { + value: ast::Number::Integer { value: 10 }, + } }, },], - orelse: Some(vec![ast::Statement::Expression { - expression: ast::Expression::Number { - value: ast::Number::Integer { value: 30 }, - }, + orelse: Some(vec![ast::Statement { + location: ast::Location::new(2, 1), + statement: ast::StatementType::If { + test: ast::Expression::Number { + value: ast::Number::Integer { value: 2 }, + }, + body: vec![ast::Statement { + location: ast::Location::new(2, 9), + statement: ast::StatementType::Expression { + expression: ast::Expression::Number { + value: ast::Number::Integer { value: 20 }, + }, + }, + },], + orelse: Some(vec![ast::Statement { + location: ast::Location::new(3, 7), + statement: ast::StatementType::Expression { + expression: ast::Expression::Number { + value: ast::Number::Integer { value: 30 }, + }, + }, + },]), + } },]), - },]), + } } ); } @@ -168,18 +189,21 @@ mod tests { let parse_ast = parse_statement(&source); assert_eq!( parse_ast, - Ok(ast::Statement::Expression { - expression: ast::Expression::Lambda { - args: vec![String::from("x"), String::from("y")], - body: Box::new(ast::Expression::Binop { - a: Box::new(ast::Expression::Identifier { - name: String::from("x"), - }), - op: ast::Operator::Mult, - b: Box::new(ast::Expression::Identifier { - name: String::from("y"), + Ok(ast::Statement { + location: ast::Location::new(1, 1), + statement: ast::StatementType::Expression { + expression: ast::Expression::Lambda { + args: vec![String::from("x"), String::from("y")], + body: Box::new(ast::Expression::Binop { + a: Box::new(ast::Expression::Identifier { + name: String::from("x"), + }), + op: ast::Operator::Mult, + b: Box::new(ast::Expression::Identifier { + name: String::from("y"), + }) }) - }) + } } }) ) @@ -190,14 +214,23 @@ mod tests { let source = String::from("class Foo(A, B):\n def __init__(self):\n pass\n"); assert_eq!( parse_statement(&source), - Ok(ast::Statement::ClassDef { - name: String::from("Foo"), - args: vec![String::from("A"), String::from("B")], - body: vec![ast::Statement::FunctionDef { - name: String::from("__init__"), - args: vec![String::from("self")], - body: vec![ast::Statement::Pass], - }], + Ok(ast::Statement { + location: ast::Location::new(1, 1), + statement: ast::StatementType::ClassDef { + name: String::from("Foo"), + args: vec![String::from("A"), String::from("B")], + body: vec![ast::Statement { + location: ast::Location::new(2, 2), + statement: ast::StatementType::FunctionDef { + name: String::from("__init__"), + args: vec![String::from("self")], + body: vec![ast::Statement { + location: ast::Location::new(3, 3), + statement: ast::StatementType::Pass, + }], + } + }], + } }) ) } diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index c36638aab..b02d635e4 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -34,14 +34,19 @@ SimpleStatement: ast::Statement = { SmallStatement: ast::Statement = { // => ast::Statement::Expression { expression: e }, ExpressionStatement, - "pass" => ast::Statement::Pass, + "pass" => { + ast::Statement { + location: loc, + statement: ast::StatementType::Pass, + } + }, FlowStatement, ImportStatement, AssertStatement, }; ExpressionStatement: ast::Statement = { - => { + => { //match e2 { // None => ast::Statement::Expression { expression: e }, // Some(e3) => ast::Statement::Expression { expression: e }, @@ -52,21 +57,30 @@ ExpressionStatement: ast::Statement = { let rhs = e2.into_iter().next().unwrap(); // ast::Expression::Tuple { elements: e2.into_iter().next().unwrap() let v = rhs.into_iter().next().unwrap(); - let lhs = ast::Statement::Assign { targets: e, value: v }; + let lhs = ast::Statement { + location: loc.clone(), + statement: ast::StatementType::Assign { targets: e, value: v }, + }; lhs } else { if e.len() > 1 { panic!("Not good?"); // ast::Statement::Expression { expression: e[0] } } else { - ast::Statement::Expression { expression: e.into_iter().next().unwrap() } + ast::Statement { + location: loc.clone(), + statement: ast::StatementType::Expression { expression: e.into_iter().next().unwrap() }, + } } } }, - => { + => { // TODO: this works in most cases: let rhs = e2.into_iter().next().unwrap(); - ast::Statement::AugAssign { target: e1, op: op, value: rhs } + ast::Statement { + location: loc, + statement: ast::StatementType::AugAssign { target: e1, op: op, value: rhs }, + } }, }; @@ -91,16 +105,33 @@ AugAssign: ast::Operator = { }; FlowStatement: ast::Statement = { - "break" => ast::Statement::Break, - "continue" => ast::Statement::Continue, - "return" => ast::Statement::Return { value: t}, + "break" => { + ast::Statement { + location: loc, + statement: ast::StatementType::Break, + } + }, + "continue" => { + ast::Statement { + location: loc, + statement: ast::StatementType::Continue, + } + }, + "return" => { + ast::Statement { + location: loc, + statement: ast::StatementType::Return { value: t}, + } + }, // raise // yield }; ImportStatement: ast::Statement = { - "import" >>> => { - ast::Statement::Import { + "import" >>> => { + ast::Statement { + location: loc, + statement: ast::StatementType::Import { import_parts: i .iter() .map(|(n, a)| @@ -110,10 +141,13 @@ ImportStatement: ast::Statement = { alias: a.clone() }) .collect() + }, } }, - "from" "import" >> => { - ast::Statement::Import { + "from" "import" >> => { + ast::Statement { + location: loc, + statement: ast::StatementType::Import { import_parts: i .iter() .map(|(i, a)| @@ -123,6 +157,7 @@ ImportStatement: ast::Statement = { alias: a.clone() }) .collect() + }, } }, }; @@ -137,11 +172,16 @@ DottedName: String = { }; AssertStatement: ast::Statement = { - "assert" => ast::Statement::Assert { - test: t, - msg: match m { - Some(e) => Some(e.1), - None => None, + "assert" => { + ast::Statement { + location: loc, + statement: ast::StatementType::Assert { + test: t, + msg: match m { + Some(e) => Some(e.1), + None => None, + } + } } }, }; @@ -156,7 +196,7 @@ CompoundStatement: ast::Statement = { }; IfStatement: ast::Statement = { - "if" ":" => { + "if" ":" => { // Determine last else: let mut last = match s3 { Some(s) => Some(s.2), @@ -165,39 +205,62 @@ IfStatement: ast::Statement = { // handle elif: for i in s2.into_iter().rev() { - last = Some(vec![ast::Statement::If { test: i.1, body: i.3, orelse: last }]); + let x = ast::Statement { + location: i.0, + statement: ast::StatementType::If { test: i.2, body: i.4, orelse: last }, + }; + last = Some(vec![x]); } - ast::Statement::If { test: t, body: s1, orelse: last } + ast::Statement { + location: loc, + statement: ast::StatementType::If { test: t, body: s1, orelse: last } + } }, }; WhileStatement: ast::Statement = { - "while" ":" => { + "while" ":" => { let or_else = match s2 { Some(s) => Some(s.2), None => None, }; - ast::Statement::While { test: e, body: s, orelse: or_else } + ast::Statement { + location: loc, + statement: ast::StatementType::While { test: e, body: s, orelse: or_else }, + } }, }; ForStatement: ast::Statement = { - "for" "in" ":" => { + "for" "in" ":" => { let or_else = match s2 { Some(s) => Some(s.2), None => None, }; - ast::Statement::For { target: e, iter: t, body: s, orelse: or_else } + ast::Statement { + location: loc, + statement: ast::StatementType::For { target: e, iter: t, body: s, orelse: or_else }, + } }, }; WithStatement: ast::Statement = { - "with" "as" <_e:Expression> ":" => ast::Statement::With { items: t, body: s }, + "with" "as" <_e:Expression> ":" => { + ast::Statement { + location: loc, + statement: ast::StatementType::With { items: t, body: s }, + } + }, }; FuncDef: ast::Statement = { - "def" ":" => ast::Statement::FunctionDef { name: i, args: a, body: s }, + "def" ":" => { + ast::Statement { + location: loc, + statement: ast::StatementType::FunctionDef { name: i, args: a, body: s } + } + }, }; Parameters: Vec = { @@ -209,10 +272,16 @@ TypedArgsList: Vec = { }; ClassDef: ast::Statement = { - "class" ":" => ast::Statement::ClassDef { - name: n, - args: a.unwrap_or(vec![]), - body: s}, + "class" ":" => { + ast::Statement { + location: loc, + statement: ast::StatementType::ClassDef { + name: n, + args: a.unwrap_or(vec![]), + body: s + }, + } + }, }; Test: ast::Expression = { diff --git a/vm/src/compile.rs b/vm/src/compile.rs index 58c3ad23b..5bf77de4b 100644 --- a/vm/src/compile.rs +++ b/vm/src/compile.rs @@ -30,7 +30,7 @@ pub fn compile( }, Mode::Eval => match parser::parse_statement(source) { Ok(statement) => { - if let &ast::Statement::Expression { ref expression } = &statement { + if let &ast::StatementType::Expression { ref expression } = &statement.statement { compiler.compile_expression(expression); compiler.emit(Instruction::ReturnValue); } else { @@ -42,7 +42,7 @@ pub fn compile( Mode::Single => match parser::parse_program(source) { Ok(ast) => { for statement in ast.statements { - if let &ast::Statement::Expression { ref expression } = &statement { + if let &ast::StatementType::Expression { ref expression } = &statement.statement { compiler.compile_expression(expression); compiler.emit(Instruction::PrintExpr); } else { @@ -109,8 +109,8 @@ impl Compiler { fn compile_statement(&mut self, statement: &ast::Statement) { trace!("Compiling {:?}", statement); - match statement { - ast::Statement::Import { import_parts } => { + match &statement.statement { + ast::StatementType::Import { import_parts } => { for ast::SingleImport { module, symbol, @@ -132,13 +132,13 @@ impl Compiler { }); } } - ast::Statement::Expression { expression } => { + ast::StatementType::Expression { expression } => { self.compile_expression(expression); // Pop result of stack, since we not use it: self.emit(Instruction::Pop); } - ast::Statement::If { test, body, orelse } => { + ast::StatementType::If { test, body, orelse } => { let end_label = self.new_label(); match orelse { None => { @@ -160,7 +160,7 @@ impl Compiler { } self.set_label(end_label); } - ast::Statement::While { + ast::StatementType::While { test, body, orelse: _, @@ -182,10 +182,10 @@ impl Compiler { }); self.set_label(end_label); } - ast::Statement::With { items: _, body: _ } => { + ast::StatementType::With { items: _, body: _ } => { // TODO } - ast::Statement::For { + ast::StatementType::For { target, iter, body, @@ -230,7 +230,7 @@ impl Compiler { self.set_label(end_label); self.emit(Instruction::PopBlock); } - ast::Statement::FunctionDef { name, args, body } => { + ast::StatementType::FunctionDef { name, args, body } => { // Create bytecode for this function: self.code_object_stack.push(CodeObject::new(args.to_vec())); self.compile_statements(body); @@ -257,7 +257,7 @@ impl Compiler { name: name.to_string(), }); } - ast::Statement::ClassDef { name, body, args } => { + ast::StatementType::ClassDef { name, body, args } => { self.emit(Instruction::LoadBuildClass); self.code_object_stack .push(CodeObject::new(vec![String::from("__locals__")])); @@ -300,7 +300,7 @@ impl Compiler { name: name.to_string(), }); } - ast::Statement::Assert { test, msg } => { + ast::StatementType::Assert { test, msg } => { // TODO: if some flag, ignore all assert statements! self.compile_expression(test); @@ -323,13 +323,13 @@ impl Compiler { } self.set_label(end_label); } - ast::Statement::Break => { + ast::StatementType::Break => { self.emit(Instruction::Break); } - ast::Statement::Continue => { + ast::StatementType::Continue => { self.emit(Instruction::Continue); } - ast::Statement::Return { value } => { + ast::StatementType::Return { value } => { match value { Some(e) => { let size = e.len(); @@ -349,14 +349,14 @@ impl Compiler { self.emit(Instruction::ReturnValue); } - ast::Statement::Assign { targets, value } => { + ast::StatementType::Assign { targets, value } => { self.compile_expression(value); for target in targets { self.compile_store(target); } } - ast::Statement::AugAssign { target, op, value } => { + ast::StatementType::AugAssign { target, op, value } => { self.compile_expression(target); self.compile_expression(value); @@ -364,11 +364,11 @@ impl Compiler { self.compile_op(op); self.compile_store(target); } - ast::Statement::Delete { targets: _ } => { + ast::StatementType::Delete { targets: _ } => { // TODO: Remove the given names from the scope // self.emit(Instruction::DeleteName); } - ast::Statement::Pass => { + ast::StatementType::Pass => { self.emit(Instruction::Pass); } } From fb69804a306d145d728f7d65eb5aa0d9095369de Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 22 Aug 2018 17:01:54 +0200 Subject: [PATCH 4/6] Some formatting --- vm/src/compile.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vm/src/compile.rs b/vm/src/compile.rs index 5bf77de4b..e9a94fdb0 100644 --- a/vm/src/compile.rs +++ b/vm/src/compile.rs @@ -42,7 +42,8 @@ pub fn compile( Mode::Single => match parser::parse_program(source) { Ok(ast) => { for statement in ast.statements { - if let &ast::StatementType::Expression { ref expression } = &statement.statement { + if let &ast::StatementType::Expression { ref expression } = &statement.statement + { compiler.compile_expression(expression); compiler.emit(Instruction::PrintExpr); } else { From c2550ad3f1bb5df73af5a7c372459cb6ffab4d6f Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 22 Aug 2018 17:49:07 +0200 Subject: [PATCH 5/6] Introduce LocatedStatement after idea from review --- parser/src/ast.rs | 28 ++++++----- parser/src/parser.rs | 46 +++++++++--------- parser/src/python.lalrpop | 100 +++++++++++++++++++------------------- vm/src/compile.rs | 42 ++++++++-------- 4 files changed, 109 insertions(+), 107 deletions(-) diff --git a/parser/src/ast.rs b/parser/src/ast.rs index c1f952298..7757726f0 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -14,7 +14,7 @@ pub struct Node { #[derive(Debug, PartialEq)] pub struct Program { - pub statements: Vec, + pub statements: Vec, } #[derive(Debug, PartialEq)] @@ -26,13 +26,15 @@ pub struct SingleImport { } #[derive(Debug, PartialEq)] -pub struct Statement { +pub struct Located { pub location: Location, - pub statement: StatementType, + pub node: T, } +pub type LocatedStatement = Located; + #[derive(Debug, PartialEq)] -pub enum StatementType { +pub enum Statement { Break, Continue, Return { @@ -63,27 +65,27 @@ pub enum StatementType { }, If { test: Expression, - body: Vec, - orelse: Option>, + body: Vec, + orelse: Option>, }, While { test: Expression, - body: Vec, - orelse: Option>, + body: Vec, + orelse: Option>, }, With { items: Expression, - body: Vec, + body: Vec, }, For { target: Vec, iter: Vec, - body: Vec, - orelse: Option>, + body: Vec, + orelse: Option>, }, ClassDef { name: String, - body: Vec, + body: Vec, args: Vec, // TODO: docstring: String, }, @@ -91,7 +93,7 @@ pub enum StatementType { name: String, args: Vec, // docstring: String, - body: Vec, + body: Vec, }, } diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 550d87b95..1b199b022 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -52,7 +52,7 @@ pub fn parse_program(source: &String) -> Result { } } -pub fn parse_statement(source: &String) -> Result { +pub fn parse_statement(source: &String) -> Result { let lxr = lexer::Lexer::new(&source); match python::StatementParser::new().parse(lxr) { Err(why) => Err(String::from(format!("{:?}", why))), @@ -88,9 +88,9 @@ mod tests { assert_eq!( parse_ast, ast::Program { - statements: vec![ast::Statement { + statements: vec![ast::LocatedStatement { location: ast::Location::new(1, 1), - statement: ast::StatementType::Expression { + node: ast::Statement::Expression { expression: ast::Expression::Call { function: Box::new(ast::Expression::Identifier { name: String::from("print"), @@ -112,9 +112,9 @@ mod tests { assert_eq!( parse_ast, ast::Program { - statements: vec![ast::Statement { + statements: vec![ast::LocatedStatement { location: ast::Location::new(1, 1), - statement: ast::StatementType::Expression { + node: ast::Statement::Expression { expression: ast::Expression::Call { function: Box::new(ast::Expression::Identifier { name: String::from("print"), @@ -140,37 +140,37 @@ mod tests { let parse_ast = parse_statement(&source).unwrap(); assert_eq!( parse_ast, - ast::Statement { + ast::LocatedStatement { location: ast::Location::new(1, 1), - statement: ast::StatementType::If { + node: ast::Statement::If { test: ast::Expression::Number { value: ast::Number::Integer { value: 1 }, }, - body: vec![ast::Statement { + body: vec![ast::LocatedStatement { location: ast::Location::new(1, 7), - statement: ast::StatementType::Expression { + node: ast::Statement::Expression { expression: ast::Expression::Number { value: ast::Number::Integer { value: 10 }, } }, },], - orelse: Some(vec![ast::Statement { + orelse: Some(vec![ast::LocatedStatement { location: ast::Location::new(2, 1), - statement: ast::StatementType::If { + node: ast::Statement::If { test: ast::Expression::Number { value: ast::Number::Integer { value: 2 }, }, - body: vec![ast::Statement { + body: vec![ast::LocatedStatement { location: ast::Location::new(2, 9), - statement: ast::StatementType::Expression { + node: ast::Statement::Expression { expression: ast::Expression::Number { value: ast::Number::Integer { value: 20 }, }, }, },], - orelse: Some(vec![ast::Statement { + orelse: Some(vec![ast::LocatedStatement { location: ast::Location::new(3, 7), - statement: ast::StatementType::Expression { + node: ast::Statement::Expression { expression: ast::Expression::Number { value: ast::Number::Integer { value: 30 }, }, @@ -189,9 +189,9 @@ mod tests { let parse_ast = parse_statement(&source); assert_eq!( parse_ast, - Ok(ast::Statement { + Ok(ast::LocatedStatement { location: ast::Location::new(1, 1), - statement: ast::StatementType::Expression { + node: ast::Statement::Expression { expression: ast::Expression::Lambda { args: vec![String::from("x"), String::from("y")], body: Box::new(ast::Expression::Binop { @@ -214,19 +214,19 @@ mod tests { let source = String::from("class Foo(A, B):\n def __init__(self):\n pass\n"); assert_eq!( parse_statement(&source), - Ok(ast::Statement { + Ok(ast::LocatedStatement { location: ast::Location::new(1, 1), - statement: ast::StatementType::ClassDef { + node: ast::Statement::ClassDef { name: String::from("Foo"), args: vec![String::from("A"), String::from("B")], - body: vec![ast::Statement { + body: vec![ast::LocatedStatement { location: ast::Location::new(2, 2), - statement: ast::StatementType::FunctionDef { + node: ast::Statement::FunctionDef { name: String::from("__init__"), args: vec![String::from("self")], - body: vec![ast::Statement { + body: vec![ast::LocatedStatement { location: ast::Location::new(3, 3), - statement: ast::StatementType::Pass, + node: ast::Statement::Pass, }], } }], diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index b02d635e4..538b93d4a 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -11,33 +11,33 @@ pub Program: ast::Program = { }; // A file line either has a declaration, or an empty newline: -FileLine: Option = { +FileLine: Option = { => Some(s), "\n" => None, }; -Suite: Vec = { +Suite: Vec = { => vec![s], "\n" indent dedent => s, }; -pub Statement: ast::Statement = { +pub Statement: ast::LocatedStatement = { SimpleStatement, CompoundStatement, }; -SimpleStatement: ast::Statement = { +SimpleStatement: ast::LocatedStatement = { "\n" => s, ";" => s, }; -SmallStatement: ast::Statement = { +SmallStatement: ast::LocatedStatement = { // => ast::Statement::Expression { expression: e }, ExpressionStatement, "pass" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::Pass, + node: ast::Statement::Pass, } }, FlowStatement, @@ -45,7 +45,7 @@ SmallStatement: ast::Statement = { AssertStatement, }; -ExpressionStatement: ast::Statement = { +ExpressionStatement: ast::LocatedStatement = { => { //match e2 { // None => ast::Statement::Expression { expression: e }, @@ -57,9 +57,9 @@ ExpressionStatement: ast::Statement = { let rhs = e2.into_iter().next().unwrap(); // ast::Expression::Tuple { elements: e2.into_iter().next().unwrap() let v = rhs.into_iter().next().unwrap(); - let lhs = ast::Statement { + let lhs = ast::LocatedStatement { location: loc.clone(), - statement: ast::StatementType::Assign { targets: e, value: v }, + node: ast::Statement::Assign { targets: e, value: v }, }; lhs } else { @@ -67,9 +67,9 @@ ExpressionStatement: ast::Statement = { panic!("Not good?"); // ast::Statement::Expression { expression: e[0] } } else { - ast::Statement { + ast::LocatedStatement { location: loc.clone(), - statement: ast::StatementType::Expression { expression: e.into_iter().next().unwrap() }, + node: ast::Statement::Expression { expression: e.into_iter().next().unwrap() }, } } } @@ -77,9 +77,9 @@ ExpressionStatement: ast::Statement = { => { // TODO: this works in most cases: let rhs = e2.into_iter().next().unwrap(); - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::AugAssign { target: e1, op: op, value: rhs }, + node: ast::Statement::AugAssign { target: e1, op: op, value: rhs }, } }, }; @@ -104,34 +104,34 @@ AugAssign: ast::Operator = { "//=" => ast::Operator::FloorDiv, }; -FlowStatement: ast::Statement = { +FlowStatement: ast::LocatedStatement = { "break" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::Break, + node: ast::Statement::Break, } }, "continue" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::Continue, + node: ast::Statement::Continue, } }, "return" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::Return { value: t}, + node: ast::Statement::Return { value: t}, } }, // raise // yield }; -ImportStatement: ast::Statement = { +ImportStatement: ast::LocatedStatement = { "import" >>> => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::Import { + node: ast::Statement::Import { import_parts: i .iter() .map(|(n, a)| @@ -145,9 +145,9 @@ ImportStatement: ast::Statement = { } }, "from" "import" >> => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::Import { + node: ast::Statement::Import { import_parts: i .iter() .map(|(i, a)| @@ -171,11 +171,11 @@ DottedName: String = { => n, }; -AssertStatement: ast::Statement = { +AssertStatement: ast::LocatedStatement = { "assert" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::Assert { + node: ast::Statement::Assert { test: t, msg: match m { Some(e) => Some(e.1), @@ -186,7 +186,7 @@ AssertStatement: ast::Statement = { }, }; -CompoundStatement: ast::Statement = { +CompoundStatement: ast::LocatedStatement = { IfStatement, WhileStatement, ForStatement, @@ -195,7 +195,7 @@ CompoundStatement: ast::Statement = { ClassDef, }; -IfStatement: ast::Statement = { +IfStatement: ast::LocatedStatement = { "if" ":" => { // Determine last else: let mut last = match s3 { @@ -205,60 +205,60 @@ IfStatement: ast::Statement = { // handle elif: for i in s2.into_iter().rev() { - let x = ast::Statement { + let x = ast::LocatedStatement { location: i.0, - statement: ast::StatementType::If { test: i.2, body: i.4, orelse: last }, + node: ast::Statement::If { test: i.2, body: i.4, orelse: last }, }; last = Some(vec![x]); } - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::If { test: t, body: s1, orelse: last } + node: ast::Statement::If { test: t, body: s1, orelse: last } } }, }; -WhileStatement: ast::Statement = { +WhileStatement: ast::LocatedStatement = { "while" ":" => { let or_else = match s2 { Some(s) => Some(s.2), None => None, }; - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::While { test: e, body: s, orelse: or_else }, + node: ast::Statement::While { test: e, body: s, orelse: or_else }, } }, }; -ForStatement: ast::Statement = { +ForStatement: ast::LocatedStatement = { "for" "in" ":" => { let or_else = match s2 { Some(s) => Some(s.2), None => None, }; - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::For { target: e, iter: t, body: s, orelse: or_else }, + node: ast::Statement::For { target: e, iter: t, body: s, orelse: or_else }, } }, }; -WithStatement: ast::Statement = { +WithStatement: ast::LocatedStatement = { "with" "as" <_e:Expression> ":" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::With { items: t, body: s }, + node: ast::Statement::With { items: t, body: s }, } }, }; -FuncDef: ast::Statement = { +FuncDef: ast::LocatedStatement = { "def" ":" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::FunctionDef { name: i, args: a, body: s } + node: ast::Statement::FunctionDef { name: i, args: a, body: s } } }, }; @@ -271,11 +271,11 @@ TypedArgsList: Vec = { > => a, }; -ClassDef: ast::Statement = { +ClassDef: ast::LocatedStatement = { "class" ":" => { - ast::Statement { + ast::LocatedStatement { location: loc, - statement: ast::StatementType::ClassDef { + node: ast::Statement::ClassDef { name: n, args: a.unwrap_or(vec![]), body: s diff --git a/vm/src/compile.rs b/vm/src/compile.rs index e9a94fdb0..13542099b 100644 --- a/vm/src/compile.rs +++ b/vm/src/compile.rs @@ -30,7 +30,7 @@ pub fn compile( }, Mode::Eval => match parser::parse_statement(source) { Ok(statement) => { - if let &ast::StatementType::Expression { ref expression } = &statement.statement { + if let &ast::Statement::Expression { ref expression } = &statement.node { compiler.compile_expression(expression); compiler.emit(Instruction::ReturnValue); } else { @@ -42,7 +42,7 @@ pub fn compile( Mode::Single => match parser::parse_program(source) { Ok(ast) => { for statement in ast.statements { - if let &ast::StatementType::Expression { ref expression } = &statement.statement + if let &ast::Statement::Expression { ref expression } = &statement.node { compiler.compile_expression(expression); compiler.emit(Instruction::PrintExpr); @@ -102,16 +102,16 @@ impl Compiler { self.emit(Instruction::ReturnValue); } - fn compile_statements(&mut self, statements: &Vec) { + fn compile_statements(&mut self, statements: &Vec) { for statement in statements { self.compile_statement(statement) } } - fn compile_statement(&mut self, statement: &ast::Statement) { + fn compile_statement(&mut self, statement: &ast::LocatedStatement) { trace!("Compiling {:?}", statement); - match &statement.statement { - ast::StatementType::Import { import_parts } => { + match &statement.node { + ast::Statement::Import { import_parts } => { for ast::SingleImport { module, symbol, @@ -133,13 +133,13 @@ impl Compiler { }); } } - ast::StatementType::Expression { expression } => { + ast::Statement::Expression { expression } => { self.compile_expression(expression); // Pop result of stack, since we not use it: self.emit(Instruction::Pop); } - ast::StatementType::If { test, body, orelse } => { + ast::Statement::If { test, body, orelse } => { let end_label = self.new_label(); match orelse { None => { @@ -161,7 +161,7 @@ impl Compiler { } self.set_label(end_label); } - ast::StatementType::While { + ast::Statement::While { test, body, orelse: _, @@ -183,10 +183,10 @@ impl Compiler { }); self.set_label(end_label); } - ast::StatementType::With { items: _, body: _ } => { + ast::Statement::With { items: _, body: _ } => { // TODO } - ast::StatementType::For { + ast::Statement::For { target, iter, body, @@ -231,7 +231,7 @@ impl Compiler { self.set_label(end_label); self.emit(Instruction::PopBlock); } - ast::StatementType::FunctionDef { name, args, body } => { + ast::Statement::FunctionDef { name, args, body } => { // Create bytecode for this function: self.code_object_stack.push(CodeObject::new(args.to_vec())); self.compile_statements(body); @@ -258,7 +258,7 @@ impl Compiler { name: name.to_string(), }); } - ast::StatementType::ClassDef { name, body, args } => { + ast::Statement::ClassDef { name, body, args } => { self.emit(Instruction::LoadBuildClass); self.code_object_stack .push(CodeObject::new(vec![String::from("__locals__")])); @@ -301,7 +301,7 @@ impl Compiler { name: name.to_string(), }); } - ast::StatementType::Assert { test, msg } => { + ast::Statement::Assert { test, msg } => { // TODO: if some flag, ignore all assert statements! self.compile_expression(test); @@ -324,13 +324,13 @@ impl Compiler { } self.set_label(end_label); } - ast::StatementType::Break => { + ast::Statement::Break => { self.emit(Instruction::Break); } - ast::StatementType::Continue => { + ast::Statement::Continue => { self.emit(Instruction::Continue); } - ast::StatementType::Return { value } => { + ast::Statement::Return { value } => { match value { Some(e) => { let size = e.len(); @@ -350,14 +350,14 @@ impl Compiler { self.emit(Instruction::ReturnValue); } - ast::StatementType::Assign { targets, value } => { + ast::Statement::Assign { targets, value } => { self.compile_expression(value); for target in targets { self.compile_store(target); } } - ast::StatementType::AugAssign { target, op, value } => { + ast::Statement::AugAssign { target, op, value } => { self.compile_expression(target); self.compile_expression(value); @@ -365,11 +365,11 @@ impl Compiler { self.compile_op(op); self.compile_store(target); } - ast::StatementType::Delete { targets: _ } => { + ast::Statement::Delete { targets: _ } => { // TODO: Remove the given names from the scope // self.emit(Instruction::DeleteName); } - ast::StatementType::Pass => { + ast::Statement::Pass => { self.emit(Instruction::Pass); } } From f5955533714e783ba28aed067b048f3b14167c62 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Wed, 22 Aug 2018 18:35:32 +0200 Subject: [PATCH 6/6] Add location vector to code object --- vm/src/bytecode.rs | 6 ++++++ vm/src/compile.rs | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/vm/src/bytecode.rs b/vm/src/bytecode.rs index 3e889b4a4..cfa54e0b6 100644 --- a/vm/src/bytecode.rs +++ b/vm/src/bytecode.rs @@ -10,6 +10,9 @@ let call_function = 0x64; /* * Primitive instruction type, which can be encoded and decoded. */ +extern crate rustpython_parser; + +use self::rustpython_parser::ast; use std::collections::HashMap; use std::fmt; @@ -17,6 +20,7 @@ use std::fmt; pub struct CodeObject { pub instructions: Vec, pub label_map: HashMap, + pub locations: Vec, pub arg_names: Vec, } @@ -25,6 +29,7 @@ impl CodeObject { CodeObject { instructions: Vec::new(), label_map: HashMap::new(), + locations: Vec::new(), arg_names: arg_names, } } @@ -172,6 +177,7 @@ impl fmt::Debug for CodeObject { let inst_str = self .instructions .iter() + .zip(self.locations.iter()) .enumerate() .map(|(i, inst)| format!("Inst {}: {:?}", i, inst)) .collect::>() diff --git a/vm/src/compile.rs b/vm/src/compile.rs index 13542099b..9963409f0 100644 --- a/vm/src/compile.rs +++ b/vm/src/compile.rs @@ -12,6 +12,7 @@ use super::vm::VirtualMachine; struct Compiler { code_object_stack: Vec, nxt_label: usize, + current_source_location: ast::Location, } pub fn compile( @@ -42,8 +43,7 @@ pub fn compile( Mode::Single => match parser::parse_program(source) { Ok(ast) => { for statement in ast.statements { - if let &ast::Statement::Expression { ref expression } = &statement.node - { + if let &ast::Statement::Expression { ref expression } = &statement.node { compiler.compile_expression(expression); compiler.emit(Instruction::PrintExpr); } else { @@ -60,6 +60,7 @@ pub fn compile( }; let code = compiler.pop_code_object(); + trace!("Compilation completed: {:?}", code); Ok(PyObject::new( PyObjectKind::Code { code: code }, vm.get_type(), @@ -79,6 +80,7 @@ impl Compiler { Compiler { code_object_stack: Vec::new(), nxt_label: 0, + current_source_location: ast::Location::default(), } } @@ -110,6 +112,8 @@ impl Compiler { fn compile_statement(&mut self, statement: &ast::LocatedStatement) { trace!("Compiling {:?}", statement); + self.set_source_location(&statement.location); + match &statement.node { ast::Statement::Import { import_parts } => { for ast::SingleImport { @@ -608,6 +612,9 @@ impl Compiler { // Low level helper functions: fn emit(&mut self, instruction: Instruction) { self.current_code_object().instructions.push(instruction); + // TODO: insert source filename + let location = self.current_source_location.clone(); + self.current_code_object().locations.push(location); } fn current_code_object(&mut self) -> &mut CodeObject { @@ -627,4 +634,8 @@ impl Compiler { // assert!(label not in self.label_map) self.current_code_object().label_map.insert(label, position); } + + fn set_source_location(&mut self, location: &ast::Location) { + self.current_source_location = location.clone(); + } }