diff --git a/examples/parse_folder.rs b/examples/parse_folder.rs index 513333b95..ad0c5f859 100644 --- a/examples/parse_folder.rs +++ b/examples/parse_folder.rs @@ -14,8 +14,8 @@ extern crate log; use clap::{App, Arg}; use rustpython_parser::{ast, parser}; -use std::path::{Path, PathBuf}; -use std::time::Instant; +use std::path::Path; +use std::time::{Duration, Instant}; fn main() { env_logger::init(); @@ -61,30 +61,45 @@ fn parse_folder(path: &Path) -> std::io::Result> { } if metadata.is_file() && path.extension().and_then(|s| s.to_str()) == Some("py") { - let result = parse_python_file(&path); - match &result { + let parsed_file = parse_python_file(&path); + match &parsed_file.result { Ok(_) => {} Err(y) => error!("Erreur in file {:?} {:?}", path, y), } - res.push(ParsedFile { - filename: Box::new(path), - result, - }); + + res.push(parsed_file); } } Ok(res) } -fn parse_python_file(filename: &Path) -> ParseResult { +fn parse_python_file(filename: &Path) -> ParsedFile { info!("Parsing file {:?}", filename); - let source = std::fs::read_to_string(filename).map_err(|e| e.to_string())?; - parser::parse_program(&source).map_err(|e| e.to_string()) + match std::fs::read_to_string(filename) { + Err(e) => ParsedFile { + // filename: Box::new(filename.to_path_buf()), + // code: "".to_string(), + num_lines: 0, + result: Err(e.to_string()), + }, + Ok(source) => { + let num_lines = source.to_string().lines().count(); + let result = parser::parse_program(&source).map_err(|e| e.to_string()); + ParsedFile { + // filename: Box::new(filename.to_path_buf()), + // code: source.to_string(), + num_lines, + result, + } + } + } } fn statistics(results: ScanResult) { // println!("Processed {:?} files", res.len()); println!("Scanned a total of {} files", results.parsed_files.len()); - let total = results.parsed_files.len(); + let total: usize = results.parsed_files.len(); + let total_lines: usize = results.parsed_files.iter().map(|p| p.num_lines).sum(); let failed = results .parsed_files .iter() @@ -103,9 +118,19 @@ fn statistics(results: ScanResult) { let duration = results.t2 - results.t1; println!("Total time spend: {:?}", duration); println!( - "File processing rate: {} files/second", - (total * 1_000_000) as f64 / duration.as_micros() as f64 + "Processed {} files. That's {} files/second", + total, + rate(total, duration) ); + println!( + "Processed {} lines of python code. That's {} lines/second", + total_lines, + rate(total_lines, duration) + ); +} + +fn rate(counter: usize, duration: Duration) -> f64 { + (counter * 1_000_000) as f64 / duration.as_micros() as f64 } struct ScanResult { @@ -115,7 +140,9 @@ struct ScanResult { } struct ParsedFile { - filename: Box, + // filename: Box, + // code: String, + num_lines: usize, result: ParseResult, } diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index ae83a1d8d..0f2e28bd2 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -340,18 +340,7 @@ where /// Lex a hex/octal/decimal/binary number without a decimal point. fn lex_number_radix(&mut self, start_pos: Location, radix: u32) -> LexResult { - let mut value_text = String::new(); - - loop { - if let Some(c) = self.take_number(radix) { - value_text.push(c); - } else if self.chr0 == Some('_') { - self.next_char(); - } else { - break; - } - } - + let value_text = self.radix_run(radix); let end_pos = self.get_pos(); let value = BigInt::from_str_radix(&value_text, radix).map_err(|e| LexicalError { error: LexicalErrorType::OtherError(format!("{:?}", e)), @@ -360,24 +349,19 @@ where Ok((start_pos, Tok::Int { value }, end_pos)) } + /// Lex a normal number, that is, no octal, hex or binary number. fn lex_normal_number(&mut self) -> LexResult { let start_pos = self.get_pos(); - let mut value_text = String::new(); - // Normal number: - while let Some(c) = self.take_number(10) { - value_text.push(c); - } + let mut value_text = self.radix_run(10); // If float: if self.chr0 == Some('.') || self.at_exponent() { // Take '.': if self.chr0 == Some('.') { value_text.push(self.next_char().unwrap()); - while let Some(c) = self.take_number(10) { - value_text.push(c); - } + value_text.push_str(&self.radix_run(10)); } // 1e6 for example: @@ -389,9 +373,7 @@ where value_text.push(self.next_char().unwrap()); } - while let Some(c) = self.take_number(10) { - value_text.push(c); - } + value_text.push_str(&self.radix_run(10)); } let value = f64::from_str(&value_text).unwrap(); @@ -426,6 +408,57 @@ where } } + /// Consume a sequence of numbers with the given radix, + /// the digits can be decorated with underscores + /// like this: '1_2_3_4' == '1234' + fn radix_run(&mut self, radix: u32) -> String { + let mut value_text = String::new(); + loop { + if let Some(c) = self.take_number(radix) { + value_text.push(c); + } else if self.chr0 == Some('_') && Lexer::::is_digit_of_radix(self.chr1, radix) { + self.next_char(); + } else { + break; + } + } + value_text + } + + /// Consume a single character with the given radix. + fn take_number(&mut self, radix: u32) -> Option { + let take_char = Lexer::::is_digit_of_radix(self.chr0, radix); + + if take_char { + Some(self.next_char().unwrap()) + } else { + None + } + } + + /// Test if a digit is of a certain radix. + fn is_digit_of_radix(c: Option, radix: u32) -> bool { + match radix { + 2 => match c { + Some('0'..='1') => true, + _ => false, + }, + 8 => match c { + Some('0'..='7') => true, + _ => false, + }, + 10 => match c { + Some('0'..='9') => true, + _ => false, + }, + 16 => match c { + Some('0'..='9') | Some('a'..='f') | Some('A'..='F') => true, + _ => false, + }, + x => unimplemented!("Radix not implemented: {}", x), + } + } + /// Test if we face '[eE][-+]?[0-9]+' fn at_exponent(&self) -> bool { match self.chr0 { @@ -626,34 +659,6 @@ where } } - fn take_number(&mut self, radix: u32) -> Option { - let take_char = match radix { - 2 => match self.chr0 { - Some('0'..='1') => true, - _ => false, - }, - 8 => match self.chr0 { - Some('0'..='7') => true, - _ => false, - }, - 10 => match self.chr0 { - Some('0'..='9') => true, - _ => false, - }, - 16 => match self.chr0 { - Some('0'..='9') | Some('a'..='f') | Some('A'..='F') => true, - _ => false, - }, - x => unimplemented!("Radix not implemented: {}", x), - }; - - if take_char { - Some(self.next_char().unwrap()) - } else { - None - } - } - /// This is the main entry point. Call this function to retrieve the next token. /// This function is used by the iterator implementation. fn inner_next(&mut self) -> LexResult { diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index 6af9fea61..8929dc542 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -247,7 +247,7 @@ ImportDots: usize = { ImportAsNames: Vec = { >> => i, - "(" >> ")" => i, + "(" >> ","? ")" => i, "*" => { // Star import all vec![ast::ImportSymbol { symbol: "*".to_string(), alias: None }] @@ -952,11 +952,11 @@ Atom: ast::Expression = { }; ListLiteralValues: Vec = { - > <_trailing_comma:","?> => e, + > ","? => e, }; DictLiteralValues: Vec<(Option, ast::Expression)> = { - > <_trailing_comma:","?> => elements, + > ","? => elements, }; DictEntry: (ast::Expression, ast::Expression) = { diff --git a/tests/snippets/numbers.py b/tests/snippets/numbers.py index c36602ee1..b90168d41 100644 --- a/tests/snippets/numbers.py +++ b/tests/snippets/numbers.py @@ -1,3 +1,5 @@ +from testutils import assertRaises + x = 5 x.__init__(6) assert x == 5 @@ -42,3 +44,12 @@ assert int(0).__rxor__(1) == 1 assert int(1).__rxor__(1) == 0 assert int(3).__rxor__(-3) == -2 assert int(3).__rxor__(4) == 7 + +# Test underscores in numbers: +assert 1_2 == 12 +assert 1_2_3 == 123 +assert 1_2.3_4 == 12.34 +assert 1_2.3_4e0_0 == 12.34 + +with assertRaises(SyntaxError): + eval('1__2')