Merge pull request #1229 from RustPython/syntax-fixes

Improve lexing of numbers with underscores.
This commit is contained in:
Aviv Palivoda
2019-08-11 18:47:25 +03:00
committed by GitHub
4 changed files with 112 additions and 69 deletions

View File

@@ -14,8 +14,8 @@ extern crate log;
use clap::{App, Arg};
use rustpython_parser::{ast, parser};
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::path::Path;
use std::time::{Duration, Instant};
fn main() {
env_logger::init();
@@ -61,30 +61,45 @@ fn parse_folder(path: &Path) -> std::io::Result<Vec<ParsedFile>> {
}
if metadata.is_file() && path.extension().and_then(|s| s.to_str()) == Some("py") {
let result = parse_python_file(&path);
match &result {
let parsed_file = parse_python_file(&path);
match &parsed_file.result {
Ok(_) => {}
Err(y) => error!("Erreur in file {:?} {:?}", path, y),
}
res.push(ParsedFile {
filename: Box::new(path),
result,
});
res.push(parsed_file);
}
}
Ok(res)
}
fn parse_python_file(filename: &Path) -> ParseResult {
fn parse_python_file(filename: &Path) -> ParsedFile {
info!("Parsing file {:?}", filename);
let source = std::fs::read_to_string(filename).map_err(|e| e.to_string())?;
parser::parse_program(&source).map_err(|e| e.to_string())
match std::fs::read_to_string(filename) {
Err(e) => ParsedFile {
// filename: Box::new(filename.to_path_buf()),
// code: "".to_string(),
num_lines: 0,
result: Err(e.to_string()),
},
Ok(source) => {
let num_lines = source.to_string().lines().count();
let result = parser::parse_program(&source).map_err(|e| e.to_string());
ParsedFile {
// filename: Box::new(filename.to_path_buf()),
// code: source.to_string(),
num_lines,
result,
}
}
}
}
fn statistics(results: ScanResult) {
// println!("Processed {:?} files", res.len());
println!("Scanned a total of {} files", results.parsed_files.len());
let total = results.parsed_files.len();
let total: usize = results.parsed_files.len();
let total_lines: usize = results.parsed_files.iter().map(|p| p.num_lines).sum();
let failed = results
.parsed_files
.iter()
@@ -103,9 +118,19 @@ fn statistics(results: ScanResult) {
let duration = results.t2 - results.t1;
println!("Total time spend: {:?}", duration);
println!(
"File processing rate: {} files/second",
(total * 1_000_000) as f64 / duration.as_micros() as f64
"Processed {} files. That's {} files/second",
total,
rate(total, duration)
);
println!(
"Processed {} lines of python code. That's {} lines/second",
total_lines,
rate(total_lines, duration)
);
}
/// Compute how many items were processed per second.
///
/// `counter` is the number of items handled and `duration` is the elapsed
/// time, which is read at microsecond resolution.
///
/// The scaling by one million is done in `f64` rather than `usize`, so a
/// large `counter` (for example a line count on a 32-bit target) cannot
/// overflow the integer multiplication.
///
/// When `duration` is shorter than one microsecond the division is by zero
/// and the result is not finite (infinity, or NaN when `counter` is 0).
fn rate(counter: usize, duration: Duration) -> f64 {
    const MICROS_PER_SECOND: f64 = 1_000_000.0;
    counter as f64 * MICROS_PER_SECOND / duration.as_micros() as f64
}
struct ScanResult {
@@ -115,7 +140,9 @@ struct ScanResult {
}
struct ParsedFile {
filename: Box<PathBuf>,
// filename: Box<PathBuf>,
// code: String,
num_lines: usize,
result: ParseResult,
}

View File

@@ -340,18 +340,7 @@ where
/// Lex a hex/octal/decimal/binary number without a decimal point.
fn lex_number_radix(&mut self, start_pos: Location, radix: u32) -> LexResult {
let mut value_text = String::new();
loop {
if let Some(c) = self.take_number(radix) {
value_text.push(c);
} else if self.chr0 == Some('_') {
self.next_char();
} else {
break;
}
}
let value_text = self.radix_run(radix);
let end_pos = self.get_pos();
let value = BigInt::from_str_radix(&value_text, radix).map_err(|e| LexicalError {
error: LexicalErrorType::OtherError(format!("{:?}", e)),
@@ -360,24 +349,19 @@ where
Ok((start_pos, Tok::Int { value }, end_pos))
}
/// Lex a normal number, that is, no octal, hex or binary number.
fn lex_normal_number(&mut self) -> LexResult {
let start_pos = self.get_pos();
let mut value_text = String::new();
// Normal number:
while let Some(c) = self.take_number(10) {
value_text.push(c);
}
let mut value_text = self.radix_run(10);
// If float:
if self.chr0 == Some('.') || self.at_exponent() {
// Take '.':
if self.chr0 == Some('.') {
value_text.push(self.next_char().unwrap());
while let Some(c) = self.take_number(10) {
value_text.push(c);
}
value_text.push_str(&self.radix_run(10));
}
// 1e6 for example:
@@ -389,9 +373,7 @@ where
value_text.push(self.next_char().unwrap());
}
while let Some(c) = self.take_number(10) {
value_text.push(c);
}
value_text.push_str(&self.radix_run(10));
}
let value = f64::from_str(&value_text).unwrap();
@@ -426,6 +408,57 @@ where
}
}
/// Collect a run of digits valid for `radix` into a string.
///
/// Underscores used as digit separators are consumed but not stored, so
/// '1_2_3_4' produces "1234". An underscore is only skipped when the
/// character right after it is itself a digit of the radix; otherwise the
/// run stops in front of the underscore.
fn radix_run(&mut self, radix: u32) -> String {
    let mut digits = String::new();
    loop {
        match self.take_number(radix) {
            Some(digit) => digits.push(digit),
            None => {
                // Not on a digit: the only way to continue is a separator
                // that is directly followed by another digit.
                let at_separator = self.chr0 == Some('_')
                    && Lexer::<T>::is_digit_of_radix(self.chr1, radix);
                if !at_separator {
                    break;
                }
                self.next_char();
            }
        }
    }
    digits
}
/// Consume the current character if it is a digit of the given radix.
///
/// Returns the consumed character, or `None` (consuming nothing) when the
/// current character is not a digit of that radix.
fn take_number(&mut self, radix: u32) -> Option<char> {
    if !Lexer::<T>::is_digit_of_radix(self.chr0, radix) {
        return None;
    }
    Some(self.next_char().unwrap())
}
/// Report whether `c` is a valid digit for `radix`.
///
/// Supported radices are 2, 8, 10 and 16 (hex digits are accepted in either
/// case). `None` is never a digit. Any other radix is not implemented and
/// panics.
fn is_digit_of_radix(c: Option<char>, radix: u32) -> bool {
    match radix {
        // `char::is_digit` accepts exactly 0-9 / a-z / A-Z limited to the
        // radix, which matches the ranges accepted for these four bases.
        2 | 8 | 10 | 16 => c.map_or(false, |ch| ch.is_digit(radix)),
        other => unimplemented!("Radix not implemented: {}", other),
    }
}
/// Test if we face '[eE][-+]?[0-9]+'
fn at_exponent(&self) -> bool {
match self.chr0 {
@@ -626,34 +659,6 @@ where
}
}
/// Consume the current character if it is a digit of the given radix.
///
/// Supported radices are 2, 8, 10 and 16; any other radix is not
/// implemented and panics. Returns the consumed character, or `None`
/// (consuming nothing) when the current character is not such a digit.
fn take_number(&mut self, radix: u32) -> Option<char> {
    let is_digit = match (radix, self.chr0) {
        (2, Some('0'..='1')) | (8, Some('0'..='7')) | (10, Some('0'..='9')) => true,
        (16, Some('0'..='9')) | (16, Some('a'..='f')) | (16, Some('A'..='F')) => true,
        // Supported radix, but the current character is not one of its digits.
        (2, _) | (8, _) | (10, _) | (16, _) => false,
        (other, _) => unimplemented!("Radix not implemented: {}", other),
    };

    if !is_digit {
        return None;
    }
    Some(self.next_char().unwrap())
}
/// This is the main entry point. Call this function to retrieve the next token.
/// This function is used by the iterator implementation.
fn inner_next(&mut self) -> LexResult {

View File

@@ -247,7 +247,7 @@ ImportDots: usize = {
ImportAsNames: Vec<ast::ImportSymbol> = {
<i:OneOrMore<ImportAsAlias<Identifier>>> => i,
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ")" => i,
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
"*" => {
// Star import all
vec![ast::ImportSymbol { symbol: "*".to_string(), alias: None }]
@@ -952,11 +952,11 @@ Atom: ast::Expression = {
};
ListLiteralValues: Vec<ast::Expression> = {
<e:OneOrMore<TestOrStarExpr>> <_trailing_comma:","?> => e,
<e:OneOrMore<TestOrStarExpr>> ","? => e,
};
DictLiteralValues: Vec<(Option<ast::Expression>, ast::Expression)> = {
<elements:OneOrMore<DictElement>> <_trailing_comma:","?> => elements,
<elements:OneOrMore<DictElement>> ","? => elements,
};
DictEntry: (ast::Expression, ast::Expression) = {

View File

@@ -1,3 +1,5 @@
from testutils import assertRaises
x = 5
x.__init__(6)
assert x == 5
@@ -42,3 +44,12 @@ assert int(0).__rxor__(1) == 1
assert int(1).__rxor__(1) == 0
assert int(3).__rxor__(-3) == -2
assert int(3).__rxor__(4) == 7
# Test underscores in numbers:
assert 1_2 == 12
assert 1_2_3 == 123
assert 1_2.3_4 == 12.34
assert 1_2.3_4e0_0 == 12.34
with assertRaises(SyntaxError):
eval('1__2')