mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-09 22:49:57 +09:00
Merge pull request #1229 from RustPython/syntax-fixes
Improve lexing of numbers with underscores.
This commit is contained in:
@@ -14,8 +14,8 @@ extern crate log;
|
||||
use clap::{App, Arg};
|
||||
|
||||
use rustpython_parser::{ast, parser};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
use std::path::Path;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
fn main() {
|
||||
env_logger::init();
|
||||
@@ -61,30 +61,45 @@ fn parse_folder(path: &Path) -> std::io::Result<Vec<ParsedFile>> {
|
||||
}
|
||||
|
||||
if metadata.is_file() && path.extension().and_then(|s| s.to_str()) == Some("py") {
|
||||
let result = parse_python_file(&path);
|
||||
match &result {
|
||||
let parsed_file = parse_python_file(&path);
|
||||
match &parsed_file.result {
|
||||
Ok(_) => {}
|
||||
Err(y) => error!("Erreur in file {:?} {:?}", path, y),
|
||||
}
|
||||
res.push(ParsedFile {
|
||||
filename: Box::new(path),
|
||||
result,
|
||||
});
|
||||
|
||||
res.push(parsed_file);
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn parse_python_file(filename: &Path) -> ParseResult {
|
||||
fn parse_python_file(filename: &Path) -> ParsedFile {
|
||||
info!("Parsing file {:?}", filename);
|
||||
let source = std::fs::read_to_string(filename).map_err(|e| e.to_string())?;
|
||||
parser::parse_program(&source).map_err(|e| e.to_string())
|
||||
match std::fs::read_to_string(filename) {
|
||||
Err(e) => ParsedFile {
|
||||
// filename: Box::new(filename.to_path_buf()),
|
||||
// code: "".to_string(),
|
||||
num_lines: 0,
|
||||
result: Err(e.to_string()),
|
||||
},
|
||||
Ok(source) => {
|
||||
let num_lines = source.to_string().lines().count();
|
||||
let result = parser::parse_program(&source).map_err(|e| e.to_string());
|
||||
ParsedFile {
|
||||
// filename: Box::new(filename.to_path_buf()),
|
||||
// code: source.to_string(),
|
||||
num_lines,
|
||||
result,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn statistics(results: ScanResult) {
|
||||
// println!("Processed {:?} files", res.len());
|
||||
println!("Scanned a total of {} files", results.parsed_files.len());
|
||||
let total = results.parsed_files.len();
|
||||
let total: usize = results.parsed_files.len();
|
||||
let total_lines: usize = results.parsed_files.iter().map(|p| p.num_lines).sum();
|
||||
let failed = results
|
||||
.parsed_files
|
||||
.iter()
|
||||
@@ -103,9 +118,19 @@ fn statistics(results: ScanResult) {
|
||||
let duration = results.t2 - results.t1;
|
||||
println!("Total time spend: {:?}", duration);
|
||||
println!(
|
||||
"File processing rate: {} files/second",
|
||||
(total * 1_000_000) as f64 / duration.as_micros() as f64
|
||||
"Processed {} files. That's {} files/second",
|
||||
total,
|
||||
rate(total, duration)
|
||||
);
|
||||
println!(
|
||||
"Processed {} lines of python code. That's {} lines/second",
|
||||
total_lines,
|
||||
rate(total_lines, duration)
|
||||
);
|
||||
}
|
||||
|
||||
/// Compute how many items were processed per second.
///
/// `counter` is the number of processed items and `duration` the time it
/// took to process them. The duration is read with microsecond resolution.
///
/// The count is converted to `f64` *before* it is scaled to microseconds, so
/// a large count cannot overflow `usize` (the integer multiplication
/// `counter * 1_000_000` would overflow for counts above roughly 4294 on a
/// 32-bit target).
///
/// A duration shorter than one microsecond makes the divisor zero; the
/// result is then `inf` (or `NaN` when `counter` is also zero), following
/// normal floating point division rules.
fn rate(counter: usize, duration: Duration) -> f64 {
    let elapsed_micros = duration.as_micros() as f64;
    counter as f64 * 1_000_000.0 / elapsed_micros
}
|
||||
|
||||
struct ScanResult {
|
||||
@@ -115,7 +140,9 @@ struct ScanResult {
|
||||
}
|
||||
|
||||
struct ParsedFile {
|
||||
filename: Box<PathBuf>,
|
||||
// filename: Box<PathBuf>,
|
||||
// code: String,
|
||||
num_lines: usize,
|
||||
result: ParseResult,
|
||||
}
|
||||
|
||||
|
||||
@@ -340,18 +340,7 @@ where
|
||||
|
||||
/// Lex a hex/octal/decimal/binary number without a decimal point.
|
||||
fn lex_number_radix(&mut self, start_pos: Location, radix: u32) -> LexResult {
|
||||
let mut value_text = String::new();
|
||||
|
||||
loop {
|
||||
if let Some(c) = self.take_number(radix) {
|
||||
value_text.push(c);
|
||||
} else if self.chr0 == Some('_') {
|
||||
self.next_char();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let value_text = self.radix_run(radix);
|
||||
let end_pos = self.get_pos();
|
||||
let value = BigInt::from_str_radix(&value_text, radix).map_err(|e| LexicalError {
|
||||
error: LexicalErrorType::OtherError(format!("{:?}", e)),
|
||||
@@ -360,24 +349,19 @@ where
|
||||
Ok((start_pos, Tok::Int { value }, end_pos))
|
||||
}
|
||||
|
||||
/// Lex a normal number, that is, no octal, hex or binary number.
|
||||
fn lex_normal_number(&mut self) -> LexResult {
|
||||
let start_pos = self.get_pos();
|
||||
|
||||
let mut value_text = String::new();
|
||||
|
||||
// Normal number:
|
||||
while let Some(c) = self.take_number(10) {
|
||||
value_text.push(c);
|
||||
}
|
||||
let mut value_text = self.radix_run(10);
|
||||
|
||||
// If float:
|
||||
if self.chr0 == Some('.') || self.at_exponent() {
|
||||
// Take '.':
|
||||
if self.chr0 == Some('.') {
|
||||
value_text.push(self.next_char().unwrap());
|
||||
while let Some(c) = self.take_number(10) {
|
||||
value_text.push(c);
|
||||
}
|
||||
value_text.push_str(&self.radix_run(10));
|
||||
}
|
||||
|
||||
// 1e6 for example:
|
||||
@@ -389,9 +373,7 @@ where
|
||||
value_text.push(self.next_char().unwrap());
|
||||
}
|
||||
|
||||
while let Some(c) = self.take_number(10) {
|
||||
value_text.push(c);
|
||||
}
|
||||
value_text.push_str(&self.radix_run(10));
|
||||
}
|
||||
|
||||
let value = f64::from_str(&value_text).unwrap();
|
||||
@@ -426,6 +408,57 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume a sequence of numbers with the given radix,
|
||||
/// the digits can be decorated with underscores
|
||||
/// like this: '1_2_3_4' == '1234'
|
||||
fn radix_run(&mut self, radix: u32) -> String {
|
||||
let mut value_text = String::new();
|
||||
loop {
|
||||
if let Some(c) = self.take_number(radix) {
|
||||
value_text.push(c);
|
||||
} else if self.chr0 == Some('_') && Lexer::<T>::is_digit_of_radix(self.chr1, radix) {
|
||||
self.next_char();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
value_text
|
||||
}
|
||||
|
||||
/// Consume a single character with the given radix.
|
||||
fn take_number(&mut self, radix: u32) -> Option<char> {
|
||||
let take_char = Lexer::<T>::is_digit_of_radix(self.chr0, radix);
|
||||
|
||||
if take_char {
|
||||
Some(self.next_char().unwrap())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Test whether `c` is a digit in the given radix.
///
/// Returns `false` when `c` is `None` or is not a digit of that radix.
/// Letters are accepted in both upper and lower case (for example `f` and
/// `F` in radix 16). Only ASCII digits and letters are ever recognised.
///
/// The check is delegated to [`char::is_digit`], so every radix that
/// function supports (up to 36) works here, not just 2, 8, 10 and 16.
///
/// # Panics
///
/// Panics when `c` is `Some` and `radix` is outside the range accepted by
/// [`char::is_digit`] (a radix above 36; recent toolchains also reject a
/// radix below 2).
fn is_digit_of_radix(c: Option<char>, radix: u32) -> bool {
    c.map_or(false, |digit| digit.is_digit(radix))
}
|
||||
|
||||
/// Test if we face '[eE][-+]?[0-9]+'
|
||||
fn at_exponent(&self) -> bool {
|
||||
match self.chr0 {
|
||||
@@ -626,34 +659,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn take_number(&mut self, radix: u32) -> Option<char> {
|
||||
let take_char = match radix {
|
||||
2 => match self.chr0 {
|
||||
Some('0'..='1') => true,
|
||||
_ => false,
|
||||
},
|
||||
8 => match self.chr0 {
|
||||
Some('0'..='7') => true,
|
||||
_ => false,
|
||||
},
|
||||
10 => match self.chr0 {
|
||||
Some('0'..='9') => true,
|
||||
_ => false,
|
||||
},
|
||||
16 => match self.chr0 {
|
||||
Some('0'..='9') | Some('a'..='f') | Some('A'..='F') => true,
|
||||
_ => false,
|
||||
},
|
||||
x => unimplemented!("Radix not implemented: {}", x),
|
||||
};
|
||||
|
||||
if take_char {
|
||||
Some(self.next_char().unwrap())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the main entry point. Call this function to retrieve the next token.
|
||||
/// This function is used by the iterator implementation.
|
||||
fn inner_next(&mut self) -> LexResult {
|
||||
|
||||
@@ -247,7 +247,7 @@ ImportDots: usize = {
|
||||
|
||||
ImportAsNames: Vec<ast::ImportSymbol> = {
|
||||
<i:OneOrMore<ImportAsAlias<Identifier>>> => i,
|
||||
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ")" => i,
|
||||
"(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" => i,
|
||||
"*" => {
|
||||
// Star import all
|
||||
vec![ast::ImportSymbol { symbol: "*".to_string(), alias: None }]
|
||||
@@ -952,11 +952,11 @@ Atom: ast::Expression = {
|
||||
};
|
||||
|
||||
ListLiteralValues: Vec<ast::Expression> = {
|
||||
<e:OneOrMore<TestOrStarExpr>> <_trailing_comma:","?> => e,
|
||||
<e:OneOrMore<TestOrStarExpr>> ","? => e,
|
||||
};
|
||||
|
||||
DictLiteralValues: Vec<(Option<ast::Expression>, ast::Expression)> = {
|
||||
<elements:OneOrMore<DictElement>> <_trailing_comma:","?> => elements,
|
||||
<elements:OneOrMore<DictElement>> ","? => elements,
|
||||
};
|
||||
|
||||
DictEntry: (ast::Expression, ast::Expression) = {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from testutils import assertRaises
|
||||
|
||||
x = 5
|
||||
x.__init__(6)
|
||||
assert x == 5
|
||||
@@ -42,3 +44,12 @@ assert int(0).__rxor__(1) == 1
|
||||
assert int(1).__rxor__(1) == 0
|
||||
assert int(3).__rxor__(-3) == -2
|
||||
assert int(3).__rxor__(4) == 7
|
||||
|
||||
# Test underscores in numbers:
|
||||
assert 1_2 == 12
|
||||
assert 1_2_3 == 123
|
||||
assert 1_2.3_4 == 12.34
|
||||
assert 1_2.3_4e0_0 == 12.34
|
||||
|
||||
with assertRaises(SyntaxError):
|
||||
eval('1__2')
|
||||
|
||||
Reference in New Issue
Block a user