Speedup compile time for rustpython-parser

This commit is contained in:
Noa
2022-04-22 17:16:36 -05:00
parent 666db0ff3b
commit ceb59abbd9
8 changed files with 47178 additions and 207 deletions

3
.gitattributes vendored
View File

@@ -1 +1,2 @@
Lib/** linguist-vendored
Lib/** linguist-vendored
parser/src/python.rs linguist-generated

171
Cargo.lock generated
View File

@@ -80,15 +80,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbf56136a5198c7b01a49e3afcbef6cf84597273d298f54432926024107b0109"
[[package]]
name = "ascii-canvas"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6"
dependencies = [
"term",
]
[[package]]
name = "atty"
version = "0.2.14"
@@ -121,21 +112,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bit-set"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -547,12 +523,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "diff"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499"
[[package]]
name = "digest"
version = "0.10.3"
@@ -609,15 +579,6 @@ version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "ena"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7402b94a93c24e742487327a7cd839dc9d36fec9de9fb25b09f2dae459f36c3"
dependencies = [
"log",
]
[[package]]
name = "encode_unicode"
version = "0.3.6"
@@ -668,12 +629,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "fixedbitset"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
[[package]]
name = "flame"
version = "0.2.2"
@@ -901,37 +856,11 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67c21572b4949434e4fc1e1978b99c5f77064153c59d998bf13ecd96fb5ecba7"
[[package]]
name = "lalrpop"
version = "0.19.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852b75a095da6b69da8c5557731c3afd06525d4f655a4fc1c799e2ec8bc4dce4"
dependencies = [
"ascii-canvas",
"atty",
"bit-set",
"diff",
"ena",
"itertools",
"lalrpop-util",
"petgraph",
"pico-args",
"regex",
"regex-syntax",
"string_cache",
"term",
"tiny-keccak",
"unicode-xid",
]
[[package]]
name = "lalrpop-util"
version = "0.19.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6d265705249fe209280676d8f68887859fa42e1d34f342fc05bd47726a5e188"
dependencies = [
"regex",
]
[[package]]
name = "lazy_static"
@@ -1110,12 +1039,6 @@ dependencies = [
"rand_core",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "nibble_vec"
version = "0.1.0"
@@ -1337,25 +1260,23 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc"
[[package]]
name = "petgraph"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7"
dependencies = [
"fixedbitset",
"indexmap",
]
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_macros",
"phf_shared 0.10.0",
"proc-macro-hack",
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
@@ -1364,33 +1285,10 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"phf_shared",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0"
dependencies = [
"phf_generator",
"phf_shared 0.10.0",
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
dependencies = [
"siphasher",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
@@ -1400,12 +1298,6 @@ dependencies = [
"siphasher",
]
[[package]]
name = "pico-args"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468"
[[package]]
name = "pkg-config"
version = "0.3.22"
@@ -1457,12 +1349,6 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro-crate"
version = "1.1.0"
@@ -1473,12 +1359,6 @@ dependencies = [
"toml",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.37"
@@ -1824,13 +1704,14 @@ version = "0.1.2"
dependencies = [
"ahash",
"insta",
"lalrpop",
"lalrpop-util",
"log",
"num-bigint",
"num-traits",
"phf",
"phf_codegen",
"rustpython-ast",
"tiny-keccak",
"unic-emoji-char",
"unic-ucd-ident",
"unicode_names2",
@@ -2214,19 +2095,6 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d44a3643b4ff9caf57abcee9c2c621d6c03d9135e0d8b589bd9afb5992cb176a"
[[package]]
name = "string_cache"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "923f0f39b6267d37d23ce71ae7235602134b250ace715dd2c90421998ddac0c6"
dependencies = [
"lazy_static 1.4.0",
"new_debug_unreachable",
"parking_lot 0.11.2",
"phf_shared 0.8.0",
"precomputed-hash",
]
[[package]]
name = "strsim"
version = "0.8.0"
@@ -2305,17 +2173,6 @@ version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9bffcddbc2458fa3e6058414599e3c838a022abae82e5c67b4f7f80298d5bff"
[[package]]
name = "term"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
dependencies = [
"dirs-next",
"rustversion",
"winapi",
]
[[package]]
name = "termcolor"
version = "1.1.2"

View File

@@ -9,7 +9,8 @@ license = "MIT"
edition = "2021"
[build-dependencies]
lalrpop = "0.19.7"
tiny-keccak = { version = "2", features = ["sha3"] }
phf_codegen = "0.10"
[dependencies]
rustpython-ast = { path = "../ast" }
@@ -20,7 +21,7 @@ num-traits = "0.2.14"
unic-emoji-char = "0.9.0"
unic-ucd-ident = "0.9.0"
unicode_names2 = "0.5.0"
phf = { version = "0.10.1", features = ["macros"] }
phf = "0.10.1"
ahash = "0.7.6"
[dev-dependencies]

View File

@@ -1,3 +1,95 @@
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::PathBuf;
use tiny_keccak::{Hasher, Sha3};
fn main() {
lalrpop::process_root().unwrap()
check_lalrpop();
gen_phf();
}
fn check_lalrpop() {
println!("cargo:rerun-if-changed=src/python.lalrpop");
let sha3_line = BufReader::with_capacity(128, File::open("src/python.rs").unwrap())
.lines()
.nth(1)
.unwrap()
.unwrap();
let expected_sha3_str = sha3_line.strip_prefix("// sha3: ").unwrap();
let mut hasher = Sha3::v256();
hasher.update(&std::fs::read("src/python.lalrpop").unwrap());
let mut actual_sha3 = [0u8; 32];
hasher.finalize(&mut actual_sha3);
// stupid stupid stupid hack. lalrpop outputs each byte as "{:x}" instead of "{:02x}"
let sha3_equal = if expected_sha3_str.len() == 64 {
let mut expected_sha3 = [0u8; 32];
for (i, b) in expected_sha3.iter_mut().enumerate() {
*b = u8::from_str_radix(&expected_sha3_str[i * 2..][..2], 16).unwrap();
}
actual_sha3 == expected_sha3
} else {
let mut actual_sha3_str = String::new();
for byte in actual_sha3 {
write!(actual_sha3_str, "{byte:x}").unwrap();
}
actual_sha3_str == expected_sha3_str
};
if !sha3_equal {
eprintln!("you need to recompile lalrpop!");
std::process::exit(1);
}
}
fn gen_phf() {
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
let mut kwds = phf_codegen::Map::new();
let kwds = kwds
// Alphabetical keywords:
.entry("...", "Tok::Ellipsis")
.entry("False", "Tok::False")
.entry("None", "Tok::None")
.entry("True", "Tok::True")
// moreso "standard" keywords
.entry("and", "Tok::And")
.entry("as", "Tok::As")
.entry("assert", "Tok::Assert")
.entry("async", "Tok::Async")
.entry("await", "Tok::Await")
.entry("break", "Tok::Break")
.entry("class", "Tok::Class")
.entry("continue", "Tok::Continue")
.entry("def", "Tok::Def")
.entry("del", "Tok::Del")
.entry("elif", "Tok::Elif")
.entry("else", "Tok::Else")
.entry("except", "Tok::Except")
.entry("finally", "Tok::Finally")
.entry("for", "Tok::For")
.entry("from", "Tok::From")
.entry("global", "Tok::Global")
.entry("if", "Tok::If")
.entry("import", "Tok::Import")
.entry("in", "Tok::In")
.entry("is", "Tok::Is")
.entry("lambda", "Tok::Lambda")
.entry("nonlocal", "Tok::Nonlocal")
.entry("not", "Tok::Not")
.entry("or", "Tok::Or")
.entry("pass", "Tok::Pass")
.entry("raise", "Tok::Raise")
.entry("return", "Tok::Return")
.entry("try", "Tok::Try")
.entry("while", "Tok::While")
.entry("with", "Tok::With")
.entry("yield", "Tok::Yield")
.build();
writeln!(
BufWriter::new(File::create(out_dir.join("keywords.rs")).unwrap()),
"{kwds}",
)
.unwrap();
}

1
parser/regen_lalrpop.sh Executable file
View File

@@ -0,0 +1 @@
lalrpop src/python.lalrpop

View File

@@ -68,46 +68,9 @@ pub struct Lexer<T: Iterator<Item = char>> {
location: Location,
}
pub static KEYWORDS: phf::Map<&'static str, Tok> = phf::phf_map! {
// Alphabetical keywords:
"..." => Tok::Ellipsis,
"False" => Tok::False,
"None" => Tok::None,
"True" => Tok::True,
"and" => Tok::And,
"as" => Tok::As,
"assert" => Tok::Assert,
"async" => Tok::Async,
"await" => Tok::Await,
"break" => Tok::Break,
"class" => Tok::Class,
"continue" => Tok::Continue,
"def" => Tok::Def,
"del" => Tok::Del,
"elif" => Tok::Elif,
"else" => Tok::Else,
"except" => Tok::Except,
"finally" => Tok::Finally,
"for" => Tok::For,
"from" => Tok::From,
"global" => Tok::Global,
"if" => Tok::If,
"import" => Tok::Import,
"in" => Tok::In,
"is" => Tok::Is,
"lambda" => Tok::Lambda,
"nonlocal" => Tok::Nonlocal,
"not" => Tok::Not,
"or" => Tok::Or,
"pass" => Tok::Pass,
"raise" => Tok::Raise,
"return" => Tok::Return,
"try" => Tok::Try,
"while" => Tok::While,
"with" => Tok::With,
"yield" => Tok::Yield,
};
// generated in build.rs, in gen_phf()
pub static KEYWORDS: phf::Map<&'static str, Tok> =
include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
pub type Spanned = (Location, Tok, Location);
pub type LexResult = Result<Spanned, LexicalError>;

View File

@@ -20,7 +20,6 @@
#[macro_use]
extern crate log;
use lalrpop_util::lalrpop_mod;
pub use rustpython_ast as ast;
pub mod error;
@@ -29,9 +28,8 @@ mod function;
pub mod lexer;
pub mod mode;
pub mod parser;
lalrpop_mod!(
#[allow(clippy::all)]
#[allow(unused)]
python
);
#[allow(clippy::all)]
#[allow(unused)]
#[rustfmt::skip]
mod python;
pub mod token;

47058
parser/src/python.rs generated Normal file

File diff suppressed because it is too large Load Diff