diff --git a/Cargo.lock b/Cargo.lock index abfd82126..dacaafc76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1851,6 +1851,7 @@ dependencies = [ "ahash", "anyhow", "insta", + "itertools", "lalrpop", "lalrpop-util", "log", diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 5252422ae..60dabdcc3 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -9,22 +9,23 @@ license = "MIT" edition = "2021" [build-dependencies] -tiny-keccak = { version = "2", features = ["sha3"] } -phf_codegen = "0.10" -lalrpop = { version = "0.19.8", optional = true } anyhow = "1.0.45" +lalrpop = { version = "0.19.8", optional = true } +phf_codegen = "0.10" +tiny-keccak = { version = "2", features = ["sha3"] } [dependencies] -rustpython-ast = { path = "../ast" } +ahash = "0.7.6" +itertools = "0.10.3" lalrpop-util = "0.19.8" log = "0.4.16" num-bigint = "0.4.3" num-traits = "0.2.14" +phf = "0.10.1" +rustpython-ast = { path = "../ast" } unic-emoji-char = "0.9.0" unic-ucd-ident = "0.9.0" unicode_names2 = "0.5.0" -phf = "0.10.1" -ahash = "0.7.6" [dev-dependencies] insta = "1.14.0" diff --git a/parser/python.lalrpop b/parser/python.lalrpop index f68096168..3bf83dce4 100644 --- a/parser/python.lalrpop +++ b/parser/python.lalrpop @@ -6,9 +6,9 @@ use crate::{ ast, error::{LexicalError, LexicalErrorType}, - fstring::parse_located_fstring, function::{ArgumentList, parse_args, parse_params}, lexer, + string::parse_strings, token::StringKind }; use num_bigint::BigInt; @@ -961,26 +961,7 @@ SliceOp: Option<ast::Expr> = { } Atom: ast::Expr = { - <location:@L> <s:(@L string)+> =>? 
{ - let values = s.into_iter().map(|(loc, (value, kind))| { - if let StringKind::F = kind { - parse_located_fstring(&value, loc) - } else { - let kind = (kind == StringKind::U).then(|| "u".to_owned()); - Ok(ast::Expr::new( - loc, - ast::ExprKind::Constant { value: value.into(), kind }, - )) - } - }); - let values = values.collect::<Result<Vec<_>, _>>()?; - - Ok(if values.len() > 1 { - ast::Expr::new(location, ast::ExprKind::JoinedStr { values }) - } else { - values.into_iter().next().unwrap() - }) - }, + <s:(@L string)+> =>? parse_strings(s).map_err(|e| e.into()), <location:@L> <value:Constant> => ast::Expr { location, custom: (), diff --git a/parser/src/lib.rs b/parser/src/lib.rs index abe5f0f1a..694e0a501 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -30,4 +30,5 @@ pub mod mode; pub mod parser; #[rustfmt::skip] mod python; +mod string; pub mod token; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 6169ec00a..ed80864e2 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -91,6 +91,20 @@ mod tests { insta::assert_debug_snapshot!(parse_ast); } + #[test] + fn test_parse_string() { + let source = String::from("'Hello world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_f_string() { + let source = String::from("f'Hello world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + #[test] fn test_parse_print_hello() { let source = String::from("print('Hello world')"); diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__parse_f_string.snap b/parser/src/snapshots/rustpython_parser__parser__tests__parse_f_string.snap new file mode 100644 index 000000000..445e51dc9 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__parser__tests__parse_f_string.snap @@ -0,0 +1,39 @@ +--- +source: parser/src/parser.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Expr { + value: Located { 
location: Location { + row: 1, + column: 3, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + ], + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__parse_string.snap b/parser/src/snapshots/rustpython_parser__parser__tests__parse_string.snap new file mode 100644 index 000000000..b808f28ed --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__parser__tests__parse_string.snap @@ -0,0 +1,28 @@ +--- +source: parser/src/parser.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap new file mode 100644 index 000000000..687ee9608 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_1.snap @@ -0,0 +1,39 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + ], + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap new file mode 100644 index 000000000..687ee9608 --- /dev/null +++ 
b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_2.snap @@ -0,0 +1,39 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + ], + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap new file mode 100644 index 000000000..197f1c572 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_f_string_concat_3.snap @@ -0,0 +1,63 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + Located { + location: Location { + row: 1, + column: 12, + }, + custom: (), + node: FormattedValue { + value: Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Constant { + value: Str( + "!", + ), + kind: None, + }, + }, + conversion: 0, + format_spec: None, + }, + }, + ], + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_string_concat.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_string_concat.snap new file mode 100644 index 000000000..25ed23370 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_string_concat.snap 
@@ -0,0 +1,28 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap new file mode 100644 index 000000000..bcba0524e --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_1.snap @@ -0,0 +1,41 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: Some( + "u", + ), + }, + }, + ], + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_2.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_2.snap new file mode 100644 index 000000000..4e4ba23d5 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_f_string_concat_2.snap @@ -0,0 +1,41 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: JoinedStr { + values: [ + Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Constant { + value: Str( + "Hello world!", + ), + kind: Some( + "u", + ), + }, + }, + ], + }, + }, + }, 
+ }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_string_concat_1.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_string_concat_1.snap new file mode 100644 index 000000000..25ed23370 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_string_concat_1.snap @@ -0,0 +1,28 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 2, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: None, + }, + }, + }, + }, +] diff --git a/parser/src/snapshots/rustpython_parser__string__tests__parse_u_string_concat_2.snap b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_string_concat_2.snap new file mode 100644 index 000000000..190854c39 --- /dev/null +++ b/parser/src/snapshots/rustpython_parser__string__tests__parse_u_string_concat_2.snap @@ -0,0 +1,30 @@ +--- +source: parser/src/string.rs +expression: parse_ast +--- +[ + Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Expr { + value: Located { + location: Location { + row: 1, + column: 3, + }, + custom: (), + node: Constant { + value: Str( + "Hello world", + ), + kind: Some( + "u", + ), + }, + }, + }, + }, +] diff --git a/parser/src/string.rs b/parser/src/string.rs new file mode 100644 index 000000000..02a55145c --- /dev/null +++ b/parser/src/string.rs @@ -0,0 +1,143 @@ +use crate::{ + ast::{Constant, Expr, ExprKind, Location}, + error::{LexicalError, LexicalErrorType}, + fstring::parse_located_fstring, + token::StringKind, +}; +use itertools::Itertools; + +pub fn parse_strings(values: Vec<(Location, (String, StringKind))>) -> Result<Expr, LexicalError> { + // Preserve the initial location and kind. 
+ let initial_location = values[0].0; + let initial_kind = (values[0].1 .1 == StringKind::U).then(|| "u".to_owned()); + + // Determine whether the list of values contains any f-strings. (If not, we can return a + // single Constant at the end, rather than a JoinedStr.) + let mut has_fstring = false; + + // De-duplicate adjacent constants. + let mut deduped: Vec<Expr> = vec![]; + let mut current: Vec<String> = vec![]; + + let take_current = |current: &mut Vec<String>| -> Expr { + Expr::new( + initial_location, + ExprKind::Constant { + value: Constant::Str(current.drain(..).join("")), + kind: initial_kind.clone(), + }, + ) + }; + + for (location, (string, string_kind)) in values { + match string_kind { + StringKind::Normal | StringKind::U => current.push(string), + StringKind::F => { + has_fstring = true; + let values = if let ExprKind::JoinedStr { values } = + parse_located_fstring(&string, location) + .map_err(|e| LexicalError { + location, + error: LexicalErrorType::FStringError(e.error), + })? + .node + { + values + } else { + unreachable!("parse_located_fstring returned a non-JoinedStr.") + }; + for value in values { + match value.node { + ExprKind::FormattedValue { .. } => { + if !current.is_empty() { + deduped.push(take_current(&mut current)); + } + deduped.push(value) + } + ExprKind::Constant { value, .. 
} => { + if let Constant::Str(value) = value { + current.push(value); + } else { + unreachable!("Unexpected non-string constant."); + } + } + _ => unreachable!("Unexpected non-string expression."), + } + } + } + } + } + if !current.is_empty() { + deduped.push(take_current(&mut current)); + } + + Ok(if has_fstring { + Expr::new(initial_location, ExprKind::JoinedStr { values: deduped }) + } else { + deduped + .into_iter() + .exactly_one() + .expect("String must be concatenated to a single element.") + }) +} + +#[cfg(test)] +mod tests { + use crate::parser::parse_program; + + #[test] + fn test_parse_string_concat() { + let source = String::from("'Hello ' 'world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_u_string_concat_1() { + let source = String::from("'Hello ' u'world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_u_string_concat_2() { + let source = String::from("u'Hello ' 'world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_f_string_concat_1() { + let source = String::from("'Hello ' f'world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_f_string_concat_2() { + let source = String::from("'Hello ' f'world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_f_string_concat_3() { + let source = String::from("'Hello ' f'world{\"!\"}'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn test_parse_u_f_string_concat_1() { + let source = String::from("u'Hello ' f'world'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } + + #[test] + fn 
test_parse_u_f_string_concat_2() { + let source = String::from("u'Hello ' f'world' '!'"); + let parse_ast = parse_program(&source).unwrap(); + insta::assert_debug_snapshot!(parse_ast); + } +}