Implement CPython-like implicit string concatenation in parser (#4097)

This commit is contained in:
Charlie Marsh
2022-08-21 00:49:00 -04:00
committed by GitHub
parent e44ccb068d
commit bea97cd254
16 changed files with 544 additions and 27 deletions

1
Cargo.lock generated
View File

@@ -1851,6 +1851,7 @@ dependencies = [
"ahash",
"anyhow",
"insta",
"itertools",
"lalrpop",
"lalrpop-util",
"log",

View File

@@ -9,22 +9,23 @@ license = "MIT"
edition = "2021"
[build-dependencies]
tiny-keccak = { version = "2", features = ["sha3"] }
phf_codegen = "0.10"
lalrpop = { version = "0.19.8", optional = true }
anyhow = "1.0.45"
lalrpop = { version = "0.19.8", optional = true }
phf_codegen = "0.10"
tiny-keccak = { version = "2", features = ["sha3"] }
[dependencies]
rustpython-ast = { path = "../ast" }
ahash = "0.7.6"
itertools = "0.10.3"
lalrpop-util = "0.19.8"
log = "0.4.16"
num-bigint = "0.4.3"
num-traits = "0.2.14"
phf = "0.10.1"
rustpython-ast = { path = "../ast" }
unic-emoji-char = "0.9.0"
unic-ucd-ident = "0.9.0"
unicode_names2 = "0.5.0"
phf = "0.10.1"
ahash = "0.7.6"
[dev-dependencies]
insta = "1.14.0"

View File

@@ -6,9 +6,9 @@
use crate::{
ast,
error::{LexicalError, LexicalErrorType},
fstring::parse_located_fstring,
function::{ArgumentList, parse_args, parse_params},
lexer,
string::parse_strings,
token::StringKind
};
use num_bigint::BigInt;
@@ -961,26 +961,7 @@ SliceOp: Option<ast::Expr> = {
}
Atom: ast::Expr = {
<location:@L> <s:(@L string)+> =>? {
let values = s.into_iter().map(|(loc, (value, kind))| {
if let StringKind::F = kind {
parse_located_fstring(&value, loc)
} else {
let kind = (kind == StringKind::U).then(|| "u".to_owned());
Ok(ast::Expr::new(
loc,
ast::ExprKind::Constant { value: value.into(), kind },
))
}
});
let values = values.collect::<Result<Vec<_>, _>>()?;
Ok(if values.len() > 1 {
ast::Expr::new(location, ast::ExprKind::JoinedStr { values })
} else {
values.into_iter().next().unwrap()
})
},
<location:@L> <s:(@L string)+> =>? parse_strings(s).map_err(|e| e.into()),
<location:@L> <value:Constant> => ast::Expr {
location,
custom: (),

View File

@@ -30,4 +30,5 @@ pub mod mode;
pub mod parser;
#[rustfmt::skip]
mod python;
mod string;
pub mod token;

View File

@@ -91,6 +91,20 @@ mod tests {
insta::assert_debug_snapshot!(parse_ast);
}
/// Snapshot test: parses a program consisting of one plain string literal and records the
/// debug representation of the resulting AST with `insta`.
#[test]
fn test_parse_string() {
let source = String::from("'Hello world'");
// `unwrap` makes the test fail outright if the source does not parse.
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
/// Snapshot test: parses a program consisting of one f-string literal (with no replacement
/// fields) and records the debug representation of the resulting AST with `insta`.
#[test]
fn test_parse_f_string() {
let source = String::from("f'Hello world'");
// `unwrap` makes the test fail outright if the source does not parse.
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_print_hello() {
let source = String::from("print('Hello world')");

View File

@@ -0,0 +1,39 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View File

@@ -0,0 +1,28 @@
---
source: parser/src/parser.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View File

@@ -0,0 +1,39 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View File

@@ -0,0 +1,39 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
],
},
},
},
},
]

View File

@@ -0,0 +1,63 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
Located {
location: Location {
row: 1,
column: 12,
},
custom: (),
node: FormattedValue {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"!",
),
kind: None,
},
},
conversion: 0,
format_spec: None,
},
},
],
},
},
},
},
]

View File

@@ -0,0 +1,28 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View File

@@ -0,0 +1,41 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: Some(
"u",
),
},
},
],
},
},
},
},
]

View File

@@ -0,0 +1,41 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: JoinedStr {
values: [
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world!",
),
kind: Some(
"u",
),
},
},
],
},
},
},
},
]

View File

@@ -0,0 +1,28 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 2,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: None,
},
},
},
},
]

View File

@@ -0,0 +1,30 @@
---
source: parser/src/string.rs
expression: parse_ast
---
[
Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Expr {
value: Located {
location: Location {
row: 1,
column: 3,
},
custom: (),
node: Constant {
value: Str(
"Hello world",
),
kind: Some(
"u",
),
},
},
},
},
]

143
parser/src/string.rs Normal file
View File

@@ -0,0 +1,143 @@
use crate::{
ast::{Constant, Expr, ExprKind, Location},
error::{LexicalError, LexicalErrorType},
fstring::parse_located_fstring,
token::StringKind,
};
use itertools::Itertools;
pub fn parse_strings(values: Vec<(Location, (String, StringKind))>) -> Result<Expr, LexicalError> {
// Preserve the initial location and kind.
let initial_location = values[0].0;
let initial_kind = (values[0].1 .1 == StringKind::U).then(|| "u".to_owned());
// Determine whether the list of values contains any f-strings. (If not, we can return a
// single Constant at the end, rather than a JoinedStr.)
let mut has_fstring = false;
// De-duplicate adjacent constants.
let mut deduped: Vec<Expr> = vec![];
let mut current: Vec<String> = vec![];
let take_current = |current: &mut Vec<String>| -> Expr {
Expr::new(
initial_location,
ExprKind::Constant {
value: Constant::Str(current.drain(..).join("")),
kind: initial_kind.clone(),
},
)
};
for (location, (string, string_kind)) in values {
match string_kind {
StringKind::Normal | StringKind::U => current.push(string),
StringKind::F => {
has_fstring = true;
let values = if let ExprKind::JoinedStr { values } =
parse_located_fstring(&string, location)
.map_err(|e| LexicalError {
location,
error: LexicalErrorType::FStringError(e.error),
})?
.node
{
values
} else {
unreachable!("parse_located_fstring returned a non-JoinedStr.")
};
for value in values {
match value.node {
ExprKind::FormattedValue { .. } => {
if !current.is_empty() {
deduped.push(take_current(&mut current));
}
deduped.push(value)
}
ExprKind::Constant { value, .. } => {
if let Constant::Str(value) = value {
current.push(value);
} else {
unreachable!("Unexpected non-string constant.");
}
}
_ => unreachable!("Unexpected non-string expression."),
}
}
}
}
}
if !current.is_empty() {
deduped.push(take_current(&mut current));
}
Ok(if has_fstring {
Expr::new(initial_location, ExprKind::JoinedStr { values: deduped })
} else {
deduped
.into_iter()
.exactly_one()
.expect("String must be concatenated to a single element.")
})
}
// Snapshot tests for implicit string concatenation. Each test parses a short Python source
// with `parse_program` (unwrapping, so a parse failure fails the test) and records the debug
// representation of the resulting AST with `insta`.
#[cfg(test)]
mod tests {
use crate::parser::parse_program;
// Two plain literals side by side.
#[test]
fn test_parse_string_concat() {
let source = String::from("'Hello ' 'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by a `u`-prefixed literal.
#[test]
fn test_parse_u_string_concat_1() {
let source = String::from("'Hello ' u'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal followed by a plain literal.
#[test]
fn test_parse_u_string_concat_2() {
let source = String::from("u'Hello ' 'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by an f-string without replacement fields.
#[test]
fn test_parse_f_string_concat_1() {
let source = String::from("'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// NOTE(review): this source is identical to the one in test_parse_f_string_concat_1, so the
// test adds no coverage — presumably a different input was intended; confirm and adjust the
// snapshot together with the source.
#[test]
fn test_parse_f_string_concat_2() {
let source = String::from("'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Plain literal followed by an f-string that contains a replacement field (`{"!"}`).
#[test]
fn test_parse_f_string_concat_3() {
let source = String::from("'Hello ' f'world{\"!\"}'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal followed by an f-string.
#[test]
fn test_parse_u_f_string_concat_1() {
let source = String::from("u'Hello ' f'world'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// `u`-prefixed literal, then an f-string, then a trailing plain literal.
#[test]
fn test_parse_u_f_string_concat_2() {
let source = String::from("u'Hello ' f'world' '!'");
let parse_ast = parse_program(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}