forked from Rust-related/RustPython
Merge pull request #4442 from bluetech/non-logical-newline-token
lexer: Add `NonLogicalNewline` token
This commit is contained in:
@@ -1075,10 +1075,13 @@ where
|
||||
self.next_char();
|
||||
let tok_end = self.get_pos();
|
||||
|
||||
// Depending on the nesting level, we emit newline or not:
|
||||
// Depending on the nesting level, we emit a logical or
|
||||
// non-logical newline:
|
||||
if self.nesting == 0 {
|
||||
self.at_begin_of_line = true;
|
||||
self.emit((tok_start, Tok::Newline, tok_end));
|
||||
} else {
|
||||
self.emit((tok_start, Tok::NonLogicalNewline, tok_end));
|
||||
}
|
||||
}
|
||||
' ' | '\t' | '\x0C' => {
|
||||
@@ -1464,7 +1467,16 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = format!("x = [{} 1,2{}]{}", $eol, $eol, $eol);
|
||||
let source = r"x = [
|
||||
|
||||
1,2
|
||||
,(3,
|
||||
4,
|
||||
), {
|
||||
5,
|
||||
6,\
|
||||
7}]
|
||||
".replace("\n", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
@@ -1474,9 +1486,32 @@ mod tests {
|
||||
},
|
||||
Tok::Equal,
|
||||
Tok::Lsqb,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(1) },
|
||||
Tok::Comma,
|
||||
Tok::Int { value: BigInt::from(2) },
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Comma,
|
||||
Tok::Lpar,
|
||||
Tok::Int { value: BigInt::from(3) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(4) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Rpar,
|
||||
Tok::Comma,
|
||||
Tok::Lbrace,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(5) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(6) },
|
||||
Tok::Comma,
|
||||
// Continuation here - no NonLogicalNewline.
|
||||
Tok::Int { value: BigInt::from(7) },
|
||||
Tok::Rbrace,
|
||||
Tok::Rsqb,
|
||||
Tok::Newline,
|
||||
]
|
||||
@@ -1492,6 +1527,50 @@ mod tests {
|
||||
test_newline_in_brackets_unix_eol: UNIX_EOL,
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// Checks the token stream for string literals spread over several physical
/// lines inside parentheses: the assertion below expects a
/// `NonLogicalNewline` for each line break within the parentheses, no token
/// for the backslash continuation, and one `Newline` after the closing paren.
fn test_non_logical_newline_in_string_continuation() {
|
||||
// Raw string, so the backslash after 'c' reaches the lexer verbatim.
let source = r"(
|
||||
'a'
|
||||
'b'
|
||||
|
||||
'c' \
|
||||
'd'
|
||||
)";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Lpar,
|
||||
Tok::NonLogicalNewline,
|
||||
stok("a"),
|
||||
Tok::NonLogicalNewline,
|
||||
stok("b"),
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::NonLogicalNewline,
|
||||
stok("c"),
|
||||
// Backslash continuation here - no NonLogicalNewline before "d".
|
||||
stok("d"),
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Rpar,
|
||||
// The source has no trailing line break, yet a Newline is still expected.
|
||||
Tok::Newline,
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// Lexes two comment-only lines and expects exactly the two `Comment`
/// tokens, with no newline token of either kind between or after them.
fn test_logical_newline_line_comment() {
|
||||
let source = "#Hello\n#World";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Comment("#Hello".to_owned()),
|
||||
// tokenize.py does put an NL here...
|
||||
Tok::Comment("#World".to_owned()),
|
||||
// ... and here, but doesn't seem very useful.
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_operators() {
|
||||
let source = "//////=/ /";
|
||||
|
||||
@@ -96,7 +96,7 @@ pub fn parse_located(
|
||||
let marker_token = (Default::default(), mode.to_marker(), Default::default());
|
||||
let tokenizer = iter::once(Ok(marker_token))
|
||||
.chain(lxr)
|
||||
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. }));
|
||||
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||
|
||||
python::TopParser::new()
|
||||
.parse(tokenizer)
|
||||
|
||||
@@ -25,6 +25,7 @@ pub enum Tok {
|
||||
triple_quoted: bool,
|
||||
},
|
||||
Newline,
|
||||
NonLogicalNewline,
|
||||
Indent,
|
||||
Dedent,
|
||||
StartModule,
|
||||
@@ -136,6 +137,7 @@ impl fmt::Display for Tok {
|
||||
write!(f, "{kind}{quotes}{value}{quotes}")
|
||||
}
|
||||
Newline => f.write_str("Newline"),
|
||||
NonLogicalNewline => f.write_str("NonLogicalNewline"),
|
||||
Indent => f.write_str("Indent"),
|
||||
Dedent => f.write_str("Dedent"),
|
||||
StartModule => f.write_str("StartProgram"),
|
||||
|
||||
Reference in New Issue
Block a user