forked from Rust-related/RustPython
Fix raw byte literals
This commit is contained in:
@@ -630,12 +630,15 @@ where
|
||||
|
||||
let tok = if is_bytes {
|
||||
if string_content.is_ascii() {
|
||||
Tok::Bytes {
|
||||
value: lex_byte(string_content).map_err(|error| LexicalError {
|
||||
let value = if is_raw {
|
||||
string_content.into_bytes()
|
||||
} else {
|
||||
lex_byte(string_content).map_err(|error| LexicalError {
|
||||
error,
|
||||
location: self.get_pos(),
|
||||
})?,
|
||||
}
|
||||
})?
|
||||
};
|
||||
Tok::Bytes { value }
|
||||
} else {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
@@ -1330,16 +1333,14 @@ fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
|
||||
mod tests {
|
||||
use super::{make_tokenizer, NewlineHandler, Tok};
|
||||
use num_bigint::BigInt;
|
||||
use std::iter::FromIterator;
|
||||
use std::iter::Iterator;
|
||||
|
||||
const WINDOWS_EOL: &str = "\r\n";
|
||||
const MAC_EOL: &str = "\r";
|
||||
const UNIX_EOL: &str = "\n";
|
||||
|
||||
pub fn lex_source(source: &String) -> Vec<Tok> {
|
||||
pub fn lex_source(source: &str) -> Vec<Tok> {
|
||||
let lexer = make_tokenizer(source);
|
||||
Vec::from_iter(lexer.map(|x| x.unwrap().1))
|
||||
lexer.map(|x| x.unwrap().1).collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1354,8 +1355,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_raw_string() {
|
||||
let source = String::from("r\"\\\\\" \"\\\\\"");
|
||||
let tokens = lex_source(&source);
|
||||
let source = "r\"\\\\\" \"\\\\\"";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
@@ -1374,8 +1375,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_numbers() {
|
||||
let source = String::from("0x2f 0b1101 0 123 0.2 2j 2.2j");
|
||||
let tokens = lex_source(&source);
|
||||
let source = "0x2f 0b1101 0 123 0.2 2j 2.2j";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
@@ -1410,7 +1411,7 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = String::from(format!(r"99232 # {}", $eol));
|
||||
let source = format!(r"99232 # {}", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(tokens, vec![Tok::Int { value: BigInt::from(99232) }, Tok::Newline]);
|
||||
}
|
||||
@@ -1430,7 +1431,7 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = String::from(format!("123 # Foo{}456", $eol));
|
||||
let source = format!("123 # Foo{}456", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
@@ -1454,8 +1455,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_assignment() {
|
||||
let source = String::from(r"avariable = 99 + 2-0");
|
||||
let tokens = lex_source(&source);
|
||||
let source = r"avariable = 99 + 2-0";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
@@ -1484,7 +1485,7 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = String::from(format!("def foo():{} return 99{}{}", $eol, $eol, $eol));
|
||||
let source = format!("def foo():{} return 99{}{}", $eol, $eol, $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
@@ -1520,7 +1521,7 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = String::from(format!("def foo():{} if x:{}{} return 99{}{}", $eol, $eol, $eol, $eol, $eol));
|
||||
let source = format!("def foo():{} if x:{}{} return 99{}{}", $eol, $eol, $eol, $eol, $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
@@ -1558,7 +1559,7 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = String::from(format!("def foo():{}\tif x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol));
|
||||
let source = format!("def foo():{}\tif x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
@@ -1608,7 +1609,7 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = String::from(format!("x = [{} 1,2{}]{}", $eol, $eol, $eol));
|
||||
let source = format!("x = [{} 1,2{}]{}", $eol, $eol, $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
@@ -1638,8 +1639,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_operators() {
|
||||
let source = String::from("//////=/ /");
|
||||
let tokens = lex_source(&source);
|
||||
let source = "//////=/ /";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
@@ -1655,8 +1656,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_string() {
|
||||
let source = String::from(r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\''"#);
|
||||
let tokens = lex_source(&source);
|
||||
let source = r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\''"#;
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
@@ -1698,7 +1699,7 @@ mod tests {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = String::from(format!("\"abc\\{}def\"", $eol));
|
||||
let source = format!("\"abc\\{}def\"", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
@@ -1724,9 +1725,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_single_quoted_byte() {
|
||||
// single quote
|
||||
let all = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
|
||||
let source = String::from(all);
|
||||
let tokens = lex_source(&source);
|
||||
let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
|
||||
let tokens = lex_source(source);
|
||||
let res = (0..=255).collect::<Vec<u8>>();
|
||||
assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]);
|
||||
}
|
||||
@@ -1734,9 +1734,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_double_quoted_byte() {
|
||||
// double quote
|
||||
let all = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
|
||||
let source = String::from(all);
|
||||
let tokens = lex_source(&source);
|
||||
let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
|
||||
let tokens = lex_source(source);
|
||||
let res = (0..=255).collect::<Vec<u8>>();
|
||||
assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]);
|
||||
}
|
||||
@@ -1744,10 +1743,24 @@ mod tests {
|
||||
#[test]
|
||||
fn test_escape_char_in_byte_literal() {
|
||||
// backslash doesnt escape
|
||||
let all = r##"b"omkmok\Xaa""##;
|
||||
let source = String::from(all);
|
||||
let tokens = lex_source(&source);
|
||||
let source = r##"b"omkmok\Xaa""##;
|
||||
let tokens = lex_source(source);
|
||||
let res = vec![111, 109, 107, 109, 111, 107, 92, 88, 97, 97];
|
||||
assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal() {
|
||||
let source = r"rb'\x1z'";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Bytes {
|
||||
value: b"\\x1z".to_vec()
|
||||
},
|
||||
Tok::Newline
|
||||
]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user