diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 9ba31635b4..96e9240797 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -630,12 +630,15 @@ where let tok = if is_bytes { if string_content.is_ascii() { - Tok::Bytes { - value: lex_byte(string_content).map_err(|error| LexicalError { + let value = if is_raw { + string_content.into_bytes() + } else { + lex_byte(string_content).map_err(|error| LexicalError { error, location: self.get_pos(), - })?, - } + })? + }; + Tok::Bytes { value } } else { return Err(LexicalError { error: LexicalErrorType::StringError, @@ -1330,16 +1333,14 @@ fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> { mod tests { use super::{make_tokenizer, NewlineHandler, Tok}; use num_bigint::BigInt; - use std::iter::FromIterator; - use std::iter::Iterator; const WINDOWS_EOL: &str = "\r\n"; const MAC_EOL: &str = "\r"; const UNIX_EOL: &str = "\n"; - pub fn lex_source(source: &String) -> Vec<Tok> { + pub fn lex_source(source: &str) -> Vec<Tok> { let lexer = make_tokenizer(source); - Vec::from_iter(lexer.map(|x| x.unwrap().1)) + lexer.map(|x| x.unwrap().1).collect() } #[test] @@ -1354,8 +1355,8 @@ mod tests { #[test] fn test_raw_string() { - let source = String::from("r\"\\\\\" \"\\\\\""); - let tokens = lex_source(&source); + let source = "r\"\\\\\" \"\\\\\""; + let tokens = lex_source(source); assert_eq!( tokens, vec![ @@ -1374,8 +1375,8 @@ mod tests { #[test] fn test_numbers() { - let source = String::from("0x2f 0b1101 0 123 0.2 2j 2.2j"); - let tokens = lex_source(&source); + let source = "0x2f 0b1101 0 123 0.2 2j 2.2j"; + let tokens = lex_source(source); assert_eq!( tokens, vec![ @@ -1410,7 +1411,7 @@ mod tests { $( #[test] fn $name() { - let source = String::from(format!(r"99232 # {}", $eol)); + let source = format!(r"99232 # {}", $eol); let tokens = lex_source(&source); assert_eq!(tokens, vec![Tok::Int { value: BigInt::from(99232) }, Tok::Newline]); } @@ -1430,7 +1431,7 @@ mod tests { $( #[test] fn $name() { - let source = 
String::from(format!("123 # Foo{}456", $eol)); + let source = format!("123 # Foo{}456", $eol); let tokens = lex_source(&source); assert_eq!( tokens, @@ -1454,8 +1455,8 @@ mod tests { #[test] fn test_assignment() { - let source = String::from(r"avariable = 99 + 2-0"); - let tokens = lex_source(&source); + let source = r"avariable = 99 + 2-0"; + let tokens = lex_source(source); assert_eq!( tokens, vec![ @@ -1484,7 +1485,7 @@ mod tests { $( #[test] fn $name() { - let source = String::from(format!("def foo():{} return 99{}{}", $eol, $eol, $eol)); + let source = format!("def foo():{} return 99{}{}", $eol, $eol, $eol); let tokens = lex_source(&source); assert_eq!( tokens, @@ -1520,7 +1521,7 @@ mod tests { $( #[test] fn $name() { - let source = String::from(format!("def foo():{} if x:{}{} return 99{}{}", $eol, $eol, $eol, $eol, $eol)); + let source = format!("def foo():{} if x:{}{} return 99{}{}", $eol, $eol, $eol, $eol, $eol); let tokens = lex_source(&source); assert_eq!( tokens, @@ -1558,7 +1559,7 @@ mod tests { $( #[test] fn $name() { - let source = String::from(format!("def foo():{}\tif x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol)); + let source = format!("def foo():{}\tif x:{}{}\t return 99{}{}", $eol, $eol, $eol, $eol, $eol); let tokens = lex_source(&source); assert_eq!( tokens, @@ -1608,7 +1609,7 @@ mod tests { $( #[test] fn $name() { - let source = String::from(format!("x = [{} 1,2{}]{}", $eol, $eol, $eol)); + let source = format!("x = [{} 1,2{}]{}", $eol, $eol, $eol); let tokens = lex_source(&source); assert_eq!( tokens, @@ -1638,8 +1639,8 @@ mod tests { #[test] fn test_operators() { - let source = String::from("//////=/ /"); - let tokens = lex_source(&source); + let source = "//////=/ /"; + let tokens = lex_source(source); assert_eq!( tokens, vec![ @@ -1655,8 +1656,8 @@ mod tests { #[test] fn test_string() { - let source = String::from(r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\''"#); - let tokens = lex_source(&source); + let source = 
r#""double" 'single' 'can\'t' "\\\"" '\t\r\n' '\g' r'raw\''"#; + let tokens = lex_source(source); assert_eq!( tokens, vec![ @@ -1698,7 +1699,7 @@ mod tests { $( #[test] fn $name() { - let source = String::from(format!("\"abc\\{}def\"", $eol)); + let source = format!("\"abc\\{}def\"", $eol); let tokens = lex_source(&source); assert_eq!( tokens, @@ -1724,9 +1725,8 @@ mod tests { #[test] fn test_single_quoted_byte() { // single quote - let all = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##; - let source = String::from(all); - let tokens = lex_source(&source); + let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f 
!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##; + let tokens = lex_source(source); let res = (0..=255).collect::<Vec<u8>>(); assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]); } @@ -1734,9 +1734,8 @@ mod tests { #[test] fn test_double_quoted_byte() { // double quote - let all = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##; - let source = String::from(all); - let tokens = lex_source(&source); + let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##; + let tokens = lex_source(source); let res = (0..=255).collect::<Vec<u8>>(); assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]); } @@ -1744,10 +1743,24 @@ mod tests { #[test] fn test_escape_char_in_byte_literal() { // backslash doesnt escape - let all = r##"b"omkmok\Xaa""##; - let source = String::from(all); - let tokens = lex_source(&source); + let source = r##"b"omkmok\Xaa""##; + let tokens = lex_source(source); let res = vec![111, 109, 107, 109, 111, 107, 92, 88, 97, 97]; assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]); } + + #[test] + fn test_raw_byte_literal() { + let source = r"rb'\x1z'"; + let tokens = lex_source(source); + assert_eq!( + tokens, + vec![ + Tok::Bytes { + value: b"\\x1z".to_vec() + }, + Tok::Newline + ] + ) + } }