mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Merge pull request #843 from jgirardet/parsenew
support bytes creation from hex and ascii
This commit is contained in:
@@ -542,7 +542,7 @@ where
|
||||
let tok = if is_bytes {
|
||||
if string_content.is_ascii() {
|
||||
Tok::Bytes {
|
||||
value: string_content.as_bytes().to_vec(),
|
||||
value: lex_byte(string_content)?,
|
||||
}
|
||||
} else {
|
||||
return Err(LexicalError::StringError);
|
||||
@@ -1133,6 +1133,54 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn lex_byte(s: String) -> Result<Vec<u8>, LexicalError> {
|
||||
let mut res = vec![];
|
||||
let mut escape = false; //flag if previous was \
|
||||
let mut hex_on = false; // hex mode on or off
|
||||
let mut hex_value = String::new();
|
||||
|
||||
for c in s.chars() {
|
||||
if hex_on {
|
||||
if c.is_ascii_hexdigit() {
|
||||
if hex_value.is_empty() {
|
||||
hex_value.push(c);
|
||||
continue;
|
||||
} else {
|
||||
hex_value.push(c);
|
||||
res.push(u8::from_str_radix(&hex_value, 16).unwrap());
|
||||
hex_on = false;
|
||||
hex_value.clear();
|
||||
}
|
||||
} else {
|
||||
return Err(LexicalError::StringError);
|
||||
}
|
||||
} else {
|
||||
match (c, escape) {
|
||||
('\\', true) => res.push(b'\\'),
|
||||
('\\', false) => {
|
||||
escape = true;
|
||||
continue;
|
||||
}
|
||||
('x', true) => hex_on = true,
|
||||
('x', false) => res.push(b'x'),
|
||||
('t', true) => res.push(b'\t'),
|
||||
('t', false) => res.push(b't'),
|
||||
('n', true) => res.push(b'\n'),
|
||||
('n', false) => res.push(b'n'),
|
||||
('r', true) => res.push(b'\r'),
|
||||
('r', false) => res.push(b'r'),
|
||||
(x, true) => {
|
||||
res.push(b'\\');
|
||||
res.push(x as u8);
|
||||
}
|
||||
(x, false) => res.push(x as u8),
|
||||
}
|
||||
escape = false;
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{make_tokenizer, NewlineHandler, Tok};
|
||||
@@ -1520,4 +1568,28 @@ mod tests {
|
||||
test_string_continuation_mac_eol: MAC_EOL,
|
||||
test_string_continuation_unix_eol: UNIX_EOL,
|
||||
}
|
||||
|
||||
#[test]
fn test_byte() {
    // Both quoting styles below spell out every byte value from 0 to 255.
    let every_byte: Vec<u8> = (0..=255).collect();

    // Single-quoted literal: only the single quote itself needs escaping.
    let single_quoted = String::from(
        r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##,
    );
    assert_eq!(
        lex_source(&single_quoted),
        vec![Tok::Bytes {
            value: every_byte.clone()
        }]
    );

    // Double-quoted literal: only the double quote itself needs escaping.
    let double_quoted = String::from(
        r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##,
    );
    assert_eq!(
        lex_source(&double_quoted),
        vec![Tok::Bytes { value: every_byte }]
    );

    // `\X` (capital X) is not an escape: backslash and letters stay literal.
    let not_an_escape = String::from(r##"b"omkmok\Xaa""##);
    assert_eq!(
        lex_source(&not_an_escape),
        vec![Tok::Bytes {
            value: vec![111, 109, 107, 109, 111, 107, 92, 88, 97, 97]
        }]
    );
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,10 @@ assert bytes("bla", "utf8")
|
||||
with assertRaises(TypeError):
|
||||
bytes("bla")
|
||||
|
||||
assert b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" == bytes(range(0,256))
|
||||
assert b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff' == bytes(range(0,256))
|
||||
assert b"omkmok\Xaa" == bytes([111, 109, 107, 109, 111, 107, 92, 88, 97, 97])
|
||||
|
||||
|
||||
a = b"abcd"
|
||||
b = b"ab"
|
||||
|
||||
Reference in New Issue
Block a user