From 77fd1237b2e2e949df7156282fd499475f2301b7 Mon Sep 17 00:00:00 2001 From: Padraic Fanning Date: Sat, 11 Sep 2021 21:30:55 -0400 Subject: [PATCH 1/3] Add uuencoding/decoding code to binascii.rs The code is based on the PyPy implementation. It may not work properly. --- vm/src/stdlib/binascii.rs | 122 +++++++++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 22 deletions(-) diff --git a/vm/src/stdlib/binascii.rs b/vm/src/stdlib/binascii.rs index 7b9b28d86..3b51e1d2f 100644 --- a/vm/src/stdlib/binascii.rs +++ b/vm/src/stdlib/binascii.rs @@ -177,31 +177,109 @@ mod decl { encoded } - #[pyfunction] - fn a2b_uu(s: SerializedData, vm: &VirtualMachine) -> PyResult<Vec<u8>> { - s.with_ref(|b| { - let mut buf; - let b = if memchr::memchr(b'\n', b).is_some() { - buf = b.to_vec(); - buf.retain(|c| *c != b'\n'); - &buf - } else { - b - }; - // TODO: RUSTPYTHON, implement actual uuencoding code - base64::decode(b) - }) - .map_err(|err| vm.new_value_error(format!("error decoding uuencode: {}", err))) + #[inline] + fn uu_a2b_read(c: &u8, vm: &VirtualMachine) -> PyResult<u8> { + // Check the character for legality + // The 64 instead of the expected 63 is because + // there are a few uuencodes out there that use + // '`' as zero instead of space. 
+ if !(0x20..=0x60).contains(c) { + if [b'\r', b'\n'].contains(c) { + return Ok(0); + } + return Err(vm.new_value_error("Illegal char".to_string())); + } + Ok((*c - 0x20) & 0x3f) } #[pyfunction] - fn b2a_uu(data: ArgBytesLike, NewlineArg { newline }: NewlineArg) -> Vec<u8> { - #[allow(clippy::redundant_closure)] // https://stackoverflow.com/questions/63916821 - // TODO: RUSTPYTHON, implement actual uuencoding code - let mut encoded = data.with_ref(|b| base64::encode(b)).into_bytes(); - if newline { - encoded.push(b'\n'); + fn a2b_uu(s: SerializedData, vm: &VirtualMachine) -> PyResult<Vec<u8>> { + s.with_ref(|b| { + // First byte: binary data length (in bytes) + let length = if b.is_empty() { + ((-0x20i32) & 0x3fi32) as usize + } else { + ((b[0] - 0x20) & 0x3f) as usize + }; + + // Allocate the buffer + let mut res = Vec::<u8>::with_capacity(length); + + for chunk in b.get(1..).unwrap_or_default().chunks(4) { + let char_a = chunk.get(0).map_or(Ok(0), |x| uu_a2b_read(x, vm))?; + let char_b = chunk.get(1).map_or(Ok(0), |x| uu_a2b_read(x, vm))?; + let char_c = chunk.get(2).map_or(Ok(0), |x| uu_a2b_read(x, vm))?; + let char_d = chunk.get(3).map_or(Ok(0), |x| uu_a2b_read(x, vm))?; + + if res.len() < length { + res.push(char_a << 2 | char_b >> 4); + } else if char_a != 0 || char_b != 0 { + return Err(vm.new_value_error("Trailing garbage".to_string())); + } + + if res.len() < length { + res.push((char_b & 0xf) | char_c >> 2); + } else if char_c != 0 { + return Err(vm.new_value_error("Trailing garbage".to_string())); + } + + if res.len() < length { + res.push((char_c & 0x3) << 6 | char_d); + } else if char_d != 0 { + return Err(vm.new_value_error("Trailing garbage".to_string())); + } + } + + let remaining_length = length - res.len(); + if remaining_length > 0 { + res.extend(vec![0; remaining_length]); + } + Ok(res) + }) + } + + #[derive(FromArgs)] + struct BacktickArg { + #[pyarg(named, default = "true")] + backtick: bool, + } + + #[inline] + fn uu_b2a_write(res: &mut Vec<u8>, num: u8, 
backtick: bool) { + if backtick && num != 0 { + res.push(0x60); + } else { + res.push(0x20 + num); } - encoded + } + + #[pyfunction] + fn b2a_uu( + data: ArgBytesLike, + BacktickArg { backtick }: BacktickArg, + vm: &VirtualMachine, + ) -> PyResult<Vec<u8>> { + data.with_ref(|b| { + let length = b.len(); + if length > 45 { + return Err(vm.new_value_error("At most 45 bytes at once".to_string())); + } + let mut res = Vec::<u8>::with_capacity(2 + ((length + 2) / 3) * 4); + uu_b2a_write(&mut res, length as u8, backtick); + + for chunk in b.chunks(3) { + let char_a = *chunk.get(0).unwrap_or(&0); + let char_b = *chunk.get(1).unwrap_or(&0); + let char_c = *chunk.get(2).unwrap_or(&0); + + uu_b2a_write(&mut res, char_a >> 2, backtick); + uu_b2a_write(&mut res, (char_a & 0x3) << 4 | char_b >> 4, backtick); + uu_b2a_write(&mut res, (char_b & 0xf) << 2 | char_c >> 6, backtick); + uu_b2a_write(&mut res, char_c & 0x3f, backtick); + } + + res.push(0xau8); + Ok(res) + }) } } From 360e0dd8a31488c6007d6bb05b37563a708e7038 Mon Sep 17 00:00:00 2001 From: Padraic Fanning Date: Sat, 11 Sep 2021 21:31:06 -0400 Subject: [PATCH 2/3] Unmark passing tests in test_uu --- Lib/test/test_uu.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/test/test_uu.py b/Lib/test/test_uu.py index 55e075ad9..f1288eb5a 100644 --- a/Lib/test/test_uu.py +++ b/Lib/test/test_uu.py @@ -92,8 +92,6 @@ class UUTest(unittest.TestCase): uu.decode(inp, out) self.assertEqual(out.getvalue(), plaintext) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_truncatedinput(self): inp = io.BytesIO(b"begin 644 t1\n" + encodedtext) out = io.BytesIO() @@ -239,8 +237,6 @@ class UUFileTest(unittest.TestCase): s = f.read() self.assertEqual(s, plaintext) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_decodetwice(self): # Verify that decode() will refuse to overwrite an existing file with open(self.tmpin, 'wb') as f: From c08562c4c198c18fe4eab0faf6d8252fd6b46cff Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 14 Sep 
2021 23:59:17 +0900 Subject: [PATCH 3/3] Clean up binascii.rs --- vm/src/stdlib/binascii.rs | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/vm/src/stdlib/binascii.rs b/vm/src/stdlib/binascii.rs index 3b51e1d2f..925d5de0d 100644 --- a/vm/src/stdlib/binascii.rs +++ b/vm/src/stdlib/binascii.rs @@ -204,6 +204,7 @@ mod decl { // Allocate the buffer let mut res = Vec::<u8>::with_capacity(length); + let trailing_garbage_error = || Err(vm.new_value_error("Trailing garbage".to_string())); for chunk in b.get(1..).unwrap_or_default().chunks(4) { let char_a = chunk.get(0).map_or(Ok(0), |x| uu_a2b_read(x, vm))?; @@ -214,19 +215,19 @@ mod decl { if res.len() < length { res.push(char_a << 2 | char_b >> 4); } else if char_a != 0 || char_b != 0 { - return Err(vm.new_value_error("Trailing garbage".to_string())); + return trailing_garbage_error(); } if res.len() < length { res.push((char_b & 0xf) | char_c >> 2); } else if char_c != 0 { - return Err(vm.new_value_error("Trailing garbage".to_string())); + return trailing_garbage_error(); } if res.len() < length { res.push((char_c & 0x3) << 6 | char_d); } else if char_d != 0 { - return Err(vm.new_value_error("Trailing garbage".to_string())); + return trailing_garbage_error(); } } @@ -244,38 +245,38 @@ mod decl { backtick: bool, } - #[inline] - fn uu_b2a_write(res: &mut Vec<u8>, num: u8, backtick: bool) { - if backtick && num != 0 { - res.push(0x60); - } else { - res.push(0x20 + num); - } - } - #[pyfunction] fn b2a_uu( data: ArgBytesLike, BacktickArg { backtick }: BacktickArg, vm: &VirtualMachine, ) -> PyResult<Vec<u8>> { + #[inline] + fn uu_b2a(num: u8, backtick: bool) -> u8 { + if backtick && num != 0 { + 0x60 + } else { + 0x20 + num + } + } + data.with_ref(|b| { let length = b.len(); if length > 45 { return Err(vm.new_value_error("At most 45 bytes at once".to_string())); } let mut res = Vec::<u8>::with_capacity(2 + ((length + 2) / 3) * 4); - uu_b2a_write(&mut res, length as u8, backtick); + 
res.push(uu_b2a(length as u8, backtick)); for chunk in b.chunks(3) { let char_a = *chunk.get(0).unwrap_or(&0); let char_b = *chunk.get(1).unwrap_or(&0); let char_c = *chunk.get(2).unwrap_or(&0); - uu_b2a_write(&mut res, char_a >> 2, backtick); - uu_b2a_write(&mut res, (char_a & 0x3) << 4 | char_b >> 4, backtick); - uu_b2a_write(&mut res, (char_b & 0xf) << 2 | char_c >> 6, backtick); - uu_b2a_write(&mut res, char_c & 0x3f, backtick); + res.push(uu_b2a(char_a >> 2, backtick)); + res.push(uu_b2a((char_a & 0x3) << 4 | char_b >> 4, backtick)); + res.push(uu_b2a((char_b & 0xf) << 2 | char_c >> 6, backtick)); + res.push(uu_b2a(char_c & 0x3f, backtick)); } res.push(0xau8);