From 6e1c9c68b6193c7c7bcbb3a7c5bcfc4f131060db Mon Sep 17 00:00:00 2001 From: Tony Jinwoo Ahn Date: Sat, 4 Sep 2021 20:25:53 +0900 Subject: [PATCH] Fix range of "invalid start byte" for UTF-8 Signed-off-by: Tony Jinwoo Ahn --- Lib/test/test_unicode.py | 2 -- common/src/encodings.rs | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 951358791..05eeb846d 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1896,8 +1896,6 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual((b'aaaa' + seq + b'bbbb').decode('utf-8', 'ignore'), 'aaaa' + res + 'bbbb') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_invalid_start_byte(self): """ Test that an 'invalid start byte' error is raised when the first byte diff --git a/common/src/encodings.rs b/common/src/encodings.rs index fbe5aed3c..83cbcace6 100644 --- a/common/src/encodings.rs +++ b/common/src/encodings.rs @@ -89,7 +89,7 @@ pub mod utf8 { let err_idx = remaining_index + e.valid_up_to(); remaining_data = rest; remaining_index += valid_prefix.len(); - if (0x80..0xc0).contains(&first_err) { + if (0x80..0xc2).contains(&first_err) || (0xf5..=0xff).contains(&first_err) { handle_error!(err_idx..err_idx + 1, "invalid start byte"); } let err_len = match e.error_len() {