From 95e938cb9b64b18d46bb52f3cd9aa01ab99b2003 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sat, 11 Apr 2020 10:59:26 +0900 Subject: [PATCH] Fix {str|bytes|bytearray}.{isascii,islower,isupper} --- Lib/test/string_tests.py | 3 --- vm/src/obj/objbyteinner.rs | 40 +++++++++++++++++++++++++------------- vm/src/obj/objstr.rs | 23 +++++++++++----------- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 4a41e5810..bd50efd08 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -857,7 +857,6 @@ class BaseTest: self.checkraises(TypeError, '123', 'zfill') - @unittest.skip("TODO: RUSTPYTHON test_bytes") def test_islower(self): self.checkequal(False, '', 'islower') self.checkequal(True, 'a', 'islower') @@ -868,7 +867,6 @@ class BaseTest: self.checkequal(True, 'abc\n', 'islower') self.checkraises(TypeError, 'abc', 'islower', 42) - @unittest.skip("TODO: RUSTPYTHON test_bytes") def test_isupper(self): self.checkequal(False, '', 'isupper') self.checkequal(False, 'a', 'isupper') @@ -925,7 +923,6 @@ class BaseTest: self.checkequal(False, 'abc\n', 'isalnum') self.checkraises(TypeError, 'abc', 'isalnum', 42) - @unittest.skip("TODO: RUSTPYTHON test_bytes") def test_isascii(self): self.checkequal(True, '', 'isascii') self.checkequal(True, '\x00', 'isascii') diff --git a/vm/src/obj/objbyteinner.rs b/vm/src/obj/objbyteinner.rs index 47bceeb0c..a5cc207b6 100644 --- a/vm/src/obj/objbyteinner.rs +++ b/vm/src/obj/objbyteinner.rs @@ -611,7 +611,7 @@ impl PyByteInner { } pub fn isascii(&self) -> bool { - !self.elements.is_empty() && self.elements.iter().all(|x| char::from(*x).is_ascii()) + self.elements.iter().all(|x| char::from(*x).is_ascii()) } pub fn isdigit(&self) -> bool { @@ -619,25 +619,39 @@ impl PyByteInner { } pub fn islower(&self) -> bool { - !self.elements.is_empty() - && self - .elements - .iter() - .filter(|x| !char::from(**x).is_whitespace()) - .all(|x| char::from(*x).is_lowercase()) - } - - pub fn isspace(&self) -> bool { - !self.elements.is_empty() && self.elements.iter().all(|x| char::from(*x).is_whitespace()) + // CPython _Py_bytes_islower + let mut cased = false; + for b in self.elements.iter() { + let c = *b as char; + if c.is_uppercase() { + return false; + } else if !cased && c.is_lowercase() { + cased = true + } + } + cased } pub fn isupper(&self) -> bool { + // CPython _Py_bytes_isupper + let mut cased = false; + for b in self.elements.iter() { + let c = *b as char; + if c.is_lowercase() { + return false; + } else if !cased && c.is_uppercase() { + cased = true + } + } + cased + } + + pub fn isspace(&self) -> bool { !self.elements.is_empty() && self .elements .iter() - .filter(|x| !char::from(**x).is_whitespace()) - .all(|x| char::from(*x).is_uppercase()) + .all(|x| char::from(*x).is_ascii_whitespace()) } pub fn istitle(&self) -> bool { diff --git a/vm/src/obj/objstr.rs b/vm/src/obj/objstr.rs index c1e698609..b9f07c7b0 100644 --- a/vm/src/obj/objstr.rs +++ b/vm/src/obj/objstr.rs @@ -9,7 +9,6 @@ use std::string::ToString; use num_traits::ToPrimitive; use unic::ucd::category::GeneralCategory; use unic::ucd::ident::{is_xid_continue, is_xid_start}; -use unic::ucd::is_cased; use unicode_casing::CharExt; use super::objbytes::{PyBytes, PyBytesRef}; @@ -826,29 +825,31 @@ impl PyString { !self.value.is_empty() && self.value.chars().all(|c| c.is_ascii_whitespace()) } - // Return true if all cased characters in the string are uppercase and there is at least one cased character, false otherwise. + // Return true if all cased characters in the string are lowercase and there is at least one cased character, false otherwise. #[pymethod] - fn isupper(&self) -> bool { + fn islower(&self) -> bool { + // CPython unicode_islower_impl let mut cased = false; for c in self.value.chars() { - if is_cased(c) && c.is_uppercase() { - cased = true - } else if is_cased(c) && c.is_lowercase() { + if c.is_uppercase() { return false; + } else if !cased && c.is_lowercase() { + cased = true } } cased } - // Return true if all cased characters in the string are lowercase and there is at least one cased character, false otherwise. + // Return true if all cased characters in the string are uppercase and there is at least one cased character, false otherwise. #[pymethod] - fn islower(&self) -> bool { + fn isupper(&self) -> bool { + // CPython unicode_isupper_impl let mut cased = false; for c in self.value.chars() { - if is_cased(c) && c.is_lowercase() { - cased = true - } else if is_cased(c) && c.is_uppercase() { + if c.is_lowercase() { return false; + } else if !cased && c.is_uppercase() { + cased = true } } cased