mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
This PR fixes a regression from my last islower/isupper patch. Python's Bytes doesn't assume an encoding, so methods like islower should only consider ASCII casing. I updated islower/isupper for UTF-8 and WTF-8 to match CPython more closely. The two functions now use the same properties as CPython and now match CPython exactly. I updated the unit tests to pass on Python 3.15. Unicode updates sometimes cause properties to shift. I previously tested everything on Python 3.14, but that led to failures that I assumed were bugs but were actually due to Unicode differences. For example, U+0295 is a lower case letter in older Unicode versions but is NOT in newer versions. One of the new tests is disabled on Python 3.14 for now because it will fail in CI till CI is bumped to 3.15.
35 lines
908 B
Python
35 lines
908 B
Python
from testutils import assert_raises
|
||
|
||
# Decoding bytes that contain a non-ASCII byte with the "ascii" codec must
# fail, and the exception must report the half-open byte range [start, end)
# of the offending input.
try:
    # Three ASCII spaces precede the invalid byte, which puts 0xFF at index 3
    # and makes the expected range 3..4.
    b"   \xff".decode("ascii")
except UnicodeDecodeError as e:
    assert e.start == 3
    assert e.end == 4
else:
    assert False, "should have thrown UnicodeDecodeError"
|
||
|
||
# Encoding text with non-ASCII characters to ASCII must raise
# UnicodeEncodeError. The accent is spelled as a combining mark (U+0301)
# after a plain "a", so the literal stays unambiguous in any editor.
assert_raises(UnicodeEncodeError, "¿como esta\u0301s?".encode, "ascii")
|
||
|
||
|
||
def round_trip(s, encoding="utf-8"):
    """Assert that *s* survives an encode/decode cycle through *encoding*.

    Args:
        s: Text to encode and then decode again.
        encoding: Codec name used in both directions (default "utf-8").

    Raises:
        AssertionError: If the decoded text differs from *s*.
    """
    decoded = s.encode(encoding).decode(encoding)
    # Include both values so a failing round trip is diagnosable from the log.
    assert s == decoded, f"{encoding} round trip changed {s!r} into {decoded!r}"
|
||
|
||
|
||
# Exercise the UTF-8 round trip with text outside the ASCII range.
# NOTE(review): these literals look mis-encoded (UTF-8 bytes rendered through
# a Greek code page, with some bytes lost). They still round-trip, but the
# intended sample text should be restored from the upstream file.
round_trip("πΊβ¦ πΕΔΖ ββ")
round_trip("β’π£ απ€πΡβππ₯εΟπ« β¬π£")
round_trip("ππ Χ§πtββπ· οΌ π₯π€")
|
||
|
||
# Bytes should not assume an encoding for isupper/islower: a str knows the
# case of a non-ASCII letter, but its UTF-8 bytes contain no ASCII cased
# characters, so both bytes predicates must be False.
assert "Æ".isupper()
assert not "Æ".encode().isupper()
assert "æ".islower()
assert not "æ".encode().islower()
|
||
|
||
# Invalid Unicode: 0x80 is a lone continuation byte and never valid UTF-8 on
# its own. With no ASCII cased character present, neither predicate holds.
assert not b"\x80\x80".islower()
assert not b"\x80\x80".isupper()
# Undecodable bytes around ASCII letters are ignored; only the ASCII letters
# decide the result.
assert b"\x80cat\x80".islower()
assert b"\x80CAT\x80".isupper()
|