mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Compare commits
5 Commits
main
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
23813ddbcd | ||
|
|
b9cbd5133b | ||
|
|
fab1c0cc01 | ||
|
|
5dd88ee5ae | ||
|
|
2722bc06de |
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -3739,6 +3739,7 @@ dependencies = [
|
||||
"num_enum",
|
||||
"optional",
|
||||
"rustpython-wtf8",
|
||||
"unic-ucd-category",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -428,7 +428,6 @@ pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
|
||||
}
|
||||
#[inline]
|
||||
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
|
||||
// TODO: check with cpython
|
||||
char::try_from(ch).is_ok_and(|c| {
|
||||
GeneralCategoryGroup::Letter
|
||||
.union(GeneralCategoryGroup::Number)
|
||||
|
||||
@@ -891,3 +891,13 @@ assert id(b) != id(b * 0)
|
||||
assert id(b) != id(b * 1)
|
||||
assert id(b) != id(1 * b)
|
||||
assert id(b) != id(b * 2)
|
||||
|
||||
|
||||
# Regression tests for isalpha/isalnum Unicode General Category correctness.
|
||||
# These characters are in letter categories (Ll/Lo) and should return True,
|
||||
# but were missed in older Unicode tables used by unic-ucd-category.
|
||||
# See: https://github.com/RustPython/RustPython/pull/7520#issuecomment-4148322294
|
||||
for _cp in [1376, 1416, 1519, 2160, 2161, 2162, 2163, 2164, 2165, 2166]:
|
||||
_c = chr(_cp)
|
||||
assert _c.isalpha(), f"U+{_cp:04X} should be isalpha"
|
||||
assert _c.isalnum(), f"U+{_cp:04X} should be isalnum"
|
||||
|
||||
@@ -11,6 +11,7 @@ c = ᚴ * 3
|
||||
|
||||
assert c == "👋👋👋"
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
assert unicodedata.category("a") == "Ll"
|
||||
@@ -38,3 +39,10 @@ assert b"xn--pythn-mua.org.".decode("idna") == "pyth\xf6n.org."
|
||||
# TODO: add east_asian_width and mirrored
|
||||
# assert unicodedata.ucd_3_2_0.east_asian_width('\u231a') == 'N'
|
||||
# assert not unicodedata.ucd_3_2_0.mirrored("\u0f3a")
|
||||
|
||||
# U+0345 COMBINING GREEK YPOGEGRAMMENI (category Mn) should not be alphanumeric.
|
||||
# CPython's isalpha/isalnum use Unicode letter categories (Lu/Ll/Lt/Lm/Lo),
|
||||
# not the broader Unicode Alphabetic derived property.
|
||||
assert not "\u0345".isalpha(), "isalpha should not match Mn category characters"
|
||||
assert not "\u0345".isalnum(), "isalnum should not match Mn category characters"
|
||||
assert not re.match(r"\w", "\u0345"), r"\w should not match U+0345 (category Mn)"
|
||||
|
||||
Reference in New Issue
Block a user