mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Compare commits
5 Commits
main
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
23813ddbcd | ||
|
|
b9cbd5133b | ||
|
|
fab1c0cc01 | ||
|
|
5dd88ee5ae | ||
|
|
2722bc06de |
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -3739,6 +3739,7 @@ dependencies = [
|
|||||||
"num_enum",
|
"num_enum",
|
||||||
"optional",
|
"optional",
|
||||||
"rustpython-wtf8",
|
"rustpython-wtf8",
|
||||||
|
"unic-ucd-category",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -428,7 +428,6 @@ pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
|
|||||||
}
|
}
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
|
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
|
||||||
// TODO: check with cpython
|
|
||||||
char::try_from(ch).is_ok_and(|c| {
|
char::try_from(ch).is_ok_and(|c| {
|
||||||
GeneralCategoryGroup::Letter
|
GeneralCategoryGroup::Letter
|
||||||
.union(GeneralCategoryGroup::Number)
|
.union(GeneralCategoryGroup::Number)
|
||||||
|
|||||||
@@ -891,3 +891,13 @@ assert id(b) != id(b * 0)
|
|||||||
assert id(b) != id(b * 1)
|
assert id(b) != id(b * 1)
|
||||||
assert id(b) != id(1 * b)
|
assert id(b) != id(1 * b)
|
||||||
assert id(b) != id(b * 2)
|
assert id(b) != id(b * 2)
|
||||||
|
|
||||||
|
|
||||||
|
# Regression tests for isalpha/isalnum Unicode General Category correctness.
|
||||||
|
# These characters are in letter categories (Ll/Lo) and should return True,
|
||||||
|
# but were missed in older Unicode tables used by unic-ucd-category.
|
||||||
|
# See: https://github.com/RustPython/RustPython/pull/7520#issuecomment-4148322294
|
||||||
|
for _cp in [1376, 1416, 1519, 2160, 2161, 2162, 2163, 2164, 2165, 2166]:
|
||||||
|
_c = chr(_cp)
|
||||||
|
assert _c.isalpha(), f"U+{_cp:04X} should be isalpha"
|
||||||
|
assert _c.isalnum(), f"U+{_cp:04X} should be isalnum"
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ c = ᚴ * 3
|
|||||||
|
|
||||||
assert c == "👋👋👋"
|
assert c == "👋👋👋"
|
||||||
|
|
||||||
|
import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
assert unicodedata.category("a") == "Ll"
|
assert unicodedata.category("a") == "Ll"
|
||||||
@@ -38,3 +39,10 @@ assert b"xn--pythn-mua.org.".decode("idna") == "pyth\xf6n.org."
|
|||||||
# TODO: add east_asian_width and mirrored
|
# TODO: add east_asian_width and mirrored
|
||||||
# assert unicodedata.ucd_3_2_0.east_asian_width('\u231a') == 'N'
|
# assert unicodedata.ucd_3_2_0.east_asian_width('\u231a') == 'N'
|
||||||
# assert not unicodedata.ucd_3_2_0.mirrored("\u0f3a")
|
# assert not unicodedata.ucd_3_2_0.mirrored("\u0f3a")
|
||||||
|
|
||||||
|
# U+0345 COMBINING GREEK YPOGEGRAMMENI (category Mn) should not be alphanumeric.
|
||||||
|
# CPython's isalpha/isalnum use Unicode letter categories (Lu/Ll/Lt/Lm/Lo),
|
||||||
|
# not the broader Unicode Alphabetic derived property.
|
||||||
|
assert not "\u0345".isalpha(), "isalpha should not match Mn category characters"
|
||||||
|
assert not "\u0345".isalnum(), "isalnum should not match Mn category characters"
|
||||||
|
assert not re.match(r"\w", "\u0345"), r"\w should not match U+0345 (category Mn)"
|
||||||
|
|||||||
Reference in New Issue
Block a user