Polish unicode regex helpers

Agent-Logs-Url: https://github.com/RustPython/RustPython/sessions/e4115dbc-b1a5-4a77-90a7-38cf9ac1cdf2

Co-authored-by: youknowone <69878+youknowone@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-04-05 07:45:20 +00:00
committed by GitHub
parent e968d83808
commit 5cf1bd6667

View File

@@ -1,11 +1,13 @@
use crate::{case, classify};
/// The character `'_'` cast to `u32`, so it can be compared directly against `u32` code points.
const UNDERSCORE: u32 = '_' as u32;
/// Reports whether `byte` is one of the six ASCII whitespace bytes this module recognizes:
/// tab (0x09), line feed (0x0A), vertical tab (0x0B), form feed (0x0C),
/// carriage return (0x0D), or space (0x20).
const fn is_py_ascii_whitespace(byte: u8) -> bool {
    // 0x09..=0x0D is the contiguous run \t, \n, \x0B, \x0C, \r; space sits apart at 0x20.
    matches!(byte, 0x09..=0x0D | 0x20)
}
pub fn is_word(cp: u32) -> bool {
cp == '_' as u32
cp == UNDERSCORE
|| u8::try_from(cp)
.map(|byte| byte.is_ascii_alphanumeric())
.unwrap_or(false)
@@ -30,7 +32,7 @@ pub fn is_locale_alnum(cp: u32) -> bool {
}
/// Reports whether `cp` is the underscore code point or satisfies `is_locale_alnum`.
// NOTE(review): the two expression lines below appear to be the removed and added
// forms of the same line in this diff view (the hunk header shows 7 lines on each
// side); confirm against the real file, which should contain only one of them.
pub fn is_locale_word(cp: u32) -> bool {
cp == '_' as u32 || is_locale_alnum(cp)
cp == UNDERSCORE || is_locale_alnum(cp)
}
pub const fn is_linebreak(cp: u32) -> bool {
@@ -73,7 +75,7 @@ pub fn is_unicode_alnum(cp: u32) -> bool {
}
/// Reports whether `cp` is the underscore code point or satisfies `is_unicode_alnum`.
// NOTE(review): the two expression lines below appear to be the removed and added
// forms of the same line in this diff view (the hunk header shows 7 lines on each
// side); confirm against the real file, which should contain only one of them.
pub fn is_unicode_word(cp: u32) -> bool {
cp == '_' as u32 || is_unicode_alnum(cp)
cp == UNDERSCORE || is_unicode_alnum(cp)
}
pub fn lower_unicode(cp: u32) -> u32 {