Add unicodedata.east_asian_width (#4523)

This commit is contained in:
Zhiyan Xiao
2023-02-20 01:38:02 +09:00
committed by GitHub
parent 8b7158f169
commit 3b8d670c81
5 changed files with 46 additions and 5 deletions

7
Cargo.lock generated
View File

@@ -2174,6 +2174,7 @@ dependencies = [
"socket2",
"system-configuration",
"termios",
"ucd",
"unic-char-property",
"unic-normal",
"unic-ucd-age",
@@ -2753,6 +2754,12 @@ version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
[[package]]
name = "ucd"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe4fa6e588762366f1eb4991ce59ad1b93651d0b769dfb4e4d1c5c4b943d1159"
[[package]]
name = "uname"
version = "0.1.1"

View File

@@ -243,8 +243,6 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
# For tests of unicodedata.is_normalized / self.db.is_normalized ,
# see test_normalization.py .
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_east_asian_width(self):
eaw = self.db.east_asian_width
self.assertRaises(TypeError, eaw, b'a')

View File

@@ -18,6 +18,7 @@ assert unicodedata.category('A') == 'Lu'
assert unicodedata.name('a') == 'LATIN SMALL LETTER A'
assert unicodedata.lookup('LATIN SMALL LETTER A') == 'a'
assert unicodedata.bidirectional('a') == 'L'
assert unicodedata.east_asian_width('\u231a') == 'W'
assert unicodedata.normalize('NFC', 'bla') == 'bla'
# testing unicodedata.ucd_3_2_0 for idna

View File

@@ -67,6 +67,7 @@ unic-ucd-bidi = "0.9.0"
unic-ucd-category = "0.9.0"
unic-ucd-age = "0.9.0"
unic-ucd-ident = "0.9.0"
ucd = "0.1.1"
# compression
adler32 = "1.2.0"

View File

@@ -10,9 +10,15 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
.into_ref(vm)
.into();
for attr in ["category", "lookup", "name", "bidirectional", "normalize"]
.iter()
.copied()
for attr in [
"category",
"lookup",
"name",
"bidirectional",
"east_asian_width",
"normalize",
]
.into_iter()
{
crate::vm::extend_module!(vm, &module, {
attr => ucd.get_attr(attr, vm).unwrap(),
@@ -29,6 +35,7 @@ mod unicodedata {
VirtualMachine,
};
use itertools::Itertools;
use ucd::{Codepoint, EastAsianWidth};
use unic_char_property::EnumeratedCharProperty;
use unic_normal::StrNormalForm;
use unic_ucd_age::{Age, UnicodeVersion, UNICODE_VERSION};
@@ -113,6 +120,16 @@ mod unicodedata {
Ok(bidi.to_owned())
}
/// Return the East Asian Width short code of `character` as a string.
///
/// When `extract_char` yields no character, the width falls back to
/// `EastAsianWidth::Neutral` (reported as `"N"`).
///
/// NOTE: This function uses the 9.0.0 database instead of 3.2.0.
#[pymethod]
fn east_asian_width(&self, character: PyStrRef, vm: &VirtualMachine) -> PyResult<String> {
    // Errors from `extract_char` are propagated to the caller unchanged.
    let width = match self.extract_char(character, vm)? {
        Some(c) => c.east_asian_width(),
        None => EastAsianWidth::Neutral,
    };
    Ok(width.abbr_name().to_owned())
}
#[pymethod]
fn normalize(
&self,
@@ -138,6 +155,23 @@ mod unicodedata {
}
}
/// Extension trait mapping an East Asian Width value to its abbreviated
/// property-value name (for example `"W"` for `Wide`).
trait EastAsianWidthAbbrName {
    /// Returns the abbreviated name as a static string slice.
    fn abbr_name(&self) -> &'static str;
}

impl EastAsianWidthAbbrName for EastAsianWidth {
    fn abbr_name(&self) -> &'static str {
        // Deliberately exhaustive (no `_` arm): a newly added variant must
        // fail to compile here rather than silently receive a wrong code.
        match *self {
            EastAsianWidth::Ambiguous => "A",
            EastAsianWidth::FullWidth => "F",
            EastAsianWidth::HalfWidth => "H",
            EastAsianWidth::Neutral => "N",
            EastAsianWidth::Narrow => "Na",
            EastAsianWidth::Wide => "W",
        }
    }
}
#[pyattr]
fn ucd_3_2_0(vm: &VirtualMachine) -> PyRef<Ucd> {
Ucd {