mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-09 22:49:57 +09:00
Merge pull request #2896 from whjpji/support_unicode_array
Support unicode array type.
This commit is contained in:
@@ -15,7 +15,7 @@ import warnings
|
||||
import array
|
||||
# from array import _array_reconstructor as array_reconstructor # XXX: RUSTPYTHON
|
||||
|
||||
# sizeof_wchar = array.array('u').itemsize # XXX: RUSTPYTHON
|
||||
sizeof_wchar = array.array('u').itemsize
|
||||
|
||||
|
||||
class ArraySubclass(array.array):
|
||||
@@ -25,10 +25,7 @@ class ArraySubclassWithKwargs(array.array):
|
||||
def __init__(self, typecode, newarg=None):
|
||||
array.array.__init__(self)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
# We did not support typecode u for unicode yet
|
||||
# typecodes = 'ubBhHiIlLfdqQ'
|
||||
typecodes = 'bBhHiIlLfdqQ'
|
||||
typecodes = 'ubBhHiIlLfdqQ'
|
||||
|
||||
class MiscTest(unittest.TestCase):
|
||||
|
||||
@@ -1091,8 +1088,6 @@ class BaseTest:
|
||||
basesize = support.calcvobjsize('Pn2Pi')
|
||||
support.check_sizeof(self, a, basesize)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_initialize_with_unicode(self):
|
||||
if self.typecode != 'u':
|
||||
with self.assertRaises(TypeError) as cm:
|
||||
@@ -1121,8 +1116,6 @@ class BaseTest:
|
||||
|
||||
class StringTest(BaseTest):
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_setitem(self):
|
||||
super().test_setitem()
|
||||
a = array.array(self.typecode, self.example)
|
||||
@@ -1136,173 +1129,105 @@ class UnicodeTest(StringTest, unittest.TestCase):
|
||||
outside = str('\x33')
|
||||
minitemsize = 2
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_add(self):
|
||||
super().test_add()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_buffer(self):
|
||||
super().test_buffer()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_buffer_info(self):
|
||||
super().test_buffer_info()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_byteswap(self):
|
||||
super().test_byteswap()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_cmp(self):
|
||||
super().test_cmp()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_constructor(self):
|
||||
super().test_constructor()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_constructor_with_iterable_argument(self):
|
||||
super().test_constructor_with_iterable_argument()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_copy(self):
|
||||
super().test_copy()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_count(self):
|
||||
super().test_count()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_coveritertraverse(self):
|
||||
super().test_coveritertraverse()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_deepcopy(self):
|
||||
super().test_deepcopy()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_delitem(self):
|
||||
super().test_delitem()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_exhausted_iterator(self):
|
||||
super().test_exhausted_iterator()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_extend(self):
|
||||
super().test_extend()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_extended_getslice(self):
|
||||
super().test_extended_getslice()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_extended_set_del_slice(self):
|
||||
super().test_extended_set_del_slice()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_fromarray(self):
|
||||
super().test_fromarray()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_getitem(self):
|
||||
super().test_getitem()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_getslice(self):
|
||||
super().test_getslice()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_iadd(self):
|
||||
super().test_iadd()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_imul(self):
|
||||
super().test_imul()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_index(self):
|
||||
super().test_index()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_insert(self):
|
||||
super().test_insert()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_len(self):
|
||||
super().test_len()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_mul(self):
|
||||
super().test_mul()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_pop(self):
|
||||
super().test_pop()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_remove(self):
|
||||
super().test_remove()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_repr(self):
|
||||
super().test_repr()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_reverse(self):
|
||||
super().test_reverse()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_setslice(self):
|
||||
super().test_setslice()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_str(self):
|
||||
super().test_str()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_tofrombytes(self):
|
||||
super().test_tofrombytes()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_tofromlist(self):
|
||||
super().test_tofromlist()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_unicode(self):
|
||||
self.assertRaises(TypeError, array.array, 'b', 'foo')
|
||||
|
||||
@@ -1323,8 +1248,6 @@ class UnicodeTest(StringTest, unittest.TestCase):
|
||||
|
||||
self.assertRaises(TypeError, a.fromunicode)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_issue17223(self):
|
||||
# this used to crash
|
||||
if sizeof_wchar == 4:
|
||||
|
||||
@@ -2596,8 +2596,6 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
support.unlink(support.TESTFN)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_constructor(self):
|
||||
r = self.BytesIO(b"\xc3\xa9\n\n")
|
||||
b = self.BufferedReader(r, 1000)
|
||||
@@ -2935,8 +2933,6 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||
|
||||
# Systematic tests of the text I/O API
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_basic_io(self):
|
||||
for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
|
||||
for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le":
|
||||
@@ -2988,8 +2984,6 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||
rlines.append((pos, line))
|
||||
self.assertEqual(rlines, wlines)
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_telling(self):
|
||||
f = self.open(support.TESTFN, "w+", encoding="utf-8")
|
||||
p0 = f.tell()
|
||||
@@ -3608,8 +3602,6 @@ class TextIOWrapperTest(unittest.TestCase):
|
||||
F.tell = lambda x: 0
|
||||
t = self.TextIOWrapper(F(), encoding='utf-8')
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_reconfigure_encoding_read(self):
|
||||
# latin1 -> utf8
|
||||
# (latin1 can decode utf-8 encoded string)
|
||||
@@ -3762,6 +3754,26 @@ class CTextIOWrapperTest(TextIOWrapperTest):
|
||||
io = io
|
||||
shutdown_error = "RuntimeError: could not find io module state"
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_constructor(self):
|
||||
super().test_constructor()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_reconfigure_encoding_read(self):
|
||||
super().test_reconfigure_encoding_read()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_basic_io(self):
|
||||
super().test_basic_io()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_telling(self):
|
||||
super().test_telling()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_uninitialized(self):
|
||||
@@ -3917,8 +3929,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest):
|
||||
def test_line_buffering(self):
|
||||
super().test_line_buffering()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_seeking_too(self):
|
||||
super().test_seeking_too()
|
||||
|
||||
@@ -3927,8 +3937,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest):
|
||||
def test_bufio_write_through(self):
|
||||
super().test_bufio_write_through()
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_seeking(self):
|
||||
super().test_seeking()
|
||||
|
||||
|
||||
@@ -95,8 +95,6 @@ class TestNtpath(NtpathTestCase):
|
||||
tester('ntpath.splitext("xx\\foo.bar.ext")', ('xx\\foo.bar', '.ext'))
|
||||
tester('ntpath.splitext("c:a/b\\c.d")', ('c:a/b\\c', '.d'))
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_splitdrive(self):
|
||||
tester('ntpath.splitdrive("c:\\foo\\bar")',
|
||||
('c:', '\\foo\\bar'))
|
||||
@@ -479,8 +477,6 @@ class TestNtpath(NtpathTestCase):
|
||||
tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar")
|
||||
tester('ntpath.expandvars("bar\'%foo%")', "bar\'%foo%")
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
@unittest.skipUnless(support.FS_NONASCII, 'need support.FS_NONASCII')
|
||||
def test_expandvars_nonascii(self):
|
||||
def check(value, expected):
|
||||
@@ -730,8 +726,6 @@ class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase):
|
||||
pathmodule = ntpath
|
||||
attributes = ['relpath']
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_expandvars_nonascii(self):
|
||||
super().test_expandvars_nonascii()
|
||||
|
||||
|
||||
@@ -1398,8 +1398,6 @@ class ReTests(unittest.TestCase):
|
||||
for x in not_decimal_digits:
|
||||
self.assertIsNone(re.match(r'^\d$', x))
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_empty_array(self):
|
||||
# SF buf 1647541
|
||||
import array
|
||||
|
||||
@@ -146,6 +146,7 @@ class StrtodTests(unittest.TestCase):
|
||||
digits *= 5
|
||||
exponent -= 1
|
||||
|
||||
# TODO: RUSTPYTHON fails on debug mode
|
||||
def test_halfway_cases(self):
|
||||
# test halfway cases for the round-half-to-even rule
|
||||
for i in range(100 * TEST_SIZE):
|
||||
|
||||
@@ -46,8 +46,6 @@ class TracebackCases(unittest.TestCase):
|
||||
def syntax_error_bad_indentation2(self):
|
||||
compile(" print(2)", "?", "exec")
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_caret(self):
|
||||
err = self.get_exception_format(self.syntax_error_with_caret,
|
||||
SyntaxError)
|
||||
|
||||
@@ -199,8 +199,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||
self.checkequal(0, 'a' * 10, 'count', 'a\U00100304')
|
||||
self.checkequal(0, '\u0102' * 10, 'count', '\u0102\U00100304')
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_find(self):
|
||||
string_tests.CommonTest.test_find(self)
|
||||
# test implementation details of the memchr fast path
|
||||
@@ -260,8 +258,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||
self.checkequal(-1, 'a' * 100, 'rfind', '\U00100304a')
|
||||
self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102')
|
||||
|
||||
# TODO: RUSTPYTHON
|
||||
@unittest.expectedFailure
|
||||
def test_index(self):
|
||||
string_tests.CommonTest.test_index(self)
|
||||
self.checkequalnofix(0, 'abcdefghiabc', 'index', '')
|
||||
|
||||
@@ -54,8 +54,7 @@ type PyMemoryViewRef = PyRef<PyMemoryView>;
|
||||
#[pyimpl(with(Hashable, Comparable, AsBuffer))]
|
||||
impl PyMemoryView {
|
||||
fn parse_format(format: &str, vm: &VirtualMachine) -> PyResult<FormatSpec> {
|
||||
FormatSpec::parse(format)
|
||||
.map_err(|msg| vm.new_exception_msg(vm.ctx.types.memoryview_type.clone(), msg))
|
||||
FormatSpec::parse(format, vm)
|
||||
}
|
||||
|
||||
pub fn from_buffer(
|
||||
|
||||
@@ -780,6 +780,12 @@ impl PyStr {
|
||||
self.value.py_join(iter)
|
||||
}
|
||||
|
||||
// FIXME: two traversals of str is expensive
/// Converts a byte offset within `r` into the number of `char`s that come
/// before that offset.
///
/// `byte_idx` has to fall on a UTF-8 character boundary of `r` and must not
/// exceed `r.len()`; the slice below panics otherwise.
#[inline]
fn _to_char_idx(r: &str, byte_idx: usize) -> usize {
    let prefix = &r[..byte_idx];
    prefix.chars().count()
}
|
||||
|
||||
#[inline]
|
||||
fn _find<F>(&self, args: FindArgs, find: F) -> Option<usize>
|
||||
where
|
||||
@@ -791,25 +797,25 @@ impl PyStr {
|
||||
|
||||
#[pymethod]
|
||||
fn find(&self, args: FindArgs) -> isize {
|
||||
self._find(args, |r, s| r.find(s))
|
||||
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
|
||||
.map_or(-1, |v| v as isize)
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn rfind(&self, args: FindArgs) -> isize {
|
||||
self._find(args, |r, s| r.rfind(s))
|
||||
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
|
||||
.map_or(-1, |v| v as isize)
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
|
||||
self._find(args, |r, s| r.find(s))
|
||||
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
|
||||
.ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
|
||||
self._find(args, |r, s| r.rfind(s))
|
||||
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
|
||||
.ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::buffer::{BufferOptions, PyBuffer, ResizeGuard};
|
||||
use crate::builtins::float::IntoPyFloat;
|
||||
use crate::builtins::list::{PyList, PyListRef};
|
||||
use crate::builtins::pystr::PyStrRef;
|
||||
use crate::builtins::pystr::{PyStr, PyStrRef};
|
||||
use crate::builtins::pytype::PyTypeRef;
|
||||
use crate::builtins::slice::PySliceRef;
|
||||
use crate::builtins::{PyByteArray, PyBytes};
|
||||
@@ -15,29 +15,17 @@ use crate::function::OptionalArg;
|
||||
use crate::sliceable::{saturate_index, PySliceableSequence, PySliceableSequenceMut};
|
||||
use crate::slots::{AsBuffer, Comparable, Iterable, PyComparisonOp, PyIter};
|
||||
use crate::utils::Either;
|
||||
use crate::VirtualMachine;
|
||||
use crate::{
|
||||
IdProtocol, IntoPyObject, PyClassImpl, PyComparisonValue, PyIterable, PyObjectRef, PyRef,
|
||||
PyResult, PyValue, StaticType, TryFromObject, TypeProtocol,
|
||||
};
|
||||
use crate::{IntoPyResult, VirtualMachine};
|
||||
use crossbeam_utils::atomic::AtomicCell;
|
||||
use itertools::Itertools;
|
||||
use std::cmp::Ordering;
|
||||
use std::convert::TryFrom;
|
||||
use std::{fmt, os::raw};
|
||||
|
||||
struct ArrayTypeSpecifierError {
|
||||
_priv: (),
|
||||
}
|
||||
|
||||
impl fmt::Display for ArrayTypeSpecifierError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! def_array_enum {
|
||||
($(($n:ident, $t:ty, $c:literal, $scode:literal)),*$(,)?) => {
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -47,10 +35,10 @@ macro_rules! def_array_enum {
|
||||
|
||||
#[allow(clippy::naive_bytecount, clippy::float_cmp)]
|
||||
impl ArrayContentType {
|
||||
fn from_char(c: char) -> Result<Self, ArrayTypeSpecifierError> {
|
||||
fn from_char(c: char) -> Result<Self, String> {
|
||||
match c {
|
||||
$($c => Ok(ArrayContentType::$n(Vec::new())),)*
|
||||
_ => Err(ArrayTypeSpecifierError { _priv: () }),
|
||||
_ => Err("bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)".into()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,10 +82,10 @@ macro_rules! def_array_enum {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyObjectRef {
|
||||
fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyResult {
|
||||
match self {
|
||||
$(ArrayContentType::$n(v) => {
|
||||
v.remove(i).into_pyobject(vm)
|
||||
v.remove(i).into_pyresult(vm)
|
||||
})*
|
||||
}
|
||||
}
|
||||
@@ -225,9 +213,11 @@ macro_rules! def_array_enum {
|
||||
Ok(i)
|
||||
}
|
||||
|
||||
fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> Option<PyObjectRef> {
|
||||
fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> PyResult<Option<PyObjectRef>> {
|
||||
match self {
|
||||
$(ArrayContentType::$n(v) => v.get(i).map(|x| x.into_pyobject(vm)),)*
|
||||
$(ArrayContentType::$n(v) => {
|
||||
v.get(i).map(|x| x.into_pyresult(vm)).transpose()
|
||||
})*
|
||||
}
|
||||
}
|
||||
|
||||
@@ -244,8 +234,8 @@ macro_rules! def_array_enum {
|
||||
fn getitem(&self, needle: Either<isize, PySliceRef>, vm: &VirtualMachine) -> PyResult {
|
||||
match needle {
|
||||
Either::A(i) => {
|
||||
self.idx(i, "array", vm).map(|i| {
|
||||
self.getitem_by_idx(i, vm).unwrap()
|
||||
self.idx(i, "array", vm).and_then(|i| {
|
||||
self.getitem_by_idx(i, vm).map(Option::unwrap)
|
||||
})
|
||||
}
|
||||
Either::B(slice) => self.getitem_by_slice(slice, vm),
|
||||
@@ -377,13 +367,8 @@ macro_rules! def_array_enum {
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
fn iter<'a>(&'a self, vm: &'a VirtualMachine) -> impl Iterator<Item = PyObjectRef> + 'a {
|
||||
let mut i = 0;
|
||||
std::iter::from_fn(move || {
|
||||
let ret = self.getitem_by_idx(i, vm);
|
||||
i += 1;
|
||||
ret
|
||||
})
|
||||
fn iter<'a, 'vm: 'a>(&'a self, vm: &'vm VirtualMachine) -> impl Iterator<Item = PyResult> + 'a {
|
||||
(0..self.len()).map(move |i| self.getitem_by_idx(i, vm).map(Option::unwrap))
|
||||
}
|
||||
|
||||
fn cmp(&self, other: &ArrayContentType) -> Result<Option<Ordering>, ()> {
|
||||
@@ -404,7 +389,7 @@ macro_rules! def_array_enum {
|
||||
def_array_enum!(
|
||||
(SignedByte, i8, 'b', "b"),
|
||||
(UnsignedByte, u8, 'B', "B"),
|
||||
// TODO: support unicode char
|
||||
(PyUnicode, WideChar, 'u', "u"),
|
||||
(SignedShort, raw::c_short, 'h', "h"),
|
||||
(UnsignedShort, raw::c_ushort, 'H', "H"),
|
||||
(SignedInt, raw::c_int, 'i', "i"),
|
||||
@@ -417,6 +402,16 @@ def_array_enum!(
|
||||
(Double, f64, 'd', "d"),
|
||||
);
|
||||
|
||||
/// Wide-character integer type used for `'u'` arrays: on every target except
/// `wasm32` this is whatever `libc` defines as `wchar_t`.
#[cfg(not(target_arch = "wasm32"))]
#[allow(non_camel_case_types)]
pub type wchar_t = libc::wchar_t;
/// On `wasm32` the wide character is fixed to a 32-bit unsigned integer.
// NOTE(review): presumably because `libc::wchar_t` is not usable on this
// target -- confirm.
#[cfg(target_arch = "wasm32")]
#[allow(non_camel_case_types)]
pub type wchar_t = u32;

/// Newtype around the raw `wchar_t` value; it is the element type of the
/// `PyUnicode` (`'u'`) array variant and carries the unicode-specific
/// conversions to and from Python `str` objects.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub struct WideChar(wchar_t);
|
||||
|
||||
trait ArrayElement: Sized {
|
||||
fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self>;
|
||||
fn byteswap(self) -> Self;
|
||||
@@ -464,6 +459,45 @@ fn f64_try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<f
|
||||
IntoPyFloat::try_from_object(vm, obj).map(|x| x.to_f64())
|
||||
}
|
||||
|
||||
impl ArrayElement for WideChar {
|
||||
fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
|
||||
PyStrRef::try_from_object(vm, obj)?
|
||||
.as_str()
|
||||
.chars()
|
||||
.exactly_one()
|
||||
.map(|ch| Self(ch as _))
|
||||
.map_err(|_| vm.new_type_error("array item must be unicode character".into()))
|
||||
}
|
||||
fn byteswap(self) -> Self {
|
||||
Self(self.0.swap_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<WideChar> for char {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(ch: WideChar) -> Result<Self, Self::Error> {
|
||||
// safe because every configuration of bytes for the types we support are valid
|
||||
char::from_u32(ch.0 as u32)
|
||||
.ok_or_else(|| { format!("'utf-8' codec can't encode character '\\u{:x}' in position 0: surrogates not allowed", ch.0 ) })
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoPyResult for WideChar {
|
||||
fn into_pyresult(self, vm: &VirtualMachine) -> PyResult {
|
||||
Ok(
|
||||
String::from(char::try_from(self).map_err(|e| vm.new_unicode_encode_error(e))?)
|
||||
.into_pyobject(vm),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// `WideChar` is never formatted through `Display`: `PyArray::repr` handles the
// `'u'` typecode itself (via `tounicode` and `PyStr::repr`) before it reaches
// the generic element-by-element repr.
// NOTE(review): this impl presumably exists only so the array macro's generic
// repr code type-checks for the `PyUnicode` variant -- confirm.
impl fmt::Display for WideChar {
    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
        unreachable!("`repr(array('u'))` calls `PyStr::repr`")
    }
}
|
||||
|
||||
#[pyclass(module = "array", name = "array")]
|
||||
#[derive(Debug)]
|
||||
pub struct PyArray {
|
||||
@@ -508,17 +542,33 @@ impl PyArray {
|
||||
let spec = spec.as_str().chars().exactly_one().map_err(|_| {
|
||||
vm.new_type_error("array() argument 1 must be a unicode character, not str".to_owned())
|
||||
})?;
|
||||
let mut array =
|
||||
ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err.to_string()))?;
|
||||
let mut array = ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err))?;
|
||||
|
||||
if let OptionalArg::Present(init) = init {
|
||||
if let Some(init) = init.payload::<PyArray>() {
|
||||
if array.typecode() == init.read().typecode() {
|
||||
array.iadd(&*init.read(), vm)?;
|
||||
} else {
|
||||
for obj in init.read().iter(vm) {
|
||||
array.push(obj, vm)?
|
||||
match (spec, init.read().typecode()) {
|
||||
(spec, ch) if spec == ch => array.frombytes(&init.get_bytes()),
|
||||
(spec, 'u') => {
|
||||
return Err(vm.new_type_error(format!(
|
||||
"cannot use a unicode array to initialize an array with typecode '{}'",
|
||||
spec
|
||||
)))
|
||||
}
|
||||
_ => {
|
||||
for obj in init.read().iter(vm) {
|
||||
array.push(obj?, vm)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if let Some(utf8) = init.payload::<PyStr>() {
|
||||
if spec == 'u' {
|
||||
let bytes = Self::_unicode_to_wchar_bytes(utf8.as_str(), array.itemsize());
|
||||
array.frombytes(&bytes);
|
||||
} else {
|
||||
return Err(vm.new_type_error(format!(
|
||||
"cannot use a str to initialize an array with typecode '{}'",
|
||||
spec
|
||||
)));
|
||||
}
|
||||
} else if init.payload_is::<PyBytes>() || init.payload_is::<PyByteArray>() {
|
||||
try_bytes_like(vm, &init, |x| array.frombytes(x))?;
|
||||
@@ -584,6 +634,78 @@ impl PyArray {
|
||||
}
|
||||
}
|
||||
|
||||
/// Encodes `utf8` as a flat buffer of native-endian wide characters.
///
/// When `item_size` is 2 the text is emitted as UTF-16 code units, so
/// characters outside the BMP become surrogate pairs. For any other
/// `item_size` each character is emitted as one 4-byte scalar value.
fn _unicode_to_wchar_bytes(utf8: &str, item_size: usize) -> Vec<u8> {
    let mut encoded = Vec::new();
    match item_size {
        2 => {
            for unit in utf8.encode_utf16() {
                encoded.extend_from_slice(&unit.to_ne_bytes());
            }
        }
        _ => {
            for ch in utf8.chars() {
                encoded.extend_from_slice(&(ch as u32).to_ne_bytes());
            }
        }
    }
    encoded
}
|
||||
|
||||
#[pymethod]
|
||||
fn fromunicode(zelf: PyRef<Self>, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
|
||||
let utf8 = PyStrRef::try_from_object(vm, obj.clone()).map_err(|_| {
|
||||
vm.new_type_error(format!(
|
||||
"fromunicode() argument must be str, not {}",
|
||||
obj.class().name
|
||||
))
|
||||
})?;
|
||||
if zelf.read().typecode() != 'u' {
|
||||
return Err(vm.new_value_error(
|
||||
"fromunicode() may only be called on unicode type arrays".into(),
|
||||
));
|
||||
}
|
||||
let mut w = zelf.try_resizable(vm)?;
|
||||
let bytes = Self::_unicode_to_wchar_bytes(utf8.as_str(), w.itemsize());
|
||||
w.frombytes(&bytes);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn tounicode(&self, vm: &VirtualMachine) -> PyResult<String> {
|
||||
let array = self.array.read();
|
||||
if array.typecode() != 'u' {
|
||||
return Err(
|
||||
vm.new_value_error("tounicode() may only be called on unicode type arrays".into())
|
||||
);
|
||||
}
|
||||
let bytes = array.get_bytes();
|
||||
if self.itemsize() == 2 {
|
||||
// safe because every configuration of bytes for the types we support are valid
|
||||
let utf16 = unsafe {
|
||||
std::slice::from_raw_parts(
|
||||
bytes.as_ptr() as *const u16,
|
||||
bytes.len() / std::mem::size_of::<u16>(),
|
||||
)
|
||||
};
|
||||
Ok(String::from_utf16_lossy(utf16))
|
||||
} else {
|
||||
// safe because every configuration of bytes for the types we support are valid
|
||||
let chars = unsafe {
|
||||
std::slice::from_raw_parts(
|
||||
bytes.as_ptr() as *const u32,
|
||||
bytes.len() / std::mem::size_of::<u32>(),
|
||||
)
|
||||
};
|
||||
chars
|
||||
.iter()
|
||||
.map(|&ch| {
|
||||
// cpython issue 17223
|
||||
char::from_u32(ch).ok_or_else(|| {
|
||||
vm.new_value_error(format!(
|
||||
"character U+{:4x} is not in range [U+0000; U+10ffff]",
|
||||
ch
|
||||
))
|
||||
})
|
||||
})
|
||||
.try_collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethod]
|
||||
fn frombytes(zelf: PyRef<Self>, b: ArgBytesLike, vm: &VirtualMachine) -> PyResult<()> {
|
||||
let b = b.borrow_buf();
|
||||
@@ -621,7 +743,7 @@ impl PyArray {
|
||||
Err(vm.new_index_error("pop from empty array".to_owned()))
|
||||
} else {
|
||||
let i = w.idx(i.unwrap_or(-1), "pop", vm)?;
|
||||
Ok(w.pop(i, vm))
|
||||
w.pop(i, vm)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -643,7 +765,7 @@ impl PyArray {
|
||||
let array = self.read();
|
||||
let mut v = Vec::with_capacity(array.len());
|
||||
for obj in array.iter(vm) {
|
||||
v.push(obj);
|
||||
v.push(obj?);
|
||||
}
|
||||
Ok(vm.ctx.new_list(v))
|
||||
}
|
||||
@@ -767,6 +889,15 @@ impl PyArray {
|
||||
|
||||
#[pymethod(magic)]
|
||||
fn repr(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<String> {
|
||||
if zelf.read().typecode() == 'u' {
|
||||
if zelf.len() == 0 {
|
||||
return Ok("array('u')".into());
|
||||
}
|
||||
return Ok(format!(
|
||||
"array('u', {})",
|
||||
PyStr::from(zelf.tounicode(vm)?).repr(vm)?
|
||||
));
|
||||
}
|
||||
zelf.read().repr(vm)
|
||||
}
|
||||
|
||||
@@ -792,7 +923,7 @@ impl PyArray {
|
||||
let iter = Iterator::zip(array_a.iter(vm), array_b.iter(vm));
|
||||
|
||||
for (a, b) in iter {
|
||||
if !vm.bool_eq(&a, &b)? {
|
||||
if !vm.bool_eq(&a?, &b?)? {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
@@ -830,8 +961,8 @@ impl Comparable for PyArray {
|
||||
|
||||
for (a, b) in iter {
|
||||
let ret = match op {
|
||||
PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a, &b)?,
|
||||
PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a, &b)?,
|
||||
PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a?, &b?)?,
|
||||
PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a?, &b?)?,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if let Some(v) = ret {
|
||||
@@ -933,7 +1064,7 @@ impl PyArrayIter {}
|
||||
impl PyIter for PyArrayIter {
|
||||
fn next(zelf: &PyRef<Self>, vm: &VirtualMachine) -> PyResult {
|
||||
let pos = zelf.position.fetch_add(1);
|
||||
if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm) {
|
||||
if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm)? {
|
||||
Ok(item)
|
||||
} else {
|
||||
Err(vm.new_stop_iteration())
|
||||
|
||||
@@ -27,6 +27,7 @@ pub(crate) mod _struct {
|
||||
use crate::exceptions::PyBaseExceptionRef;
|
||||
use crate::function::Args;
|
||||
use crate::slots::PyIter;
|
||||
use crate::stdlib::array::wchar_t;
|
||||
use crate::utils::Either;
|
||||
use crate::VirtualMachine;
|
||||
use crate::{IntoPyObject, PyObjectRef, PyRef, PyResult, PyValue, StaticType, TryFromObject};
|
||||
@@ -55,6 +56,7 @@ pub(crate) mod _struct {
|
||||
SByte = b'b',
|
||||
UByte = b'B',
|
||||
Char = b'c',
|
||||
WideChar = b'u',
|
||||
Str = b's',
|
||||
Pascal = b'p',
|
||||
Short = b'h',
|
||||
@@ -168,6 +170,7 @@ pub(crate) mod _struct {
|
||||
pack: Some(pack_char),
|
||||
unpack: Some(unpack_char),
|
||||
},
|
||||
WideChar => native_info!(wchar_t),
|
||||
Short => native_info!(raw::c_short),
|
||||
UShort => native_info!(raw::c_ushort),
|
||||
Int => native_info!(raw::c_int),
|
||||
@@ -225,17 +228,18 @@ pub(crate) mod _struct {
|
||||
))
|
||||
}
|
||||
};
|
||||
FormatSpec::parse(decoded_fmt).map_err(|err| new_struct_error(vm, err))
|
||||
FormatSpec::parse(decoded_fmt, vm)
|
||||
}
|
||||
|
||||
pub fn parse(fmt: &str) -> Result<FormatSpec, String> {
|
||||
pub fn parse(fmt: &str, vm: &VirtualMachine) -> PyResult<FormatSpec> {
|
||||
let mut chars = fmt.bytes().peekable();
|
||||
|
||||
// First determine "@", "<", ">","!" or "="
|
||||
let endianness = parse_endianness(&mut chars);
|
||||
|
||||
// Now, analyze struct string further:
|
||||
let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness)?;
|
||||
let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness)
|
||||
.map_err(|err| new_struct_error(vm, err))?;
|
||||
|
||||
Ok(FormatSpec {
|
||||
endianness,
|
||||
|
||||
Reference in New Issue
Block a user