Merge pull request #2896 from whjpji/support_unicode_array

Support unicode array type.
2026-06-09 22:49:57 +09:00 · 2021-08-21 22:26:18 +09:00
parent 48437d0b93 e652ae8fc0
commit 7868070295
11 changed files with 216 additions and 158 deletions
--- a/Lib/test/test_array.py
+++ b/Lib/test/test_array.py
@@ -15,7 +15,7 @@ import warnings
 import array
 # from array import _array_reconstructor as array_reconstructor  # XXX: RUSTPYTHON

-# sizeof_wchar = array.array('u').itemsize  # XXX: RUSTPYTHON
+sizeof_wchar = array.array('u').itemsize


 class ArraySubclass(array.array):
@@ -25,10 +25,7 @@ class ArraySubclassWithKwargs(array.array):
    def __init__(self, typecode, newarg=None):
        array.array.__init__(self)

-# TODO: RUSTPYTHON
-# We did not support typecode u for unicode yet
-# typecodes = 'ubBhHiIlLfdqQ'
-typecodes = 'bBhHiIlLfdqQ'
+typecodes = 'ubBhHiIlLfdqQ'

 class MiscTest(unittest.TestCase):

@@ -1091,8 +1088,6 @@ class BaseTest:
        basesize = support.calcvobjsize('Pn2Pi')
        support.check_sizeof(self, a, basesize)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_initialize_with_unicode(self):
        if self.typecode != 'u':
            with self.assertRaises(TypeError) as cm:
@@ -1121,8 +1116,6 @@ class BaseTest:

 class StringTest(BaseTest):

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_setitem(self):
        super().test_setitem()
        a = array.array(self.typecode, self.example)
@@ -1136,173 +1129,105 @@ class UnicodeTest(StringTest, unittest.TestCase):
    outside = str('\x33')
    minitemsize = 2

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_add(self):
        super().test_add()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_buffer(self):
        super().test_buffer()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_buffer_info(self):
        super().test_buffer_info()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_byteswap(self):
        super().test_byteswap()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_cmp(self):
        super().test_cmp()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_constructor(self):
        super().test_constructor()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_constructor_with_iterable_argument(self):
        super().test_constructor_with_iterable_argument()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_copy(self):
        super().test_copy()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_count(self):
        super().test_count()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_coveritertraverse(self):
        super().test_coveritertraverse()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_deepcopy(self):
        super().test_deepcopy()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_delitem(self):
        super().test_delitem()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_exhausted_iterator(self):
        super().test_exhausted_iterator()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_extend(self):
        super().test_extend()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_extended_getslice(self):
        super().test_extended_getslice()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_extended_set_del_slice(self):
        super().test_extended_set_del_slice()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_fromarray(self):
        super().test_fromarray()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_getitem(self):
        super().test_getitem()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_getslice(self):
        super().test_getslice()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_iadd(self):
        super().test_iadd()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_imul(self):
        super().test_imul()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_index(self):
        super().test_index()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_insert(self):
        super().test_insert()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_len(self):
        super().test_len()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_mul(self):
        super().test_mul()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_pop(self):
        super().test_pop()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_remove(self):
        super().test_remove()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_repr(self):
        super().test_repr()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_reverse(self):
        super().test_reverse()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_setslice(self):
        super().test_setslice()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_str(self):
        super().test_str()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_tofrombytes(self):
        super().test_tofrombytes()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_tofromlist(self):
        super().test_tofromlist()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_unicode(self):
        self.assertRaises(TypeError, array.array, 'b', 'foo')

@@ -1323,8 +1248,6 @@ class UnicodeTest(StringTest, unittest.TestCase):

        self.assertRaises(TypeError, a.fromunicode)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_issue17223(self):
        # this used to crash
        if sizeof_wchar == 4:
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2596,8 +2596,6 @@ class TextIOWrapperTest(unittest.TestCase):
    def tearDown(self):
        support.unlink(support.TESTFN)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_constructor(self):
        r = self.BytesIO(b"\xc3\xa9\n\n")
        b = self.BufferedReader(r, 1000)
@@ -2935,8 +2933,6 @@ class TextIOWrapperTest(unittest.TestCase):

    # Systematic tests of the text I/O API

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_basic_io(self):
        for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
            for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le":
@@ -2988,8 +2984,6 @@ class TextIOWrapperTest(unittest.TestCase):
            rlines.append((pos, line))
        self.assertEqual(rlines, wlines)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_telling(self):
        f = self.open(support.TESTFN, "w+", encoding="utf-8")
        p0 = f.tell()
@@ -3608,8 +3602,6 @@ class TextIOWrapperTest(unittest.TestCase):
        F.tell = lambda x: 0
        t = self.TextIOWrapper(F(), encoding='utf-8')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_reconfigure_encoding_read(self):
        # latin1 -> utf8
        # (latin1 can decode utf-8 encoded string)
@@ -3762,6 +3754,26 @@ class CTextIOWrapperTest(TextIOWrapperTest):
    io = io
    shutdown_error = "RuntimeError: could not find io module state"

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_constructor(self):
+        super().test_constructor()
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_reconfigure_encoding_read(self):
+        super().test_reconfigure_encoding_read()
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_basic_io(self):
+        super().test_basic_io()
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_telling(self):
+        super().test_telling()
+
    # TODO: RUSTPYTHON
    @unittest.expectedFailure
    def test_uninitialized(self):
@@ -3917,8 +3929,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest):
    def test_line_buffering(self):
        super().test_line_buffering()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_seeking_too(self):
        super().test_seeking_too()

@@ -3927,8 +3937,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest):
    def test_bufio_write_through(self):
        super().test_bufio_write_through()

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_seeking(self):
        super().test_seeking()

--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -95,8 +95,6 @@ class TestNtpath(NtpathTestCase):
        tester('ntpath.splitext("xx\\foo.bar.ext")', ('xx\\foo.bar', '.ext'))
        tester('ntpath.splitext("c:a/b\\c.d")', ('c:a/b\\c', '.d'))

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_splitdrive(self):
        tester('ntpath.splitdrive("c:\\foo\\bar")',
               ('c:', '\\foo\\bar'))
@@ -479,8 +477,6 @@ class TestNtpath(NtpathTestCase):
            tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar")
            tester('ntpath.expandvars("bar\'%foo%")', "bar\'%foo%")

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    @unittest.skipUnless(support.FS_NONASCII, 'need support.FS_NONASCII')
    def test_expandvars_nonascii(self):
        def check(value, expected):
@@ -730,8 +726,6 @@ class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase):
    pathmodule = ntpath
    attributes = ['relpath']

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_expandvars_nonascii(self):
        super().test_expandvars_nonascii()

--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1398,8 +1398,6 @@ class ReTests(unittest.TestCase):
        for x in not_decimal_digits:
            self.assertIsNone(re.match(r'^\d$', x))

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_empty_array(self):
        # SF buf 1647541
        import array
--- a/Lib/test/test_strtod.py
+++ b/Lib/test/test_strtod.py
@@ -146,6 +146,7 @@ class StrtodTests(unittest.TestCase):
                    digits *= 5
                    exponent -= 1

+    # TODO: RUSTPYTHON, fails in debug mode
    def test_halfway_cases(self):
        # test halfway cases for the round-half-to-even rule
        for i in range(100 * TEST_SIZE):
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -46,8 +46,6 @@ class TracebackCases(unittest.TestCase):
    def syntax_error_bad_indentation2(self):
        compile(" print(2)", "?", "exec")

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_caret(self):
        err = self.get_exception_format(self.syntax_error_with_caret,
                                        SyntaxError)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -199,8 +199,6 @@ class UnicodeTest(string_tests.CommonTest,
        self.checkequal(0, 'a' * 10, 'count', 'a\U00100304')
        self.checkequal(0, '\u0102' * 10, 'count', '\u0102\U00100304')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_find(self):
        string_tests.CommonTest.test_find(self)
        # test implementation details of the memchr fast path
@@ -260,8 +258,6 @@ class UnicodeTest(string_tests.CommonTest,
        self.checkequal(-1, 'a' * 100, 'rfind', '\U00100304a')
        self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_index(self):
        string_tests.CommonTest.test_index(self)
        self.checkequalnofix(0, 'abcdefghiabc', 'index',  '')
--- a/vm/src/builtins/memory.rs
+++ b/vm/src/builtins/memory.rs
@@ -54,8 +54,7 @@ type PyMemoryViewRef = PyRef<PyMemoryView>;
 #[pyimpl(with(Hashable, Comparable, AsBuffer))]
 impl PyMemoryView {
    fn parse_format(format: &str, vm: &VirtualMachine) -> PyResult<FormatSpec> {
-        FormatSpec::parse(format)
-            .map_err(|msg| vm.new_exception_msg(vm.ctx.types.memoryview_type.clone(), msg))
+        FormatSpec::parse(format, vm)
    }

    pub fn from_buffer(
--- a/vm/src/builtins/pystr.rs
+++ b/vm/src/builtins/pystr.rs
@@ -780,6 +780,12 @@ impl PyStr {
        self.value.py_join(iter)
    }

+    // FIXME: callers scan the str twice (once to find the byte index, once here to count chars), which is expensive
+    #[inline]
+    fn _to_char_idx(r: &str, byte_idx: usize) -> usize {
+        r[..byte_idx].chars().count()
+    }
+
    #[inline]
    fn _find<F>(&self, args: FindArgs, find: F) -> Option<usize>
    where
@@ -791,25 +797,25 @@ impl PyStr {

    #[pymethod]
    fn find(&self, args: FindArgs) -> isize {
-        self._find(args, |r, s| r.find(s))
+        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
            .map_or(-1, |v| v as isize)
    }

    #[pymethod]
    fn rfind(&self, args: FindArgs) -> isize {
-        self._find(args, |r, s| r.rfind(s))
+        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
            .map_or(-1, |v| v as isize)
    }

    #[pymethod]
    fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
-        self._find(args, |r, s| r.find(s))
+        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
            .ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
    }

    #[pymethod]
    fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
-        self._find(args, |r, s| r.rfind(s))
+        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
            .ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
    }

--- a/vm/src/stdlib/array.rs
+++ b/vm/src/stdlib/array.rs
@@ -1,7 +1,7 @@
 use crate::buffer::{BufferOptions, PyBuffer, ResizeGuard};
 use crate::builtins::float::IntoPyFloat;
 use crate::builtins::list::{PyList, PyListRef};
-use crate::builtins::pystr::PyStrRef;
+use crate::builtins::pystr::{PyStr, PyStrRef};
 use crate::builtins::pytype::PyTypeRef;
 use crate::builtins::slice::PySliceRef;
 use crate::builtins::{PyByteArray, PyBytes};
@@ -15,29 +15,17 @@ use crate::function::OptionalArg;
 use crate::sliceable::{saturate_index, PySliceableSequence, PySliceableSequenceMut};
 use crate::slots::{AsBuffer, Comparable, Iterable, PyComparisonOp, PyIter};
 use crate::utils::Either;
-use crate::VirtualMachine;
 use crate::{
    IdProtocol, IntoPyObject, PyClassImpl, PyComparisonValue, PyIterable, PyObjectRef, PyRef,
    PyResult, PyValue, StaticType, TryFromObject, TypeProtocol,
 };
+use crate::{IntoPyResult, VirtualMachine};
 use crossbeam_utils::atomic::AtomicCell;
 use itertools::Itertools;
 use std::cmp::Ordering;
+use std::convert::TryFrom;
 use std::{fmt, os::raw};

-struct ArrayTypeSpecifierError {
-    _priv: (),
-}
-
-impl fmt::Display for ArrayTypeSpecifierError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(
-            f,
-            "bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)"
-        )
-    }
-}
-
 macro_rules! def_array_enum {
    ($(($n:ident, $t:ty, $c:literal, $scode:literal)),*$(,)?) => {
        #[derive(Debug, Clone)]
@@ -47,10 +35,10 @@ macro_rules! def_array_enum {

        #[allow(clippy::naive_bytecount, clippy::float_cmp)]
        impl ArrayContentType {
-            fn from_char(c: char) -> Result<Self, ArrayTypeSpecifierError> {
+            fn from_char(c: char) -> Result<Self, String> {
                match c {
                    $($c => Ok(ArrayContentType::$n(Vec::new())),)*
-                    _ => Err(ArrayTypeSpecifierError { _priv: () }),
+                    _ => Err("bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)".into()),
                }
            }

@@ -94,10 +82,10 @@ macro_rules! def_array_enum {
                Ok(())
            }

-            fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyObjectRef {
+            fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyResult {
                match self {
                    $(ArrayContentType::$n(v) => {
-                        v.remove(i).into_pyobject(vm)
+                        v.remove(i).into_pyresult(vm)
                    })*
                }
            }
@@ -225,9 +213,11 @@ macro_rules! def_array_enum {
                Ok(i)
            }

-            fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> Option<PyObjectRef> {
+            fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> PyResult<Option<PyObjectRef>> {
                match self {
-                    $(ArrayContentType::$n(v) => v.get(i).map(|x| x.into_pyobject(vm)),)*
+                    $(ArrayContentType::$n(v) => {
+                        v.get(i).map(|x| x.into_pyresult(vm)).transpose()
+                    })*
                }
            }

@@ -244,8 +234,8 @@ macro_rules! def_array_enum {
            fn getitem(&self, needle: Either<isize, PySliceRef>, vm: &VirtualMachine) -> PyResult {
                match needle {
                    Either::A(i) => {
-                        self.idx(i, "array", vm).map(|i| {
-                            self.getitem_by_idx(i, vm).unwrap()
+                        self.idx(i, "array", vm).and_then(|i| {
+                            self.getitem_by_idx(i, vm).map(Option::unwrap)
                        })
                    }
                    Either::B(slice) => self.getitem_by_slice(slice, vm),
@@ -377,13 +367,8 @@ macro_rules! def_array_enum {
                Ok(s)
            }

-            fn iter<'a>(&'a self, vm: &'a VirtualMachine) -> impl Iterator<Item = PyObjectRef> + 'a {
-                let mut i = 0;
-                std::iter::from_fn(move || {
-                    let ret = self.getitem_by_idx(i, vm);
-                    i += 1;
-                    ret
-                })
+            fn iter<'a, 'vm: 'a>(&'a self, vm: &'vm VirtualMachine) -> impl Iterator<Item = PyResult> + 'a {
+                (0..self.len()).map(move |i| self.getitem_by_idx(i, vm).map(Option::unwrap))
            }

            fn cmp(&self, other: &ArrayContentType) -> Result<Option<Ordering>, ()> {
@@ -404,7 +389,7 @@ macro_rules! def_array_enum {
 def_array_enum!(
    (SignedByte, i8, 'b', "b"),
    (UnsignedByte, u8, 'B', "B"),
-    // TODO: support unicode char
+    (PyUnicode, WideChar, 'u', "u"),
    (SignedShort, raw::c_short, 'h', "h"),
    (UnsignedShort, raw::c_ushort, 'H', "H"),
    (SignedInt, raw::c_int, 'i', "i"),
@@ -417,6 +402,16 @@ def_array_enum!(
    (Double, f64, 'd', "d"),
 );

+#[cfg(not(target_arch = "wasm32"))]
+#[allow(non_camel_case_types)]
+pub type wchar_t = libc::wchar_t;
+#[cfg(target_arch = "wasm32")]
+#[allow(non_camel_case_types)]
+pub type wchar_t = u32;
+
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
+pub struct WideChar(wchar_t);
+
 trait ArrayElement: Sized {
    fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self>;
    fn byteswap(self) -> Self;
@@ -464,6 +459,45 @@ fn f64_try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<f
    IntoPyFloat::try_from_object(vm, obj).map(|x| x.to_f64())
 }

+impl ArrayElement for WideChar {
+    fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
+        PyStrRef::try_from_object(vm, obj)?
+            .as_str()
+            .chars()
+            .exactly_one()
+            .map(|ch| Self(ch as _))
+            .map_err(|_| vm.new_type_error("array item must be unicode character".into()))
+    }
+    fn byteswap(self) -> Self {
+        Self(self.0.swap_bytes())
+    }
+}
+
+impl TryFrom<WideChar> for char {
+    type Error = String;
+
+    fn try_from(ch: WideChar) -> Result<Self, Self::Error> {
+        // `char::from_u32` is a checked conversion: it yields None for surrogates and out-of-range values, reported below as an encode error
+        char::from_u32(ch.0 as u32)
+            .ok_or_else(|| { format!("'utf-8' codec can't encode character '\\u{:x}' in position 0: surrogates not allowed", ch.0 ) })
+    }
+}
+
+impl IntoPyResult for WideChar {
+    fn into_pyresult(self, vm: &VirtualMachine) -> PyResult {
+        Ok(
+            String::from(char::try_from(self).map_err(|e| vm.new_unicode_encode_error(e))?)
+                .into_pyobject(vm),
+        )
+    }
+}
+
+impl fmt::Display for WideChar {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        unreachable!("`repr(array('u'))` calls `PyStr::repr`")
+    }
+}
+
 #[pyclass(module = "array", name = "array")]
 #[derive(Debug)]
 pub struct PyArray {
@@ -508,17 +542,33 @@ impl PyArray {
        let spec = spec.as_str().chars().exactly_one().map_err(|_| {
            vm.new_type_error("array() argument 1 must be a unicode character, not str".to_owned())
        })?;
-        let mut array =
-            ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err.to_string()))?;
+        let mut array = ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err))?;

        if let OptionalArg::Present(init) = init {
            if let Some(init) = init.payload::<PyArray>() {
-                if array.typecode() == init.read().typecode() {
-                    array.iadd(&*init.read(), vm)?;
-                } else {
-                    for obj in init.read().iter(vm) {
-                        array.push(obj, vm)?
+                match (spec, init.read().typecode()) {
+                    (spec, ch) if spec == ch => array.frombytes(&init.get_bytes()),
+                    (spec, 'u') => {
+                        return Err(vm.new_type_error(format!(
+                            "cannot use a unicode array to initialize an array with typecode '{}'",
+                            spec
+                        )))
                    }
+                    _ => {
+                        for obj in init.read().iter(vm) {
+                            array.push(obj?, vm)?;
+                        }
+                    }
+                }
+            } else if let Some(utf8) = init.payload::<PyStr>() {
+                if spec == 'u' {
+                    let bytes = Self::_unicode_to_wchar_bytes(utf8.as_str(), array.itemsize());
+                    array.frombytes(&bytes);
+                } else {
+                    return Err(vm.new_type_error(format!(
+                        "cannot use a str to initialize an array with typecode '{}'",
+                        spec
+                    )));
                }
            } else if init.payload_is::<PyBytes>() || init.payload_is::<PyByteArray>() {
                try_bytes_like(vm, &init, |x| array.frombytes(x))?;
@@ -584,6 +634,78 @@ impl PyArray {
        }
    }

+    fn _unicode_to_wchar_bytes(utf8: &str, item_size: usize) -> Vec<u8> {
+        if item_size == 2 {
+            utf8.encode_utf16()
+                .flat_map(|ch| ch.to_ne_bytes())
+                .collect()
+        } else {
+            utf8.chars()
+                .flat_map(|ch| (ch as u32).to_ne_bytes())
+                .collect()
+        }
+    }
+
+    #[pymethod]
+    fn fromunicode(zelf: PyRef<Self>, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
+        let utf8 = PyStrRef::try_from_object(vm, obj.clone()).map_err(|_| {
+            vm.new_type_error(format!(
+                "fromunicode() argument must be str, not {}",
+                obj.class().name
+            ))
+        })?;
+        if zelf.read().typecode() != 'u' {
+            return Err(vm.new_value_error(
+                "fromunicode() may only be called on unicode type arrays".into(),
+            ));
+        }
+        let mut w = zelf.try_resizable(vm)?;
+        let bytes = Self::_unicode_to_wchar_bytes(utf8.as_str(), w.itemsize());
+        w.frombytes(&bytes);
+        Ok(())
+    }
+
+    #[pymethod]
+    fn tounicode(&self, vm: &VirtualMachine) -> PyResult<String> {
+        let array = self.array.read();
+        if array.typecode() != 'u' {
+            return Err(
+                vm.new_value_error("tounicode() may only be called on unicode type arrays".into())
+            );
+        }
+        let bytes = array.get_bytes();
+        if self.itemsize() == 2 {
+            // safe: every bit pattern is a valid u16; assumes `bytes` is u16-aligned (backed by the array's own storage) — TODO confirm
+            let utf16 = unsafe {
+                std::slice::from_raw_parts(
+                    bytes.as_ptr() as *const u16,
+                    bytes.len() / std::mem::size_of::<u16>(),
+                )
+            };
+            Ok(String::from_utf16_lossy(utf16))
+        } else {
+            // safe: every bit pattern is a valid u32 (validated as char below); assumes `bytes` is u32-aligned — TODO confirm
+            let chars = unsafe {
+                std::slice::from_raw_parts(
+                    bytes.as_ptr() as *const u32,
+                    bytes.len() / std::mem::size_of::<u32>(),
+                )
+            };
+            chars
+                .iter()
+                .map(|&ch| {
+                    // CPython issue 17223: raise ValueError for values that are not valid code points
+                    char::from_u32(ch).ok_or_else(|| {
+                        vm.new_value_error(format!(
+                            "character U+{:4x} is not in range [U+0000; U+10ffff]",
+                            ch
+                        ))
+                    })
+                })
+                .try_collect()
+        }
+    }
+
    #[pymethod]
    fn frombytes(zelf: PyRef<Self>, b: ArgBytesLike, vm: &VirtualMachine) -> PyResult<()> {
        let b = b.borrow_buf();
@@ -621,7 +743,7 @@ impl PyArray {
            Err(vm.new_index_error("pop from empty array".to_owned()))
        } else {
            let i = w.idx(i.unwrap_or(-1), "pop", vm)?;
-            Ok(w.pop(i, vm))
+            w.pop(i, vm)
        }
    }

@@ -643,7 +765,7 @@ impl PyArray {
        let array = self.read();
        let mut v = Vec::with_capacity(array.len());
        for obj in array.iter(vm) {
-            v.push(obj);
+            v.push(obj?);
        }
        Ok(vm.ctx.new_list(v))
    }
@@ -767,6 +889,15 @@ impl PyArray {

    #[pymethod(magic)]
    fn repr(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<String> {
+        if zelf.read().typecode() == 'u' {
+            if zelf.len() == 0 {
+                return Ok("array('u')".into());
+            }
+            return Ok(format!(
+                "array('u', {})",
+                PyStr::from(zelf.tounicode(vm)?).repr(vm)?
+            ));
+        }
        zelf.read().repr(vm)
    }

@@ -792,7 +923,7 @@ impl PyArray {
        let iter = Iterator::zip(array_a.iter(vm), array_b.iter(vm));

        for (a, b) in iter {
-            if !vm.bool_eq(&a, &b)? {
+            if !vm.bool_eq(&a?, &b?)? {
                return Ok(false);
            }
        }
@@ -830,8 +961,8 @@ impl Comparable for PyArray {

                for (a, b) in iter {
                    let ret = match op {
-                        PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a, &b)?,
-                        PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a, &b)?,
+                        PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a?, &b?)?,
+                        PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a?, &b?)?,
                        _ => unreachable!(),
                    };
                    if let Some(v) = ret {
@@ -933,7 +1064,7 @@ impl PyArrayIter {}
 impl PyIter for PyArrayIter {
    fn next(zelf: &PyRef<Self>, vm: &VirtualMachine) -> PyResult {
        let pos = zelf.position.fetch_add(1);
-        if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm) {
+        if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm)? {
            Ok(item)
        } else {
            Err(vm.new_stop_iteration())
--- a/vm/src/stdlib/pystruct.rs
+++ b/vm/src/stdlib/pystruct.rs
@@ -27,6 +27,7 @@ pub(crate) mod _struct {
    use crate::exceptions::PyBaseExceptionRef;
    use crate::function::Args;
    use crate::slots::PyIter;
+    use crate::stdlib::array::wchar_t;
    use crate::utils::Either;
    use crate::VirtualMachine;
    use crate::{IntoPyObject, PyObjectRef, PyRef, PyResult, PyValue, StaticType, TryFromObject};
@@ -55,6 +56,7 @@ pub(crate) mod _struct {
        SByte = b'b',
        UByte = b'B',
        Char = b'c',
+        WideChar = b'u',
        Str = b's',
        Pascal = b'p',
        Short = b'h',
@@ -168,6 +170,7 @@ pub(crate) mod _struct {
                        pack: Some(pack_char),
                        unpack: Some(unpack_char),
                    },
+                    WideChar => native_info!(wchar_t),
                    Short => native_info!(raw::c_short),
                    UShort => native_info!(raw::c_ushort),
                    Int => native_info!(raw::c_int),
@@ -225,17 +228,18 @@ pub(crate) mod _struct {
                    ))
                }
            };
-            FormatSpec::parse(decoded_fmt).map_err(|err| new_struct_error(vm, err))
+            FormatSpec::parse(decoded_fmt, vm)
        }

-        pub fn parse(fmt: &str) -> Result<FormatSpec, String> {
+        pub fn parse(fmt: &str, vm: &VirtualMachine) -> PyResult<FormatSpec> {
            let mut chars = fmt.bytes().peekable();

            // First determine "@", "<", ">","!" or "="
            let endianness = parse_endianness(&mut chars);

            // Now, analyze struct string furter:
-            let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness)?;
+            let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness)
+                .map_err(|err| new_struct_error(vm, err))?;

            Ok(FormatSpec {
                endianness,