diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index d52389b96..0ace4114a 100644 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -15,7 +15,7 @@ import warnings import array # from array import _array_reconstructor as array_reconstructor # XXX: RUSTPYTHON -# sizeof_wchar = array.array('u').itemsize # XXX: RUSTPYTHON +sizeof_wchar = array.array('u').itemsize class ArraySubclass(array.array): @@ -25,10 +25,7 @@ class ArraySubclassWithKwargs(array.array): def __init__(self, typecode, newarg=None): array.array.__init__(self) -# TODO: RUSTPYTHON -# We did not support typecode u for unicode yet -# typecodes = 'ubBhHiIlLfdqQ' -typecodes = 'bBhHiIlLfdqQ' +typecodes = 'ubBhHiIlLfdqQ' class MiscTest(unittest.TestCase): @@ -1091,8 +1088,6 @@ class BaseTest: basesize = support.calcvobjsize('Pn2Pi') support.check_sizeof(self, a, basesize) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_initialize_with_unicode(self): if self.typecode != 'u': with self.assertRaises(TypeError) as cm: @@ -1121,8 +1116,6 @@ class BaseTest: class StringTest(BaseTest): - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_setitem(self): super().test_setitem() a = array.array(self.typecode, self.example) @@ -1136,173 +1129,105 @@ class UnicodeTest(StringTest, unittest.TestCase): outside = str('\x33') minitemsize = 2 - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_add(self): super().test_add() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_buffer(self): super().test_buffer() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_buffer_info(self): super().test_buffer_info() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_byteswap(self): super().test_byteswap() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_cmp(self): super().test_cmp() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_constructor(self): super().test_constructor() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_constructor_with_iterable_argument(self): super().test_constructor_with_iterable_argument() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_copy(self): super().test_copy() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_count(self): super().test_count() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_coveritertraverse(self): super().test_coveritertraverse() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_deepcopy(self): super().test_deepcopy() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_delitem(self): super().test_delitem() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exhausted_iterator(self): super().test_exhausted_iterator() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_extend(self): super().test_extend() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_extended_getslice(self): super().test_extended_getslice() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_extended_set_del_slice(self): super().test_extended_set_del_slice() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_fromarray(self): super().test_fromarray() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getitem(self): super().test_getitem() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getslice(self): super().test_getslice() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_iadd(self): super().test_iadd() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_imul(self): super().test_imul() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_index(self): super().test_index() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_insert(self): super().test_insert() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_len(self): super().test_len() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_mul(self): super().test_mul() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pop(self): super().test_pop() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_remove(self): super().test_remove() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_repr(self): super().test_repr() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_reverse(self): super().test_reverse() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_setslice(self): super().test_setslice() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_str(self): super().test_str() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_tofrombytes(self): super().test_tofrombytes() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_tofromlist(self): super().test_tofromlist() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_unicode(self): self.assertRaises(TypeError, array.array, 'b', 'foo') @@ -1323,8 +1248,6 @@ class UnicodeTest(StringTest, unittest.TestCase): self.assertRaises(TypeError, a.fromunicode) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_issue17223(self): # this used to crash if sizeof_wchar == 4: diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 430050e56..c9ab155e1 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2596,8 +2596,6 @@ class TextIOWrapperTest(unittest.TestCase): def tearDown(self): support.unlink(support.TESTFN) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_constructor(self): r = self.BytesIO(b"\xc3\xa9\n\n") b = self.BufferedReader(r, 1000) @@ -2935,8 +2933,6 @@ class TextIOWrapperTest(unittest.TestCase): # Systematic tests of the text I/O API - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_basic_io(self): for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65): for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le": @@ -2988,8 +2984,6 @@ class TextIOWrapperTest(unittest.TestCase): rlines.append((pos, line)) self.assertEqual(rlines, wlines) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_telling(self): f = self.open(support.TESTFN, "w+", encoding="utf-8") p0 = f.tell() @@ -3608,8 +3602,6 @@ class TextIOWrapperTest(unittest.TestCase): F.tell = lambda x: 0 t = self.TextIOWrapper(F(), encoding='utf-8') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_reconfigure_encoding_read(self): # latin1 -> utf8 # (latin1 can decode utf-8 encoded string) @@ -3762,6 +3754,26 @@ class CTextIOWrapperTest(TextIOWrapperTest): io = io shutdown_error = "RuntimeError: could not find io module state" + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_constructor(self): + super().test_constructor() + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_reconfigure_encoding_read(self): + super().test_reconfigure_encoding_read() + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_basic_io(self): + super().test_basic_io() + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_telling(self): + super().test_telling() + # TODO: RUSTPYTHON @unittest.expectedFailure def test_uninitialized(self): @@ -3917,8 +3929,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest): def test_line_buffering(self): super().test_line_buffering() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_seeking_too(self): super().test_seeking_too() @@ -3927,8 +3937,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest): def test_bufio_write_through(self): super().test_bufio_write_through() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_seeking(self): super().test_seeking() diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 3409d44ad..ae3856f3e 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -95,8 +95,6 @@ class TestNtpath(NtpathTestCase): tester('ntpath.splitext("xx\\foo.bar.ext")', ('xx\\foo.bar', '.ext')) tester('ntpath.splitext("c:a/b\\c.d")', ('c:a/b\\c', '.d')) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_splitdrive(self): tester('ntpath.splitdrive("c:\\foo\\bar")', ('c:', '\\foo\\bar')) @@ -479,8 +477,6 @@ class TestNtpath(NtpathTestCase): tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar") tester('ntpath.expandvars("bar\'%foo%")', "bar\'%foo%") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipUnless(support.FS_NONASCII, 'need support.FS_NONASCII') def test_expandvars_nonascii(self): def check(value, expected): @@ -730,8 +726,6 @@ class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase): pathmodule = ntpath attributes = ['relpath'] - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_expandvars_nonascii(self): super().test_expandvars_nonascii() diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 0f3521080..77d497b74 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1398,8 +1398,6 @@ class ReTests(unittest.TestCase): for x in not_decimal_digits: self.assertIsNone(re.match(r'^\d$', x)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_empty_array(self): # SF buf 1647541 import array diff --git a/Lib/test/test_strtod.py b/Lib/test/test_strtod.py index 572764964..d25196570 100644 --- a/Lib/test/test_strtod.py +++ b/Lib/test/test_strtod.py @@ -146,6 +146,7 @@ class StrtodTests(unittest.TestCase): digits *= 5 exponent -= 1 + # TODO: RUSTPYTHON fails on debug mode def test_halfway_cases(self): # test halfway cases for the round-half-to-even rule for i in range(100 * TEST_SIZE): diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index b7120e330..0864e373d 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -46,8 +46,6 @@ class TracebackCases(unittest.TestCase): def syntax_error_bad_indentation2(self): compile(" print(2)", "?", "exec") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_caret(self): err = self.get_exception_format(self.syntax_error_with_caret, SyntaxError) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 1cf83f4d9..951358791 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -199,8 +199,6 @@ class UnicodeTest(string_tests.CommonTest, self.checkequal(0, 'a' * 10, 'count', 'a\U00100304') self.checkequal(0, '\u0102' * 10, 'count', '\u0102\U00100304') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_find(self): string_tests.CommonTest.test_find(self) # test implementation details of the memchr fast path @@ -260,8 +258,6 @@ class UnicodeTest(string_tests.CommonTest, self.checkequal(-1, 'a' * 100, 'rfind', '\U00100304a') self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_index(self): string_tests.CommonTest.test_index(self) self.checkequalnofix(0, 'abcdefghiabc', 'index', '') diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 07c4cb4d3..0e3fba0c7 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -54,8 +54,7 @@ type PyMemoryViewRef = PyRef; #[pyimpl(with(Hashable, Comparable, AsBuffer))] impl PyMemoryView { fn parse_format(format: &str, vm: &VirtualMachine) -> PyResult { - FormatSpec::parse(format) - .map_err(|msg| vm.new_exception_msg(vm.ctx.types.memoryview_type.clone(), msg)) + FormatSpec::parse(format, vm) } pub fn from_buffer( diff --git a/vm/src/builtins/pystr.rs b/vm/src/builtins/pystr.rs index fafcf222a..539ca22c0 100644 --- a/vm/src/builtins/pystr.rs +++ b/vm/src/builtins/pystr.rs @@ -780,6 +780,12 @@ impl PyStr { self.value.py_join(iter) } + // FIXME: two traversals of str is expensive + #[inline] + fn _to_char_idx(r: &str, byte_idx: usize) -> usize { + r[..byte_idx].chars().count() + } + #[inline] fn _find(&self, args: FindArgs, find: F) -> Option where @@ -791,25 +797,25 @@ impl PyStr { #[pymethod] fn find(&self, args: FindArgs) -> isize { - self._find(args, |r, s| r.find(s)) + self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?))) .map_or(-1, |v| v as isize) } #[pymethod] fn rfind(&self, args: FindArgs) -> isize { - self._find(args, |r, s| r.rfind(s)) + self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?))) .map_or(-1, |v| v as isize) } #[pymethod] fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult { - self._find(args, |r, s| r.find(s)) + self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?))) .ok_or_else(|| vm.new_value_error("substring not found".to_owned())) } #[pymethod] fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult { - self._find(args, |r, s| r.rfind(s)) + self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?))) .ok_or_else(|| vm.new_value_error("substring not found".to_owned())) } diff --git a/vm/src/stdlib/array.rs b/vm/src/stdlib/array.rs index 74ddc5577..2fae30f50 100644 --- a/vm/src/stdlib/array.rs +++ b/vm/src/stdlib/array.rs @@ -1,7 +1,7 @@ use crate::buffer::{BufferOptions, PyBuffer, ResizeGuard}; use crate::builtins::float::IntoPyFloat; use crate::builtins::list::{PyList, PyListRef}; -use crate::builtins::pystr::PyStrRef; +use crate::builtins::pystr::{PyStr, PyStrRef}; use crate::builtins::pytype::PyTypeRef; use crate::builtins::slice::PySliceRef; use crate::builtins::{PyByteArray, PyBytes}; @@ -15,29 +15,17 @@ use crate::function::OptionalArg; use crate::sliceable::{saturate_index, PySliceableSequence, PySliceableSequenceMut}; use crate::slots::{AsBuffer, Comparable, Iterable, PyComparisonOp, PyIter}; use crate::utils::Either; -use crate::VirtualMachine; use crate::{ IdProtocol, IntoPyObject, PyClassImpl, PyComparisonValue, PyIterable, PyObjectRef, PyRef, PyResult, PyValue, StaticType, TryFromObject, TypeProtocol, }; +use crate::{IntoPyResult, VirtualMachine}; use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools; use std::cmp::Ordering; +use std::convert::TryFrom; use std::{fmt, os::raw}; -struct ArrayTypeSpecifierError { - _priv: (), -} - -impl fmt::Display for ArrayTypeSpecifierError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)" - ) - } -} - macro_rules! def_array_enum { ($(($n:ident, $t:ty, $c:literal, $scode:literal)),*$(,)?) => { #[derive(Debug, Clone)] @@ -47,10 +35,10 @@ macro_rules! def_array_enum { #[allow(clippy::naive_bytecount, clippy::float_cmp)] impl ArrayContentType { - fn from_char(c: char) -> Result { + fn from_char(c: char) -> Result { match c { $($c => Ok(ArrayContentType::$n(Vec::new())),)* - _ => Err(ArrayTypeSpecifierError { _priv: () }), + _ => Err("bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)".into()), } } @@ -94,10 +82,10 @@ macro_rules! def_array_enum { Ok(()) } - fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyObjectRef { + fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyResult { match self { $(ArrayContentType::$n(v) => { - v.remove(i).into_pyobject(vm) + v.remove(i).into_pyresult(vm) })* } } @@ -225,9 +213,11 @@ macro_rules! def_array_enum { Ok(i) } - fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> Option { + fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> PyResult> { match self { - $(ArrayContentType::$n(v) => v.get(i).map(|x| x.into_pyobject(vm)),)* + $(ArrayContentType::$n(v) => { + v.get(i).map(|x| x.into_pyresult(vm)).transpose() + })* } } @@ -244,8 +234,8 @@ macro_rules! def_array_enum { fn getitem(&self, needle: Either, vm: &VirtualMachine) -> PyResult { match needle { Either::A(i) => { - self.idx(i, "array", vm).map(|i| { - self.getitem_by_idx(i, vm).unwrap() + self.idx(i, "array", vm).and_then(|i| { + self.getitem_by_idx(i, vm).map(Option::unwrap) }) } Either::B(slice) => self.getitem_by_slice(slice, vm), @@ -377,13 +367,8 @@ macro_rules! def_array_enum { Ok(s) } - fn iter<'a>(&'a self, vm: &'a VirtualMachine) -> impl Iterator + 'a { - let mut i = 0; - std::iter::from_fn(move || { - let ret = self.getitem_by_idx(i, vm); - i += 1; - ret - }) + fn iter<'a, 'vm: 'a>(&'a self, vm: &'vm VirtualMachine) -> impl Iterator + 'a { + (0..self.len()).map(move |i| self.getitem_by_idx(i, vm).map(Option::unwrap)) } fn cmp(&self, other: &ArrayContentType) -> Result, ()> { @@ -404,7 +389,7 @@ macro_rules! def_array_enum { def_array_enum!( (SignedByte, i8, 'b', "b"), (UnsignedByte, u8, 'B', "B"), - // TODO: support unicode char + (PyUnicode, WideChar, 'u', "u"), (SignedShort, raw::c_short, 'h', "h"), (UnsignedShort, raw::c_ushort, 'H', "H"), (SignedInt, raw::c_int, 'i', "i"), @@ -417,6 +402,16 @@ def_array_enum!( (Double, f64, 'd', "d"), ); +#[cfg(not(target_arch = "wasm32"))] +#[allow(non_camel_case_types)] +pub type wchar_t = libc::wchar_t; +#[cfg(target_arch = "wasm32")] +#[allow(non_camel_case_types)] +pub type wchar_t = u32; + +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)] +pub struct WideChar(wchar_t); + trait ArrayElement: Sized { fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult; fn byteswap(self) -> Self; @@ -464,6 +459,45 @@ fn f64_try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult PyResult { + PyStrRef::try_from_object(vm, obj)? + .as_str() + .chars() + .exactly_one() + .map(|ch| Self(ch as _)) + .map_err(|_| vm.new_type_error("array item must be unicode character".into())) + } + fn byteswap(self) -> Self { + Self(self.0.swap_bytes()) + } +} + +impl TryFrom for char { + type Error = String; + + fn try_from(ch: WideChar) -> Result { + // safe because every configuration of bytes for the types we support are valid + char::from_u32(ch.0 as u32) + .ok_or_else(|| { format!("'utf-8' codec can't encode character '\\u{:x}' in position 0: surrogates not allowed", ch.0 ) }) + } +} + +impl IntoPyResult for WideChar { + fn into_pyresult(self, vm: &VirtualMachine) -> PyResult { + Ok( + String::from(char::try_from(self).map_err(|e| vm.new_unicode_encode_error(e))?) + .into_pyobject(vm), + ) + } +} + +impl fmt::Display for WideChar { + fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unreachable!("`repr(array('u'))` calls `PyStr::repr`") + } +} + #[pyclass(module = "array", name = "array")] #[derive(Debug)] pub struct PyArray { @@ -508,17 +542,33 @@ impl PyArray { let spec = spec.as_str().chars().exactly_one().map_err(|_| { vm.new_type_error("array() argument 1 must be a unicode character, not str".to_owned()) })?; - let mut array = - ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err.to_string()))?; + let mut array = ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err))?; if let OptionalArg::Present(init) = init { if let Some(init) = init.payload::() { - if array.typecode() == init.read().typecode() { - array.iadd(&*init.read(), vm)?; - } else { - for obj in init.read().iter(vm) { - array.push(obj, vm)? + match (spec, init.read().typecode()) { + (spec, ch) if spec == ch => array.frombytes(&init.get_bytes()), + (spec, 'u') => { + return Err(vm.new_type_error(format!( + "cannot use a unicode array to initialize an array with typecode '{}'", + spec + ))) } + _ => { + for obj in init.read().iter(vm) { + array.push(obj?, vm)?; + } + } + } + } else if let Some(utf8) = init.payload::() { + if spec == 'u' { + let bytes = Self::_unicode_to_wchar_bytes(utf8.as_str(), array.itemsize()); + array.frombytes(&bytes); + } else { + return Err(vm.new_type_error(format!( + "cannot use a str to initialize an array with typecode '{}'", + spec + ))); } } else if init.payload_is::() || init.payload_is::() { try_bytes_like(vm, &init, |x| array.frombytes(x))?; @@ -584,6 +634,78 @@ impl PyArray { } } + fn _unicode_to_wchar_bytes(utf8: &str, item_size: usize) -> Vec { + if item_size == 2 { + utf8.encode_utf16() + .flat_map(|ch| ch.to_ne_bytes()) + .collect() + } else { + utf8.chars() + .flat_map(|ch| (ch as u32).to_ne_bytes()) + .collect() + } + } + + #[pymethod] + fn fromunicode(zelf: PyRef, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + let utf8 = PyStrRef::try_from_object(vm, obj.clone()).map_err(|_| { + vm.new_type_error(format!( + "fromunicode() argument must be str, not {}", + obj.class().name + )) + })?; + if zelf.read().typecode() != 'u' { + return Err(vm.new_value_error( + "fromunicode() may only be called on unicode type arrays".into(), + )); + } + let mut w = zelf.try_resizable(vm)?; + let bytes = Self::_unicode_to_wchar_bytes(utf8.as_str(), w.itemsize()); + w.frombytes(&bytes); + Ok(()) + } + + #[pymethod] + fn tounicode(&self, vm: &VirtualMachine) -> PyResult { + let array = self.array.read(); + if array.typecode() != 'u' { + return Err( + vm.new_value_error("tounicode() may only be called on unicode type arrays".into()) + ); + } + let bytes = array.get_bytes(); + if self.itemsize() == 2 { + // safe because every configuration of bytes for the types we support are valid + let utf16 = unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + }; + Ok(String::from_utf16_lossy(utf16)) + } else { + // safe because every configuration of bytes for the types we support are valid + let chars = unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u32, + bytes.len() / std::mem::size_of::(), + ) + }; + chars + .iter() + .map(|&ch| { + // cpython issue 17223 + char::from_u32(ch).ok_or_else(|| { + vm.new_value_error(format!( + "character U+{:4x} is not in range [U+0000; U+10ffff]", + ch + )) + }) + }) + .try_collect() + } + } + #[pymethod] fn frombytes(zelf: PyRef, b: ArgBytesLike, vm: &VirtualMachine) -> PyResult<()> { let b = b.borrow_buf(); @@ -621,7 +743,7 @@ impl PyArray { Err(vm.new_index_error("pop from empty array".to_owned())) } else { let i = w.idx(i.unwrap_or(-1), "pop", vm)?; - Ok(w.pop(i, vm)) + w.pop(i, vm) } } @@ -643,7 +765,7 @@ impl PyArray { let array = self.read(); let mut v = Vec::with_capacity(array.len()); for obj in array.iter(vm) { - v.push(obj); + v.push(obj?); } Ok(vm.ctx.new_list(v)) } @@ -767,6 +889,15 @@ impl PyArray { #[pymethod(magic)] fn repr(zelf: PyRef, vm: &VirtualMachine) -> PyResult { + if zelf.read().typecode() == 'u' { + if zelf.len() == 0 { + return Ok("array('u')".into()); + } + return Ok(format!( + "array('u', {})", + PyStr::from(zelf.tounicode(vm)?).repr(vm)? + )); + } zelf.read().repr(vm) } @@ -792,7 +923,7 @@ impl PyArray { let iter = Iterator::zip(array_a.iter(vm), array_b.iter(vm)); for (a, b) in iter { - if !vm.bool_eq(&a, &b)? { + if !vm.bool_eq(&a?, &b?)? { return Ok(false); } } @@ -830,8 +961,8 @@ impl Comparable for PyArray { for (a, b) in iter { let ret = match op { - PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a, &b)?, - PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a, &b)?, + PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a?, &b?)?, + PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a?, &b?)?, _ => unreachable!(), }; if let Some(v) = ret { @@ -933,7 +1064,7 @@ impl PyArrayIter {} impl PyIter for PyArrayIter { fn next(zelf: &PyRef, vm: &VirtualMachine) -> PyResult { let pos = zelf.position.fetch_add(1); - if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm) { + if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm)? { Ok(item) } else { Err(vm.new_stop_iteration()) diff --git a/vm/src/stdlib/pystruct.rs b/vm/src/stdlib/pystruct.rs index 555e468bc..0a35deb90 100644 --- a/vm/src/stdlib/pystruct.rs +++ b/vm/src/stdlib/pystruct.rs @@ -27,6 +27,7 @@ pub(crate) mod _struct { use crate::exceptions::PyBaseExceptionRef; use crate::function::Args; use crate::slots::PyIter; + use crate::stdlib::array::wchar_t; use crate::utils::Either; use crate::VirtualMachine; use crate::{IntoPyObject, PyObjectRef, PyRef, PyResult, PyValue, StaticType, TryFromObject}; @@ -55,6 +56,7 @@ pub(crate) mod _struct { SByte = b'b', UByte = b'B', Char = b'c', + WideChar = b'u', Str = b's', Pascal = b'p', Short = b'h', @@ -168,6 +170,7 @@ pub(crate) mod _struct { pack: Some(pack_char), unpack: Some(unpack_char), }, + WideChar => native_info!(wchar_t), Short => native_info!(raw::c_short), UShort => native_info!(raw::c_ushort), Int => native_info!(raw::c_int), @@ -225,17 +228,18 @@ pub(crate) mod _struct { )) } }; - FormatSpec::parse(decoded_fmt).map_err(|err| new_struct_error(vm, err)) + FormatSpec::parse(decoded_fmt, vm) } - pub fn parse(fmt: &str) -> Result { + pub fn parse(fmt: &str, vm: &VirtualMachine) -> PyResult { let mut chars = fmt.bytes().peekable(); // First determine "@", "<", ">","!" or "=" let endianness = parse_endianness(&mut chars); // Now, analyze struct string furter: - let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness)?; + let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness) + .map_err(|err| new_struct_error(vm, err))?; Ok(FormatSpec { endianness,