Merge pull request #2896 from whjpji/support_unicode_array

Support unicode array type.
This commit is contained in:
Jeong YunWon
2021-08-21 22:26:18 +09:00
committed by GitHub
11 changed files with 216 additions and 158 deletions

View File

@@ -15,7 +15,7 @@ import warnings
import array
# from array import _array_reconstructor as array_reconstructor # XXX: RUSTPYTHON
# sizeof_wchar = array.array('u').itemsize # XXX: RUSTPYTHON
sizeof_wchar = array.array('u').itemsize
class ArraySubclass(array.array):
@@ -25,10 +25,7 @@ class ArraySubclassWithKwargs(array.array):
def __init__(self, typecode, newarg=None):
array.array.__init__(self)
# TODO: RUSTPYTHON
# We did not support typecode u for unicode yet
# typecodes = 'ubBhHiIlLfdqQ'
typecodes = 'bBhHiIlLfdqQ'
typecodes = 'ubBhHiIlLfdqQ'
class MiscTest(unittest.TestCase):
@@ -1091,8 +1088,6 @@ class BaseTest:
basesize = support.calcvobjsize('Pn2Pi')
support.check_sizeof(self, a, basesize)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_initialize_with_unicode(self):
if self.typecode != 'u':
with self.assertRaises(TypeError) as cm:
@@ -1121,8 +1116,6 @@ class BaseTest:
class StringTest(BaseTest):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_setitem(self):
super().test_setitem()
a = array.array(self.typecode, self.example)
@@ -1136,173 +1129,105 @@ class UnicodeTest(StringTest, unittest.TestCase):
outside = str('\x33')
minitemsize = 2
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_add(self):
super().test_add()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_buffer(self):
super().test_buffer()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_buffer_info(self):
super().test_buffer_info()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_byteswap(self):
super().test_byteswap()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_cmp(self):
super().test_cmp()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_constructor(self):
super().test_constructor()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_constructor_with_iterable_argument(self):
super().test_constructor_with_iterable_argument()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_copy(self):
super().test_copy()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_count(self):
super().test_count()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_coveritertraverse(self):
super().test_coveritertraverse()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_deepcopy(self):
super().test_deepcopy()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_delitem(self):
super().test_delitem()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_exhausted_iterator(self):
super().test_exhausted_iterator()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_extend(self):
super().test_extend()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_extended_getslice(self):
super().test_extended_getslice()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_extended_set_del_slice(self):
super().test_extended_set_del_slice()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_fromarray(self):
super().test_fromarray()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_getitem(self):
super().test_getitem()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_getslice(self):
super().test_getslice()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_iadd(self):
super().test_iadd()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_imul(self):
super().test_imul()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_index(self):
super().test_index()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_insert(self):
super().test_insert()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_len(self):
super().test_len()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_mul(self):
super().test_mul()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_pop(self):
super().test_pop()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_remove(self):
super().test_remove()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_repr(self):
super().test_repr()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_reverse(self):
super().test_reverse()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_setslice(self):
super().test_setslice()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_str(self):
super().test_str()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_tofrombytes(self):
super().test_tofrombytes()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_tofromlist(self):
super().test_tofromlist()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_unicode(self):
self.assertRaises(TypeError, array.array, 'b', 'foo')
@@ -1323,8 +1248,6 @@ class UnicodeTest(StringTest, unittest.TestCase):
self.assertRaises(TypeError, a.fromunicode)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_issue17223(self):
# this used to crash
if sizeof_wchar == 4:

View File

@@ -2596,8 +2596,6 @@ class TextIOWrapperTest(unittest.TestCase):
def tearDown(self):
support.unlink(support.TESTFN)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_constructor(self):
r = self.BytesIO(b"\xc3\xa9\n\n")
b = self.BufferedReader(r, 1000)
@@ -2935,8 +2933,6 @@ class TextIOWrapperTest(unittest.TestCase):
# Systematic tests of the text I/O API
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_basic_io(self):
for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le":
@@ -2988,8 +2984,6 @@ class TextIOWrapperTest(unittest.TestCase):
rlines.append((pos, line))
self.assertEqual(rlines, wlines)
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_telling(self):
f = self.open(support.TESTFN, "w+", encoding="utf-8")
p0 = f.tell()
@@ -3608,8 +3602,6 @@ class TextIOWrapperTest(unittest.TestCase):
F.tell = lambda x: 0
t = self.TextIOWrapper(F(), encoding='utf-8')
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_reconfigure_encoding_read(self):
# latin1 -> utf8
# (latin1 can decode utf-8 encoded string)
@@ -3762,6 +3754,26 @@ class CTextIOWrapperTest(TextIOWrapperTest):
io = io
shutdown_error = "RuntimeError: could not find io module state"
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_constructor(self):
super().test_constructor()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_reconfigure_encoding_read(self):
super().test_reconfigure_encoding_read()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_basic_io(self):
super().test_basic_io()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_telling(self):
super().test_telling()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_uninitialized(self):
@@ -3917,8 +3929,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest):
def test_line_buffering(self):
super().test_line_buffering()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_seeking_too(self):
super().test_seeking_too()
@@ -3927,8 +3937,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest):
def test_bufio_write_through(self):
super().test_bufio_write_through()
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_seeking(self):
super().test_seeking()

View File

@@ -95,8 +95,6 @@ class TestNtpath(NtpathTestCase):
tester('ntpath.splitext("xx\\foo.bar.ext")', ('xx\\foo.bar', '.ext'))
tester('ntpath.splitext("c:a/b\\c.d")', ('c:a/b\\c', '.d'))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_splitdrive(self):
tester('ntpath.splitdrive("c:\\foo\\bar")',
('c:', '\\foo\\bar'))
@@ -479,8 +477,6 @@ class TestNtpath(NtpathTestCase):
tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar")
tester('ntpath.expandvars("bar\'%foo%")', "bar\'%foo%")
# TODO: RUSTPYTHON
@unittest.expectedFailure
@unittest.skipUnless(support.FS_NONASCII, 'need support.FS_NONASCII')
def test_expandvars_nonascii(self):
def check(value, expected):
@@ -730,8 +726,6 @@ class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase):
pathmodule = ntpath
attributes = ['relpath']
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_expandvars_nonascii(self):
super().test_expandvars_nonascii()

View File

@@ -1398,8 +1398,6 @@ class ReTests(unittest.TestCase):
for x in not_decimal_digits:
self.assertIsNone(re.match(r'^\d$', x))
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_empty_array(self):
# SF buf 1647541
import array

View File

@@ -146,6 +146,7 @@ class StrtodTests(unittest.TestCase):
digits *= 5
exponent -= 1
# TODO: RUSTPYTHON fails on debug mode
def test_halfway_cases(self):
# test halfway cases for the round-half-to-even rule
for i in range(100 * TEST_SIZE):

View File

@@ -46,8 +46,6 @@ class TracebackCases(unittest.TestCase):
def syntax_error_bad_indentation2(self):
compile(" print(2)", "?", "exec")
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_caret(self):
err = self.get_exception_format(self.syntax_error_with_caret,
SyntaxError)

View File

@@ -199,8 +199,6 @@ class UnicodeTest(string_tests.CommonTest,
self.checkequal(0, 'a' * 10, 'count', 'a\U00100304')
self.checkequal(0, '\u0102' * 10, 'count', '\u0102\U00100304')
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_find(self):
string_tests.CommonTest.test_find(self)
# test implementation details of the memchr fast path
@@ -260,8 +258,6 @@ class UnicodeTest(string_tests.CommonTest,
self.checkequal(-1, 'a' * 100, 'rfind', '\U00100304a')
self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102')
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_index(self):
string_tests.CommonTest.test_index(self)
self.checkequalnofix(0, 'abcdefghiabc', 'index', '')

View File

@@ -54,8 +54,7 @@ type PyMemoryViewRef = PyRef<PyMemoryView>;
#[pyimpl(with(Hashable, Comparable, AsBuffer))]
impl PyMemoryView {
fn parse_format(format: &str, vm: &VirtualMachine) -> PyResult<FormatSpec> {
FormatSpec::parse(format)
.map_err(|msg| vm.new_exception_msg(vm.ctx.types.memoryview_type.clone(), msg))
FormatSpec::parse(format, vm)
}
pub fn from_buffer(

View File

@@ -780,6 +780,12 @@ impl PyStr {
self.value.py_join(iter)
}
// FIXME: the string is walked twice (once by the search, once here to count
// characters), which is expensive.
/// Convert a byte offset into `r` to the number of characters that precede it.
///
/// Panics, like any `str` slice, if `byte_idx` is past the end of `r` or does
/// not fall on a character boundary.
#[inline]
fn _to_char_idx(r: &str, byte_idx: usize) -> usize {
    let prefix = &r[..byte_idx];
    prefix.char_indices().count()
}
#[inline]
fn _find<F>(&self, args: FindArgs, find: F) -> Option<usize>
where
@@ -791,25 +797,25 @@ impl PyStr {
#[pymethod]
fn find(&self, args: FindArgs) -> isize {
self._find(args, |r, s| r.find(s))
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
.map_or(-1, |v| v as isize)
}
#[pymethod]
fn rfind(&self, args: FindArgs) -> isize {
self._find(args, |r, s| r.rfind(s))
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
.map_or(-1, |v| v as isize)
}
#[pymethod]
fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
self._find(args, |r, s| r.find(s))
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
.ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
}
#[pymethod]
fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
self._find(args, |r, s| r.rfind(s))
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
.ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
}

View File

@@ -1,7 +1,7 @@
use crate::buffer::{BufferOptions, PyBuffer, ResizeGuard};
use crate::builtins::float::IntoPyFloat;
use crate::builtins::list::{PyList, PyListRef};
use crate::builtins::pystr::PyStrRef;
use crate::builtins::pystr::{PyStr, PyStrRef};
use crate::builtins::pytype::PyTypeRef;
use crate::builtins::slice::PySliceRef;
use crate::builtins::{PyByteArray, PyBytes};
@@ -15,29 +15,17 @@ use crate::function::OptionalArg;
use crate::sliceable::{saturate_index, PySliceableSequence, PySliceableSequenceMut};
use crate::slots::{AsBuffer, Comparable, Iterable, PyComparisonOp, PyIter};
use crate::utils::Either;
use crate::VirtualMachine;
use crate::{
IdProtocol, IntoPyObject, PyClassImpl, PyComparisonValue, PyIterable, PyObjectRef, PyRef,
PyResult, PyValue, StaticType, TryFromObject, TypeProtocol,
};
use crate::{IntoPyResult, VirtualMachine};
use crossbeam_utils::atomic::AtomicCell;
use itertools::Itertools;
use std::cmp::Ordering;
use std::convert::TryFrom;
use std::{fmt, os::raw};
/// Error value used when a typecode character does not name any array type.
struct ArrayTypeSpecifierError {
    // Unit-typed field; the struct carries no data.
    _priv: (),
}

impl fmt::Display for ArrayTypeSpecifierError {
    /// Writes the fixed "bad typecode" message listing the accepted codes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)")
    }
}
macro_rules! def_array_enum {
($(($n:ident, $t:ty, $c:literal, $scode:literal)),*$(,)?) => {
#[derive(Debug, Clone)]
@@ -47,10 +35,10 @@ macro_rules! def_array_enum {
#[allow(clippy::naive_bytecount, clippy::float_cmp)]
impl ArrayContentType {
fn from_char(c: char) -> Result<Self, ArrayTypeSpecifierError> {
fn from_char(c: char) -> Result<Self, String> {
match c {
$($c => Ok(ArrayContentType::$n(Vec::new())),)*
_ => Err(ArrayTypeSpecifierError { _priv: () }),
_ => Err("bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f or d)".into()),
}
}
@@ -94,10 +82,10 @@ macro_rules! def_array_enum {
Ok(())
}
fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyObjectRef {
fn pop(&mut self, i: usize, vm: &VirtualMachine) -> PyResult {
match self {
$(ArrayContentType::$n(v) => {
v.remove(i).into_pyobject(vm)
v.remove(i).into_pyresult(vm)
})*
}
}
@@ -225,9 +213,11 @@ macro_rules! def_array_enum {
Ok(i)
}
fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> Option<PyObjectRef> {
fn getitem_by_idx(&self, i: usize, vm: &VirtualMachine) -> PyResult<Option<PyObjectRef>> {
match self {
$(ArrayContentType::$n(v) => v.get(i).map(|x| x.into_pyobject(vm)),)*
$(ArrayContentType::$n(v) => {
v.get(i).map(|x| x.into_pyresult(vm)).transpose()
})*
}
}
@@ -244,8 +234,8 @@ macro_rules! def_array_enum {
fn getitem(&self, needle: Either<isize, PySliceRef>, vm: &VirtualMachine) -> PyResult {
match needle {
Either::A(i) => {
self.idx(i, "array", vm).map(|i| {
self.getitem_by_idx(i, vm).unwrap()
self.idx(i, "array", vm).and_then(|i| {
self.getitem_by_idx(i, vm).map(Option::unwrap)
})
}
Either::B(slice) => self.getitem_by_slice(slice, vm),
@@ -377,13 +367,8 @@ macro_rules! def_array_enum {
Ok(s)
}
fn iter<'a>(&'a self, vm: &'a VirtualMachine) -> impl Iterator<Item = PyObjectRef> + 'a {
let mut i = 0;
std::iter::from_fn(move || {
let ret = self.getitem_by_idx(i, vm);
i += 1;
ret
})
fn iter<'a, 'vm: 'a>(&'a self, vm: &'vm VirtualMachine) -> impl Iterator<Item = PyResult> + 'a {
(0..self.len()).map(move |i| self.getitem_by_idx(i, vm).map(Option::unwrap))
}
fn cmp(&self, other: &ArrayContentType) -> Result<Option<Ordering>, ()> {
@@ -404,7 +389,7 @@ macro_rules! def_array_enum {
def_array_enum!(
(SignedByte, i8, 'b', "b"),
(UnsignedByte, u8, 'B', "B"),
// TODO: support unicode char
(PyUnicode, WideChar, 'u', "u"),
(SignedShort, raw::c_short, 'h', "h"),
(UnsignedShort, raw::c_ushort, 'H', "H"),
(SignedInt, raw::c_int, 'i', "i"),
@@ -417,6 +402,16 @@ def_array_enum!(
(Double, f64, 'd', "d"),
);
/// Wide-character integer type: `libc::wchar_t` on every target except wasm32.
#[cfg(not(target_arch = "wasm32"))]
#[allow(non_camel_case_types)]
pub type wchar_t = libc::wchar_t;
/// On wasm32 the alias is defined directly as `u32` rather than through `libc`.
#[cfg(target_arch = "wasm32")]
#[allow(non_camel_case_types)]
pub type wchar_t = u32;
/// Newtype around [`wchar_t`]; this is the element type registered for the
/// `'u'` typecode in the `def_array_enum!` invocation.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub struct WideChar(wchar_t);
trait ArrayElement: Sized {
fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self>;
fn byteswap(self) -> Self;
@@ -464,6 +459,45 @@ fn f64_try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<f
IntoPyFloat::try_from_object(vm, obj).map(|x| x.to_f64())
}
// `ArrayElement` glue that lets `WideChar` be stored in an array.
impl ArrayElement for WideChar {
// Build a `WideChar` from a Python object. The object must convert to a
// `str` (a failure from `PyStrRef::try_from_object` is propagated as-is) and
// that string must hold exactly one character, otherwise a TypeError is
// raised.
fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
PyStrRef::try_from_object(vm, obj)?
.as_str()
.chars()
.exactly_one()
// NOTE(review): `as _` casts the `char` to `wchar_t`; an `as` cast narrows
// silently if `wchar_t` is smaller than 32 bits on the target — confirm
// this is acceptable.
.map(|ch| Self(ch as _))
.map_err(|_| vm.new_type_error("array item must be unicode character".into()))
}
// Reverse the byte order of the underlying integer.
fn byteswap(self) -> Self {
Self(self.0.swap_bytes())
}
}
// Fallible conversion from a stored wide character to a Rust `char`.
// The error type is the message text itself; the caller decides which
// exception to wrap it in.
impl TryFrom<WideChar> for char {
type Error = String;
fn try_from(ch: WideChar) -> Result<Self, Self::Error> {
// `char::from_u32` returns `None` for surrogate code points and for
// values above U+10FFFF; both cases produce the message below.
// NOTE(review): the text always says "surrogates not allowed", even when
// the value is out of range rather than a surrogate.
char::from_u32(ch.0 as u32)
.ok_or_else(|| { format!("'utf-8' codec can't encode character '\\u{:x}' in position 0: surrogates not allowed", ch.0 ) })
}
}
// Conversion of a stored wide character into a Python object.
impl IntoPyResult for WideChar {
// Produces a one-character string object. If the stored value is not a
// valid `char`, the message from `char::try_from` is raised through
// `vm.new_unicode_encode_error`.
fn into_pyresult(self, vm: &VirtualMachine) -> PyResult {
Ok(
String::from(char::try_from(self).map_err(|e| vm.new_unicode_encode_error(e))?)
.into_pyobject(vm),
)
}
}
// `Display` is implemented only to satisfy a trait requirement and must never
// be invoked: formatting a `WideChar` panics. `PyArray::repr` handles the 'u'
// typecode itself, going through `tounicode` and `PyStr::repr`.
// NOTE(review): presumably the bound comes from the generic repr generated by
// `def_array_enum!` — that code is not visible here; confirm.
impl fmt::Display for WideChar {
fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
unreachable!("`repr(array('u'))` calls `PyStr::repr`")
}
}
#[pyclass(module = "array", name = "array")]
#[derive(Debug)]
pub struct PyArray {
@@ -508,17 +542,33 @@ impl PyArray {
let spec = spec.as_str().chars().exactly_one().map_err(|_| {
vm.new_type_error("array() argument 1 must be a unicode character, not str".to_owned())
})?;
let mut array =
ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err.to_string()))?;
let mut array = ArrayContentType::from_char(spec).map_err(|err| vm.new_value_error(err))?;
if let OptionalArg::Present(init) = init {
if let Some(init) = init.payload::<PyArray>() {
if array.typecode() == init.read().typecode() {
array.iadd(&*init.read(), vm)?;
} else {
for obj in init.read().iter(vm) {
array.push(obj, vm)?
match (spec, init.read().typecode()) {
(spec, ch) if spec == ch => array.frombytes(&init.get_bytes()),
(spec, 'u') => {
return Err(vm.new_type_error(format!(
"cannot use a unicode array to initialize an array with typecode '{}'",
spec
)))
}
_ => {
for obj in init.read().iter(vm) {
array.push(obj?, vm)?;
}
}
}
} else if let Some(utf8) = init.payload::<PyStr>() {
if spec == 'u' {
let bytes = Self::_unicode_to_wchar_bytes(utf8.as_str(), array.itemsize());
array.frombytes(&bytes);
} else {
return Err(vm.new_type_error(format!(
"cannot use a str to initialize an array with typecode '{}'",
spec
)));
}
} else if init.payload_is::<PyBytes>() || init.payload_is::<PyByteArray>() {
try_bytes_like(vm, &init, |x| array.frombytes(x))?;
@@ -584,6 +634,78 @@ impl PyArray {
}
}
/// Encode `utf8` as a flat, native-endian byte buffer of wide characters.
///
/// With `item_size == 2` the text is emitted as UTF-16 code units (characters
/// outside the BMP become surrogate pairs). Any other `item_size` emits one
/// 32-bit code point per character.
fn _unicode_to_wchar_bytes(utf8: &str, item_size: usize) -> Vec<u8> {
    let mut encoded = Vec::new();
    match item_size {
        2 => {
            for unit in utf8.encode_utf16() {
                encoded.extend_from_slice(&unit.to_ne_bytes());
            }
        }
        _ => {
            for ch in utf8.chars() {
                encoded.extend_from_slice(&u32::from(ch).to_ne_bytes());
            }
        }
    }
    encoded
}
#[pymethod]
fn fromunicode(zelf: PyRef<Self>, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
    // Append the characters of a str to a 'u' array.
    //
    // The argument type is checked first, so a non-str argument raises
    // TypeError even when the array is not a unicode array; a str passed to
    // a non-'u' array raises ValueError.
    let text = match PyStrRef::try_from_object(vm, obj.clone()) {
        Ok(text) => text,
        Err(_) => {
            let message = format!(
                "fromunicode() argument must be str, not {}",
                obj.class().name
            );
            return Err(vm.new_type_error(message));
        }
    };
    let typecode = zelf.read().typecode();
    if typecode != 'u' {
        let message = "fromunicode() may only be called on unicode type arrays";
        return Err(vm.new_value_error(message.into()));
    }
    // Encode to the array's own item width, then append the raw bytes.
    let mut target = zelf.try_resizable(vm)?;
    let encoded = Self::_unicode_to_wchar_bytes(text.as_str(), target.itemsize());
    target.frombytes(&encoded);
    Ok(())
}
#[pymethod]
fn tounicode(&self, vm: &VirtualMachine) -> PyResult<String> {
    // Decode a 'u' array into a string.
    //
    // Raises ValueError if the typecode is not 'u', or (for non-2-byte
    // items) if an item is not a valid Unicode scalar value. For 2-byte
    // items the data is decoded as UTF-16 and unpaired surrogates are
    // replaced, because `String::from_utf16_lossy` is used.
    let array = self.array.read();
    if array.typecode() != 'u' {
        return Err(
            vm.new_value_error("tounicode() may only be called on unicode type arrays".into())
        );
    }
    let bytes = array.get_bytes();
    // Query the guard that is already held instead of going back through
    // `self`: the item size belongs to the locked contents, and this avoids
    // consulting the array a second time while `array` is still alive
    // (presumably a second lock acquisition — not visible from here).
    if array.itemsize() == 2 {
        // SAFETY: every bit pattern is a valid `u16`, and the length is
        // rounded down to whole items. Alignment relies on `get_bytes()`
        // exposing the array's own 2-byte item storage.
        // NOTE(review): confirm `get_bytes()` never returns an unaligned copy.
        let utf16 = unsafe {
            std::slice::from_raw_parts(
                bytes.as_ptr() as *const u16,
                bytes.len() / std::mem::size_of::<u16>(),
            )
        };
        Ok(String::from_utf16_lossy(utf16))
    } else {
        // SAFETY: every bit pattern is a valid `u32`, and the length is
        // rounded down to whole items. Alignment relies on `get_bytes()`
        // exposing the array's own 4-byte item storage.
        // NOTE(review): confirm `get_bytes()` never returns an unaligned copy.
        let chars = unsafe {
            std::slice::from_raw_parts(
                bytes.as_ptr() as *const u32,
                bytes.len() / std::mem::size_of::<u32>(),
            )
        };
        chars
            .iter()
            .map(|&ch| {
                // cpython issue 17223
                char::from_u32(ch).ok_or_else(|| {
                    vm.new_value_error(format!(
                        "character U+{:4x} is not in range [U+0000; U+10ffff]",
                        ch
                    ))
                })
            })
            .try_collect()
    }
}
#[pymethod]
fn frombytes(zelf: PyRef<Self>, b: ArgBytesLike, vm: &VirtualMachine) -> PyResult<()> {
let b = b.borrow_buf();
@@ -621,7 +743,7 @@ impl PyArray {
Err(vm.new_index_error("pop from empty array".to_owned()))
} else {
let i = w.idx(i.unwrap_or(-1), "pop", vm)?;
Ok(w.pop(i, vm))
w.pop(i, vm)
}
}
@@ -643,7 +765,7 @@ impl PyArray {
let array = self.read();
let mut v = Vec::with_capacity(array.len());
for obj in array.iter(vm) {
v.push(obj);
v.push(obj?);
}
Ok(vm.ctx.new_list(v))
}
@@ -767,6 +889,15 @@ impl PyArray {
#[pymethod(magic)]
fn repr(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<String> {
    // Every typecode except 'u' uses the generic repr of the array contents.
    let is_unicode = zelf.read().typecode() == 'u';
    if !is_unicode {
        return zelf.read().repr(vm);
    }
    // A unicode array is shown as `array('u', <repr of its text>)`, and as
    // plain `array('u')` when it is empty.
    if zelf.len() == 0 {
        Ok("array('u')".into())
    } else {
        let text = PyStr::from(zelf.tounicode(vm)?).repr(vm)?;
        Ok(format!("array('u', {})", text))
    }
}
@@ -792,7 +923,7 @@ impl PyArray {
let iter = Iterator::zip(array_a.iter(vm), array_b.iter(vm));
for (a, b) in iter {
if !vm.bool_eq(&a, &b)? {
if !vm.bool_eq(&a?, &b?)? {
return Ok(false);
}
}
@@ -830,8 +961,8 @@ impl Comparable for PyArray {
for (a, b) in iter {
let ret = match op {
PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a, &b)?,
PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a, &b)?,
PyComparisonOp::Lt | PyComparisonOp::Le => vm.bool_seq_lt(&a?, &b?)?,
PyComparisonOp::Gt | PyComparisonOp::Ge => vm.bool_seq_gt(&a?, &b?)?,
_ => unreachable!(),
};
if let Some(v) = ret {
@@ -933,7 +1064,7 @@ impl PyArrayIter {}
impl PyIter for PyArrayIter {
fn next(zelf: &PyRef<Self>, vm: &VirtualMachine) -> PyResult {
let pos = zelf.position.fetch_add(1);
if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm) {
if let Some(item) = zelf.array.read().getitem_by_idx(pos, vm)? {
Ok(item)
} else {
Err(vm.new_stop_iteration())

View File

@@ -27,6 +27,7 @@ pub(crate) mod _struct {
use crate::exceptions::PyBaseExceptionRef;
use crate::function::Args;
use crate::slots::PyIter;
use crate::stdlib::array::wchar_t;
use crate::utils::Either;
use crate::VirtualMachine;
use crate::{IntoPyObject, PyObjectRef, PyRef, PyResult, PyValue, StaticType, TryFromObject};
@@ -55,6 +56,7 @@ pub(crate) mod _struct {
SByte = b'b',
UByte = b'B',
Char = b'c',
WideChar = b'u',
Str = b's',
Pascal = b'p',
Short = b'h',
@@ -168,6 +170,7 @@ pub(crate) mod _struct {
pack: Some(pack_char),
unpack: Some(unpack_char),
},
WideChar => native_info!(wchar_t),
Short => native_info!(raw::c_short),
UShort => native_info!(raw::c_ushort),
Int => native_info!(raw::c_int),
@@ -225,17 +228,18 @@ pub(crate) mod _struct {
))
}
};
FormatSpec::parse(decoded_fmt).map_err(|err| new_struct_error(vm, err))
FormatSpec::parse(decoded_fmt, vm)
}
pub fn parse(fmt: &str) -> Result<FormatSpec, String> {
pub fn parse(fmt: &str, vm: &VirtualMachine) -> PyResult<FormatSpec> {
let mut chars = fmt.bytes().peekable();
// First determine "@", "<", ">","!" or "="
let endianness = parse_endianness(&mut chars);
// Now, analyze struct string further:
let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness)?;
let (codes, size, arg_count) = parse_format_codes(&mut chars, endianness)
.map_err(|err| new_struct_error(vm, err))?;
Ok(FormatSpec {
endianness,