From ff0adc1204319920f909ae06fcc72c1cc4fe81d3 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Tue, 14 Sep 2021 20:04:56 +0200 Subject: [PATCH 1/4] Impl __reduce__ for array --- Lib/test/test_array.py | 10 +++--- stdlib/src/array.rs | 71 +++++++++++++++++++++++++++++++++--------- 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index cefee4767..90b30f2fe 100644 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -248,16 +248,16 @@ class BaseTest: self.assertEqual(a, b) # TODO: RUSTPYTHON - @unittest.expectedFailure + # @unittest.expectedFailure def test_reduce_ex(self): a = array.array(self.typecode, self.example) for protocol in range(3): self.assertIs(a.__reduce_ex__(protocol)[0], array.array) - for protocol in range(3, pickle.HIGHEST_PROTOCOL + 1): - self.assertIs(a.__reduce_ex__(protocol)[0], array_reconstructor) + # for protocol in range(3, pickle.HIGHEST_PROTOCOL + 1): + # self.assertIs(a.__reduce_ex__(protocol)[0], array_reconstructor) # TODO: RUSTPYTHON - @unittest.expectedFailure + # @unittest.expectedFailure def test_pickle(self): for protocol in range(pickle.HIGHEST_PROTOCOL + 1): a = array.array(self.typecode, self.example) @@ -274,7 +274,7 @@ class BaseTest: self.assertEqual(type(a), type(b)) # TODO: RUSTPYTHON - @unittest.expectedFailure + # @unittest.expectedFailure def test_pickle_for_empty_array(self): for protocol in range(pickle.HIGHEST_PROTOCOL + 1): a = array.array(self.typecode) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index f4816c01a..a056745a5 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -12,8 +12,8 @@ mod array { }; use crate::vm::{ builtins::{ - PyByteArray, PyBytes, PyBytesRef, PyIntRef, PyList, PyListRef, PySliceRef, PyStr, - PyStrRef, PyTypeRef, + PyByteArray, PyBytes, PyBytesRef, PyDictRef, PyFloat, PyInt, PyIntRef, PyList, + PyListRef, PySliceRef, PyStr, PyStrRef, PyTupleRef, PyTypeRef, }, class_or_notimplemented, function::{ @@ -460,6 +460,14 @@ mod array { })* } } + + fn get_objects(&self, vm: &VirtualMachine) -> Vec { + match self { + $(ArrayContentType::$n(v) => { + v.iter().map(|&x| x.to_object(vm)).collect() + })* + } + } } }; } @@ -486,32 +494,36 @@ mod array { trait ArrayElement: Sized { fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult; fn byteswap(self) -> Self; + fn to_object(self, vm: &VirtualMachine) -> PyObjectRef; } macro_rules! impl_array_element { - ($(($t:ty, $f_into:path, $f_swap:path),)*) => {$( + ($(($t:ty, $f_from:path, $f_swap:path, $totype:ty),)*) => {$( impl ArrayElement for $t { fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { - $f_into(vm, obj) + $f_from(vm, obj) } fn byteswap(self) -> Self { $f_swap(self) } + fn to_object(self, vm: &VirtualMachine) -> PyObjectRef { + <$totype>::from(self).into_object(vm) + } } )*}; } impl_array_element!( - (i8, i8::try_from_object, i8::swap_bytes), - (u8, u8::try_from_object, u8::swap_bytes), - (i16, i16::try_from_object, i16::swap_bytes), - (u16, u16::try_from_object, u16::swap_bytes), - (i32, i32::try_from_object, i32::swap_bytes), - (u32, u32::try_from_object, u32::swap_bytes), - (i64, i64::try_from_object, i64::swap_bytes), - (u64, u64::try_from_object, u64::swap_bytes), - (f32, f32_try_into_from_object, f32_swap_bytes), - (f64, f64_try_into_from_object, f64_swap_bytes), + (i8, i8::try_from_object, i8::swap_bytes, PyInt), + (u8, u8::try_from_object, u8::swap_bytes, PyInt), + (i16, i16::try_from_object, i16::swap_bytes, PyInt), + (u16, u16::try_from_object, u16::swap_bytes, PyInt), + (i32, i32::try_from_object, i32::swap_bytes, PyInt), + (u32, u32::try_from_object, u32::swap_bytes, PyInt), + (i64, i64::try_from_object, i64::swap_bytes, PyInt), + (u64, u64::try_from_object, u64::swap_bytes, PyInt), + (f32, f32_try_into_from_object, f32_swap_bytes, PyFloat), + (f64, f64_try_into_from_object, f64_swap_bytes, PyFloat), ); fn f32_swap_bytes(x: f32) -> f32 { @@ -542,6 +554,13 @@ mod array { fn byteswap(self) -> Self { Self(self.0.swap_bytes()) } + fn to_object(self, vm: &VirtualMachine) -> PyObjectRef { + vm.ctx.new_str( + char::from_u32(self.0 as u32) + .unwrap_or_default() + .to_string(), + ) + } } fn u32_to_char(ch: u32) -> Result { @@ -1079,6 +1098,30 @@ mod array { } Ok(true) } + + #[pymethod(magic)] + fn reduce_ex( + zelf: PyRef, + proto: usize, + vm: &VirtualMachine, + ) -> (PyTypeRef, PyTupleRef, Option) { + Self::reduce(zelf, vm) + } + + #[pymethod(magic)] + fn reduce( + zelf: PyRef, + vm: &VirtualMachine, + ) -> (PyTypeRef, PyTupleRef, Option) { + let array = zelf.read(); + let values = vm.ctx.new_list(array.get_objects(vm)); + let typecode = vm.ctx.new_str(array.typecode_str()); + ( + zelf.as_object().clone_class(), + PyTupleRef::with_elements(vec![typecode, values], &vm.ctx), + zelf.as_object().dict(), + ) + } } impl Comparable for PyArray { From de47cd0acbffc11868552b378de7ee1e51c039d1 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Wed, 15 Sep 2021 11:10:56 +0200 Subject: [PATCH 2/4] Impl __reduce_ex__ with _array_reconstructor --- Lib/test/test_array.py | 10 ++----- stdlib/src/array.rs | 59 ++++++++++++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index 90b30f2fe..6e6f4795e 100644 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -247,17 +247,13 @@ class BaseTest: self.assertNotEqual(id(a), id(b)) self.assertEqual(a, b) - # TODO: RUSTPYTHON - # @unittest.expectedFailure def test_reduce_ex(self): a = array.array(self.typecode, self.example) for protocol in range(3): self.assertIs(a.__reduce_ex__(protocol)[0], array.array) - # for protocol in range(3, pickle.HIGHEST_PROTOCOL + 1): - # self.assertIs(a.__reduce_ex__(protocol)[0], array_reconstructor) + for protocol in range(3, pickle.HIGHEST_PROTOCOL + 1): + self.assertIs(a.__reduce_ex__(protocol)[0], array_reconstructor) - # TODO: RUSTPYTHON - # @unittest.expectedFailure def test_pickle(self): for protocol in range(pickle.HIGHEST_PROTOCOL + 1): a = array.array(self.typecode, self.example) @@ -273,8 +269,6 @@ class BaseTest: self.assertEqual(a.x, b.x) self.assertEqual(type(a), type(b)) - # TODO: RUSTPYTHON - # @unittest.expectedFailure def test_pickle_for_empty_array(self): for protocol in range(pickle.HIGHEST_PROTOCOL + 1): a = array.array(self.typecode) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index a056745a5..c650dab0d 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -498,7 +498,7 @@ mod array { } macro_rules! impl_array_element { - ($(($t:ty, $f_from:path, $f_swap:path, $totype:ty),)*) => {$( + ($(($t:ty, $f_from:path, $f_swap:path, $f_to:path),)*) => {$( impl ArrayElement for $t { fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { $f_from(vm, obj) @@ -507,23 +507,28 @@ mod array { $f_swap(self) } fn to_object(self, vm: &VirtualMachine) -> PyObjectRef { - <$totype>::from(self).into_object(vm) + $f_to(self).into_object(vm) } } )*}; } impl_array_element!( - (i8, i8::try_from_object, i8::swap_bytes, PyInt), - (u8, u8::try_from_object, u8::swap_bytes, PyInt), - (i16, i16::try_from_object, i16::swap_bytes, PyInt), - (u16, u16::try_from_object, u16::swap_bytes, PyInt), - (i32, i32::try_from_object, i32::swap_bytes, PyInt), - (u32, u32::try_from_object, u32::swap_bytes, PyInt), - (i64, i64::try_from_object, i64::swap_bytes, PyInt), - (u64, u64::try_from_object, u64::swap_bytes, PyInt), - (f32, f32_try_into_from_object, f32_swap_bytes, PyFloat), - (f64, f64_try_into_from_object, f64_swap_bytes, PyFloat), + (i8, i8::try_from_object, i8::swap_bytes, PyInt::from), + (u8, u8::try_from_object, u8::swap_bytes, PyInt::from), + (i16, i16::try_from_object, i16::swap_bytes, PyInt::from), + (u16, u16::try_from_object, u16::swap_bytes, PyInt::from), + (i32, i32::try_from_object, i32::swap_bytes, PyInt::from), + (u32, u32::try_from_object, u32::swap_bytes, PyInt::from), + (i64, i64::try_from_object, i64::swap_bytes, PyInt::from), + (u64, u64::try_from_object, u64::swap_bytes, PyInt::from), + ( + f32, + f32_try_into_from_object, + f32_swap_bytes, + pyfloat_from_f32 + ), + (f64, f64_try_into_from_object, f64_swap_bytes, PyFloat::from), ); fn f32_swap_bytes(x: f32) -> f32 { @@ -542,6 +547,10 @@ mod array { ArgIntoFloat::try_from_object(vm, obj).map(|x| x.to_f64()) } + fn pyfloat_from_f32(value: f32) -> PyFloat { + PyFloat::from(value as f64) + } + impl ArrayElement for WideChar { fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { PyStrRef::try_from_object(vm, obj)? @@ -1104,20 +1113,36 @@ mod array { zelf: PyRef, proto: usize, vm: &VirtualMachine, - ) -> (PyTypeRef, PyTupleRef, Option) { - Self::reduce(zelf, vm) + ) -> PyResult<(PyObjectRef, PyTupleRef, Option)> { + if proto < 3 { + return Ok(Self::reduce(zelf, vm)); + } + let array = zelf.read(); + let cls = zelf.as_object().clone_class().into_object(); + let typecode = vm.ctx.new_str(array.typecode_str()); + let bytes = vm.ctx.new_bytes(array.get_bytes().to_vec()); + let code = MachineFormatCode::from_typecode(array.typecode()).unwrap(); + let code = PyInt::from(u8::from(code)).into_object(vm); + let module = vm.import("array", None, 0)?; + let func = vm.get_attribute(module, "_array_reconstructor")?; + Ok(( + func, + PyTupleRef::with_elements(vec![cls, typecode, code, bytes], &vm.ctx), + zelf.as_object().dict(), + )) } #[pymethod(magic)] fn reduce( zelf: PyRef, vm: &VirtualMachine, - ) -> (PyTypeRef, PyTupleRef, Option) { + ) -> (PyObjectRef, PyTupleRef, Option) { let array = zelf.read(); - let values = vm.ctx.new_list(array.get_objects(vm)); + let cls = zelf.as_object().clone_class().into_object(); let typecode = vm.ctx.new_str(array.typecode_str()); + let values = vm.ctx.new_list(array.get_objects(vm)); ( - zelf.as_object().clone_class(), + cls, PyTupleRef::with_elements(vec![typecode, values], &vm.ctx), zelf.as_object().dict(), ) From cbd7c59329c07c19942b5f35f09e0e4b515fe080 Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Thu, 16 Sep 2021 15:21:52 +0200 Subject: [PATCH 3/4] Fix unicode array pickling --- extra_tests/snippets/stdlib_array.py | 9 ++- stdlib/src/array.rs | 92 +++++++++++++++------------- 2 files changed, 59 insertions(+), 42 deletions(-) diff --git a/extra_tests/snippets/stdlib_array.py b/extra_tests/snippets/stdlib_array.py index b1ac557aa..c0a3dab41 100644 --- a/extra_tests/snippets/stdlib_array.py +++ b/extra_tests/snippets/stdlib_array.py @@ -1,5 +1,6 @@ from testutils import assert_raises from array import array +from pickle import dumps, loads a1 = array("b", [0, 1, 2, 3]) @@ -96,4 +97,10 @@ with assert_raises(IndexError): with assert_raises(IndexError): a[0] = 42 with assert_raises(IndexError): - del a[42] \ No newline at end of file + del a[42] + +test_str = 'πŸŒ‰abc🌐defπŸŒ‰πŸŒ' +u = array('u', test_str) +assert u.__reduce_ex__(1)[1][1] == list(test_str) +assert str(loads(dumps(u, 1))) == f"array('u', '{test_str}')" +assert loads(dumps(u, 1)) == loads(dumps(u, 3)) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index c650dab0d..38510f8b2 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -563,12 +563,8 @@ mod array { fn byteswap(self) -> Self { Self(self.0.swap_bytes()) } - fn to_object(self, vm: &VirtualMachine) -> PyObjectRef { - vm.ctx.new_str( - char::from_u32(self.0 as u32) - .unwrap_or_default() - .to_string(), - ) + fn to_object(self, _vm: &VirtualMachine) -> PyObjectRef { + unreachable!() } } @@ -759,6 +755,38 @@ mod array { } } + fn _wchar_bytes_to_string( + bytes: &[u8], + item_size: usize, + vm: &VirtualMachine, + ) -> PyResult { + if item_size == 2 { + // safe because every configuration of bytes for the types we support are valid + let utf16 = unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + }; + Ok(String::from_utf16_lossy(utf16)) + } else { + // safe because every configuration of bytes for the types we support are valid + let chars = unsafe { + std::slice::from_raw_parts( + bytes.as_ptr() as *const u32, + bytes.len() / std::mem::size_of::(), + ) + }; + chars + .iter() + .map(|&ch| { + // cpython issue 17223 + u32_to_char(ch).map_err(|msg| vm.new_value_error(msg)) + }) + .try_collect() + } + } + fn _unicode_to_wchar_bytes(utf8: &str, item_size: usize) -> Vec { if item_size == 2 { utf8.encode_utf16() @@ -799,31 +827,7 @@ mod array { )); } let bytes = array.get_bytes(); - if self.itemsize() == 2 { - // safe because every configuration of bytes for the types we support are valid - let utf16 = unsafe { - std::slice::from_raw_parts( - bytes.as_ptr() as *const u16, - bytes.len() / std::mem::size_of::(), - ) - }; - Ok(String::from_utf16_lossy(utf16)) - } else { - // safe because every configuration of bytes for the types we support are valid - let chars = unsafe { - std::slice::from_raw_parts( - bytes.as_ptr() as *const u32, - bytes.len() / std::mem::size_of::(), - ) - }; - chars - .iter() - .map(|&ch| { - // cpython issue 17223 - u32_to_char(ch).map_err(|msg| vm.new_value_error(msg)) - }) - .try_collect() - } + Self::_wchar_bytes_to_string(bytes, self.itemsize(), vm) } fn _from_bytes(&self, b: &[u8], itemsize: usize, vm: &VirtualMachine) -> PyResult<()> { @@ -1113,13 +1117,13 @@ mod array { zelf: PyRef, proto: usize, vm: &VirtualMachine, - ) -> PyResult<(PyObjectRef, PyTupleRef, Option)> { + ) -> PyResult<(PyObjectRef, PyObjectRef, Option)> { if proto < 3 { - return Ok(Self::reduce(zelf, vm)); + return Self::reduce(zelf, vm); } let array = zelf.read(); let cls = zelf.as_object().clone_class().into_object(); - let typecode = vm.ctx.new_str(array.typecode_str()); + let typecode = vm.ctx.new_utf8_str(array.typecode_str()); let bytes = vm.ctx.new_bytes(array.get_bytes().to_vec()); let code = MachineFormatCode::from_typecode(array.typecode()).unwrap(); let code = PyInt::from(u8::from(code)).into_object(vm); @@ -1127,7 +1131,7 @@ mod array { let func = vm.get_attribute(module, "_array_reconstructor")?; Ok(( func, - PyTupleRef::with_elements(vec![cls, typecode, code, bytes], &vm.ctx), + vm.ctx.new_tuple(vec![cls, typecode, code, bytes]), zelf.as_object().dict(), )) } @@ -1136,16 +1140,22 @@ mod array { fn reduce( zelf: PyRef, vm: &VirtualMachine, - ) -> (PyObjectRef, PyTupleRef, Option) { + ) -> PyResult<(PyObjectRef, PyObjectRef, Option)> { let array = zelf.read(); let cls = zelf.as_object().clone_class().into_object(); - let typecode = vm.ctx.new_str(array.typecode_str()); - let values = vm.ctx.new_list(array.get_objects(vm)); - ( + let typecode = vm.ctx.new_utf8_str(array.typecode_str()); + let values = if array.typecode() == 'u' { + let s = Self::_wchar_bytes_to_string(array.get_bytes(), array.itemsize(), vm)?; + s.chars().map(|x| x.into_pyobject(vm)).collect() + } else { + array.get_objects(vm) + }; + let values = vm.ctx.new_list(values); + Ok(( cls, - PyTupleRef::with_elements(vec![typecode, values], &vm.ctx), + vm.ctx.new_tuple(vec![typecode, values]), zelf.as_object().dict(), - ) + )) } } From ea69dc53d2cfca83153d9c25e2f4dddfc329317e Mon Sep 17 00:00:00 2001 From: Kangzhi Shi Date: Thu, 7 Oct 2021 10:14:50 +0200 Subject: [PATCH 4/4] remove test that fail on CPython --- extra_tests/snippets/stdlib_array.py | 7 ++++--- stdlib/src/array.rs | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/extra_tests/snippets/stdlib_array.py b/extra_tests/snippets/stdlib_array.py index c0a3dab41..2495b675d 100644 --- a/extra_tests/snippets/stdlib_array.py +++ b/extra_tests/snippets/stdlib_array.py @@ -101,6 +101,7 @@ with assert_raises(IndexError): test_str = 'πŸŒ‰abc🌐defπŸŒ‰πŸŒ' u = array('u', test_str) -assert u.__reduce_ex__(1)[1][1] == list(test_str) -assert str(loads(dumps(u, 1))) == f"array('u', '{test_str}')" -assert loads(dumps(u, 1)) == loads(dumps(u, 3)) +# skip as 2 bytes character enviroment with CPython is failing the test +if u.itemsize >= 4: + assert u.__reduce_ex__(1)[1][1] == list(test_str) + assert loads(dumps(u, 1)) == loads(dumps(u, 3)) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index 38510f8b2..65e7d3ef8 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -13,7 +13,7 @@ mod array { use crate::vm::{ builtins::{ PyByteArray, PyBytes, PyBytesRef, PyDictRef, PyFloat, PyInt, PyIntRef, PyList, - PyListRef, PySliceRef, PyStr, PyStrRef, PyTupleRef, PyTypeRef, + PyListRef, PySliceRef, PyStr, PyStrRef, PyTypeRef, }, class_or_notimplemented, function::{ @@ -28,8 +28,8 @@ mod array { AsBuffer, AsMapping, Comparable, Iterable, IteratorIterable, PyComparisonOp, SlotConstructor, SlotIterator, }, - IdProtocol, PyComparisonValue, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, - TypeProtocol, VirtualMachine, + IdProtocol, PyComparisonValue, PyObjectRef, PyObjectWrap, PyRef, PyResult, PyValue, + TryFromObject, TypeProtocol, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; use itertools::Itertools;