From 97b8d7ca7bfc665f8b0e15e1ae3724e2759b61a8 Mon Sep 17 00:00:00 2001 From: jfh Date: Sun, 29 Aug 2021 17:15:57 +0300 Subject: [PATCH] Add checking for repeat on sequences. --- Lib/test/test_array.py | 1 - Lib/test/test_bytes.py | 1 - Lib/test/test_list.py | 1 - vm/src/builtins/bytearray.rs | 16 ++++++++-------- vm/src/builtins/bytes.rs | 13 ++++++++----- vm/src/builtins/list.rs | 18 +++++++----------- vm/src/builtins/pystr.rs | 10 +++++++--- vm/src/builtins/tuple.rs | 10 +++++----- vm/src/bytesinner.rs | 4 ++-- vm/src/sequence.rs | 23 +++++++++++------------ vm/src/stdlib/array.rs | 22 ++++++++++++---------- vm/src/stdlib/collections.rs | 8 ++++---- vm/src/vm.rs | 17 +++++++++++++++++ 13 files changed, 81 insertions(+), 63 deletions(-) diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index 0ace4114a..6bde5ca24 100644 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -1508,7 +1508,6 @@ class DoubleTest(FPTest, unittest.TestCase): typecode = 'd' minitemsize = 8 - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'capacity overflow'") def test_alloc_overflow(self): from sys import maxsize a = array.array('d', [-1]*65536) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 1e82c899f..e37e5084a 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -396,7 +396,6 @@ class BaseBytesTest: self.assertRaises(TypeError, lambda: b1 + "def") self.assertRaises(TypeError, lambda: "abc" + b2) - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'capacity overflow'") def test_repeat(self): for b in b"abc", self.type2test(b"abc"): self.assertEqual(b * 3, b"abcabcabc") diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py index 1a6eee45f..b4ae26584 100644 --- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -61,7 +61,6 @@ class ListTest(list_tests.CommonTest): self.assertEqual(len([0]), 1) self.assertEqual(len([0, 1, 2]), 3) - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'capacity overflow'") def test_overflow(self): lst = [4, 5, 6, 7] n = int((sys.maxsize*2+2) // len(lst)) diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 85638161e..8eb5c079c 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -273,7 +273,7 @@ impl PyByteArray { } } - fn irepeat(zelf: &PyRef, n: isize, vm: &VirtualMachine) -> PyResult<()> { + fn irepeat(zelf: &PyRef, n: usize, vm: &VirtualMachine) -> PyResult<()> { if n == 1 { return Ok(()); } @@ -290,11 +290,9 @@ impl PyByteArray { }; let elements = &mut w.elements; - if n <= 0 { + if n == 0 { elements.clear(); } else if n != 1 { - let n = n as usize; - let old = elements.clone(); elements.reserve((n - 1) * old.len()); @@ -602,13 +600,15 @@ impl PyByteArray { #[pymethod(name = "__rmul__")] #[pymethod(magic)] - fn mul(&self, n: isize) -> Self { - self.inner().repeat(n).into() + fn mul(&self, value: isize, vm: &VirtualMachine) -> PyResult { + vm.check_repeat_or_memory_error(self.len(), value) + .map(|value| self.inner().repeat(value).into()) } #[pymethod(magic)] - fn imul(zelf: PyRef, n: isize, vm: &VirtualMachine) -> PyResult> { - Self::irepeat(&zelf, n, vm).map(|_| zelf) + fn imul(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { + vm.check_repeat_or_memory_error(zelf.len(), value) + .and_then(|value| Self::irepeat(&zelf, value, vm).map(|_| zelf)) } #[pymethod(name = "__mod__")] diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 7e91719fd..4a6e2a56c 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -440,9 +440,6 @@ impl PyBytes { #[pymethod(name = "__rmul__")] #[pymethod(magic)] fn mul(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { - if value > 0 && zelf.inner.len() as isize > std::isize::MAX / value { - return Err(vm.new_overflow_error("repeated bytes are too long".to_owned())); - } if value == 1 && zelf.class().is(&vm.ctx.types.bytes_type) { // Special case: when some `bytes` is multiplied by `1`, // nothing really happens, we need to return an object itself @@ -450,8 +447,14 @@ impl PyBytes { // This only works for `bytes` itself, not its subclasses. return Ok(zelf); } - let bytes: PyBytes = zelf.inner.repeat(value).into(); - Ok(bytes.into_ref(vm)) + // todo: map err to overflow. + vm.check_repeat_or_memory_error(zelf.inner.len(), value) + .map(|value| { + let bytes: PyBytes = zelf.inner.repeat(value).into(); + bytes.into_ref(vm) + }) + // see issue 45044 on b.p.o. + .map_err(|_| vm.new_overflow_error("repeated bytes are too long".to_owned())) } #[pymethod(name = "__mod__")] diff --git a/vm/src/builtins/list.rs b/vm/src/builtins/list.rs index fee44672e..4d7e29fb7 100644 --- a/vm/src/builtins/list.rs +++ b/vm/src/builtins/list.rs @@ -237,25 +237,21 @@ impl PyList { } #[pymethod(magic)] - fn mul(&self, counter: isize, vm: &VirtualMachine) -> PyObjectRef { - let new_elements = sequence::seq_mul(&self.borrow_vec(), counter) + #[pymethod(name = "__rmul__")] + fn mul(&self, value: isize, vm: &VirtualMachine) -> PyResult { + let new_elements = sequence::seq_mul(vm, &self.borrow_vec(), value)? .cloned() .collect(); - vm.ctx.new_list(new_elements) + Ok(vm.ctx.new_list(new_elements)) } #[pymethod(magic)] - fn rmul(&self, counter: isize, vm: &VirtualMachine) -> PyObjectRef { - self.mul(counter, vm) - } - - #[pymethod(magic)] - fn imul(zelf: PyRef, counter: isize) -> PyRef { + fn imul(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { let mut elements = zelf.borrow_vec_mut(); let mut new_elements: Vec = - sequence::seq_mul(&*elements, counter).cloned().collect(); + sequence::seq_mul(vm, &*elements, value)?.cloned().collect(); std::mem::swap(elements.deref_mut(), &mut new_elements); - zelf.clone() + Ok(zelf.clone()) } #[pymethod] diff --git a/vm/src/builtins/pystr.rs b/vm/src/builtins/pystr.rs index 539ca22c0..09a1197e2 100644 --- a/vm/src/builtins/pystr.rs +++ b/vm/src/builtins/pystr.rs @@ -342,15 +342,19 @@ impl PyStr { #[pymethod(name = "__rmul__")] #[pymethod(magic)] - fn mul(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyRef { + fn mul(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { if value == 1 && zelf.class().is(&vm.ctx.types.str_type) { // Special case: when some `str` is multiplied by `1`, // nothing really happens, we need to return an object itself // with the same `id()` to be compatible with CPython. // This only works for `str` itself, not its subclasses. - return zelf; + return Ok(zelf); } - Self::from(zelf.value.repeat(value.to_usize().unwrap_or(0))).into_ref(vm) + // todo: map err to overflow. + vm.check_repeat_or_memory_error(zelf.len(), value) + .map(|value| Self::from(zelf.value.repeat(value)).into_ref(vm)) + // see issue 45044 on b.p.o. + .map_err(|_| vm.new_overflow_error("repeated bytes are too long".to_owned())) } #[pymethod(magic)] diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index eb416dea0..52429c7c5 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -169,22 +169,22 @@ impl PyTuple { #[pymethod(name = "__rmul__")] #[pymethod(magic)] - fn mul(zelf: PyRef, counter: isize, vm: &VirtualMachine) -> PyRef { - if zelf.elements.is_empty() || counter == 0 { + fn mul(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { + Ok(if zelf.elements.is_empty() || value == 0 { vm.ctx.empty_tuple.clone() - } else if counter == 1 && zelf.class().is(&vm.ctx.types.tuple_type) { + } else if value == 1 && zelf.class().is(&vm.ctx.types.tuple_type) { // Special case: when some `tuple` is multiplied by `1`, // nothing really happens, we need to return an object itself // with the same `id()` to be compatible with CPython. // This only works for `tuple` itself, not its subclasses. zelf } else { - let elements = sequence::seq_mul(&zelf.elements, counter) + let elements = sequence::seq_mul(vm, &zelf.elements, value)? .cloned() .collect::>() .into_boxed_slice(); Self { elements }.into_ref(vm) - } + }) } #[pymethod(magic)] diff --git a/vm/src/bytesinner.rs b/vm/src/bytesinner.rs index aa9884eab..6408afdcd 100644 --- a/vm/src/bytesinner.rs +++ b/vm/src/bytesinner.rs @@ -964,8 +964,8 @@ impl PyBytesInner { .format(vm, values) } - pub fn repeat(&self, n: isize) -> Vec { - self.elements.repeat(n.to_usize().unwrap_or(0)) + pub fn repeat(&self, n: usize) -> Vec { + self.elements.repeat(n) } } diff --git a/vm/src/sequence.rs b/vm/src/sequence.rs index 60bf245e1..197bb0aad 100644 --- a/vm/src/sequence.rs +++ b/vm/src/sequence.rs @@ -1,7 +1,6 @@ use crate::slots::PyComparisonOp; use crate::vm::VirtualMachine; use crate::{PyObjectRef, PyResult}; -use num_traits::cast::ToPrimitive; pub(super) type DynPyIter<'a> = Box + 'a>; @@ -94,15 +93,15 @@ impl<'a> Iterator for SeqMul<'a> { } } -pub(crate) fn seq_mul(seq: &impl SimpleSeq, repetitions: isize) -> SeqMul { - let repetitions = if seq.len() > 0 { - repetitions.to_usize().unwrap_or(0) - } else { - 0 - }; - SeqMul { - seq, - repetitions, - iter: None, - } +pub(crate) fn seq_mul<'a>( + vm: &VirtualMachine, + seq: &'a impl SimpleSeq, + repetitions: isize, +) -> PyResult> { + vm.check_repeat_or_memory_error(seq.len(), repetitions) + .map(|repetitions| SeqMul { + seq, + repetitions, + iter: None, + }) } diff --git a/vm/src/stdlib/array.rs b/vm/src/stdlib/array.rs index e5eff0cca..24073ee2d 100644 --- a/vm/src/stdlib/array.rs +++ b/vm/src/stdlib/array.rs @@ -301,8 +301,7 @@ macro_rules! def_array_enum { } } - fn mul(&self, counter: isize) -> Self { - let counter = if counter < 0 { 0 } else { counter as usize }; + fn mul(&self, counter: usize) -> Self { match self { $(ArrayContentType::$n(v) => { let elements = v.repeat(counter); @@ -317,11 +316,10 @@ macro_rules! def_array_enum { } } - fn imul(&mut self, counter: isize) { - if counter <= 0 { + fn imul(&mut self, counter: usize) { + if counter == 0 { self.clear(); } else if counter != 1 { - let counter = counter as usize; match self { $(ArrayContentType::$n(v) => { let old = v.clone(); @@ -864,14 +862,18 @@ impl PyArray { #[pymethod(name = "__rmul__")] #[pymethod(magic)] - fn mul(&self, counter: isize, vm: &VirtualMachine) -> PyRef { - PyArray::from(self.read().mul(counter)).into_ref(vm) + fn mul(&self, value: isize, vm: &VirtualMachine) -> PyResult> { + vm.check_repeat_or_memory_error(self.len(), value) + .map(|value| PyArray::from(self.read().mul(value)).into_ref(vm)) } #[pymethod(magic)] - fn imul(zelf: PyRef, counter: isize, vm: &VirtualMachine) -> PyResult> { - zelf.try_resizable(vm)?.imul(counter); - Ok(zelf) + fn imul(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { + vm.check_repeat_or_memory_error(zelf.len(), value) + .and_then(|value| { + zelf.try_resizable(vm)?.imul(value); + Ok(zelf) + }) } #[pymethod(magic)] diff --git a/vm/src/stdlib/collections.rs b/vm/src/stdlib/collections.rs index 801b34d71..6a08e2910 100644 --- a/vm/src/stdlib/collections.rs +++ b/vm/src/stdlib/collections.rs @@ -454,20 +454,20 @@ mod _collections { #[pymethod(magic)] #[pymethod(name = "__rmul__")] - fn mul(&self, n: isize) -> Self { + fn mul(&self, value: isize, vm: &VirtualMachine) -> PyResult { let deque: SimpleSeqDeque = self.borrow_deque().into(); - let mul = sequence::seq_mul(&deque, n); + let mul = sequence::seq_mul(vm, &deque, value)?; let skipped = self .maxlen .and_then(|maxlen| mul.len().checked_sub(maxlen)) .unwrap_or(0); let deque = mul.skip(skipped).cloned().collect(); - PyDeque { + Ok(PyDeque { deque: PyRwLock::new(deque), maxlen: self.maxlen, state: AtomicCell::new(0), - } + }) } #[pymethod(magic)] diff --git a/vm/src/vm.rs b/vm/src/vm.rs index 78d1192ef..ff2758c9b 100644 --- a/vm/src/vm.rs +++ b/vm/src/vm.rs @@ -784,6 +784,11 @@ impl VirtualMachine { self.new_exception_msg(runtime_error, msg) } + pub fn new_memory_error(&self, msg: String) -> PyBaseExceptionRef { + let memory_error_type = self.ctx.exceptions.memory_error.clone(); + self.new_exception_msg(memory_error_type, msg) + } + pub fn new_stop_iteration(&self) -> PyBaseExceptionRef { let stop_iteration_type = self.ctx.exceptions.stop_iteration.clone(); self.new_exception_empty(stop_iteration_type) @@ -1845,6 +1850,18 @@ impl VirtualMachine { }) } + /// Checks that the multiplication is able to be performed. On Ok returns the + /// index as a usize for sequences to be able to use immediately. + pub fn check_repeat_or_memory_error(&self, length: usize, n: isize) -> PyResult { + let n = n.to_usize().unwrap_or(0); + if n > 0 && length > isize::MAX as usize / n { + // Empty message is currently used in CPython. + Err(self.new_memory_error("".to_owned())) + } else { + Ok(n) + } + } + // https://docs.python.org/3/reference/expressions.html#membership-test-operations fn _membership_iter_search(&self, haystack: PyObjectRef, needle: PyObjectRef) -> PyResult { let iter = iterator::get_iter(self, haystack)?;