From 42c1084890b03eeed19663f327113e960321a896 Mon Sep 17 00:00:00 2001 From: jfh Date: Fri, 30 Jul 2021 16:46:44 +0300 Subject: [PATCH 1/2] Use BigInts instead of keeping a range object to iterate. Equivalent to CPythons longrange_iterator. --- vm/src/builtins/range.rs | 51 +++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/vm/src/builtins/range.rs b/vm/src/builtins/range.rs index 6611a24e03..2bc15cc234 100644 --- a/vm/src/builtins/range.rs +++ b/vm/src/builtins/range.rs @@ -217,22 +217,21 @@ impl PyRange { } #[pymethod(name = "__reversed__")] - fn reversed(&self, vm: &VirtualMachine) -> PyRangeIterator { + fn reversed(&self) -> PyRangeIterator { let start = self.start.as_bigint(); let step = self.step.as_bigint(); // Use CPython calculation for this: + let length = self.len(); let new_stop = start - step; - let new_start = &new_stop + (self.len() * step); - let reversed = PyRange { - start: new_start.into_pyref(vm), - stop: new_stop.into_pyref(vm), - step: (-step).into_pyref(vm), - }; + let start = &new_stop + length.clone() * step; + let step = -step; PyRangeIterator { - position: AtomicCell::new(0), - range: reversed.into_ref(vm), + index: AtomicCell::new(0), + start, + step, + length, } } @@ -412,19 +411,37 @@ impl Comparable for PyRange { impl Iterable for PyRange { fn iter(zelf: PyRef, vm: &VirtualMachine) -> PyResult { + let (start, stop, step, length) = ( + zelf.start.as_bigint(), + zelf.stop.as_bigint(), + zelf.step.as_bigint(), + zelf.len(), + ); Ok(PyRangeIterator { - position: AtomicCell::new(0), - range: zelf, + index: AtomicCell::new(0), + start: start.clone(), + step: step.clone(), + length, } .into_object(vm)) } } +// Semantically, this is the same as the previous representation. +// +// Unfortunately, since AtomicCell requires a Copy type, no BigInt implementations can +// generally be used. As such, usize::MAX is the upper bound on number of elements (length) +// the range can contain in RustPython. +// +// This doesn't preclude the range from containing large values, since start and step +// can be BigInts, we can store any arbitrary range of values. #[pyclass(module = false, name = "range_iterator")] #[derive(Debug)] pub struct PyRangeIterator { - position: AtomicCell, - range: PyRangeRef, + index: AtomicCell, + start: BigInt, + step: BigInt, + length: BigInt, } impl PyValue for PyRangeIterator { @@ -438,9 +455,11 @@ impl PyRangeIterator {} impl PyIter for PyRangeIterator { fn next(zelf: &PyRef, vm: &VirtualMachine) -> PyResult { - let position = BigInt::from(zelf.position.fetch_add(1)); - if let Some(int) = zelf.range.get(&position) { - Ok(vm.ctx.new_int(int)) + let index = BigInt::from(zelf.index.fetch_add(1)); + if index < zelf.length { + Ok(vm + .ctx + .new_int(zelf.start.clone() + index * zelf.step.clone())) } else { Err(vm.new_stop_iteration()) } From 1115511e3d03f93720824997d8ff3ee13a540eed Mon Sep 17 00:00:00 2001 From: jfh Date: Fri, 30 Jul 2021 18:43:30 +0300 Subject: [PATCH 2/2] Add fast range iterator. --- Lib/test/test_range.py | 4 -- vm/src/builtins/range.rs | 106 +++++++++++++++++++++++++++++++-------- vm/src/types.rs | 2 + 3 files changed, 86 insertions(+), 26 deletions(-) diff --git a/Lib/test/test_range.py b/Lib/test/test_range.py index 24fba95336..60c9045b3a 100644 --- a/Lib/test/test_range.py +++ b/Lib/test/test_range.py @@ -484,10 +484,6 @@ class RangeTest(unittest.TestCase): self.assertNotIn(-1, r) self.assertNotIn(1, r) - # TODO: RUSTPYTHON. - # NOTE: Test passes, fast iterators are required for this to not - # take ~ 1m. - @unittest.skip def test_range_iterators(self): # exercise 'fast' iterators, that use a rangeiterobject internally. # see issue 7298 diff --git a/vm/src/builtins/range.rs b/vm/src/builtins/range.rs index 2bc15cc234..fc7910914a 100644 --- a/vm/src/builtins/range.rs +++ b/vm/src/builtins/range.rs @@ -1,7 +1,7 @@ use crossbeam_utils::atomic::AtomicCell; use num_bigint::{BigInt, Sign}; use num_integer::Integer; -use num_traits::{One, Signed, Zero}; +use num_traits::{One, Signed, ToPrimitive, Zero}; use super::int::{PyInt, PyIntRef}; use super::pytype::PyTypeRef; @@ -171,11 +171,10 @@ impl PyRange { pub fn init(context: &PyContext) { PyRange::extend_class(context, &context.types.range_type); + PyLongRangeIterator::extend_class(context, &context.types.longrange_iterator_type); PyRangeIterator::extend_class(context, &context.types.range_iterator_type); } -type PyRangeRef = PyRef; - #[pyimpl(with(Hashable, Comparable, Iterable))] impl PyRange { fn new(cls: PyTypeRef, stop: PyIntRef, vm: &VirtualMachine) -> PyResult> { @@ -217,7 +216,7 @@ impl PyRange { } #[pymethod(name = "__reversed__")] - fn reversed(&self) -> PyRangeIterator { + fn reversed(&self, vm: &VirtualMachine) -> PyResult { let start = self.start.as_bigint(); let step = self.step.as_bigint(); @@ -227,12 +226,29 @@ impl PyRange { let start = &new_stop + length.clone() * step; let step = -step; - PyRangeIterator { - index: AtomicCell::new(0), - start, - step, - length, - } + Ok( + if let (Some(start), Some(step), Some(_)) = + (start.to_isize(), step.to_isize(), new_stop.to_isize()) + { + PyRangeIterator { + index: AtomicCell::new(0), + start, + step, + // Cannot fail. If start, stop and step all successfully convert to isize, then result of zelf.len will + // always fit in a usize. + length: length.to_usize().unwrap_or(0), + } + .into_object(vm) + } else { + PyLongRangeIterator { + index: AtomicCell::new(0), + start, + step, + length, + } + .into_object(vm) + }, + ) } #[pymethod(name = "__len__")] @@ -417,33 +433,80 @@ impl Iterable for PyRange { zelf.step.as_bigint(), zelf.len(), ); - Ok(PyRangeIterator { - index: AtomicCell::new(0), - start: start.clone(), - step: step.clone(), - length, + if let (Some(start), Some(step), Some(_)) = + (start.to_isize(), step.to_isize(), stop.to_isize()) + { + Ok(PyRangeIterator { + index: AtomicCell::new(0), + start, + step, + // Cannot fail. If start, stop and step all successfully convert to isize, then result of zelf.len will + // always fit in a usize. + length: length.to_usize().unwrap_or(0), + } + .into_object(vm)) + } else { + Ok(PyLongRangeIterator { + index: AtomicCell::new(0), + start: start.clone(), + step: step.clone(), + length, + } + .into_object(vm)) } - .into_object(vm)) } } // Semantically, this is the same as the previous representation. // -// Unfortunately, since AtomicCell requires a Copy type, no BigInt implementations can +// Unfortunately, since AtomicCell requires a Copy type, no BigInt implementations can // generally be used. As such, usize::MAX is the upper bound on number of elements (length) // the range can contain in RustPython. // // This doesn't preclude the range from containing large values, since start and step // can be BigInts, we can store any arbitrary range of values. -#[pyclass(module = false, name = "range_iterator")] +#[pyclass(module = false, name = "longrange_iterator")] #[derive(Debug)] -pub struct PyRangeIterator { +pub struct PyLongRangeIterator { index: AtomicCell, start: BigInt, step: BigInt, length: BigInt, } +impl PyValue for PyLongRangeIterator { + fn class(vm: &VirtualMachine) -> &PyTypeRef { + &vm.ctx.types.longrange_iterator_type + } +} + +#[pyimpl(with(PyIter))] +impl PyLongRangeIterator {} + +impl PyIter for PyLongRangeIterator { + fn next(zelf: &PyRef, vm: &VirtualMachine) -> PyResult { + let index = BigInt::from(zelf.index.fetch_add(1)); + if index < zelf.length { + Ok(vm + .ctx + .new_int(zelf.start.clone() + index * zelf.step.clone())) + } else { + Err(vm.new_stop_iteration()) + } + } +} + +// When start, stop, step are isizes, we can use a faster more compact representation +// that only operates using isizes to track values. +#[pyclass(module = false, name = "range_iterator")] +#[derive(Debug)] +pub struct PyRangeIterator { + index: AtomicCell, + start: isize, + step: isize, + length: usize, +} + impl PyValue for PyRangeIterator { fn class(vm: &VirtualMachine) -> &PyTypeRef { &vm.ctx.types.range_iterator_type @@ -455,11 +518,10 @@ impl PyRangeIterator {} impl PyIter for PyRangeIterator { fn next(zelf: &PyRef, vm: &VirtualMachine) -> PyResult { - let index = BigInt::from(zelf.index.fetch_add(1)); - if index < zelf.length { + if zelf.index.load() < zelf.length { Ok(vm .ctx - .new_int(zelf.start.clone() + index * zelf.step.clone())) + .new_int(zelf.start + (zelf.index.fetch_add(1) as isize) * zelf.step)) } else { Err(vm.new_stop_iteration()) } diff --git a/vm/src/types.rs b/vm/src/types.rs index 264e603737..a5443334e2 100644 --- a/vm/src/types.rs +++ b/vm/src/types.rs @@ -94,6 +94,7 @@ pub struct TypeZoo { pub str_type: PyTypeRef, pub range_type: PyTypeRef, pub range_iterator_type: PyTypeRef, + pub longrange_iterator_type: PyTypeRef, pub slice_type: PyTypeRef, pub type_type: PyTypeRef, pub zip_type: PyTypeRef, @@ -187,6 +188,7 @@ impl TypeZoo { module_type: module::PyModule::init_bare_type().clone(), namespace_type: namespace::PyNamespace::init_bare_type().clone(), range_iterator_type: range::PyRangeIterator::init_bare_type().clone(), + longrange_iterator_type: range::PyLongRangeIterator::init_bare_type().clone(), set_iterator_type: set::PySetIterator::init_bare_type().clone(), str_iterator_type: pystr::PyStrIterator::init_bare_type().clone(), str_reverseiterator_type: pystr::PyStrReverseIterator::init_bare_type().clone(),