From 48025e0102a326858b865d8025ecd67db8d63ec3 Mon Sep 17 00:00:00 2001 From: Ankit Goel Date: Sat, 21 Sep 2024 20:55:28 +0100 Subject: [PATCH] Cache hash value for FrozenSets Adds a hash field to the `PyFrozenSet` data type in order to avoid recomputing the hash of an immutable object. --- Lib/test/test_set.py | 10 +--------- vm/src/builtins/set.rs | 43 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 523c39ce68..75447336e4 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -728,15 +728,7 @@ class TestFrozenSet(TestJointOps, unittest.TestCase): for i in range(len(s)+1): yield from map(frozenset, itertools.combinations(s, i)) - # TODO: RUSTPYTHON - # The original test has: - # for n in range(18): - # Due to general performance overhead, hashing a frozenset takes - # about 50 times longer than in CPython. This test amplifies that - # exponentially, so the best we can do here reasonably is 13. - # Even if the internal hash function did nothing, it would still be - # about 40 times slower than CPython. - for n in range(13): + for n in range(18): t = 2 ** n mask = t - 1 for nums in (range, zf_range): diff --git a/vm/src/builtins/set.rs b/vm/src/builtins/set.rs index 2d8a9a8dd0..1ab1fe21cf 100644 --- a/vm/src/builtins/set.rs +++ b/vm/src/builtins/set.rs @@ -24,6 +24,10 @@ use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, }; use once_cell::sync::Lazy; +use rustpython_common::{ + atomic::{Ordering, PyAtomic, Radium}, + hash, +}; use std::{fmt, ops::Deref}; pub type SetContentType = dictdatatype::Dict<()>; @@ -71,9 +75,18 @@ impl PySet { } #[pyclass(module = false, name = "frozenset", unhashable = true)] -#[derive(Default)] pub struct PyFrozenSet { inner: PySetInner, + hash: PyAtomic, +} + +impl Default for PyFrozenSet { + fn default() -> Self { + PyFrozenSet { + inner: PySetInner::default(), + hash: hash::SENTINEL.into(), + } + } } impl PyFrozenSet { @@ -87,7 +100,10 @@ impl PyFrozenSet { inner.add(elem, vm)?; } // FIXME: empty set check - Ok(Self { inner }) + Ok(Self { + inner, + ..Default::default() + }) } pub fn elements(&self) -> Vec { @@ -102,6 +118,7 @@ impl PyFrozenSet { ) -> PyResult { Ok(Self { inner: self.inner.fold_op(others, op, vm)?, + ..Default::default() }) } @@ -115,6 +132,7 @@ impl PyFrozenSet { inner: self .inner .fold_op(std::iter::once(other.into_iterable(vm)?), op, vm)?, + ..Default::default() }) } } @@ -472,6 +490,7 @@ impl PySetInner { op( &PyFrozenSet { inner: set.inner.copy(), + ..Default::default() } .into_pyobject(vm), vm, @@ -956,6 +975,7 @@ impl PyFrozenSet { } else { Self { inner: zelf.inner.copy(), + ..Default::default() } .into_ref(&vm.ctx) } @@ -1057,6 +1077,7 @@ impl PyFrozenSet { inner: other .as_inner() .difference(ArgIterable::try_from_object(vm, zelf.into())?, vm)?, + ..Default::default() })) } else { Ok(PyArithmeticValue::NotImplemented) @@ -1107,7 +1128,23 @@ impl AsSequence for PyFrozenSet { impl Hashable for PyFrozenSet { #[inline] fn hash(zelf: &crate::Py, vm: &VirtualMachine) -> PyResult { - zelf.inner.hash(vm) + let hash = match zelf.hash.load(Ordering::Relaxed) { + hash::SENTINEL => { + let hash = zelf.inner.hash(vm)?; + match Radium::compare_exchange( + &zelf.hash, + hash::SENTINEL, + hash::fix_sentinel(hash), + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => hash, + Err(prev_stored) => prev_stored, + } + } + hash => hash, + }; + Ok(hash) } }