From 8cff0ed6c251d60e90eb42103bb92403e49589cf Mon Sep 17 00:00:00 2001
From: Ankit Goel
Date: Sat, 21 Sep 2024 15:18:26 +0100
Subject: [PATCH] Avoid allocating a vector of elements when hashing frozenset (#5408)

Adds a `try_fold_keys` method to Dict which allows performing common operations on all elements without needing to create a Vec first.
---
 vm/src/builtins/set.rs |  8 ++++----
 vm/src/dictdatatype.rs | 11 +++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/vm/src/builtins/set.rs b/vm/src/builtins/set.rs
index fd001500d..2d8a9a8dd 100644
--- a/vm/src/builtins/set.rs
+++ b/vm/src/builtins/set.rs
@@ -436,12 +436,12 @@ impl PySetInner {
             ((h ^ 89869747) ^ (h.wrapping_shl(16))).wrapping_mul(3644798167)
         }
         // Factor in the number of active entries
-        let mut hash: u64 = (self.elements().len() as u64 + 1).wrapping_mul(1927868237);
+        let mut hash: u64 = (self.len() as u64 + 1).wrapping_mul(1927868237);
         // Xor-in shuffled bits from every entry's hash field because xor is
         // commutative and a frozenset hash should be independent of order.
-        for element in self.elements().iter() {
-            hash ^= _shuffle_bits(element.hash(vm)? as u64);
-        }
+        hash = self.content.try_fold_keys(hash, |h, element| {
+            Ok(h ^ _shuffle_bits(element.hash(vm)? as u64))
+        })?;
         // Disperse patterns arising in nested frozensets
         hash ^= (hash >> 11) ^ (hash >> 25);
         hash = hash.wrapping_mul(69069).wrapping_add(907133923);
diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs
index 96715e731..4baeef0bf 100644
--- a/vm/src/dictdatatype.rs
+++ b/vm/src/dictdatatype.rs
@@ -530,6 +530,17 @@ impl Dict {
             .collect()
     }
 
+    pub fn try_fold_keys<Acc, Fold>(&self, init: Acc, f: Fold) -> PyResult<Acc>
+    where
+        Fold: FnMut(Acc, &PyObject) -> PyResult<Acc>,
+    {
+        self.read()
+            .entries
+            .iter()
+            .filter_map(|v| v.as_ref().map(|v| v.key.as_object()))
+            .try_fold(init, f)
+    }
+
     /// Lookup the index for the given key.
     #[cfg_attr(feature = "flame-it", flame("Dict"))]
     fn lookup(