From cc9dcbc9f811fe9cbc49ba2e1fea76e2de9d6792 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Thu, 9 May 2019 02:26:30 +0900 Subject: [PATCH] Refactor dictdatatype::Dict --- tests/snippets/dict.py | 17 ++++++++ vm/src/dictdatatype.rs | 93 +++++++++++++++++++++++++++--------------- vm/src/obj/objdict.rs | 14 +++---- 3 files changed, 81 insertions(+), 43 deletions(-) diff --git a/tests/snippets/dict.py b/tests/snippets/dict.py index f7506a205..4545f7f58 100644 --- a/tests/snippets/dict.py +++ b/tests/snippets/dict.py @@ -200,3 +200,20 @@ w = {1: 1, **x, 2: 2, **y, 3: 3, **z, 4: 4} assert w == {1: 1, 'a': 1, 'b': 2, 'c': 3, 2: 2, 'd': 3, 3: 3, 'e': 3, 4: 4} assert str({True: True, 1.0: 1.0}) == str({True: 1.0}) + +class A: + def __hash__(self): + return 1 + def __eq__(self, other): + return isinstance(other, A) +class B: + def __hash__(self): + return 1 + def __eq__(self, other): + return isinstance(other, B) + +s = {1: 0, A(): 1, B(): 2} +assert len(s) == 3 +assert s[1] == 0 +assert s[A()] == 1 +assert s[B()] == 2 diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index a075ec937..858cf43e1 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -1,16 +1,25 @@ use crate::obj::objbool; +use crate::pyhash; use crate::pyobject::{IdProtocol, PyObjectRef, PyResult}; use crate::vm::VirtualMachine; /// Ordered dictionary implementation. /// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html /// And: https://www.youtube.com/watch?v=p33CVV29OG8 /// And: http://code.activestate.com/recipes/578375/ -use std::collections::HashMap; +use std::collections::{hash_map::DefaultHasher, HashMap}; +use std::hash::{Hash, Hasher}; + +/// hash value of an object returned by __hash__ +type HashValue = pyhash::PyHash; +/// index calculated by resolving collision +type HashIndex = pyhash::PyHash; +/// entry index mapped in indices +type EntryIndex = usize; #[derive(Clone)] pub struct Dict { size: usize, - indices: HashMap, + indices: HashMap, entries: Vec>>, } @@ -26,7 +35,7 @@ impl Default for Dict { #[derive(Clone)] struct DictEntry { - hash: usize, + hash: HashValue, key: PyObjectRef, value: T, } @@ -38,12 +47,27 @@ pub struct DictSize { } impl Dict { - pub fn new() -> Self { - Dict { - size: 0, - indices: HashMap::new(), - entries: Vec::new(), - } + fn unchecked_push( + &mut self, + hash_index: HashIndex, + hash_value: HashValue, + key: &PyObjectRef, + value: T, + ) { + let entry = DictEntry { + hash: hash_value, + key: key.clone(), + value, + }; + let entry_index = self.entries.len(); + self.entries.push(Some(entry)); + self.indices.insert(hash_index, entry_index); + self.size += 1; + } + + fn unchecked_delete(&mut self, entry_index: EntryIndex) { + self.entries[entry_index] = None; + self.size -= 1; } /// Store a key @@ -63,29 +87,21 @@ impl Dict { hash_value, } => { // New key: - let entry = DictEntry { - hash: hash_value, - key: key.clone(), - value, - }; - let index = self.entries.len(); - self.entries.push(Some(entry)); - self.indices.insert(hash_index, index); - self.size += 1; + self.unchecked_push(hash_index, hash_value, key, value); Ok(()) } } } pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult { - if let LookupResult::Existing(_index) = self.lookup(vm, key)? { + if let LookupResult::Existing(_) = self.lookup(vm, key)? { Ok(true) } else { Ok(false) } } - fn unchecked_get(&self, index: usize) -> T { + fn unchecked_get(&self, index: EntryIndex) -> T { if let Some(entry) = &self.entries[index] { entry.value.clone() } else { @@ -110,9 +126,7 @@ impl Dict { /// Delete a key pub fn delete(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<()> { - if let LookupResult::Existing(index) = self.lookup(vm, key)? { - self.entries[index] = None; - self.size -= 1; + if self.delete_if_exists(vm, key)? { Ok(()) } else { let key_repr = vm.to_pystr(key)?; @@ -120,6 +134,15 @@ impl Dict { } } + pub fn delete_if_exists(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult { + if let LookupResult::Existing(entry_index) = self.lookup(vm, key)? { + self.unchecked_delete(entry_index); + Ok(true) + } else { + Ok(false) + } + } + pub fn len(&self) -> usize { self.size } @@ -135,7 +158,7 @@ impl Dict { } } - pub fn next_entry(&self, position: &mut usize) -> Option<(&PyObjectRef, &T)> { + pub fn next_entry(&self, position: &mut EntryIndex) -> Option<(&PyObjectRef, &T)> { while *position < self.entries.len() { if let Some(DictEntry { key, value, .. }) = &self.entries[*position] { *position += 1; @@ -152,9 +175,9 @@ impl Dict { /// Lookup the index for the given key. fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult { - let hash_value = calc_hash(vm, key)?; + let hash_value = collection_hash(vm, key)?; let perturb = hash_value; - let mut hash_index: usize = hash_value; + let mut hash_index: HashIndex = hash_value; loop { if self.indices.contains_key(&hash_index) { // Now we have an index, lets check the key. @@ -197,8 +220,7 @@ impl Dict { pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult { if let LookupResult::Existing(index) = self.lookup(vm, key)? { let value = self.unchecked_get(index); - self.entries[index] = None; - self.size -= 1; + self.unchecked_delete(index); Ok(value) } else { let key_repr = vm.to_pystr(key)?; @@ -209,14 +231,17 @@ impl Dict { enum LookupResult { NewIndex { - hash_value: usize, - hash_index: usize, + hash_value: HashValue, + hash_index: HashIndex, }, // return not found, index into indices - Existing(usize), // Existing record, index into entries + Existing(EntryIndex), // Existing record, index into entries } -fn calc_hash(vm: &VirtualMachine, key: &PyObjectRef) -> PyResult { - Ok(vm._hash(key)? as usize) +fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult { + let raw_hash = vm._hash(object)?; + let mut hasher = DefaultHasher::new(); + raw_hash.hash(&mut hasher); + Ok(hasher.finish() as HashValue) } /// Invoke __eq__ on two keys @@ -232,7 +257,7 @@ mod tests { #[test] fn test_insert() { let mut vm = VirtualMachine::new(); - let mut dict = Dict::new(); + let mut dict = Dict::default(); assert_eq!(0, dict.len()); let key1 = vm.new_bool(true); diff --git a/vm/src/obj/objdict.rs b/vm/src/obj/objdict.rs index 396c711f4..87e4b1919 100644 --- a/vm/src/obj/objdict.rs +++ b/vm/src/obj/objdict.rs @@ -264,15 +264,11 @@ impl PyDictRef { fn popitem(self, vm: &VirtualMachine) -> PyResult { let mut entries = self.entries.borrow_mut(); - let (key, value) = match entries.next_entry(&mut 0) { - Some((key, value)) => (key.clone(), value.clone()), - None => { - return Err(vm.new_key_error("popitem(): dictionary is empty".to_string())); - } - }; - - entries.delete(vm, &key)?; - Ok(vm.ctx.new_tuple(vec![key, value])) + if let Some((key, value)) = entries.pop_front() { + Ok(vm.ctx.new_tuple(vec![key, value])) + } else { + Err(vm.new_key_error("popitem(): dictionary is empty".to_string())) + } } /// Take a python dictionary and convert it to attributes.