Merge pull request #1187 from RustPython/dict-keying

Implement dictionary indexing by trait.
This commit is contained in:
Noah
2019-07-29 08:05:48 -05:00
committed by GitHub
6 changed files with 127 additions and 43 deletions

View File

@@ -1,7 +1,9 @@
use crate::obj::objbool;
use crate::obj::objstr::PyString;
use crate::pyhash;
use crate::pyobject::{IdProtocol, PyObjectRef, PyResult};
use crate::vm::VirtualMachine;
use num_bigint::ToBigInt;
/// Ordered dictionary implementation.
/// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
/// And: https://www.youtube.com/watch?v=p33CVV29OG8
@@ -93,7 +95,7 @@ impl<T: Clone> Dict<T> {
}
}
pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<bool> {
pub fn contains<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<bool> {
if let LookupResult::Existing(_) = self.lookup(vm, key)? {
Ok(true)
} else {
@@ -111,7 +113,7 @@ impl<T: Clone> Dict<T> {
/// Retrieve a key
#[cfg_attr(feature = "flame-it", flame("Dict"))]
pub fn get(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
pub fn get<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
Ok(Some(self.unchecked_get(index)))
} else {
@@ -149,7 +151,7 @@ impl<T: Clone> Dict<T> {
key: &PyObjectRef,
value: T,
) -> PyResult<()> {
match self.lookup(vm, &key)? {
match self.lookup(vm, key)? {
LookupResult::Existing(entry_index) => self.unchecked_delete(entry_index),
LookupResult::NewIndex {
hash_value,
@@ -199,8 +201,8 @@ impl<T: Clone> Dict<T> {
/// Lookup the index for the given key.
#[cfg_attr(feature = "flame-it", flame("Dict"))]
fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<LookupResult> {
let hash_value = collection_hash(vm, key)?;
fn lookup<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<LookupResult> {
let hash_value = key.do_hash(vm)?;
let perturb = hash_value;
let mut hash_index: HashIndex = hash_value;
loop {
@@ -209,11 +211,11 @@ impl<T: Clone> Dict<T> {
let index = self.indices[&hash_index];
if let Some(entry) = &self.entries[index] {
// Okay, we have an entry at this place
if entry.key.is(key) {
if key.do_is(&entry.key) {
// Literally the same object
break Ok(LookupResult::Existing(index));
} else if entry.hash == hash_value {
if do_eq(vm, &entry.key, key)? {
if key.do_eq(vm, &entry.key)? {
break Ok(LookupResult::Existing(index));
} else {
// entry mismatch.
@@ -242,7 +244,7 @@ impl<T: Clone> Dict<T> {
}
/// Retrieve and delete a key
pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
pub fn pop<K: DictKey>(&mut self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
let value = self.unchecked_get(index);
self.unchecked_delete(index);
@@ -273,23 +275,68 @@ enum LookupResult {
Existing(EntryIndex), // Existing record, index into entries
}
#[cfg_attr(feature = "flame-it", flame())]
fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult<HashValue> {
let raw_hash = vm._hash(object)?;
let mut hasher = DefaultHasher::new();
raw_hash.hash(&mut hasher);
Ok(hasher.finish() as HashValue)
/// Types implementing this trait can be used to index
/// the dictionary. Typical use cases are:
/// - PyObjectRef -> arbitrary python type used as key
/// - String -> Rust string used as key, this is often used internally
///
/// The dictionary lookup calls the three methods in this order: `do_hash`
/// once per lookup, then for every candidate entry `do_is`, and `do_eq`
/// only when the identity check failed and the stored hash matches.
pub trait DictKey {
/// Compute the hash value the dictionary uses to locate this key.
/// Returns an error if hashing the key fails.
fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue>;
/// Identity check: is this key literally the same object as the stored key `other`?
fn do_is(&self, other: &PyObjectRef) -> bool;
/// Equality check between this key and the stored key `other_key`.
/// Returns an error if the comparison itself fails.
fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool>;
}
/// Invoke __eq__ on two keys
fn do_eq(vm: &VirtualMachine, key1: &PyObjectRef, key2: &PyObjectRef) -> Result<bool, PyObjectRef> {
let result = vm._eq(key1.clone(), key2.clone())?;
objbool::boolval(vm, result)
/// `DictKey` implementation for `PyObjectRef`, so that python objects can be
/// used to index dictionaries.
impl DictKey for PyObjectRef {
    /// Ask the VM for the object's hash, then run that value through a
    /// `DefaultHasher` to obtain the `HashValue` used by the dictionary.
    fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue> {
        let py_hash = vm._hash(self)?;
        let mut state = DefaultHasher::new();
        py_hash.hash(&mut state);
        Ok(state.finish() as HashValue)
    }

    /// Identity comparison between the two object references.
    fn do_is(&self, other: &PyObjectRef) -> bool {
        self.is(other)
    }

    /// Compare the two objects via `vm._eq` and convert the outcome to a
    /// Rust `bool` with `objbool::boolval`; errors from either step propagate.
    fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
        vm._eq(self.clone(), other_key.clone())
            .and_then(|outcome| objbool::boolval(vm, outcome))
    }
}
/// `DictKey` implementation for Rust `String`s, so that a plain string can be
/// used to index dictionaries.
impl DictKey for String {
    /// Hash the string so that it can stand in for a python string key.
    fn do_hash(&self, _vm: &VirtualMachine) -> PyResult<HashValue> {
        // follow a similar route as the hashing of PyStringRef
        let str_hash = pyhash::hash_value(self).to_bigint().unwrap();
        let py_hash = pyhash::hash_bigint(&str_hash);
        let mut state = DefaultHasher::new();
        py_hash.hash(&mut state);
        Ok(state.finish() as HashValue)
    }

    /// A Rust string is never the same object as a python object, so the
    /// identity check always fails and lookup relies on hash + `do_eq`.
    fn do_is(&self, _other: &PyObjectRef) -> bool {
        false
    }

    /// Compare against a stored key: python strings are compared by their
    /// string value directly; any other object is compared through the
    /// `PyObjectRef` implementation using a freshly created python string.
    fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
        match other_key.payload::<PyString>() {
            Some(py_str) => Ok(py_str.value == *self),
            // Fall back to PyString implementation.
            None => vm.new_str(self.clone()).do_eq(vm, other_key),
        }
    }
}
#[cfg(test)]
mod tests {
use super::{Dict, VirtualMachine};
use super::{Dict, DictKey, VirtualMachine};
#[test]
fn test_insert() {
@@ -313,9 +360,40 @@ mod tests {
dict.delete(&vm, &key1).unwrap();
assert_eq!(1, dict.len());
dict.insert(&vm, &key1, value2).unwrap();
dict.insert(&vm, &key1, value2.clone()).unwrap();
assert_eq!(2, dict.len());
assert_eq!(true, dict.contains(&vm, &key1).unwrap());
assert_eq!(true, dict.contains(&vm, &"x".to_string()).unwrap());
let val = dict.get(&vm, &"x".to_string()).unwrap().unwrap();
vm._eq(val, value2)
.expect("retrieved value must be equal to inserted value.");
}
/// Generates one `#[test]` function per `name: text,` pair; each generated
/// test hands its text to `check_hash_equivalence`.
macro_rules! hash_tests {
    ($($test_name:ident: $sample:expr,)*) => {
        $(
            #[test]
            fn $test_name() {
                check_hash_equivalence($sample);
            }
        )*
    };
}

hash_tests! {
    test_abc: "abc",
    test_x: "x",
}
fn check_hash_equivalence(text: &str) {
let vm: VirtualMachine = Default::default();
let value1 = text.to_string();
let value2 = vm.new_str(value1.clone());
let hash1 = value1.do_hash(&vm).expect("Hash should not fail.");
let hash2 = value2.do_hash(&vm).expect("Hash should not fail.");
assert_eq!(hash1, hash2);
}
}

View File

@@ -11,6 +11,7 @@ use crate::vm::{ReprGuard, VirtualMachine};
use super::objbool;
use super::objiter;
use super::objstr;
use super::objtype;
use crate::dictdatatype;
use crate::obj::objtype::PyClassRef;
use crate::pyobject::PyClassImpl;
@@ -316,6 +317,23 @@ impl PyDictRef {
pub fn size(&self) -> dictdatatype::DictSize {
self.entries.borrow().size()
}
/// Look up `key` and report a missing key as `Ok(None)` instead of an error.
///
/// Delegates to `get_item`; an exception that is an instance of `KeyError`
/// is turned into `Ok(None)`, every other exception is returned unchanged.
///
/// NOTE(review): any `KeyError` coming out of `get_item` is treated as
/// "key absent", whatever raised it — confirm this is intended.
pub fn get_item_option<T: IntoPyObject>(
    &self,
    key: T,
    vm: &VirtualMachine,
) -> PyResult<Option<PyObjectRef>> {
    match self.get_item(key, vm) {
        Ok(found) => Ok(Some(found)),
        Err(err) if objtype::isinstance(&err, &vm.ctx.exceptions.key_error) => Ok(None),
        Err(err) => Err(err),
    }
}
}
impl ItemProtocol for PyDictRef {

View File

@@ -418,10 +418,7 @@ impl PyInt {
#[pymethod(name = "__hash__")]
pub fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
match self.value.to_i64() {
Some(value) => (value % pyhash::MODULUS as i64),
None => (&self.value % pyhash::MODULUS).to_i64().unwrap(),
}
pyhash::hash_bigint(&self.value)
}
#[pymethod(name = "__abs__")]

View File

@@ -11,7 +11,7 @@ use crate::obj::objfunction::PyMethod;
use crate::obj::objstr;
use crate::obj::objtype::{PyClass, PyClassRef};
use crate::pyobject::{
ItemProtocol, PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol,
PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol,
};
use crate::scope::NameProtocol;
use crate::vm::VirtualMachine;

View File

@@ -1,3 +1,5 @@
use num_bigint::BigInt;
use num_traits::ToPrimitive;
use std::hash::{Hash, Hasher};
use crate::obj::objfloat;
@@ -81,3 +83,10 @@ pub fn hash_iter<'a, I: std::iter::Iterator<Item = &'a PyObjectRef>>(
}
Ok(hasher.finish() as PyHash)
}
pub fn hash_bigint(value: &BigInt) -> PyHash {
match value.to_i64() {
Some(i64_value) => (i64_value % MODULUS as i64),
None => (value % MODULUS).to_i64().unwrap(),
}
}

View File

@@ -1044,24 +1044,6 @@ pub trait ItemProtocol {
vm: &VirtualMachine,
) -> PyResult;
fn del_item<T: IntoPyObject>(&self, key: T, vm: &VirtualMachine) -> PyResult;
/// Look up `key` and report a missing key as `Ok(None)` instead of an error.
///
/// Delegates to `get_item`; an exception that is an instance of `KeyError`
/// is turned into `Ok(None)`, every other exception is returned unchanged.
#[cfg_attr(feature = "flame-it", flame("ItemProtocol"))]
fn get_item_option<T: IntoPyObject>(
    &self,
    key: T,
    vm: &VirtualMachine,
) -> PyResult<Option<PyObjectRef>> {
    match self.get_item(key, vm) {
        Ok(found) => Ok(Some(found)),
        Err(err) if objtype::isinstance(&err, &vm.ctx.exceptions.key_error) => Ok(None),
        Err(err) => Err(err),
    }
}
}
impl ItemProtocol for PyObjectRef {