mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-09 22:49:57 +09:00
Merge pull request #1187 from RustPython/dict-keying
Implement dictionary indexing by trait.
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
use crate::obj::objbool;
|
||||
use crate::obj::objstr::PyString;
|
||||
use crate::pyhash;
|
||||
use crate::pyobject::{IdProtocol, PyObjectRef, PyResult};
|
||||
use crate::vm::VirtualMachine;
|
||||
use num_bigint::ToBigInt;
|
||||
/// Ordered dictionary implementation.
|
||||
/// Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
|
||||
/// And: https://www.youtube.com/watch?v=p33CVV29OG8
|
||||
@@ -93,7 +95,7 @@ impl<T: Clone> Dict<T> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<bool> {
|
||||
pub fn contains<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<bool> {
|
||||
if let LookupResult::Existing(_) = self.lookup(vm, key)? {
|
||||
Ok(true)
|
||||
} else {
|
||||
@@ -111,7 +113,7 @@ impl<T: Clone> Dict<T> {
|
||||
|
||||
/// Retrieve a key
|
||||
#[cfg_attr(feature = "flame-it", flame("Dict"))]
|
||||
pub fn get(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
|
||||
pub fn get<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
|
||||
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
|
||||
Ok(Some(self.unchecked_get(index)))
|
||||
} else {
|
||||
@@ -149,7 +151,7 @@ impl<T: Clone> Dict<T> {
|
||||
key: &PyObjectRef,
|
||||
value: T,
|
||||
) -> PyResult<()> {
|
||||
match self.lookup(vm, &key)? {
|
||||
match self.lookup(vm, key)? {
|
||||
LookupResult::Existing(entry_index) => self.unchecked_delete(entry_index),
|
||||
LookupResult::NewIndex {
|
||||
hash_value,
|
||||
@@ -199,8 +201,8 @@ impl<T: Clone> Dict<T> {
|
||||
|
||||
/// Lookup the index for the given key.
|
||||
#[cfg_attr(feature = "flame-it", flame("Dict"))]
|
||||
fn lookup(&self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<LookupResult> {
|
||||
let hash_value = collection_hash(vm, key)?;
|
||||
fn lookup<K: DictKey>(&self, vm: &VirtualMachine, key: &K) -> PyResult<LookupResult> {
|
||||
let hash_value = key.do_hash(vm)?;
|
||||
let perturb = hash_value;
|
||||
let mut hash_index: HashIndex = hash_value;
|
||||
loop {
|
||||
@@ -209,11 +211,11 @@ impl<T: Clone> Dict<T> {
|
||||
let index = self.indices[&hash_index];
|
||||
if let Some(entry) = &self.entries[index] {
|
||||
// Okay, we have an entry at this place
|
||||
if entry.key.is(key) {
|
||||
if key.do_is(&entry.key) {
|
||||
// Literally the same object
|
||||
break Ok(LookupResult::Existing(index));
|
||||
} else if entry.hash == hash_value {
|
||||
if do_eq(vm, &entry.key, key)? {
|
||||
if key.do_eq(vm, &entry.key)? {
|
||||
break Ok(LookupResult::Existing(index));
|
||||
} else {
|
||||
// entry mismatch.
|
||||
@@ -242,7 +244,7 @@ impl<T: Clone> Dict<T> {
|
||||
}
|
||||
|
||||
/// Retrieve and delete a key
|
||||
pub fn pop(&mut self, vm: &VirtualMachine, key: &PyObjectRef) -> PyResult<Option<T>> {
|
||||
pub fn pop<K: DictKey>(&mut self, vm: &VirtualMachine, key: &K) -> PyResult<Option<T>> {
|
||||
if let LookupResult::Existing(index) = self.lookup(vm, key)? {
|
||||
let value = self.unchecked_get(index);
|
||||
self.unchecked_delete(index);
|
||||
@@ -273,23 +275,68 @@ enum LookupResult {
|
||||
Existing(EntryIndex), // Existing record, index into entries
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "flame-it", flame())]
|
||||
fn collection_hash(vm: &VirtualMachine, object: &PyObjectRef) -> PyResult<HashValue> {
|
||||
let raw_hash = vm._hash(object)?;
|
||||
let mut hasher = DefaultHasher::new();
|
||||
raw_hash.hash(&mut hasher);
|
||||
Ok(hasher.finish() as HashValue)
|
||||
/// Types implementing this trait can be used to index
|
||||
/// the dictionary. Typical usecases are:
|
||||
/// - PyObjectRef -> arbitrary python type used as key
|
||||
/// - str -> string reference used as key, this is often used internally
|
||||
pub trait DictKey {
|
||||
fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue>;
|
||||
fn do_is(&self, other: &PyObjectRef) -> bool;
|
||||
fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool>;
|
||||
}
|
||||
|
||||
/// Invoke __eq__ on two keys
|
||||
fn do_eq(vm: &VirtualMachine, key1: &PyObjectRef, key2: &PyObjectRef) -> Result<bool, PyObjectRef> {
|
||||
let result = vm._eq(key1.clone(), key2.clone())?;
|
||||
objbool::boolval(vm, result)
|
||||
/// Implement trait for PyObjectRef such that we can use python objects
|
||||
/// to index dictionaries.
|
||||
impl DictKey for PyObjectRef {
|
||||
fn do_hash(&self, vm: &VirtualMachine) -> PyResult<HashValue> {
|
||||
let raw_hash = vm._hash(self)?;
|
||||
let mut hasher = DefaultHasher::new();
|
||||
raw_hash.hash(&mut hasher);
|
||||
Ok(hasher.finish() as HashValue)
|
||||
}
|
||||
|
||||
fn do_is(&self, other: &PyObjectRef) -> bool {
|
||||
self.is(other)
|
||||
}
|
||||
|
||||
fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
|
||||
let result = vm._eq(self.clone(), other_key.clone())?;
|
||||
objbool::boolval(vm, result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement trait for the str type, so that we can use strings
|
||||
/// to index dictionaries.
|
||||
impl DictKey for String {
|
||||
fn do_hash(&self, _vm: &VirtualMachine) -> PyResult<HashValue> {
|
||||
// follow a similar route as the hashing of PyStringRef
|
||||
let raw_hash = pyhash::hash_value(self).to_bigint().unwrap();
|
||||
let raw_hash = pyhash::hash_bigint(&raw_hash);
|
||||
let mut hasher = DefaultHasher::new();
|
||||
raw_hash.hash(&mut hasher);
|
||||
Ok(hasher.finish() as HashValue)
|
||||
}
|
||||
|
||||
fn do_is(&self, _other: &PyObjectRef) -> bool {
|
||||
// No matter who the other pyobject is, we are never the same thing, since
|
||||
// we are a str, not a pyobject.
|
||||
false
|
||||
}
|
||||
|
||||
fn do_eq(&self, vm: &VirtualMachine, other_key: &PyObjectRef) -> PyResult<bool> {
|
||||
if let Some(py_str_value) = other_key.payload::<PyString>() {
|
||||
Ok(&py_str_value.value == self)
|
||||
} else {
|
||||
// Fall back to PyString implementation.
|
||||
let s = vm.new_str(self.to_string());
|
||||
s.do_eq(vm, other_key)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Dict, VirtualMachine};
|
||||
use super::{Dict, DictKey, VirtualMachine};
|
||||
|
||||
#[test]
|
||||
fn test_insert() {
|
||||
@@ -313,9 +360,40 @@ mod tests {
|
||||
dict.delete(&vm, &key1).unwrap();
|
||||
assert_eq!(1, dict.len());
|
||||
|
||||
dict.insert(&vm, &key1, value2).unwrap();
|
||||
dict.insert(&vm, &key1, value2.clone()).unwrap();
|
||||
assert_eq!(2, dict.len());
|
||||
|
||||
assert_eq!(true, dict.contains(&vm, &key1).unwrap());
|
||||
assert_eq!(true, dict.contains(&vm, &"x".to_string()).unwrap());
|
||||
|
||||
let val = dict.get(&vm, &"x".to_string()).unwrap().unwrap();
|
||||
vm._eq(val, value2)
|
||||
.expect("retrieved value must be equal to inserted value.");
|
||||
}
|
||||
|
||||
macro_rules! hash_tests {
|
||||
($($name:ident: $example_hash:expr,)*) => {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
check_hash_equivalence($example_hash);
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
hash_tests! {
|
||||
test_abc: "abc",
|
||||
test_x: "x",
|
||||
}
|
||||
|
||||
fn check_hash_equivalence(text: &str) {
|
||||
let vm: VirtualMachine = Default::default();
|
||||
let value1 = text.to_string();
|
||||
let value2 = vm.new_str(value1.clone());
|
||||
|
||||
let hash1 = value1.do_hash(&vm).expect("Hash should not fail.");
|
||||
let hash2 = value2.do_hash(&vm).expect("Hash should not fail.");
|
||||
assert_eq!(hash1, hash2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ use crate::vm::{ReprGuard, VirtualMachine};
|
||||
use super::objbool;
|
||||
use super::objiter;
|
||||
use super::objstr;
|
||||
use super::objtype;
|
||||
use crate::dictdatatype;
|
||||
use crate::obj::objtype::PyClassRef;
|
||||
use crate::pyobject::PyClassImpl;
|
||||
@@ -316,6 +317,23 @@ impl PyDictRef {
|
||||
pub fn size(&self) -> dictdatatype::DictSize {
|
||||
self.entries.borrow().size()
|
||||
}
|
||||
|
||||
pub fn get_item_option<T: IntoPyObject>(
|
||||
&self,
|
||||
key: T,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<Option<PyObjectRef>> {
|
||||
match self.get_item(key, vm) {
|
||||
Ok(value) => Ok(Some(value)),
|
||||
Err(exc) => {
|
||||
if objtype::isinstance(&exc, &vm.ctx.exceptions.key_error) {
|
||||
Ok(None)
|
||||
} else {
|
||||
Err(exc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ItemProtocol for PyDictRef {
|
||||
|
||||
@@ -418,10 +418,7 @@ impl PyInt {
|
||||
|
||||
#[pymethod(name = "__hash__")]
|
||||
pub fn hash(&self, _vm: &VirtualMachine) -> pyhash::PyHash {
|
||||
match self.value.to_i64() {
|
||||
Some(value) => (value % pyhash::MODULUS as i64),
|
||||
None => (&self.value % pyhash::MODULUS).to_i64().unwrap(),
|
||||
}
|
||||
pyhash::hash_bigint(&self.value)
|
||||
}
|
||||
|
||||
#[pymethod(name = "__abs__")]
|
||||
|
||||
@@ -11,7 +11,7 @@ use crate::obj::objfunction::PyMethod;
|
||||
use crate::obj::objstr;
|
||||
use crate::obj::objtype::{PyClass, PyClassRef};
|
||||
use crate::pyobject::{
|
||||
ItemProtocol, PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol,
|
||||
PyContext, PyObjectRef, PyRef, PyResult, PyValue, TryFromObject, TypeProtocol,
|
||||
};
|
||||
use crate::scope::NameProtocol;
|
||||
use crate::vm::VirtualMachine;
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use num_bigint::BigInt;
|
||||
use num_traits::ToPrimitive;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
use crate::obj::objfloat;
|
||||
@@ -81,3 +83,10 @@ pub fn hash_iter<'a, I: std::iter::Iterator<Item = &'a PyObjectRef>>(
|
||||
}
|
||||
Ok(hasher.finish() as PyHash)
|
||||
}
|
||||
|
||||
pub fn hash_bigint(value: &BigInt) -> PyHash {
|
||||
match value.to_i64() {
|
||||
Some(i64_value) => (i64_value % MODULUS as i64),
|
||||
None => (value % MODULUS).to_i64().unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1044,24 +1044,6 @@ pub trait ItemProtocol {
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult;
|
||||
fn del_item<T: IntoPyObject>(&self, key: T, vm: &VirtualMachine) -> PyResult;
|
||||
|
||||
#[cfg_attr(feature = "flame-it", flame("ItemProtocol"))]
|
||||
fn get_item_option<T: IntoPyObject>(
|
||||
&self,
|
||||
key: T,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<Option<PyObjectRef>> {
|
||||
match self.get_item(key, vm) {
|
||||
Ok(value) => Ok(Some(value)),
|
||||
Err(exc) => {
|
||||
if objtype::isinstance(&exc, &vm.ctx.exceptions.key_error) {
|
||||
Ok(None)
|
||||
} else {
|
||||
Err(exc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ItemProtocol for PyObjectRef {
|
||||
|
||||
Reference in New Issue
Block a user