diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 3838f84e8..487207e88 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -37,34 +37,26 @@ impl fmt::Debug for Dict { } } -#[derive(Debug, Copy, Clone)] -enum IndexEntry { - Dummy, - Free, - Index(usize), -} +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[repr(transparent)] +struct IndexEntry(i64); impl IndexEntry { - const FREE: i64 = -1; - const DUMMY: i64 = -2; -} + const FREE: Self = Self(-1); + const DUMMY: Self = Self(-2); -impl From for IndexEntry { - fn from(idx: i64) -> Self { - match idx { - IndexEntry::FREE => IndexEntry::Free, - IndexEntry::DUMMY => IndexEntry::Dummy, - x => IndexEntry::Index(x as usize), - } + /// # Safety + /// idx must not be one of FREE or DUMMY + unsafe fn from_index_unchecked(idx: usize) -> Self { + debug_assert!((idx as isize) >= 0); + Self(idx as i64) } -} -impl From for i64 { - fn from(idx: IndexEntry) -> Self { - match idx { - IndexEntry::Free => IndexEntry::FREE, - IndexEntry::Dummy => IndexEntry::DUMMY, - IndexEntry::Index(i) => i as i64, + fn index(self) -> Option { + if self.0 >= 0 { + Some(self.0 as usize) + } else { + None } } } @@ -73,10 +65,8 @@ impl From for i64 { struct DictInner { used: usize, filled: usize, - indices: Vec, + indices: Vec, entries: Vec>>, - // index to new inserted element should be in entries - next_new_entry_idx: usize, } impl Clone for Dict { @@ -95,7 +85,6 @@ impl Default for Dict { filled: 0, indices: vec![IndexEntry::FREE; 8], entries: Vec::new(), - next_new_entry_idx: 0, }), } } @@ -108,6 +97,7 @@ struct DictEntry { index: IndexIndex, value: T, } +static_assertions::assert_eq_size!(DictEntry, Option>); impl DictEntry { pub(crate) fn as_tuple(&self) -> (PyObjectRef, T) { @@ -175,11 +165,16 @@ impl DictInner { let mut idxs = GenIndexes::new(entry.hash, mask); loop { let index_index = idxs.next(); - let idx = &mut self.indices[index_index]; - if *idx == IndexEntry::FREE { - *idx = entry_idx as i64; - entry.index = index_index; - break; + unsafe { + // Safety: index is always valid here + // index_index is generated by idxs + // entry_idx is saved one + let idx = self.indices.get_unchecked_mut(index_index); + if *idx == IndexEntry::FREE { + *idx = IndexEntry::from_index_unchecked(entry_idx); + entry.index = index_index; + break; + } } } } else { @@ -187,7 +182,6 @@ impl DictInner { } } self.filled = self.used; - self.next_new_entry_idx = self.entries.len(); } fn unchecked_push( @@ -204,16 +198,15 @@ impl DictInner { value, index, }; - let entry_index = self.next_new_entry_idx; - if self.entries.len() == entry_index { - self.entries.push(Some(entry)); - } else { - self.entries[entry_index] = Some(entry); - } - self.indices[index] = entry_index as i64; + let entry_index = self.entries.len(); + self.entries.push(Some(entry)); + self.indices[index] = unsafe { + // SAFETY: entry_index is self.entries.len(). it never can + // grow to `usize-2` because hash tables cannot full its index + IndexEntry::from_index_unchecked(entry_index) + }; self.used += 1; - self.next_new_entry_idx += 1; - if let IndexEntry::Free = index_entry { + if let IndexEntry::FREE = index_entry { self.filled += 1; if let Some(new_size) = self.should_resize() { self.resize(new_size) @@ -260,7 +253,7 @@ impl Dict { let _removed = loop { let (entry_index, index_index) = self.lookup(vm, key, hash, None)?; let mut inner = self.write(); - if let IndexEntry::Index(index) = entry_index { + if let Some(index) = entry_index.index() { // Update existing key if let Some(entry) = inner.entries.get_mut(index) { let entry = entry @@ -287,7 +280,7 @@ impl Dict { pub fn contains(&self, vm: &VirtualMachine, key: &K) -> PyResult { let (entry, _) = self.lookup(vm, key, key.key_hash(vm)?, None)?; - Ok(matches!(entry, IndexEntry::Index(_))) + Ok(entry.index().is_some()) } /// Retrieve a key @@ -305,7 +298,7 @@ impl Dict { ) -> PyResult> { let ret = loop { let (entry, index_index) = self.lookup(vm, key, hash, None)?; - if let IndexEntry::Index(index) = entry { + if let Some(index) = entry.index() { let inner = self.read(); if let Some(entry) = inner.entries.get(index) { let entry = extract_dict_entry(entry); @@ -346,7 +339,6 @@ impl Dict { inner.indices.resize(8, IndexEntry::FREE); inner.used = 0; inner.filled = 0; - inner.next_new_entry_idx = 0; // defer dec rc std::mem::take(&mut inner.entries) }; @@ -394,7 +386,7 @@ impl Dict { let _removed = loop { let lookup = self.lookup(vm, key, hash, None)?; let (entry, index_index) = lookup; - if let IndexEntry::Index(_) = entry { + if entry.index().is_some() { match self.pop_inner(lookup) { ControlFlow::Break(Some(entry)) => break Some(entry), _ => continue, @@ -416,8 +408,8 @@ impl Dict { let hash = key.key_hash(vm)?; let res = loop { let lookup = self.lookup(vm, key, hash, None)?; - let (entry, index_index) = lookup; - if let IndexEntry::Index(index) = entry { + let (index_entry, index_index) = lookup; + if let Some(index) = index_entry.index() { let inner = self.read(); if let Some(entry) = inner.entries.get(index) { let entry = extract_dict_entry(entry); @@ -433,7 +425,13 @@ impl Dict { } else { let value = default(); let mut inner = self.write(); - inner.unchecked_push(index_index, hash, key.to_pyobject(vm), value.clone(), entry); + inner.unchecked_push( + index_index, + hash, + key.to_pyobject(vm), + value.clone(), + index_entry, + ); break value; } }; @@ -454,8 +452,8 @@ impl Dict { let hash = key.key_hash(vm)?; let res = loop { let lookup = self.lookup(vm, key, hash, None)?; - let (entry, index_index) = lookup; - if let IndexEntry::Index(index) = entry { + let (index_entry, index_index) = lookup; + if let Some(index) = index_entry.index() { let inner = self.read(); if let Some(entry) = inner.entries.get(index) { let entry = extract_dict_entry(entry); @@ -473,7 +471,7 @@ impl Dict { let key = key.to_pyobject(vm); let mut inner = self.write(); let ret = (key.clone(), value.clone()); - inner.unchecked_push(index_index, hash, key, value, entry); + inner.unchecked_push(index_index, hash, key, value, index_entry); break ret; } }; @@ -541,7 +539,7 @@ impl Dict { mut lock: Option>>, ) -> PyResult { let mut idxs = None; - let mut freeslot = None; + let mut free_slot = None; let ret = 'outer: loop { let (entry_key, ret) = { let inner = lock.take().unwrap_or_else(|| self.read()); @@ -550,22 +548,31 @@ impl Dict { }); loop { let index_index = idxs.next(); - match IndexEntry::from(inner.indices[index_index]) { - IndexEntry::Dummy => { - if freeslot.is_none() { - freeslot = Some(index_index); + let index_entry = *unsafe { + // Safety: index_index is generated + inner.indices.get_unchecked(index_index) + }; + match index_entry { + IndexEntry::DUMMY => { + if free_slot.is_none() { + free_slot = Some(index_index); } } - IndexEntry::Free => { - let idxs = match freeslot { - Some(free) => (IndexEntry::Dummy, free), - None => (IndexEntry::Free, index_index), + IndexEntry::FREE => { + let idxs = match free_slot { + Some(free) => (IndexEntry::DUMMY, free), + None => (IndexEntry::FREE, index_index), }; return Ok(idxs); } - IndexEntry::Index(i) => { - let entry = &inner.entries[i].as_ref().unwrap(); - let ret = (IndexEntry::Index(i), index_index); + idx => { + let entry = unsafe { + // Safety: DUMMY and FREE are already handled above. + // i is always valid and entry always exists. + let i = idx.index().unwrap_unchecked(); + inner.entries.get_unchecked(i).as_ref().unwrap_unchecked() + }; + let ret = (idx, index_index); if key.key_is(&entry.key) { break 'outer ret; } else if entry.hash == hash_value { @@ -602,7 +609,7 @@ impl Dict { pred: impl Fn(&T) -> Result, ) -> Result, E> { let (entry_index, index_index) = lookup; - let entry_index = if let IndexEntry::Index(entry_index) = entry_index { + let entry_index = if let Some(entry_index) = entry_index.index() { entry_index } else { return Ok(ControlFlow::Break(None)); @@ -623,7 +630,10 @@ impl Dict { // The dict was changed since we did lookup. Let's try again. _ => return Ok(ControlFlow::Continue(())), } - inner.indices[index_index] = IndexEntry::DUMMY; + *unsafe { + // index_index is result of lookup + inner.indices.get_unchecked_mut(index_index) + } = IndexEntry::DUMMY; inner.used -= 1; let removed = slot.take(); Ok(ControlFlow::Break(removed)) @@ -644,14 +654,17 @@ impl Dict { pub fn pop_back(&self) -> Option<(PyObjectRef, T)> { let mut inner = &mut *self.write(); - let (entry_idx, entry) = inner.entries[..inner.next_new_entry_idx] - .iter_mut() - .enumerate() - .rev() - .find_map(|(i, entry)| entry.take().map(|e| (i, e)))?; + let entry = loop { + let entry = inner.entries.pop()?; + if let Some(entry) = entry { + break entry; + } + }; inner.used -= 1; - inner.indices[entry.index] = IndexEntry::DUMMY; - inner.next_new_entry_idx = entry_idx; + *unsafe { + // entry.index always refers valid index + inner.indices.get_unchecked_mut(entry.index) + } = IndexEntry::DUMMY; Some((entry.key, entry.value)) }