From a927814d6b5c66ba0f76cb9db70d6a4b41cd7e04 Mon Sep 17 00:00:00 2001 From: Jeong Yunwon Date: Wed, 11 May 2022 23:12:27 +0900 Subject: [PATCH 1/6] assertion for Option size --- vm/src/dictdatatype.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 3838f84e8..b04212489 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -108,6 +108,7 @@ struct DictEntry { index: IndexIndex, value: T, } +static_assertions::assert_eq_size!(DictEntry, Option>); impl DictEntry { pub(crate) fn as_tuple(&self) -> (PyObjectRef, T) { From 15a8f6fd1aee52c16edfabdb4a7d8624f95bf9d0 Mon Sep 17 00:00:00 2001 From: Jeong Yunwon Date: Wed, 11 May 2022 23:26:51 +0900 Subject: [PATCH 2/6] remove last in loop in DictInner::pop_back --- vm/src/dictdatatype.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index b04212489..9bcc9bdf4 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -645,14 +645,15 @@ impl Dict { pub fn pop_back(&self) -> Option<(PyObjectRef, T)> { let mut inner = &mut *self.write(); - let (entry_idx, entry) = inner.entries[..inner.next_new_entry_idx] - .iter_mut() - .enumerate() - .rev() - .find_map(|(i, entry)| entry.take().map(|e| (i, e)))?; + let entry = loop { + let entry = inner.entries.pop()?; + if let Some(entry) = entry { + break entry; + } + }; inner.used -= 1; inner.indices[entry.index] = IndexEntry::DUMMY; - inner.next_new_entry_idx = entry_idx; + inner.next_new_entry_idx = inner.entries.len(); Some((entry.key, entry.value)) } From 06524854368dc295b8ac18ab1c793ff1e2b1ead0 Mon Sep 17 00:00:00 2001 From: Jeong Yunwon Date: Wed, 11 May 2022 23:30:53 +0900 Subject: [PATCH 3/6] Remove DictInner::next_new_entry_idx --- vm/src/dictdatatype.rs | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 9bcc9bdf4..2fdd6925d 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -75,8 +75,6 @@ struct DictInner { filled: usize, indices: Vec, entries: Vec>>, - // index to new inserted element should be in entries - next_new_entry_idx: usize, } impl Clone for Dict { @@ -95,7 +93,6 @@ impl Default for Dict { filled: 0, indices: vec![IndexEntry::FREE; 8], entries: Vec::new(), - next_new_entry_idx: 0, }), } } @@ -188,7 +185,6 @@ impl DictInner { } } self.filled = self.used; - self.next_new_entry_idx = self.entries.len(); } fn unchecked_push( @@ -205,15 +201,10 @@ impl DictInner { value, index, }; - let entry_index = self.next_new_entry_idx; - if self.entries.len() == entry_index { - self.entries.push(Some(entry)); - } else { - self.entries[entry_index] = Some(entry); - } + let entry_index = self.entries.len(); + self.entries.push(Some(entry)); self.indices[index] = entry_index as i64; self.used += 1; - self.next_new_entry_idx += 1; if let IndexEntry::Free = index_entry { self.filled += 1; if let Some(new_size) = self.should_resize() { @@ -347,7 +338,6 @@ impl Dict { inner.indices.resize(8, IndexEntry::FREE); inner.used = 0; inner.filled = 0; - inner.next_new_entry_idx = 0; // defer dec rc std::mem::take(&mut inner.entries) }; @@ -653,7 +643,6 @@ impl Dict { }; inner.used -= 1; inner.indices[entry.index] = IndexEntry::DUMMY; - inner.next_new_entry_idx = inner.entries.len(); Some((entry.key, entry.value)) } From 855ab4b4e9ca12f2805b15acbadd78367180c49f Mon Sep 17 00:00:00 2001 From: Jeong Yunwon Date: Thu, 12 May 2022 00:10:34 +0900 Subject: [PATCH 4/6] DictInner::indices: Vec => Vec --- vm/src/dictdatatype.rs | 93 ++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 2fdd6925d..72f95f67e 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -37,34 +37,24 @@ impl fmt::Debug for Dict { } } -#[derive(Debug, Copy, Clone)] -enum IndexEntry { - Dummy, - Free, - Index(usize), -} +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[repr(transparent)] +struct IndexEntry(i64); impl IndexEntry { - const FREE: i64 = -1; - const DUMMY: i64 = -2; -} + const FREE: Self = Self(-1); + const DUMMY: Self = Self(-2); -impl From for IndexEntry { - fn from(idx: i64) -> Self { - match idx { - IndexEntry::FREE => IndexEntry::Free, - IndexEntry::DUMMY => IndexEntry::Dummy, - x => IndexEntry::Index(x as usize), - } + unsafe fn from_index_unchecked(idx: usize) -> Self { + debug_assert!((idx as isize) >= 0); + Self(idx as i64) } -} -impl From for i64 { - fn from(idx: IndexEntry) -> Self { - match idx { - IndexEntry::Free => IndexEntry::FREE, - IndexEntry::Dummy => IndexEntry::DUMMY, - IndexEntry::Index(i) => i as i64, + fn index(self) -> Option { + if self.0 >= 0 { + Some(self.0 as usize) + } else { + None } } } @@ -73,7 +63,7 @@ impl From for i64 { struct DictInner { used: usize, filled: usize, - indices: Vec, + indices: Vec, entries: Vec>>, } @@ -175,7 +165,10 @@ impl DictInner { let index_index = idxs.next(); let idx = &mut self.indices[index_index]; if *idx == IndexEntry::FREE { - *idx = entry_idx as i64; + *idx = unsafe { + // entry_idx never grow up to usize::MAX + IndexEntry::from_index_unchecked(entry_idx) + }; entry.index = index_index; break; } @@ -203,9 +196,9 @@ impl DictInner { }; let entry_index = self.entries.len(); self.entries.push(Some(entry)); - self.indices[index] = entry_index as i64; + self.indices[index] = unsafe { IndexEntry::from_index_unchecked(entry_index) }; self.used += 1; - if let IndexEntry::Free = index_entry { + if let IndexEntry::FREE = index_entry { self.filled += 1; if let Some(new_size) = self.should_resize() { self.resize(new_size) @@ -252,7 +245,7 @@ impl Dict { let _removed = loop { let (entry_index, index_index) = self.lookup(vm, key, hash, None)?; let mut inner = self.write(); - if let IndexEntry::Index(index) = entry_index { + if let Some(index) = entry_index.index() { // Update existing key if let Some(entry) = inner.entries.get_mut(index) { let entry = entry @@ -279,7 +272,7 @@ impl Dict { pub fn contains(&self, vm: &VirtualMachine, key: &K) -> PyResult { let (entry, _) = self.lookup(vm, key, key.key_hash(vm)?, None)?; - Ok(matches!(entry, IndexEntry::Index(_))) + Ok(entry.index().is_some()) } /// Retrieve a key @@ -297,7 +290,7 @@ impl Dict { ) -> PyResult> { let ret = loop { let (entry, index_index) = self.lookup(vm, key, hash, None)?; - if let IndexEntry::Index(index) = entry { + if let Some(index) = entry.index() { let inner = self.read(); if let Some(entry) = inner.entries.get(index) { let entry = extract_dict_entry(entry); @@ -385,7 +378,7 @@ impl Dict { let _removed = loop { let lookup = self.lookup(vm, key, hash, None)?; let (entry, index_index) = lookup; - if let IndexEntry::Index(_) = entry { + if entry.index().is_some() { match self.pop_inner(lookup) { ControlFlow::Break(Some(entry)) => break Some(entry), _ => continue, @@ -407,8 +400,8 @@ impl Dict { let hash = key.key_hash(vm)?; let res = loop { let lookup = self.lookup(vm, key, hash, None)?; - let (entry, index_index) = lookup; - if let IndexEntry::Index(index) = entry { + let (index_entry, index_index) = lookup; + if let Some(index) = index_entry.index() { let inner = self.read(); if let Some(entry) = inner.entries.get(index) { let entry = extract_dict_entry(entry); @@ -424,7 +417,13 @@ impl Dict { } else { let value = default(); let mut inner = self.write(); - inner.unchecked_push(index_index, hash, key.to_pyobject(vm), value.clone(), entry); + inner.unchecked_push( + index_index, + hash, + key.to_pyobject(vm), + value.clone(), + index_entry, + ); break value; } }; @@ -445,8 +444,8 @@ impl Dict { let hash = key.key_hash(vm)?; let res = loop { let lookup = self.lookup(vm, key, hash, None)?; - let (entry, index_index) = lookup; - if let IndexEntry::Index(index) = entry { + let (index_entry, index_index) = lookup; + if let Some(index) = index_entry.index() { let inner = self.read(); if let Some(entry) = inner.entries.get(index) { let entry = extract_dict_entry(entry); @@ -464,7 +463,7 @@ impl Dict { let key = key.to_pyobject(vm); let mut inner = self.write(); let ret = (key.clone(), value.clone()); - inner.unchecked_push(index_index, hash, key, value, entry); + inner.unchecked_push(index_index, hash, key, value, index_entry); break ret; } }; @@ -541,22 +540,26 @@ impl Dict { }); loop { let index_index = idxs.next(); - match IndexEntry::from(inner.indices[index_index]) { - IndexEntry::Dummy => { + match inner.indices[index_index] { + IndexEntry::DUMMY => { if freeslot.is_none() { freeslot = Some(index_index); } } - IndexEntry::Free => { + IndexEntry::FREE => { let idxs = match freeslot { - Some(free) => (IndexEntry::Dummy, free), - None => (IndexEntry::Free, index_index), + Some(free) => (IndexEntry::DUMMY, free), + None => (IndexEntry::FREE, index_index), }; return Ok(idxs); } - IndexEntry::Index(i) => { + idx => { + let i = idx.index().unwrap_or_else(|| unsafe { + // DUMMY and FREE is already checked above. + std::hint::unreachable_unchecked() + }); let entry = &inner.entries[i].as_ref().unwrap(); - let ret = (IndexEntry::Index(i), index_index); + let ret = (idx, index_index); if key.key_is(&entry.key) { break 'outer ret; } else if entry.hash == hash_value { @@ -593,7 +596,7 @@ impl Dict { pred: impl Fn(&T) -> Result, ) -> Result, E> { let (entry_index, index_index) = lookup; - let entry_index = if let IndexEntry::Index(entry_index) = entry_index { + let entry_index = if let Some(entry_index) = entry_index.index() { entry_index } else { return Ok(ControlFlow::Break(None)); From 52cce3f048f4a41824f292c4b2bfc0f048f043fd Mon Sep 17 00:00:00 2001 From: Jeong Yunwon Date: Thu, 12 May 2022 00:23:55 +0900 Subject: [PATCH 5/6] get_unchecked when compiler cannot guess boundary-safety --- vm/src/dictdatatype.rs | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 72f95f67e..179db0d6d 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -163,14 +163,14 @@ impl DictInner { let mut idxs = GenIndexes::new(entry.hash, mask); loop { let index_index = idxs.next(); - let idx = &mut self.indices[index_index]; - if *idx == IndexEntry::FREE { - *idx = unsafe { - // entry_idx never grow up to usize::MAX - IndexEntry::from_index_unchecked(entry_idx) - }; - entry.index = index_index; - break; + unsafe { + // index is always valid here + let idx = self.indices.get_unchecked_mut(index_index); + if *idx == IndexEntry::FREE { + *idx = IndexEntry::from_index_unchecked(entry_idx); + entry.index = index_index; + break; + } } } } else { @@ -531,7 +531,7 @@ impl Dict { mut lock: Option>>, ) -> PyResult { let mut idxs = None; - let mut freeslot = None; + let mut free_slot = None; let ret = 'outer: loop { let (entry_key, ret) = { let inner = lock.take().unwrap_or_else(|| self.read()); @@ -540,25 +540,27 @@ impl Dict { }); loop { let index_index = idxs.next(); - match inner.indices[index_index] { + let index_entry = *unsafe { inner.indices.get_unchecked(index_index) }; + match index_entry { IndexEntry::DUMMY => { - if freeslot.is_none() { - freeslot = Some(index_index); + if free_slot.is_none() { + free_slot = Some(index_index); } } IndexEntry::FREE => { - let idxs = match freeslot { + let idxs = match free_slot { Some(free) => (IndexEntry::DUMMY, free), None => (IndexEntry::FREE, index_index), }; return Ok(idxs); } idx => { - let i = idx.index().unwrap_or_else(|| unsafe { - // DUMMY and FREE is already checked above. - std::hint::unreachable_unchecked() - }); - let entry = &inner.entries[i].as_ref().unwrap(); + let entry = unsafe { + // DUMMY and FREE are already handled above. + // i is always valid and entry always exists. + let i = idx.index().unwrap_unchecked(); + inner.entries.get_unchecked(i).as_ref().unwrap_unchecked() + }; let ret = (idx, index_index); if key.key_is(&entry.key) { break 'outer ret; @@ -617,7 +619,7 @@ impl Dict { // The dict was changed since we did lookup. Let's try again. _ => return Ok(ControlFlow::Continue(())), } - inner.indices[index_index] = IndexEntry::DUMMY; + *unsafe { inner.indices.get_unchecked_mut(index_index) } = IndexEntry::DUMMY; inner.used -= 1; let removed = slot.take(); Ok(ControlFlow::Break(removed)) @@ -645,7 +647,7 @@ impl Dict { } }; inner.used -= 1; - inner.indices[entry.index] = IndexEntry::DUMMY; + *unsafe { inner.indices.get_unchecked_mut(entry.index) } = IndexEntry::DUMMY; Some((entry.key, entry.value)) } From 25a66297426965f12f8adf4ae7e105dbe2ef1faa Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 18 May 2022 10:37:51 +0900 Subject: [PATCH 6/6] Add unsafe comments --- vm/src/dictdatatype.rs | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/vm/src/dictdatatype.rs b/vm/src/dictdatatype.rs index 179db0d6d..487207e88 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dictdatatype.rs @@ -45,6 +45,8 @@ impl IndexEntry { const FREE: Self = Self(-1); const DUMMY: Self = Self(-2); + /// # Safety + /// idx must not be one of FREE or DUMMY unsafe fn from_index_unchecked(idx: usize) -> Self { debug_assert!((idx as isize) >= 0); Self(idx as i64) @@ -164,7 +166,9 @@ impl DictInner { loop { let index_index = idxs.next(); unsafe { - // index is always valid here + // Safety: index is always valid here + // index_index is generated by idxs + // entry_idx is saved one let idx = self.indices.get_unchecked_mut(index_index); if *idx == IndexEntry::FREE { *idx = IndexEntry::from_index_unchecked(entry_idx); @@ -196,7 +200,11 @@ impl DictInner { }; let entry_index = self.entries.len(); self.entries.push(Some(entry)); - self.indices[index] = unsafe { IndexEntry::from_index_unchecked(entry_index) }; + self.indices[index] = unsafe { + // SAFETY: entry_index is self.entries.len(). it never can + // grow to `usize-2` because hash tables cannot full its index + IndexEntry::from_index_unchecked(entry_index) + }; self.used += 1; if let IndexEntry::FREE = index_entry { self.filled += 1; @@ -540,7 +548,10 @@ impl Dict { }); loop { let index_index = idxs.next(); - let index_entry = *unsafe { inner.indices.get_unchecked(index_index) }; + let index_entry = *unsafe { + // Safety: index_index is generated + inner.indices.get_unchecked(index_index) + }; match index_entry { IndexEntry::DUMMY => { if free_slot.is_none() { @@ -556,7 +567,7 @@ impl Dict { } idx => { let entry = unsafe { - // DUMMY and FREE are already handled above. + // Safety: DUMMY and FREE are already handled above. // i is always valid and entry always exists. let i = idx.index().unwrap_unchecked(); inner.entries.get_unchecked(i).as_ref().unwrap_unchecked() @@ -619,7 +630,10 @@ impl Dict { // The dict was changed since we did lookup. Let's try again. _ => return Ok(ControlFlow::Continue(())), } - *unsafe { inner.indices.get_unchecked_mut(index_index) } = IndexEntry::DUMMY; + *unsafe { + // index_index is result of lookup + inner.indices.get_unchecked_mut(index_index) + } = IndexEntry::DUMMY; inner.used -= 1; let removed = slot.take(); Ok(ControlFlow::Break(removed)) @@ -647,7 +661,10 @@ impl Dict { } }; inner.used -= 1; - *unsafe { inner.indices.get_unchecked_mut(entry.index) } = IndexEntry::DUMMY; + *unsafe { + // entry.index always refers valid index + inner.indices.get_unchecked_mut(entry.index) + } = IndexEntry::DUMMY; Some((entry.key, entry.value)) }