From 6e571946a0383bc946326561d6521d78673a2b4c Mon Sep 17 00:00:00 2001 From: Jeong Yunwon Date: Thu, 12 May 2022 06:38:31 +0900 Subject: [PATCH] introduce PyStrInterned --- stdlib/src/array.rs | 4 +- vm/src/builtins/bool.rs | 6 +- vm/src/builtins/code.rs | 4 +- vm/src/builtins/str.rs | 7 ++ vm/src/intern.rs | 223 ++++++++++++++++++++++++++++++++-------- vm/src/stdlib/sys.rs | 6 +- vm/src/vm/context.rs | 19 ++-- 7 files changed, 210 insertions(+), 59 deletions(-) diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index c0344bfb7..1d289b5a6 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -681,8 +681,8 @@ mod array { #[pyproperty] fn typecode(&self, vm: &VirtualMachine) -> PyStrRef { vm.ctx - .intern_string(self.read().typecode().to_string()) - .into_pyref() + .intern_str(self.read().typecode().to_string()) + .to_str() } #[pyproperty] diff --git a/vm/src/builtins/bool.rs b/vm/src/builtins/bool.rs index b51ceae6f..fab3e9cfe 100644 --- a/vm/src/builtins/bool.rs +++ b/vm/src/builtins/bool.rs @@ -112,10 +112,12 @@ impl PyBool { #[pymethod(magic)] fn repr(zelf: bool, vm: &VirtualMachine) -> PyStrRef { if zelf { - vm.ctx.true_str.clone() + vm.ctx.true_str } else { - vm.ctx.false_str.clone() + vm.ctx.false_str } + .to_owned() + .into_pyref() } #[pymethod(magic)] diff --git a/vm/src/builtins/code.rs b/vm/src/builtins/code.rs index 8b828e04b..c0f216f13 100644 --- a/vm/src/builtins/code.rs +++ b/vm/src/builtins/code.rs @@ -82,7 +82,7 @@ impl ConstantBag for PyObjBag<'_> { bytecode::BorrowedConstant::Float { value } => ctx.new_float(value).into(), bytecode::BorrowedConstant::Complex { value } => ctx.new_complex(value).into(), bytecode::BorrowedConstant::Str { value } if value.len() <= 20 => { - ctx.intern_string(value).into_pyref().into() + ctx.intern_str(value).to_object() } bytecode::BorrowedConstant::Str { value } => ctx.new_str(value).into(), bytecode::BorrowedConstant::Bytes { value } => ctx.new_bytes(value.to_vec()).into(), @@ -104,7 +104,7 @@ impl ConstantBag for PyObjBag<'_> { } fn make_name(&self, name: &str) -> PyStrRef { - self.0.intern_string(name).into_pyref() + self.0.intern_str(name).to_str() } } diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 11b84e7f8..9c74de710 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -227,6 +227,13 @@ impl IntoPyStrRef for &str { } } +impl IntoPyStrRef for &'static crate::intern::PyStrInterned { + #[inline] + fn into_pystr_ref(self, _vm: &VirtualMachine) -> PyRef { + self.to_str() + } +} + #[pyclass(module = false, name = "str_iterator")] #[derive(Debug)] pub struct PyStrIterator { diff --git a/vm/src/intern.rs b/vm/src/intern.rs index c7109c19a..5ba888ed2 100644 --- a/vm/src/intern.rs +++ b/vm/src/intern.rs @@ -1,9 +1,13 @@ use crate::{ builtins::{PyStr, PyTypeRef}, common::lock::PyRwLock, - Py, PyRef, PyRefExact, + convert::ToPyObject, + Py, PyObject, PyObjectRef, PyRef, PyRefExact, +}; +use std::{ + borrow::{Borrow, ToOwned}, + ops::Deref, }; -use std::ops::Deref; #[derive(Debug)] pub struct StringPool { @@ -28,24 +32,40 @@ impl Clone for StringPool { impl StringPool { #[inline] - pub unsafe fn intern(&self, s: S, typ: PyTypeRef) -> PyRefExact { - if let Some(found) = self.inner.read().get(s.as_str()) { - return found.clone().inner; + pub unsafe fn intern(&self, s: S, typ: PyTypeRef) -> &'static PyStrInterned { + if let Some(found) = self.interned(s.as_ref()) { + return found; } - let cache = CachedPyStrRef { - inner: s.into_pyref(typ), - }; - let inserted = self.inner.write().insert(cache.clone()); - if inserted { - cache.inner - } else { - self.inner - .read() - .get(cache.inner.as_str()) - .unwrap() - .clone() - .inner + + #[cold] + fn miss(zelf: &StringPool, s: PyRefExact) -> &'static PyStrInterned { + let cache = CachedPyStrRef { inner: s }; + let inserted = zelf.inner.write().insert(cache.clone()); + if inserted { + let interned = unsafe { PyStrInterned::borrow_cache(&cache) }; + // unsafe { interned.as_object().mark_intern() }; + interned + } else { + zelf.inner + .read() + .get(cache.as_str()) + .map(|cached| unsafe { PyStrInterned::borrow_cache(cached) }) + .expect("") + } } + let str_ref = s.into_pyref_exact(typ); + miss(self, str_ref) + } + + #[inline] + pub fn interned(&self, s: &S) -> Option<&'static PyStrInterned> { + if let Some(interned) = s.as_interned() { + return Some(interned); + } + self.inner + .read() + .get(s.as_ref()) + .map(|cached| unsafe { PyStrInterned::borrow_cache(cached) }) } } @@ -70,57 +90,178 @@ impl PartialEq for CachedPyStrRef { impl Eq for CachedPyStrRef {} impl std::borrow::Borrow for CachedPyStrRef { + #[inline] fn borrow(&self) -> &str { self.inner.as_str() } } +impl AsRef for CachedPyStrRef { + #[inline] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl CachedPyStrRef { + #[inline] + fn as_str(&self) -> &str { + self.inner.as_str() + } +} + +/// The unique reference of interned PyStr +/// Always intended to be used as a static reference +pub struct PyStrInterned { + inner: Py, +} + +impl PyStrInterned { + /// # Safety + /// the given cache must be alive while returned reference is alive + #[inline] + unsafe fn borrow_cache(cache: &CachedPyStrRef) -> &'static Self { + std::mem::transmute_copy(cache) + } + + #[inline] + fn as_ptr(&self) -> *const Py { + self as *const _ as *const _ + } + + #[inline] + pub fn to_owned(&'static self) -> PyRefExact { + unsafe { (*(&self as *const _ as *const PyRefExact)).clone() } + } + + #[inline] + pub fn to_str(&'static self) -> PyRef { + self.to_owned().into_pyref() + } + + #[inline] + pub fn to_object(&'static self) -> PyObjectRef { + self.to_str().into() + } +} + +impl Borrow for PyStrInterned { + #[inline(always)] + fn borrow(&self) -> &PyObject { + self.inner.borrow() + } +} + +impl Deref for PyStrInterned { + type Target = Py; + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl std::hash::Hash for PyStrInterned { + #[inline(always)] + fn hash(&self, state: &mut H) { + std::hash::Hash::hash(&(self as *const _), state) + } +} + +impl PartialEq for PyStrInterned { + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + std::ptr::eq(self, other) + } +} + +impl Eq for PyStrInterned {} + +impl AsRef for PyStrInterned { + #[inline] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl std::fmt::Debug for PyStrInterned { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(self.as_str(), f)?; + write!(f, "@{:p}", self.as_ptr()) + } +} + +impl std::fmt::Display for PyStrInterned { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.as_str(), f) + } +} + mod sealed { - use crate::{builtins::PyStr, object::PyRefExact}; + use crate::{ + builtins::PyStr, + object::{Py, PyRefExact}, + }; pub trait SealedInternable {} impl SealedInternable for String {} - impl SealedInternable for &str {} - impl SealedInternable for PyRefExact {} + + pub trait SealedMaybeInterned {} + + impl SealedMaybeInterned for str {} + impl SealedMaybeInterned for PyRefExact {} + impl SealedMaybeInterned for Py {} } /// A sealed marker trait for `DictKey` types that always become an exact instance of `str` -pub trait Internable: sealed::SealedInternable + AsRef { - type Key: crate::dictdatatype::DictKey + ?Sized; - fn as_str(&self) -> &str; - fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact; +pub trait Internable: sealed::SealedInternable + ToPyObject + AsRef { + type Interned: ?Sized + MaybeInterned; + fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact; } impl Internable for String { - type Key = str; - fn as_str(&self) -> &str { - String::as_str(self) - } - fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact { + type Interned = str; + #[inline] + fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact { let obj = PyRef::new_ref(PyStr::from(self), str_type, None); unsafe { PyRefExact::new_unchecked(obj) } } } impl Internable for &str { - type Key = str; - fn as_str(&self) -> &str { - self - } - fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact { - self.to_owned().into_pyref(str_type) + type Interned = str; + #[inline] + fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact { + self.to_owned().into_pyref_exact(str_type) } } impl Internable for PyRefExact { - type Key = Py; - fn as_str(&self) -> &str { - self.deref().as_str() - } - fn into_pyref(self, _str_type: PyTypeRef) -> PyRefExact { + type Interned = Py; + #[inline] + fn into_pyref_exact(self, _str_type: PyTypeRef) -> PyRefExact { self } } + +pub trait MaybeInterned: + AsRef + crate::dictdatatype::DictKey + sealed::SealedMaybeInterned +{ + fn as_interned(&self) -> Option<&'static PyStrInterned>; +} + +impl MaybeInterned for str { + #[inline(always)] + fn as_interned(&self) -> Option<&'static PyStrInterned> { + None + } +} + +impl MaybeInterned for Py { + #[inline(always)] + fn as_interned(&self) -> Option<&'static PyStrInterned> { + None + } +} diff --git a/vm/src/stdlib/sys.rs b/vm/src/stdlib/sys.rs index ec5512035..c94df5e68 100644 --- a/vm/src/stdlib/sys.rs +++ b/vm/src/stdlib/sys.rs @@ -125,14 +125,14 @@ mod sys { fn byteorder(vm: &VirtualMachine) -> PyStrRef { // https://doc.rust-lang.org/reference/conditional-compilation.html#target_endian vm.ctx - .intern_string(if cfg!(target_endian = "little") { + .intern_str(if cfg!(target_endian = "little") { "little" } else if cfg!(target_endian = "big") { "big" } else { "unknown" }) - .into_pyref() + .to_str() } #[pyattr] @@ -513,7 +513,7 @@ mod sys { #[pyfunction] fn intern(s: PyRefExact, vm: &VirtualMachine) -> PyRefExact { - vm.ctx.intern_string(s) + vm.ctx.intern_str(s).to_owned() } #[pyattr] diff --git a/vm/src/vm/context.rs b/vm/src/vm/context.rs index c69fd074a..1e362c3c5 100644 --- a/vm/src/vm/context.rs +++ b/vm/src/vm/context.rs @@ -13,8 +13,8 @@ use crate::{ class::{PyClassImpl, StaticType}, exceptions, function::IntoPyNativeFunc, - intern::{Internable, StringPool}, - object::{PyObjectPayload, PyObjectRef, PyPayload, PyRef, PyRefExact}, + intern::{Internable, PyStrInterned, StringPool}, + object::{PyObjectPayload, PyObjectRef, PyPayload, PyRef}, types::{PyTypeFlags, PyTypeSlots, TypeZoo}, }; use num_bigint::BigInt; @@ -32,8 +32,8 @@ pub struct Context { pub ellipsis: PyRef, pub not_implemented: PyRef, - pub(crate) true_str: PyRef, - pub(crate) false_str: PyRef, + pub(crate) true_str: &'static PyStrInterned, + pub(crate) false_str: &'static PyStrInterned, pub types: TypeZoo, pub exceptions: exceptions::ExceptionZoo, @@ -80,14 +80,15 @@ impl Context { let new_str = unsafe { string_pool.intern("__new__", types.str_type.clone()) }; let slot_new_wrapper = create_object( - PyNativeFuncDef::new(PyType::__new__.into_func(), new_str.into_pyref()).into_function(), + PyNativeFuncDef::new(PyType::__new__.into_func(), new_str.to_owned().into_pyref()) + .into_function(), &types.builtin_function_or_method_type, ) .into(); - let true_str = unsafe { string_pool.intern("True", types.str_type.clone()) }.into_pyref(); - let false_str = unsafe { string_pool.intern("False", types.str_type.clone()) }.into_pyref(); - let empty_str = unsafe { string_pool.intern("", types.str_type.clone()) }.into_pyref(); + let true_str = unsafe { string_pool.intern("True", types.str_type.clone()) }; + let false_str = unsafe { string_pool.intern("False", types.str_type.clone()) }; + let empty_str = unsafe { string_pool.intern("", types.str_type.clone()) }.to_str(); let context = Context { true_value, @@ -114,7 +115,7 @@ impl Context { context } - pub fn intern_string(&self, s: S) -> PyRefExact { + pub fn intern_str(&self, s: S) -> &'static PyStrInterned { unsafe { self.string_pool.intern(s, self.types.str_type.clone()) } }