Merge pull request #3709 from youknowone/pystr-interned

introduce PyStrInterned
This commit is contained in:
Jeong YunWon
2022-05-17 06:53:01 +09:00
committed by GitHub
7 changed files with 210 additions and 59 deletions

View File

@@ -681,8 +681,8 @@ mod array {
#[pyproperty]
fn typecode(&self, vm: &VirtualMachine) -> PyStrRef {
vm.ctx
.intern_string(self.read().typecode().to_string())
.into_pyref()
.intern_str(self.read().typecode().to_string())
.to_str()
}
#[pyproperty]

View File

@@ -112,10 +112,12 @@ impl PyBool {
#[pymethod(magic)]
fn repr(zelf: bool, vm: &VirtualMachine) -> PyStrRef {
if zelf {
vm.ctx.true_str.clone()
vm.ctx.true_str
} else {
vm.ctx.false_str.clone()
vm.ctx.false_str
}
.to_owned()
.into_pyref()
}
#[pymethod(magic)]

View File

@@ -82,7 +82,7 @@ impl ConstantBag for PyObjBag<'_> {
bytecode::BorrowedConstant::Float { value } => ctx.new_float(value).into(),
bytecode::BorrowedConstant::Complex { value } => ctx.new_complex(value).into(),
bytecode::BorrowedConstant::Str { value } if value.len() <= 20 => {
ctx.intern_string(value).into_pyref().into()
ctx.intern_str(value).to_object()
}
bytecode::BorrowedConstant::Str { value } => ctx.new_str(value).into(),
bytecode::BorrowedConstant::Bytes { value } => ctx.new_bytes(value.to_vec()).into(),
@@ -104,7 +104,7 @@ impl ConstantBag for PyObjBag<'_> {
}
fn make_name(&self, name: &str) -> PyStrRef {
self.0.intern_string(name).into_pyref()
self.0.intern_str(name).to_str()
}
}

View File

@@ -227,6 +227,13 @@ impl IntoPyStrRef for &str {
}
}
impl IntoPyStrRef for &'static crate::intern::PyStrInterned {
    /// Produces an owned `PyRef<PyStr>` from an interned string handle.
    ///
    /// The virtual machine argument is not used.
    #[inline]
    fn into_pystr_ref(self, _vm: &VirtualMachine) -> PyRef<PyStr> {
        crate::intern::PyStrInterned::to_str(self)
    }
}
#[pyclass(module = false, name = "str_iterator")]
#[derive(Debug)]
pub struct PyStrIterator {

View File

@@ -1,9 +1,13 @@
use crate::{
builtins::{PyStr, PyTypeRef},
common::lock::PyRwLock,
Py, PyRef, PyRefExact,
convert::ToPyObject,
Py, PyObject, PyObjectRef, PyRef, PyRefExact,
};
use std::{
borrow::{Borrow, ToOwned},
ops::Deref,
};
use std::ops::Deref;
#[derive(Debug)]
pub struct StringPool {
@@ -28,24 +32,40 @@ impl Clone for StringPool {
impl StringPool {
#[inline]
pub unsafe fn intern<S: Internable>(&self, s: S, typ: PyTypeRef) -> PyRefExact<PyStr> {
if let Some(found) = self.inner.read().get(s.as_str()) {
return found.clone().inner;
pub unsafe fn intern<S: Internable>(&self, s: S, typ: PyTypeRef) -> &'static PyStrInterned {
if let Some(found) = self.interned(s.as_ref()) {
return found;
}
let cache = CachedPyStrRef {
inner: s.into_pyref(typ),
};
let inserted = self.inner.write().insert(cache.clone());
if inserted {
cache.inner
} else {
self.inner
.read()
.get(cache.inner.as_str())
.unwrap()
.clone()
.inner
#[cold]
fn miss(zelf: &StringPool, s: PyRefExact<PyStr>) -> &'static PyStrInterned {
let cache = CachedPyStrRef { inner: s };
let inserted = zelf.inner.write().insert(cache.clone());
if inserted {
let interned = unsafe { PyStrInterned::borrow_cache(&cache) };
// unsafe { interned.as_object().mark_intern() };
interned
} else {
zelf.inner
.read()
.get(cache.as_str())
.map(|cached| unsafe { PyStrInterned::borrow_cache(cached) })
.expect("")
}
}
let str_ref = s.into_pyref_exact(typ);
miss(self, str_ref)
}
#[inline]
pub fn interned<S: MaybeInterned + ?Sized>(&self, s: &S) -> Option<&'static PyStrInterned> {
if let Some(interned) = s.as_interned() {
return Some(interned);
}
self.inner
.read()
.get(s.as_ref())
.map(|cached| unsafe { PyStrInterned::borrow_cache(cached) })
}
}
@@ -70,57 +90,178 @@ impl PartialEq for CachedPyStrRef {
impl Eq for CachedPyStrRef {}
impl std::borrow::Borrow<str> for CachedPyStrRef {
#[inline]
fn borrow(&self) -> &str {
self.inner.as_str()
}
}
impl AsRef<str> for CachedPyStrRef {
    /// Exposes the text of the cached string object.
    #[inline]
    fn as_ref(&self) -> &str {
        self.inner.as_str()
    }
}
impl CachedPyStrRef {
    /// Returns the text of the wrapped string object.
    #[inline]
    fn as_str(&self) -> &str {
        let Self { inner } = self;
        inner.as_str()
    }
}
/// The unique reference to an interned `PyStr`.
///
/// Always intended to be used as a static reference (`&'static PyStrInterned`).
/// Equality and hashing for this type are based on the address of the value,
/// not on the string contents.
pub struct PyStrInterned {
// The string object this handle stands for.
inner: Py<PyStr>,
}
impl PyStrInterned {
/// Reinterprets a pool entry as a `&'static PyStrInterned`.
///
/// # Safety
/// The given cache must stay alive for as long as the returned reference is
/// used; the `'static` lifetime is asserted by this function, not checked.
#[inline]
unsafe fn borrow_cache(cache: &CachedPyStrRef) -> &'static Self {
// Bitwise-copies the contents of `cache` and treats them as the returned reference.
// NOTE(review): presumes `CachedPyStrRef` has the same size and representation as
// `&PyStrInterned` (its definition is outside this view) — confirm.
std::mem::transmute_copy(cache)
}
/// Returns the address of this value, typed as a pointer to `Py<PyStr>`.
#[inline]
fn as_ptr(&self) -> *const Py<PyStr> {
self as *const _ as *const _
}
/// Returns a new owned `PyRefExact<PyStr>` for the interned string.
#[inline]
pub fn to_owned(&'static self) -> PyRefExact<PyStr> {
// `&self` is the address of the local `&'static PyStrInterned` itself; that slot is
// read as a `PyRefExact<PyStr>` and then cloned.
// NOTE(review): presumes `&'static PyStrInterned` and `PyRefExact<PyStr>` share the
// same representation — confirm against their definitions.
unsafe { (*(&self as *const _ as *const PyRefExact<PyStr>)).clone() }
}
/// Returns an owned `PyRef<PyStr>`, obtained from `to_owned` followed by `into_pyref`.
#[inline]
pub fn to_str(&'static self) -> PyRef<PyStr> {
self.to_owned().into_pyref()
}
/// Returns an owned `PyObjectRef`, obtained by converting the result of `to_str`.
#[inline]
pub fn to_object(&'static self) -> PyObjectRef {
self.to_str().into()
}
}
impl Borrow<PyObject> for PyStrInterned {
#[inline(always)]
fn borrow(&self) -> &PyObject {
self.inner.borrow()
}
}
impl Deref for PyStrInterned {
    type Target = Py<PyStr>;

    /// Gives access to the wrapped `Py<PyStr>`.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        let Self { inner } = self;
        inner
    }
}
impl std::hash::Hash for PyStrInterned {
    /// Hashes the address of this value rather than the string contents.
    #[inline(always)]
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::ptr::hash(self, state)
    }
}
impl PartialEq for PyStrInterned {
    /// Two handles are equal exactly when they are the same value in memory.
    #[inline(always)]
    fn eq(&self, other: &Self) -> bool {
        let lhs: *const Self = self;
        let rhs: *const Self = other;
        lhs == rhs
    }
}
// `PartialEq` for this type compares addresses, which is reflexive, so `Eq` holds.
impl Eq for PyStrInterned {}
impl AsRef<str> for PyStrInterned {
    /// Exposes the text of the interned string.
    #[inline]
    fn as_ref(&self) -> &str {
        self.inner.as_str()
    }
}
impl std::fmt::Debug for PyStrInterned {
    /// Writes the string in its `Debug` form, followed by `@` and the
    /// address of this value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = self.as_str();
        <str as std::fmt::Debug>::fmt(text, f)?;
        f.write_fmt(format_args!("@{:p}", self.as_ptr()))
    }
}
impl std::fmt::Display for PyStrInterned {
    /// Writes the string contents, honouring the formatter's width,
    /// precision and alignment exactly as `str` does.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = self.as_str();
        f.pad(text)
    }
}
mod sealed {
use crate::{builtins::PyStr, object::PyRefExact};
use crate::{
builtins::PyStr,
object::{Py, PyRefExact},
};
pub trait SealedInternable {}
impl SealedInternable for String {}
impl SealedInternable for &str {}
impl SealedInternable for PyRefExact<PyStr> {}
pub trait SealedMaybeInterned {}
impl SealedMaybeInterned for str {}
impl SealedMaybeInterned for PyRefExact<PyStr> {}
impl SealedMaybeInterned for Py<PyStr> {}
}
/// A sealed marker trait for `DictKey` types that always become an exact instance of `str`
pub trait Internable: sealed::SealedInternable + AsRef<Self::Key> {
type Key: crate::dictdatatype::DictKey + ?Sized;
fn as_str(&self) -> &str;
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr>;
pub trait Internable: sealed::SealedInternable + ToPyObject + AsRef<Self::Interned> {
type Interned: ?Sized + MaybeInterned;
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr>;
}
impl Internable for String {
type Key = str;
fn as_str(&self) -> &str {
String::as_str(self)
}
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
type Interned = str;
#[inline]
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
let obj = PyRef::new_ref(PyStr::from(self), str_type, None);
unsafe { PyRefExact::new_unchecked(obj) }
}
}
impl Internable for &str {
type Key = str;
fn as_str(&self) -> &str {
self
}
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
self.to_owned().into_pyref(str_type)
type Interned = str;
#[inline]
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
self.to_owned().into_pyref_exact(str_type)
}
}
impl Internable for PyRefExact<PyStr> {
type Key = Py<PyStr>;
fn as_str(&self) -> &str {
self.deref().as_str()
}
fn into_pyref(self, _str_type: PyTypeRef) -> PyRefExact<PyStr> {
type Interned = Py<PyStr>;
#[inline]
fn into_pyref_exact(self, _str_type: PyTypeRef) -> PyRefExact<PyStr> {
self
}
}
/// A sealed trait for string-like keys that may already be interned.
///
/// `as_interned` reports the interned handle when the value is known to be
/// one, which lets `StringPool::interned` return it without a pool lookup.
pub trait MaybeInterned:
    sealed::SealedMaybeInterned + crate::dictdatatype::DictKey + AsRef<str>
{
    /// Returns the interned handle for this value, if it is already known.
    fn as_interned(&self) -> Option<&'static PyStrInterned>;
}
impl MaybeInterned for str {
    /// A plain string slice never carries an interned handle.
    #[inline(always)]
    fn as_interned(&self) -> Option<&'static PyStrInterned> {
        Option::None
    }
}
impl MaybeInterned for Py<PyStr> {
    /// Always reports "not known to be interned"; this implementation does
    /// not inspect the object.
    #[inline(always)]
    fn as_interned(&self) -> Option<&'static PyStrInterned> {
        Option::None
    }
}

View File

@@ -125,14 +125,14 @@ mod sys {
fn byteorder(vm: &VirtualMachine) -> PyStrRef {
// https://doc.rust-lang.org/reference/conditional-compilation.html#target_endian
vm.ctx
.intern_string(if cfg!(target_endian = "little") {
.intern_str(if cfg!(target_endian = "little") {
"little"
} else if cfg!(target_endian = "big") {
"big"
} else {
"unknown"
})
.into_pyref()
.to_str()
}
#[pyattr]
@@ -513,7 +513,7 @@ mod sys {
#[pyfunction]
fn intern(s: PyRefExact<PyStr>, vm: &VirtualMachine) -> PyRefExact<PyStr> {
vm.ctx.intern_string(s)
vm.ctx.intern_str(s).to_owned()
}
#[pyattr]

View File

@@ -13,8 +13,8 @@ use crate::{
class::{PyClassImpl, StaticType},
exceptions,
function::IntoPyNativeFunc,
intern::{Internable, StringPool},
object::{PyObjectPayload, PyObjectRef, PyPayload, PyRef, PyRefExact},
intern::{Internable, PyStrInterned, StringPool},
object::{PyObjectPayload, PyObjectRef, PyPayload, PyRef},
types::{PyTypeFlags, PyTypeSlots, TypeZoo},
};
use num_bigint::BigInt;
@@ -32,8 +32,8 @@ pub struct Context {
pub ellipsis: PyRef<PyEllipsis>,
pub not_implemented: PyRef<PyNotImplemented>,
pub(crate) true_str: PyRef<PyStr>,
pub(crate) false_str: PyRef<PyStr>,
pub(crate) true_str: &'static PyStrInterned,
pub(crate) false_str: &'static PyStrInterned,
pub types: TypeZoo,
pub exceptions: exceptions::ExceptionZoo,
@@ -80,14 +80,15 @@ impl Context {
let new_str = unsafe { string_pool.intern("__new__", types.str_type.clone()) };
let slot_new_wrapper = create_object(
PyNativeFuncDef::new(PyType::__new__.into_func(), new_str.into_pyref()).into_function(),
PyNativeFuncDef::new(PyType::__new__.into_func(), new_str.to_owned().into_pyref())
.into_function(),
&types.builtin_function_or_method_type,
)
.into();
let true_str = unsafe { string_pool.intern("True", types.str_type.clone()) }.into_pyref();
let false_str = unsafe { string_pool.intern("False", types.str_type.clone()) }.into_pyref();
let empty_str = unsafe { string_pool.intern("", types.str_type.clone()) }.into_pyref();
let true_str = unsafe { string_pool.intern("True", types.str_type.clone()) };
let false_str = unsafe { string_pool.intern("False", types.str_type.clone()) };
let empty_str = unsafe { string_pool.intern("", types.str_type.clone()) }.to_str();
let context = Context {
true_value,
@@ -114,7 +115,7 @@ impl Context {
context
}
pub fn intern_string<S: Internable>(&self, s: S) -> PyRefExact<PyStr> {
pub fn intern_str<S: Internable>(&self, s: S) -> &'static PyStrInterned {
unsafe { self.string_pool.intern(s, self.types.str_type.clone()) }
}