mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-09 22:49:57 +09:00
Merge pull request #3709 from youknowone/pystr-interned
introduce PyStrInterned
This commit is contained in:
@@ -681,8 +681,8 @@ mod array {
|
||||
#[pyproperty]
|
||||
fn typecode(&self, vm: &VirtualMachine) -> PyStrRef {
|
||||
vm.ctx
|
||||
.intern_string(self.read().typecode().to_string())
|
||||
.into_pyref()
|
||||
.intern_str(self.read().typecode().to_string())
|
||||
.to_str()
|
||||
}
|
||||
|
||||
#[pyproperty]
|
||||
|
||||
@@ -112,10 +112,12 @@ impl PyBool {
|
||||
#[pymethod(magic)]
|
||||
fn repr(zelf: bool, vm: &VirtualMachine) -> PyStrRef {
|
||||
if zelf {
|
||||
vm.ctx.true_str.clone()
|
||||
vm.ctx.true_str
|
||||
} else {
|
||||
vm.ctx.false_str.clone()
|
||||
vm.ctx.false_str
|
||||
}
|
||||
.to_owned()
|
||||
.into_pyref()
|
||||
}
|
||||
|
||||
#[pymethod(magic)]
|
||||
|
||||
@@ -82,7 +82,7 @@ impl ConstantBag for PyObjBag<'_> {
|
||||
bytecode::BorrowedConstant::Float { value } => ctx.new_float(value).into(),
|
||||
bytecode::BorrowedConstant::Complex { value } => ctx.new_complex(value).into(),
|
||||
bytecode::BorrowedConstant::Str { value } if value.len() <= 20 => {
|
||||
ctx.intern_string(value).into_pyref().into()
|
||||
ctx.intern_str(value).to_object()
|
||||
}
|
||||
bytecode::BorrowedConstant::Str { value } => ctx.new_str(value).into(),
|
||||
bytecode::BorrowedConstant::Bytes { value } => ctx.new_bytes(value.to_vec()).into(),
|
||||
@@ -104,7 +104,7 @@ impl ConstantBag for PyObjBag<'_> {
|
||||
}
|
||||
|
||||
fn make_name(&self, name: &str) -> PyStrRef {
|
||||
self.0.intern_string(name).into_pyref()
|
||||
self.0.intern_str(name).to_str()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -227,6 +227,13 @@ impl IntoPyStrRef for &str {
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows an already-interned string reference to be passed wherever an
/// `IntoPyStrRef` value is accepted.
impl IntoPyStrRef for &'static crate::intern::PyStrInterned {
|
||||
#[inline]
|
||||
// The VM handle is unused: the conversion simply delegates to
// `PyStrInterned::to_str`, which produces an owned `PyRef<PyStr>`.
fn into_pystr_ref(self, _vm: &VirtualMachine) -> PyRef<PyStr> {
|
||||
self.to_str()
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(module = false, name = "str_iterator")]
|
||||
#[derive(Debug)]
|
||||
pub struct PyStrIterator {
|
||||
|
||||
223
vm/src/intern.rs
223
vm/src/intern.rs
@@ -1,9 +1,13 @@
|
||||
use crate::{
|
||||
builtins::{PyStr, PyTypeRef},
|
||||
common::lock::PyRwLock,
|
||||
Py, PyRef, PyRefExact,
|
||||
convert::ToPyObject,
|
||||
Py, PyObject, PyObjectRef, PyRef, PyRefExact,
|
||||
};
|
||||
use std::{
|
||||
borrow::{Borrow, ToOwned},
|
||||
ops::Deref,
|
||||
};
|
||||
use std::ops::Deref;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct StringPool {
|
||||
@@ -28,24 +32,40 @@ impl Clone for StringPool {
|
||||
|
||||
impl StringPool {
|
||||
#[inline]
|
||||
pub unsafe fn intern<S: Internable>(&self, s: S, typ: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
if let Some(found) = self.inner.read().get(s.as_str()) {
|
||||
return found.clone().inner;
|
||||
pub unsafe fn intern<S: Internable>(&self, s: S, typ: PyTypeRef) -> &'static PyStrInterned {
|
||||
if let Some(found) = self.interned(s.as_ref()) {
|
||||
return found;
|
||||
}
|
||||
let cache = CachedPyStrRef {
|
||||
inner: s.into_pyref(typ),
|
||||
};
|
||||
let inserted = self.inner.write().insert(cache.clone());
|
||||
if inserted {
|
||||
cache.inner
|
||||
} else {
|
||||
self.inner
|
||||
.read()
|
||||
.get(cache.inner.as_str())
|
||||
.unwrap()
|
||||
.clone()
|
||||
.inner
|
||||
|
||||
#[cold]
|
||||
fn miss(zelf: &StringPool, s: PyRefExact<PyStr>) -> &'static PyStrInterned {
|
||||
let cache = CachedPyStrRef { inner: s };
|
||||
let inserted = zelf.inner.write().insert(cache.clone());
|
||||
if inserted {
|
||||
let interned = unsafe { PyStrInterned::borrow_cache(&cache) };
|
||||
// unsafe { interned.as_object().mark_intern() };
|
||||
interned
|
||||
} else {
|
||||
zelf.inner
|
||||
.read()
|
||||
.get(cache.as_str())
|
||||
.map(|cached| unsafe { PyStrInterned::borrow_cache(cached) })
|
||||
.expect("")
|
||||
}
|
||||
}
|
||||
let str_ref = s.into_pyref_exact(typ);
|
||||
miss(self, str_ref)
|
||||
}
|
||||
|
||||
/// Looks up `s` in the pool without inserting it.
///
/// Returns `Some` when `s` reports itself as interned through
/// `MaybeInterned::as_interned`, or when an entry equal to `s.as_ref()` is
/// found in `self.inner`; returns `None` otherwise.
#[inline]
|
||||
pub fn interned<S: MaybeInterned + ?Sized>(&self, s: &S) -> Option<&'static PyStrInterned> {
|
||||
// Fast path: the value already knows its interned identity, no lock needed.
if let Some(interned) = s.as_interned() {
|
||||
return Some(interned);
|
||||
}
|
||||
// Slow path: look the string content up under the read guard.
self.inner
|
||||
.read()
|
||||
.get(s.as_ref())
|
||||
// SAFETY (per `borrow_cache`'s contract): the cached entry must stay alive
// for as long as the returned reference is used.
// NOTE(review): nothing visible here removes entries from the pool, but the
// `'static` lifetime outlives the read guard -- confirm entries are never dropped.
.map(|cached| unsafe { PyStrInterned::borrow_cache(cached) })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,57 +90,178 @@ impl PartialEq for CachedPyStrRef {
|
||||
impl Eq for CachedPyStrRef {}
|
||||
|
||||
/// Exposes the cached string's contents as `&str`.
// NOTE(review): presumably this is what lets the pool be queried with a plain
// `&str` key (see `.get(s.as_ref())` in `StringPool::interned`) -- confirm
// against the pool's container type.
impl std::borrow::Borrow<str> for CachedPyStrRef {
|
||||
#[inline]
|
||||
fn borrow(&self) -> &str {
|
||||
self.inner.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
/// Cheap `&str` view of the cached string; forwards to `CachedPyStrRef::as_str`.
impl AsRef<str> for CachedPyStrRef {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &str {
|
||||
self.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl CachedPyStrRef {
|
||||
/// Returns the string contents of the wrapped `inner` string object.
#[inline]
|
||||
fn as_str(&self) -> &str {
|
||||
self.inner.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
/// The unique reference of an interned `PyStr`.
|
||||
/// Always intended to be used as a static reference (`&'static PyStrInterned`);
/// the accessor methods on this type take `&'static self`.
|
||||
pub struct PyStrInterned {
|
||||
// The string object itself. References to this struct are produced by
// `borrow_cache`, which reinterprets a pool entry rather than constructing
// a `PyStrInterned` value.
inner: Py<PyStr>,
|
||||
}
|
||||
|
||||
impl PyStrInterned {
|
||||
/// Reinterprets a pool entry as a `&'static PyStrInterned`.
///
/// # Safety
|
||||
/// The given cache must be alive for as long as the returned reference is alive.
/// The `'static` lifetime is asserted by this function, not checked.
|
||||
#[inline]
|
||||
unsafe fn borrow_cache(cache: &CachedPyStrRef) -> &'static Self {
|
||||
// `transmute_copy` reads `size_of::<&Self>()` bytes from `*cache` and
// returns them as the reference.
// NOTE(review): this assumes `CachedPyStrRef` is a pointer-sized wrapper
// whose bits are a valid pointer to the `Py<PyStr>` -- its layout is not
// visible here; confirm.
std::mem::transmute_copy(cache)
|
||||
}
|
||||
|
||||
/// Address of the interned string object, viewed as `*const Py<PyStr>`.
#[inline]
|
||||
fn as_ptr(&self) -> *const Py<PyStr> {
|
||||
self as *const _ as *const _
|
||||
}
|
||||
|
||||
/// Returns an owned, exact-typed handle to the interned string.
// Note: this is an inherent method named `to_owned`; it returns
// `PyRefExact<PyStr>`, not `Self`.
#[inline]
|
||||
pub fn to_owned(&'static self) -> PyRefExact<PyStr> {
|
||||
// `&self` is the address of the local `self` (itself a `&'static Self`),
// so the cast views the reference's pointer bits as a `PyRefExact<PyStr>`
// and clones that, leaving the borrowed view untouched.
// NOTE(review): assumes `PyRefExact<PyStr>` has the same representation
// as a pointer to `Py<PyStr>` -- confirm against its definition.
unsafe { (*(&self as *const _ as *const PyRefExact<PyStr>)).clone() }
|
||||
}
|
||||
|
||||
/// Returns an owned `PyRef<PyStr>` by converting the result of `to_owned`.
#[inline]
|
||||
pub fn to_str(&'static self) -> PyRef<PyStr> {
|
||||
self.to_owned().into_pyref()
|
||||
}
|
||||
|
||||
/// Returns an owned, type-erased `PyObjectRef` by converting the result of `to_str`.
#[inline]
|
||||
pub fn to_object(&'static self) -> PyObjectRef {
|
||||
self.to_str().into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Borrows the interned string as a plain `PyObject` by forwarding to the
/// `Borrow` implementation of the inner `Py<PyStr>`.
// NOTE(review): `PyStrInterned` hashes and compares by address; whether that
// agrees with `PyObject`'s `Hash`/`Eq` (as the `Borrow` contract expects for
// map keys) is not visible here -- confirm.
impl Borrow<PyObject> for PyStrInterned {
|
||||
#[inline(always)]
|
||||
fn borrow(&self) -> &PyObject {
|
||||
self.inner.borrow()
|
||||
}
|
||||
}
|
||||
|
||||
/// Dereferences to the inner `Py<PyStr>`, so its methods can be called
/// directly on a `PyStrInterned` (the other impls here rely on this for `as_str`).
impl Deref for PyStrInterned {
|
||||
type Target = Py<PyStr>;
|
||||
#[inline(always)]
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
/// Hashes by identity: the value fed to the hasher is the address of `self`,
/// not the string contents. This matches the pointer-based `PartialEq` impl.
impl std::hash::Hash for PyStrInterned {
|
||||
#[inline(always)]
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
std::hash::Hash::hash(&(self as *const _), state)
|
||||
}
|
||||
}
|
||||
|
||||
/// Equality is identity: two references are equal only when they point at the
/// same `PyStrInterned`. String contents are not compared.
impl PartialEq for PyStrInterned {
|
||||
#[inline(always)]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
std::ptr::eq(self, other)
|
||||
}
|
||||
}
|
||||
|
||||
// Pointer equality is reflexive, symmetric and transitive, so `Eq` holds.
impl Eq for PyStrInterned {}
|
||||
|
||||
/// Views the interned string's contents as `&str`.
impl AsRef<str> for PyStrInterned {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &str {
|
||||
// `as_str` is not defined on `PyStrInterned` itself; it is reached
// through `Deref` to the inner `Py<PyStr>`.
self.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
/// Debug output is the string's own `Debug` form immediately followed by
/// `@` and the object's address, so distinct interned objects can be told apart.
impl std::fmt::Debug for PyStrInterned {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
std::fmt::Debug::fmt(self.as_str(), f)?;
|
||||
write!(f, "@{:p}", self.as_ptr())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displays exactly like the underlying `&str` (no address suffix, unlike `Debug`).
impl std::fmt::Display for PyStrInterned {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
std::fmt::Display::fmt(self.as_str(), f)
|
||||
}
|
||||
}
|
||||
|
||||
mod sealed {
|
||||
use crate::{builtins::PyStr, object::PyRefExact};
|
||||
use crate::{
|
||||
builtins::PyStr,
|
||||
object::{Py, PyRefExact},
|
||||
};
|
||||
|
||||
pub trait SealedInternable {}
|
||||
|
||||
impl SealedInternable for String {}
|
||||
|
||||
impl SealedInternable for &str {}
|
||||
|
||||
impl SealedInternable for PyRefExact<PyStr> {}
|
||||
|
||||
pub trait SealedMaybeInterned {}
|
||||
|
||||
impl SealedMaybeInterned for str {}
|
||||
impl SealedMaybeInterned for PyRefExact<PyStr> {}
|
||||
impl SealedMaybeInterned for Py<PyStr> {}
|
||||
}
|
||||
|
||||
/// A sealed marker trait for `DictKey` types that always become an exact instance of `str`
|
||||
pub trait Internable: sealed::SealedInternable + AsRef<Self::Key> {
|
||||
type Key: crate::dictdatatype::DictKey + ?Sized;
|
||||
fn as_str(&self) -> &str;
|
||||
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr>;
|
||||
pub trait Internable: sealed::SealedInternable + ToPyObject + AsRef<Self::Interned> {
|
||||
type Interned: ?Sized + MaybeInterned;
|
||||
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr>;
|
||||
}
|
||||
|
||||
impl Internable for String {
|
||||
type Key = str;
|
||||
fn as_str(&self) -> &str {
|
||||
String::as_str(self)
|
||||
}
|
||||
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
type Interned = str;
|
||||
#[inline]
|
||||
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
let obj = PyRef::new_ref(PyStr::from(self), str_type, None);
|
||||
unsafe { PyRefExact::new_unchecked(obj) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Internable for &str {
|
||||
type Key = str;
|
||||
fn as_str(&self) -> &str {
|
||||
self
|
||||
}
|
||||
fn into_pyref(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
self.to_owned().into_pyref(str_type)
|
||||
type Interned = str;
|
||||
#[inline]
|
||||
fn into_pyref_exact(self, str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
self.to_owned().into_pyref_exact(str_type)
|
||||
}
|
||||
}
|
||||
|
||||
impl Internable for PyRefExact<PyStr> {
|
||||
type Key = Py<PyStr>;
|
||||
fn as_str(&self) -> &str {
|
||||
self.deref().as_str()
|
||||
}
|
||||
fn into_pyref(self, _str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
type Interned = Py<PyStr>;
|
||||
#[inline]
|
||||
fn into_pyref_exact(self, _str_type: PyTypeRef) -> PyRefExact<PyStr> {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A string-like key that may already know its interned identity.
///
/// Sealed via `sealed::SealedMaybeInterned`, so it cannot be implemented
/// outside this module. Used as the bound of `StringPool::interned`.
pub trait MaybeInterned:
|
||||
AsRef<str> + crate::dictdatatype::DictKey + sealed::SealedMaybeInterned
|
||||
{
|
||||
/// Returns the interned reference for this value if it is known without
/// consulting the pool, or `None` if a pool lookup is required.
fn as_interned(&self) -> Option<&'static PyStrInterned>;
|
||||
}
|
||||
|
||||
/// A bare `str` carries no interning information, so this always returns
/// `None` and callers fall back to looking the contents up in the pool.
impl MaybeInterned for str {
|
||||
#[inline(always)]
|
||||
fn as_interned(&self) -> Option<&'static PyStrInterned> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Always returns `None`, so even a string object that is in the pool is
/// resolved by a content lookup in `StringPool::interned`.
// NOTE(review): this looks like a placeholder until string objects can record
// that they are interned -- confirm whether a fast path is planned here.
impl MaybeInterned for Py<PyStr> {
|
||||
#[inline(always)]
|
||||
fn as_interned(&self) -> Option<&'static PyStrInterned> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,14 +125,14 @@ mod sys {
|
||||
fn byteorder(vm: &VirtualMachine) -> PyStrRef {
|
||||
// https://doc.rust-lang.org/reference/conditional-compilation.html#target_endian
|
||||
vm.ctx
|
||||
.intern_string(if cfg!(target_endian = "little") {
|
||||
.intern_str(if cfg!(target_endian = "little") {
|
||||
"little"
|
||||
} else if cfg!(target_endian = "big") {
|
||||
"big"
|
||||
} else {
|
||||
"unknown"
|
||||
})
|
||||
.into_pyref()
|
||||
.to_str()
|
||||
}
|
||||
|
||||
#[pyattr]
|
||||
@@ -513,7 +513,7 @@ mod sys {
|
||||
|
||||
#[pyfunction]
|
||||
fn intern(s: PyRefExact<PyStr>, vm: &VirtualMachine) -> PyRefExact<PyStr> {
|
||||
vm.ctx.intern_string(s)
|
||||
vm.ctx.intern_str(s).to_owned()
|
||||
}
|
||||
|
||||
#[pyattr]
|
||||
|
||||
@@ -13,8 +13,8 @@ use crate::{
|
||||
class::{PyClassImpl, StaticType},
|
||||
exceptions,
|
||||
function::IntoPyNativeFunc,
|
||||
intern::{Internable, StringPool},
|
||||
object::{PyObjectPayload, PyObjectRef, PyPayload, PyRef, PyRefExact},
|
||||
intern::{Internable, PyStrInterned, StringPool},
|
||||
object::{PyObjectPayload, PyObjectRef, PyPayload, PyRef},
|
||||
types::{PyTypeFlags, PyTypeSlots, TypeZoo},
|
||||
};
|
||||
use num_bigint::BigInt;
|
||||
@@ -32,8 +32,8 @@ pub struct Context {
|
||||
pub ellipsis: PyRef<PyEllipsis>,
|
||||
pub not_implemented: PyRef<PyNotImplemented>,
|
||||
|
||||
pub(crate) true_str: PyRef<PyStr>,
|
||||
pub(crate) false_str: PyRef<PyStr>,
|
||||
pub(crate) true_str: &'static PyStrInterned,
|
||||
pub(crate) false_str: &'static PyStrInterned,
|
||||
|
||||
pub types: TypeZoo,
|
||||
pub exceptions: exceptions::ExceptionZoo,
|
||||
@@ -80,14 +80,15 @@ impl Context {
|
||||
|
||||
let new_str = unsafe { string_pool.intern("__new__", types.str_type.clone()) };
|
||||
let slot_new_wrapper = create_object(
|
||||
PyNativeFuncDef::new(PyType::__new__.into_func(), new_str.into_pyref()).into_function(),
|
||||
PyNativeFuncDef::new(PyType::__new__.into_func(), new_str.to_owned().into_pyref())
|
||||
.into_function(),
|
||||
&types.builtin_function_or_method_type,
|
||||
)
|
||||
.into();
|
||||
|
||||
let true_str = unsafe { string_pool.intern("True", types.str_type.clone()) }.into_pyref();
|
||||
let false_str = unsafe { string_pool.intern("False", types.str_type.clone()) }.into_pyref();
|
||||
let empty_str = unsafe { string_pool.intern("", types.str_type.clone()) }.into_pyref();
|
||||
let true_str = unsafe { string_pool.intern("True", types.str_type.clone()) };
|
||||
let false_str = unsafe { string_pool.intern("False", types.str_type.clone()) };
|
||||
let empty_str = unsafe { string_pool.intern("", types.str_type.clone()) }.to_str();
|
||||
|
||||
let context = Context {
|
||||
true_value,
|
||||
@@ -114,7 +115,7 @@ impl Context {
|
||||
context
|
||||
}
|
||||
|
||||
pub fn intern_string<S: Internable>(&self, s: S) -> PyRefExact<PyStr> {
|
||||
pub fn intern_str<S: Internable>(&self, s: S) -> &'static PyStrInterned {
|
||||
unsafe { self.string_pool.intern(s, self.types.str_type.clone()) }
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user