use crate::common::{ lock::PyRwLock, rc::{PyRc, PyWeak}, }; use crate::{ builtins::{PyDictRef, PyTypeRef}, IdProtocol, PyObjectPayload, TypeProtocol, VirtualMachine, }; use std::any::TypeId; use std::fmt; use std::marker::PhantomData; use std::mem::ManuallyDrop; use std::ops::Deref; // so, PyObjectRef is basically equivalent to `PyRc>`, except it's // only one pointer in width rather than 2. We do that by manually creating a vtable, and putting // a &'static reference to it inside the `PyRc` rather than adjacent to it, like trait objects do. // This can lead to faster code since there's just less data to pass around, as well as because of // some weird stuff with trait objects, alignment, and padding. // // So, every type has an alignment, which means that if you create a value of it it's location in // memory has to be a multiple of it's alignment. e.g., a type with alignment 4 (like i32) could be // at 0xb7befbc0, 0xb7befbc4, or 0xb7befbc8, but not 0xb7befbc2. If you have a struct and there are // 2 fields whose sizes/alignments don't perfectly fit in with each other, e.g.: // +-------------+-------------+---------------------------+ // | u16 | ? | i32 | // | 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | // +-------------+-------------+---------------------------+ // There has to be padding in the space between the 2 fields. But, if that field is a trait object // (like `dyn PyObjectPayload`) we don't *know* how much padding there is between the `payload` // field and the previous field. So, Rust has to consult the vtable to know the exact offset of // `payload` in `PyObject`, which has a huge performance impact when *every // single payload access* requires a vtable lookup. Thankfully, we're able to avoid that because of // the way we use PyObjectRef, in that whenever we want to access the payload we (almost) always // access it from a generic function. So, rather than doing // // - check vtable for payload offset // - get offset in PyObject struct // - call as_any() method of PyObjectPayload // - call downcast_ref() method of Any // we can just do // - check vtable that typeid matches // - pointer cast directly to *const PyObject // // and at that point the compiler can know the offset of `payload` for us because **we've given it a // concrete type to work with before we ever access the `payload` field** /// A type to just represent "we've erased the type of this object, cast it before you use it" struct Erased; struct PyObjVTable { drop: unsafe fn(*mut PyInner), debug: unsafe fn(*const PyInner, &mut fmt::Formatter) -> fmt::Result, } unsafe fn drop_obj(x: *mut PyInner) { std::ptr::drop_in_place(x as *mut PyInner) } unsafe fn debug_obj( x: *const PyInner, f: &mut fmt::Formatter, ) -> fmt::Result { let x = &*x.cast::>(); fmt::Debug::fmt(x, f) } impl PyObjVTable { pub fn of() -> &'static Self { &PyObjVTable { drop: drop_obj::, debug: debug_obj::, } } } #[repr(C)] struct PyInner { // TODO: move typeid into vtable once TypeId::of is const typeid: TypeId, vtable: &'static PyObjVTable, typ: PyRwLock, // __class__ member dict: Option>, // __dict__ member payload: T, } impl fmt::Debug for PyInner { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "[PyObj {:?}]", &self.payload) } } /// This is an actual python object. It consists of a `typ` which is the /// python class, and carries some rust payload optionally. This rust /// payload can be a rust float or rust int in case of float and int objects. #[repr(transparent)] pub struct PyObject { inner: ManuallyDrop>, } impl PyObject { #[allow(clippy::new_ret_no_self)] pub fn new(payload: T, typ: PyTypeRef, dict: Option) -> PyObjectRef { let inner = PyInner { typeid: TypeId::of::(), vtable: PyObjVTable::of::(), typ: PyRwLock::new(typ), dict: dict.map(PyRwLock::new), payload, }; PyObjectRef::new(PyObject { inner: ManuallyDrop::new(inner), }) } } impl Drop for PyObject { fn drop(&mut self) { let erased = &mut *self.inner as *mut _ as *mut PyInner; // SAFETY: the vtable contains functions that accept payload types that always match up // with the payload of the object unsafe { (self.inner.vtable.drop)(erased) } } } /// The `PyObjectRef` is one of the most used types. It is a reference to a /// python object. A single python object can have multiple references, and /// this reference counting is accounted for by this type. Use the `.clone()` /// method to create a new reference and increment the amount of references /// to the python object by 1. #[derive(Clone)] #[repr(transparent)] pub struct PyObjectRef { rc: PyRc>, } #[derive(Clone)] #[repr(transparent)] pub struct PyObjectWeak { weak: PyWeak>, } pub trait PyObjectWrap where Self: AsRef, { #[inline(always)] fn as_object(&self) -> &PyObjectRef { self.as_ref() } fn into_object(self) -> PyObjectRef; } /// A marker type that just references a raw python object. Don't use directly, pass as a pointer /// back to [`PyObjectRef::from_raw`] pub enum RawPyObject {} impl PyObjectRef { pub fn into_raw(this: Self) -> *const RawPyObject { let ptr = PyRc::as_ptr(&this.rc); std::mem::forget(this); ptr.cast() } /// # Safety /// The raw pointer must have been previously returned from a call to /// [`PyObjectRef::into_raw`]. The user is responsible for ensuring that the inner data is not /// dropped more than once due to mishandling the reference count by calling this function /// too many times. pub unsafe fn from_raw(ptr: *const RawPyObject) -> Self { Self { rc: PyRc::from_raw(ptr.cast()), } } fn new(value: PyObject) -> Self { let inner = PyRc::into_raw(PyRc::new(value)); let rc = unsafe { PyRc::from_raw(inner as *const PyObject) }; Self { rc } } pub fn strong_count(this: &Self) -> usize { PyRc::strong_count(&this.rc) } pub fn weak_count(this: &Self) -> usize { PyRc::weak_count(&this.rc) } pub fn downgrade(this: &Self) -> PyObjectWeak { PyObjectWeak { weak: PyRc::downgrade(&this.rc), } } pub fn payload_is(&self) -> bool { self.rc.inner.typeid == TypeId::of::() } pub fn payload(&self) -> Option<&T> { if self.payload_is::() { // we cast to a PyInner first because we don't know T's exact offset because of // varying alignment, but once we get a PyInner the compiler can get it for us let inner = unsafe { &*(&*self.rc.inner as *const PyInner as *const PyInner) }; Some(&inner.payload) } else { None } } /// Attempt to downcast this reference to a subclass. /// /// If the downcast fails, the original ref is returned in as `Err` so /// another downcast can be attempted without unnecessary cloning. pub fn downcast(self) -> Result, Self> { if self.payload_is::() { Ok(unsafe { PyRef::from_obj_unchecked(self) }) } else { Err(self) } } pub fn downcast_ref(&self) -> Option<&PyRef> { if self.payload_is::() { // SAFETY: just checked that the payload is T, and PyRef is repr(transparent) over // PyObjectRef Some(unsafe { &*(self as *const PyObjectRef as *const PyRef) }) } else { None } } /// # Safety /// T must be the exact payload type pub unsafe fn downcast_unchecked(self) -> PyRef { PyRef::from_obj_unchecked(self) } /// # Safety /// T must be the exact payload type pub unsafe fn downcast_unchecked_ref(&self) -> &PyRef { debug_assert!(self.payload_is::()); &*(self as *const PyObjectRef as *const PyRef) } pub(crate) fn class_lock(&self) -> &PyRwLock { &self.rc.inner.typ } // ideally we'd be able to define these in pyobject.rs, but method visibility rules are weird /// Attempt to downcast this reference to the specific class that is associated `T`. /// /// If the downcast fails, the original ref is returned in as `Err` so /// another downcast can be attempted without unnecessary cloning. pub fn downcast_exact( self, vm: &VirtualMachine, ) -> Result, Self> { if self.class().is(T::class(vm)) { // TODO: is this always true? assert!( self.payload_is::(), "obj.__class__ is T::class() but payload is not T" ); // SAFETY: just asserted that payload_is::() Ok(unsafe { PyRef::from_obj_unchecked(self) }) } else { Err(self) } } #[inline] pub fn payload_if_exact( &self, vm: &VirtualMachine, ) -> Option<&T> { if self.class().is(T::class(vm)) { self.payload() } else { None } } pub fn dict(&self) -> Option { self.rc.inner.dict.as_ref().map(|mu| mu.read().clone()) } /// Set the dict field. Returns `Err(dict)` if this object does not have a dict field /// in the first place. pub fn set_dict(&self, dict: PyDictRef) -> Result<(), PyDictRef> { match self.rc.inner.dict { Some(ref mu) => { *mu.write() = dict; Ok(()) } None => Err(dict), } } #[inline] pub fn payload_if_subclass(&self, vm: &crate::VirtualMachine) -> Option<&T> { if self.class().issubclass(T::class(vm)) { self.payload() } else { None } } } impl AsRef for PyObjectRef { fn as_ref(&self) -> &Self { self } } impl IdProtocol for PyObjectRef { fn get_id(&self) -> usize { self.rc.get_id() } } impl<'a, T: PyObjectPayload> From<&'a PyRef> for &'a PyObjectRef { fn from(py_ref: &'a PyRef) -> Self { &py_ref.obj } } impl From for PyObjectRef where T: PyObjectWrap, { fn from(py_ref: T) -> Self { py_ref.into_object() } } impl PyObjectWeak { pub fn upgrade(&self) -> Option { self.weak.upgrade().map(|rc| PyObjectRef { rc }) } } impl Drop for PyObjectRef { fn drop(&mut self) { // PyObjectRef will drop the value when its count goes to 0 if PyRc::strong_count(&self.rc) != 1 { return; } // CPython-compatible drop implementation let zelf = self.clone(); if let Some(del_slot) = self.class().mro_find_map(|cls| cls.slots.del.load()) { let ret = crate::vm::thread::with_vm(&zelf, |vm| { if let Err(e) = del_slot(&zelf, vm) { // exception in del will be ignored but printed print!("Exception ignored in: ",); let del_method = zelf.get_class_attr("__del__").unwrap(); let repr = vm.to_repr(&del_method); match repr { Ok(v) => println!("{}", v.to_string()), Err(_) => println!("{}", del_method.class().name()), } let tb_module = vm.import("traceback", None, 0).unwrap(); // TODO: set exc traceback let print_stack = vm.get_attribute(tb_module, "print_stack").unwrap(); vm.invoke(&print_stack, ()).unwrap(); if let Ok(repr) = vm.to_repr(e.as_object()) { println!("{}", repr.as_str()); } } }); if ret.is_none() { warn!("couldn't run __del__ method for object") } } // __del__ might have resurrected the object at this point, but that's fine, // inner.strong_count would be >1 now and it'll maybe get dropped the next time } } impl fmt::Debug for PyObjectRef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // SAFETY: the vtable contains functions that accept payload types that always match up // with the payload of the object unsafe { (self.rc.inner.vtable.debug)(&*self.rc.inner, f) } } } impl fmt::Debug for PyObjectWeak { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "(PyWeak)") } } /// A reference to a Python object. /// /// Note that a `PyRef` can only deref to a shared / immutable reference. /// It is the payload type's responsibility to handle (possibly concurrent) /// mutability with locks or concurrent data structures if required. /// /// A `PyRef` can be directly returned from a built-in function to handle /// situations (such as when implementing in-place methods such as `__iadd__`) /// where a reference to the same object must be returned. #[repr(transparent)] pub struct PyRef { // invariant: this obj must always have payload of type T obj: PyObjectRef, _payload: PhantomData>, } impl fmt::Debug for PyRef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.obj.fmt(f) } } impl Clone for PyRef { fn clone(&self) -> Self { Self { obj: self.obj.clone(), _payload: PhantomData, } } } impl PyRef { /// Safety: payload type of `obj` must be `T` unsafe fn from_obj_unchecked(obj: PyObjectRef) -> Self { debug_assert!(obj.payload_is::()); Self { obj, _payload: PhantomData, } } #[inline(always)] pub fn as_object(&self) -> &PyObjectRef { &self.obj } pub fn downgrade(this: &Self) -> PyWeakRef { PyWeakRef { weak: PyObjectRef::downgrade(&this.obj), _payload: PhantomData, } } // ideally we'd be able to define this in pyobject.rs, but method visibility rules are weird pub fn new_ref( payload: T, typ: crate::builtins::PyTypeRef, dict: Option, ) -> Self { let obj = PyObject::new(payload, typ, dict); // SAFETY: we just created the object from a payload of type T unsafe { Self::from_obj_unchecked(obj) } } } impl PyObjectWrap for PyRef where T: PyObjectPayload, { fn into_object(self) -> PyObjectRef { self.obj } } impl AsRef for PyRef where T: PyObjectPayload, { fn as_ref(&self) -> &PyObjectRef { &self.obj } } impl Deref for PyRef where T: PyObjectPayload, { type Target = T; fn deref(&self) -> &T { // SAFETY: per the invariant on `self.obj`, the payload of the pyobject is always T, so it // can always be cast to a PyInner let obj = unsafe { &*(&*self.obj.rc.inner as *const PyInner as *const PyInner) }; &obj.payload } } #[repr(transparent)] pub struct PyWeakRef { weak: PyObjectWeak, _payload: PhantomData>, } impl PyWeakRef { pub fn upgrade(&self) -> Option> { self.weak.upgrade().map(|obj| unsafe { // SAFETY: PyWeakRef is only ever created from a PyRef PyRef::from_obj_unchecked(obj) }) } } /// Paritally initialize a struct, ensuring that all fields are /// either given values or explicitly left uninitialized macro_rules! partially_init { ( $ty:path {$($init_field:ident: $init_value:expr),*$(,)?}, Uninit { $($uninit_field:ident),*$(,)? }$(,)? ) => {{ // check all the fields are there but *don't* actually run it if false { #[allow(invalid_value, dead_code, unreachable_code)] let _ = {$ty { $($init_field: $init_value,)* $($uninit_field: unreachable!(),)* }}; } let mut m = ::std::mem::MaybeUninit::<$ty>::uninit(); #[allow(unused_unsafe)] unsafe { $(::std::ptr::write(&mut (*m.as_mut_ptr()).$init_field, $init_value);)* } m }}; } pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef) { use crate::builtins::{object, PyType, PyWeak}; use crate::{PyAttributes, PyClassImpl}; use std::mem::MaybeUninit; use std::ptr; // `type` inherits from `object` // and both `type` and `object are instances of `type`. // to produce this circular dependency, we need an unsafe block. // (and yes, this will never get dropped. TODO?) let (type_type, object_type) = { type UninitRef = PyRwLock>>>; // We cast between these 2 types, so make sure (at compile time) that there's no change in // layout when we wrap PyInner in MaybeUninit<> static_assertions::assert_eq_size!(MaybeUninit>, PyInner); static_assertions::assert_eq_align!(MaybeUninit>, PyInner); let type_payload = PyType { base: None, bases: vec![], mro: vec![], subclasses: PyRwLock::default(), attributes: PyRwLock::new(PyAttributes::default()), slots: PyType::make_slots(), }; let object_payload = PyType { base: None, bases: vec![], mro: vec![], subclasses: PyRwLock::default(), attributes: PyRwLock::new(PyAttributes::default()), slots: object::PyBaseObject::make_slots(), }; let type_type = PyRc::new(partially_init!( PyInner:: { typeid: TypeId::of::(), vtable: PyObjVTable::of::(), dict: None, payload: type_payload, }, Uninit { typ } )); let object_type = PyRc::new(partially_init!( PyInner:: { typeid: TypeId::of::(), vtable: PyObjVTable::of::(), dict: None, payload: object_payload, }, Uninit { typ }, )); let object_type_ptr = PyRc::into_raw(object_type) as *mut MaybeUninit> as *mut PyInner; let type_type_ptr = PyRc::into_raw(type_type.clone()) as *mut MaybeUninit> as *mut PyInner; unsafe { ptr::write( &mut (*object_type_ptr).typ as *mut PyRwLock as *mut UninitRef, PyRwLock::new(type_type.clone()), ); ptr::write( &mut (*type_type_ptr).typ as *mut PyRwLock as *mut UninitRef, PyRwLock::new(type_type), ); let type_type = PyTypeRef::from_obj_unchecked(PyObjectRef::from_raw(type_type_ptr.cast())); let object_type = PyTypeRef::from_obj_unchecked(PyObjectRef::from_raw(object_type_ptr.cast())); (*type_type_ptr).payload.mro = vec![object_type.clone()]; (*type_type_ptr).payload.bases = vec![object_type.clone()]; (*type_type_ptr).payload.base = Some(object_type.clone()); (type_type, object_type) } }; object_type .subclasses .write() .push(PyWeak::downgrade(type_type.as_object())); (type_type, object_type) } #[cfg(test)] mod tests { use super::*; #[test] fn miri_test_type_initialization() { let _ = init_type_hierarchy(); } }