From dbeb3b44532fb2cbc10f8c1aca5f19c7aa0296bf Mon Sep 17 00:00:00 2001 From: David Hewitt <1939362+davidhewitt@users.noreply.github.com> Date: Sat, 11 Feb 2023 21:27:59 +0000 Subject: [PATCH] move some private internals out of public implementation --- benches/bench_pyclass.rs | 2 +- guide/src/class.md | 2 +- pyo3-macros-backend/src/pyclass.rs | 2 +- src/derive_utils.rs | 24 +- src/impl_/pyclass.rs | 3 + src/impl_/pyclass/lazy_static_type.rs | 161 ++++++++++ src/pyclass.rs | 438 +------------------------- src/pyclass/create_type_object.rs | 433 +++++++++++++++++++++++++ src/type_object.rs | 158 +--------- 9 files changed, 607 insertions(+), 616 deletions(-) create mode 100644 src/impl_/pyclass/lazy_static_type.rs create mode 100644 src/pyclass/create_type_object.rs diff --git a/benches/bench_pyclass.rs b/benches/bench_pyclass.rs index 60f17c1b..dddc1995 100644 --- a/benches/bench_pyclass.rs +++ b/benches/bench_pyclass.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use pyo3::{prelude::*, type_object::LazyStaticType}; +use pyo3::{impl_::pyclass::LazyStaticType, prelude::*}; /// This is a feature-rich class instance used to benchmark various parts of the pyclass lifecycle. #[pyclass] diff --git a/guide/src/class.md b/guide/src/class.md index 3e41e8de..7b82d472 100644 --- a/guide/src/class.md +++ b/guide/src/class.md @@ -979,7 +979,7 @@ unsafe impl pyo3::type_object::PyTypeInfo for MyClass { const MODULE: ::std::option::Option<&'static str> = ::std::option::Option::None; #[inline] fn type_object_raw(py: pyo3::Python<'_>) -> *mut pyo3::ffi::PyTypeObject { - use pyo3::type_object::LazyStaticType; + use pyo3::impl_::pyclass::LazyStaticType; static TYPE_OBJECT: LazyStaticType = LazyStaticType::new(); TYPE_OBJECT.get_or_init::(py) } diff --git a/pyo3-macros-backend/src/pyclass.rs b/pyo3-macros-backend/src/pyclass.rs index 70b12c5d..b48df961 100644 --- a/pyo3-macros-backend/src/pyclass.rs +++ b/pyo3-macros-backend/src/pyclass.rs @@ -756,7 +756,7 @@ fn impl_pytypeinfo( fn type_object_raw(py: _pyo3::Python<'_>) -> *mut _pyo3::ffi::PyTypeObject { #deprecations - use _pyo3::type_object::LazyStaticType; + use _pyo3::impl_::pyclass::LazyStaticType; static TYPE_OBJECT: LazyStaticType = LazyStaticType::new(); TYPE_OBJECT.get_or_init::(py) } diff --git a/src/derive_utils.rs b/src/derive_utils.rs index 242cdf68..25ea7f89 100644 --- a/src/derive_utils.rs +++ b/src/derive_utils.rs @@ -4,29 +4,7 @@ //! Functionality for the code generated by the derive backend -use crate::{types::PyModule, PyCell, PyClass, PyErr, Python}; - -/// A trait for types that can be borrowed from a cell. -/// -/// This serves to unify the use of `PyRef` and `PyRefMut` in automatically -/// derived code, since both types can be obtained from a `PyCell`. -#[doc(hidden)] -pub trait TryFromPyCell<'a, T: PyClass>: Sized { - type Error: Into; - fn try_from_pycell(cell: &'a crate::PyCell) -> Result; -} - -impl<'a, T, R> TryFromPyCell<'a, T> for R -where - T: 'a + PyClass, - R: std::convert::TryFrom<&'a PyCell>, - R::Error: Into, -{ - type Error = R::Error; - fn try_from_pycell(cell: &'a crate::PyCell) -> Result { - >>::try_from(cell) - } -} +use crate::{types::PyModule, Python}; /// Enum to abstract over the arguments of Python function wrappers. pub enum PyFunctionArguments<'a> { diff --git a/src/impl_/pyclass.rs b/src/impl_/pyclass.rs index a2671748..c2d968c2 100644 --- a/src/impl_/pyclass.rs +++ b/src/impl_/pyclass.rs @@ -15,6 +15,9 @@ use std::{ thread, }; +mod lazy_static_type; +pub use lazy_static_type::LazyStaticType; + /// Gets the offset of the dictionary from the start of the object in bytes. #[inline] pub fn dict_offset() -> ffi::Py_ssize_t { diff --git a/src/impl_/pyclass/lazy_static_type.rs b/src/impl_/pyclass/lazy_static_type.rs new file mode 100644 index 00000000..aa4e5b9d --- /dev/null +++ b/src/impl_/pyclass/lazy_static_type.rs @@ -0,0 +1,161 @@ +use std::{ + borrow::Cow, + ffi::CStr, + thread::{self, ThreadId}, +}; + +use parking_lot::{const_mutex, Mutex}; + +use crate::{ + ffi, once_cell::GILOnceCell, pyclass::create_type_object, IntoPyPointer, PyClass, + PyMethodDefType, PyObject, PyResult, Python, +}; + +use super::PyClassItemsIter; + +/// Lazy type object for PyClass. +#[doc(hidden)] +pub struct LazyStaticType { + // Boxed because Python expects the type object to have a stable address. + value: GILOnceCell<*mut ffi::PyTypeObject>, + // Threads which have begun initialization of the `tp_dict`. Used for + // reentrant initialization detection. + initializing_threads: Mutex>, + tp_dict_filled: GILOnceCell>, +} + +impl LazyStaticType { + /// Creates an uninitialized `LazyStaticType`. + pub const fn new() -> Self { + LazyStaticType { + value: GILOnceCell::new(), + initializing_threads: const_mutex(Vec::new()), + tp_dict_filled: GILOnceCell::new(), + } + } + + /// Gets the type object contained by this `LazyStaticType`, initializing it if needed. + pub fn get_or_init(&self, py: Python<'_>) -> *mut ffi::PyTypeObject { + fn inner() -> *mut ffi::PyTypeObject { + // Safety: `py` is held by the caller of `get_or_init`. + let py = unsafe { Python::assume_gil_acquired() }; + create_type_object::(py) + } + + // Uses explicit GILOnceCell::get_or_init:: *mut ffi::PyTypeObject> monomorphization + // so that only this one monomorphization is instantiated (instead of one closure monormization for each T). + let type_object = *self + .value + .get_or_init:: *mut ffi::PyTypeObject>(py, inner::); + self.ensure_init(py, type_object, T::NAME, T::items_iter()); + type_object + } + + fn ensure_init( + &self, + py: Python<'_>, + type_object: *mut ffi::PyTypeObject, + name: &str, + items_iter: PyClassItemsIter, + ) { + // We might want to fill the `tp_dict` with python instances of `T` + // itself. In order to do so, we must first initialize the type object + // with an empty `tp_dict`: now we can create instances of `T`. + // + // Then we fill the `tp_dict`. Multiple threads may try to fill it at + // the same time, but only one of them will succeed. + // + // More importantly, if a thread is performing initialization of the + // `tp_dict`, it can still request the type object through `get_or_init`, + // but the `tp_dict` may appear empty of course. + + if self.tp_dict_filled.get(py).is_some() { + // `tp_dict` is already filled: ok. + return; + } + + let thread_id = thread::current().id(); + { + let mut threads = self.initializing_threads.lock(); + if threads.contains(&thread_id) { + // Reentrant call: just return the type object, even if the + // `tp_dict` is not filled yet. + return; + } + threads.push(thread_id); + } + + struct InitializationGuard<'a> { + initializing_threads: &'a Mutex>, + thread_id: ThreadId, + } + impl Drop for InitializationGuard<'_> { + fn drop(&mut self) { + let mut threads = self.initializing_threads.lock(); + threads.retain(|id| *id != self.thread_id); + } + } + + let guard = InitializationGuard { + initializing_threads: &self.initializing_threads, + thread_id, + }; + + // Pre-compute the class attribute objects: this can temporarily + // release the GIL since we're calling into arbitrary user code. It + // means that another thread can continue the initialization in the + // meantime: at worst, we'll just make a useless computation. + let mut items = vec![]; + for class_items in items_iter { + for def in class_items.methods { + if let PyMethodDefType::ClassAttribute(attr) = def { + let key = attr.attribute_c_string().unwrap(); + + match (attr.meth.0)(py) { + Ok(val) => items.push((key, val)), + Err(e) => panic!( + "An error occurred while initializing `{}.{}`: {}", + name, + attr.name.trim_end_matches('\0'), + e + ), + } + } + } + } + + // Now we hold the GIL and we can assume it won't be released until we + // return from the function. + let result = self.tp_dict_filled.get_or_init(py, move || { + let result = initialize_tp_dict(py, type_object as *mut ffi::PyObject, items); + + // Initialization successfully complete, can clear the thread list. + // (No further calls to get_or_init() will try to init, on any thread.) + std::mem::forget(guard); + *self.initializing_threads.lock() = Vec::new(); + result + }); + + if let Err(err) = result { + err.clone_ref(py).print(py); + panic!("An error occurred while initializing `{}.__dict__`", name); + } + } +} + +fn initialize_tp_dict( + py: Python<'_>, + type_object: *mut ffi::PyObject, + items: Vec<(Cow<'static, CStr>, PyObject)>, +) -> PyResult<()> { + // We hold the GIL: the dictionary update can be considered atomic from + // the POV of other threads. + for (key, val) in items { + let ret = unsafe { ffi::PyObject_SetAttrString(type_object, key.as_ptr(), val.into_ptr()) }; + crate::err::error_on_minusone(py, ret)?; + } + Ok(()) +} + +// This is necessary for making static `LazyStaticType`s +unsafe impl Sync for LazyStaticType {} diff --git a/src/pyclass.rs b/src/pyclass.rs index bab7ab52..028f3591 100644 --- a/src/pyclass.rs +++ b/src/pyclass.rs @@ -1,25 +1,14 @@ //! `PyClass` and related traits. use crate::{ - callback::IntoPyCallbackOutput, - exceptions::PyTypeError, - ffi, - impl_::pyclass::{ - assign_sequence_item_from_mapping, get_sequence_item_from_mapping, tp_dealloc, PyClassImpl, - PyClassItemsIter, - }, - IntoPy, IntoPyPointer, PyCell, PyErr, PyMethodDefType, PyObject, PyResult, PyTypeInfo, Python, -}; -use std::{ - cmp::Ordering, - collections::HashMap, - convert::TryInto, - ffi::{CStr, CString}, - os::raw::{c_char, c_int, c_ulong, c_void}, - ptr, + callback::IntoPyCallbackOutput, ffi, impl_::pyclass::PyClassImpl, IntoPy, IntoPyPointer, + PyCell, PyObject, PyResult, PyTypeInfo, Python, }; +use std::{cmp::Ordering, os::raw::c_int}; +mod create_type_object; mod gc; +pub(crate) use self::create_type_object::create_type_object; pub use self::gc::{PyTraverseError, PyVisit}; /// Types that can be used as Python classes. @@ -35,410 +24,6 @@ pub trait PyClass: type Frozen: Frozen; } -pub(crate) fn create_type_object(py: Python<'_>) -> *mut ffi::PyTypeObject -where - T: PyClass, -{ - match unsafe { - PyTypeBuilder::default() - .type_doc(T::DOC) - .offsets(T::dict_offset(), T::weaklist_offset()) - .slot(ffi::Py_tp_base, T::BaseType::type_object_raw(py)) - .slot(ffi::Py_tp_dealloc, tp_dealloc:: as *mut c_void) - .set_is_basetype(T::IS_BASETYPE) - .set_is_mapping(T::IS_MAPPING) - .set_is_sequence(T::IS_SEQUENCE) - .class_items(T::items_iter()) - .build(py, T::NAME, T::MODULE, std::mem::size_of::()) - } { - Ok(type_object) => type_object, - Err(e) => type_object_creation_failed(py, e, T::NAME), - } -} - -type PyTypeBuilderCleanup = Box; - -#[derive(Default)] -struct PyTypeBuilder { - slots: Vec, - method_defs: Vec, - property_defs_map: HashMap<&'static str, ffi::PyGetSetDef>, - /// Used to patch the type objects for the things there's no - /// PyType_FromSpec API for... there's no reason this should work, - /// except for that it does and we have tests. - cleanup: Vec, - is_mapping: bool, - is_sequence: bool, - has_new: bool, - has_dealloc: bool, - has_getitem: bool, - has_setitem: bool, - has_traverse: bool, - has_clear: bool, - has_dict: bool, - class_flags: c_ulong, - // Before Python 3.9, need to patch in buffer methods manually (they don't work in slots) - #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] - buffer_procs: ffi::PyBufferProcs, -} - -impl PyTypeBuilder { - /// # Safety - /// The given pointer must be of the correct type for the given slot - unsafe fn push_slot(&mut self, slot: c_int, pfunc: *mut T) { - match slot { - ffi::Py_tp_new => self.has_new = true, - ffi::Py_tp_dealloc => self.has_dealloc = true, - ffi::Py_mp_subscript => self.has_getitem = true, - ffi::Py_mp_ass_subscript => self.has_setitem = true, - ffi::Py_tp_traverse => { - self.has_traverse = true; - self.class_flags |= ffi::Py_TPFLAGS_HAVE_GC; - } - ffi::Py_tp_clear => self.has_clear = true, - #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] - ffi::Py_bf_getbuffer => { - // Safety: slot.pfunc is a valid function pointer - self.buffer_procs.bf_getbuffer = Some(std::mem::transmute(pfunc)); - } - #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] - ffi::Py_bf_releasebuffer => { - // Safety: slot.pfunc is a valid function pointer - self.buffer_procs.bf_releasebuffer = Some(std::mem::transmute(pfunc)); - } - _ => {} - } - - self.slots.push(ffi::PyType_Slot { - slot, - pfunc: pfunc as _, - }); - } - - /// # Safety - /// It is the caller's responsibility that `data` is of the correct type for the given slot. - unsafe fn push_raw_vec_slot(&mut self, slot: c_int, mut data: Vec) { - if !data.is_empty() { - // Python expects a zeroed entry to mark the end of the defs - data.push(std::mem::zeroed()); - self.push_slot(slot, Box::into_raw(data.into_boxed_slice()) as *mut c_void); - } - } - - /// # Safety - /// The given pointer must be of the correct type for the given slot - unsafe fn slot(mut self, slot: c_int, pfunc: *mut T) -> Self { - self.push_slot(slot, pfunc); - self - } - - fn pymethod_def(&mut self, def: &PyMethodDefType) { - const PY_GET_SET_DEF_INIT: ffi::PyGetSetDef = ffi::PyGetSetDef { - name: ptr::null_mut(), - get: None, - set: None, - doc: ptr::null(), - closure: ptr::null_mut(), - }; - - match def { - PyMethodDefType::Getter(getter) => { - getter.copy_to( - self.property_defs_map - .entry(getter.name) - .or_insert(PY_GET_SET_DEF_INIT), - ); - } - PyMethodDefType::Setter(setter) => { - setter.copy_to( - self.property_defs_map - .entry(setter.name) - .or_insert(PY_GET_SET_DEF_INIT), - ); - } - PyMethodDefType::Method(def) - | PyMethodDefType::Class(def) - | PyMethodDefType::Static(def) => { - let (def, destructor) = def.as_method_def().unwrap(); - // FIXME: stop leaking destructor - std::mem::forget(destructor); - self.method_defs.push(def); - } - // These class attributes are added after the type gets created by LazyStaticType - PyMethodDefType::ClassAttribute(_) => {} - } - } - - fn finalize_methods_and_properties(&mut self) { - let method_defs = std::mem::take(&mut self.method_defs); - // Safety: Py_tp_methods expects a raw vec of PyMethodDef - unsafe { self.push_raw_vec_slot(ffi::Py_tp_methods, method_defs) }; - - let property_defs = std::mem::take(&mut self.property_defs_map); - // TODO: use into_values when on MSRV Rust >= 1.54 - #[allow(unused_mut)] - let mut property_defs: Vec<_> = property_defs.into_iter().map(|(_, value)| value).collect(); - - // PyPy doesn't automatically add __dict__ getter / setter. - // PyObject_GenericGetDict not in the limited API until Python 3.10. - if self.has_dict { - #[cfg(not(any(PyPy, all(Py_LIMITED_API, not(Py_3_10)))))] - property_defs.push(ffi::PyGetSetDef { - name: "__dict__\0".as_ptr() as *mut c_char, - get: Some(ffi::PyObject_GenericGetDict), - set: Some(ffi::PyObject_GenericSetDict), - doc: ptr::null(), - closure: ptr::null_mut(), - }); - } - - // Safety: Py_tp_members expects a raw vec of PyGetSetDef - unsafe { self.push_raw_vec_slot(ffi::Py_tp_getset, property_defs) }; - - // If mapping methods implemented, define sequence methods get implemented too. - // CPython does the same for Python `class` statements. - - // NB we don't implement sq_length to avoid annoying CPython behaviour of automatically adding - // the length to negative indices. - - // Don't add these methods for "pure" mappings. - - if !self.is_mapping && self.has_getitem { - // Safety: This is the correct slot type for Py_sq_item - unsafe { - self.push_slot( - ffi::Py_sq_item, - get_sequence_item_from_mapping as *mut c_void, - ) - } - } - - if !self.is_mapping && self.has_setitem { - // Safety: This is the correct slot type for Py_sq_ass_item - unsafe { - self.push_slot( - ffi::Py_sq_ass_item, - assign_sequence_item_from_mapping as *mut c_void, - ) - } - } - } - - fn set_is_basetype(mut self, is_basetype: bool) -> Self { - if is_basetype { - self.class_flags |= ffi::Py_TPFLAGS_BASETYPE; - } - self - } - - fn set_is_mapping(mut self, is_mapping: bool) -> Self { - self.is_mapping = is_mapping; - self - } - - fn set_is_sequence(mut self, is_sequence: bool) -> Self { - self.is_sequence = is_sequence; - self - } - - /// # Safety - /// All slots in the PyClassItemsIter should be correct - unsafe fn class_items(mut self, iter: PyClassItemsIter) -> Self { - for items in iter { - for slot in items.slots { - self.push_slot(slot.slot, slot.pfunc); - } - for method in items.methods { - self.pymethod_def(method); - } - } - self - } - - fn type_doc(mut self, type_doc: &'static str) -> Self { - if let Some(doc) = py_class_doc(type_doc) { - unsafe { self.push_slot(ffi::Py_tp_doc, doc) } - } - - // Running this causes PyPy to segfault. - #[cfg(all(not(PyPy), not(Py_LIMITED_API), not(Py_3_10)))] - if type_doc != "\0" { - // Until CPython 3.10, tp_doc was treated specially for - // heap-types, and it removed the text_signature value from it. - // We go in after the fact and replace tp_doc with something - // that _does_ include the text_signature value! - self.cleanup - .push(Box::new(move |_self, type_object| unsafe { - ffi::PyObject_Free((*type_object).tp_doc as _); - let data = ffi::PyObject_Malloc(type_doc.len()); - data.copy_from(type_doc.as_ptr() as _, type_doc.len()); - (*type_object).tp_doc = data as _; - })) - } - self - } - - fn offsets( - mut self, - dict_offset: Option, - #[allow(unused_variables)] weaklist_offset: Option, - ) -> Self { - self.has_dict = dict_offset.is_some(); - - #[cfg(Py_3_9)] - { - #[inline(always)] - fn offset_def( - name: &'static str, - offset: ffi::Py_ssize_t, - ) -> ffi::structmember::PyMemberDef { - ffi::structmember::PyMemberDef { - name: name.as_ptr() as _, - type_code: ffi::structmember::T_PYSSIZET, - offset, - flags: ffi::structmember::READONLY, - doc: std::ptr::null_mut(), - } - } - - let mut members = Vec::new(); - - // __dict__ support - if let Some(dict_offset) = dict_offset { - members.push(offset_def("__dictoffset__\0", dict_offset)); - } - - // weakref support - if let Some(weaklist_offset) = weaklist_offset { - members.push(offset_def("__weaklistoffset__\0", weaklist_offset)); - } - - // Safety: Py_tp_members expects a raw vec of PyMemberDef - unsafe { self.push_raw_vec_slot(ffi::Py_tp_members, members) }; - } - - // Setting buffer protocols, tp_dictoffset and tp_weaklistoffset via slots doesn't work until - // Python 3.9, so on older versions we must manually fixup the type object. - #[cfg(all(not(Py_LIMITED_API), not(Py_3_9)))] - { - self.cleanup - .push(Box::new(move |builder, type_object| unsafe { - (*(*type_object).tp_as_buffer).bf_getbuffer = builder.buffer_procs.bf_getbuffer; - (*(*type_object).tp_as_buffer).bf_releasebuffer = - builder.buffer_procs.bf_releasebuffer; - - if let Some(dict_offset) = dict_offset { - (*type_object).tp_dictoffset = dict_offset; - } - - if let Some(weaklist_offset) = weaklist_offset { - (*type_object).tp_weaklistoffset = weaklist_offset; - } - })); - } - self - } - - fn build( - mut self, - py: Python<'_>, - name: &'static str, - module_name: Option<&'static str>, - basicsize: usize, - ) -> PyResult<*mut ffi::PyTypeObject> { - // `c_ulong` and `c_uint` have the same size - // on some platforms (like windows) - #![allow(clippy::useless_conversion)] - - self.finalize_methods_and_properties(); - - if !self.has_new { - // Safety: This is the correct slot type for Py_tp_new - unsafe { self.push_slot(ffi::Py_tp_new, no_constructor_defined as *mut c_void) } - } - - if !self.has_dealloc { - panic!("PyTypeBuilder requires you to specify slot ffi::Py_tp_dealloc"); - } - - if self.has_clear && !self.has_traverse { - return Err(PyTypeError::new_err(format!( - "`#[pyclass]` {} implements __clear__ without __traverse__", - name - ))); - } - - // For sequences, implement sq_length instead of mp_length - if self.is_sequence { - for slot in &mut self.slots { - if slot.slot == ffi::Py_mp_length { - slot.slot = ffi::Py_sq_length; - } - } - } - - // Add empty sentinel at the end - // Safety: python expects this empty slot - unsafe { self.push_slot(0, ptr::null_mut::()) } - - let mut spec = ffi::PyType_Spec { - name: py_class_qualified_name(module_name, name)?, - basicsize: basicsize as c_int, - itemsize: 0, - - flags: (ffi::Py_TPFLAGS_DEFAULT | self.class_flags) - .try_into() - .unwrap(), - slots: self.slots.as_mut_ptr(), - }; - - // Safety: We've correctly setup the PyType_Spec at this point - let type_object = unsafe { ffi::PyType_FromSpec(&mut spec) }; - if type_object.is_null() { - Err(PyErr::fetch(py)) - } else { - for cleanup in std::mem::take(&mut self.cleanup) { - cleanup(&self, type_object as _); - } - - Ok(type_object as _) - } - } -} - -#[cold] -fn type_object_creation_failed(py: Python<'_>, e: PyErr, name: &str) -> ! { - e.print(py); - panic!("An error occurred while initializing class {}", name) -} - -fn py_class_doc(class_doc: &str) -> Option<*mut c_char> { - match class_doc { - "\0" => None, - s => { - // To pass *mut pointer to python safely, leak a CString in whichever case - let cstring = if s.as_bytes().last() == Some(&0) { - CStr::from_bytes_with_nul(s.as_bytes()) - .unwrap_or_else(|e| panic!("doc contains interior nul byte: {:?} in {}", e, s)) - .to_owned() - } else { - CString::new(s) - .unwrap_or_else(|e| panic!("doc contains interior nul byte: {:?} in {}", e, s)) - }; - Some(cstring.into_raw()) - } - } -} - -fn py_class_qualified_name(module_name: Option<&str>, class_name: &str) -> PyResult<*mut c_char> { - Ok(CString::new(format!( - "{}.{}", - module_name.unwrap_or("builtins"), - class_name - ))? - .into_raw()) -} - /// Operators for the `__richcmp__` method #[derive(Debug, Clone, Copy)] pub enum CompareOp { @@ -599,19 +184,6 @@ where } } -/// Default new implementation -pub(crate) unsafe extern "C" fn no_constructor_defined( - _subtype: *mut ffi::PyTypeObject, - _args: *mut ffi::PyObject, - _kwds: *mut ffi::PyObject, -) -> *mut ffi::PyObject { - crate::impl_::trampoline::trampoline_inner(|_| { - Err(crate::exceptions::PyTypeError::new_err( - "No constructor defined", - )) - }) -} - /// A workaround for [associated const equality](https://github.com/rust-lang/rust/issues/92827). /// /// This serves to have True / False values in the [`PyClass`] trait's `Frozen` type. diff --git a/src/pyclass/create_type_object.rs b/src/pyclass/create_type_object.rs new file mode 100644 index 00000000..8e663a91 --- /dev/null +++ b/src/pyclass/create_type_object.rs @@ -0,0 +1,433 @@ +use crate::{ + exceptions::PyTypeError, + ffi, + impl_::pyclass::{ + assign_sequence_item_from_mapping, get_sequence_item_from_mapping, tp_dealloc, + PyClassItemsIter, + }, + PyClass, PyErr, PyMethodDefType, PyResult, PyTypeInfo, Python, +}; +use std::{ + collections::HashMap, + convert::TryInto, + ffi::{CStr, CString}, + os::raw::{c_char, c_int, c_ulong, c_void}, + ptr, +}; + +pub(crate) fn create_type_object(py: Python<'_>) -> *mut ffi::PyTypeObject +where + T: PyClass, +{ + match unsafe { + PyTypeBuilder::default() + .type_doc(T::DOC) + .offsets(T::dict_offset(), T::weaklist_offset()) + .slot(ffi::Py_tp_base, T::BaseType::type_object_raw(py)) + .slot(ffi::Py_tp_dealloc, tp_dealloc:: as *mut c_void) + .set_is_basetype(T::IS_BASETYPE) + .set_is_mapping(T::IS_MAPPING) + .set_is_sequence(T::IS_SEQUENCE) + .class_items(T::items_iter()) + .build(py, T::NAME, T::MODULE, std::mem::size_of::()) + } { + Ok(type_object) => type_object, + Err(e) => type_object_creation_failed(py, e, T::NAME), + } +} + +type PyTypeBuilderCleanup = Box; + +#[derive(Default)] +struct PyTypeBuilder { + slots: Vec, + method_defs: Vec, + property_defs_map: HashMap<&'static str, ffi::PyGetSetDef>, + /// Used to patch the type objects for the things there's no + /// PyType_FromSpec API for... there's no reason this should work, + /// except for that it does and we have tests. + cleanup: Vec, + is_mapping: bool, + is_sequence: bool, + has_new: bool, + has_dealloc: bool, + has_getitem: bool, + has_setitem: bool, + has_traverse: bool, + has_clear: bool, + has_dict: bool, + class_flags: c_ulong, + // Before Python 3.9, need to patch in buffer methods manually (they don't work in slots) + #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] + buffer_procs: ffi::PyBufferProcs, +} + +impl PyTypeBuilder { + /// # Safety + /// The given pointer must be of the correct type for the given slot + unsafe fn push_slot(&mut self, slot: c_int, pfunc: *mut T) { + match slot { + ffi::Py_tp_new => self.has_new = true, + ffi::Py_tp_dealloc => self.has_dealloc = true, + ffi::Py_mp_subscript => self.has_getitem = true, + ffi::Py_mp_ass_subscript => self.has_setitem = true, + ffi::Py_tp_traverse => { + self.has_traverse = true; + self.class_flags |= ffi::Py_TPFLAGS_HAVE_GC; + } + ffi::Py_tp_clear => self.has_clear = true, + #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] + ffi::Py_bf_getbuffer => { + // Safety: slot.pfunc is a valid function pointer + self.buffer_procs.bf_getbuffer = Some(std::mem::transmute(pfunc)); + } + #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] + ffi::Py_bf_releasebuffer => { + // Safety: slot.pfunc is a valid function pointer + self.buffer_procs.bf_releasebuffer = Some(std::mem::transmute(pfunc)); + } + _ => {} + } + + self.slots.push(ffi::PyType_Slot { + slot, + pfunc: pfunc as _, + }); + } + + /// # Safety + /// It is the caller's responsibility that `data` is of the correct type for the given slot. + unsafe fn push_raw_vec_slot(&mut self, slot: c_int, mut data: Vec) { + if !data.is_empty() { + // Python expects a zeroed entry to mark the end of the defs + data.push(std::mem::zeroed()); + self.push_slot(slot, Box::into_raw(data.into_boxed_slice()) as *mut c_void); + } + } + + /// # Safety + /// The given pointer must be of the correct type for the given slot + unsafe fn slot(mut self, slot: c_int, pfunc: *mut T) -> Self { + self.push_slot(slot, pfunc); + self + } + + fn pymethod_def(&mut self, def: &PyMethodDefType) { + const PY_GET_SET_DEF_INIT: ffi::PyGetSetDef = ffi::PyGetSetDef { + name: ptr::null_mut(), + get: None, + set: None, + doc: ptr::null(), + closure: ptr::null_mut(), + }; + + match def { + PyMethodDefType::Getter(getter) => { + getter.copy_to( + self.property_defs_map + .entry(getter.name) + .or_insert(PY_GET_SET_DEF_INIT), + ); + } + PyMethodDefType::Setter(setter) => { + setter.copy_to( + self.property_defs_map + .entry(setter.name) + .or_insert(PY_GET_SET_DEF_INIT), + ); + } + PyMethodDefType::Method(def) + | PyMethodDefType::Class(def) + | PyMethodDefType::Static(def) => { + let (def, destructor) = def.as_method_def().unwrap(); + // FIXME: stop leaking destructor + std::mem::forget(destructor); + self.method_defs.push(def); + } + // These class attributes are added after the type gets created by LazyStaticType + PyMethodDefType::ClassAttribute(_) => {} + } + } + + fn finalize_methods_and_properties(&mut self) { + let method_defs = std::mem::take(&mut self.method_defs); + // Safety: Py_tp_methods expects a raw vec of PyMethodDef + unsafe { self.push_raw_vec_slot(ffi::Py_tp_methods, method_defs) }; + + let property_defs = std::mem::take(&mut self.property_defs_map); + // TODO: use into_values when on MSRV Rust >= 1.54 + #[allow(unused_mut)] + let mut property_defs: Vec<_> = property_defs.into_iter().map(|(_, value)| value).collect(); + + // PyPy doesn't automatically add __dict__ getter / setter. + // PyObject_GenericGetDict not in the limited API until Python 3.10. + if self.has_dict { + #[cfg(not(any(PyPy, all(Py_LIMITED_API, not(Py_3_10)))))] + property_defs.push(ffi::PyGetSetDef { + name: "__dict__\0".as_ptr() as *mut c_char, + get: Some(ffi::PyObject_GenericGetDict), + set: Some(ffi::PyObject_GenericSetDict), + doc: ptr::null(), + closure: ptr::null_mut(), + }); + } + + // Safety: Py_tp_members expects a raw vec of PyGetSetDef + unsafe { self.push_raw_vec_slot(ffi::Py_tp_getset, property_defs) }; + + // If mapping methods implemented, define sequence methods get implemented too. + // CPython does the same for Python `class` statements. + + // NB we don't implement sq_length to avoid annoying CPython behaviour of automatically adding + // the length to negative indices. + + // Don't add these methods for "pure" mappings. + + if !self.is_mapping && self.has_getitem { + // Safety: This is the correct slot type for Py_sq_item + unsafe { + self.push_slot( + ffi::Py_sq_item, + get_sequence_item_from_mapping as *mut c_void, + ) + } + } + + if !self.is_mapping && self.has_setitem { + // Safety: This is the correct slot type for Py_sq_ass_item + unsafe { + self.push_slot( + ffi::Py_sq_ass_item, + assign_sequence_item_from_mapping as *mut c_void, + ) + } + } + } + + fn set_is_basetype(mut self, is_basetype: bool) -> Self { + if is_basetype { + self.class_flags |= ffi::Py_TPFLAGS_BASETYPE; + } + self + } + + fn set_is_mapping(mut self, is_mapping: bool) -> Self { + self.is_mapping = is_mapping; + self + } + + fn set_is_sequence(mut self, is_sequence: bool) -> Self { + self.is_sequence = is_sequence; + self + } + + /// # Safety + /// All slots in the PyClassItemsIter should be correct + unsafe fn class_items(mut self, iter: PyClassItemsIter) -> Self { + for items in iter { + for slot in items.slots { + self.push_slot(slot.slot, slot.pfunc); + } + for method in items.methods { + self.pymethod_def(method); + } + } + self + } + + fn type_doc(mut self, type_doc: &'static str) -> Self { + if let Some(doc) = py_class_doc(type_doc) { + unsafe { self.push_slot(ffi::Py_tp_doc, doc) } + } + + // Running this causes PyPy to segfault. + #[cfg(all(not(PyPy), not(Py_LIMITED_API), not(Py_3_10)))] + if type_doc != "\0" { + // Until CPython 3.10, tp_doc was treated specially for + // heap-types, and it removed the text_signature value from it. + // We go in after the fact and replace tp_doc with something + // that _does_ include the text_signature value! + self.cleanup + .push(Box::new(move |_self, type_object| unsafe { + ffi::PyObject_Free((*type_object).tp_doc as _); + let data = ffi::PyObject_Malloc(type_doc.len()); + data.copy_from(type_doc.as_ptr() as _, type_doc.len()); + (*type_object).tp_doc = data as _; + })) + } + self + } + + fn offsets( + mut self, + dict_offset: Option, + #[allow(unused_variables)] weaklist_offset: Option, + ) -> Self { + self.has_dict = dict_offset.is_some(); + + #[cfg(Py_3_9)] + { + #[inline(always)] + fn offset_def( + name: &'static str, + offset: ffi::Py_ssize_t, + ) -> ffi::structmember::PyMemberDef { + ffi::structmember::PyMemberDef { + name: name.as_ptr() as _, + type_code: ffi::structmember::T_PYSSIZET, + offset, + flags: ffi::structmember::READONLY, + doc: std::ptr::null_mut(), + } + } + + let mut members = Vec::new(); + + // __dict__ support + if let Some(dict_offset) = dict_offset { + members.push(offset_def("__dictoffset__\0", dict_offset)); + } + + // weakref support + if let Some(weaklist_offset) = weaklist_offset { + members.push(offset_def("__weaklistoffset__\0", weaklist_offset)); + } + + // Safety: Py_tp_members expects a raw vec of PyMemberDef + unsafe { self.push_raw_vec_slot(ffi::Py_tp_members, members) }; + } + + // Setting buffer protocols, tp_dictoffset and tp_weaklistoffset via slots doesn't work until + // Python 3.9, so on older versions we must manually fixup the type object. + #[cfg(all(not(Py_LIMITED_API), not(Py_3_9)))] + { + self.cleanup + .push(Box::new(move |builder, type_object| unsafe { + (*(*type_object).tp_as_buffer).bf_getbuffer = builder.buffer_procs.bf_getbuffer; + (*(*type_object).tp_as_buffer).bf_releasebuffer = + builder.buffer_procs.bf_releasebuffer; + + if let Some(dict_offset) = dict_offset { + (*type_object).tp_dictoffset = dict_offset; + } + + if let Some(weaklist_offset) = weaklist_offset { + (*type_object).tp_weaklistoffset = weaklist_offset; + } + })); + } + self + } + + fn build( + mut self, + py: Python<'_>, + name: &'static str, + module_name: Option<&'static str>, + basicsize: usize, + ) -> PyResult<*mut ffi::PyTypeObject> { + // `c_ulong` and `c_uint` have the same size + // on some platforms (like windows) + #![allow(clippy::useless_conversion)] + + self.finalize_methods_and_properties(); + + if !self.has_new { + // Safety: This is the correct slot type for Py_tp_new + unsafe { self.push_slot(ffi::Py_tp_new, no_constructor_defined as *mut c_void) } + } + + if !self.has_dealloc { + panic!("PyTypeBuilder requires you to specify slot ffi::Py_tp_dealloc"); + } + + if self.has_clear && !self.has_traverse { + return Err(PyTypeError::new_err(format!( + "`#[pyclass]` {} implements __clear__ without __traverse__", + name + ))); + } + + // For sequences, implement sq_length instead of mp_length + if self.is_sequence { + for slot in &mut self.slots { + if slot.slot == ffi::Py_mp_length { + slot.slot = ffi::Py_sq_length; + } + } + } + + // Add empty sentinel at the end + // Safety: python expects this empty slot + unsafe { self.push_slot(0, ptr::null_mut::()) } + + let mut spec = ffi::PyType_Spec { + name: py_class_qualified_name(module_name, name)?, + basicsize: basicsize as c_int, + itemsize: 0, + + flags: (ffi::Py_TPFLAGS_DEFAULT | self.class_flags) + .try_into() + .unwrap(), + slots: self.slots.as_mut_ptr(), + }; + + // Safety: We've correctly setup the PyType_Spec at this point + let type_object = unsafe { ffi::PyType_FromSpec(&mut spec) }; + if type_object.is_null() { + Err(PyErr::fetch(py)) + } else { + for cleanup in std::mem::take(&mut self.cleanup) { + cleanup(&self, type_object as _); + } + + Ok(type_object as _) + } + } +} + +#[cold] +fn type_object_creation_failed(py: Python<'_>, e: PyErr, name: &str) -> ! { + e.print(py); + panic!("An error occurred while initializing class {}", name) +} + +fn py_class_doc(class_doc: &str) -> Option<*mut c_char> { + match class_doc { + "\0" => None, + s => { + // To pass *mut pointer to python safely, leak a CString in whichever case + let cstring = if s.as_bytes().last() == Some(&0) { + CStr::from_bytes_with_nul(s.as_bytes()) + .unwrap_or_else(|e| panic!("doc contains interior nul byte: {:?} in {}", e, s)) + .to_owned() + } else { + CString::new(s) + .unwrap_or_else(|e| panic!("doc contains interior nul byte: {:?} in {}", e, s)) + }; + Some(cstring.into_raw()) + } + } +} + +fn py_class_qualified_name(module_name: Option<&str>, class_name: &str) -> PyResult<*mut c_char> { + Ok(CString::new(format!( + "{}.{}", + module_name.unwrap_or("builtins"), + class_name + ))? + .into_raw()) +} + +/// Default new implementation +unsafe extern "C" fn no_constructor_defined( + _subtype: *mut ffi::PyTypeObject, + _args: *mut ffi::PyObject, + _kwds: *mut ffi::PyObject, +) -> *mut ffi::PyObject { + crate::impl_::trampoline::trampoline_inner(|_| { + Err(crate::exceptions::PyTypeError::new_err( + "No constructor defined", + )) + }) +} diff --git a/src/type_object.rs b/src/type_object.rs index 53082dbd..a4a84d6b 100644 --- a/src/type_object.rs +++ b/src/type_object.rs @@ -1,17 +1,8 @@ // Copyright (c) 2017-present PyO3 Project and Contributors //! Python type object information -use crate::impl_::pyclass::PyClassItemsIter; -use crate::once_cell::GILOnceCell; -use crate::pyclass::create_type_object; -use crate::pyclass::PyClass; use crate::types::{PyAny, PyType}; -use crate::{conversion::IntoPyPointer, PyMethodDefType}; -use crate::{ffi, AsPyPointer, PyNativeType, PyObject, PyResult, Python}; -use parking_lot::{const_mutex, Mutex}; -use std::borrow::Cow; -use std::ffi::CStr; -use std::thread::{self, ThreadId}; +use crate::{ffi, AsPyPointer, PyNativeType, Python}; /// `T: PyLayout` represents that `T` is a concrete representation of `U` in the Python heap. /// E.g., `PyCell` is a concrete representation of all `pyclass`es, and `ffi::PyObject` @@ -85,153 +76,6 @@ pub unsafe trait PyTypeObject: PyTypeInfo {} #[allow(deprecated)] unsafe impl PyTypeObject for T {} -/// Lazy type object for PyClass. -#[doc(hidden)] -pub struct LazyStaticType { - // Boxed because Python expects the type object to have a stable address. - value: GILOnceCell<*mut ffi::PyTypeObject>, - // Threads which have begun initialization of the `tp_dict`. Used for - // reentrant initialization detection. - initializing_threads: Mutex>, - tp_dict_filled: GILOnceCell>, -} - -impl LazyStaticType { - /// Creates an uninitialized `LazyStaticType`. - pub const fn new() -> Self { - LazyStaticType { - value: GILOnceCell::new(), - initializing_threads: const_mutex(Vec::new()), - tp_dict_filled: GILOnceCell::new(), - } - } - - /// Gets the type object contained by this `LazyStaticType`, initializing it if needed. - pub fn get_or_init(&self, py: Python<'_>) -> *mut ffi::PyTypeObject { - fn inner() -> *mut ffi::PyTypeObject { - // Safety: `py` is held by the caller of `get_or_init`. - let py = unsafe { Python::assume_gil_acquired() }; - create_type_object::(py) - } - - // Uses explicit GILOnceCell::get_or_init:: *mut ffi::PyTypeObject> monomorphization - // so that only this one monomorphization is instantiated (instead of one closure monormization for each T). - let type_object = *self - .value - .get_or_init:: *mut ffi::PyTypeObject>(py, inner::); - self.ensure_init(py, type_object, T::NAME, T::items_iter()); - type_object - } - - fn ensure_init( - &self, - py: Python<'_>, - type_object: *mut ffi::PyTypeObject, - name: &str, - items_iter: PyClassItemsIter, - ) { - // We might want to fill the `tp_dict` with python instances of `T` - // itself. In order to do so, we must first initialize the type object - // with an empty `tp_dict`: now we can create instances of `T`. - // - // Then we fill the `tp_dict`. Multiple threads may try to fill it at - // the same time, but only one of them will succeed. - // - // More importantly, if a thread is performing initialization of the - // `tp_dict`, it can still request the type object through `get_or_init`, - // but the `tp_dict` may appear empty of course. - - if self.tp_dict_filled.get(py).is_some() { - // `tp_dict` is already filled: ok. - return; - } - - let thread_id = thread::current().id(); - { - let mut threads = self.initializing_threads.lock(); - if threads.contains(&thread_id) { - // Reentrant call: just return the type object, even if the - // `tp_dict` is not filled yet. - return; - } - threads.push(thread_id); - } - - struct InitializationGuard<'a> { - initializing_threads: &'a Mutex>, - thread_id: ThreadId, - } - impl Drop for InitializationGuard<'_> { - fn drop(&mut self) { - let mut threads = self.initializing_threads.lock(); - threads.retain(|id| *id != self.thread_id); - } - } - - let guard = InitializationGuard { - initializing_threads: &self.initializing_threads, - thread_id, - }; - - // Pre-compute the class attribute objects: this can temporarily - // release the GIL since we're calling into arbitrary user code. It - // means that another thread can continue the initialization in the - // meantime: at worst, we'll just make a useless computation. - let mut items = vec![]; - for class_items in items_iter { - for def in class_items.methods { - if let PyMethodDefType::ClassAttribute(attr) = def { - let key = attr.attribute_c_string().unwrap(); - - match (attr.meth.0)(py) { - Ok(val) => items.push((key, val)), - Err(e) => panic!( - "An error occurred while initializing `{}.{}`: {}", - name, - attr.name.trim_end_matches('\0'), - e - ), - } - } - } - } - - // Now we hold the GIL and we can assume it won't be released until we - // return from the function. - let result = self.tp_dict_filled.get_or_init(py, move || { - let result = initialize_tp_dict(py, type_object as *mut ffi::PyObject, items); - - // Initialization successfully complete, can clear the thread list. - // (No further calls to get_or_init() will try to init, on any thread.) - std::mem::forget(guard); - *self.initializing_threads.lock() = Vec::new(); - result - }); - - if let Err(err) = result { - err.clone_ref(py).print(py); - panic!("An error occurred while initializing `{}.__dict__`", name); - } - } -} - -fn initialize_tp_dict( - py: Python<'_>, - type_object: *mut ffi::PyObject, - items: Vec<(Cow<'static, CStr>, PyObject)>, -) -> PyResult<()> { - // We hold the GIL: the dictionary update can be considered atomic from - // the POV of other threads. - for (key, val) in items { - let ret = unsafe { ffi::PyObject_SetAttrString(type_object, key.as_ptr(), val.into_ptr()) }; - crate::err::error_on_minusone(py, ret)?; - } - Ok(()) -} - -// This is necessary for making static `LazyStaticType`s -unsafe impl Sync for LazyStaticType {} - #[inline] pub(crate) unsafe fn get_tp_alloc(tp: *mut ffi::PyTypeObject) -> Option { #[cfg(not(Py_LIMITED_API))]