pymethods: seq methods from mapping methods

This commit is contained in:
David Hewitt 2021-11-06 11:21:15 +00:00
parent 4305a9a45a
commit 53c170078d
8 changed files with 377 additions and 104 deletions

View File

@ -44,6 +44,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `PyErr::new_type` now takes an optional docstring and now returns `PyResult<Py<PyType>>` rather than a `ffi::PyTypeObject` pointer.
- The `create_exception!` macro can now take an optional docstring. This docstring, if supplied, is visible to users (with `.__doc__` and `help()`) and
accompanies your error type in your crate's documentation.
- `__getitem__`, `__setitem__` and `__delitem__` in `#[pymethods]` now implement both a Python mapping and sequence by default. [#2065](https://github.com/PyO3/pyo3/pull/2065)
- Improve performance and error messages for `#[derive(FromPyObject)]` for enums. [#2068](https://github.com/PyO3/pyo3/pull/2068)
- Reduce generated LLVM code size (to improve compile times) for:
- internal `handle_panic` helper [#2074](https://github.com/PyO3/pyo3/pull/2074)

View File

@ -18,7 +18,6 @@ In PyO3 0.15, if a function name in `#[pymethods]` is a recognised magic method,
The magic methods handled by PyO3 are very similar to the standard Python ones on [this page](https://docs.python.org/3/reference/datamodel.html#special-method-names) - in particular they are the the subset which have slots as [defined here](https://docs.python.org/3/c-api/typeobj.html). Some of the slots do not have a magic method in Python, which leads to a few additional magic methods defined only in PyO3:
- Magic methods for garbage collection
- Magic methods for the buffer protocol
- Magic methods for the sequence protocol
When PyO3 handles a magic method, a couple of changes apply compared to other `#[pymethods]`:
- The `#[pyo3(text_signature = "...")]` attribute is not allowed
@ -86,11 +85,7 @@ given signatures should be interpreted as follows:
- `__aiter__(<self>) -> object`
- `__anext__(<self>) -> Option<object> or IterANextOutput`
#### Sequence types
TODO; see [#1884](https://github.com/PyO3/pyo3/issues/1884)
#### Mapping types
#### Mapping & Sequence types
- `__len__(<self>) -> usize`
- `__contains__(<self>, object) -> bool`

View File

@ -5,10 +5,40 @@ For a detailed list of all changes, see the [CHANGELOG](changelog.md).
## from 0.15.* to 0.16
### Drop support for older technogies
### Drop support for older technologies
PyO3 0.16 has increased minimum Rust version to 1.48 and minimum Python version to 3.7. This enables ore use of newer language features (enabling some of the other additions in 0.16) and simplifies maintenance of the project.
### Container magic methods now match Python behavior
In PyO3 0.15, `__getitem__`, `__setitem__` and `__delitem__` in `#[pymethods]` would generate only the _mapping_ implementation for a `#[pyclass]`. To match the Python behavior, these methods now generate both the _mapping_ **and** _sequence_ implementations.
This means that classes implementing these `#[pymethods]` will now also be treated as sequences, same as a Python `class` would be. Small differences in behavior may result:
- PyO3 will allow instances of these classes to be cast to `PySequence` as well as `PyMapping`.
- Python will provide a default implementation of `__iter__` (if the class did not have one) which repeatedly calls `__getitem__` with integers (starting at 0) until an `IndexError` is raised.
To explain this in detail, consider the following Python class:
```python
class ExampleContainer:
def __len__(self):
return 5
def __getitem__(self, idx: int) -> int:
if idx < 0 or idx > 5:
raise IndexError()
return idx
```
This class implements a Python [sequence](https://docs.python.org/3/glossary.html#term-sequence).
The `__len__` and `__getitem__` methods are also used to implement a Python [mapping](https://docs.python.org/3/glossary.html#term-mapping). In the Python C-API, these methods are not shared: the sequence `__len__` and `__getitem__` are defined by the `sq_len` and `sq_item` slots, and the mapping equivalents are `mp_len` and `mp_subscript`. There are similar distinctions for `__setitem__` and `__delitem__`.
Because there is no such distinction from Python, implementing these methods will fill the mapping and sequence slots simultaneously. A Python class with `__len__` implemented, for example, will have both the `sq_len` and `mp_len` slots filled.
The PyO3 behavior in 0.16 has been changed to be closer to this Python behavior by default.
## from 0.14.* to 0.15
### Changes in sequence indexing

View File

@ -293,6 +293,8 @@ fn add_shared_proto_slots(
);
try_add_shared_slot!("__pow__", "__rpow__", generate_pyclass_pow_slot);
// if this assertion trips, a slot fragment has been implemented which has not been added in the
// list above
assert!(implemented_proto_fragments.is_empty());
}

View File

@ -704,8 +704,21 @@ impl Ty {
let #ident = #extract;
}
}
Ty::PySsizeT => {
let ty = arg.ty;
let extract = handle_error(
extract_error_mode,
py,
quote! {
::std::convert::TryInto::<#ty>::try_into(#ident).map_err(|e| _pyo3::exceptions::PyValueError::new_err(e.to_string()))
},
);
quote! {
let #ident = #extract;
}
}
// Just pass other types through unmodified
Ty::PyBuffer | Ty::Int | Ty::PyHashT | Ty::PySsizeT | Ty::Void => quote! {},
Ty::PyBuffer | Ty::Int | Ty::PyHashT | Ty::Void => quote! {},
}
}
}

View File

@ -9,7 +9,12 @@ use crate::{
type_object::{PyLayout, PyTypeObject},
PyClass, PyMethodDefType, PyNativeType, PyResult, PyTypeInfo, Python,
};
use std::{marker::PhantomData, os::raw::c_void, ptr::NonNull, thread};
use std::{
marker::PhantomData,
os::raw::{c_int, c_void},
ptr::NonNull,
thread,
};
/// This type is used as a "dummy" type on which dtolnay specializations are
/// applied to apply implementations from `#[pymethods]` & `#[pyproto]`
@ -780,3 +785,34 @@ pub(crate) unsafe extern "C" fn fallback_new(
pub(crate) unsafe extern "C" fn tp_dealloc<T: PyClass>(obj: *mut ffi::PyObject) {
crate::callback_body!(py, T::Layout::tp_dealloc(obj, py))
}
pub(crate) unsafe extern "C" fn get_sequence_item_from_mapping(
obj: *mut ffi::PyObject,
index: ffi::Py_ssize_t,
) -> *mut ffi::PyObject {
let index = ffi::PyLong_FromSsize_t(index);
if index.is_null() {
return std::ptr::null_mut();
}
let result = ffi::PyObject_GetItem(obj, index);
ffi::Py_DECREF(index);
result
}
pub(crate) unsafe extern "C" fn assign_sequence_item_from_mapping(
obj: *mut ffi::PyObject,
index: ffi::Py_ssize_t,
value: *mut ffi::PyObject,
) -> c_int {
let index = ffi::PyLong_FromSsize_t(index);
if index.is_null() {
return -1;
}
let result = if value.is_null() {
ffi::PyObject_DelItem(obj, index)
} else {
ffi::PyObject_SetItem(obj, index, value)
};
ffi::Py_DECREF(index);
result
}

View File

@ -1,6 +1,9 @@
//! `PyClass` and related traits.
use crate::{
class::impl_::{fallback_new, tp_dealloc, PyClassImpl},
class::impl_::{
assign_sequence_item_from_mapping, fallback_new, get_sequence_item_from_mapping,
tp_dealloc, PyClassImpl,
},
ffi,
impl_::pyclass::{PyClassDict, PyClassWeakRef},
PyCell, PyErr, PyMethodDefType, PyNativeType, PyResult, PyTypeInfo, Python,
@ -114,28 +117,35 @@ unsafe fn create_type_object_impl(
}
}
let PyClassInfo {
method_defs,
property_defs,
} = method_defs_to_pyclass_info(for_each_method_def, dict_offset.is_none());
// normal methods
let methods = py_class_method_defs(for_each_method_def);
if !methods.is_empty() {
push_slot(&mut slots, ffi::Py_tp_methods, into_raw(methods));
if !method_defs.is_empty() {
push_slot(&mut slots, ffi::Py_tp_methods, into_raw(method_defs));
}
// properties
let props = py_class_properties(dict_offset.is_none(), for_each_method_def);
if !props.is_empty() {
push_slot(&mut slots, ffi::Py_tp_getset, into_raw(props));
if !property_defs.is_empty() {
push_slot(&mut slots, ffi::Py_tp_getset, into_raw(property_defs));
}
// protocol methods
let mut has_getitem = false;
let mut has_setitem = false;
let mut has_gc_methods = false;
// Before Python 3.9, need to patch in buffer methods manually (they don't work in slots)
#[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))]
let mut buffer_procs: ffi::PyBufferProcs = Default::default();
for_each_proto_slot(&mut |proto_slots| {
for slot in proto_slots {
has_getitem |= slot.slot == ffi::Py_mp_subscript;
has_setitem |= slot.slot == ffi::Py_mp_ass_subscript;
has_gc_methods |= slot.slot == ffi::Py_tp_clear || slot.slot == ffi::Py_tp_traverse;
#[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))]
if slot.slot == ffi::Py_bf_getbuffer {
// Safety: slot.pfunc is a valid function pointer
@ -151,7 +161,31 @@ unsafe fn create_type_object_impl(
slots.extend_from_slice(proto_slots);
});
// If mapping methods implemented, define sequence methods get implemented too.
// CPython does the same for Python `class` statements.
// NB we don't implement sq_length to avoid annoying CPython behaviour of automatically adding
// the length to negative indices.
if has_getitem {
push_slot(
&mut slots,
ffi::Py_sq_item,
get_sequence_item_from_mapping as _,
);
}
if has_setitem {
push_slot(
&mut slots,
ffi::Py_sq_ass_item,
assign_sequence_item_from_mapping as _,
);
}
// Add empty sentinel at the end
push_slot(&mut slots, 0, ptr::null_mut());
let mut spec = ffi::PyType_Spec {
name: py_class_qualified_name(module_name, name)?,
basicsize: basicsize as c_int,
@ -282,26 +316,76 @@ fn py_class_flags(has_gc_methods: bool, is_gc: bool, is_basetype: bool) -> c_uin
flags.try_into().unwrap()
}
fn py_class_method_defs(
for_each_method_def: &dyn Fn(&mut dyn FnMut(&[PyMethodDefType])),
) -> Vec<ffi::PyMethodDef> {
let mut defs = Vec::new();
struct PyClassInfo {
method_defs: Vec<ffi::PyMethodDef>,
property_defs: Vec<ffi::PyGetSetDef>,
}
for_each_method_def(&mut |method_defs| {
defs.extend(method_defs.iter().filter_map(|def| match def {
PyMethodDefType::Method(def)
| PyMethodDefType::Class(def)
| PyMethodDefType::Static(def) => Some(def.as_method_def().unwrap()),
_ => None,
}));
fn method_defs_to_pyclass_info(
for_each_method_def: &dyn Fn(&mut dyn FnMut(&[PyMethodDefType])),
has_dict: bool,
) -> PyClassInfo {
let mut method_defs = Vec::new();
let mut property_defs_map = std::collections::HashMap::new();
for_each_method_def(&mut |class_method_defs| {
for def in class_method_defs {
match def {
PyMethodDefType::Getter(getter) => {
getter.copy_to(
property_defs_map
.entry(getter.name)
.or_insert(PY_GET_SET_DEF_INIT),
);
}
PyMethodDefType::Setter(setter) => {
setter.copy_to(
property_defs_map
.entry(setter.name)
.or_insert(PY_GET_SET_DEF_INIT),
);
}
PyMethodDefType::Method(def)
| PyMethodDefType::Class(def)
| PyMethodDefType::Static(def) => method_defs.push(def.as_method_def().unwrap()),
PyMethodDefType::ClassAttribute(_) => {}
}
}
});
if !defs.is_empty() {
// TODO: use into_values when on MSRV Rust >= 1.54
let mut property_defs: Vec<_> = property_defs_map
.into_iter()
.map(|(_, value)| value)
.collect();
if !method_defs.is_empty() {
// Safety: Python expects a zeroed entry to mark the end of the defs
defs.push(unsafe { std::mem::zeroed() });
method_defs.push(unsafe { std::mem::zeroed() });
}
defs
// PyPy doesn't automatically add __dict__ getter / setter.
// PyObject_GenericGetDict not in the limited API until Python 3.10.
if !has_dict {
#[cfg(not(any(PyPy, all(Py_LIMITED_API, not(Py_3_10)))))]
property_defs.push(ffi::PyGetSetDef {
name: "__dict__\0".as_ptr() as *mut c_char,
get: Some(ffi::PyObject_GenericGetDict),
set: Some(ffi::PyObject_GenericSetDict),
doc: ptr::null_mut(),
closure: ptr::null_mut(),
});
}
if !property_defs.is_empty() {
// Safety: Python expects a zeroed entry to mark the end of the defs
property_defs.push(unsafe { std::mem::zeroed() });
}
PyClassInfo {
method_defs,
property_defs,
}
}
/// Generates the __dictoffset__ and __weaklistoffset__ members, to set tp_dictoffset and
@ -351,52 +435,3 @@ const PY_GET_SET_DEF_INIT: ffi::PyGetSetDef = ffi::PyGetSetDef {
doc: ptr::null_mut(),
closure: ptr::null_mut(),
};
fn py_class_properties(
is_dummy: bool,
for_each_method_def: &dyn Fn(&mut dyn FnMut(&[PyMethodDefType])),
) -> Vec<ffi::PyGetSetDef> {
let mut defs = std::collections::HashMap::new();
for_each_method_def(&mut |method_defs| {
for def in method_defs {
match def {
PyMethodDefType::Getter(getter) => {
getter.copy_to(defs.entry(getter.name).or_insert(PY_GET_SET_DEF_INIT));
}
PyMethodDefType::Setter(setter) => {
setter.copy_to(defs.entry(setter.name).or_insert(PY_GET_SET_DEF_INIT));
}
_ => (),
}
}
});
let mut props: Vec<_> = defs.values().cloned().collect();
// PyPy doesn't automatically adds __dict__ getter / setter.
// PyObject_GenericGetDict not in the limited API until Python 3.10.
push_dict_getset(&mut props, is_dummy);
if !props.is_empty() {
// Safety: Python expects a zeroed entry to mark the end of the defs
props.push(unsafe { std::mem::zeroed() });
}
props
}
#[cfg(not(any(PyPy, all(Py_LIMITED_API, not(Py_3_10)))))]
fn push_dict_getset(props: &mut Vec<ffi::PyGetSetDef>, is_dummy: bool) {
if !is_dummy {
props.push(ffi::PyGetSetDef {
name: "__dict__\0".as_ptr() as *mut c_char,
get: Some(ffi::PyObject_GenericGetDict),
set: Some(ffi::PyObject_GenericSetDict),
doc: ptr::null_mut(),
closure: ptr::null_mut(),
});
}
}
#[cfg(any(PyPy, all(Py_LIMITED_API, not(Py_3_10))))]
fn push_dict_getset(_: &mut Vec<ffi::PyGetSetDef>, _is_dummy: bool) {}

View File

@ -1,9 +1,9 @@
#![cfg(feature = "macros")]
use pyo3::exceptions::PyValueError;
use pyo3::types::{PyList, PySlice, PyType};
use pyo3::exceptions::{PyIndexError, PyValueError};
use pyo3::types::{PyDict, PyList, PyMapping, PySequence, PySlice, PyType};
use pyo3::{exceptions::PyAttributeError, prelude::*};
use pyo3::{ffi, py_run, AsPyPointer, PyCell};
use pyo3::{py_run, PyCell};
use std::{isize, iter};
mod common;
@ -166,37 +166,198 @@ fn test_bool() {
}
#[pyclass]
pub struct Len {
l: usize,
}
pub struct LenOverflow;
#[pymethods]
impl Len {
impl LenOverflow {
fn __len__(&self) -> usize {
self.l
(isize::MAX as usize) + 1
}
}
#[test]
fn len() {
let gil = Python::acquire_gil();
let py = gil.python();
fn len_overflow() {
Python::with_gil(|py| {
let inst = Py::new(py, LenOverflow).unwrap();
py_expect_exception!(py, inst, "len(inst)", PyOverflowError);
});
}
let inst = Py::new(py, Len { l: 10 }).unwrap();
py_assert!(py, inst, "len(inst) == 10");
unsafe {
assert_eq!(ffi::PyObject_Size(inst.as_ptr()), 10);
assert_eq!(ffi::PyMapping_Size(inst.as_ptr()), 10);
#[pyclass]
pub struct Mapping {
values: Py<PyDict>,
}
#[pymethods]
impl Mapping {
fn __len__(&self, py: Python) -> usize {
self.values.as_ref(py).len()
}
let inst = Py::new(
py,
Len {
l: (isize::MAX as usize) + 1,
},
)
.unwrap();
py_expect_exception!(py, inst, "len(inst)", PyOverflowError);
fn __getitem__<'a>(&'a self, key: &'a PyAny) -> PyResult<&'a PyAny> {
let any: &PyAny = self.values.as_ref(key.py()).as_ref();
any.get_item(key)
}
fn __setitem__(&self, key: &PyAny, value: &PyAny) -> PyResult<()> {
self.values.as_ref(key.py()).set_item(key, value)
}
fn __delitem__(&self, key: &PyAny) -> PyResult<()> {
self.values.as_ref(key.py()).del_item(key)
}
}
#[test]
fn mapping() {
Python::with_gil(|py| {
let inst = Py::new(
py,
Mapping {
values: PyDict::new(py).into(),
},
)
.unwrap();
//
let mapping: &PyMapping = inst.as_ref(py).downcast().unwrap();
py_assert!(py, inst, "len(inst) == 0");
py_run!(py, inst, "inst['foo'] = 'foo'");
py_assert!(py, inst, "inst['foo'] == 'foo'");
py_run!(py, inst, "del inst['foo']");
py_expect_exception!(py, inst, "inst['foo']", PyKeyError);
// Default iteration will call __getitem__ with integer indices
// which fails with a KeyError
py_expect_exception!(py, inst, "[*inst] == []", PyKeyError, "0");
// check mapping protocol
assert_eq!(mapping.len().unwrap(), 0);
mapping.set_item(0, 5).unwrap();
assert_eq!(mapping.len().unwrap(), 1);
assert_eq!(mapping.get_item(0).unwrap().extract::<u8>().unwrap(), 5);
mapping.del_item(0).unwrap();
assert_eq!(mapping.len().unwrap(), 0);
});
}
#[derive(FromPyObject)]
enum SequenceIndex<'a> {
Integer(isize),
Slice(&'a PySlice),
}
#[pyclass]
pub struct Sequence {
values: Vec<PyObject>,
}
#[pymethods]
impl Sequence {
fn __len__(&self) -> usize {
self.values.len()
}
fn __getitem__(&self, index: SequenceIndex) -> PyResult<PyObject> {
match index {
SequenceIndex::Integer(index) => {
let uindex = self.usize_index(index)?;
self.values
.get(uindex)
.map(Clone::clone)
.ok_or_else(|| PyIndexError::new_err(index))
}
// Just to prove that slicing can be implemented
SequenceIndex::Slice(s) => Ok(s.into()),
}
}
fn __setitem__(&mut self, index: isize, value: PyObject) -> PyResult<()> {
let uindex = self.usize_index(index)?;
self.values
.get_mut(uindex)
.map(|place| *place = value)
.ok_or_else(|| PyIndexError::new_err(index))
}
fn __delitem__(&mut self, index: isize) -> PyResult<()> {
let uindex = self.usize_index(index)?;
if uindex >= self.values.len() {
Err(PyIndexError::new_err(index))
} else {
self.values.remove(uindex);
Ok(())
}
}
fn append(&mut self, value: PyObject) {
self.values.push(value);
}
}
impl Sequence {
fn usize_index(&self, index: isize) -> PyResult<usize> {
if index < 0 {
let corrected_index = index + self.values.len() as isize;
if corrected_index < 0 {
Err(PyIndexError::new_err(index))
} else {
Ok(corrected_index as usize)
}
} else {
Ok(index as usize)
}
}
}
#[test]
fn sequence() {
Python::with_gil(|py| {
let inst = Py::new(py, Sequence { values: vec![] }).unwrap();
let sequence: &PySequence = inst.as_ref(py).downcast().unwrap();
py_assert!(py, inst, "len(inst) == 0");
py_expect_exception!(py, inst, "inst[0]", PyIndexError);
py_run!(py, inst, "inst.append('foo')");
py_assert!(py, inst, "inst[0] == 'foo'");
py_assert!(py, inst, "inst[-1] == 'foo'");
py_expect_exception!(py, inst, "inst[1]", PyIndexError);
py_expect_exception!(py, inst, "inst[-2]", PyIndexError);
py_assert!(py, inst, "[*inst] == ['foo']");
py_run!(py, inst, "del inst[0]");
py_expect_exception!(py, inst, "inst['foo']", PyTypeError);
py_assert!(py, inst, "inst[0:2] == slice(0, 2)");
// check sequence protocol
// we don't implement sequence length so that CPython doesn't attempt to correct negative
// indices.
assert!(sequence.len().is_err());
// however regular python len() works thanks to mp_len slot
assert_eq!(inst.as_ref(py).len().unwrap(), 0);
py_run!(py, inst, "inst.append(0)");
sequence.set_item(0, 5).unwrap();
assert_eq!(inst.as_ref(py).len().unwrap(), 1);
assert_eq!(sequence.get_item(0).unwrap().extract::<u8>().unwrap(), 5);
sequence.del_item(0).unwrap();
assert_eq!(inst.as_ref(py).len().unwrap(), 0);
});
}
#[pyclass]