From 56c218f70e2a15ac1f5a3a6f652525cd224c2962 Mon Sep 17 00:00:00 2001 From: mejrs <> Date: Sun, 12 Jun 2022 01:39:34 +0200 Subject: [PATCH 1/4] Fix annoying inference errors --- src/ffi/tests.rs | 7 +++++-- src/types/iterator.rs | 20 ++++++++++++++++---- src/types/set.rs | 4 ++-- src/types/tuple.rs | 14 +++++++------- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/ffi/tests.rs b/src/ffi/tests.rs index e6867901..fcb93ee0 100644 --- a/src/ffi/tests.rs +++ b/src/ffi/tests.rs @@ -135,7 +135,7 @@ fn ascii() { // _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings. assert!(!PyUnicode_DATA(ptr).is_null()); - assert_eq!(PyUnicode_GET_LENGTH(ptr), s.len().unwrap() as _); + assert_eq!(PyUnicode_GET_LENGTH(ptr), s.len().unwrap() as Py_ssize_t); assert_eq!(PyUnicode_IS_READY(ptr), 1); // This has potential to mutate object. But it should be a no-op since @@ -175,7 +175,10 @@ fn ucs4() { // _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings. assert!(!PyUnicode_DATA(ptr).is_null()); - assert_eq!(PyUnicode_GET_LENGTH(ptr), py_string.len().unwrap() as _); + assert_eq!( + PyUnicode_GET_LENGTH(ptr), + py_string.len().unwrap() as Py_ssize_t + ); assert_eq!(PyUnicode_IS_READY(ptr), 1); // This has potential to mutate object. But it should be a no-op since diff --git a/src/types/iterator.rs b/src/types/iterator.rs index 924ccea8..154d25fc 100644 --- a/src/types/iterator.rs +++ b/src/types/iterator.rs @@ -120,8 +120,14 @@ mod tests { let obj = vec![10, 20].to_object(py); let inst = obj.as_ref(py); let mut it = inst.iter().unwrap(); - assert_eq!(10, it.next().unwrap().unwrap().extract().unwrap()); - assert_eq!(20, it.next().unwrap().unwrap().extract().unwrap()); + assert_eq!( + 10_i32, + it.next().unwrap().unwrap().extract::<'_, i32>().unwrap() + ); + assert_eq!( + 20_i32, + it.next().unwrap().unwrap().extract::<'_, i32>().unwrap() + ); assert!(it.next().is_none()); }); } @@ -138,7 +144,10 @@ mod tests { let inst = obj.as_ref(py); let mut it = inst.iter().unwrap(); - assert_eq!(10, it.next().unwrap().unwrap().extract().unwrap()); + assert_eq!( + 10_i32, + it.next().unwrap().unwrap().extract::<'_, i32>().unwrap() + ); }); Python::with_gil(|py| { @@ -167,7 +176,10 @@ mod tests { let inst = obj.as_ref(py); let mut it = inst.iter().unwrap(); - assert_eq!(10, it.next().unwrap().unwrap().extract().unwrap()); + assert_eq!( + 10_i32, + it.next().unwrap().unwrap().extract::<'_, i32>().unwrap() + ); assert!(it.next().unwrap().unwrap().is_none()); } assert_eq!(count, none.get_refcnt(py)); diff --git a/src/types/set.rs b/src/types/set.rs index b083c476..cf97eb48 100644 --- a/src/types/set.rs +++ b/src/types/set.rs @@ -407,12 +407,12 @@ mod tests { // iter method for el in set.iter() { - assert_eq!(1i32, el.extract().unwrap()); + assert_eq!(1i32, el.extract::<'_, i32>().unwrap()); } // intoiterator iteration for el in set { - assert_eq!(1i32, el.extract().unwrap()); + assert_eq!(1i32, el.extract::<'_, i32>().unwrap()); } }); } diff --git a/src/types/tuple.rs b/src/types/tuple.rs index 4baa6901..c816a71a 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -508,13 +508,13 @@ mod tests { assert_eq!(iter.size_hint(), (3, Some(3))); - assert_eq!(1, iter.next().unwrap().extract().unwrap()); + assert_eq!(1_i32, iter.next().unwrap().extract::<'_, i32>().unwrap()); assert_eq!(iter.size_hint(), (2, Some(2))); - assert_eq!(2, iter.next().unwrap().extract().unwrap()); + assert_eq!(2_i32, iter.next().unwrap().extract::<'_, i32>().unwrap()); assert_eq!(iter.size_hint(), (1, Some(1))); - assert_eq!(3, iter.next().unwrap().extract().unwrap()); + assert_eq!(3_i32, iter.next().unwrap().extract::<'_, i32>().unwrap()); assert_eq!(iter.size_hint(), (0, Some(0))); }); } @@ -527,7 +527,7 @@ mod tests { assert_eq!(3, tuple.len()); for (i, item) in tuple.iter().enumerate() { - assert_eq!(i + 1, item.extract().unwrap()); + assert_eq!(i + 1, item.extract::<'_, usize>().unwrap()); } }); } @@ -541,9 +541,9 @@ mod tests { let slice = tuple.as_slice(); assert_eq!(3, slice.len()); - assert_eq!(1, slice[0].extract().unwrap()); - assert_eq!(2, slice[1].extract().unwrap()); - assert_eq!(3, slice[2].extract().unwrap()); + assert_eq!(1_i32, slice[0].extract::<'_, i32>().unwrap()); + assert_eq!(2_i32, slice[1].extract::<'_, i32>().unwrap()); + assert_eq!(3_i32, slice[2].extract::<'_, i32>().unwrap()); }); } From 2d3a5852ed67ea655cef909dbe6cf914e7d48993 Mon Sep 17 00:00:00 2001 From: mejrs <> Date: Sun, 12 Jun 2022 15:11:39 +0200 Subject: [PATCH 2/4] Avoid UB in *_offset methods --- src/pycell.rs | 123 +++++++++++++++++++++++++------------ tests/test_class_basics.rs | 68 +++++++++++++++++--- 2 files changed, 144 insertions(+), 47 deletions(-) diff --git a/src/pycell.rs b/src/pycell.rs index 928e7c3d..fdf9bf5b 100644 --- a/src/pycell.rs +++ b/src/pycell.rs @@ -433,6 +433,8 @@ pub struct PyCell { contents: PyCellContents, } +// This struct has mirrored definitions in `weaklist_offset` and `dict_offset`, +// who need the same struct layout to function correctly. #[repr(C)] pub(crate) struct PyCellContents { pub(crate) value: ManuallyDrop>, @@ -626,30 +628,52 @@ impl PyCell { /// Gets the offset of the dictionary from the start of the struct in bytes. pub(crate) fn dict_offset() -> ffi::Py_ssize_t { use std::convert::TryInto; + use std::mem::MaybeUninit; + #[cfg(addr_of)] - let offset = { - // With std::ptr::addr_of - can measure offset using uninit memory without UB. - let cell = std::mem::MaybeUninit::::uninit(); + let offset = unsafe { + // The much simpler way, using addr_of! + let cell = MaybeUninit::::uninit(); let base_ptr = cell.as_ptr(); - let dict_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.dict) }; - unsafe { (dict_ptr as *const u8).offset_from(base_ptr as *const u8) } + let dict_ptr = std::ptr::addr_of!((*base_ptr).contents.dict); + (dict_ptr.cast::()).offset_from(base_ptr.cast::()) }; #[cfg(not(addr_of))] - let offset = { - // No std::ptr::addr_of - need to take references to PyCell to measure offsets; - // make a zero-initialised "fake" one so that referencing it is not UB. - let mut cell = std::mem::MaybeUninit::::uninit(); - unsafe { - std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1); + let offset = unsafe { + // The annoying way using a dummy struct because there's no way to get + // a pointer without going through a reference, which is insta-UB + // if it is not initialized. + + use std::mem::size_of; + + #[repr(C)] + struct PyCellDummy { + ob_base: MaybeUninit<::LayoutAsBase>, + contents: PyCellContentsDummy, } - let cell = unsafe { cell.assume_init() }; - let dict_ptr = &cell.contents.dict; - // offset_from wasn't stabilised until 1.47, so we also have to work around - // that... - let offset = (dict_ptr as *const _ as usize) - (&cell as *const _ as usize); - // This isn't a valid cell, so ensure no Drop code runs etc. - std::mem::forget(cell); - offset + + #[repr(C)] + struct PyCellContentsDummy { + value: MaybeUninit, + borrow_checker: MaybeUninit<::Storage>, + thread_checker: MaybeUninit, + dict: MaybeUninit, + weakref: MaybeUninit, + } + + assert_eq!(size_of::>(), size_of::>()); + assert_eq!( + size_of::>(), + size_of::>() + ); + + // All of the pycelldummy's fields are maybeuninit, which require no initialization + let cell = MaybeUninit::>::uninit().assume_init(); + + let base_ptr: *const PyCellDummy = &cell; + let dict_ptr: *const MaybeUninit = &(*base_ptr).contents.dict; + + (dict_ptr.cast::()).offset_from(base_ptr.cast::()) }; // Py_ssize_t may not be equal to isize on all platforms #[allow(clippy::useless_conversion)] @@ -659,30 +683,53 @@ impl PyCell { /// Gets the offset of the weakref list from the start of the struct in bytes. pub(crate) fn weaklist_offset() -> ffi::Py_ssize_t { use std::convert::TryInto; + use std::mem::MaybeUninit; + #[cfg(addr_of)] - let offset = { - // With std::ptr::addr_of - can measure offset using uninit memory without UB. - let cell = std::mem::MaybeUninit::::uninit(); + let offset = unsafe { + // The much simpler way, using addr_of! + let cell = MaybeUninit::::uninit(); let base_ptr = cell.as_ptr(); - let weaklist_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.weakref) }; - unsafe { (weaklist_ptr as *const u8).offset_from(base_ptr as *const u8) } + let weaklist_ptr = std::ptr::addr_of!((*base_ptr).contents.weakref); + (weaklist_ptr.cast::()).offset_from(base_ptr.cast::()) }; #[cfg(not(addr_of))] - let offset = { - // No std::ptr::addr_of - need to take references to PyCell to measure offsets; - // make a zero-initialised "fake" one so that referencing it is not UB. - let mut cell = std::mem::MaybeUninit::::uninit(); - unsafe { - std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1); + let offset = unsafe { + // The annoying way using a dummy struct because there's no way to get + // a pointer without going through a reference, which is insta-UB + // if it is not initialized. + + use std::mem::size_of; + + #[repr(C)] + struct PyCellDummy { + ob_base: MaybeUninit<::LayoutAsBase>, + contents: PyCellContentsDummy, } - let cell = unsafe { cell.assume_init() }; - let weaklist_ptr = &cell.contents.weakref; - // offset_from wasn't stabilised until 1.47, so we also have to work around - // that... - let offset = (weaklist_ptr as *const _ as usize) - (&cell as *const _ as usize); - // This isn't a valid cell, so ensure no Drop code runs etc. - std::mem::forget(cell); - offset + + #[repr(C)] + struct PyCellContentsDummy { + value: MaybeUninit, + borrow_checker: MaybeUninit<::Storage>, + thread_checker: MaybeUninit, + dict: MaybeUninit, + weakref: MaybeUninit, + } + + assert_eq!(size_of::>(), size_of::>()); + assert_eq!( + size_of::>(), + size_of::>() + ); + + // All of the pycelldummy's fields are maybeuninit, which require no initialization + let cell = MaybeUninit::>::uninit().assume_init(); + + let base_ptr: *const PyCellDummy = &cell; + let weaklist_ptr: *const MaybeUninit<::WeakRef> = + &(*base_ptr).contents.weakref; + + (weaklist_ptr.cast::()).offset_from(base_ptr.cast::()) }; // Py_ssize_t may not be equal to isize on all platforms #[allow(clippy::useless_conversion)] diff --git a/tests/test_class_basics.rs b/tests/test_class_basics.rs index b3ebc5df..dd6560f2 100644 --- a/tests/test_class_basics.rs +++ b/tests/test_class_basics.rs @@ -347,14 +347,23 @@ fn test_tuple_struct_class() { } #[pyclass(dict, subclass)] -struct DunderDictSupport {} +struct DunderDictSupport { + // Make sure that dict_offset runs with non-zero sized Self + _pad: [u8; 32], +} #[test] #[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)] fn dunder_dict_support() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, DunderDictSupport {}).unwrap(); + let inst = PyCell::new( + py, + DunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -371,7 +380,13 @@ fn dunder_dict_support() { fn access_dunder_dict() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, DunderDictSupport {}).unwrap(); + let inst = PyCell::new( + py, + DunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -393,7 +408,16 @@ struct InheritDict { fn inherited_dict() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, (InheritDict { _value: 0 }, DunderDictSupport {})).unwrap(); + let inst = PyCell::new( + py, + ( + InheritDict { _value: 0 }, + DunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ), + ) + .unwrap(); py_run!( py, inst, @@ -405,14 +429,23 @@ fn inherited_dict() { } #[pyclass(weakref, dict)] -struct WeakRefDunderDictSupport {} +struct WeakRefDunderDictSupport { + // Make sure that weaklist_offset runs with non-zero sized Self + _pad: [u8; 32], +} #[test] #[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)] fn weakref_dunder_dict_support() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, WeakRefDunderDictSupport {}).unwrap(); + let inst = PyCell::new( + py, + WeakRefDunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -421,14 +454,22 @@ fn weakref_dunder_dict_support() { } #[pyclass(weakref, subclass)] -struct WeakRefSupport {} +struct WeakRefSupport { + _pad: [u8; 32], +} #[test] #[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)] fn weakref_support() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, WeakRefSupport {}).unwrap(); + let inst = PyCell::new( + py, + WeakRefSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -447,7 +488,16 @@ struct InheritWeakRef { fn inherited_weakref() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, (InheritWeakRef { _value: 0 }, WeakRefSupport {})).unwrap(); + let inst = PyCell::new( + py, + ( + InheritWeakRef { _value: 0 }, + WeakRefSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ), + ) + .unwrap(); py_run!( py, inst, From 2d94cb4a2d3d8b71ad97bde1dae00e12c7272b6a Mon Sep 17 00:00:00 2001 From: mejrs <> Date: Sun, 12 Jun 2022 18:28:21 +0200 Subject: [PATCH 3/4] use memoffset instead --- Cargo.toml | 1 + src/pycell.rs | 97 +++------------------------------------------------ 2 files changed, 5 insertions(+), 93 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 99f6a96c..c6699fd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ edition = "2018" cfg-if = "1.0" libc = "0.2.62" parking_lot = ">= 0.11, < 0.13" +memoffset = "0.6.5" # ffi bindings to the python interpreter, split into a separate crate so they can be used independently pyo3-ffi = { path = "pyo3-ffi", version = "=0.16.5" } diff --git a/src/pycell.rs b/src/pycell.rs index fdf9bf5b..b1f084c1 100644 --- a/src/pycell.rs +++ b/src/pycell.rs @@ -433,8 +433,6 @@ pub struct PyCell { contents: PyCellContents, } -// This struct has mirrored definitions in `weaklist_offset` and `dict_offset`, -// who need the same struct layout to function correctly. #[repr(C)] pub(crate) struct PyCellContents { pub(crate) value: ManuallyDrop>, @@ -627,54 +625,11 @@ impl PyCell { /// Gets the offset of the dictionary from the start of the struct in bytes. pub(crate) fn dict_offset() -> ffi::Py_ssize_t { + use memoffset::offset_of; use std::convert::TryInto; - use std::mem::MaybeUninit; - #[cfg(addr_of)] - let offset = unsafe { - // The much simpler way, using addr_of! - let cell = MaybeUninit::::uninit(); - let base_ptr = cell.as_ptr(); - let dict_ptr = std::ptr::addr_of!((*base_ptr).contents.dict); - (dict_ptr.cast::()).offset_from(base_ptr.cast::()) - }; - #[cfg(not(addr_of))] - let offset = unsafe { - // The annoying way using a dummy struct because there's no way to get - // a pointer without going through a reference, which is insta-UB - // if it is not initialized. + let offset = offset_of!(PyCell, contents) + offset_of!(PyCellContents, dict); - use std::mem::size_of; - - #[repr(C)] - struct PyCellDummy { - ob_base: MaybeUninit<::LayoutAsBase>, - contents: PyCellContentsDummy, - } - - #[repr(C)] - struct PyCellContentsDummy { - value: MaybeUninit, - borrow_checker: MaybeUninit<::Storage>, - thread_checker: MaybeUninit, - dict: MaybeUninit, - weakref: MaybeUninit, - } - - assert_eq!(size_of::>(), size_of::>()); - assert_eq!( - size_of::>(), - size_of::>() - ); - - // All of the pycelldummy's fields are maybeuninit, which require no initialization - let cell = MaybeUninit::>::uninit().assume_init(); - - let base_ptr: *const PyCellDummy = &cell; - let dict_ptr: *const MaybeUninit = &(*base_ptr).contents.dict; - - (dict_ptr.cast::()).offset_from(base_ptr.cast::()) - }; // Py_ssize_t may not be equal to isize on all platforms #[allow(clippy::useless_conversion)] offset.try_into().expect("offset should fit in Py_ssize_t") @@ -682,55 +637,11 @@ impl PyCell { /// Gets the offset of the weakref list from the start of the struct in bytes. pub(crate) fn weaklist_offset() -> ffi::Py_ssize_t { + use memoffset::offset_of; use std::convert::TryInto; - use std::mem::MaybeUninit; - #[cfg(addr_of)] - let offset = unsafe { - // The much simpler way, using addr_of! - let cell = MaybeUninit::::uninit(); - let base_ptr = cell.as_ptr(); - let weaklist_ptr = std::ptr::addr_of!((*base_ptr).contents.weakref); - (weaklist_ptr.cast::()).offset_from(base_ptr.cast::()) - }; - #[cfg(not(addr_of))] - let offset = unsafe { - // The annoying way using a dummy struct because there's no way to get - // a pointer without going through a reference, which is insta-UB - // if it is not initialized. + let offset = offset_of!(PyCell, contents) + offset_of!(PyCellContents, weakref); - use std::mem::size_of; - - #[repr(C)] - struct PyCellDummy { - ob_base: MaybeUninit<::LayoutAsBase>, - contents: PyCellContentsDummy, - } - - #[repr(C)] - struct PyCellContentsDummy { - value: MaybeUninit, - borrow_checker: MaybeUninit<::Storage>, - thread_checker: MaybeUninit, - dict: MaybeUninit, - weakref: MaybeUninit, - } - - assert_eq!(size_of::>(), size_of::>()); - assert_eq!( - size_of::>(), - size_of::>() - ); - - // All of the pycelldummy's fields are maybeuninit, which require no initialization - let cell = MaybeUninit::>::uninit().assume_init(); - - let base_ptr: *const PyCellDummy = &cell; - let weaklist_ptr: *const MaybeUninit<::WeakRef> = - &(*base_ptr).contents.weakref; - - (weaklist_ptr.cast::()).offset_from(base_ptr.cast::()) - }; // Py_ssize_t may not be equal to isize on all platforms #[allow(clippy::useless_conversion)] offset.try_into().expect("offset should fit in Py_ssize_t") From e19c364fc9b39345c9d9da5ed90b7ce262bfcdc8 Mon Sep 17 00:00:00 2001 From: mejrs <> Date: Tue, 14 Jun 2022 12:53:00 +0200 Subject: [PATCH 4/4] Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ec080b6..271d4199 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix FFI definitions `PyTypeObject`. `PyHeapTypeObject`, and `PyCFunctionObject` having incorrect members on PyPy 3.9. [#2428](https://github.com/PyO3/pyo3/pull/2428) - Fix FFI definition `PyGetSetDef` to have `*const c_char` for `doc` member (not `*mut c_char`). [#2439](https://github.com/PyO3/pyo3/pull/2439) - Fix `#[pyo3(from_py_with = "...")]` being ignored for 1-element tuple structs and transparent structs. [#2440](https://github.com/PyO3/pyo3/pull/2440) +- Use `memoffset` for computing PyCell offsets [#2450](https://github.com/PyO3/pyo3/pull/2450) ## [0.16.5] - 2022-05-15