From 2d3a5852ed67ea655cef909dbe6cf914e7d48993 Mon Sep 17 00:00:00 2001 From: mejrs <> Date: Sun, 12 Jun 2022 15:11:39 +0200 Subject: [PATCH] Avoid UB in *_offset methods --- src/pycell.rs | 123 +++++++++++++++++++++++++------------ tests/test_class_basics.rs | 68 +++++++++++++++++--- 2 files changed, 144 insertions(+), 47 deletions(-) diff --git a/src/pycell.rs b/src/pycell.rs index 928e7c3d..fdf9bf5b 100644 --- a/src/pycell.rs +++ b/src/pycell.rs @@ -433,6 +433,8 @@ pub struct PyCell { contents: PyCellContents, } +// This struct has mirrored definitions in `weaklist_offset` and `dict_offset`, +// who need the same struct layout to function correctly. #[repr(C)] pub(crate) struct PyCellContents { pub(crate) value: ManuallyDrop>, @@ -626,30 +628,52 @@ impl PyCell { /// Gets the offset of the dictionary from the start of the struct in bytes. pub(crate) fn dict_offset() -> ffi::Py_ssize_t { use std::convert::TryInto; + use std::mem::MaybeUninit; + #[cfg(addr_of)] - let offset = { - // With std::ptr::addr_of - can measure offset using uninit memory without UB. - let cell = std::mem::MaybeUninit::::uninit(); + let offset = unsafe { + // The much simpler way, using addr_of! + let cell = MaybeUninit::::uninit(); let base_ptr = cell.as_ptr(); - let dict_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.dict) }; - unsafe { (dict_ptr as *const u8).offset_from(base_ptr as *const u8) } + let dict_ptr = std::ptr::addr_of!((*base_ptr).contents.dict); + (dict_ptr.cast::()).offset_from(base_ptr.cast::()) }; #[cfg(not(addr_of))] - let offset = { - // No std::ptr::addr_of - need to take references to PyCell to measure offsets; - // make a zero-initialised "fake" one so that referencing it is not UB. - let mut cell = std::mem::MaybeUninit::::uninit(); - unsafe { - std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1); + let offset = unsafe { + // The annoying way using a dummy struct because there's no way to get + // a pointer without going through a reference, which is insta-UB + // if it is not initialized. + + use std::mem::size_of; + + #[repr(C)] + struct PyCellDummy { + ob_base: MaybeUninit<::LayoutAsBase>, + contents: PyCellContentsDummy, } - let cell = unsafe { cell.assume_init() }; - let dict_ptr = &cell.contents.dict; - // offset_from wasn't stabilised until 1.47, so we also have to work around - // that... - let offset = (dict_ptr as *const _ as usize) - (&cell as *const _ as usize); - // This isn't a valid cell, so ensure no Drop code runs etc. - std::mem::forget(cell); - offset + + #[repr(C)] + struct PyCellContentsDummy { + value: MaybeUninit, + borrow_checker: MaybeUninit<::Storage>, + thread_checker: MaybeUninit, + dict: MaybeUninit, + weakref: MaybeUninit, + } + + assert_eq!(size_of::>(), size_of::>()); + assert_eq!( + size_of::>(), + size_of::>() + ); + + // All of the pycelldummy's fields are maybeuninit, which require no initialization + let cell = MaybeUninit::>::uninit().assume_init(); + + let base_ptr: *const PyCellDummy = &cell; + let dict_ptr: *const MaybeUninit = &(*base_ptr).contents.dict; + + (dict_ptr.cast::()).offset_from(base_ptr.cast::()) }; // Py_ssize_t may not be equal to isize on all platforms #[allow(clippy::useless_conversion)] @@ -659,30 +683,53 @@ impl PyCell { /// Gets the offset of the weakref list from the start of the struct in bytes. pub(crate) fn weaklist_offset() -> ffi::Py_ssize_t { use std::convert::TryInto; + use std::mem::MaybeUninit; + #[cfg(addr_of)] - let offset = { - // With std::ptr::addr_of - can measure offset using uninit memory without UB. - let cell = std::mem::MaybeUninit::::uninit(); + let offset = unsafe { + // The much simpler way, using addr_of! + let cell = MaybeUninit::::uninit(); let base_ptr = cell.as_ptr(); - let weaklist_ptr = unsafe { std::ptr::addr_of!((*base_ptr).contents.weakref) }; - unsafe { (weaklist_ptr as *const u8).offset_from(base_ptr as *const u8) } + let weaklist_ptr = std::ptr::addr_of!((*base_ptr).contents.weakref); + (weaklist_ptr.cast::()).offset_from(base_ptr.cast::()) }; #[cfg(not(addr_of))] - let offset = { - // No std::ptr::addr_of - need to take references to PyCell to measure offsets; - // make a zero-initialised "fake" one so that referencing it is not UB. - let mut cell = std::mem::MaybeUninit::::uninit(); - unsafe { - std::ptr::write_bytes(cell.as_mut_ptr(), 0, 1); + let offset = unsafe { + // The annoying way using a dummy struct because there's no way to get + // a pointer without going through a reference, which is insta-UB + // if it is not initialized. + + use std::mem::size_of; + + #[repr(C)] + struct PyCellDummy { + ob_base: MaybeUninit<::LayoutAsBase>, + contents: PyCellContentsDummy, } - let cell = unsafe { cell.assume_init() }; - let weaklist_ptr = &cell.contents.weakref; - // offset_from wasn't stabilised until 1.47, so we also have to work around - // that... - let offset = (weaklist_ptr as *const _ as usize) - (&cell as *const _ as usize); - // This isn't a valid cell, so ensure no Drop code runs etc. - std::mem::forget(cell); - offset + + #[repr(C)] + struct PyCellContentsDummy { + value: MaybeUninit, + borrow_checker: MaybeUninit<::Storage>, + thread_checker: MaybeUninit, + dict: MaybeUninit, + weakref: MaybeUninit, + } + + assert_eq!(size_of::>(), size_of::>()); + assert_eq!( + size_of::>(), + size_of::>() + ); + + // All of the pycelldummy's fields are maybeuninit, which require no initialization + let cell = MaybeUninit::>::uninit().assume_init(); + + let base_ptr: *const PyCellDummy = &cell; + let weaklist_ptr: *const MaybeUninit<::WeakRef> = + &(*base_ptr).contents.weakref; + + (weaklist_ptr.cast::()).offset_from(base_ptr.cast::()) }; // Py_ssize_t may not be equal to isize on all platforms #[allow(clippy::useless_conversion)] diff --git a/tests/test_class_basics.rs b/tests/test_class_basics.rs index b3ebc5df..dd6560f2 100644 --- a/tests/test_class_basics.rs +++ b/tests/test_class_basics.rs @@ -347,14 +347,23 @@ fn test_tuple_struct_class() { } #[pyclass(dict, subclass)] -struct DunderDictSupport {} +struct DunderDictSupport { + // Make sure that dict_offset runs with non-zero sized Self + _pad: [u8; 32], +} #[test] #[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)] fn dunder_dict_support() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, DunderDictSupport {}).unwrap(); + let inst = PyCell::new( + py, + DunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -371,7 +380,13 @@ fn dunder_dict_support() { fn access_dunder_dict() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, DunderDictSupport {}).unwrap(); + let inst = PyCell::new( + py, + DunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -393,7 +408,16 @@ struct InheritDict { fn inherited_dict() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, (InheritDict { _value: 0 }, DunderDictSupport {})).unwrap(); + let inst = PyCell::new( + py, + ( + InheritDict { _value: 0 }, + DunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ), + ) + .unwrap(); py_run!( py, inst, @@ -405,14 +429,23 @@ fn inherited_dict() { } #[pyclass(weakref, dict)] -struct WeakRefDunderDictSupport {} +struct WeakRefDunderDictSupport { + // Make sure that weaklist_offset runs with non-zero sized Self + _pad: [u8; 32], +} #[test] #[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)] fn weakref_dunder_dict_support() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, WeakRefDunderDictSupport {}).unwrap(); + let inst = PyCell::new( + py, + WeakRefDunderDictSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -421,14 +454,22 @@ fn weakref_dunder_dict_support() { } #[pyclass(weakref, subclass)] -struct WeakRefSupport {} +struct WeakRefSupport { + _pad: [u8; 32], +} #[test] #[cfg_attr(all(Py_LIMITED_API, not(Py_3_9)), ignore)] fn weakref_support() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, WeakRefSupport {}).unwrap(); + let inst = PyCell::new( + py, + WeakRefSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ) + .unwrap(); py_run!( py, inst, @@ -447,7 +488,16 @@ struct InheritWeakRef { fn inherited_weakref() { let gil = Python::acquire_gil(); let py = gil.python(); - let inst = PyCell::new(py, (InheritWeakRef { _value: 0 }, WeakRefSupport {})).unwrap(); + let inst = PyCell::new( + py, + ( + InheritWeakRef { _value: 0 }, + WeakRefSupport { + _pad: *b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF", + }, + ), + ) + .unwrap(); py_run!( py, inst,