Merge pull request #3651 from davidhewitt/bytes2

implement `PyBytesMethods` and `PyByteArrayMethods`
This commit is contained in:
David Hewitt 2023-12-19 19:56:16 +00:00 committed by GitHub
commit 54ba6e82ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 381 additions and 21 deletions

View File

@ -1,4 +1,8 @@
use crate::{ffi, instance::Py2, PyAny, PyResult, Python};
use crate::{
ffi,
instance::{Py2, Py2Borrowed},
PyAny, PyResult, Python,
};
mod sealed {
use super::*;
@ -13,6 +17,24 @@ use sealed::Sealed;
pub(crate) trait FfiPtrExt: Sealed {
unsafe fn assume_owned_or_err(self, py: Python<'_>) -> PyResult<Py2<'_, PyAny>>;
unsafe fn assume_owned(self, py: Python<'_>) -> Py2<'_, PyAny>;
/// Assumes this pointer is borrowed from a parent object.
///
/// Warning: the lifetime `'a` is not bounded by the function arguments; the caller is
/// responsible to ensure this is tied to some appropriate lifetime.
unsafe fn assume_borrowed_or_err<'a>(
self,
py: Python<'_>,
) -> PyResult<Py2Borrowed<'a, '_, PyAny>>;
/// Same as `assume_borrowed_or_err`, but doesn't fetch an error on NULL.
unsafe fn assume_borrowed_or_opt<'a>(
self,
py: Python<'_>,
) -> Option<Py2Borrowed<'a, '_, PyAny>>;
/// Same as `assume_borrowed_or_err`, but panics on NULL.
unsafe fn assume_borrowed<'a>(self, py: Python<'_>) -> Py2Borrowed<'a, '_, PyAny>;
}
impl FfiPtrExt for *mut ffi::PyObject {
@ -25,4 +47,25 @@ impl FfiPtrExt for *mut ffi::PyObject {
unsafe fn assume_owned(self, py: Python<'_>) -> Py2<'_, PyAny> {
Py2::from_owned_ptr(py, self)
}
#[inline]
unsafe fn assume_borrowed_or_err<'a>(
self,
py: Python<'_>,
) -> PyResult<Py2Borrowed<'a, '_, PyAny>> {
Py2Borrowed::from_ptr_or_err(py, self)
}
#[inline]
unsafe fn assume_borrowed_or_opt<'a>(
self,
py: Python<'_>,
) -> Option<Py2Borrowed<'a, '_, PyAny>> {
Py2Borrowed::from_ptr_or_opt(py, self)
}
#[inline]
unsafe fn assume_borrowed<'a>(self, py: Python<'_>) -> Py2Borrowed<'a, '_, PyAny> {
Py2Borrowed::from_ptr(py, self)
}
}

View File

@ -217,6 +217,96 @@ unsafe impl<T> AsPyPointer for Py2<'_, T> {
}
}
/// A borrowed equivalent to `Py2`.
///
/// The advantage of this over `&Py2` is that it avoids the need to have a pointer-to-pointer, as Py2
/// is already a pointer to an `ffi::PyObject``.
#[repr(transparent)]
pub(crate) struct Py2Borrowed<'a, 'py, T>(
NonNull<ffi::PyObject>,
PhantomData<&'a Py<T>>,
Python<'py>,
);
impl<'a, 'py> Py2Borrowed<'a, 'py, PyAny> {
/// # Safety
/// This is similar to `std::slice::from_raw_parts`, the lifetime `'a` is completely defined by
/// the caller and it's the caller's responsibility to ensure that the reference this is
/// derived from is valid for the lifetime `'a`.
pub(crate) unsafe fn from_ptr_or_err(
py: Python<'py>,
ptr: *mut ffi::PyObject,
) -> PyResult<Self> {
NonNull::new(ptr).map_or_else(
|| Err(PyErr::fetch(py)),
|ptr| Ok(Self(ptr, PhantomData, py)),
)
}
/// # Safety
/// This is similar to `std::slice::from_raw_parts`, the lifetime `'a` is completely defined by
/// the caller and it's the caller's responsibility to ensure that the reference this is
/// derived from is valid for the lifetime `'a`.
pub(crate) unsafe fn from_ptr_or_opt(py: Python<'py>, ptr: *mut ffi::PyObject) -> Option<Self> {
NonNull::new(ptr).map(|ptr| Self(ptr, PhantomData, py))
}
/// # Safety
/// This is similar to `std::slice::from_raw_parts`, the lifetime `'a` is completely defined by
/// the caller and it's the caller's responsibility to ensure that the reference this is
/// derived from is valid for the lifetime `'a`.
pub(crate) unsafe fn from_ptr(py: Python<'py>, ptr: *mut ffi::PyObject) -> Self {
Self(
NonNull::new(ptr).unwrap_or_else(|| crate::err::panic_after_error(py)),
PhantomData,
py,
)
}
}
impl<'a, 'py, T> From<&'a Py2<'py, T>> for Py2Borrowed<'a, 'py, T> {
/// Create borrow on a Py2
fn from(instance: &'a Py2<'py, T>) -> Self {
Self(
unsafe { NonNull::new_unchecked(instance.as_ptr()) },
PhantomData,
instance.py(),
)
}
}
impl<'py, T> Py2Borrowed<'py, 'py, T>
where
T: HasPyGilRef,
{
pub(crate) fn from_gil_ref(gil_ref: &'py T::AsRefTarget) -> Self {
// Safety: &'py T::AsRefTarget is expected to be a Python pointer,
// so &'py T::AsRefTarget has the same layout as Self.
unsafe { std::mem::transmute(gil_ref) }
}
// pub(crate) fn into_gil_ref(self) -> &'py T::AsRefTarget {
// // Safety: self is a borrow over `'py`.
// unsafe { self.py().from_borrowed_ptr(self.0.as_ptr()) }
// }
}
impl<T> std::fmt::Debug for Py2Borrowed<'_, '_, T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Py2::fmt(self, f)
}
}
impl<'py, T> Deref for Py2Borrowed<'_, 'py, T> {
type Target = Py2<'py, T>;
#[inline]
fn deref(&self) -> &Py2<'py, T> {
// safety: Py2 has the same layout as NonNull<ffi::PyObject>
unsafe { &*(&self.0 as *const _ as *const Py2<'py, T>) }
}
}
/// A GIL-independent reference to an object allocated on the Python heap.
///
/// This type does not auto-dereference to the inner object because you must prove you hold the GIL to access it.
@ -727,6 +817,10 @@ impl<T> Py<T> {
Py2(py, ManuallyDrop::new(self))
}
pub(crate) fn attach_borrow<'a, 'py>(&'a self, py: Python<'py>) -> Py2Borrowed<'a, 'py, T> {
Py2Borrowed(self.0, PhantomData, py)
}
/// Returns whether `self` and `other` point to the same object. To compare
/// the equality of two objects (the `==` operator), use [`eq`](PyAny::eq).
///

View File

@ -27,5 +27,7 @@ pub use crate::wrap_pyfunction;
// pub(crate) use crate::instance::Py2; // Will be stabilized with a different name
// pub(crate) use crate::types::any::PyAnyMethods;
// pub(crate) use crate::types::boolobject::PyBoolMethods;
// pub(crate) use crate::types::bytearray::PyByteArrayMethods;
// pub(crate) use crate::types::bytes::PyBytesMethods;
// pub(crate) use crate::types::float::PyFloatMethods;
// pub(crate) use crate::types::sequence::PySequenceMethods;

View File

@ -1,4 +1,5 @@
use crate::err::{PyErr, PyResult};
use crate::instance::{Py2, Py2Borrowed};
use crate::{ffi, AsPyPointer, Py, PyAny, Python};
use std::os::raw::c_char;
use std::slice;
@ -74,13 +75,12 @@ impl PyByteArray {
/// Gets the length of the bytearray.
#[inline]
pub fn len(&self) -> usize {
// non-negative Py_ssize_t should always fit into Rust usize
unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize }
Py2::borrowed_from_gil_ref(&self).len()
}
/// Checks if the bytearray is empty.
pub fn is_empty(&self) -> bool {
self.len() == 0
Py2::borrowed_from_gil_ref(&self).is_empty()
}
/// Gets the start of the buffer containing the contents of the bytearray.
@ -89,7 +89,7 @@ impl PyByteArray {
///
/// See the safety requirements of [`PyByteArray::as_bytes`] and [`PyByteArray::as_bytes_mut`].
pub fn data(&self) -> *mut u8 {
unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() }
Py2::borrowed_from_gil_ref(&self).data()
}
/// Extracts a slice of the `ByteArray`'s entire buffer.
@ -188,7 +188,7 @@ impl PyByteArray {
/// }
/// ```
pub unsafe fn as_bytes(&self) -> &[u8] {
slice::from_raw_parts(self.data(), self.len())
Py2Borrowed::from_gil_ref(self).as_bytes()
}
/// Extracts a mutable slice of the `ByteArray`'s entire buffer.
@ -200,7 +200,7 @@ impl PyByteArray {
/// apply to this function as well.
#[allow(clippy::mut_from_ref)]
pub unsafe fn as_bytes_mut(&self) -> &mut [u8] {
slice::from_raw_parts_mut(self.data(), self.len())
Py2Borrowed::from_gil_ref(self).as_bytes_mut()
}
/// Copies the contents of the bytearray to a Rust vector.
@ -222,7 +222,7 @@ impl PyByteArray {
/// # });
/// ```
pub fn to_vec(&self) -> Vec<u8> {
unsafe { self.as_bytes() }.to_vec()
Py2::borrowed_from_gil_ref(&self).to_vec()
}
/// Resizes the bytearray object to the new length `len`.
@ -230,6 +230,193 @@ impl PyByteArray {
/// Note that this will invalidate any pointers obtained by [PyByteArray::data], as well as
/// any (unsafe) slices obtained from [PyByteArray::as_bytes] and [PyByteArray::as_bytes_mut].
pub fn resize(&self, len: usize) -> PyResult<()> {
Py2::borrowed_from_gil_ref(&self).resize(len)
}
}
/// Implementation of functionality for [`PyByteArray`].
///
/// These methods are defined for the `Py2<'py, PyByteArray>` smart pointer, so to use method call
/// syntax these methods are separated into a trait, because stable Rust does not yet support
/// `arbitrary_self_types`.
#[doc(alias = "PyByteArray")]
pub trait PyByteArrayMethods<'py> {
/// Gets the length of the bytearray.
fn len(&self) -> usize;
/// Checks if the bytearray is empty.
fn is_empty(&self) -> bool;
/// Gets the start of the buffer containing the contents of the bytearray.
///
/// # Safety
///
/// See the safety requirements of [`PyByteArray::as_bytes`] and [`PyByteArray::as_bytes_mut`].
fn data(&self) -> *mut u8;
/// Extracts a slice of the `ByteArray`'s entire buffer.
///
/// # Safety
///
/// Mutation of the `bytearray` invalidates the slice. If it is used afterwards, the behavior is
/// undefined.
///
/// These mutations may occur in Python code as well as from Rust:
/// - Calling methods like [`PyByteArray::as_bytes_mut`] and [`PyByteArray::resize`] will
/// invalidate the slice.
/// - Actions like dropping objects or raising exceptions can invoke `__del__`methods or signal
/// handlers, which may execute arbitrary Python code. This means that if Python code has a
/// reference to the `bytearray` you cannot safely use the vast majority of PyO3's API whilst
/// using the slice.
///
/// As a result, this slice should only be used for short-lived operations without executing any
/// Python code, such as copying into a Vec.
///
/// # Examples
///
/// ```rust
/// use pyo3::prelude::*;
/// use pyo3::exceptions::PyRuntimeError;
/// use pyo3::types::PyByteArray;
///
/// #[pyfunction]
/// fn a_valid_function(bytes: &PyByteArray) -> PyResult<()> {
/// let section = {
/// // SAFETY: We promise to not let the interpreter regain control
/// // or invoke any PyO3 APIs while using the slice.
/// let slice = unsafe { bytes.as_bytes() };
///
/// // Copy only a section of `bytes` while avoiding
/// // `to_vec` which copies the entire thing.
/// let section = slice
/// .get(6..11)
/// .ok_or_else(|| PyRuntimeError::new_err("input is not long enough"))?;
/// Vec::from(section)
/// };
///
/// // Now we can do things with `section` and call PyO3 APIs again.
/// // ...
/// # assert_eq!(&section, b"world");
///
/// Ok(())
/// }
/// # fn main() -> PyResult<()> {
/// # Python::with_gil(|py| -> PyResult<()> {
/// # let fun = wrap_pyfunction!(a_valid_function, py)?;
/// # let locals = pyo3::types::PyDict::new(py);
/// # locals.set_item("a_valid_function", fun)?;
/// #
/// # py.run(
/// # r#"b = bytearray(b"hello world")
/// # a_valid_function(b)
/// #
/// # try:
/// # a_valid_function(bytearray())
/// # except RuntimeError as e:
/// # assert str(e) == 'input is not long enough'"#,
/// # None,
/// # Some(locals),
/// # )?;
/// #
/// # Ok(())
/// # })
/// # }
/// ```
///
/// # Incorrect usage
///
/// The following `bug` function is unsound ⚠️
///
/// ```rust,no_run
/// # use pyo3::prelude::*;
/// # use pyo3::types::PyByteArray;
///
/// # #[allow(dead_code)]
/// #[pyfunction]
/// fn bug(py: Python<'_>, bytes: &PyByteArray) {
/// let slice = unsafe { bytes.as_bytes() };
///
/// // This explicitly yields control back to the Python interpreter...
/// // ...but it's not always this obvious. Many things do this implicitly.
/// py.allow_threads(|| {
/// // Python code could be mutating through its handle to `bytes`,
/// // which makes reading it a data race, which is undefined behavior.
/// println!("{:?}", slice[0]);
/// });
///
/// // Python code might have mutated it, so we can not rely on the slice
/// // remaining valid. As such this is also undefined behavior.
/// println!("{:?}", slice[0]);
/// }
/// ```
unsafe fn as_bytes(&self) -> &[u8];
/// Extracts a mutable slice of the `ByteArray`'s entire buffer.
///
/// # Safety
///
/// Any other accesses of the `bytearray`'s buffer invalidate the slice. If it is used
/// afterwards, the behavior is undefined. The safety requirements of [`PyByteArray::as_bytes`]
/// apply to this function as well.
#[allow(clippy::mut_from_ref)]
unsafe fn as_bytes_mut(&self) -> &mut [u8];
/// Copies the contents of the bytearray to a Rust vector.
///
/// # Examples
///
/// ```
/// # use pyo3::prelude::*;
/// # use pyo3::types::PyByteArray;
/// # Python::with_gil(|py| {
/// let bytearray = PyByteArray::new(py, b"Hello World.");
/// let mut copied_message = bytearray.to_vec();
/// assert_eq!(b"Hello World.", copied_message.as_slice());
///
/// copied_message[11] = b'!';
/// assert_eq!(b"Hello World!", copied_message.as_slice());
///
/// pyo3::py_run!(py, bytearray, "assert bytearray == b'Hello World.'");
/// # });
/// ```
fn to_vec(&self) -> Vec<u8>;
/// Resizes the bytearray object to the new length `len`.
///
/// Note that this will invalidate any pointers obtained by [PyByteArray::data], as well as
/// any (unsafe) slices obtained from [PyByteArray::as_bytes] and [PyByteArray::as_bytes_mut].
fn resize(&self, len: usize) -> PyResult<()>;
}
impl<'py> PyByteArrayMethods<'py> for Py2<'py, PyByteArray> {
#[inline]
fn len(&self) -> usize {
// non-negative Py_ssize_t should always fit into Rust usize
unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize }
}
fn is_empty(&self) -> bool {
self.len() == 0
}
fn data(&self) -> *mut u8 {
Py2Borrowed::from(self).data()
}
unsafe fn as_bytes(&self) -> &[u8] {
Py2Borrowed::from(self).as_bytes()
}
#[allow(clippy::mut_from_ref)]
unsafe fn as_bytes_mut(&self) -> &mut [u8] {
Py2Borrowed::from(self).as_bytes_mut()
}
fn to_vec(&self) -> Vec<u8> {
unsafe { self.as_bytes() }.to_vec()
}
fn resize(&self, len: usize) -> PyResult<()> {
unsafe {
let result = ffi::PyByteArray_Resize(self.as_ptr(), len as ffi::Py_ssize_t);
if result == 0 {
@ -241,6 +428,22 @@ impl PyByteArray {
}
}
impl<'a> Py2Borrowed<'a, '_, PyByteArray> {
fn data(&self) -> *mut u8 {
unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() }
}
#[allow(clippy::wrong_self_convention)]
unsafe fn as_bytes(self) -> &'a [u8] {
slice::from_raw_parts(self.data(), self.len())
}
#[allow(clippy::wrong_self_convention)]
unsafe fn as_bytes_mut(self) -> &'a mut [u8] {
slice::from_raw_parts_mut(self.data(), self.len())
}
}
impl<'py> TryFrom<&'py PyAny> for &'py PyByteArray {
type Error = crate::PyErr;

View File

@ -1,3 +1,4 @@
use crate::instance::{Py2, Py2Borrowed};
use crate::{ffi, FromPyObject, IntoPy, Py, PyAny, PyResult, Python, ToPyObject};
use std::borrow::Cow;
use std::ops::Index;
@ -88,6 +89,32 @@ impl PyBytes {
/// Gets the Python string as a byte slice.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
Py2Borrowed::from_gil_ref(self).as_bytes()
}
}
/// Implementation of functionality for [`PyBytes`].
///
/// These methods are defined for the `Py2<'py, PyBytes>` smart pointer, so to use method call
/// syntax these methods are separated into a trait, because stable Rust does not yet support
/// `arbitrary_self_types`.
#[doc(alias = "PyBytes")]
pub(crate) trait PyBytesMethods<'py> {
/// Gets the Python string as a byte slice.
fn as_bytes(&self) -> &[u8];
}
impl<'py> PyBytesMethods<'py> for Py2<'py, PyBytes> {
#[inline]
fn as_bytes(&self) -> &[u8] {
Py2Borrowed::from(self).as_bytes()
}
}
impl<'a> Py2Borrowed<'a, '_, PyBytes> {
/// Gets the Python string as a byte slice.
#[allow(clippy::wrong_self_convention)]
fn as_bytes(self) -> &'a [u8] {
unsafe {
let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8;
let length = ffi::PyBytes_Size(self.as_ptr()) as usize;
@ -101,17 +128,8 @@ impl Py<PyBytes> {
/// Gets the Python bytes as a byte slice. Because Python bytes are
/// immutable, the result may be used for as long as the reference to
/// `self` is held, including when the GIL is released.
pub fn as_bytes<'a>(&'a self, _py: Python<'_>) -> &'a [u8] {
// py is required here because `PyBytes_AsString` and `PyBytes_Size`
// can both technically raise exceptions which require the GIL to be
// held. The only circumstance in which they raise is if the value
// isn't really a `PyBytes`, but better safe than sorry.
unsafe {
let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8;
let length = ffi::PyBytes_Size(self.as_ptr()) as usize;
debug_assert!(!buffer.is_null());
std::slice::from_raw_parts(buffer, length)
}
pub fn as_bytes<'a>(&'a self, py: Python<'_>) -> &'a [u8] {
self.attach_borrow(py).as_bytes()
}
}

View File

@ -273,8 +273,8 @@ macro_rules! pyobject_native_type {
pub(crate) mod any;
pub(crate) mod boolobject;
mod bytearray;
mod bytes;
pub(crate) mod bytearray;
pub(crate) mod bytes;
mod capsule;
#[cfg(not(Py_LIMITED_API))]
mod code;