From de79ebc5f839de701fed260ea14e0f52192d7459 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Fri, 20 Jan 2023 14:05:18 +0100 Subject: [PATCH 1/4] Provide a special purpose FromPyObject impl to efficiently and safely get a byte slice from either bytes or byte arrays. --- newsfragments/2899.added.md | 1 + src/types/bytes.rs | 43 +++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 newsfragments/2899.added.md diff --git a/newsfragments/2899.added.md b/newsfragments/2899.added.md new file mode 100644 index 00000000..c6e8b5b2 --- /dev/null +++ b/newsfragments/2899.added.md @@ -0,0 +1 @@ +Add special-purpose impl of `FromPyObject` for `Cow<[u8]>` to efficiently handle both `bytes` and `bytearray` objects diff --git a/src/types/bytes.rs b/src/types/bytes.rs index 3302e8e6..f8eeb670 100644 --- a/src/types/bytes.rs +++ b/src/types/bytes.rs @@ -1,9 +1,12 @@ -use crate::{ffi, AsPyPointer, Py, PyAny, PyResult, Python}; +use crate::{ffi, AsPyPointer, FromPyObject, Py, PyAny, PyResult, Python}; +use std::borrow::Cow; use std::ops::Index; use std::os::raw::c_char; use std::slice::SliceIndex; use std::str; +use super::bytearray::PyByteArray; + /// Represents a Python `bytes` object. /// /// This type is immutable. @@ -121,11 +124,25 @@ impl> Index for PyBytes { } } +/// Special-purpose trait impl to efficiently handle both `bytes` and `bytearray` +/// +/// If the source object is a `bytes` object, the `Cow` will be borrowed and +/// pointing into the source object, and no copying or heap allocations will happen. +/// If it is a `bytearray`, its contents will be copied to an owned `Cow`. +impl<'source> FromPyObject<'source> for Cow<'source, [u8]> { + fn extract(ob: &'source PyAny) -> PyResult { + if let Ok(bytes) = ob.downcast::() { + return Ok(Cow::Borrowed(bytes.as_bytes())); + } + + let byte_array = ob.downcast::()?; + Ok(Cow::Owned(byte_array.to_vec())) + } +} + #[cfg(test)] mod tests { - use super::PyBytes; - use crate::FromPyObject; - use crate::Python; + use super::*; #[test] fn test_bytes_index() { @@ -172,4 +189,22 @@ mod tests { .is_instance_of::(py)); }); } + + #[test] + fn test_cow_impl() { + Python::with_gil(|py| { + let bytes = py.eval(r#"b"foobar""#, None, None).unwrap(); + let cow = bytes.extract::>().unwrap(); + assert_eq!(cow, Cow::<[u8]>::Borrowed(b"foobar")); + + let byte_array = py.eval(r#"bytearray(b"foobar")"#, None, None).unwrap(); + let cow = byte_array.extract::>().unwrap(); + assert_eq!(cow, Cow::<[u8]>::Owned(b"foobar".to_vec())); + + let something_else_entirely = py.eval("42", None, None).unwrap(); + something_else_entirely + .extract::>() + .unwrap_err(); + }); + } } From e3e37ac6248bcd875aaf911c043071638f4f4500 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Fri, 20 Jan 2023 23:15:47 +0100 Subject: [PATCH 2/4] Add ToPyObject and IntoPy impl for Cow<[u8]> to support return values as well as function arguments. --- src/types/bytes.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/types/bytes.rs b/src/types/bytes.rs index f8eeb670..3ed906f2 100644 --- a/src/types/bytes.rs +++ b/src/types/bytes.rs @@ -1,4 +1,4 @@ -use crate::{ffi, AsPyPointer, FromPyObject, Py, PyAny, PyResult, Python}; +use crate::{ffi, AsPyPointer, FromPyObject, IntoPy, Py, PyAny, PyResult, Python, ToPyObject}; use std::borrow::Cow; use std::ops::Index; use std::os::raw::c_char; @@ -140,10 +140,24 @@ impl<'source> FromPyObject<'source> for Cow<'source, [u8]> { } } +impl ToPyObject for Cow<'_, [u8]> { + fn to_object(&self, py: Python<'_>) -> Py { + PyBytes::new(py, self.as_ref()).into() + } +} + +impl IntoPy> for Cow<'_, [u8]> { + fn into_py(self, py: Python<'_>) -> Py { + self.to_object(py) + } +} + #[cfg(test)] mod tests { use super::*; + use crate::types::IntoPyDict; + #[test] fn test_bytes_index() { Python::with_gil(|py| { @@ -205,6 +219,16 @@ mod tests { something_else_entirely .extract::>() .unwrap_err(); + + let cow = Cow::<[u8]>::Borrowed(b"foobar").into_py(py); + let locals = [("cow", cow)].into_py_dict(py); + py.run("assert isinstance(cow, bytes)", Some(locals), None) + .unwrap(); + + let cow = Cow::<[u8]>::Owned(b"foobar".to_vec()).into_py(py); + let locals = [("cow", cow)].into_py_dict(py); + py.run("assert isinstance(cow, bytes)", Some(locals), None) + .unwrap(); }); } } From a16f2e45c8e6112e57b3bf9b1184c5d2653c8f49 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sat, 21 Jan 2023 11:07:28 +0100 Subject: [PATCH 3/4] Include conversions for bytes, bytearrays and Cow<[u8]> in the guide. --- guide/src/conversions/tables.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/guide/src/conversions/tables.md b/guide/src/conversions/tables.md index 6baf7dca..0b5d5973 100644 --- a/guide/src/conversions/tables.md +++ b/guide/src/conversions/tables.md @@ -14,7 +14,7 @@ The table below contains the Python type and the corresponding function argument | ------------- |:-------------------------------:|:--------------------:| | `object` | - | `&PyAny` | | `str` | `String`, `Cow`, `&str`, `OsString`, `PathBuf` | `&PyUnicode` | -| `bytes` | `Vec`, `&[u8]` | `&PyBytes` | +| `bytes` | `Vec`, `&[u8]`, `Cow<[u8]>` | `&PyBytes` | | `bool` | `bool` | `&PyBool` | | `int` | Any integer type (`i32`, `u32`, `usize`, etc) | `&PyLong` | | `float` | `f32`, `f64` | `&PyFloat` | @@ -24,7 +24,7 @@ The table below contains the Python type and the corresponding function argument | `tuple[T, U]` | `(T, U)`, `Vec` | `&PyTuple` | | `set[T]` | `HashSet`, `BTreeSet`, `hashbrown::HashSet`[^2] | `&PySet` | | `frozenset[T]` | `HashSet`, `BTreeSet`, `hashbrown::HashSet`[^2] | `&PyFrozenSet` | -| `bytearray` | `Vec` | `&PyByteArray` | +| `bytearray` | `Vec`, `Cow<[u8]>` | `&PyByteArray` | | `slice` | - | `&PySlice` | | `type` | - | `&PyType` | | `module` | - | `&PyModule` | @@ -84,6 +84,7 @@ Finally, the following Rust types are also able to convert to Python as return v | `Option` | `Optional[T]` | | `(T, U)` | `Tuple[T, U]` | | `Vec` | `List[T]` | +| `Cow<[u8]>` | `bytes` | | `HashMap` | `Dict[K, V]` | | `BTreeMap` | `Dict[K, V]` | | `HashSet` | `Set[T]` | From 0a48859fc4540e7ca352785511ef84a2b6458f03 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 22 Feb 2023 22:12:35 +0100 Subject: [PATCH 4/4] Make tests of Cow::<[u8]> compatible with older Python versions by using native code instead inline Python. --- src/types/bytes.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/types/bytes.rs b/src/types/bytes.rs index 3ed906f2..1f816d03 100644 --- a/src/types/bytes.rs +++ b/src/types/bytes.rs @@ -156,8 +156,6 @@ impl IntoPy> for Cow<'_, [u8]> { mod tests { use super::*; - use crate::types::IntoPyDict; - #[test] fn test_bytes_index() { Python::with_gil(|py| { @@ -220,15 +218,11 @@ mod tests { .extract::>() .unwrap_err(); - let cow = Cow::<[u8]>::Borrowed(b"foobar").into_py(py); - let locals = [("cow", cow)].into_py_dict(py); - py.run("assert isinstance(cow, bytes)", Some(locals), None) - .unwrap(); + let cow = Cow::<[u8]>::Borrowed(b"foobar").to_object(py); + assert!(cow.as_ref(py).is_instance_of::().unwrap()); - let cow = Cow::<[u8]>::Owned(b"foobar".to_vec()).into_py(py); - let locals = [("cow", cow)].into_py_dict(py); - py.run("assert isinstance(cow, bytes)", Some(locals), None) - .unwrap(); + let cow = Cow::<[u8]>::Owned(b"foobar".to_vec()).to_object(py); + assert!(cow.as_ref(py).is_instance_of::().unwrap()); }); } }