add `PyStringMethods::encode_utf8`

This commit is contained in:
David Hewitt 2024-02-05 12:22:10 +00:00
parent ecb4ecbe22
commit 662eecfb44
2 changed files with 35 additions and 5 deletions

View File

@ -0,0 +1 @@
Add `PyStringMethods::encode_utf8`.

View File

@ -305,6 +305,9 @@ pub trait PyStringMethods<'py> {
/// replaced with `U+FFFD REPLACEMENT CHARACTER`.
fn to_string_lossy(&self) -> Cow<'_, str>;
/// Encodes this string as a Python `bytes` object, using UTF-8 encoding.
///
/// # Errors
///
/// Returns an error if the string cannot be encoded as UTF-8, for example
/// when it contains a lone surrogate such as `'\ud800'`.
fn encode_utf8(&self) -> PyResult<Bound<'py, PyBytes>>;
/// Obtains the raw data backing the Python string.
///
/// If the Python string object was created through legacy APIs, its internal storage format
@ -337,6 +340,14 @@ impl<'py> PyStringMethods<'py> for Bound<'py, PyString> {
self.as_borrowed().to_string_lossy()
}
fn encode_utf8(&self) -> PyResult<Bound<'py, PyBytes>> {
    // SAFETY: `self` is a `Bound<'py, PyString>`, so `as_ptr()` refers to a live
    // Python string for the duration of the call. A failed encode is surfaced as
    // `Err` by `assume_owned_or_err` before the downcast is reached; on success
    // CPython documents `PyUnicode_AsUTF8String` as returning a `bytes` object,
    // which is what justifies skipping the type check.
    unsafe {
        let encoded =
            ffi::PyUnicode_AsUTF8String(self.as_ptr()).assume_owned_or_err(self.py())?;
        Ok(encoded.downcast_into_unchecked::<PyBytes>())
    }
}
#[cfg(not(Py_LIMITED_API))]
unsafe fn data(&self) -> PyResult<PyStringData<'_>> {
self.as_borrowed().data()
@ -371,11 +382,7 @@ impl<'a> Borrowed<'a, '_, PyString> {
#[cfg(not(any(Py_3_10, not(Py_LIMITED_API))))]
{
let bytes = unsafe {
ffi::PyUnicode_AsUTF8String(self.as_ptr())
.assume_owned_or_err(self.py())?
.downcast_into_unchecked::<PyBytes>()
};
let bytes = self.encode_utf8()?;
Ok(Cow::Owned(
unsafe { str::from_utf8_unchecked(bytes.as_bytes()) }.to_owned(),
))
@ -535,6 +542,28 @@ mod tests {
})
}
#[test]
fn test_encode_utf8_unicode() {
    // Multi-byte code points (CJK plus an emoji) must encode to exactly the
    // UTF-8 bytes that the Rust `&str` already holds.
    Python::with_gil(|py| {
        let text = "哈哈🐈";
        let py_string = PyString::new_bound(py, text);
        let encoded = py_string.encode_utf8().unwrap();
        assert_eq!(text.as_bytes(), encoded.as_bytes());
    })
}
#[test]
fn test_encode_utf8_surrogate() {
    // A lone surrogate cannot be represented in UTF-8, so encoding must fail
    // rather than produce bytes.
    Python::with_gil(|py| {
        let obj: PyObject = py.eval(r"'\ud800'", None, None).unwrap().into();
        let any = obj.bind(py);
        let string = any.downcast::<PyString>().unwrap();
        let outcome = string.encode_utf8();
        assert!(outcome.is_err());
    })
}
#[test]
fn test_to_string_lossy() {
Python::with_gil(|py| {