Merge pull request #1020 from PyO3/to-str

Remove PyString::as_bytes since it cannot return raw bytes
This commit is contained in:
Yuji Kanagawa 2020-07-08 15:05:58 +09:00 committed by GitHub
commit c00080e27f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 56 additions and 133 deletions

View File

@ -17,6 +17,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Changed
- Update `parking_lot` dependency to `0.11`. [#1010](https://github.com/PyO3/pyo3/pull/1010)
- `PyString::to_string` is now renamed `to_str` and returns `&str` instead of `Cow<str>`. [#1023](https://github.com/PyO3/pyo3/pull/1023)
### Removed
- Remove `PyString::as_bytes`. [#1023](https://github.com/PyO3/pyo3/pull/1023)
- Remove `Python::register_gil`. [#1023](https://github.com/PyO3/pyo3/pull/1023)
## [0.11.0] - 2020-06-28
### Added

View File

@ -5,30 +5,10 @@
use crate::{ffi, internal_tricks::Unsendable, Python};
use parking_lot::{const_mutex, Mutex};
use std::cell::{Cell, RefCell};
use std::{any, mem::ManuallyDrop, ptr::NonNull, sync};
use std::{mem::ManuallyDrop, ptr::NonNull, sync};
static START: sync::Once = sync::Once::new();
/// Holds temporally owned objects.
struct ObjectHolder {
/// Objects owned by the current thread
obj: Vec<NonNull<ffi::PyObject>>,
/// Non-python objects(e.g., String) owned by the current thread
any: Vec<Box<dyn any::Any>>,
}
impl ObjectHolder {
fn new() -> Self {
Self {
obj: Vec::with_capacity(256),
any: Vec::with_capacity(4),
}
}
fn len(&self) -> (usize, usize) {
(self.obj.len(), self.any.len())
}
}
thread_local! {
/// This is a internal counter in pyo3 monitoring whether this thread has the GIL.
///
@ -41,7 +21,7 @@ thread_local! {
pub(crate) static GIL_COUNT: Cell<u32> = Cell::new(0);
/// Temporally hold objects that will be released when the GILPool drops.
static OWNED_OBJECTS: RefCell<ObjectHolder> = RefCell::new(ObjectHolder::new());
static OWNED_OBJECTS: RefCell<Vec<NonNull<ffi::PyObject>>> = RefCell::new(Vec::with_capacity(256));
}
/// Check whether the GIL is acquired.
@ -51,7 +31,7 @@ thread_local! {
/// 2) PyGILState_Check always returns 1 if the sub-interpreter APIs have ever been called,
/// which could lead to incorrect conclusions that the GIL is held.
fn gil_is_acquired() -> bool {
GIL_COUNT.with(|c| c.get() > 0)
GIL_COUNT.try_with(|c| c.get() > 0).unwrap_or(false)
}
/// Prepares the use of Python in a free-threaded context.
@ -159,21 +139,19 @@ impl GILGuard {
pub fn acquire() -> GILGuard {
prepare_freethreaded_python();
unsafe {
let gstate = ffi::PyGILState_Ensure(); // acquire GIL
let gstate = unsafe { ffi::PyGILState_Ensure() }; // acquire GIL
// If there's already a GILPool, we should not create another or this could lead to
// incorrect dangling references in safe code (see #864).
let pool = if !gil_is_acquired() {
Some(GILPool::new())
} else {
None
};
// If there's already a GILPool, we should not create another or this could lead to
// incorrect dangling references in safe code (see #864).
let pool = if !gil_is_acquired() {
Some(unsafe { GILPool::new() })
} else {
None
};
GILGuard {
gstate,
pool: ManuallyDrop::new(pool),
}
GILGuard {
gstate,
pool: ManuallyDrop::new(pool),
}
}
@ -252,7 +230,7 @@ static POOL: ReferencePool = ReferencePool::new();
pub struct GILPool {
/// Initial length of owned objects and anys.
/// `Option` is used since TSL can be broken when `new` is called from `atexit`.
start: Option<(usize, usize)>,
start: Option<usize>,
no_send: Unsendable,
}
@ -283,20 +261,19 @@ impl GILPool {
impl Drop for GILPool {
fn drop(&mut self) {
unsafe {
if let Some((obj_len_start, any_len_start)) = self.start {
let dropping_obj = OWNED_OBJECTS.with(|holder| {
// `holder` must be dropped before calling Py_DECREF, or Py_DECREF may call
// `GILPool::drop` recursively, resulting in invalid borrowing.
let mut holder = holder.borrow_mut();
holder.any.truncate(any_len_start);
if obj_len_start < holder.obj.len() {
holder.obj.split_off(obj_len_start)
} else {
Vec::new()
}
});
for obj in dropping_obj {
if let Some(obj_len_start) = self.start {
let dropping_obj = OWNED_OBJECTS.with(|holder| {
// `holder` must be dropped before calling Py_DECREF, or Py_DECREF may call
// `GILPool::drop` recursively, resulting in invalid borrowing.
let mut holder = holder.borrow_mut();
if obj_len_start < holder.len() {
holder.split_off(obj_len_start)
} else {
Vec::new()
}
});
for obj in dropping_obj {
unsafe {
ffi::Py_DECREF(obj.as_ptr());
}
}
@ -343,36 +320,21 @@ pub unsafe fn register_decref(obj: NonNull<ffi::PyObject>) {
/// The object must be an owned Python reference.
pub unsafe fn register_owned(_py: Python, obj: NonNull<ffi::PyObject>) {
debug_assert!(gil_is_acquired());
// Ignoring the error means we do nothing if the TLS is broken.
let _ = OWNED_OBJECTS.try_with(|holder| holder.borrow_mut().obj.push(obj));
}
/// Register any value inside the GILPool.
///
/// # Safety
/// It is the caller's responsibility to ensure that the inferred lifetime 'p is not inferred by
/// the Rust compiler to outlast the current GILPool.
pub unsafe fn register_any<'p, T: 'static>(obj: T) -> &'p T {
debug_assert!(gil_is_acquired());
OWNED_OBJECTS.with(|holder| {
let boxed = Box::new(obj);
let value_ref: *const T = &*boxed;
holder.borrow_mut().any.push(boxed);
&*value_ref
})
// Ignores the error in case this function called from `atexit`.
let _ = OWNED_OBJECTS.try_with(|holder| holder.borrow_mut().push(obj));
}
/// Increment pyo3's internal GIL count - to be called whenever GILPool or GILGuard is created.
// Ignores the error in case this function called from `atexit`.
#[inline(always)]
fn increment_gil_count() {
// Ignores the error in case this function called from `atexit`.
let _ = GIL_COUNT.with(|c| c.set(c.get() + 1));
}
/// Decrement pyo3's internal GIL count - to be called whenever GILPool or GILGuard is dropped.
// Ignores the error in case this function called from `atexit`.
#[inline(always)]
fn decrement_gil_count() {
// Ignores the error in case this function called from `atexit`.
let _ = GIL_COUNT.try_with(|c| {
let current = c.get();
debug_assert!(
@ -431,7 +393,7 @@ mod test {
}
fn owned_object_count() -> usize {
OWNED_OBJECTS.with(|holder| holder.borrow().obj.len())
OWNED_OBJECTS.with(|holder| holder.borrow().len())
}
#[test]

View File

@ -435,13 +435,6 @@ impl<'p> Python<'p> {
FromPyPointer::from_borrowed_ptr_or_opt(self, ptr)
}
#[doc(hidden)]
/// Passes value ownership to `Python` object and get reference back.
/// Value get cleaned up on the GIL release.
pub fn register_any<T: 'static>(self, ob: T) -> &'p T {
unsafe { gil::register_any(ob) }
}
/// Releases a PyObject reference.
#[inline]
pub fn release<T>(self, ob: T)

View File

@ -173,7 +173,7 @@ mod test {
slice[0..5].copy_from_slice(b"Hi...");
assert_eq!(
&bytearray.str().unwrap().to_string().unwrap(),
bytearray.str().unwrap().to_str().unwrap(),
"bytearray(b'Hi... Python')"
);
}

View File

@ -6,7 +6,6 @@ use crate::{
PyTryFrom, Python, ToPyObject,
};
use std::borrow::Cow;
use std::ffi::CStr;
use std::os::raw::c_char;
use std::str;
@ -44,41 +43,33 @@ impl PyString {
/// Returns a `UnicodeEncodeError` if the input is not valid unicode
/// (containing unpaired surrogates).
#[inline]
pub fn as_bytes(&self) -> PyResult<&[u8]> {
pub fn to_str(&self) -> PyResult<&str> {
unsafe {
let mut size: ffi::Py_ssize_t = 0;
let data = ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size) as *const u8;
if data.is_null() {
Err(PyErr::fetch(self.py()))
} else {
Ok(std::slice::from_raw_parts(data, size as usize))
let slice = std::slice::from_raw_parts(data, size as usize);
Ok(std::str::from_utf8_unchecked(slice))
}
}
}
/// Converts the `PyString` into a Rust string.
pub fn to_string(&self) -> PyResult<Cow<str>> {
let bytes = self.as_bytes()?;
let string = std::str::from_utf8(bytes)?;
Ok(Cow::Borrowed(string))
}
/// Converts the `PyString` into a Rust string.
///
/// Unpaired surrogates invalid UTF-8 sequences are
/// replaced with `U+FFFD REPLACEMENT CHARACTER`.
pub fn to_string_lossy(&self) -> Cow<str> {
match self.to_string() {
Ok(s) => s,
match self.to_str() {
Ok(s) => Cow::Borrowed(s),
Err(_) => {
let bytes = unsafe {
self.py()
.from_owned_ptr::<PyBytes>(ffi::PyUnicode_AsEncodedString(
self.as_ptr(),
CStr::from_bytes_with_nul(b"utf-8\0").unwrap().as_ptr(),
CStr::from_bytes_with_nul(b"surrogatepass\0")
.unwrap()
.as_ptr(),
b"utf-8\0" as *const _ as _,
b"surrogatepass\0" as *const _ as _,
))
};
String::from_utf8_lossy(bytes.as_bytes())
@ -136,24 +127,9 @@ impl<'a> IntoPy<PyObject> for &'a String {
/// Allows extracting strings from Python objects.
/// Accepts Python `str` and `unicode` objects.
impl<'source> crate::FromPyObject<'source> for Cow<'source, str> {
impl<'source> crate::FromPyObject<'source> for &'source str {
fn extract(ob: &'source PyAny) -> PyResult<Self> {
<PyString as PyTryFrom>::try_from(ob)?.to_string()
}
}
/// Allows extracting strings from Python objects.
/// Accepts Python `str` and `unicode` objects.
impl<'a> crate::FromPyObject<'a> for &'a str {
fn extract(ob: &'a PyAny) -> PyResult<Self> {
let s: Cow<'a, str> = crate::FromPyObject::extract(ob)?;
match s {
Cow::Borrowed(r) => Ok(r),
Cow::Owned(r) => {
let r = ob.py().register_any(r);
Ok(r.as_str())
}
}
<PyString as PyTryFrom>::try_from(ob)?.to_str()
}
}
@ -162,8 +138,8 @@ impl<'a> crate::FromPyObject<'a> for &'a str {
impl<'source> FromPyObject<'source> for String {
fn extract(obj: &'source PyAny) -> PyResult<Self> {
<PyString as PyTryFrom>::try_from(obj)?
.to_string()
.map(Cow::into_owned)
.to_str()
.map(ToOwned::to_owned)
}
}
@ -174,7 +150,6 @@ mod test {
use crate::object::PyObject;
use crate::Python;
use crate::{FromPyObject, PyTryFrom, ToPyObject};
use std::borrow::Cow;
#[test]
fn test_non_bmp() {
@ -197,44 +172,32 @@ mod test {
}
#[test]
fn test_as_bytes() {
fn test_to_str_ascii() {
let gil = Python::acquire_gil();
let py = gil.python();
let s = "ascii 🐈";
let obj: PyObject = PyString::new(py, s).into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert_eq!(s.as_bytes(), py_string.as_bytes().unwrap());
assert_eq!(s, py_string.to_str().unwrap());
}
#[test]
fn test_as_bytes_surrogate() {
fn test_to_str_surrogate() {
let gil = Python::acquire_gil();
let py = gil.python();
let obj: PyObject = py.eval(r#"'\ud800'"#, None, None).unwrap().into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert!(py_string.as_bytes().is_err());
assert!(py_string.to_str().is_err());
}
#[test]
fn test_to_string_ascii() {
let gil = Python::acquire_gil();
let py = gil.python();
let s = "ascii";
let obj: PyObject = PyString::new(py, s).into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert!(py_string.to_string().is_ok());
assert_eq!(Cow::Borrowed(s), py_string.to_string().unwrap());
}
#[test]
fn test_to_string_unicode() {
fn test_to_str_unicode() {
let gil = Python::acquire_gil();
let py = gil.python();
let s = "哈哈🐈";
let obj: PyObject = PyString::new(py, s).into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert!(py_string.to_string().is_ok());
assert_eq!(Cow::Borrowed(s), py_string.to_string().unwrap());
assert_eq!(s, py_string.to_str().unwrap());
}
#[test]