Merge pull request #1020 from PyO3/to-str
Remove PyString::as_bytes since it cannot return raw bytes
This commit is contained in:
commit
c00080e27f
|
@ -17,6 +17,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|||
|
||||
### Changed
|
||||
- Update `parking_lot` dependency to `0.11`. [#1010](https://github.com/PyO3/pyo3/pull/1010)
|
||||
- `PyString::to_string` is now renamed `to_str` and returns `&str` instead of `Cow<str>`. [#1023](https://github.com/PyO3/pyo3/pull/1023)
|
||||
|
||||
### Removed
|
||||
- Remove `PyString::as_bytes`. [#1023](https://github.com/PyO3/pyo3/pull/1023)
|
||||
- Remove `Python::register_gil`. [#1023](https://github.com/PyO3/pyo3/pull/1023)
|
||||
|
||||
## [0.11.0] - 2020-06-28
|
||||
### Added
|
||||
|
|
104
src/gil.rs
104
src/gil.rs
|
@ -5,30 +5,10 @@
|
|||
use crate::{ffi, internal_tricks::Unsendable, Python};
|
||||
use parking_lot::{const_mutex, Mutex};
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::{any, mem::ManuallyDrop, ptr::NonNull, sync};
|
||||
use std::{mem::ManuallyDrop, ptr::NonNull, sync};
|
||||
|
||||
static START: sync::Once = sync::Once::new();
|
||||
|
||||
/// Holds temporally owned objects.
|
||||
struct ObjectHolder {
|
||||
/// Objects owned by the current thread
|
||||
obj: Vec<NonNull<ffi::PyObject>>,
|
||||
/// Non-python objects(e.g., String) owned by the current thread
|
||||
any: Vec<Box<dyn any::Any>>,
|
||||
}
|
||||
|
||||
impl ObjectHolder {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
obj: Vec::with_capacity(256),
|
||||
any: Vec::with_capacity(4),
|
||||
}
|
||||
}
|
||||
fn len(&self) -> (usize, usize) {
|
||||
(self.obj.len(), self.any.len())
|
||||
}
|
||||
}
|
||||
|
||||
thread_local! {
|
||||
/// This is a internal counter in pyo3 monitoring whether this thread has the GIL.
|
||||
///
|
||||
|
@ -41,7 +21,7 @@ thread_local! {
|
|||
pub(crate) static GIL_COUNT: Cell<u32> = Cell::new(0);
|
||||
|
||||
/// Temporally hold objects that will be released when the GILPool drops.
|
||||
static OWNED_OBJECTS: RefCell<ObjectHolder> = RefCell::new(ObjectHolder::new());
|
||||
static OWNED_OBJECTS: RefCell<Vec<NonNull<ffi::PyObject>>> = RefCell::new(Vec::with_capacity(256));
|
||||
}
|
||||
|
||||
/// Check whether the GIL is acquired.
|
||||
|
@ -51,7 +31,7 @@ thread_local! {
|
|||
/// 2) PyGILState_Check always returns 1 if the sub-interpreter APIs have ever been called,
|
||||
/// which could lead to incorrect conclusions that the GIL is held.
|
||||
fn gil_is_acquired() -> bool {
|
||||
GIL_COUNT.with(|c| c.get() > 0)
|
||||
GIL_COUNT.try_with(|c| c.get() > 0).unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Prepares the use of Python in a free-threaded context.
|
||||
|
@ -159,21 +139,19 @@ impl GILGuard {
|
|||
pub fn acquire() -> GILGuard {
|
||||
prepare_freethreaded_python();
|
||||
|
||||
unsafe {
|
||||
let gstate = ffi::PyGILState_Ensure(); // acquire GIL
|
||||
let gstate = unsafe { ffi::PyGILState_Ensure() }; // acquire GIL
|
||||
|
||||
// If there's already a GILPool, we should not create another or this could lead to
|
||||
// incorrect dangling references in safe code (see #864).
|
||||
let pool = if !gil_is_acquired() {
|
||||
Some(GILPool::new())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
// If there's already a GILPool, we should not create another or this could lead to
|
||||
// incorrect dangling references in safe code (see #864).
|
||||
let pool = if !gil_is_acquired() {
|
||||
Some(unsafe { GILPool::new() })
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
GILGuard {
|
||||
gstate,
|
||||
pool: ManuallyDrop::new(pool),
|
||||
}
|
||||
GILGuard {
|
||||
gstate,
|
||||
pool: ManuallyDrop::new(pool),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -252,7 +230,7 @@ static POOL: ReferencePool = ReferencePool::new();
|
|||
pub struct GILPool {
|
||||
/// Initial length of owned objects and anys.
|
||||
/// `Option` is used since TSL can be broken when `new` is called from `atexit`.
|
||||
start: Option<(usize, usize)>,
|
||||
start: Option<usize>,
|
||||
no_send: Unsendable,
|
||||
}
|
||||
|
||||
|
@ -283,20 +261,19 @@ impl GILPool {
|
|||
|
||||
impl Drop for GILPool {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
if let Some((obj_len_start, any_len_start)) = self.start {
|
||||
let dropping_obj = OWNED_OBJECTS.with(|holder| {
|
||||
// `holder` must be dropped before calling Py_DECREF, or Py_DECREF may call
|
||||
// `GILPool::drop` recursively, resulting in invalid borrowing.
|
||||
let mut holder = holder.borrow_mut();
|
||||
holder.any.truncate(any_len_start);
|
||||
if obj_len_start < holder.obj.len() {
|
||||
holder.obj.split_off(obj_len_start)
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
});
|
||||
for obj in dropping_obj {
|
||||
if let Some(obj_len_start) = self.start {
|
||||
let dropping_obj = OWNED_OBJECTS.with(|holder| {
|
||||
// `holder` must be dropped before calling Py_DECREF, or Py_DECREF may call
|
||||
// `GILPool::drop` recursively, resulting in invalid borrowing.
|
||||
let mut holder = holder.borrow_mut();
|
||||
if obj_len_start < holder.len() {
|
||||
holder.split_off(obj_len_start)
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
});
|
||||
for obj in dropping_obj {
|
||||
unsafe {
|
||||
ffi::Py_DECREF(obj.as_ptr());
|
||||
}
|
||||
}
|
||||
|
@ -343,36 +320,21 @@ pub unsafe fn register_decref(obj: NonNull<ffi::PyObject>) {
|
|||
/// The object must be an owned Python reference.
|
||||
pub unsafe fn register_owned(_py: Python, obj: NonNull<ffi::PyObject>) {
|
||||
debug_assert!(gil_is_acquired());
|
||||
// Ignoring the error means we do nothing if the TLS is broken.
|
||||
let _ = OWNED_OBJECTS.try_with(|holder| holder.borrow_mut().obj.push(obj));
|
||||
}
|
||||
|
||||
/// Register any value inside the GILPool.
|
||||
///
|
||||
/// # Safety
|
||||
/// It is the caller's responsibility to ensure that the inferred lifetime 'p is not inferred by
|
||||
/// the Rust compiler to outlast the current GILPool.
|
||||
pub unsafe fn register_any<'p, T: 'static>(obj: T) -> &'p T {
|
||||
debug_assert!(gil_is_acquired());
|
||||
OWNED_OBJECTS.with(|holder| {
|
||||
let boxed = Box::new(obj);
|
||||
let value_ref: *const T = &*boxed;
|
||||
holder.borrow_mut().any.push(boxed);
|
||||
&*value_ref
|
||||
})
|
||||
// Ignores the error in case this function called from `atexit`.
|
||||
let _ = OWNED_OBJECTS.try_with(|holder| holder.borrow_mut().push(obj));
|
||||
}
|
||||
|
||||
/// Increment pyo3's internal GIL count - to be called whenever GILPool or GILGuard is created.
|
||||
// Ignores the error in case this function called from `atexit`.
|
||||
#[inline(always)]
|
||||
fn increment_gil_count() {
|
||||
// Ignores the error in case this function called from `atexit`.
|
||||
let _ = GIL_COUNT.with(|c| c.set(c.get() + 1));
|
||||
}
|
||||
|
||||
/// Decrement pyo3's internal GIL count - to be called whenever GILPool or GILGuard is dropped.
|
||||
// Ignores the error in case this function called from `atexit`.
|
||||
#[inline(always)]
|
||||
fn decrement_gil_count() {
|
||||
// Ignores the error in case this function called from `atexit`.
|
||||
let _ = GIL_COUNT.try_with(|c| {
|
||||
let current = c.get();
|
||||
debug_assert!(
|
||||
|
@ -431,7 +393,7 @@ mod test {
|
|||
}
|
||||
|
||||
fn owned_object_count() -> usize {
|
||||
OWNED_OBJECTS.with(|holder| holder.borrow().obj.len())
|
||||
OWNED_OBJECTS.with(|holder| holder.borrow().len())
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -435,13 +435,6 @@ impl<'p> Python<'p> {
|
|||
FromPyPointer::from_borrowed_ptr_or_opt(self, ptr)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
/// Passes value ownership to `Python` object and get reference back.
|
||||
/// Value get cleaned up on the GIL release.
|
||||
pub fn register_any<T: 'static>(self, ob: T) -> &'p T {
|
||||
unsafe { gil::register_any(ob) }
|
||||
}
|
||||
|
||||
/// Releases a PyObject reference.
|
||||
#[inline]
|
||||
pub fn release<T>(self, ob: T)
|
||||
|
|
|
@ -173,7 +173,7 @@ mod test {
|
|||
slice[0..5].copy_from_slice(b"Hi...");
|
||||
|
||||
assert_eq!(
|
||||
&bytearray.str().unwrap().to_string().unwrap(),
|
||||
bytearray.str().unwrap().to_str().unwrap(),
|
||||
"bytearray(b'Hi... Python')"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ use crate::{
|
|||
PyTryFrom, Python, ToPyObject,
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::CStr;
|
||||
use std::os::raw::c_char;
|
||||
use std::str;
|
||||
|
||||
|
@ -44,41 +43,33 @@ impl PyString {
|
|||
/// Returns a `UnicodeEncodeError` if the input is not valid unicode
|
||||
/// (containing unpaired surrogates).
|
||||
#[inline]
|
||||
pub fn as_bytes(&self) -> PyResult<&[u8]> {
|
||||
pub fn to_str(&self) -> PyResult<&str> {
|
||||
unsafe {
|
||||
let mut size: ffi::Py_ssize_t = 0;
|
||||
let data = ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size) as *const u8;
|
||||
if data.is_null() {
|
||||
Err(PyErr::fetch(self.py()))
|
||||
} else {
|
||||
Ok(std::slice::from_raw_parts(data, size as usize))
|
||||
let slice = std::slice::from_raw_parts(data, size as usize);
|
||||
Ok(std::str::from_utf8_unchecked(slice))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the `PyString` into a Rust string.
|
||||
pub fn to_string(&self) -> PyResult<Cow<str>> {
|
||||
let bytes = self.as_bytes()?;
|
||||
let string = std::str::from_utf8(bytes)?;
|
||||
Ok(Cow::Borrowed(string))
|
||||
}
|
||||
|
||||
/// Converts the `PyString` into a Rust string.
|
||||
///
|
||||
/// Unpaired surrogates invalid UTF-8 sequences are
|
||||
/// replaced with `U+FFFD REPLACEMENT CHARACTER`.
|
||||
pub fn to_string_lossy(&self) -> Cow<str> {
|
||||
match self.to_string() {
|
||||
Ok(s) => s,
|
||||
match self.to_str() {
|
||||
Ok(s) => Cow::Borrowed(s),
|
||||
Err(_) => {
|
||||
let bytes = unsafe {
|
||||
self.py()
|
||||
.from_owned_ptr::<PyBytes>(ffi::PyUnicode_AsEncodedString(
|
||||
self.as_ptr(),
|
||||
CStr::from_bytes_with_nul(b"utf-8\0").unwrap().as_ptr(),
|
||||
CStr::from_bytes_with_nul(b"surrogatepass\0")
|
||||
.unwrap()
|
||||
.as_ptr(),
|
||||
b"utf-8\0" as *const _ as _,
|
||||
b"surrogatepass\0" as *const _ as _,
|
||||
))
|
||||
};
|
||||
String::from_utf8_lossy(bytes.as_bytes())
|
||||
|
@ -136,24 +127,9 @@ impl<'a> IntoPy<PyObject> for &'a String {
|
|||
|
||||
/// Allows extracting strings from Python objects.
|
||||
/// Accepts Python `str` and `unicode` objects.
|
||||
impl<'source> crate::FromPyObject<'source> for Cow<'source, str> {
|
||||
impl<'source> crate::FromPyObject<'source> for &'source str {
|
||||
fn extract(ob: &'source PyAny) -> PyResult<Self> {
|
||||
<PyString as PyTryFrom>::try_from(ob)?.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows extracting strings from Python objects.
|
||||
/// Accepts Python `str` and `unicode` objects.
|
||||
impl<'a> crate::FromPyObject<'a> for &'a str {
|
||||
fn extract(ob: &'a PyAny) -> PyResult<Self> {
|
||||
let s: Cow<'a, str> = crate::FromPyObject::extract(ob)?;
|
||||
match s {
|
||||
Cow::Borrowed(r) => Ok(r),
|
||||
Cow::Owned(r) => {
|
||||
let r = ob.py().register_any(r);
|
||||
Ok(r.as_str())
|
||||
}
|
||||
}
|
||||
<PyString as PyTryFrom>::try_from(ob)?.to_str()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -162,8 +138,8 @@ impl<'a> crate::FromPyObject<'a> for &'a str {
|
|||
impl<'source> FromPyObject<'source> for String {
|
||||
fn extract(obj: &'source PyAny) -> PyResult<Self> {
|
||||
<PyString as PyTryFrom>::try_from(obj)?
|
||||
.to_string()
|
||||
.map(Cow::into_owned)
|
||||
.to_str()
|
||||
.map(ToOwned::to_owned)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,7 +150,6 @@ mod test {
|
|||
use crate::object::PyObject;
|
||||
use crate::Python;
|
||||
use crate::{FromPyObject, PyTryFrom, ToPyObject};
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[test]
|
||||
fn test_non_bmp() {
|
||||
|
@ -197,44 +172,32 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_as_bytes() {
|
||||
fn test_to_str_ascii() {
|
||||
let gil = Python::acquire_gil();
|
||||
let py = gil.python();
|
||||
let s = "ascii 🐈";
|
||||
let obj: PyObject = PyString::new(py, s).into();
|
||||
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
|
||||
assert_eq!(s.as_bytes(), py_string.as_bytes().unwrap());
|
||||
assert_eq!(s, py_string.to_str().unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_as_bytes_surrogate() {
|
||||
fn test_to_str_surrogate() {
|
||||
let gil = Python::acquire_gil();
|
||||
let py = gil.python();
|
||||
let obj: PyObject = py.eval(r#"'\ud800'"#, None, None).unwrap().into();
|
||||
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
|
||||
assert!(py_string.as_bytes().is_err());
|
||||
assert!(py_string.to_str().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_string_ascii() {
|
||||
let gil = Python::acquire_gil();
|
||||
let py = gil.python();
|
||||
let s = "ascii";
|
||||
let obj: PyObject = PyString::new(py, s).into();
|
||||
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
|
||||
assert!(py_string.to_string().is_ok());
|
||||
assert_eq!(Cow::Borrowed(s), py_string.to_string().unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_string_unicode() {
|
||||
fn test_to_str_unicode() {
|
||||
let gil = Python::acquire_gil();
|
||||
let py = gil.python();
|
||||
let s = "哈哈🐈";
|
||||
let obj: PyObject = PyString::new(py, s).into();
|
||||
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
|
||||
assert!(py_string.to_string().is_ok());
|
||||
assert_eq!(Cow::Borrowed(s), py_string.to_string().unwrap());
|
||||
assert_eq!(s, py_string.to_str().unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in New Issue