Merge pull request #1834 from davidhewitt/pystring-data-big-endian
pystring: disable `PyString::data` on big-endian targets
This commit is contained in:
commit
868c591c18
|
@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
||||||
- `PySequence::get_slice` now returns `PyResult<&PySequence>` instead of `PyResult<&PyAny>`. [#1829](https://github.com/PyO3/pyo3/pull/1829)
|
- `PySequence::get_slice` now returns `PyResult<&PySequence>` instead of `PyResult<&PyAny>`. [#1829](https://github.com/PyO3/pyo3/pull/1829)
|
||||||
- Deprecate `PyTuple::split_from`. [#1804](https://github.com/PyO3/pyo3/pull/1804)
|
- Deprecate `PyTuple::split_from`. [#1804](https://github.com/PyO3/pyo3/pull/1804)
|
||||||
- Deprecate `PyTuple::slice`, new method `PyTuple::get_slice` added with `usize` indices. [#1828](https://github.com/PyO3/pyo3/pull/1828)
|
- Deprecate `PyTuple::slice`, new method `PyTuple::get_slice` added with `usize` indices. [#1828](https://github.com/PyO3/pyo3/pull/1828)
|
||||||
|
- Mark `PyString::data` as `unsafe` and disable it and some supporting PyUnicode FFI APIs (which depend on a C bitfield) on big-endian targets. [#1834](https://github.com/PyO3/pyo3/pull/1834)
|
||||||
|
|
||||||
## [0.14.3] - 2021-08-22
|
## [0.14.3] - 2021-08-22
|
||||||
|
|
||||||
|
|
|
@ -50,29 +50,34 @@ pub struct PyASCIIObject {
|
||||||
pub wstr: *mut wchar_t,
|
pub wstr: *mut wchar_t,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Interacting with the bitfield is not actually well-defined, so we mark these APIs unsafe.
|
||||||
|
///
|
||||||
|
/// In addition, they are disabled on big-endian architectures to restrict this to most "common"
|
||||||
|
/// platforms, which are at least tested on CI and appear to be sound.
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
impl PyASCIIObject {
|
impl PyASCIIObject {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn interned(&self) -> c_uint {
|
pub unsafe fn interned(&self) -> c_uint {
|
||||||
self.state & 3
|
self.state & 3
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn kind(&self) -> c_uint {
|
pub unsafe fn kind(&self) -> c_uint {
|
||||||
(self.state >> 2) & 7
|
(self.state >> 2) & 7
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn compact(&self) -> c_uint {
|
pub unsafe fn compact(&self) -> c_uint {
|
||||||
(self.state >> 5) & 1
|
(self.state >> 5) & 1
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn ascii(&self) -> c_uint {
|
pub unsafe fn ascii(&self) -> c_uint {
|
||||||
(self.state >> 6) & 1
|
(self.state >> 6) & 1
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn ready(&self) -> c_uint {
|
pub unsafe fn ready(&self) -> c_uint {
|
||||||
(self.state >> 7) & 1
|
(self.state >> 7) & 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -114,6 +119,7 @@ pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
|
||||||
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
|
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
|
||||||
debug_assert!(PyUnicode_Check(op) != 0);
|
debug_assert!(PyUnicode_Check(op) != 0);
|
||||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||||
|
@ -122,11 +128,13 @@ pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
|
||||||
(*(op as *mut PyASCIIObject)).compact()
|
(*(op as *mut PyASCIIObject)).compact()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
|
||||||
if (*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0 {
|
if (*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0 {
|
||||||
1
|
1
|
||||||
|
@ -144,21 +152,25 @@ pub const PyUnicode_2BYTE_KIND: c_uint = 2;
|
||||||
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
|
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
|
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
|
||||||
PyUnicode_DATA(op) as *mut Py_UCS1
|
PyUnicode_DATA(op) as *mut Py_UCS1
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
|
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
|
||||||
PyUnicode_DATA(op) as *mut Py_UCS2
|
PyUnicode_DATA(op) as *mut Py_UCS2
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
|
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
|
||||||
PyUnicode_DATA(op) as *mut Py_UCS4
|
PyUnicode_DATA(op) as *mut Py_UCS4
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
|
||||||
debug_assert!(PyUnicode_Check(op) != 0);
|
debug_assert!(PyUnicode_Check(op) != 0);
|
||||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||||
|
@ -167,6 +179,7 @@ pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
if PyUnicode_IS_ASCII(op) != 0 {
|
if PyUnicode_IS_ASCII(op) != 0 {
|
||||||
(op as *mut PyASCIIObject).offset(1) as *mut c_void
|
(op as *mut PyASCIIObject).offset(1) as *mut c_void
|
||||||
|
@ -176,6 +189,7 @@ pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
|
debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
|
||||||
|
|
||||||
|
@ -183,6 +197,7 @@ pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
|
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
debug_assert!(PyUnicode_Check(op) != 0);
|
debug_assert!(PyUnicode_Check(op) != 0);
|
||||||
|
|
||||||
|
@ -206,6 +221,7 @@ pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
|
||||||
(*(op as *mut PyASCIIObject)).ready()
|
(*(op as *mut PyASCIIObject)).ready()
|
||||||
}
|
}
|
||||||
|
@ -213,6 +229,7 @@ pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
|
||||||
#[cfg(not(Py_3_12))]
|
#[cfg(not(Py_3_12))]
|
||||||
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
|
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
|
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
|
||||||
debug_assert!(PyUnicode_Check(op) != 0);
|
debug_assert!(PyUnicode_Check(op) != 0);
|
||||||
|
|
||||||
|
@ -481,6 +498,7 @@ extern "C" {
|
||||||
// skipped _PyUnicode_ScanIdentifier
|
// skipped _PyUnicode_ScanIdentifier
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
#[cfg(not(target_endian = "big"))]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::types::PyString;
|
use crate::types::PyString;
|
||||||
|
@ -498,6 +516,7 @@ mod tests {
|
||||||
wstr: std::ptr::null_mut() as *mut wchar_t,
|
wstr: std::ptr::null_mut() as *mut wchar_t,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
unsafe {
|
||||||
assert_eq!(o.interned(), 0);
|
assert_eq!(o.interned(), 0);
|
||||||
assert_eq!(o.kind(), 0);
|
assert_eq!(o.kind(), 0);
|
||||||
assert_eq!(o.compact(), 0);
|
assert_eq!(o.compact(), 0);
|
||||||
|
@ -523,6 +542,7 @@ mod tests {
|
||||||
o.state = 1 << 7;
|
o.state = 1 << 7;
|
||||||
assert_eq!(o.ready(), 1);
|
assert_eq!(o.ready(), 1);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(Py_3_10, allow(deprecated))]
|
#[cfg_attr(Py_3_10, allow(deprecated))]
|
||||||
|
|
|
@ -17,8 +17,8 @@ use std::str;
|
||||||
///
|
///
|
||||||
/// Python internally stores strings in various representations. This enumeration
|
/// Python internally stores strings in various representations. This enumeration
|
||||||
/// represents those variations.
|
/// represents those variations.
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
#[cfg_attr(docsrs, doc(cfg(not(Py_LIMITED_API))))]
|
#[cfg_attr(docsrs, doc(cfg(not(any(Py_LIMITED_API, target_endian = "big")))))]
|
||||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||||
pub enum PyStringData<'a> {
|
pub enum PyStringData<'a> {
|
||||||
/// UCS1 representation.
|
/// UCS1 representation.
|
||||||
|
@ -31,7 +31,7 @@ pub enum PyStringData<'a> {
|
||||||
Ucs4(&'a [u32]),
|
Ucs4(&'a [u32]),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
impl<'a> PyStringData<'a> {
|
impl<'a> PyStringData<'a> {
|
||||||
/// Obtain the raw bytes backing this instance as a [u8] slice.
|
/// Obtain the raw bytes backing this instance as a [u8] slice.
|
||||||
pub fn as_bytes(&self) -> &[u8] {
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
|
@ -211,16 +211,28 @@ impl PyString {
|
||||||
|
|
||||||
/// Obtains the raw data backing the Python string.
|
/// Obtains the raw data backing the Python string.
|
||||||
///
|
///
|
||||||
/// If the Python string object was created through legacy APIs, its internal
|
/// If the Python string object was created through legacy APIs, its internal storage format
|
||||||
/// storage format will be canonicalized before data is returned.
|
/// will be canonicalized before data is returned.
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
///
|
||||||
#[cfg_attr(docsrs, doc(cfg(not(Py_LIMITED_API))))]
|
/// # Safety
|
||||||
pub fn data(&self) -> PyResult<PyStringData<'_>> {
|
///
|
||||||
|
/// This function implementation relies on manually decoding a C bitfield. In practice, this
|
||||||
|
/// works well on common little-endian architectures such as x86_64, where the bitfield has a
|
||||||
|
/// common representation (even if it is not part of the C spec). The PyO3 CI tests this API on
|
||||||
|
/// x86_64 platforms.
|
||||||
|
///
|
||||||
|
/// By using this API, you accept responsibility for testing that PyStringData behaves as
|
||||||
|
/// expected on the targets where you plan to distribute your software.
|
||||||
|
///
|
||||||
|
/// For example, it is known not to work on big-endian platforms.
|
||||||
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
|
#[cfg_attr(docsrs, doc(cfg(not(any(Py_LIMITED_API, target_endian = "big")))))]
|
||||||
|
pub unsafe fn data(&self) -> PyResult<PyStringData<'_>> {
|
||||||
let ptr = self.as_ptr();
|
let ptr = self.as_ptr();
|
||||||
|
|
||||||
if cfg!(not(Py_3_12)) {
|
if cfg!(not(Py_3_12)) {
|
||||||
#[allow(deprecated)]
|
#[allow(deprecated)]
|
||||||
let ready = unsafe { ffi::PyUnicode_READY(ptr) };
|
let ready = ffi::PyUnicode_READY(ptr);
|
||||||
if ready != 0 {
|
if ready != 0 {
|
||||||
// Exception was created on failure.
|
// Exception was created on failure.
|
||||||
return Err(crate::PyErr::api_call_failed(self.py()));
|
return Err(crate::PyErr::api_call_failed(self.py()));
|
||||||
|
@ -230,20 +242,23 @@ impl PyString {
|
||||||
// The string should be in its canonical form after calling `PyUnicode_READY()`.
|
// The string should be in its canonical form after calling `PyUnicode_READY()`.
|
||||||
// And non-canonical form not possible after Python 3.12. So it should be safe
|
// And non-canonical form not possible after Python 3.12. So it should be safe
|
||||||
// to call these APIs.
|
// to call these APIs.
|
||||||
let length = unsafe { ffi::PyUnicode_GET_LENGTH(ptr) } as usize;
|
let length = ffi::PyUnicode_GET_LENGTH(ptr) as usize;
|
||||||
let raw_data = unsafe { ffi::PyUnicode_DATA(ptr) };
|
let raw_data = ffi::PyUnicode_DATA(ptr);
|
||||||
let kind = unsafe { ffi::PyUnicode_KIND(ptr) };
|
let kind = ffi::PyUnicode_KIND(ptr);
|
||||||
|
|
||||||
match kind {
|
match kind {
|
||||||
ffi::PyUnicode_1BYTE_KIND => Ok(PyStringData::Ucs1(unsafe {
|
ffi::PyUnicode_1BYTE_KIND => Ok(PyStringData::Ucs1(std::slice::from_raw_parts(
|
||||||
std::slice::from_raw_parts(raw_data as *const u8, length)
|
raw_data as *const u8,
|
||||||
})),
|
length,
|
||||||
ffi::PyUnicode_2BYTE_KIND => Ok(PyStringData::Ucs2(unsafe {
|
))),
|
||||||
std::slice::from_raw_parts(raw_data as *const u16, length)
|
ffi::PyUnicode_2BYTE_KIND => Ok(PyStringData::Ucs2(std::slice::from_raw_parts(
|
||||||
})),
|
raw_data as *const u16,
|
||||||
ffi::PyUnicode_4BYTE_KIND => Ok(PyStringData::Ucs4(unsafe {
|
length,
|
||||||
std::slice::from_raw_parts(raw_data as *const u32, length)
|
))),
|
||||||
})),
|
ffi::PyUnicode_4BYTE_KIND => Ok(PyStringData::Ucs4(std::slice::from_raw_parts(
|
||||||
|
raw_data as *const u32,
|
||||||
|
length,
|
||||||
|
))),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -465,11 +480,11 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
fn test_string_data_ucs1() {
|
fn test_string_data_ucs1() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
let s = PyString::new(py, "hello, world");
|
let s = PyString::new(py, "hello, world");
|
||||||
let data = s.data().unwrap();
|
let data = unsafe { s.data().unwrap() };
|
||||||
|
|
||||||
assert_eq!(data, PyStringData::Ucs1(b"hello, world"));
|
assert_eq!(data, PyStringData::Ucs1(b"hello, world"));
|
||||||
assert_eq!(data.to_string(py).unwrap(), Cow::Borrowed("hello, world"));
|
assert_eq!(data.to_string(py).unwrap(), Cow::Borrowed("hello, world"));
|
||||||
|
@ -478,7 +493,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
fn test_string_data_ucs1_invalid() {
|
fn test_string_data_ucs1_invalid() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
// 0xfe is not allowed in UTF-8.
|
// 0xfe is not allowed in UTF-8.
|
||||||
|
@ -492,7 +507,7 @@ mod tests {
|
||||||
};
|
};
|
||||||
assert!(!ptr.is_null());
|
assert!(!ptr.is_null());
|
||||||
let s: &PyString = unsafe { py.from_owned_ptr(ptr) };
|
let s: &PyString = unsafe { py.from_owned_ptr(ptr) };
|
||||||
let data = s.data().unwrap();
|
let data = unsafe { s.data().unwrap() };
|
||||||
assert_eq!(data, PyStringData::Ucs1(b"f\xfe"));
|
assert_eq!(data, PyStringData::Ucs1(b"f\xfe"));
|
||||||
let err = data.to_string(py).unwrap_err();
|
let err = data.to_string(py).unwrap_err();
|
||||||
assert_eq!(err.ptype(py), PyUnicodeDecodeError::type_object(py));
|
assert_eq!(err.ptype(py), PyUnicodeDecodeError::type_object(py));
|
||||||
|
@ -504,12 +519,12 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
fn test_string_data_ucs2() {
|
fn test_string_data_ucs2() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
let s = py.eval("'foo\\ud800'", None, None).unwrap();
|
let s = py.eval("'foo\\ud800'", None, None).unwrap();
|
||||||
let py_string = s.cast_as::<PyString>().unwrap();
|
let py_string = s.cast_as::<PyString>().unwrap();
|
||||||
let data = py_string.data().unwrap();
|
let data = unsafe { py_string.data().unwrap() };
|
||||||
|
|
||||||
assert_eq!(data, PyStringData::Ucs2(&[102, 111, 111, 0xd800]));
|
assert_eq!(data, PyStringData::Ucs2(&[102, 111, 111, 0xd800]));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -520,7 +535,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
fn test_string_data_ucs2_invalid() {
|
fn test_string_data_ucs2_invalid() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
// U+FF22 (valid) & U+d800 (never valid)
|
// U+FF22 (valid) & U+d800 (never valid)
|
||||||
|
@ -534,7 +549,7 @@ mod tests {
|
||||||
};
|
};
|
||||||
assert!(!ptr.is_null());
|
assert!(!ptr.is_null());
|
||||||
let s: &PyString = unsafe { py.from_owned_ptr(ptr) };
|
let s: &PyString = unsafe { py.from_owned_ptr(ptr) };
|
||||||
let data = s.data().unwrap();
|
let data = unsafe { s.data().unwrap() };
|
||||||
assert_eq!(data, PyStringData::Ucs2(&[0xff22, 0xd800]));
|
assert_eq!(data, PyStringData::Ucs2(&[0xff22, 0xd800]));
|
||||||
let err = data.to_string(py).unwrap_err();
|
let err = data.to_string(py).unwrap_err();
|
||||||
assert_eq!(err.ptype(py), PyUnicodeDecodeError::type_object(py));
|
assert_eq!(err.ptype(py), PyUnicodeDecodeError::type_object(py));
|
||||||
|
@ -546,12 +561,12 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
fn test_string_data_ucs4() {
|
fn test_string_data_ucs4() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
let s = "哈哈🐈";
|
let s = "哈哈🐈";
|
||||||
let py_string = PyString::new(py, s);
|
let py_string = PyString::new(py, s);
|
||||||
let data = py_string.data().unwrap();
|
let data = unsafe { py_string.data().unwrap() };
|
||||||
|
|
||||||
assert_eq!(data, PyStringData::Ucs4(&[21704, 21704, 128008]));
|
assert_eq!(data, PyStringData::Ucs4(&[21704, 21704, 128008]));
|
||||||
assert_eq!(data.to_string_lossy(), Cow::Owned::<str>(s.to_string()));
|
assert_eq!(data.to_string_lossy(), Cow::Owned::<str>(s.to_string()));
|
||||||
|
@ -559,7 +574,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(not(Py_LIMITED_API))]
|
#[cfg(not(any(Py_LIMITED_API, target_endian = "big")))]
|
||||||
fn test_string_data_ucs4_invalid() {
|
fn test_string_data_ucs4_invalid() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
// U+20000 (valid) & U+d800 (never valid)
|
// U+20000 (valid) & U+d800 (never valid)
|
||||||
|
@ -573,7 +588,7 @@ mod tests {
|
||||||
};
|
};
|
||||||
assert!(!ptr.is_null());
|
assert!(!ptr.is_null());
|
||||||
let s: &PyString = unsafe { py.from_owned_ptr(ptr) };
|
let s: &PyString = unsafe { py.from_owned_ptr(ptr) };
|
||||||
let data = s.data().unwrap();
|
let data = unsafe { s.data().unwrap() };
|
||||||
assert_eq!(data, PyStringData::Ucs4(&[0x20000, 0xd800]));
|
assert_eq!(data, PyStringData::Ucs4(&[0x20000, 0xd800]));
|
||||||
let err = data.to_string(py).unwrap_err();
|
let err = data.to_string(py).unwrap_err();
|
||||||
assert_eq!(err.ptype(py), PyUnicodeDecodeError::type_object(py));
|
assert_eq!(err.ptype(py), PyUnicodeDecodeError::type_object(py));
|
||||||
|
|
Loading…
Reference in New Issue