Add wrapper for PyASCIIObject state bitfield accesses based on bindgen
This commit is contained in:
parent
713c02a910
commit
40d6d4797f
|
@ -0,0 +1 @@
|
||||||
|
Support `PyASCIIObject` / `PyUnicode` and associated methods on big-endian architectures.
|
|
@ -30,6 +30,184 @@ use std::os::raw::{c_char, c_int, c_uint, c_void};
|
||||||
// skipped Py_UNICODE_HIGH_SURROGATE
|
// skipped Py_UNICODE_HIGH_SURROGATE
|
||||||
// skipped Py_UNICODE_LOW_SURROGATE
|
// skipped Py_UNICODE_LOW_SURROGATE
|
||||||
|
|
||||||
|
// generated by bindgen v0.63.0 (with small adaptations)
|
||||||
|
#[repr(C)]
|
||||||
|
struct BitfieldUnit<Storage> {
|
||||||
|
storage: Storage,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Storage> BitfieldUnit<Storage> {
|
||||||
|
#[inline]
|
||||||
|
pub const fn new(storage: Storage) -> Self {
|
||||||
|
Self { storage }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Storage> BitfieldUnit<Storage>
|
||||||
|
where
|
||||||
|
Storage: AsRef<[u8]> + AsMut<[u8]>,
|
||||||
|
{
|
||||||
|
#[inline]
|
||||||
|
fn get_bit(&self, index: usize) -> bool {
|
||||||
|
debug_assert!(index / 8 < self.storage.as_ref().len());
|
||||||
|
let byte_index = index / 8;
|
||||||
|
let byte = self.storage.as_ref()[byte_index];
|
||||||
|
let bit_index = if cfg!(target_endian = "big") {
|
||||||
|
7 - (index % 8)
|
||||||
|
} else {
|
||||||
|
index % 8
|
||||||
|
};
|
||||||
|
let mask = 1 << bit_index;
|
||||||
|
byte & mask == mask
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn set_bit(&mut self, index: usize, val: bool) {
|
||||||
|
debug_assert!(index / 8 < self.storage.as_ref().len());
|
||||||
|
let byte_index = index / 8;
|
||||||
|
let byte = &mut self.storage.as_mut()[byte_index];
|
||||||
|
let bit_index = if cfg!(target_endian = "big") {
|
||||||
|
7 - (index % 8)
|
||||||
|
} else {
|
||||||
|
index % 8
|
||||||
|
};
|
||||||
|
let mask = 1 << bit_index;
|
||||||
|
if val {
|
||||||
|
*byte |= mask;
|
||||||
|
} else {
|
||||||
|
*byte &= !mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
|
||||||
|
debug_assert!(bit_width <= 64);
|
||||||
|
debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
|
||||||
|
debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
|
||||||
|
let mut val = 0;
|
||||||
|
for i in 0..(bit_width as usize) {
|
||||||
|
if self.get_bit(i + bit_offset) {
|
||||||
|
let index = if cfg!(target_endian = "big") {
|
||||||
|
bit_width as usize - 1 - i
|
||||||
|
} else {
|
||||||
|
i
|
||||||
|
};
|
||||||
|
val |= 1 << index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
|
||||||
|
debug_assert!(bit_width <= 64);
|
||||||
|
debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
|
||||||
|
debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
|
||||||
|
for i in 0..(bit_width as usize) {
|
||||||
|
let mask = 1 << i;
|
||||||
|
let val_bit_is_set = val & mask == mask;
|
||||||
|
let index = if cfg!(target_endian = "big") {
|
||||||
|
bit_width as usize - 1 - i
|
||||||
|
} else {
|
||||||
|
i
|
||||||
|
};
|
||||||
|
self.set_bit(index + bit_offset, val_bit_is_set);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// generated by bindgen v0.63.0 (with small adaptations)
|
||||||
|
// The same code is generated for Python 3.7, 3.8, 3.9, 3.10, and 3.11, but the "ready" field
|
||||||
|
// has been removed from Python 3.12.
|
||||||
|
|
||||||
|
/// Wrapper around the `PyASCIIObject.state` bitfield with getters and setters that work
|
||||||
|
/// on most little- and big-endian architectures.
|
||||||
|
///
|
||||||
|
/// Memory layout of C bitfields is implementation defined, so these functions are still
|
||||||
|
/// unsafe. Users must verify that they work as expected on the architectures they target.
|
||||||
|
#[repr(C)]
|
||||||
|
#[repr(align(4))]
|
||||||
|
struct PyASCIIObjectState {
|
||||||
|
_bitfield_align: [u8; 0],
|
||||||
|
_bitfield: BitfieldUnit<[u8; 4usize]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// c_uint and u32 are not necessarily the same type on all targets / architectures
|
||||||
|
#[allow(clippy::useless_transmute)]
|
||||||
|
impl PyASCIIObjectState {
|
||||||
|
#[inline]
|
||||||
|
unsafe fn interned(&self) -> c_uint {
|
||||||
|
std::mem::transmute(self._bitfield.get(0usize, 2u8) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn set_interned(&mut self, val: c_uint) {
|
||||||
|
let val: u32 = std::mem::transmute(val);
|
||||||
|
self._bitfield.set(0usize, 2u8, val as u64)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn kind(&self) -> c_uint {
|
||||||
|
std::mem::transmute(self._bitfield.get(2usize, 3u8) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn set_kind(&mut self, val: c_uint) {
|
||||||
|
let val: u32 = std::mem::transmute(val);
|
||||||
|
self._bitfield.set(2usize, 3u8, val as u64)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn compact(&self) -> c_uint {
|
||||||
|
std::mem::transmute(self._bitfield.get(5usize, 1u8) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn set_compact(&mut self, val: c_uint) {
|
||||||
|
let val: u32 = std::mem::transmute(val);
|
||||||
|
self._bitfield.set(5usize, 1u8, val as u64)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn ascii(&self) -> c_uint {
|
||||||
|
std::mem::transmute(self._bitfield.get(6usize, 1u8) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn set_ascii(&mut self, val: c_uint) {
|
||||||
|
let val: u32 = std::mem::transmute(val);
|
||||||
|
self._bitfield.set(6usize, 1u8, val as u64)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn ready(&self) -> c_uint {
|
||||||
|
std::mem::transmute(self._bitfield.get(7usize, 1u8) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn set_ready(&mut self, val: c_uint) {
|
||||||
|
let val: u32 = std::mem::transmute(val);
|
||||||
|
self._bitfield.set(7usize, 1u8, val as u64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<u32> for PyASCIIObjectState {
|
||||||
|
#[inline]
|
||||||
|
fn from(value: u32) -> Self {
|
||||||
|
PyASCIIObjectState {
|
||||||
|
_bitfield_align: [],
|
||||||
|
_bitfield: BitfieldUnit::new(value.to_ne_bytes()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<PyASCIIObjectState> for u32 {
|
||||||
|
#[inline]
|
||||||
|
fn from(value: PyASCIIObjectState) -> Self {
|
||||||
|
u32::from_ne_bytes(value._bitfield.storage)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
pub struct PyASCIIObject {
|
pub struct PyASCIIObject {
|
||||||
pub ob_base: PyObject,
|
pub ob_base: PyObject,
|
||||||
|
@ -52,34 +230,98 @@ pub struct PyASCIIObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Interacting with the bitfield is not actually well-defined, so we mark these APIs unsafe.
|
/// Interacting with the bitfield is not actually well-defined, so we mark these APIs unsafe.
|
||||||
///
|
|
||||||
/// In addition, they are disabled on big-endian architectures to restrict this to most "common"
|
|
||||||
/// platforms, which are at least tested on CI and appear to be sound.
|
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
impl PyASCIIObject {
|
impl PyASCIIObject {
|
||||||
|
/// Get the `interned` field of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`], [`SSTATE_INTERNED_IMMORTAL`]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn interned(&self) -> c_uint {
|
pub unsafe fn interned(&self) -> c_uint {
|
||||||
self.state & 3
|
PyASCIIObjectState::from(self.state).interned()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the `interned` field of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Calling this function with an argument that is not [`SSTATE_NOT_INTERNED`],
|
||||||
|
/// [`SSTATE_INTERNED_MORTAL`], or [`SSTATE_INTERNED_IMMORTAL`] is invalid.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn set_interned(&mut self, val: c_uint) {
|
||||||
|
let mut state = PyASCIIObjectState::from(self.state);
|
||||||
|
state.set_interned(val);
|
||||||
|
self.state = u32::from(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the `kind` field of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Returns one of: [`PyUnicode_WCHAR_KIND`], [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`],
|
||||||
|
/// [`PyUnicode_4BYTE_KIND`]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn kind(&self) -> c_uint {
|
pub unsafe fn kind(&self) -> c_uint {
|
||||||
(self.state >> 2) & 7
|
PyASCIIObjectState::from(self.state).kind()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the `kind` field of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Calling this function with an argument that is not [`PyUnicode_WCHAR_KIND`], [`PyUnicode_1BYTE_KIND`],
|
||||||
|
/// [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`] is invalid.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn set_kind(&mut self, val: c_uint) {
|
||||||
|
let mut state = PyASCIIObjectState::from(self.state);
|
||||||
|
state.set_kind(val);
|
||||||
|
self.state = u32::from(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the `compact` field of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Returns either `0` or `1`.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn compact(&self) -> c_uint {
|
pub unsafe fn compact(&self) -> c_uint {
|
||||||
(self.state >> 5) & 1
|
PyASCIIObjectState::from(self.state).compact()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the `compact` flag of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Calling this function with an argument that is neither `0` nor `1` is invalid.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn set_compact(&mut self, val: c_uint) {
|
||||||
|
let mut state = PyASCIIObjectState::from(self.state);
|
||||||
|
state.set_compact(val);
|
||||||
|
self.state = u32::from(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the `ascii` field of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Returns either `0` or `1`.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn ascii(&self) -> c_uint {
|
pub unsafe fn ascii(&self) -> c_uint {
|
||||||
(self.state >> 6) & 1
|
PyASCIIObjectState::from(self.state).ascii()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the `ascii` flag of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Calling this function with an argument that is neither `0` nor `1` is invalid.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn set_ascii(&mut self, val: c_uint) {
|
||||||
|
let mut state = PyASCIIObjectState::from(self.state);
|
||||||
|
state.set_ascii(val);
|
||||||
|
self.state = u32::from(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the `ready` field of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Returns either `0` or `1`.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn ready(&self) -> c_uint {
|
pub unsafe fn ready(&self) -> c_uint {
|
||||||
(self.state >> 7) & 1
|
PyASCIIObjectState::from(self.state).ready()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the `ready` flag of the [`PyASCIIObject`] state bitfield.
|
||||||
|
///
|
||||||
|
/// Calling this function with an argument that is neither `0` nor `1` is invalid.
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn set_ready(&mut self, val: c_uint) {
|
||||||
|
let mut state = PyASCIIObjectState::from(self.state);
|
||||||
|
state.set_ready(val);
|
||||||
|
self.state = u32::from(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,7 +362,6 @@ pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
|
||||||
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
|
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
|
||||||
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
||||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||||
|
@ -129,13 +370,11 @@ pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
|
||||||
(*(op as *mut PyASCIIObject)).compact()
|
(*(op as *mut PyASCIIObject)).compact()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
|
||||||
((*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0).into()
|
((*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0).into()
|
||||||
}
|
}
|
||||||
|
@ -149,25 +388,21 @@ pub const PyUnicode_2BYTE_KIND: c_uint = 2;
|
||||||
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
|
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
|
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
|
||||||
PyUnicode_DATA(op) as *mut Py_UCS1
|
PyUnicode_DATA(op) as *mut Py_UCS1
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
|
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
|
||||||
PyUnicode_DATA(op) as *mut Py_UCS2
|
PyUnicode_DATA(op) as *mut Py_UCS2
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
|
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
|
||||||
PyUnicode_DATA(op) as *mut Py_UCS4
|
PyUnicode_DATA(op) as *mut Py_UCS4
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
|
||||||
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
||||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||||
|
@ -176,7 +411,6 @@ pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
if PyUnicode_IS_ASCII(op) != 0 {
|
if PyUnicode_IS_ASCII(op) != 0 {
|
||||||
(op as *mut PyASCIIObject).offset(1) as *mut c_void
|
(op as *mut PyASCIIObject).offset(1) as *mut c_void
|
||||||
|
@ -186,7 +420,6 @@ pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
|
debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
|
||||||
|
|
||||||
|
@ -194,7 +427,6 @@ pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
|
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
||||||
|
|
||||||
|
@ -210,7 +442,6 @@ pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
|
||||||
// skipped PyUnicode_READ_CHAR
|
// skipped PyUnicode_READ_CHAR
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
|
pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
|
||||||
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
||||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||||
|
@ -219,7 +450,6 @@ pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
|
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
|
||||||
(*(op as *mut PyASCIIObject)).ready()
|
(*(op as *mut PyASCIIObject)).ready()
|
||||||
}
|
}
|
||||||
|
@ -227,7 +457,6 @@ pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
|
||||||
#[cfg(not(Py_3_12))]
|
#[cfg(not(Py_3_12))]
|
||||||
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
|
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
|
||||||
#[inline]
|
#[inline]
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
|
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
|
||||||
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
debug_assert!(crate::PyUnicode_Check(op) != 0);
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
use crate::ffi::*;
|
use crate::ffi::*;
|
||||||
use crate::{types::PyDict, AsPyPointer, IntoPy, Py, PyAny, Python};
|
use crate::{types::PyDict, AsPyPointer, IntoPy, Py, PyAny, Python};
|
||||||
|
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
use crate::types::PyString;
|
use crate::types::PyString;
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
use libc::wchar_t;
|
use libc::wchar_t;
|
||||||
|
|
||||||
#[cfg_attr(target_arch = "wasm32", ignore)] // DateTime import fails on wasm for mysterious reasons
|
#[cfg_attr(target_arch = "wasm32", ignore)] // DateTime import fails on wasm for mysterious reasons
|
||||||
|
@ -108,7 +106,6 @@ fn test_timezone_from_offset_and_name() {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn ascii_object_bitfield() {
|
fn ascii_object_bitfield() {
|
||||||
let ob_base: PyObject = unsafe { std::mem::zeroed() };
|
let ob_base: PyObject = unsafe { std::mem::zeroed() };
|
||||||
|
@ -118,7 +115,7 @@ fn ascii_object_bitfield() {
|
||||||
length: 0,
|
length: 0,
|
||||||
#[cfg(not(PyPy))]
|
#[cfg(not(PyPy))]
|
||||||
hash: 0,
|
hash: 0,
|
||||||
state: 0,
|
state: 0u32,
|
||||||
wstr: std::ptr::null_mut() as *mut wchar_t,
|
wstr: std::ptr::null_mut() as *mut wchar_t,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -130,27 +127,26 @@ fn ascii_object_bitfield() {
|
||||||
assert_eq!(o.ready(), 0);
|
assert_eq!(o.ready(), 0);
|
||||||
|
|
||||||
for i in 0..4 {
|
for i in 0..4 {
|
||||||
o.state = i;
|
o.set_interned(i);
|
||||||
assert_eq!(o.interned(), i);
|
assert_eq!(o.interned(), i);
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in 0..8 {
|
for i in 0..8 {
|
||||||
o.state = i << 2;
|
o.set_kind(i);
|
||||||
assert_eq!(o.kind(), i);
|
assert_eq!(o.kind(), i);
|
||||||
}
|
}
|
||||||
|
|
||||||
o.state = 1 << 5;
|
o.set_compact(1);
|
||||||
assert_eq!(o.compact(), 1);
|
assert_eq!(o.compact(), 1);
|
||||||
|
|
||||||
o.state = 1 << 6;
|
o.set_ascii(1);
|
||||||
assert_eq!(o.ascii(), 1);
|
assert_eq!(o.ascii(), 1);
|
||||||
|
|
||||||
o.state = 1 << 7;
|
o.set_ready(1);
|
||||||
assert_eq!(o.ready(), 1);
|
assert_eq!(o.ready(), 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(Py_3_10, allow(deprecated))]
|
#[cfg_attr(Py_3_10, allow(deprecated))]
|
||||||
fn ascii() {
|
fn ascii() {
|
||||||
|
@ -191,7 +187,6 @@ fn ascii() {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_endian = "little")]
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(Py_3_10, allow(deprecated))]
|
#[cfg_attr(Py_3_10, allow(deprecated))]
|
||||||
fn ucs4() {
|
fn ucs4() {
|
||||||
|
|
|
@ -36,7 +36,7 @@ pub use self::pysuper::PySuper;
|
||||||
pub use self::sequence::PySequence;
|
pub use self::sequence::PySequence;
|
||||||
pub use self::set::PySet;
|
pub use self::set::PySet;
|
||||||
pub use self::slice::{PySlice, PySliceIndices};
|
pub use self::slice::{PySlice, PySliceIndices};
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
pub use self::string::PyStringData;
|
pub use self::string::PyStringData;
|
||||||
pub use self::string::{PyString, PyString as PyUnicode};
|
pub use self::string::{PyString, PyString as PyUnicode};
|
||||||
pub use self::traceback::PyTraceback;
|
pub use self::traceback::PyTraceback;
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// Copyright (c) 2017-present PyO3 Project and Contributors
|
// Copyright (c) 2017-present PyO3 Project and Contributors
|
||||||
|
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
use crate::exceptions::PyUnicodeDecodeError;
|
use crate::exceptions::PyUnicodeDecodeError;
|
||||||
use crate::types::PyBytes;
|
use crate::types::PyBytes;
|
||||||
use crate::{ffi, AsPyPointer, PyAny, PyResult, Python};
|
use crate::{ffi, AsPyPointer, PyAny, PyResult, Python};
|
||||||
|
@ -12,7 +12,7 @@ use std::str;
|
||||||
///
|
///
|
||||||
/// Python internally stores strings in various representations. This enumeration
|
/// Python internally stores strings in various representations. This enumeration
|
||||||
/// represents those variations.
|
/// represents those variations.
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
pub enum PyStringData<'a> {
|
pub enum PyStringData<'a> {
|
||||||
/// UCS1 representation.
|
/// UCS1 representation.
|
||||||
|
@ -25,7 +25,7 @@ pub enum PyStringData<'a> {
|
||||||
Ucs4(&'a [u32]),
|
Ucs4(&'a [u32]),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
impl<'a> PyStringData<'a> {
|
impl<'a> PyStringData<'a> {
|
||||||
/// Obtain the raw bytes backing this instance as a [u8] slice.
|
/// Obtain the raw bytes backing this instance as a [u8] slice.
|
||||||
pub fn as_bytes(&self) -> &[u8] {
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
|
@ -238,9 +238,7 @@ impl PyString {
|
||||||
///
|
///
|
||||||
/// By using this API, you accept responsibility for testing that PyStringData behaves as
|
/// By using this API, you accept responsibility for testing that PyStringData behaves as
|
||||||
/// expected on the targets where you plan to distribute your software.
|
/// expected on the targets where you plan to distribute your software.
|
||||||
///
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
/// For example, it is known not to work on big-endian platforms.
|
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
|
||||||
pub unsafe fn data(&self) -> PyResult<PyStringData<'_>> {
|
pub unsafe fn data(&self) -> PyResult<PyStringData<'_>> {
|
||||||
let ptr = self.as_ptr();
|
let ptr = self.as_ptr();
|
||||||
|
|
||||||
|
@ -284,7 +282,7 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::Python;
|
use crate::Python;
|
||||||
use crate::{PyObject, ToPyObject};
|
use crate::{PyObject, ToPyObject};
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -347,7 +345,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
fn test_string_data_ucs1() {
|
fn test_string_data_ucs1() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
let s = PyString::new(py, "hello, world");
|
let s = PyString::new(py, "hello, world");
|
||||||
|
@ -360,7 +358,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
fn test_string_data_ucs1_invalid() {
|
fn test_string_data_ucs1_invalid() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
// 0xfe is not allowed in UTF-8.
|
// 0xfe is not allowed in UTF-8.
|
||||||
|
@ -386,7 +384,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
fn test_string_data_ucs2() {
|
fn test_string_data_ucs2() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
let s = py.eval("'foo\\ud800'", None, None).unwrap();
|
let s = py.eval("'foo\\ud800'", None, None).unwrap();
|
||||||
|
@ -428,7 +426,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(all(not(Py_LIMITED_API), target_endian = "little"))]
|
#[cfg(not(Py_LIMITED_API))]
|
||||||
fn test_string_data_ucs4() {
|
fn test_string_data_ucs4() {
|
||||||
Python::with_gil(|py| {
|
Python::with_gil(|py| {
|
||||||
let s = "哈哈🐈";
|
let s = "哈哈🐈";
|
||||||
|
|
Loading…
Reference in New Issue