PEP 623: wstr, wstr_length, state changes

https://peps.python.org/pep-0623/

	Following members are removed from the Unicode structures:

	    wstr
	    wstr_length
	    state.compact
	    state.ready

Note that state.compact is still present as of CPython v3.12.0a7, so its accessors are kept.

The width of state.interned has changed from 2 bits to 1 bit.
This commit is contained in:
ijl 2023-04-11 12:51:42 +00:00
parent 275e66112d
commit 88b46a7029
2 changed files with 62 additions and 12 deletions

View file

@ -1,6 +1,7 @@
#[cfg(not(PyPy))]
use crate::Py_hash_t;
use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_ssize_t};
#[cfg(not(Py_3_12))]
use libc::wchar_t;
use std::os::raw::{c_char, c_int, c_uint, c_void};
@ -116,6 +117,28 @@ where
}
}
// Bit layout of the state bitfield, expressed as (index, width) pairs.
//
// Every index is the previous field's index plus the previous field's width, so the one
// version-dependent width (`interned`) automatically shifts all of the fields after it.

/// Bit index of the `interned` field.
const STATE_INTERNED_INDEX: usize = 0;
/// Width in bits of the `interned` field when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
const STATE_INTERNED_WIDTH: u8 = 2;
/// Width in bits of the `interned` field when `Py_3_12` is set.
#[cfg(Py_3_12)]
const STATE_INTERNED_WIDTH: u8 = 1;

/// Bit index of the `kind` field.
const STATE_KIND_INDEX: usize = STATE_INTERNED_INDEX + STATE_INTERNED_WIDTH as usize;
/// Width in bits of the `kind` field.
const STATE_KIND_WIDTH: u8 = 3;

/// Bit index of the `compact` field.
const STATE_COMPACT_INDEX: usize = STATE_KIND_INDEX + STATE_KIND_WIDTH as usize;
/// Width in bits of the `compact` field.
const STATE_COMPACT_WIDTH: u8 = 1;

/// Bit index of the `ascii` field.
const STATE_ASCII_INDEX: usize = STATE_COMPACT_INDEX + STATE_COMPACT_WIDTH as usize;
/// Width in bits of the `ascii` field.
const STATE_ASCII_WIDTH: u8 = 1;

/// Bit index of the `ready` field; only defined when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
const STATE_READY_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
/// Width in bits of the `ready` field; only defined when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
const STATE_READY_WIDTH: u8 = 1;
// generated by bindgen v0.63.0 (with small adaptations)
// The same code is generated for Python 3.7, 3.8, 3.9, 3.10, and 3.11; on Python 3.12 the
// "ready" field has been removed and the "interned" field is 1 bit wide instead of 2.
@ -137,57 +160,67 @@ struct PyASCIIObjectState {
// Accessors for the individual fields packed into the state bitfield. Every getter calls
// `_bitfield.get(index, width)` and every setter calls `_bitfield.set(index, width, value)`.
//
// NOTE(review): this is a diff view with the +/- markers stripped, so each body shows the
// removed literal-offset call directly followed by its constant-based replacement.
// NOTE(review): `get`/`set` presumably address `width` bits starting at bit `index`;
// confirm against the bitfield helper, which is not visible here.
impl PyASCIIObjectState {
/// Reads the `interned` field (old: index 0, width 2; new: `STATE_INTERNED_INDEX`,
/// `STATE_INTERNED_WIDTH`) and returns it as a `c_uint`.
#[inline]
unsafe fn interned(&self) -> c_uint {
std::mem::transmute(self._bitfield.get(0usize, 2u8) as u32)
std::mem::transmute(
self._bitfield
.get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u32,
)
}
/// Writes `val` into the `interned` field (old: index 0, width 2; new:
/// `STATE_INTERNED_INDEX`, `STATE_INTERNED_WIDTH`).
#[inline]
unsafe fn set_interned(&mut self, val: c_uint) {
let val: u32 = std::mem::transmute(val);
self._bitfield.set(0usize, 2u8, val as u64)
self._bitfield
.set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
}
/// Reads the `kind` field (old: index 2, width 3; new: `STATE_KIND_INDEX`,
/// `STATE_KIND_WIDTH`) and returns it as a `c_uint`.
#[inline]
unsafe fn kind(&self) -> c_uint {
std::mem::transmute(self._bitfield.get(2usize, 3u8) as u32)
std::mem::transmute(self._bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as u32)
}
/// Writes `val` into the `kind` field (old: index 2, width 3; new: `STATE_KIND_INDEX`,
/// `STATE_KIND_WIDTH`).
#[inline]
unsafe fn set_kind(&mut self, val: c_uint) {
let val: u32 = std::mem::transmute(val);
self._bitfield.set(2usize, 3u8, val as u64)
self._bitfield
.set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
}
/// Reads the `compact` field (old: index 5, width 1; new: `STATE_COMPACT_INDEX`,
/// `STATE_COMPACT_WIDTH`) and returns it as a `c_uint`.
#[inline]
unsafe fn compact(&self) -> c_uint {
std::mem::transmute(self._bitfield.get(5usize, 1u8) as u32)
std::mem::transmute(self._bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as u32)
}
/// Writes `val` into the `compact` field (old: index 5, width 1; new:
/// `STATE_COMPACT_INDEX`, `STATE_COMPACT_WIDTH`).
#[inline]
unsafe fn set_compact(&mut self, val: c_uint) {
let val: u32 = std::mem::transmute(val);
self._bitfield.set(5usize, 1u8, val as u64)
self._bitfield
.set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
}
/// Reads the `ascii` field (old: index 6, width 1; new: `STATE_ASCII_INDEX`,
/// `STATE_ASCII_WIDTH`) and returns it as a `c_uint`.
#[inline]
unsafe fn ascii(&self) -> c_uint {
std::mem::transmute(self._bitfield.get(6usize, 1u8) as u32)
std::mem::transmute(self._bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as u32)
}
/// Writes `val` into the `ascii` field (old: index 6, width 1; new: `STATE_ASCII_INDEX`,
/// `STATE_ASCII_WIDTH`).
#[inline]
unsafe fn set_ascii(&mut self, val: c_uint) {
let val: u32 = std::mem::transmute(val);
self._bitfield.set(6usize, 1u8, val as u64)
self._bitfield
.set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
}
/// Reads the `ready` field (old: index 7, width 1; new: `STATE_READY_INDEX`,
/// `STATE_READY_WIDTH`) and returns it as a `c_uint`.
///
/// Only compiled when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
#[inline]
unsafe fn ready(&self) -> c_uint {
std::mem::transmute(self._bitfield.get(7usize, 1u8) as u32)
std::mem::transmute(self._bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as u32)
}
/// Writes `val` into the `ready` field (old: index 7, width 1; new: `STATE_READY_INDEX`,
/// `STATE_READY_WIDTH`).
///
/// Only compiled when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
#[inline]
unsafe fn set_ready(&mut self, val: c_uint) {
let val: u32 = std::mem::transmute(val);
self._bitfield.set(7usize, 1u8, val as u64)
self._bitfield
.set(STATE_READY_INDEX, STATE_READY_WIDTH, val as u64)
}
}
@ -226,6 +259,7 @@ pub struct PyASCIIObject {
/// unsigned int ready:1;
/// unsigned int :24;
pub state: u32,
#[cfg(not(Py_3_12))]
pub wstr: *mut wchar_t,
}
@ -233,7 +267,8 @@ pub struct PyASCIIObject {
impl PyASCIIObject {
/// Get the `interned` field of the [`PyASCIIObject`] state bitfield.
///
/// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`], [`SSTATE_INTERNED_IMMORTAL`]
/// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`],
/// or on CPython earlier than 3.12, [`SSTATE_INTERNED_IMMORTAL`]
#[inline]
pub unsafe fn interned(&self) -> c_uint {
PyASCIIObjectState::from(self.state).interned()
@ -242,7 +277,8 @@ impl PyASCIIObject {
/// Set the `interned` field of the [`PyASCIIObject`] state bitfield.
///
/// Calling this function with an argument that is not [`SSTATE_NOT_INTERNED`],
/// [`SSTATE_INTERNED_MORTAL`], or [`SSTATE_INTERNED_IMMORTAL`] is invalid.
/// [`SSTATE_INTERNED_MORTAL`], or on CPython earlier than 3.12,
/// [`SSTATE_INTERNED_IMMORTAL`] is invalid.
#[inline]
pub unsafe fn set_interned(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
@ -309,6 +345,7 @@ impl PyASCIIObject {
/// Get the `ready` field of the [`PyASCIIObject`] state bitfield.
///
/// Returns either `0` or `1`.
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn ready(&self) -> c_uint {
PyASCIIObjectState::from(self.state).ready()
@ -317,6 +354,7 @@ impl PyASCIIObject {
/// Set the `ready` flag of the [`PyASCIIObject`] state bitfield.
///
/// Calling this function with an argument that is neither `0` nor `1` is invalid.
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn set_ready(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
@ -330,6 +368,7 @@ pub struct PyCompactUnicodeObject {
pub _base: PyASCIIObject,
pub utf8_length: Py_ssize_t,
pub utf8: *mut c_char,
#[cfg(not(Py_3_12))]
pub wstr_length: Py_ssize_t,
}
@ -359,6 +398,7 @@ extern "C" {
/// Possible value (`0`) of the `interned` field of the `PyASCIIObject` state bitfield.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
/// Possible value (`1`) of the `interned` field of the `PyASCIIObject` state bitfield.
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
/// Possible value (`2`) of the `interned` field of the `PyASCIIObject` state bitfield.
///
/// Only defined when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
#[inline]
@ -449,6 +489,14 @@ pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
(*(op as *mut PyASCIIObject)).length
}
/// `Py_3_12` variant of `PyUnicode_IS_READY`: always returns `1`.
///
/// # Safety
///
/// The pointer argument is never read by this variant.
/// NOTE(review): presumably `unsafe` only to keep the signature identical to the
/// pre-3.12 variant, which dereferences its argument; confirm.
#[cfg(Py_3_12)]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    // kept in CPython for backwards compatibility
    const ALWAYS_READY: c_uint = 1;
    ALWAYS_READY
}
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
(*(op as *mut PyASCIIObject)).ready()

View file

@ -83,6 +83,8 @@ extern "C" {
pub fn PyUnicode_FromFormat(format: *const c_char, ...) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_InternInPlace")]
pub fn PyUnicode_InternInPlace(arg1: *mut *mut PyObject);
/// Foreign declaration of `PyUnicode_InternImmortal`.
///
/// Not declared when the `Py_3_12` cfg is set, and marked `deprecated` when `Py_3_10` is set.
#[cfg(not(Py_3_12))]
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
pub fn PyUnicode_InternImmortal(arg1: *mut *mut PyObject);
#[cfg_attr(PyPy, link_name = "PyPyUnicode_InternFromString")]
pub fn PyUnicode_InternFromString(u: *const c_char) -> *mut PyObject;