Merge pull request #1777 from indygreg/unicode-apis

ffi: define some cpython/unicodeobject bindings
This commit is contained in:
David Hewitt 2021-08-14 23:07:49 +01:00 committed by GitHub
commit eafc75ab06
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 609 additions and 145 deletions

View File

@ -20,6 +20,7 @@ pub(crate) mod pydebug;
#[cfg(all(Py_3_8, not(PyPy)))]
pub(crate) mod pylifecycle;
pub(crate) mod pystate;
pub(crate) mod unicodeobject;
pub use self::abstract_::*;
#[cfg(not(PyPy))]
@ -40,3 +41,4 @@ pub use self::pydebug::*;
#[cfg(all(Py_3_8, not(PyPy)))]
pub use self::pylifecycle::*;
pub use self::pystate::*;
pub use self::unicodeobject::*;

View File

@ -0,0 +1,605 @@
use crate::ffi::{
PyObject, PyUnicode_Check, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_hash_t, Py_ssize_t,
};
use libc::wchar_t;
use std::os::raw::{c_char, c_int, c_uint, c_void};
// skipped Py_UNICODE_ISSPACE()
// skipped Py_UNICODE_ISLOWER()
// skipped Py_UNICODE_ISUPPER()
// skipped Py_UNICODE_ISTITLE()
// skipped Py_UNICODE_ISLINEBREAK
// skipped Py_UNICODE_TOLOWER
// skipped Py_UNICODE_TOUPPER
// skipped Py_UNICODE_TOTITLE
// skipped Py_UNICODE_ISDECIMAL
// skipped Py_UNICODE_ISDIGIT
// skipped Py_UNICODE_ISNUMERIC
// skipped Py_UNICODE_ISPRINTABLE
// skipped Py_UNICODE_TODECIMAL
// skipped Py_UNICODE_TODIGIT
// skipped Py_UNICODE_TONUMERIC
// skipped Py_UNICODE_ISALPHA
// skipped Py_UNICODE_ISALNUM
// skipped Py_UNICODE_COPY
// skipped Py_UNICODE_FILL
// skipped Py_UNICODE_IS_SURROGATE
// skipped Py_UNICODE_IS_HIGH_SURROGATE
// skipped Py_UNICODE_IS_LOW_SURROGATE
// skipped Py_UNICODE_JOIN_SURROGATES
// skipped Py_UNICODE_HIGH_SURROGATE
// skipped Py_UNICODE_LOW_SURROGATE
#[repr(C)]
pub struct PyASCIIObject {
pub ob_base: PyObject,
pub length: Py_ssize_t,
pub hash: Py_hash_t,
/// A bit field with various properties.
///
/// Rust doesn't expose bitfields. So we have accessor functions for
/// retrieving values.
///
/// unsigned int interned:2; // SSTATE_* constants.
/// unsigned int kind:3; // PyUnicode_*_KIND constants.
/// unsigned int compact:1;
/// unsigned int ascii:1;
/// unsigned int ready:1;
/// unsigned int :24;
pub state: u32,
pub wstr: *mut wchar_t,
}
impl PyASCIIObject {
#[inline]
pub fn interned(&self) -> c_uint {
self.state & 3
}
#[inline]
pub fn kind(&self) -> c_uint {
(self.state >> 2) & 7
}
#[inline]
pub fn compact(&self) -> c_uint {
(self.state >> 5) & 1
}
#[inline]
pub fn ascii(&self) -> c_uint {
(self.state >> 6) & 1
}
#[inline]
pub fn ready(&self) -> c_uint {
(self.state >> 7) & 1
}
}
#[repr(C)]
pub struct PyCompactUnicodeObject {
pub _base: PyASCIIObject,
pub utf8_length: Py_ssize_t,
pub utf8: *mut c_char,
pub wstr_length: Py_ssize_t,
}
#[repr(C)]
pub union PyUnicodeObjectData {
any: *mut c_void,
latin1: *mut Py_UCS1,
ucs2: *mut Py_UCS2,
ucs4: *mut Py_UCS4,
}
#[repr(C)]
pub struct PyUnicodeObject {
pub _base: PyCompactUnicodeObject,
pub data: PyUnicodeObjectData,
}
extern "C" {
#[cfg(not(PyPy))]
pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
}
// skipped PyUnicode_GET_SIZE
// skipped PyUnicode_GET_DATA_SIZE
// skipped PyUnicode_AS_UNICODE
// skipped PyUnicode_AS_DATA
pub const SSTATE_NOT_INTERNED: c_uint = 0;
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
#[inline]
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
debug_assert!(PyUnicode_Check(op) != 0);
debug_assert!(PyUnicode_IS_READY(op) != 0);
(*(op as *mut PyASCIIObject)).ascii()
}
#[inline]
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
(*(op as *mut PyASCIIObject)).compact()
}
#[inline]
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
if (*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0 {
1
} else {
0
}
}
#[cfg(not(Py_3_12))]
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
#[inline]
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
PyUnicode_DATA(op) as *mut Py_UCS1
}
#[inline]
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
PyUnicode_DATA(op) as *mut Py_UCS2
}
#[inline]
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
PyUnicode_DATA(op) as *mut Py_UCS4
}
#[inline]
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
debug_assert!(PyUnicode_Check(op) != 0);
debug_assert!(PyUnicode_IS_READY(op) != 0);
(*(op as *mut PyASCIIObject)).kind()
}
#[inline]
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
if PyUnicode_IS_ASCII(op) != 0 {
(op as *mut PyASCIIObject).offset(1) as *mut c_void
} else {
(op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void
}
}
#[inline]
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
(*(op as *mut PyUnicodeObject)).data.any
}
#[inline]
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
debug_assert!(PyUnicode_Check(op) != 0);
if PyUnicode_IS_COMPACT(op) != 0 {
_PyUnicode_COMPACT_DATA(op)
} else {
_PyUnicode_NONCOMPACT_DATA(op)
}
}
// skipped PyUnicode_WRITE
// skipped PyUnicode_READ
// skipped PyUnicode_READ_CHAR
#[inline]
pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
debug_assert!(PyUnicode_Check(op) != 0);
debug_assert!(PyUnicode_IS_READY(op) != 0);
(*(op as *mut PyASCIIObject)).length
}
#[inline]
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
(*(op as *mut PyASCIIObject)).ready()
}
#[cfg(not(Py_3_12))]
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
#[inline]
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
debug_assert!(PyUnicode_Check(op) != 0);
if PyUnicode_IS_READY(op) != 0 {
0
} else {
_PyUnicode_Ready(op)
}
}
// skipped PyUnicode_MAX_CHAR_VALUE
// skipped _PyUnicode_get_wstr_length
// skipped PyUnicode_WSTR_LENGTH
extern "C" {
#[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;
// skipped _PyUnicode_Copy
#[cfg(not(PyPy))]
#[cfg_attr(docsrs, doc(cfg(not(PyPy))))]
pub fn PyUnicode_CopyCharacters(
to: *mut PyObject,
to_start: Py_ssize_t,
from: *mut PyObject,
from_start: Py_ssize_t,
how_many: Py_ssize_t,
) -> Py_ssize_t;
// skipped _PyUnicode_FastCopyCharacters
#[cfg(not(PyPy))]
#[cfg_attr(docsrs, doc(cfg(not(PyPy))))]
pub fn PyUnicode_Fill(
unicode: *mut PyObject,
start: Py_ssize_t,
length: Py_ssize_t,
fill_char: Py_UCS4,
) -> Py_ssize_t;
// skipped _PyUnicode_FastFill
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
pub fn PyUnicode_FromKindAndData(
kind: c_int,
buffer: *const c_void,
size: Py_ssize_t,
) -> *mut PyObject;
// skipped _PyUnicode_FromASCII
// skipped _PyUnicode_FindMaxChar
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE;
// skipped _PyUnicode_AsUnicode
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
pub fn PyUnicode_AsUnicodeAndSize(
unicode: *mut PyObject,
size: *mut Py_ssize_t,
) -> *mut Py_UNICODE;
// skipped PyUnicode_GetMax
}
// skipped _PyUnicodeWriter
// skipped _PyUnicodeWriter_Init
// skipped _PyUnicodeWriter_Prepare
// skipped _PyUnicodeWriter_PrepareInternal
// skipped _PyUnicodeWriter_PrepareKind
// skipped _PyUnicodeWriter_PrepareKindInternal
// skipped _PyUnicodeWriter_WriteChar
// skipped _PyUnicodeWriter_WriteStr
// skipped _PyUnicodeWriter_WriteSubstring
// skipped _PyUnicodeWriter_WriteASCIIString
// skipped _PyUnicodeWriter_WriteLatin1String
// skipped _PyUnicodeWriter_Finish
// skipped _PyUnicodeWriter_Dealloc
// skipped _PyUnicode_FormatAdvancedWriter
extern "C" {
#[cfg(Py_3_7)]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char;
#[cfg(not(Py_3_7))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *mut c_char;
// skipped _PyUnicode_AsStringAndSize
#[cfg(Py_3_7)]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
#[cfg(not(Py_3_7))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char;
// skipped _PyUnicode_AsString
pub fn PyUnicode_Encode(
s: *const Py_UNICODE,
size: Py_ssize_t,
encoding: *const c_char,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_EncodeUTF7(
data: *const Py_UNICODE,
length: Py_ssize_t,
base64SetO: c_int,
base64WhiteSpace: c_int,
errors: *const c_char,
) -> *mut PyObject;
// skipped _PyUnicode_EncodeUTF7
// skipped _PyUnicode_AsUTF8String
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
pub fn PyUnicode_EncodeUTF8(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_EncodeUTF32(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
// skipped _PyUnicode_EncodeUTF32
pub fn PyUnicode_EncodeUTF16(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
// skipped _PyUnicode_EncodeUTF16
// skipped _PyUnicode_DecodeUnicodeEscape
pub fn PyUnicode_EncodeUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
pub fn PyUnicode_EncodeRawUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
// skipped _PyUnicode_AsLatin1String
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
pub fn PyUnicode_EncodeLatin1(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
// skipped _PyUnicode_AsASCIIString
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
pub fn PyUnicode_EncodeASCII(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_EncodeCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
mapping: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
// skipped _PyUnicode_EncodeCharmap
pub fn PyUnicode_TranslateCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
table: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
// skipped PyUnicode_EncodeMBCS
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
pub fn PyUnicode_EncodeDecimal(
s: *mut Py_UNICODE,
length: Py_ssize_t,
output: *mut c_char,
errors: *const c_char,
) -> c_int;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
pub fn PyUnicode_TransformDecimalToASCII(
s: *mut Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
// skipped _PyUnicode_TransformDecimalAndSpaceToASCII
}
// skipped _PyUnicode_JoinArray
// skipped _PyUnicode_EqualToASCIIId
// skipped _PyUnicode_EqualToASCIIString
// skipped _PyUnicode_XStrip
// skipped _PyUnicode_InsertThousandsGrouping
// skipped _Py_ascii_whitespace
// skipped _PyUnicode_IsLowercase
// skipped _PyUnicode_IsUppercase
// skipped _PyUnicode_IsTitlecase
// skipped _PyUnicode_IsXidStart
// skipped _PyUnicode_IsXidContinue
// skipped _PyUnicode_IsWhitespace
// skipped _PyUnicode_IsLinebreak
// skipped _PyUnicode_ToLowercase
// skipped _PyUnicode_ToUppercase
// skipped _PyUnicode_ToTitlecase
// skipped _PyUnicode_ToLowerFull
// skipped _PyUnicode_ToTitleFull
// skipped _PyUnicode_ToUpperFull
// skipped _PyUnicode_ToFoldedFull
// skipped _PyUnicode_IsCaseIgnorable
// skipped _PyUnicode_IsCased
// skipped _PyUnicode_ToDecimalDigit
// skipped _PyUnicode_ToDigit
// skipped _PyUnicode_ToNumeric
// skipped _PyUnicode_IsDecimalDigit
// skipped _PyUnicode_IsDigit
// skipped _PyUnicode_IsNumeric
// skipped _PyUnicode_IsPrintable
// skipped _PyUnicode_IsAlpha
// skipped Py_UNICODE_strlen
// skipped Py_UNICODE_strcpy
// skipped Py_UNICODE_strcat
// skipped Py_UNICODE_strncpy
// skipped Py_UNICODE_strcmp
// skipped Py_UNICODE_strncmp
// skipped Py_UNICODE_strchr
// skipped Py_UNICODE_strrchr
// skipped _PyUnicode_FormatLong
// skipped PyUnicode_AsUnicodeCopy
// skipped _PyUnicode_FromId
// skipped _PyUnicode_EQ
// skipped _PyUnicode_ScanIdentifier
#[cfg(test)]
mod tests {
use super::*;
use crate::types::PyString;
use crate::{AsPyPointer, Python};
#[test]
fn ascii_object_bitfield() {
let ob_base: PyObject = unsafe { std::mem::zeroed() };
let mut o = PyASCIIObject {
ob_base,
length: 0,
hash: 0,
state: 0,
wstr: std::ptr::null_mut() as *mut wchar_t,
};
assert_eq!(o.interned(), 0);
assert_eq!(o.kind(), 0);
assert_eq!(o.compact(), 0);
assert_eq!(o.ascii(), 0);
assert_eq!(o.ready(), 0);
for i in 0..4 {
o.state = i;
assert_eq!(o.interned(), i);
}
for i in 0..8 {
o.state = i << 2;
assert_eq!(o.kind(), i);
}
o.state = 1 << 5;
assert_eq!(o.compact(), 1);
o.state = 1 << 6;
assert_eq!(o.ascii(), 1);
o.state = 1 << 7;
assert_eq!(o.ready(), 1);
}
#[test]
#[cfg_attr(Py_3_10, allow(deprecated))]
fn ascii() {
Python::with_gil(|py| {
// This test relies on implementation details of PyString.
let s = PyString::new(py, "hello, world");
let ptr = s.as_ptr();
unsafe {
let ascii_ptr = ptr as *mut PyASCIIObject;
let ascii = ascii_ptr.as_ref().unwrap();
assert_eq!(ascii.interned(), 0);
assert_eq!(ascii.kind(), PyUnicode_1BYTE_KIND);
assert_eq!(ascii.compact(), 1);
assert_eq!(ascii.ascii(), 1);
assert_eq!(ascii.ready(), 1);
assert_eq!(PyUnicode_IS_ASCII(ptr), 1);
assert_eq!(PyUnicode_IS_COMPACT(ptr), 1);
assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 1);
assert!(!PyUnicode_1BYTE_DATA(ptr).is_null());
// 2 and 4 byte macros return nonsense for this string instance.
assert_eq!(PyUnicode_KIND(ptr), PyUnicode_1BYTE_KIND);
assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null());
// _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings.
assert!(!PyUnicode_DATA(ptr).is_null());
assert_eq!(PyUnicode_GET_LENGTH(ptr), s.len().unwrap() as _);
assert_eq!(PyUnicode_IS_READY(ptr), 1);
// This has potential to mutate object. But it should be a no-op since
// we're already ready.
assert_eq!(PyUnicode_READY(ptr), 0);
}
})
}
#[test]
#[cfg_attr(Py_3_10, allow(deprecated))]
fn ucs4() {
Python::with_gil(|py| {
let s = "哈哈🐈";
let py_string = PyString::new(py, s);
let ptr = py_string.as_ptr();
unsafe {
let ascii_ptr = ptr as *mut PyASCIIObject;
let ascii = ascii_ptr.as_ref().unwrap();
assert_eq!(ascii.interned(), 0);
assert_eq!(ascii.kind(), PyUnicode_4BYTE_KIND);
assert_eq!(ascii.compact(), 1);
assert_eq!(ascii.ascii(), 0);
assert_eq!(ascii.ready(), 1);
assert_eq!(PyUnicode_IS_ASCII(ptr), 0);
assert_eq!(PyUnicode_IS_COMPACT(ptr), 1);
assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 0);
assert!(!PyUnicode_4BYTE_DATA(ptr).is_null());
assert_eq!(PyUnicode_KIND(ptr), PyUnicode_4BYTE_KIND);
assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null());
// _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings.
assert!(!PyUnicode_DATA(ptr).is_null());
assert_eq!(PyUnicode_GET_LENGTH(ptr), py_string.len().unwrap() as _);
assert_eq!(PyUnicode_IS_READY(ptr), 1);
// This has potential to mutate object. But it should be a no-op since
// we're already ready.
assert_eq!(PyUnicode_READY(ptr), 0);
}
})
}
}

View File

@ -40,40 +40,11 @@ pub unsafe fn PyUnicode_CheckExact(op: *mut PyObject) -> c_int {
pub const Py_UNICODE_REPLACEMENT_CHARACTER: Py_UCS4 = 0xFFFD;
extern "C" {
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_CopyCharacters(
to: *mut PyObject,
to_start: Py_ssize_t,
from: *mut PyObject,
from_start: Py_ssize_t,
how_many: Py_ssize_t,
) -> Py_ssize_t;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_Fill(
unicode: *mut PyObject,
start: Py_ssize_t,
length: Py_ssize_t,
fill_char: Py_UCS4,
) -> Py_ssize_t;
#[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromStringAndSize")]
pub fn PyUnicode_FromStringAndSize(u: *const c_char, size: Py_ssize_t) -> *mut PyObject;
pub fn PyUnicode_FromString(u: *const c_char) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_FromKindAndData(
kind: c_int,
buffer: *const c_void,
size: Py_ssize_t,
) -> *mut PyObject;
pub fn PyUnicode_Substring(
str: *mut PyObject,
start: Py_ssize_t,
@ -86,17 +57,6 @@ extern "C" {
copy_null: c_int,
) -> *mut Py_UCS4;
pub fn PyUnicode_AsUCS4Copy(unicode: *mut PyObject) -> *mut Py_UCS4;
#[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE;
#[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
pub fn PyUnicode_AsUnicodeAndSize(
unicode: *mut PyObject,
size: *mut Py_ssize_t,
) -> *mut Py_UNICODE;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_GetLength")]
pub fn PyUnicode_GetLength(unicode: *mut PyObject) -> Py_ssize_t;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_GetSize")]
@ -143,20 +103,6 @@ extern "C" {
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromOrdinal")]
pub fn PyUnicode_FromOrdinal(ordinal: c_int) -> *mut PyObject;
pub fn PyUnicode_ClearFreeList() -> c_int;
#[cfg(any(not(Py_LIMITED_API), Py_3_10))]
#[cfg(Py_3_7)]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char;
#[cfg(not(Py_3_7))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *mut c_char;
#[cfg(not(Py_LIMITED_API))]
#[cfg(Py_3_7)]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
#[cfg(not(Py_3_7))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_GetDefaultEncoding")]
pub fn PyUnicode_GetDefaultEncoding() -> *const c_char;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_Decode")]
@ -176,13 +122,6 @@ extern "C" {
encoding: *const c_char,
errors: *const c_char,
) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_Encode(
s: *const Py_UNICODE,
size: Py_ssize_t,
encoding: *const c_char,
errors: *const c_char,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsEncodedObject")]
pub fn PyUnicode_AsEncodedObject(
unicode: *mut PyObject,
@ -212,14 +151,6 @@ extern "C" {
errors: *const c_char,
consumed: *mut Py_ssize_t,
) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_EncodeUTF7(
data: *const Py_UNICODE,
length: Py_ssize_t,
base64SetO: c_int,
base64WhiteSpace: c_int,
errors: *const c_char,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF8")]
pub fn PyUnicode_DecodeUTF8(
string: *const c_char,
@ -234,13 +165,6 @@ extern "C" {
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8String")]
pub fn PyUnicode_AsUTF8String(unicode: *mut PyObject) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
pub fn PyUnicode_EncodeUTF8(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF32")]
pub fn PyUnicode_DecodeUTF32(
string: *const c_char,
@ -257,13 +181,6 @@ extern "C" {
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF32String")]
pub fn PyUnicode_AsUTF32String(unicode: *mut PyObject) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_EncodeUTF32(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF16")]
pub fn PyUnicode_DecodeUTF16(
string: *const c_char,
@ -280,13 +197,6 @@ extern "C" {
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF16String")]
pub fn PyUnicode_AsUTF16String(unicode: *mut PyObject) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_EncodeUTF16(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
pub fn PyUnicode_DecodeUnicodeEscape(
string: *const c_char,
length: Py_ssize_t,
@ -294,22 +204,12 @@ extern "C" {
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeEscapeString")]
pub fn PyUnicode_AsUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_EncodeUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
pub fn PyUnicode_DecodeRawUnicodeEscape(
string: *const c_char,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_AsRawUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_EncodeRawUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeLatin1")]
pub fn PyUnicode_DecodeLatin1(
string: *const c_char,
@ -318,13 +218,6 @@ extern "C" {
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsLatin1String")]
pub fn PyUnicode_AsLatin1String(unicode: *mut PyObject) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
pub fn PyUnicode_EncodeLatin1(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeASCII")]
pub fn PyUnicode_DecodeASCII(
string: *const c_char,
@ -333,13 +226,6 @@ extern "C" {
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsASCIIString")]
pub fn PyUnicode_AsASCIIString(unicode: *mut PyObject) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
pub fn PyUnicode_EncodeASCII(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_DecodeCharmap(
string: *const c_char,
length: Py_ssize_t,
@ -350,35 +236,6 @@ extern "C" {
unicode: *mut PyObject,
mapping: *mut PyObject,
) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_EncodeCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
mapping: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
pub fn PyUnicode_TranslateCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
table: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
#[cfg(not(Py_LIMITED_API))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
pub fn PyUnicode_EncodeDecimal(
s: *mut Py_UNICODE,
length: Py_ssize_t,
output: *mut c_char,
errors: *const c_char,
) -> c_int;
#[cfg(not(Py_LIMITED_API))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
pub fn PyUnicode_TransformDecimalToASCII(
s: *mut Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
pub fn PyUnicode_DecodeLocaleAndSize(
str: *const c_char,
len: Py_ssize_t,

View File

@ -49,8 +49,8 @@ impl PyString {
pub fn to_str(&self) -> PyResult<&str> {
let utf8_slice = {
cfg_if::cfg_if! {
if #[cfg(any(not(Py_LIMITED_API), Py_3_10))] {
// PyUnicode_AsUTF8AndSize only available on limited API from Python 3.10 and up.
if #[cfg(not(Py_LIMITED_API))] {
// PyUnicode_AsUTF8AndSize only available on limited API.
let mut size: ffi::Py_ssize_t = 0;
let data = unsafe { ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size) };
if data.is_null() {