Merge pull request #1777 from indygreg/unicode-apis
ffi: define some cpython/unicodeobject bindings
This commit is contained in:
commit
eafc75ab06
|
@ -20,6 +20,7 @@ pub(crate) mod pydebug;
|
|||
#[cfg(all(Py_3_8, not(PyPy)))]
|
||||
pub(crate) mod pylifecycle;
|
||||
pub(crate) mod pystate;
|
||||
pub(crate) mod unicodeobject;
|
||||
|
||||
pub use self::abstract_::*;
|
||||
#[cfg(not(PyPy))]
|
||||
|
@ -40,3 +41,4 @@ pub use self::pydebug::*;
|
|||
#[cfg(all(Py_3_8, not(PyPy)))]
|
||||
pub use self::pylifecycle::*;
|
||||
pub use self::pystate::*;
|
||||
pub use self::unicodeobject::*;
|
||||
|
|
|
@ -0,0 +1,605 @@
|
|||
use crate::ffi::{
|
||||
PyObject, PyUnicode_Check, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_hash_t, Py_ssize_t,
|
||||
};
|
||||
use libc::wchar_t;
|
||||
use std::os::raw::{c_char, c_int, c_uint, c_void};
|
||||
|
||||
// skipped Py_UNICODE_ISSPACE()
|
||||
// skipped Py_UNICODE_ISLOWER()
|
||||
// skipped Py_UNICODE_ISUPPER()
|
||||
// skipped Py_UNICODE_ISTITLE()
|
||||
// skipped Py_UNICODE_ISLINEBREAK
|
||||
// skipped Py_UNICODE_TOLOWER
|
||||
// skipped Py_UNICODE_TOUPPER
|
||||
// skipped Py_UNICODE_TOTITLE
|
||||
// skipped Py_UNICODE_ISDECIMAL
|
||||
// skipped Py_UNICODE_ISDIGIT
|
||||
// skipped Py_UNICODE_ISNUMERIC
|
||||
// skipped Py_UNICODE_ISPRINTABLE
|
||||
// skipped Py_UNICODE_TODECIMAL
|
||||
// skipped Py_UNICODE_TODIGIT
|
||||
// skipped Py_UNICODE_TONUMERIC
|
||||
// skipped Py_UNICODE_ISALPHA
|
||||
// skipped Py_UNICODE_ISALNUM
|
||||
// skipped Py_UNICODE_COPY
|
||||
// skipped Py_UNICODE_FILL
|
||||
// skipped Py_UNICODE_IS_SURROGATE
|
||||
// skipped Py_UNICODE_IS_HIGH_SURROGATE
|
||||
// skipped Py_UNICODE_IS_LOW_SURROGATE
|
||||
// skipped Py_UNICODE_JOIN_SURROGATES
|
||||
// skipped Py_UNICODE_HIGH_SURROGATE
|
||||
// skipped Py_UNICODE_LOW_SURROGATE
|
||||
|
||||
#[repr(C)]
|
||||
pub struct PyASCIIObject {
|
||||
pub ob_base: PyObject,
|
||||
pub length: Py_ssize_t,
|
||||
pub hash: Py_hash_t,
|
||||
/// A bit field with various properties.
|
||||
///
|
||||
/// Rust doesn't expose bitfields. So we have accessor functions for
|
||||
/// retrieving values.
|
||||
///
|
||||
/// unsigned int interned:2; // SSTATE_* constants.
|
||||
/// unsigned int kind:3; // PyUnicode_*_KIND constants.
|
||||
/// unsigned int compact:1;
|
||||
/// unsigned int ascii:1;
|
||||
/// unsigned int ready:1;
|
||||
/// unsigned int :24;
|
||||
pub state: u32,
|
||||
pub wstr: *mut wchar_t,
|
||||
}
|
||||
|
||||
impl PyASCIIObject {
|
||||
#[inline]
|
||||
pub fn interned(&self) -> c_uint {
|
||||
self.state & 3
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn kind(&self) -> c_uint {
|
||||
(self.state >> 2) & 7
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn compact(&self) -> c_uint {
|
||||
(self.state >> 5) & 1
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ascii(&self) -> c_uint {
|
||||
(self.state >> 6) & 1
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ready(&self) -> c_uint {
|
||||
(self.state >> 7) & 1
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct PyCompactUnicodeObject {
|
||||
pub _base: PyASCIIObject,
|
||||
pub utf8_length: Py_ssize_t,
|
||||
pub utf8: *mut c_char,
|
||||
pub wstr_length: Py_ssize_t,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub union PyUnicodeObjectData {
|
||||
any: *mut c_void,
|
||||
latin1: *mut Py_UCS1,
|
||||
ucs2: *mut Py_UCS2,
|
||||
ucs4: *mut Py_UCS4,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct PyUnicodeObject {
|
||||
pub _base: PyCompactUnicodeObject,
|
||||
pub data: PyUnicodeObjectData,
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
#[cfg(not(PyPy))]
|
||||
pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
|
||||
}
|
||||
|
||||
// skipped PyUnicode_GET_SIZE
|
||||
// skipped PyUnicode_GET_DATA_SIZE
|
||||
// skipped PyUnicode_AS_UNICODE
|
||||
// skipped PyUnicode_AS_DATA
|
||||
|
||||
// Values stored in the 2-bit `interned` member of `PyASCIIObject::state`
// (the "SSTATE_* constants" referred to in that field's documentation).

/// `interned` value 0.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
/// `interned` value 1.
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
/// `interned` value 2.
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
|
||||
debug_assert!(PyUnicode_Check(op) != 0);
|
||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||
|
||||
(*(op as *mut PyASCIIObject)).ascii()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
|
||||
(*(op as *mut PyASCIIObject)).compact()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
|
||||
if (*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0 {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
// Values stored in the 3-bit `kind` member of `PyASCIIObject::state`
// (the "PyUnicode_*_KIND constants" referred to in that field's documentation).

/// `kind` value 0. Not defined when `Py_3_12` is set, and marked deprecated
/// when `Py_3_10` is set.
#[cfg(not(Py_3_12))]
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;

/// `kind` value 1; `PyUnicode_1BYTE_DATA` is the matching data accessor.
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
/// `kind` value 2; `PyUnicode_2BYTE_DATA` is the matching data accessor.
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
/// `kind` value 4; `PyUnicode_4BYTE_DATA` is the matching data accessor.
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
|
||||
PyUnicode_DATA(op) as *mut Py_UCS1
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
|
||||
PyUnicode_DATA(op) as *mut Py_UCS2
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
|
||||
PyUnicode_DATA(op) as *mut Py_UCS4
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
|
||||
debug_assert!(PyUnicode_Check(op) != 0);
|
||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||
|
||||
(*(op as *mut PyASCIIObject)).kind()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||
if PyUnicode_IS_ASCII(op) != 0 {
|
||||
(op as *mut PyASCIIObject).offset(1) as *mut c_void
|
||||
} else {
|
||||
(op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
|
||||
debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
|
||||
|
||||
(*(op as *mut PyUnicodeObject)).data.any
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
|
||||
debug_assert!(PyUnicode_Check(op) != 0);
|
||||
|
||||
if PyUnicode_IS_COMPACT(op) != 0 {
|
||||
_PyUnicode_COMPACT_DATA(op)
|
||||
} else {
|
||||
_PyUnicode_NONCOMPACT_DATA(op)
|
||||
}
|
||||
}
|
||||
|
||||
// skipped PyUnicode_WRITE
|
||||
// skipped PyUnicode_READ
|
||||
// skipped PyUnicode_READ_CHAR
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
|
||||
debug_assert!(PyUnicode_Check(op) != 0);
|
||||
debug_assert!(PyUnicode_IS_READY(op) != 0);
|
||||
|
||||
(*(op as *mut PyASCIIObject)).length
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
|
||||
(*(op as *mut PyASCIIObject)).ready()
|
||||
}
|
||||
|
||||
#[cfg(not(Py_3_12))]
|
||||
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
|
||||
#[inline]
|
||||
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
|
||||
debug_assert!(PyUnicode_Check(op) != 0);
|
||||
|
||||
if PyUnicode_IS_READY(op) != 0 {
|
||||
0
|
||||
} else {
|
||||
_PyUnicode_Ready(op)
|
||||
}
|
||||
}
|
||||
|
||||
// skipped PyUnicode_MAX_CHAR_VALUE
|
||||
// skipped _PyUnicode_get_wstr_length
|
||||
// skipped PyUnicode_WSTR_LENGTH
|
||||
|
||||
extern "C" {
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
|
||||
pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
|
||||
pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;
|
||||
|
||||
// skipped _PyUnicode_Copy
|
||||
|
||||
#[cfg(not(PyPy))]
|
||||
#[cfg_attr(docsrs, doc(cfg(not(PyPy))))]
|
||||
pub fn PyUnicode_CopyCharacters(
|
||||
to: *mut PyObject,
|
||||
to_start: Py_ssize_t,
|
||||
from: *mut PyObject,
|
||||
from_start: Py_ssize_t,
|
||||
how_many: Py_ssize_t,
|
||||
) -> Py_ssize_t;
|
||||
|
||||
// skipped _PyUnicode_FastCopyCharacters
|
||||
|
||||
#[cfg(not(PyPy))]
|
||||
#[cfg_attr(docsrs, doc(cfg(not(PyPy))))]
|
||||
pub fn PyUnicode_Fill(
|
||||
unicode: *mut PyObject,
|
||||
start: Py_ssize_t,
|
||||
length: Py_ssize_t,
|
||||
fill_char: Py_UCS4,
|
||||
) -> Py_ssize_t;
|
||||
|
||||
// skipped _PyUnicode_FastFill
|
||||
|
||||
#[cfg(not(Py_3_12))]
|
||||
#[deprecated]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
|
||||
pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject;
|
||||
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
|
||||
pub fn PyUnicode_FromKindAndData(
|
||||
kind: c_int,
|
||||
buffer: *const c_void,
|
||||
size: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_FromASCII
|
||||
// skipped _PyUnicode_FindMaxChar
|
||||
|
||||
#[cfg(not(Py_3_12))]
|
||||
#[deprecated]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
|
||||
pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE;
|
||||
|
||||
// skipped _PyUnicode_AsUnicode
|
||||
|
||||
#[cfg(not(Py_3_12))]
|
||||
#[deprecated]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
|
||||
pub fn PyUnicode_AsUnicodeAndSize(
|
||||
unicode: *mut PyObject,
|
||||
size: *mut Py_ssize_t,
|
||||
) -> *mut Py_UNICODE;
|
||||
|
||||
// skipped PyUnicode_GetMax
|
||||
}
|
||||
|
||||
// skipped _PyUnicodeWriter
|
||||
// skipped _PyUnicodeWriter_Init
|
||||
// skipped _PyUnicodeWriter_Prepare
|
||||
// skipped _PyUnicodeWriter_PrepareInternal
|
||||
// skipped _PyUnicodeWriter_PrepareKind
|
||||
// skipped _PyUnicodeWriter_PrepareKindInternal
|
||||
// skipped _PyUnicodeWriter_WriteChar
|
||||
// skipped _PyUnicodeWriter_WriteStr
|
||||
// skipped _PyUnicodeWriter_WriteSubstring
|
||||
// skipped _PyUnicodeWriter_WriteASCIIString
|
||||
// skipped _PyUnicodeWriter_WriteLatin1String
|
||||
// skipped _PyUnicodeWriter_Finish
|
||||
// skipped _PyUnicodeWriter_Dealloc
|
||||
// skipped _PyUnicode_FormatAdvancedWriter
|
||||
|
||||
extern "C" {
|
||||
#[cfg(Py_3_7)]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
|
||||
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char;
|
||||
|
||||
#[cfg(not(Py_3_7))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
|
||||
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *mut c_char;
|
||||
|
||||
// skipped _PyUnicode_AsStringAndSize
|
||||
|
||||
#[cfg(Py_3_7)]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
|
||||
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
|
||||
|
||||
#[cfg(not(Py_3_7))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
|
||||
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char;
|
||||
|
||||
// skipped _PyUnicode_AsString
|
||||
|
||||
pub fn PyUnicode_Encode(
|
||||
s: *const Py_UNICODE,
|
||||
size: Py_ssize_t,
|
||||
encoding: *const c_char,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
pub fn PyUnicode_EncodeUTF7(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
base64SetO: c_int,
|
||||
base64WhiteSpace: c_int,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_EncodeUTF7
|
||||
// skipped _PyUnicode_AsUTF8String
|
||||
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
|
||||
pub fn PyUnicode_EncodeUTF8(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
pub fn PyUnicode_EncodeUTF32(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
byteorder: c_int,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_EncodeUTF32
|
||||
|
||||
pub fn PyUnicode_EncodeUTF16(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
byteorder: c_int,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_EncodeUTF16
|
||||
// skipped _PyUnicode_DecodeUnicodeEscape
|
||||
|
||||
pub fn PyUnicode_EncodeUnicodeEscape(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
|
||||
pub fn PyUnicode_EncodeRawUnicodeEscape(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_AsLatin1String
|
||||
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
|
||||
pub fn PyUnicode_EncodeLatin1(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_AsASCIIString
|
||||
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
|
||||
pub fn PyUnicode_EncodeASCII(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
pub fn PyUnicode_EncodeCharmap(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
mapping: *mut PyObject,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_EncodeCharmap
|
||||
|
||||
pub fn PyUnicode_TranslateCharmap(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
table: *mut PyObject,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped PyUnicode_EncodeMBCS
|
||||
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
|
||||
pub fn PyUnicode_EncodeDecimal(
|
||||
s: *mut Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
output: *mut c_char,
|
||||
errors: *const c_char,
|
||||
) -> c_int;
|
||||
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
|
||||
pub fn PyUnicode_TransformDecimalToASCII(
|
||||
s: *mut Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
|
||||
// skipped _PyUnicode_TransformDecimalAndSpaceToASCII
|
||||
}
|
||||
|
||||
// skipped _PyUnicode_JoinArray
|
||||
// skipped _PyUnicode_EqualToASCIIId
|
||||
// skipped _PyUnicode_EqualToASCIIString
|
||||
// skipped _PyUnicode_XStrip
|
||||
// skipped _PyUnicode_InsertThousandsGrouping
|
||||
|
||||
// skipped _Py_ascii_whitespace
|
||||
|
||||
// skipped _PyUnicode_IsLowercase
|
||||
// skipped _PyUnicode_IsUppercase
|
||||
// skipped _PyUnicode_IsTitlecase
|
||||
// skipped _PyUnicode_IsXidStart
|
||||
// skipped _PyUnicode_IsXidContinue
|
||||
// skipped _PyUnicode_IsWhitespace
|
||||
// skipped _PyUnicode_IsLinebreak
|
||||
// skipped _PyUnicode_ToLowercase
|
||||
// skipped _PyUnicode_ToUppercase
|
||||
// skipped _PyUnicode_ToTitlecase
|
||||
// skipped _PyUnicode_ToLowerFull
|
||||
// skipped _PyUnicode_ToTitleFull
|
||||
// skipped _PyUnicode_ToUpperFull
|
||||
// skipped _PyUnicode_ToFoldedFull
|
||||
// skipped _PyUnicode_IsCaseIgnorable
|
||||
// skipped _PyUnicode_IsCased
|
||||
// skipped _PyUnicode_ToDecimalDigit
|
||||
// skipped _PyUnicode_ToDigit
|
||||
// skipped _PyUnicode_ToNumeric
|
||||
// skipped _PyUnicode_IsDecimalDigit
|
||||
// skipped _PyUnicode_IsDigit
|
||||
// skipped _PyUnicode_IsNumeric
|
||||
// skipped _PyUnicode_IsPrintable
|
||||
// skipped _PyUnicode_IsAlpha
|
||||
// skipped Py_UNICODE_strlen
|
||||
// skipped Py_UNICODE_strcpy
|
||||
// skipped Py_UNICODE_strcat
|
||||
// skipped Py_UNICODE_strncpy
|
||||
// skipped Py_UNICODE_strcmp
|
||||
// skipped Py_UNICODE_strncmp
|
||||
// skipped Py_UNICODE_strchr
|
||||
// skipped Py_UNICODE_strrchr
|
||||
// skipped _PyUnicode_FormatLong
|
||||
// skipped PyUnicode_AsUnicodeCopy
|
||||
// skipped _PyUnicode_FromId
|
||||
// skipped _PyUnicode_EQ
|
||||
// skipped _PyUnicode_ScanIdentifier
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::PyString;
    use crate::{AsPyPointer, Python};

    /// Drives every `PyASCIIObject` accessor with hand-built `state` words.
    #[test]
    fn ascii_object_bitfield() {
        let mut obj = PyASCIIObject {
            ob_base: unsafe { std::mem::zeroed() },
            length: 0,
            hash: 0,
            state: 0,
            wstr: std::ptr::null_mut::<wchar_t>(),
        };

        // With an all-zero state word every member reads back as 0.
        assert_eq!(obj.interned(), 0);
        assert_eq!(obj.kind(), 0);
        assert_eq!(obj.compact(), 0);
        assert_eq!(obj.ascii(), 0);
        assert_eq!(obj.ready(), 0);

        // `interned` occupies the two lowest bits.
        for interned in 0..4 {
            obj.state = interned;
            assert_eq!(obj.interned(), interned);
        }

        // `kind` occupies the next three bits.
        for kind in 0..8 {
            obj.state = kind << 2;
            assert_eq!(obj.kind(), kind);
        }

        // The three single-bit flags follow at bits 5, 6 and 7.
        obj.state = 1 << 5;
        assert_eq!(obj.compact(), 1);

        obj.state = 1 << 6;
        assert_eq!(obj.ascii(), 1);

        obj.state = 1 << 7;
        assert_eq!(obj.ready(), 1);
    }

    /// Inspects a real ASCII-only Python string through the raw accessors.
    #[test]
    #[cfg_attr(Py_3_10, allow(deprecated))]
    fn ascii() {
        Python::with_gil(|py| {
            // This test relies on implementation details of PyString.
            let py_string = PyString::new(py, "hello, world");
            let ptr = py_string.as_ptr();

            unsafe {
                let header = ptr.cast::<PyASCIIObject>().as_ref().unwrap();

                assert_eq!(header.interned(), 0);
                assert_eq!(header.kind(), PyUnicode_1BYTE_KIND);
                assert_eq!(header.compact(), 1);
                assert_eq!(header.ascii(), 1);
                assert_eq!(header.ready(), 1);

                assert_eq!(PyUnicode_IS_ASCII(ptr), 1);
                assert_eq!(PyUnicode_IS_COMPACT(ptr), 1);
                assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 1);

                assert!(!PyUnicode_1BYTE_DATA(ptr).is_null());
                // 2 and 4 byte macros return nonsense for this string instance.
                assert_eq!(PyUnicode_KIND(ptr), PyUnicode_1BYTE_KIND);

                assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null());
                // _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings.
                assert!(!PyUnicode_DATA(ptr).is_null());

                assert_eq!(PyUnicode_GET_LENGTH(ptr), py_string.len().unwrap() as _);
                assert_eq!(PyUnicode_IS_READY(ptr), 1);

                // This has potential to mutate object. But it should be a no-op since
                // we're already ready.
                assert_eq!(PyUnicode_READY(ptr), 0);
            }
        })
    }

    /// Inspects a real Python string that the test expects to be stored with
    /// the 4-byte kind.
    #[test]
    #[cfg_attr(Py_3_10, allow(deprecated))]
    fn ucs4() {
        Python::with_gil(|py| {
            let text = "哈哈🐈";
            let py_string = PyString::new(py, text);
            let ptr = py_string.as_ptr();

            unsafe {
                let header = ptr.cast::<PyASCIIObject>().as_ref().unwrap();

                assert_eq!(header.interned(), 0);
                assert_eq!(header.kind(), PyUnicode_4BYTE_KIND);
                assert_eq!(header.compact(), 1);
                assert_eq!(header.ascii(), 0);
                assert_eq!(header.ready(), 1);

                assert_eq!(PyUnicode_IS_ASCII(ptr), 0);
                assert_eq!(PyUnicode_IS_COMPACT(ptr), 1);
                assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 0);

                assert!(!PyUnicode_4BYTE_DATA(ptr).is_null());
                assert_eq!(PyUnicode_KIND(ptr), PyUnicode_4BYTE_KIND);

                assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null());
                // _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings.
                assert!(!PyUnicode_DATA(ptr).is_null());

                assert_eq!(PyUnicode_GET_LENGTH(ptr), py_string.len().unwrap() as _);
                assert_eq!(PyUnicode_IS_READY(ptr), 1);

                // This has potential to mutate object. But it should be a no-op since
                // we're already ready.
                assert_eq!(PyUnicode_READY(ptr), 0);
            }
        })
    }
}
|
|
@ -40,40 +40,11 @@ pub unsafe fn PyUnicode_CheckExact(op: *mut PyObject) -> c_int {
|
|||
pub const Py_UNICODE_REPLACEMENT_CHARACTER: Py_UCS4 = 0xFFFD;
|
||||
|
||||
extern "C" {
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
|
||||
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_CopyCharacters(
|
||||
to: *mut PyObject,
|
||||
to_start: Py_ssize_t,
|
||||
from: *mut PyObject,
|
||||
from_start: Py_ssize_t,
|
||||
how_many: Py_ssize_t,
|
||||
) -> Py_ssize_t;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_Fill(
|
||||
unicode: *mut PyObject,
|
||||
start: Py_ssize_t,
|
||||
length: Py_ssize_t,
|
||||
fill_char: Py_UCS4,
|
||||
) -> Py_ssize_t;
|
||||
#[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))]
|
||||
#[deprecated]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
|
||||
pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject;
|
||||
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromStringAndSize")]
|
||||
pub fn PyUnicode_FromStringAndSize(u: *const c_char, size: Py_ssize_t) -> *mut PyObject;
|
||||
pub fn PyUnicode_FromString(u: *const c_char) -> *mut PyObject;
|
||||
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_FromKindAndData(
|
||||
kind: c_int,
|
||||
buffer: *const c_void,
|
||||
size: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
|
||||
pub fn PyUnicode_Substring(
|
||||
str: *mut PyObject,
|
||||
start: Py_ssize_t,
|
||||
|
@ -86,17 +57,6 @@ extern "C" {
|
|||
copy_null: c_int,
|
||||
) -> *mut Py_UCS4;
|
||||
pub fn PyUnicode_AsUCS4Copy(unicode: *mut PyObject) -> *mut Py_UCS4;
|
||||
#[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))]
|
||||
#[deprecated]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
|
||||
pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE;
|
||||
#[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))]
|
||||
#[deprecated]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
|
||||
pub fn PyUnicode_AsUnicodeAndSize(
|
||||
unicode: *mut PyObject,
|
||||
size: *mut Py_ssize_t,
|
||||
) -> *mut Py_UNICODE;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_GetLength")]
|
||||
pub fn PyUnicode_GetLength(unicode: *mut PyObject) -> Py_ssize_t;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_GetSize")]
|
||||
|
@ -143,20 +103,6 @@ extern "C" {
|
|||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromOrdinal")]
|
||||
pub fn PyUnicode_FromOrdinal(ordinal: c_int) -> *mut PyObject;
|
||||
pub fn PyUnicode_ClearFreeList() -> c_int;
|
||||
#[cfg(any(not(Py_LIMITED_API), Py_3_10))]
|
||||
#[cfg(Py_3_7)]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
|
||||
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char;
|
||||
#[cfg(not(Py_3_7))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
|
||||
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *mut c_char;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
#[cfg(Py_3_7)]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
|
||||
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
|
||||
#[cfg(not(Py_3_7))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
|
||||
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_GetDefaultEncoding")]
|
||||
pub fn PyUnicode_GetDefaultEncoding() -> *const c_char;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_Decode")]
|
||||
|
@ -176,13 +122,6 @@ extern "C" {
|
|||
encoding: *const c_char,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_Encode(
|
||||
s: *const Py_UNICODE,
|
||||
size: Py_ssize_t,
|
||||
encoding: *const c_char,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsEncodedObject")]
|
||||
pub fn PyUnicode_AsEncodedObject(
|
||||
unicode: *mut PyObject,
|
||||
|
@ -212,14 +151,6 @@ extern "C" {
|
|||
errors: *const c_char,
|
||||
consumed: *mut Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_EncodeUTF7(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
base64SetO: c_int,
|
||||
base64WhiteSpace: c_int,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF8")]
|
||||
pub fn PyUnicode_DecodeUTF8(
|
||||
string: *const c_char,
|
||||
|
@ -234,13 +165,6 @@ extern "C" {
|
|||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8String")]
|
||||
pub fn PyUnicode_AsUTF8String(unicode: *mut PyObject) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
|
||||
pub fn PyUnicode_EncodeUTF8(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF32")]
|
||||
pub fn PyUnicode_DecodeUTF32(
|
||||
string: *const c_char,
|
||||
|
@ -257,13 +181,6 @@ extern "C" {
|
|||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF32String")]
|
||||
pub fn PyUnicode_AsUTF32String(unicode: *mut PyObject) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_EncodeUTF32(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
byteorder: c_int,
|
||||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF16")]
|
||||
pub fn PyUnicode_DecodeUTF16(
|
||||
string: *const c_char,
|
||||
|
@ -280,13 +197,6 @@ extern "C" {
|
|||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF16String")]
|
||||
pub fn PyUnicode_AsUTF16String(unicode: *mut PyObject) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_EncodeUTF16(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
byteorder: c_int,
|
||||
) -> *mut PyObject;
|
||||
pub fn PyUnicode_DecodeUnicodeEscape(
|
||||
string: *const c_char,
|
||||
length: Py_ssize_t,
|
||||
|
@ -294,22 +204,12 @@ extern "C" {
|
|||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeEscapeString")]
|
||||
pub fn PyUnicode_AsUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_EncodeUnicodeEscape(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
pub fn PyUnicode_DecodeRawUnicodeEscape(
|
||||
string: *const c_char,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
pub fn PyUnicode_AsRawUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_EncodeRawUnicodeEscape(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeLatin1")]
|
||||
pub fn PyUnicode_DecodeLatin1(
|
||||
string: *const c_char,
|
||||
|
@ -318,13 +218,6 @@ extern "C" {
|
|||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsLatin1String")]
|
||||
pub fn PyUnicode_AsLatin1String(unicode: *mut PyObject) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
|
||||
pub fn PyUnicode_EncodeLatin1(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeASCII")]
|
||||
pub fn PyUnicode_DecodeASCII(
|
||||
string: *const c_char,
|
||||
|
@ -333,13 +226,6 @@ extern "C" {
|
|||
) -> *mut PyObject;
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsASCIIString")]
|
||||
pub fn PyUnicode_AsASCIIString(unicode: *mut PyObject) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
|
||||
pub fn PyUnicode_EncodeASCII(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
pub fn PyUnicode_DecodeCharmap(
|
||||
string: *const c_char,
|
||||
length: Py_ssize_t,
|
||||
|
@ -350,35 +236,6 @@ extern "C" {
|
|||
unicode: *mut PyObject,
|
||||
mapping: *mut PyObject,
|
||||
) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_EncodeCharmap(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
mapping: *mut PyObject,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
pub fn PyUnicode_TranslateCharmap(
|
||||
data: *const Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
table: *mut PyObject,
|
||||
errors: *const c_char,
|
||||
) -> *mut PyObject;
|
||||
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
|
||||
pub fn PyUnicode_EncodeDecimal(
|
||||
s: *mut Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
output: *mut c_char,
|
||||
errors: *const c_char,
|
||||
) -> c_int;
|
||||
#[cfg(not(Py_LIMITED_API))]
|
||||
#[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
|
||||
pub fn PyUnicode_TransformDecimalToASCII(
|
||||
s: *mut Py_UNICODE,
|
||||
length: Py_ssize_t,
|
||||
) -> *mut PyObject;
|
||||
pub fn PyUnicode_DecodeLocaleAndSize(
|
||||
str: *const c_char,
|
||||
len: Py_ssize_t,
|
||||
|
|
|
@ -49,8 +49,8 @@ impl PyString {
|
|||
pub fn to_str(&self) -> PyResult<&str> {
|
||||
let utf8_slice = {
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(any(not(Py_LIMITED_API), Py_3_10))] {
|
||||
// PyUnicode_AsUTF8AndSize only available on limited API from Python 3.10 and up.
|
||||
if #[cfg(not(Py_LIMITED_API))] {
|
||||
// PyUnicode_AsUTF8AndSize is not available on the limited API.
|
||||
let mut size: ffi::Py_ssize_t = 0;
|
||||
let data = unsafe { ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size) };
|
||||
if data.is_null() {
|
||||
|
|
Loading…
Reference in New Issue