PyString for py2
This commit is contained in:
parent
805429e0f4
commit
03ced8917b
|
@ -33,12 +33,7 @@ pub const PyObject_HEAD_INIT: PyObject = PyObject {
|
|||
/// C-layout (`#[repr(C)]`) record made of object-header fields followed by
/// an `ob_size` field. Derives `Copy` and `Clone`.
// NOTE(review): the body below lists explicit header fields (`_ob_next`,
// `_ob_prev`, `ob_refcnt`, `ob_type`) *and* an embedded `ob_base: PyObject`.
// Presumably only one of the two header forms is intended (keeping both would
// shift `ob_size` relative to a single-header layout) - confirm which one.
#[repr(C)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct PyVarObject {
|
||||
// Compiled in only when the `py_sys_config="Py_TRACE_REFS"` cfg is set.
#[cfg(py_sys_config="Py_TRACE_REFS")]
|
||||
pub _ob_next: *mut PyObject,
|
||||
// Compiled in only when the `py_sys_config="Py_TRACE_REFS"` cfg is set.
#[cfg(py_sys_config="Py_TRACE_REFS")]
|
||||
pub _ob_prev: *mut PyObject,
|
||||
pub ob_refcnt: Py_ssize_t,
|
||||
pub ob_type: *mut PyTypeObject,
|
||||
// Embedded `PyObject` value (stored inline, not behind a pointer).
pub ob_base: PyObject,
|
||||
pub ob_size: Py_ssize_t,
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
use std;
|
||||
use std::str;
|
||||
use std::borrow::Cow;
|
||||
use std::ascii::AsciiExt;
|
||||
use std::os::raw::c_char;
|
||||
|
||||
use ffi;
|
||||
|
@ -13,11 +14,18 @@ use pointers::PyPtr;
|
|||
use python::{Python, ToPyPointer};
|
||||
use super::{PyObject, PyStringData};
|
||||
|
||||
/// Represents a Python string.
|
||||
/// The wrapped object may be a byte string or a unicode string: `PyString::new`
/// builds either kind and `PyString::data` casts to `PyBytes` or `PyUnicode`.
|
||||
pub struct PyString(PyPtr);
|
||||
|
||||
pyobject_convert!(PyString);
|
||||
// NOTE(review): `PyString` is handed to `pyobject_nativetype!` twice below, with
// different check functions and type objects (`PyUnicode_Check`/`PyUnicode_Type`
// versus `PyString_Check`/`PyBaseString_Type`). Presumably only one registration
// is intended - confirm which.
pyobject_nativetype!(PyString, PyUnicode_Check, PyUnicode_Type);
|
||||
pyobject_nativetype!(PyString, PyString_Check, PyBaseString_Type);
|
||||
|
||||
|
||||
/// Represents a Python unicode string.
///
/// Newtype over `PyPtr`. It is passed to `pyobject_convert!` and to
/// `pyobject_nativetype!` together with `PyUnicode_Check` and `PyUnicode_Type`.
|
||||
pub struct PyUnicode(PyPtr);
|
||||
|
||||
pyobject_convert!(PyUnicode);
|
||||
pyobject_nativetype!(PyUnicode, PyUnicode_Check, PyUnicode_Type);
|
||||
|
||||
/// Represents a Python byte string. Corresponds to `str` in Python 2.
|
||||
pub struct PyBytes(PyPtr);
|
||||
|
@ -25,6 +33,81 @@ pub struct PyBytes(PyPtr);
|
|||
pyobject_convert!(PyBytes);
|
||||
pyobject_nativetype!(PyBytes, PyString_Check, PyBaseString_Type);
|
||||
|
||||
impl PyString {
|
||||
/// Creates a new Python string object.
|
||||
///
|
||||
/// This function will create a byte string if the
|
||||
/// input string is ASCII-only; and a unicode string otherwise.
|
||||
/// Use `PyUnicode::new()` to always create a unicode string.
|
||||
///
|
||||
/// Panics if out of memory.
|
||||
pub fn new(py: Python, s: &str) -> PyString {
|
||||
if s.is_ascii() {
|
||||
PyBytes::new(py, s.as_bytes()).into_basestring()
|
||||
} else {
|
||||
PyUnicode::new(py, s).into_basestring()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_object(py: Python, src: &PyObject,
|
||||
encoding: &str, errors: &str) -> PyResult<PyString> {
|
||||
unsafe {
|
||||
Ok(PyString(PyPtr::from_owned_ptr_or_err(
|
||||
py, ffi::PyUnicode_FromEncodedObject(
|
||||
src.as_ptr(), encoding.as_ptr() as *const i8, errors.as_ptr() as *const i8))?
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the python string data in its underlying representation.
|
||||
///
|
||||
/// For Python 2 byte strings, this function always returns `PyStringData::Utf8`,
|
||||
/// even if the bytes are not valid UTF-8.
|
||||
/// For unicode strings, returns the underlying representation used by Python.
|
||||
pub fn data(&self, py: Python) -> PyStringData {
|
||||
let ob: &PyObject = self.as_ref();
|
||||
if let Ok(bytes) = ob.cast_as::<PyBytes>(py) {
|
||||
PyStringData::Utf8(bytes.data(py))
|
||||
} else if let Ok(unicode) = ob.cast_as::<PyUnicode>(py) {
|
||||
unicode.data(py)
|
||||
} else {
|
||||
panic!("PyString is neither `str` nor `unicode`")
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the `PyString` into a Rust string.
|
||||
///
|
||||
/// On Python 2.7, if the `PyString` refers to a byte string,
|
||||
/// it will be decoded using UTF-8.
|
||||
///
|
||||
/// Returns a `UnicodeDecodeError` if the input is not valid unicode
|
||||
/// (containing unpaired surrogates, or a Python 2.7 byte string that is
|
||||
/// not valid UTF-8).
|
||||
pub fn to_string(&self, py: Python) -> PyResult<Cow<str>> {
|
||||
self.data(py).to_string(py)
|
||||
}
|
||||
|
||||
/// Convert the `PyString` into a Rust string.
|
||||
///
|
||||
/// On Python 2.7, if the `PyString` refers to a byte string,
|
||||
/// it will be decoded using UTF-8.
|
||||
///
|
||||
/// Unpaired surrogates and (on Python 2.7) invalid UTF-8 sequences are
|
||||
/// replaced with U+FFFD REPLACEMENT CHARACTER.
|
||||
pub fn to_string_lossy(&self, py: Python) -> Cow<str> {
|
||||
self.data(py).to_string_lossy()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_base_string(obj: &PyObject) -> bool {
|
||||
unsafe {
|
||||
ffi::PyType_FastSubclass(
|
||||
ffi::Py_TYPE(obj.as_ptr()),
|
||||
ffi::Py_TPFLAGS_STRING_SUBCLASS | ffi::Py_TPFLAGS_UNICODE_SUBCLASS) != 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl PyBytes {
|
||||
/// Creates a new Python byte string object.
|
||||
|
@ -40,6 +123,12 @@ impl PyBytes {
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts from `PyBytes` to `PyString`.
|
||||
#[inline]
|
||||
pub fn into_basestring(self) -> PyString {
|
||||
<PyString as ::PyDowncastInto>::unchecked_downcast_into(self)
|
||||
}
|
||||
|
||||
/// Gets the Python string data as byte slice.
|
||||
pub fn data(&self, _py: Python) -> &[u8] {
|
||||
unsafe {
|
||||
|
@ -49,34 +138,26 @@ impl PyBytes {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_base_string(obj: &PyObject) -> bool {
|
||||
unsafe {
|
||||
ffi::PyType_FastSubclass(
|
||||
ffi::Py_TYPE(obj.as_ptr()),
|
||||
ffi::Py_TPFLAGS_STRING_SUBCLASS | ffi::Py_TPFLAGS_UNICODE_SUBCLASS) != 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PyString {
|
||||
impl PyUnicode {
|
||||
/// Creates a new Python unicode string object.
|
||||
///
|
||||
/// Panics if out of memory.
|
||||
pub fn new(_py: Python, s: &str) -> PyString {
|
||||
pub fn new(_py: Python, s: &str) -> PyUnicode {
|
||||
let ptr = s.as_ptr() as *const c_char;
|
||||
let len = s.len() as ffi::Py_ssize_t;
|
||||
unsafe {
|
||||
PyString(PyPtr::from_owned_ptr_or_panic(
|
||||
PyUnicode(PyPtr::from_owned_ptr_or_panic(
|
||||
ffi::PyUnicode_FromStringAndSize(ptr, len)))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_object(py: Python, src: &PyObject, encoding: &str, errors: &str)
|
||||
-> PyResult<PyString>
|
||||
-> PyResult<PyUnicode>
|
||||
{
|
||||
unsafe {
|
||||
Ok(PyString(
|
||||
Ok(PyUnicode(
|
||||
PyPtr::from_owned_ptr_or_err(
|
||||
py, ffi::PyUnicode_FromEncodedObject(
|
||||
src.as_ptr(),
|
||||
|
@ -85,10 +166,10 @@ impl PyString {
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts from `PyString` to `PyBytes`.
|
||||
/// Converts from `PyUnicode` to `PyString`.
|
||||
#[inline]
|
||||
pub fn into_bytes(self) -> PyBytes {
|
||||
<PyBytes as ::PyDowncastInto>::unchecked_downcast_into(self)
|
||||
pub fn into_basestring(self) -> PyString {
|
||||
<PyString as ::PyDowncastInto>::unchecked_downcast_into(self)
|
||||
}
|
||||
|
||||
/// Gets the python string data in its underlying representation.
|
||||
|
|
Loading…
Reference in New Issue