PyString for py2

This commit is contained in:
Nikolay Kim 2017-06-11 17:20:18 -07:00
parent 805429e0f4
commit 03ced8917b
2 changed files with 100 additions and 24 deletions

View File

@ -33,12 +33,7 @@ pub const PyObject_HEAD_INIT: PyObject = PyObject {
/// C-layout (`#[repr(C)]`) object header that ends with an `ob_size` field.
///
/// NOTE(review): this listing shows both the spelled-out header fields
/// (`_ob_next`, `_ob_prev`, `ob_refcnt`, `ob_type`) and the embedded `ob_base: PyObject`;
/// presumably the former were replaced by the latter in this change — confirm against
/// the actual file before relying on the field list.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct PyVarObject {
// The two link fields below are only compiled in when `Py_TRACE_REFS` is configured.
#[cfg(py_sys_config="Py_TRACE_REFS")]
pub _ob_next: *mut PyObject,
#[cfg(py_sys_config="Py_TRACE_REFS")]
pub _ob_prev: *mut PyObject,
pub ob_refcnt: Py_ssize_t,
pub ob_type: *mut PyTypeObject,
// Embedded `PyObject` header.
pub ob_base: PyObject,
pub ob_size: Py_ssize_t,
}

View File

@ -5,6 +5,7 @@
use std;
use std::str;
use std::borrow::Cow;
use std::ascii::AsciiExt;
use std::os::raw::c_char;
use ffi;
@ -13,11 +14,18 @@ use pointers::PyPtr;
use python::{Python, ToPyPointer};
use super::{PyObject, PyStringData};
/// Represents a Python string. Corresponds to `unicode` in Python 2
/// Represents a Python string.
pub struct PyString(PyPtr);
pyobject_convert!(PyString);
pyobject_nativetype!(PyString, PyUnicode_Check, PyUnicode_Type);
pyobject_nativetype!(PyString, PyString_Check, PyBaseString_Type);
/// Represents a Python unicode string.
///
/// Newtype wrapper around a `PyPtr`.
pub struct PyUnicode(PyPtr);
// NOTE(review): presumably these macros generate the conversion impls and bind the wrapper
// to `PyUnicode_Check` / `PyUnicode_Type` — confirm against the macro definitions.
pyobject_convert!(PyUnicode);
pyobject_nativetype!(PyUnicode, PyUnicode_Check, PyUnicode_Type);
/// Represents a Python byte string. Corresponds to `str` in Python 2
pub struct PyBytes(PyPtr);
@ -25,6 +33,81 @@ pub struct PyBytes(PyPtr);
pyobject_convert!(PyBytes);
pyobject_nativetype!(PyBytes, PyString_Check, PyBaseString_Type);
impl PyString {
/// Creates a new Python string object.
///
/// This function will create a byte string if the
/// input string is ASCII-only; and a unicode string otherwise.
/// Use `PyUnicode::new()` to always create a unicode string.
///
/// Panics if out of memory.
pub fn new(py: Python, s: &str) -> PyString {
if s.is_ascii() {
PyBytes::new(py, s.as_bytes()).into_basestring()
} else {
PyUnicode::new(py, s).into_basestring()
}
}
pub fn from_object(py: Python, src: &PyObject,
encoding: &str, errors: &str) -> PyResult<PyString> {
unsafe {
Ok(PyString(PyPtr::from_owned_ptr_or_err(
py, ffi::PyUnicode_FromEncodedObject(
src.as_ptr(), encoding.as_ptr() as *const i8, errors.as_ptr() as *const i8))?
))
}
}
/// Gets the python string data in its underlying representation.
///
/// For Python 2 byte strings, this function always returns `PyStringData::Utf8`,
/// even if the bytes are not valid UTF-8.
/// For unicode strings, returns the underlying representation used by Python.
pub fn data(&self, py: Python) -> PyStringData {
let ob: &PyObject = self.as_ref();
if let Ok(bytes) = ob.cast_as::<PyBytes>(py) {
PyStringData::Utf8(bytes.data(py))
} else if let Ok(unicode) = ob.cast_as::<PyUnicode>(py) {
unicode.data(py)
} else {
panic!("PyString is neither `str` nor `unicode`")
}
}
/// Convert the `PyString` into a Rust string.
///
/// On Python 2.7, if the `PyString` refers to a byte string,
/// it will be decoded using UTF-8.
///
/// Returns a `UnicodeDecodeError` if the input is not valid unicode
/// (containing unpaired surrogates, or a Python 2.7 byte string that is
/// not valid UTF-8).
pub fn to_string(&self, py: Python) -> PyResult<Cow<str>> {
self.data(py).to_string(py)
}
/// Convert the `PyString` into a Rust string.
///
/// On Python 2.7, if the `PyString` refers to a byte string,
/// it will be decoded using UTF-8.
///
/// Unpaired surrogates and (on Python 2.7) invalid UTF-8 sequences are
/// replaced with U+FFFD REPLACEMENT CHARACTER.
pub fn to_string_lossy(&self, py: Python) -> Cow<str> {
self.data(py).to_string_lossy()
}
#[inline]
pub fn is_base_string(obj: &PyObject) -> bool {
unsafe {
ffi::PyType_FastSubclass(
ffi::Py_TYPE(obj.as_ptr()),
ffi::Py_TPFLAGS_STRING_SUBCLASS | ffi::Py_TPFLAGS_UNICODE_SUBCLASS) != 0
}
}
}
impl PyBytes {
/// Creates a new Python byte string object.
@ -40,6 +123,12 @@ impl PyBytes {
}
}
/// Converts from `PyBytes` to `PyString`.
#[inline]
pub fn into_basestring(self) -> PyString {
<PyString as ::PyDowncastInto>::unchecked_downcast_into(self)
}
/// Gets the Python string data as byte slice.
pub fn data(&self, _py: Python) -> &[u8] {
unsafe {
@ -49,34 +138,26 @@ impl PyBytes {
}
}
/// Reports whether the type of `obj` carries the string-subclass or the
/// unicode-subclass type flag.
#[inline]
pub fn is_base_string(obj: &PyObject) -> bool {
    unsafe {
        let string_like = ffi::Py_TPFLAGS_STRING_SUBCLASS | ffi::Py_TPFLAGS_UNICODE_SUBCLASS;
        let ty = ffi::Py_TYPE(obj.as_ptr());
        ffi::PyType_FastSubclass(ty, string_like) != 0
    }
}
}
impl PyString {
impl PyUnicode {
/// Creates a new Python unicode string object.
///
/// The contents of `s` are handed to `PyUnicode_FromStringAndSize` as a pointer plus an
/// explicit length, so no NUL terminator is needed.
///
/// Panics if out of memory.
pub fn new(_py: Python, s: &str) -> PyString {
pub fn new(_py: Python, s: &str) -> PyUnicode {
let ptr = s.as_ptr() as *const c_char;
// `str::len` is the length in bytes, not in characters.
let len = s.len() as ffi::Py_ssize_t;
unsafe {
PyString(PyPtr::from_owned_ptr_or_panic(
PyUnicode(PyPtr::from_owned_ptr_or_panic(
ffi::PyUnicode_FromStringAndSize(ptr, len)))
}
}
pub fn from_object(py: Python, src: &PyObject, encoding: &str, errors: &str)
-> PyResult<PyString>
-> PyResult<PyUnicode>
{
unsafe {
Ok(PyString(
Ok(PyUnicode(
PyPtr::from_owned_ptr_or_err(
py, ffi::PyUnicode_FromEncodedObject(
src.as_ptr(),
@ -85,10 +166,10 @@ impl PyString {
}
}
/// Converts from `PyString` to `PyBytes`.
/// Converts from `PyUnicode` to `PyString`.
#[inline]
pub fn into_bytes(self) -> PyBytes {
<PyBytes as ::PyDowncastInto>::unchecked_downcast_into(self)
pub fn into_basestring(self) -> PyString {
<PyString as ::PyDowncastInto>::unchecked_downcast_into(self)
}
/// Gets the python string data in its underlying representation.