diff --git a/python3-sys/src/lib.rs b/python3-sys/src/lib.rs index 5a7c8b8d..6173ea49 100644 --- a/python3-sys/src/lib.rs +++ b/python3-sys/src/lib.rs @@ -88,9 +88,9 @@ mod typeslots; mod pydebug; -mod bytearrayobject; // TODO supports PEP-384 only; needs adjustment for Python 3.3 and 3.5 -mod bytesobject; // TODO supports PEP-384 only; needs adjustment for Python 3.3 and 3.5 -mod unicodeobject; // TODO supports PEP-384 only; needs adjustment for Python 3.3 and 3.5 +mod bytearrayobject; +mod bytesobject; +mod unicodeobject; mod longobject; // TODO supports PEP-384 only; needs adjustment for Python 3.3 and 3.5 // mod longintrepr; TODO excluded by PEP-384 mod boolobject; // TODO supports PEP-384 only; needs adjustment for Python 3.3 and 3.5 diff --git a/python3-sys/src/unicodeobject.rs b/python3-sys/src/unicodeobject.rs index 758fad37..794988ea 100644 --- a/python3-sys/src/unicodeobject.rs +++ b/python3-sys/src/unicodeobject.rs @@ -2,6 +2,9 @@ use libc::{c_void, c_char, c_int, wchar_t}; use object::*; use pyport::Py_ssize_t; +#[cfg(not(Py_LIMITED_API))] +pub type Py_UNICODE = wchar_t; + pub type Py_UCS4 = u32; pub type Py_UCS2 = u16; pub type Py_UCS1 = u8; @@ -24,21 +27,51 @@ pub unsafe fn PyUnicode_CheckExact(op : *mut PyObject) -> c_int { pub const Py_UNICODE_REPLACEMENT_CHARACTER : Py_UCS4 = 0xFFFD; extern "C" { + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject; + + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_CopyCharacters(to: *mut PyObject, to_start: Py_ssize_t, + from: *mut PyObject, + from_start: Py_ssize_t, + how_many: Py_ssize_t) -> Py_ssize_t; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_Fill(unicode: *mut PyObject, start: Py_ssize_t, + length: Py_ssize_t, fill_char: Py_UCS4) + -> Py_ssize_t; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) + -> *mut PyObject; + pub fn PyUnicode_FromStringAndSize(u: *const c_char, size: Py_ssize_t) -> *mut 
PyObject; pub fn PyUnicode_FromString(u: *const c_char) -> *mut PyObject; + + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_FromKindAndData(kind: c_int, + buffer: *const c_void, + size: Py_ssize_t) -> *mut PyObject; + pub fn PyUnicode_Substring(str: *mut PyObject, start: Py_ssize_t, end: Py_ssize_t) -> *mut PyObject; pub fn PyUnicode_AsUCS4(unicode: *mut PyObject, buffer: *mut Py_UCS4, buflen: Py_ssize_t, copy_null: c_int) -> *mut Py_UCS4; pub fn PyUnicode_AsUCS4Copy(unicode: *mut PyObject) -> *mut Py_UCS4; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_AsUnicodeAndSize(unicode: *mut PyObject, + size: *mut Py_ssize_t) + -> *mut Py_UNICODE; pub fn PyUnicode_GetLength(unicode: *mut PyObject) -> Py_ssize_t; pub fn PyUnicode_GetSize(unicode: *mut PyObject) -> Py_ssize_t; pub fn PyUnicode_ReadChar(unicode: *mut PyObject, index: Py_ssize_t) -> Py_UCS4; pub fn PyUnicode_WriteChar(unicode: *mut PyObject, index: Py_ssize_t, character: Py_UCS4) -> c_int; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_GetMax() -> Py_UNICODE; pub fn PyUnicode_Resize(unicode: *mut *mut PyObject, length: Py_ssize_t) -> c_int; pub fn PyUnicode_FromEncodedObject(obj: *mut PyObject, @@ -62,6 +95,12 @@ extern "C" { size: *mut Py_ssize_t) -> *mut wchar_t; pub fn PyUnicode_FromOrdinal(ordinal: c_int) -> *mut PyObject; pub fn PyUnicode_ClearFreeList() -> c_int; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, + size: *mut Py_ssize_t) + -> *mut c_char; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char; pub fn PyUnicode_GetDefaultEncoding() -> *const c_char; pub fn PyUnicode_Decode(s: *const c_char, size: Py_ssize_t, encoding: *const c_char, @@ -74,6 +113,10 @@ extern "C" { encoding: *const c_char, errors: *const c_char) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_Encode(s: *const Py_UNICODE, 
size: Py_ssize_t, + encoding: *const c_char, + errors: *const c_char) -> *mut PyObject; pub fn PyUnicode_AsEncodedObject(unicode: *mut PyObject, encoding: *const c_char, errors: *const c_char) @@ -96,6 +139,12 @@ extern "C" { errors: *const c_char, consumed: *mut Py_ssize_t) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeUTF7(data: *const Py_UNICODE, length: Py_ssize_t, + base64SetO: c_int, + base64WhiteSpace: c_int, + errors: *const c_char) + -> *mut PyObject; pub fn PyUnicode_DecodeUTF8(string: *const c_char, length: Py_ssize_t, errors: *const c_char) @@ -106,6 +155,10 @@ extern "C" { consumed: *mut Py_ssize_t) -> *mut PyObject; pub fn PyUnicode_AsUTF8String(unicode: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeUTF8(data: *const Py_UNICODE, length: Py_ssize_t, + errors: *const c_char) + -> *mut PyObject; pub fn PyUnicode_DecodeUTF32(string: *const c_char, length: Py_ssize_t, errors: *const c_char, @@ -118,6 +171,10 @@ extern "C" { consumed: *mut Py_ssize_t) -> *mut PyObject; pub fn PyUnicode_AsUTF32String(unicode: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeUTF32(data: *const Py_UNICODE, length: Py_ssize_t, + errors: *const c_char, + byteorder: c_int) -> *mut PyObject; pub fn PyUnicode_DecodeUTF16(string: *const c_char, length: Py_ssize_t, errors: *const c_char, @@ -130,34 +187,74 @@ extern "C" { consumed: *mut Py_ssize_t) -> *mut PyObject; pub fn PyUnicode_AsUTF16String(unicode: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeUTF16(data: *const Py_UNICODE, length: Py_ssize_t, + errors: *const c_char, + byteorder: c_int) -> *mut PyObject; pub fn PyUnicode_DecodeUnicodeEscape(string: *const c_char, length: Py_ssize_t, errors: *const c_char) -> *mut PyObject; pub fn PyUnicode_AsUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeUnicodeEscape(data: 
*const Py_UNICODE, + length: Py_ssize_t) -> *mut PyObject; pub fn PyUnicode_DecodeRawUnicodeEscape(string: *const c_char, length: Py_ssize_t, errors: *const c_char) -> *mut PyObject; pub fn PyUnicode_AsRawUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeRawUnicodeEscape(data: *const Py_UNICODE, + length: Py_ssize_t) + -> *mut PyObject; pub fn PyUnicode_DecodeLatin1(string: *const c_char, length: Py_ssize_t, errors: *const c_char) -> *mut PyObject; pub fn PyUnicode_AsLatin1String(unicode: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeLatin1(data: *const Py_UNICODE, length: Py_ssize_t, + errors: *const c_char) + -> *mut PyObject; pub fn PyUnicode_DecodeASCII(string: *const c_char, length: Py_ssize_t, errors: *const c_char) -> *mut PyObject; pub fn PyUnicode_AsASCIIString(unicode: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeASCII(data: *const Py_UNICODE, length: Py_ssize_t, + errors: *const c_char) + -> *mut PyObject; pub fn PyUnicode_DecodeCharmap(string: *const c_char, length: Py_ssize_t, mapping: *mut PyObject, errors: *const c_char) -> *mut PyObject; pub fn PyUnicode_AsCharmapString(unicode: *mut PyObject, mapping: *mut PyObject) -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeCharmap(data: *const Py_UNICODE, + length: Py_ssize_t, mapping: *mut PyObject, + errors: *const c_char) + -> *mut PyObject; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_TranslateCharmap(data: *const Py_UNICODE, + length: Py_ssize_t, + table: *mut PyObject, + errors: *const c_char) + -> *mut PyObject; + + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_EncodeDecimal(s: *mut Py_UNICODE, length: Py_ssize_t, + output: *mut c_char, + errors: *const c_char) + -> c_int; + #[cfg(not(Py_LIMITED_API))] + pub fn PyUnicode_TransformDecimalToASCII(s: *mut Py_UNICODE, + length: Py_ssize_t) + -> *mut PyObject; pub fn 
PyUnicode_DecodeLocaleAndSize(str: *const c_char, len: Py_ssize_t, errors: *const c_char)
@@ -225,5 +322,7 @@ extern "C" {
     pub fn PyUnicode_Contains(container: *mut PyObject,
                               element: *mut PyObject) -> c_int;
     pub fn PyUnicode_IsIdentifier(s: *mut PyObject) -> c_int;
+    #[cfg(not(Py_LIMITED_API))]
+    pub fn PyUnicode_AsUnicodeCopy(unicode: *mut PyObject) -> *mut Py_UNICODE;
 }
 
diff --git a/src/objects/string.rs b/src/objects/string.rs
index 4a25c0ab..37f3f2b7 100644
--- a/src/objects/string.rs
+++ b/src/objects/string.rs
@@ -92,8 +92,8 @@ impl <'p> PyUnicode<'p> {
     /// Convert the `PyUnicode` into a rust string.
     ///
     /// Returns a `UnicodeDecodeError` if the input contains invalid code points.
+    #[cfg(feature="python27-sys")]
     pub fn to_string(&self) -> PyResult<'p, Cow<str>> {
-        // TODO: use PyUnicode_AsUTF8AndSize if available
         let py = self.python();
         let bytes: PyBytes = unsafe {
             try!(err::result_cast_from_owned_ptr(py, ffi::PyUnicode_AsUTF8String(self.as_ptr())))
@@ -107,8 +107,8 @@ impl <'p> PyUnicode<'p> {
     /// Convert the `PyUnicode` into a rust string.
     ///
     /// Any invalid code points are replaced with U+FFFD REPLACEMENT CHARACTER.
+    #[cfg(feature="python27-sys")]
     pub fn to_string_lossy(&self) -> Cow<str> {
-        // TODO: use PyUnicode_AsUTF8AndSize if available
         // TODO: test how this function handles lone surrogates or otherwise invalid code points
         let py = self.python();
         let bytes: PyBytes = unsafe {
@@ -117,6 +117,65 @@ impl <'p> PyUnicode<'p> {
         };
         Cow::Owned(String::from_utf8_lossy(bytes.as_slice()).into_owned())
     }
+
+    /// Borrow the UTF-8 encoding of this string as a byte slice.
+    ///
+    /// Calls `PyUnicode_AsUTF8AndSize`; a null result is converted into the
+    /// pending Python exception via `PyErr::fetch`.
+    #[cfg(feature="python3-sys")]
+    fn to_utf8_bytes(&self) -> PyResult<'p, &[u8]> {
+        unsafe {
+            let mut length = 0;
+            let data = ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut length);
+            if data.is_null() {
+                Err(PyErr::fetch(self.python()))
+            } else {
+                // NOTE(review): assumes the buffer stays valid while `self` is
+                // alive, as the CPython docs describe -- confirm.
+                Ok(std::slice::from_raw_parts(data as *const u8, length as usize))
+            }
+        }
+    }
+
+    /// Convert the `PyUnicode` into a Rust string.
+    ///
+    /// Propagates the Python exception if `PyUnicode_AsUTF8AndSize` fails, and
+    /// returns a `UnicodeDecodeError` if the bytes are not valid UTF-8.
+    #[cfg(feature="python3-sys")]
+    pub fn to_string(&self) -> PyResult<'p, Cow<str>> {
+        let py = self.python();
+        let bytes = try!(self.to_utf8_bytes());
+        match str::from_utf8(bytes) {
+            Ok(s) => Ok(Cow::Borrowed(s)),
+            Err(e) => Err(PyErr::from_instance(try!(exc::UnicodeDecodeError::new_utf8(py, bytes, e))))
+        }
+    }
+
+    /// Convert the `PyUnicode` into a Rust string.
+    ///
+    /// Any invalid code points are replaced with U+FFFD REPLACEMENT CHARACTER.
+    ///
+    /// # Panics
+    ///
+    /// Panics only if the `surrogatepass` fallback encoding itself fails.
+    #[cfg(feature="python3-sys")]
+    pub fn to_string_lossy(&self) -> Cow<str> {
+        // Fast path: borrow the UTF-8 buffer held by the Python object.
+        if let Ok(bytes) = self.to_utf8_bytes() {
+            return String::from_utf8_lossy(bytes);
+        }
+        // The strict encoder failed (e.g. on lone surrogates). A lossy conversion
+        // must not panic for that, so re-encode with `surrogatepass` and let
+        // `from_utf8_lossy` replace the resulting invalid sequences.
+        let py = self.python();
+        let bytes: PyBytes = unsafe {
+            err::result_cast_from_owned_ptr(py,
+                ffi::PyUnicode_AsEncodedString(self.as_ptr(),
+                                               b"utf-8\0".as_ptr() as *const _,
+                                               b"surrogatepass\0".as_ptr() as *const _))
+        }.expect("Error in PyUnicode_AsEncodedString");
+        Cow::Owned(String::from_utf8_lossy(bytes.as_slice()).into_owned())
+    }
 }
 
 // On PyString (i.e. PyBytes in 2.7, PyUnicode otherwise), put static methods