Add PyString::intern to enable access to Python's built-in string interning. (#2268)

This commit is contained in:
Adam Reichold 2022-04-03 21:07:58 +02:00 committed by GitHub
parent 040ce8616b
commit d3dcbd72ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 2 deletions

View File

@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Allow dependent crates to access config values from `pyo3-build-config` via cargo link dep env vars. [#2092](https://github.com/PyO3/pyo3/pull/2092)
- Added methods on `InterpreterConfig` to run Python scripts using the configured executable. [#2092](https://github.com/PyO3/pyo3/pull/2092)
- Added FFI definitions for `PyType_FromModuleAndSpec`, `PyType_GetModule`, `PyType_GetModuleState` and `PyModule_AddType`. [#2250](https://github.com/PyO3/pyo3/pull/2250)
- Add `PyString::intern` to enable usage of Python's built-in string interning. [#2268](https://github.com/PyO3/pyo3/pull/2268)
### Changed

View File

@ -88,7 +88,7 @@ pub fn build_py_methods(
pub fn impl_methods(
ty: &syn::Type,
impls: &mut Vec<syn::ImplItem>,
impls: &mut [syn::ImplItem],
methods_type: PyClassMethodsType,
options: PyImplOptions,
) -> syn::Result<TokenStream> {

View File

@ -44,7 +44,7 @@ pub fn build_py_proto(ast: &mut syn::ItemImpl) -> syn::Result<TokenStream> {
fn impl_proto_impl(
ty: &syn::Type,
impls: &mut Vec<syn::ImplItem>,
impls: &mut [syn::ImplItem],
proto: &defs::Proto,
) -> syn::Result<TokenStream> {
let mut trait_impls = TokenStream::new();

View File

@ -144,6 +144,26 @@ impl PyString {
unsafe { py.from_owned_ptr(ffi::PyUnicode_FromStringAndSize(ptr, len)) }
}
/// Creates a Python string from `s` and interns it.
///
/// Calling this repeatedly with the same string returns a reference to the same Python string object.
///
/// Note that while this is more memory efficient than [`PyString::new`], it unconditionally allocates a
/// temporary Python string object first and is therefore slower than [`PyString::new`].
///
/// # Panics
///
/// Panics if out of memory.
pub fn intern<'p>(py: Python<'p>, s: &str) -> &'p PyString {
    let byte_len = s.len() as ffi::Py_ssize_t;
    let data = s.as_ptr() as *const c_char;
    // SAFETY: `data` and `byte_len` describe the bytes of `s`, which stays borrowed for the
    // whole call; the resulting pointer is only handed to `from_owned_ptr`.
    unsafe {
        let mut obj = ffi::PyUnicode_FromStringAndSize(data, byte_len);
        // Interning takes `&mut obj`, so it may rewrite the pointer; it is skipped entirely
        // when creating the temporary string yielded a null pointer.
        if !obj.is_null() {
            ffi::PyUnicode_InternInPlace(&mut obj);
        }
        // NOTE(review): a null `obj` is passed on as-is; presumably `from_owned_ptr` is where
        // the documented out-of-memory panic is raised — confirm.
        py.from_owned_ptr(obj)
    }
}
/// Attempts to create a Python string from a Python [bytes-like object].
///
/// [bytes-like object]: https://docs.python.org/3/glossary.html#term-bytes-like-object
@ -592,4 +612,22 @@ mod tests {
assert_eq!(data.to_string_lossy(), Cow::Owned::<str>("𠀀<EFBFBD>".into()));
});
}
#[test]
fn test_intern_string() {
    Python::with_gil(|py| {
        // First interning of "foo" round-trips the contents.
        let foo_first = PyString::intern(py, "foo");
        assert_eq!(foo_first.to_str().unwrap(), "foo");

        // Interning the same contents again must yield the very same object.
        let foo_again = PyString::intern(py, "foo");
        assert_eq!(foo_again.to_str().unwrap(), "foo");
        assert_eq!(foo_first.as_ptr(), foo_again.as_ptr());

        // Different contents must map to a different object.
        let bar = PyString::intern(py, "bar");
        assert_eq!(bar.to_str().unwrap(), "bar");
        assert_ne!(foo_first.as_ptr(), bar.as_ptr());
    });
}
}