diff --git a/CHANGELOG.md b/CHANGELOG.md index d44838ee..9a1defc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added methods on `InterpreterConfig` to run Python scripts using the configured executable. [#2092](https://github.com/PyO3/pyo3/pull/2092) - Added FFI definitions for `PyType_FromModuleAndSpec`, `PyType_GetModule`, `PyType_GetModuleState` and `PyModule_AddType`. [#2250](https://github.com/PyO3/pyo3/pull/2250) - Add `PyString::intern` to enable usage of the Python's built-in string interning. [#2268](https://github.com/PyO3/pyo3/pull/2268) +- Add `intern!` macro which can be used to amortize the cost of creating Python strings by storing them inside a `GILOnceCell`. [#2269](https://github.com/PyO3/pyo3/pull/2269) ### Changed diff --git a/Cargo.toml b/Cargo.toml index bf66abf5..d4a75ce6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,6 +142,10 @@ harness = false name = "bench_tuple" harness = false +[[bench]] +name = "bench_intern" +harness = false + [workspace] members = [ "pyo3-ffi", diff --git a/benches/bench_intern.rs b/benches/bench_intern.rs new file mode 100644 index 00000000..927a6e6a --- /dev/null +++ b/benches/bench_intern.rs @@ -0,0 +1,33 @@ +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; + +use pyo3::prelude::*; + +use pyo3::{intern, prepare_freethreaded_python}; + +fn getattr_direct(b: &mut Bencher<'_>) { + prepare_freethreaded_python(); + + Python::with_gil(|py| { + let sys = py.import("sys").unwrap(); + + b.iter(|| sys.getattr("version").unwrap()); + }); +} + +fn getattr_intern(b: &mut Bencher<'_>) { + prepare_freethreaded_python(); + + Python::with_gil(|py| { + let sys = py.import("sys").unwrap(); + + b.iter(|| sys.getattr(intern!(py, "version")).unwrap()); + }); +} + +fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("getattr_direct", getattr_direct); + c.bench_function("getattr_intern", getattr_intern); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index ce2a1e24..6da16623 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -375,6 +375,7 @@ pub mod impl_; mod instance; pub mod marker; pub mod marshal; +#[macro_use] pub mod once_cell; pub mod panic; pub mod prelude; diff --git a/src/once_cell.rs b/src/once_cell.rs index 739864c5..c95a7fb8 100644 --- a/src/once_cell.rs +++ b/src/once_cell.rs @@ -42,6 +42,7 @@ impl GILOnceCell { } /// Get a reference to the contained value, or `None` if the cell has not yet been written. + #[inline] pub fn get(&self, _py: Python<'_>) -> Option<&T> { // Safe because if the cell has not yet been written, None is returned. unsafe { &*self.0.get() }.as_ref() @@ -61,15 +62,23 @@ impl GILOnceCell { /// exactly once, even if multiple threads attempt to call `get_or_init` /// 4) if f() can panic but still does not release the GIL, it may be called multiple times, /// but it is guaranteed that f() will never be called concurrently + #[inline] pub fn get_or_init(&self, py: Python<'_>, f: F) -> &T where F: FnOnce() -> T, { - let inner = unsafe { &*self.0.get() }.as_ref(); - if let Some(value) = inner { + if let Some(value) = self.get(py) { return value; } + self.init(py, f) + } + + #[cold] + fn init(&self, py: Python<'_>, f: F) -> &T + where + F: FnOnce() -> T, + { // Note that f() could temporarily release the GIL, so it's possible that another thread // writes to this GILOnceCell before f() finishes. That's fine; we'll just have to discard // the value computed here and accept a bit of wasted computation. @@ -101,3 +110,79 @@ impl GILOnceCell { Ok(()) } } + +/// Interns `text` as a Python string and stores a reference to it in static storage. +/// +/// A reference to the same Python string is returned on each invocation. +/// +/// # Example: Using `intern!` to avoid needlessly recreating the same Python string +/// +/// ``` +/// use pyo3::intern; +/// # use pyo3::{pyfunction, types::PyDict, wrap_pyfunction, PyResult, Python}; +/// +/// #[pyfunction] +/// fn create_dict(py: Python<'_>) -> PyResult<&PyDict> { +/// let dict = PyDict::new(py); +/// // 👇 A new `PyString` is created +/// // for every call of this function. +/// dict.set_item("foo", 42)?; +/// Ok(dict) +/// } +/// +/// #[pyfunction] +/// fn create_dict_faster(py: Python<'_>) -> PyResult<&PyDict> { +/// let dict = PyDict::new(py); +/// // 👇 A `PyString` is created once and reused +/// // for the lifetime of the program. +/// dict.set_item(intern!(py, "foo"), 42)?; +/// Ok(dict) +/// } +/// # +/// # Python::with_gil(|py| { +/// # let fun = wrap_pyfunction!(create_dict_faster, py).unwrap(); +/// # let dict = fun.call0().unwrap(); +/// # assert!(dict.contains("foo").unwrap()); +/// # }); +/// ``` +#[macro_export] +macro_rules! intern { + ($py: expr, $text: expr) => {{ + fn isolate_from_dyn_env(py: $crate::Python<'_>) -> &$crate::types::PyString { + static INTERNED: $crate::once_cell::GILOnceCell<$crate::Py<$crate::types::PyString>> = + $crate::once_cell::GILOnceCell::new(); + + INTERNED + .get_or_init(py, || { + $crate::conversion::IntoPy::into_py( + $crate::types::PyString::intern(py, $text), + py, + ) + }) + .as_ref(py) + } + + isolate_from_dyn_env($py) + }}; +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::types::PyDict; + + #[test] + fn test_intern() { + Python::with_gil(|py| { + let foo1 = "foo"; + let foo2 = intern!(py, "foo"); + let foo3 = intern!(py, stringify!(foo)); + + let dict = PyDict::new(py); + dict.set_item(foo1, 42_usize).unwrap(); + assert!(dict.contains(foo2).unwrap()); + assert_eq!(dict.get_item(foo3).unwrap().extract::().unwrap(), 42); + }); + } +} diff --git a/src/test_hygiene/misc.rs b/src/test_hygiene/misc.rs index eb49718e..1c0baaa2 100644 --- a/src/test_hygiene/misc.rs +++ b/src/test_hygiene/misc.rs @@ -27,3 +27,9 @@ enum Derive4 { crate::create_exception!(mymodule, CustomError, crate::exceptions::PyException); crate::import_exception!(socket, gaierror); + +#[allow(dead_code)] +fn intern(py: crate::Python<'_>) { + let _foo = crate::intern!(py, "foo"); + let _bar = crate::intern!(py, stringify!(bar)); +}