Merge pull request #2269 from PyO3/intern-macro

RFC: Add intern! macro which can be used to amortize the cost of creating Python objects by storing them inside a GILOnceCell.
This commit is contained in:
David Hewitt 2022-04-05 00:34:37 +01:00 committed by GitHub
commit d3ac71a42b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 132 additions and 2 deletions

View File

@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added methods on `InterpreterConfig` to run Python scripts using the configured executable. [#2092](https://github.com/PyO3/pyo3/pull/2092)
- Added FFI definitions for `PyType_FromModuleAndSpec`, `PyType_GetModule`, `PyType_GetModuleState` and `PyModule_AddType`. [#2250](https://github.com/PyO3/pyo3/pull/2250)
- Add `PyString::intern` to enable usage of the Python's built-in string interning. [#2268](https://github.com/PyO3/pyo3/pull/2268)
- Add `intern!` macro which can be used to amortize the cost of creating Python strings by storing them inside a `GILOnceCell`. [#2269](https://github.com/PyO3/pyo3/pull/2269)
### Changed

View File

@ -142,6 +142,10 @@ harness = false
name = "bench_tuple"
harness = false
[[bench]]
name = "bench_intern"
harness = false
[workspace]
members = [
"pyo3-ffi",

33
benches/bench_intern.rs Normal file
View File

@ -0,0 +1,33 @@
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
use pyo3::prelude::*;
use pyo3::{intern, prepare_freethreaded_python};
fn getattr_direct(b: &mut Bencher<'_>) {
prepare_freethreaded_python();
Python::with_gil(|py| {
let sys = py.import("sys").unwrap();
b.iter(|| sys.getattr("version").unwrap());
});
}
fn getattr_intern(b: &mut Bencher<'_>) {
prepare_freethreaded_python();
Python::with_gil(|py| {
let sys = py.import("sys").unwrap();
b.iter(|| sys.getattr(intern!(py, "version")).unwrap());
});
}
fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("getattr_direct", getattr_direct);
c.bench_function("getattr_intern", getattr_intern);
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@ -375,6 +375,7 @@ pub mod impl_;
mod instance;
pub mod marker;
pub mod marshal;
#[macro_use]
pub mod once_cell;
pub mod panic;
pub mod prelude;

View File

@ -42,6 +42,7 @@ impl<T> GILOnceCell<T> {
}
/// Get a reference to the contained value, or `None` if the cell has not yet been written.
#[inline]
pub fn get(&self, _py: Python<'_>) -> Option<&T> {
// Safe because if the cell has not yet been written, None is returned.
unsafe { &*self.0.get() }.as_ref()
@ -61,15 +62,23 @@ impl<T> GILOnceCell<T> {
/// exactly once, even if multiple threads attempt to call `get_or_init`
/// 4) if f() can panic but still does not release the GIL, it may be called multiple times,
/// but it is guaranteed that f() will never be called concurrently
#[inline]
pub fn get_or_init<F>(&self, py: Python<'_>, f: F) -> &T
where
F: FnOnce() -> T,
{
let inner = unsafe { &*self.0.get() }.as_ref();
if let Some(value) = inner {
if let Some(value) = self.get(py) {
return value;
}
self.init(py, f)
}
#[cold]
fn init<F>(&self, py: Python<'_>, f: F) -> &T
where
F: FnOnce() -> T,
{
// Note that f() could temporarily release the GIL, so it's possible that another thread
// writes to this GILOnceCell before f() finishes. That's fine; we'll just have to discard
// the value computed here and accept a bit of wasted computation.
@ -101,3 +110,79 @@ impl<T> GILOnceCell<T> {
Ok(())
}
}
/// Interns `text` as a Python string and stores a reference to it in static storage.
///
/// A reference to the same Python string is returned on each invocation.
///
/// # Example: Using `intern!` to avoid needlessly recreating the same Python string
///
/// ```
/// use pyo3::intern;
/// # use pyo3::{pyfunction, types::PyDict, wrap_pyfunction, PyResult, Python};
///
/// #[pyfunction]
/// fn create_dict(py: Python<'_>) -> PyResult<&PyDict> {
/// let dict = PyDict::new(py);
/// // 👇 A new `PyString` is created
/// // for every call of this function.
/// dict.set_item("foo", 42)?;
/// Ok(dict)
/// }
///
/// #[pyfunction]
/// fn create_dict_faster(py: Python<'_>) -> PyResult<&PyDict> {
/// let dict = PyDict::new(py);
/// // 👇 A `PyString` is created once and reused
/// // for the lifetime of the program.
/// dict.set_item(intern!(py, "foo"), 42)?;
/// Ok(dict)
/// }
/// #
/// # Python::with_gil(|py| {
/// # let fun = wrap_pyfunction!(create_dict_faster, py).unwrap();
/// # let dict = fun.call0().unwrap();
/// # assert!(dict.contains("foo").unwrap());
/// # });
/// ```
#[macro_export]
macro_rules! intern {
($py: expr, $text: expr) => {{
fn isolate_from_dyn_env(py: $crate::Python<'_>) -> &$crate::types::PyString {
static INTERNED: $crate::once_cell::GILOnceCell<$crate::Py<$crate::types::PyString>> =
$crate::once_cell::GILOnceCell::new();
INTERNED
.get_or_init(py, || {
$crate::conversion::IntoPy::into_py(
$crate::types::PyString::intern(py, $text),
py,
)
})
.as_ref(py)
}
isolate_from_dyn_env($py)
}};
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::PyDict;
#[test]
fn test_intern() {
Python::with_gil(|py| {
let foo1 = "foo";
let foo2 = intern!(py, "foo");
let foo3 = intern!(py, stringify!(foo));
let dict = PyDict::new(py);
dict.set_item(foo1, 42_usize).unwrap();
assert!(dict.contains(foo2).unwrap());
assert_eq!(dict.get_item(foo3).unwrap().extract::<usize>().unwrap(), 42);
});
}
}

View File

@ -27,3 +27,9 @@ enum Derive4 {
crate::create_exception!(mymodule, CustomError, crate::exceptions::PyException);
crate::import_exception!(socket, gaierror);
#[allow(dead_code)]
fn intern(py: crate::Python<'_>) {
let _foo = crate::intern!(py, "foo");
let _bar = crate::intern!(py, stringify!(bar));
}