From c644c0b0b8c5d685c355eeda49c73103b6a51ba4 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Thu, 6 Jun 2024 10:45:16 +0200 Subject: [PATCH] Lazy-initialize the global reference pool to reduce its overhead when unused (#4178) * Add benchmarks exercising the global reference count decrement pool. * Lazy-initialize the global reference pool to reduce its overhead when unused --- Cargo.toml | 1 + newsfragments/4178.changed.md | 1 + pyo3-benches/benches/bench_pyobject.rs | 106 ++++++++++++++++++++++++- src/gil.rs | 23 ++++-- 4 files changed, 124 insertions(+), 7 deletions(-) create mode 100644 newsfragments/4178.changed.md diff --git a/Cargo.toml b/Cargo.toml index 921e5517..30b394c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ rust-version = "1.63" cfg-if = "1.0" libc = "0.2.62" memoffset = "0.9" +once_cell = "1" # ffi bindings to the python interpreter, split into a separate crate so they can be used independently pyo3-ffi = { path = "pyo3-ffi", version = "=0.22.0-dev" } diff --git a/newsfragments/4178.changed.md b/newsfragments/4178.changed.md new file mode 100644 index 00000000..a97c1ec8 --- /dev/null +++ b/newsfragments/4178.changed.md @@ -0,0 +1 @@ +The global reference pool (to track pending reference count decrements) is now initialized lazily to avoid the overhead of taking a mutex upon function entry when the functionality is not actually used. diff --git a/pyo3-benches/benches/bench_pyobject.rs b/pyo3-benches/benches/bench_pyobject.rs index af25d61c..a57a98a8 100644 --- a/pyo3-benches/benches/bench_pyobject.rs +++ b/pyo3-benches/benches/bench_pyobject.rs @@ -1,4 +1,12 @@ -use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher, Criterion}; +use codspeed_criterion_compat::{criterion_group, criterion_main, BatchSize, Bencher, Criterion}; + +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + mpsc::channel, + Arc, Barrier, +}; +use std::thread::spawn; +use std::time::{Duration, Instant}; use pyo3::prelude::*; @@ -6,14 +14,108 @@ fn drop_many_objects(b: &mut Bencher<'_>) { Python::with_gil(|py| { b.iter(|| { for _ in 0..1000 { - std::mem::drop(py.None()); + drop(py.None()); } }); }); } +fn drop_many_objects_without_gil(b: &mut Bencher<'_>) { + b.iter_batched( + || { + Python::with_gil(|py| { + (0..1000) + .map(|_| py.None().into_py(py)) + .collect::>() + }) + }, + |objs| { + drop(objs); + + Python::with_gil(|_py| ()); + }, + BatchSize::SmallInput, + ); +} + +fn drop_many_objects_multiple_threads(b: &mut Bencher<'_>) { + const THREADS: usize = 5; + + let barrier = Arc::new(Barrier::new(1 + THREADS)); + + let done = Arc::new(AtomicUsize::new(0)); + + let sender = (0..THREADS) + .map(|_| { + let (sender, receiver) = channel(); + + let barrier = barrier.clone(); + + let done = done.clone(); + + spawn(move || { + for objs in receiver { + barrier.wait(); + + drop(objs); + + done.fetch_add(1, Ordering::AcqRel); + } + }); + + sender + }) + .collect::>(); + + b.iter_custom(|iters| { + let mut duration = Duration::ZERO; + + let mut last_done = done.load(Ordering::Acquire); + + for _ in 0..iters { + for sender in &sender { + let objs = Python::with_gil(|py| { + (0..1000 / THREADS) + .map(|_| py.None().into_py(py)) + .collect::>() + }); + + sender.send(objs).unwrap(); + } + + barrier.wait(); + + let start = Instant::now(); + + loop { + Python::with_gil(|_py| ()); + + let done = done.load(Ordering::Acquire); + if done - last_done == THREADS { + last_done = done; + break; + } + } + + Python::with_gil(|_py| ()); + + duration += start.elapsed(); + } + + duration + }); +} + fn criterion_benchmark(c: &mut Criterion) { c.bench_function("drop_many_objects", drop_many_objects); + c.bench_function( + "drop_many_objects_without_gil", + drop_many_objects_without_gil, + ); + c.bench_function( + "drop_many_objects_multiple_threads", + drop_many_objects_multiple_threads, + ); } criterion_group!(benches, criterion_benchmark); diff --git a/src/gil.rs b/src/gil.rs index 3e25c7cf..ec20fc64 100644 --- a/src/gil.rs +++ b/src/gil.rs @@ -5,6 +5,8 @@ use crate::impl_::not_send::{NotSend, NOT_SEND}; #[cfg(pyo3_disable_reference_pool)] use crate::impl_::panic::PanicTrap; use crate::{ffi, Python}; +#[cfg(not(pyo3_disable_reference_pool))] +use once_cell::sync::Lazy; use std::cell::Cell; #[cfg(all(feature = "gil-refs", debug_assertions))] use std::cell::RefCell; @@ -227,7 +229,9 @@ impl GILGuard { let pool = mem::ManuallyDrop::new(GILPool::new()); #[cfg(not(pyo3_disable_reference_pool))] - POOL.update_counts(Python::assume_gil_acquired()); + if let Some(pool) = Lazy::get(&POOL) { + pool.update_counts(Python::assume_gil_acquired()); + } GILGuard::Ensured { gstate, #[cfg(feature = "gil-refs")] @@ -240,7 +244,9 @@ impl GILGuard { increment_gil_count(); let guard = GILGuard::Assumed; #[cfg(not(pyo3_disable_reference_pool))] - POOL.update_counts(guard.python()); + if let Some(pool) = Lazy::get(&POOL) { + pool.update_counts(guard.python()); + } guard } @@ -307,11 +313,14 @@ impl ReferencePool { } } +#[cfg(not(pyo3_disable_reference_pool))] +unsafe impl Send for ReferencePool {} + #[cfg(not(pyo3_disable_reference_pool))] unsafe impl Sync for ReferencePool {} #[cfg(not(pyo3_disable_reference_pool))] -static POOL: ReferencePool = ReferencePool::new(); +static POOL: Lazy = Lazy::new(ReferencePool::new); /// A guard which can be used to temporarily release the GIL and restore on `Drop`. pub(crate) struct SuspendGIL { @@ -336,7 +345,9 @@ impl Drop for SuspendGIL { // Update counts of PyObjects / Py that were cloned or dropped while the GIL was released. #[cfg(not(pyo3_disable_reference_pool))] - POOL.update_counts(Python::assume_gil_acquired()); + if let Some(pool) = Lazy::get(&POOL) { + pool.update_counts(Python::assume_gil_acquired()); + } } } } @@ -409,7 +420,9 @@ impl GILPool { pub unsafe fn new() -> GILPool { // Update counts of PyObjects / Py that have been cloned or dropped since last acquisition #[cfg(not(pyo3_disable_reference_pool))] - POOL.update_counts(Python::assume_gil_acquired()); + if let Some(pool) = Lazy::get(&POOL) { + pool.update_counts(Python::assume_gil_acquired()); + } GILPool { start: OWNED_OBJECTS .try_with(|owned_objects| {