From e34087c5248b614b7b73b053ccb687be4f78d553 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 30 Oct 2024 20:37:28 -0700 Subject: [PATCH] Add a temporary hook for custom yielding in long-running op (#13103) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This is a simplified version of https://github.com/facebook/rocksdb/issues/13096, which called for a way to hook into long-running loops completely within RocksDB to change their thread priority (or similar). The current prime hook point is `DBIter::FindNextUserEntryInternal` likely because of iterating over tombstones. This is implemented using the weak symbol hack for ease of back-porting/patching, and while we get to know potential future requirements better for integration into the public API. (Consider potential relationships to `Env::GetThreadStatusUpdater()` and `TransactionDBMutexFactory`.) Pull Request resolved: https://github.com/facebook/rocksdb/pull/13103 Test Plan: Performance validated with db_bench and DEBUG_LEVEL=0: `./db_bench --benchmarks=fillseq,deleterandom,readseq[-X100] --value_size=1 --num=1000000` No consistent difference seen; variances likely in how DB / executable / memory were laid out. ``` With an empty hook: readseq [AVG 100 runs] : 1753018 (± 8850) ops/sec; 28.4 (± 0.1) MB/sec readseq [MEDIAN 100 runs] : 1763746 ops/sec; 28.6 MB/sec (recompile) readseq [AVG 100 runs] : 1789019 (± 10260) ops/sec; 29.0 (± 0.2) MB/sec readseq [MEDIAN 100 runs] : 1801849 ops/sec; 29.2 MB/sec Base: readseq [AVG 100 runs] : 1772196 (± 8240) ops/sec; 28.7 (± 0.1) MB/sec readseq [MEDIAN 100 runs] : 1780453 ops/sec; 28.9 MB/sec (recompile) readseq [AVG 100 runs] : 1777637 (± 7613) ops/sec; 28.8 (± 0.1) MB/sec readseq [MEDIAN 100 runs] : 1786657 ops/sec; 29.0 MB/sec With a functional hook (count number of calls into it): readseq [AVG 100 runs] : 1796733 (± 8854) ops/sec; 29.1 (± 0.1) MB/sec readseq [MEDIAN 100 runs] : 1804690 ops/sec; 29.3 MB/sec RocksDbThreadYield: 126915800 (recompile) readseq [AVG 100 runs] : 1775371 (± 10529) ops/sec; 28.8 (± 0.2) MB/sec readseq [MEDIAN 100 runs] : 1789046 ops/sec; 29.0 MB/sec RocksDbThreadYield: 126977000 Base: readseq [AVG 100 runs] : 1773071 (± 10657) ops/sec; 28.7 (± 0.2) MB/sec readseq [MEDIAN 100 runs] : 1783414 ops/sec; 28.9 MB/sec (recompile) readseq [AVG 100 runs] : 1750852 (± 10184) ops/sec; 28.4 (± 0.2) MB/sec readseq [MEDIAN 100 runs] : 1763587 ops/sec; 28.6 MB/sec ``` Reviewed By: george-reynya Differential Revision: D65235379 Pulled By: pdillinger fbshipit-source-id: 7829e4cc25a56d4c1801b8adf9c7f7aa49ab7aca --- db/db_impl/db_impl.cc | 2 ++ db/db_iter.cc | 2 ++ port/port.h | 16 ++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index a9cde476ac..48f6529bee 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3230,6 +3230,8 @@ Status DBImpl::MultiGetImpl( s = Status::Aborted(); break; } + // This could be a long-running operation + ROCKSDB_THREAD_YIELD_HOOK(); } // Post processing (decrement reference counts and record statistics) diff --git a/db/db_iter.cc b/db/db_iter.cc index 97f6f7a076..49537f7011 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -581,6 +581,8 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key, } else { iter_.Next(); } + // This could be a long-running operation due to tombstones, etc. + ROCKSDB_THREAD_YIELD_HOOK(); } while (iter_.Valid()); valid_ = false; diff --git a/port/port.h b/port/port.h index 13aa56d47b..141716e5b9 100644 --- a/port/port.h +++ b/port/port.h @@ -19,3 +19,19 @@ #elif defined(OS_WIN) #include "port/win/port_win.h" #endif + +#ifdef OS_LINUX +// A temporary hook into long-running RocksDB threads to support modifying their +// priority etc. This should become a public API hook once the requirements +// are better understood. +extern "C" void RocksDbThreadYield() __attribute__((__weak__)); +#define ROCKSDB_THREAD_YIELD_HOOK() \ + { \ + if (RocksDbThreadYield) { \ + RocksDbThreadYield(); \ + } \ + } +#else +#define ROCKSDB_THREAD_YIELD_HOOK() \ + {} +#endif