Thread-specific histogram statistics

Summary:
To reduce contention for atomics when HistogramStats are shared across
threads, this diff makes them thread-specific so updates are faster. This comes
at the expense of slower reads (much less frequent), which now require merging
all histograms. In this diff,

- A thread-specific HistogramImpl is created upon each thread's first measureTime() call
- Thread-specific HistogramImpl objects are merged and deleted upon thread termination or ThreadLocalPtr destruction, whichever comes first
- getHistogramString() and histogramData() merge all histograms, both thread-specific and previously merged ones

Test Plan:
unit tests, ran db_bench and verified histograms look similar

before:

  $ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
  ...
  +    7.63%  db_bench     db_bench             [.] rocksdb::HistogramStat::Add

after:

  $ TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench --benchmarks=readwhilewriting --statistics --num=1000000 --use_existing_db --threads=64 --cache_size=250000000 --compression_type=lz4
  ...
  +    0.98%  db_bench     db_bench             [.] rocksdb::HistogramStat::Add

Reviewers: sdong, MarkCallaghan, kradhakrishnan, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D62649
This commit is contained in:
Andrew Kryczka 2016-08-31 14:02:09 -07:00
parent 6a14d55bd9
commit 1613fa9490
2 changed files with 79 additions and 12 deletions

View file

@ -49,20 +49,34 @@ uint64_t StatisticsImpl::getTickerCount(uint32_t tickerType) const {
tickers_[tickerType].merged_sum.load(std::memory_order_relaxed);
}
std::unique_ptr<HistogramImpl>
StatisticsImpl::HistogramInfo::getMergedHistogram() const {
MutexLock lock(&merge_lock);
std::unique_ptr<HistogramImpl> res_hist(new HistogramImpl());
res_hist->Merge(merged_hist);
thread_value->Fold(
[](void* curr_ptr, void* res) {
auto tmp_res_hist = static_cast<HistogramImpl*>(res);
auto curr_hist = static_cast<HistogramImpl*>(curr_ptr);
tmp_res_hist->Merge(*curr_hist);
},
res_hist.get());
return res_hist;
}
// Fills *data with the summary of the histogram selected by histogramType.
// The assert bounds histogramType by INTERNAL_HISTOGRAM_ENUM_MAX when
// enable_internal_stats_ is set and by HISTOGRAM_ENUM_MAX otherwise.
// NOTE(review): this text is a diff view with its +/- markers stripped; the
// bare `.Data(data)` call is presumably the removed line and the
// `getMergedHistogram()->Data(data)` call its replacement -- confirm against
// the repository before treating both as live code.
void StatisticsImpl::histogramData(uint32_t histogramType,
HistogramData* const data) const {
assert(
enable_internal_stats_ ?
histogramType < INTERNAL_HISTOGRAM_ENUM_MAX :
histogramType < HISTOGRAM_ENUM_MAX);
// Report from this object's own histogram (the earlier wording said
// "ticker", but this reads histograms_).
histograms_[histogramType].Data(data);
histograms_[histogramType].getMergedHistogram()->Data(data);
}
// Returns the string form of the histogram selected by histogramType.
// The assert bounds histogramType by INTERNAL_HISTOGRAM_ENUM_MAX when
// enable_internal_stats_ is set and by HISTOGRAM_ENUM_MAX otherwise.
// NOTE(review): this text is a diff view with its +/- markers stripped; the
// first `return` is presumably the removed line and the second (via
// getMergedHistogram()) its replacement -- confirm against the repository.
std::string StatisticsImpl::getHistogramString(uint32_t histogramType) const {
assert(enable_internal_stats_ ? histogramType < INTERNAL_HISTOGRAM_ENUM_MAX
: histogramType < HISTOGRAM_ENUM_MAX);
return histograms_[histogramType].ToString();
return histograms_[histogramType].getMergedHistogram()->ToString();
}
StatisticsImpl::ThreadTickerInfo* StatisticsImpl::getThreadTickerInfo(
@ -77,6 +91,18 @@ StatisticsImpl::ThreadTickerInfo* StatisticsImpl::getThreadTickerInfo(
return info_ptr;
}
// Looks up the calling thread's ThreadHistogramInfo for histogram_type,
// creating and registering one on first use.
//
// A newly created record is given pointers to the shared merged_hist and
// merge_lock of the same histogram slot, then handed to the slot's
// ThreadLocalPtr via Reset().
StatisticsImpl::ThreadHistogramInfo* StatisticsImpl::getThreadHistogramInfo(
    uint32_t histogram_type) {
  auto& slot = histograms_[histogram_type];

  // Fast path: this thread already registered a record for the slot.
  void* const existing = slot.thread_value->Get();
  if (existing != nullptr) {
    return static_cast<ThreadHistogramInfo*>(existing);
  }

  // First use on this thread: allocate the record and publish it.
  auto* created = new ThreadHistogramInfo(&slot.merged_hist, &slot.merge_lock);
  slot.thread_value->Reset(created);
  return created;
}
void StatisticsImpl::setTickerCount(uint32_t tickerType, uint64_t count) {
{
MutexLock lock(&aggregate_lock_);
@ -117,7 +143,7 @@ void StatisticsImpl::measureTime(uint32_t histogramType, uint64_t value) {
histogramType < INTERNAL_HISTOGRAM_ENUM_MAX :
histogramType < HISTOGRAM_ENUM_MAX);
if (histogramType < HISTOGRAM_ENUM_MAX || enable_internal_stats_) {
histograms_[histogramType].Add(value);
getThreadHistogramInfo(histogramType)->value.Add(value);
}
if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) {
stats_->measureTime(histogramType, value);

View file

@ -66,8 +66,21 @@ class StatisticsImpl : public Statistics {
: value(_value), merged_sum(_merged_sum) {}
};
struct Ticker {
Ticker()
// Per-thread state backing one histogram: the thread's own histogram plus
// pointers to the shared state it is folded into at teardown.
struct ThreadHistogramInfo {
  // Histogram updated by the owning thread.
  HistogramImpl value;

  // Teardown destination: value is merged into *merged_hist while
  // *merge_lock is held. The same lock also guards the merges done for
  // reads.
  HistogramImpl* merged_hist;
  port::Mutex* merge_lock;

  // Starts with a default-constructed histogram and records where it must be
  // merged later.
  ThreadHistogramInfo(HistogramImpl* _merged_hist, port::Mutex* _merge_lock)
      : value(),
        merged_hist(_merged_hist),
        merge_lock(_merge_lock) {}
};
// Holds global data for implementing tickers.
struct TickerInfo {
TickerInfo()
: thread_value(new ThreadLocalPtr(&mergeThreadValue)), merged_sum(0) {}
// Holds thread-specific pointer to ThreadTickerInfo
std::unique_ptr<ThreadLocalPtr> thread_value;
@ -84,15 +97,43 @@ class StatisticsImpl : public Statistics {
}
};
// Holds global data for implementing histograms.
struct HistogramInfo {
HistogramInfo()
: merged_hist(),
merge_lock(),
thread_value(new ThreadLocalPtr(&mergeThreadValue)) {}
// Merged thread-specific values for histograms that have been reset due to
// thread termination or ThreadLocalPtr destruction. Note these must be
// destroyed after thread_value since its destructor accesses them.
HistogramImpl merged_hist;
mutable port::Mutex merge_lock;
// Holds thread-specific pointer to ThreadHistogramInfo
std::unique_ptr<ThreadLocalPtr> thread_value;
static void mergeThreadValue(void* ptr) {
auto info_ptr = static_cast<ThreadHistogramInfo*>(ptr);
{
MutexLock lock(info_ptr->merge_lock);
info_ptr->merged_hist->Merge(info_ptr->value);
}
delete info_ptr;
}
// Returns a histogram that merges all histograms (thread-specific and
// previously merged ones).
std::unique_ptr<HistogramImpl> getMergedHistogram() const;
};
// Returns the info for this tickerType/thread. It sets a new info with zeroed
// counter if none exists.
ThreadTickerInfo* getThreadTickerInfo(uint32_t tickerType);
ThreadTickerInfo* getThreadTickerInfo(uint32_t ticker_type);
// Returns the info for this histogramType/thread. It sets a new histogram
// with zeroed data if none exists.
ThreadHistogramInfo* getThreadHistogramInfo(uint32_t histogram_type);
Ticker tickers_[INTERNAL_TICKER_ENUM_MAX];
// Attributes expand to nothing depending on the platform
__declspec(align(64))
HistogramImpl histograms_[INTERNAL_HISTOGRAM_ENUM_MAX]
__attribute__((aligned(64)));
TickerInfo tickers_[INTERNAL_TICKER_ENUM_MAX];
HistogramInfo histograms_[INTERNAL_HISTOGRAM_ENUM_MAX];
};
// Utility functions