mirror of https://github.com/facebook/rocksdb.git
Aggregate hot Iterator counters in LocalStatistics (DBIter::Next perf regression)
Summary: This patch bumps the counters in the frequent code paths DBIter::Next() / DBIter::Prev() in local data members and sends them to Statistics when the iterator is destroyed. A better solution would be to have a thread_local implementation for Statistics. New performance: ``` readseq : 0.035 micros/op 28597881 ops/sec; 3163.7 MB/s 1,851,568,819 stalled-cycles-frontend # 31.29% frontend cycles idle [49.86%] 884,929,823 stalled-cycles-backend # 14.95% backend cycles idle [50.21%] readreverse : 0.071 micros/op 14077393 ops/sec; 1557.3 MB/s 3,239,575,993 stalled-cycles-frontend # 27.36% frontend cycles idle [49.96%] 1,558,253,983 stalled-cycles-backend # 13.16% backend cycles idle [50.14%] ``` Existing performance: ``` readreverse : 0.174 micros/op 5732342 ops/sec; 634.1 MB/s 20,570,209,389 stalled-cycles-frontend # 70.71% frontend cycles idle [50.01%] 18,422,816,837 stalled-cycles-backend # 63.33% backend cycles idle [50.04%] readseq : 0.119 micros/op 8400537 ops/sec; 929.3 MB/s 15,634,225,844 stalled-cycles-frontend # 79.07% frontend cycles idle [49.96%] 14,227,427,453 stalled-cycles-backend # 71.95% backend cycles idle [50.09%] ``` Test Plan: unit tests Reviewers: yhchiang, sdong, igor Reviewed By: sdong Subscribers: andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55107
This commit is contained in:
parent
790252805d
commit
580fede347
|
@ -3,6 +3,7 @@
|
|||
### Public API Changes
|
||||
* Change default of BlockBasedTableOptions.format_version to 2. This means a default DB created by 4.6 or later cannot be opened by RocksDB version 3.9 or earlier.
|
||||
* Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, inserting into the cache will fail if not enough capacity can be freed. The signature of Cache::Insert() is updated accordingly.
|
||||
* Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. They are updated when the Iterator is deleted.
|
||||
### New Features
|
||||
* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification.
|
||||
* Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned"
|
||||
|
|
|
@ -60,6 +60,44 @@ class DBIter: public Iterator {
|
|||
kReverse
|
||||
};
|
||||
|
||||
// LocalStatistics contain Statistics counters that will be aggregated per
|
||||
// each iterator instance and then will be sent to the global statistics when
|
||||
// the iterator is destroyed.
|
||||
//
|
||||
// The purpose of this approach is to avoid perf regression happening
|
||||
// when multiple threads bump the atomic counters from a DBIter::Next().
|
||||
struct LocalStatistics {
|
||||
explicit LocalStatistics() { ResetCounters(); }
|
||||
|
||||
void ResetCounters() {
|
||||
next_count_ = 0;
|
||||
next_found_count_ = 0;
|
||||
prev_count_ = 0;
|
||||
prev_found_count_ = 0;
|
||||
bytes_read_ = 0;
|
||||
}
|
||||
|
||||
void BumpGlobalStatistics(Statistics* global_statistics) {
|
||||
RecordTick(global_statistics, NUMBER_DB_NEXT, next_count_);
|
||||
RecordTick(global_statistics, NUMBER_DB_NEXT_FOUND, next_found_count_);
|
||||
RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_);
|
||||
RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_);
|
||||
RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_);
|
||||
ResetCounters();
|
||||
}
|
||||
|
||||
// Map to Tickers::NUMBER_DB_NEXT
|
||||
uint64_t next_count_;
|
||||
// Map to Tickers::NUMBER_DB_NEXT_FOUND
|
||||
uint64_t next_found_count_;
|
||||
// Map to Tickers::NUMBER_DB_PREV
|
||||
uint64_t prev_count_;
|
||||
// Map to Tickers::NUMBER_DB_PREV_FOUND
|
||||
uint64_t prev_found_count_;
|
||||
// Map to Tickers::ITER_BYTES_READ
|
||||
uint64_t bytes_read_;
|
||||
};
|
||||
|
||||
DBIter(Env* env, const ImmutableCFOptions& ioptions, const Comparator* cmp,
|
||||
InternalIterator* iter, SequenceNumber s, bool arena_mode,
|
||||
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
|
||||
|
@ -86,6 +124,7 @@ class DBIter: public Iterator {
|
|||
}
|
||||
virtual ~DBIter() {
|
||||
RecordTick(statistics_, NO_ITERATORS, -1);
|
||||
local_stats_.BumpGlobalStatistics(statistics_);
|
||||
if (!arena_mode_) {
|
||||
delete iter_;
|
||||
} else {
|
||||
|
@ -213,6 +252,7 @@ class DBIter: public Iterator {
|
|||
bool iter_pinned_;
|
||||
// List of operands for merge operator.
|
||||
std::deque<std::string> merge_operands_;
|
||||
LocalStatistics local_stats_;
|
||||
|
||||
// No copying allowed
|
||||
DBIter(const DBIter&);
|
||||
|
@ -250,6 +290,9 @@ void DBIter::Next() {
|
|||
PERF_COUNTER_ADD(internal_key_skipped_count, 1);
|
||||
}
|
||||
|
||||
if (statistics_ != nullptr) {
|
||||
local_stats_.next_count_++;
|
||||
}
|
||||
// Now we point to the next internal position, for both of merge and
|
||||
// not merge cases.
|
||||
if (!iter_->Valid()) {
|
||||
|
@ -257,18 +300,15 @@ void DBIter::Next() {
|
|||
return;
|
||||
}
|
||||
FindNextUserEntry(true /* skipping the current user key */);
|
||||
if (statistics_ != nullptr) {
|
||||
RecordTick(statistics_, NUMBER_DB_NEXT);
|
||||
if (valid_) {
|
||||
RecordTick(statistics_, NUMBER_DB_NEXT_FOUND);
|
||||
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
|
||||
}
|
||||
}
|
||||
if (valid_ && prefix_extractor_ && prefix_same_as_start_ &&
|
||||
prefix_extractor_->Transform(saved_key_.GetKey())
|
||||
.compare(prefix_start_.GetKey()) != 0) {
|
||||
valid_ = false;
|
||||
}
|
||||
if (statistics_ != nullptr && valid_) {
|
||||
local_stats_.next_found_count_++;
|
||||
local_stats_.bytes_read_ += (key().size() + value().size());
|
||||
}
|
||||
}
|
||||
|
||||
// PRE: saved_key_ has the current user key if skipping
|
||||
|
@ -436,10 +476,10 @@ void DBIter::Prev() {
|
|||
}
|
||||
PrevInternal();
|
||||
if (statistics_ != nullptr) {
|
||||
RecordTick(statistics_, NUMBER_DB_PREV);
|
||||
local_stats_.prev_count_++;
|
||||
if (valid_) {
|
||||
RecordTick(statistics_, NUMBER_DB_PREV_FOUND);
|
||||
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
|
||||
local_stats_.prev_found_count_++;
|
||||
local_stats_.bytes_read_ += (key().size() + value().size());
|
||||
}
|
||||
}
|
||||
if (valid_ && prefix_extractor_ && prefix_same_as_start_ &&
|
||||
|
|
|
@ -10611,6 +10611,88 @@ TEST_F(DBTest, PrefixExtractorBlockFilter) {
|
|||
delete iter;
|
||||
}
|
||||
|
||||
// Checks that the iterator tickers (NUMBER_DB_NEXT, NUMBER_DB_NEXT_FOUND,
// NUMBER_DB_PREV, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ) match the totals
// counted by several concurrent forward and backward scans once every
// iterator has been deleted.
TEST_F(DBTest, IteratorWithLocalStatistics) {
  Options options = CurrentOptions();
  options.statistics = rocksdb::CreateDBStatistics();
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < 1000; i++) {
    // Key 10 bytes / Value 10 bytes
    ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
  }

  // Totals observed by the reader threads; compared with the tickers below.
  std::atomic<uint64_t> total_next(0);
  std::atomic<uint64_t> total_next_found(0);
  std::atomic<uint64_t> total_prev(0);
  std::atomic<uint64_t> total_prev_found(0);
  std::atomic<uint64_t> total_bytes(0);

  // Adds the size of the entry the iterator currently points at to
  // total_bytes.
  const auto add_entry_bytes = [&](Iterator* it) {
    total_bytes += it->key().size();
    total_bytes += it->value().size();
  };

  // Scans the whole DB front to back, counting every Next() call and every
  // entry it lands on.
  const auto reader_func_next = [&]() {
    Iterator* iter = db_->NewIterator(ReadOptions());

    iter->SeekToFirst();
    // Seek will bump ITER_BYTES_READ
    add_entry_bytes(iter);

    bool found;
    do {
      iter->Next();
      total_next++;

      found = iter->Valid();
      if (found) {
        total_next_found++;
        add_entry_bytes(iter);
      }
    } while (found);

    delete iter;
  };

  // Scans the whole DB back to front, counting every Prev() call and every
  // entry it lands on.
  const auto reader_func_prev = [&]() {
    Iterator* iter = db_->NewIterator(ReadOptions());

    iter->SeekToLast();
    // Seek will bump ITER_BYTES_READ
    add_entry_bytes(iter);

    bool found;
    do {
      iter->Prev();
      total_prev++;

      found = iter->Valid();
      if (found) {
        total_prev_found++;
        add_entry_bytes(iter);
      }
    } while (found);

    delete iter;
  };

  const int kForwardReaders = 10;
  const int kBackwardReaders = 15;

  std::vector<std::thread> threads;
  for (int i = 0; i < kForwardReaders; i++) {
    threads.emplace_back(reader_func_next);
  }
  for (int i = 0; i < kBackwardReaders; i++) {
    threads.emplace_back(reader_func_prev);
  }

  for (auto& reader : threads) {
    reader.join();
  }

  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), total_next);
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
            total_next_found);
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), total_prev);
  ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
            total_prev_found);
  ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes);
}
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
class BloomStatsTestWithParam
|
||||
: public DBTest,
|
||||
|
|
Loading…
Reference in New Issue