From a065cdb388cd7b35f78fb88db734642e8bfb4be3 Mon Sep 17 00:00:00 2001 From: dyniusz Date: Wed, 7 Oct 2015 11:23:20 -0700 Subject: [PATCH] bloom hit/miss stats for SST and memtable Summary: hit and miss bloom filter stats for memtable and SST stats added to perf_context struct key matches and prefix matches combined into one stat Test Plan: unit test verifying the functionality added, see BloomStatsTest in db_test.cc for details Reviewers: yhchiang, igor, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D47859 --- db/db_test.cc | 153 ++++++++++++++++++++++++++++++ db/memtable.cc | 25 +++-- include/rocksdb/perf_context.h | 8 ++ table/block_based_filter_block.cc | 12 ++- table/full_filter_block.cc | 9 +- table/plain_table_reader.cc | 12 ++- util/perf_context.cc | 8 +- 7 files changed, 215 insertions(+), 12 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 38d5d7416c..4bfe4dbd24 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -131,6 +131,46 @@ class DBTestWithParam : public DBTest, uint32_t max_subcompactions_; }; +class BloomStatsTestWithParam + : public DBTest, + public testing::WithParamInterface> { + public: + BloomStatsTestWithParam() { + use_block_table_ = std::get<0>(GetParam()); + use_block_based_builder_ = std::get<1>(GetParam()); + + options_.create_if_missing = true; + options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4)); + options_.memtable_prefix_bloom_bits = 8 * 1024; + if (use_block_table_) { + BlockBasedTableOptions table_options; + table_options.hash_index_allow_collision = false; + table_options.filter_policy.reset( + NewBloomFilterPolicy(10, use_block_based_builder_)); + options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); + } else { + PlainTableOptions table_options; + options_.table_factory.reset(NewPlainTableFactory(table_options)); + } + + perf_context.Reset(); + DestroyAndReopen(options_); + } + + ~BloomStatsTestWithParam() { + 
perf_context.Reset(); + Destroy(options_); + } + + // Required if inheriting from testing::WithParamInterface<> + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + bool use_block_table_; + bool use_block_based_builder_; + Options options_; +}; + TEST_F(DBTest, Empty) { do { Options options; @@ -9709,6 +9749,119 @@ TEST_F(DBTest, PauseBackgroundWorkTest) { ASSERT_EQ(true, done.load()); } +// 1 Insert 2 K-V pairs into DB +// 2 Call Get() for both keys - expect memtable bloom hit stat to be 2 +// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1 +// 4 Call Flush() to create SST +// 5 Call Get() for both keys - expect SST bloom hit stat to be 2 +// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1 +// Test both: block and plain SST +TEST_P(BloomStatsTestWithParam, BloomStatsTest) { + std::string key1("AAAA"); + std::string key2("RXDB"); // not in DB + std::string key3("ZBRA"); + std::string value1("Value1"); + std::string value3("Value3"); + + ASSERT_OK(Put(key1, value1, WriteOptions())); + ASSERT_OK(Put(key3, value3, WriteOptions())); + + // check memtable bloom stats + ASSERT_EQ(value1, Get(key1)); + ASSERT_EQ(1, perf_context.bloom_memtable_hit_count); + ASSERT_EQ(value3, Get(key3)); + ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); + ASSERT_EQ(0, perf_context.bloom_memtable_miss_count); + + ASSERT_EQ("NOT_FOUND", Get(key2)); + ASSERT_EQ(1, perf_context.bloom_memtable_miss_count); + ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); + + // sanity checks + ASSERT_EQ(0, perf_context.bloom_sst_hit_count); + ASSERT_EQ(0, perf_context.bloom_sst_miss_count); + + Flush(); + + // sanity checks + ASSERT_EQ(0, perf_context.bloom_sst_hit_count); + ASSERT_EQ(0, perf_context.bloom_sst_miss_count); + + // check SST bloom stats + // NOTE: hits per get differs because of code path differences + // in BlockBasedTable::Get() + int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 
2 : 1; + ASSERT_EQ(value1, Get(key1)); + ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count); + ASSERT_EQ(value3, Get(key3)); + ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count); + + ASSERT_EQ("NOT_FOUND", Get(key2)); + ASSERT_EQ(1, perf_context.bloom_sst_miss_count); +} + +// Same scenario as in BloomStatsTest but using an iterator +TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { + std::string key1("AAAA"); + std::string key2("RXDB"); // not in DB + std::string key3("ZBRA"); + std::string value1("Value1"); + std::string value3("Value3"); + + ASSERT_OK(Put(key1, value1, WriteOptions())); + ASSERT_OK(Put(key3, value3, WriteOptions())); + + unique_ptr iter(dbfull()->NewIterator(ReadOptions())); + + // check memtable bloom stats + iter->Seek(key1); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(value1, iter->value().ToString()); + ASSERT_EQ(1, perf_context.bloom_memtable_hit_count); + ASSERT_EQ(0, perf_context.bloom_memtable_miss_count); + + iter->Seek(key3); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(value3, iter->value().ToString()); + ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); + ASSERT_EQ(0, perf_context.bloom_memtable_miss_count); + + iter->Seek(key2); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + ASSERT_EQ(1, perf_context.bloom_memtable_miss_count); + ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); + + Flush(); + + iter.reset(dbfull()->NewIterator(ReadOptions())); + + // check SST bloom stats + iter->Seek(key1); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(value1, iter->value().ToString()); + ASSERT_EQ(1, perf_context.bloom_sst_hit_count); + + iter->Seek(key3); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(value3, iter->value().ToString()); + ASSERT_EQ(2, perf_context.bloom_sst_hit_count); + + iter->Seek(key2); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + ASSERT_EQ(1, 
perf_context.bloom_sst_miss_count); + ASSERT_EQ(2, perf_context.bloom_sst_hit_count); +} + +INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam, + ::testing::Values(std::make_tuple(true, true), + std::make_tuple(true, false), + std::make_tuple(false, false))); } // namespace rocksdb #endif diff --git a/db/memtable.cc b/db/memtable.cc index b3692364d1..54c119ee2f 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -230,10 +230,15 @@ class MemTableIterator: public Iterator { virtual void Seek(const Slice& k) override { PERF_TIMER_GUARD(seek_on_memtable_time); PERF_COUNTER_ADD(seek_on_memtable_count, 1); - if (bloom_ != nullptr && - !bloom_->MayContain(prefix_extractor_->Transform(ExtractUserKey(k)))) { - valid_ = false; - return; + if (bloom_ != nullptr) { + if (!bloom_->MayContain( + prefix_extractor_->Transform(ExtractUserKey(k)))) { + PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); + valid_ = false; + return; + } else { + PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); + } } iter_->Seek(k, nullptr); valid_ = iter_->Valid(); @@ -508,12 +513,18 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, Slice user_key = key.user_key(); bool found_final_value = false; bool merge_in_progress = s->IsMergeInProgress(); - - if (prefix_bloom_ && - !prefix_bloom_->MayContain(prefix_extractor_->Transform(user_key))) { + bool const may_contain = + nullptr == prefix_bloom_ + ? 
false + : prefix_bloom_->MayContain(prefix_extractor_->Transform(user_key)); + if (prefix_bloom_ && !may_contain) { // iter is null if prefix bloom says the key does not exist + PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); *seq = kMaxSequenceNumber; } else { + if (prefix_bloom_) { + PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); + } Saver saver; saver.status = s; saver.found_final_value = &found_final_value; diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index 10cae422d6..a7c993c7b5 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -83,6 +83,14 @@ struct PerfContext { uint64_t block_seek_nanos; // Time spent on finding or creating a table reader uint64_t find_table_nanos; + // total number of mem table bloom hits + uint64_t bloom_memtable_hit_count; + // total number of mem table bloom misses + uint64_t bloom_memtable_miss_count; + // total number of SST table bloom hits + uint64_t bloom_sst_hit_count; + // total number of SST table bloom misses + uint64_t bloom_sst_miss_count; }; #if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) diff --git a/table/block_based_filter_block.cc b/table/block_based_filter_block.cc index cd56028001..c33d485975 100644 --- a/table/block_based_filter_block.cc +++ b/table/block_based_filter_block.cc @@ -7,12 +7,13 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#include #include "table/block_based_filter_block.h" +#include #include "db/dbformat.h" #include "rocksdb/filter_policy.h" #include "util/coding.h" +#include "util/perf_context_imp.h" #include "util/string_util.h" namespace rocksdb { @@ -219,7 +220,14 @@ bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry, uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); if (start <= limit && limit <= (uint32_t)(offset_ - data_)) { Slice filter = Slice(data_ + start, limit - start); - return policy_->KeyMayMatch(entry, filter); + bool const may_match = policy_->KeyMayMatch(entry, filter); + if (may_match) { + PERF_COUNTER_ADD(bloom_sst_hit_count, 1); + return true; + } else { + PERF_COUNTER_ADD(bloom_sst_miss_count, 1); + return false; + } } else if (start == limit) { // Empty filters do not match any entries return false; diff --git a/table/full_filter_block.cc b/table/full_filter_block.cc index b3afdac03f..3744d417f9 100644 --- a/table/full_filter_block.cc +++ b/table/full_filter_block.cc @@ -8,6 +8,7 @@ #include "rocksdb/filter_policy.h" #include "port/port.h" #include "util/coding.h" +#include "util/perf_context_imp.h" namespace rocksdb { @@ -89,7 +90,13 @@ bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix, bool FullFilterBlockReader::MayMatch(const Slice& entry) { if (contents_.size() != 0) { - return filter_bits_reader_->MayMatch(entry); + if (filter_bits_reader_->MayMatch(entry)) { + PERF_COUNTER_ADD(bloom_sst_hit_count, 1); + return true; + } else { + PERF_COUNTER_ADD(bloom_sst_miss_count, 1); + return false; + } } return true; // remain the same with block_based filter } diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index 46457de165..1aabbb98f1 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -488,7 +488,17 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, } bool PlainTableReader::MatchBloom(uint32_t hash) const { - return !enable_bloom_ || 
bloom_.MayContainHash(hash); + if (!enable_bloom_) { + return true; + } + + if (bloom_.MayContainHash(hash)) { + PERF_COUNTER_ADD(bloom_sst_hit_count, 1); + return true; + } else { + PERF_COUNTER_ADD(bloom_sst_miss_count, 1); + return false; + } } Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset, diff --git a/util/perf_context.cc b/util/perf_context.cc index 9c3fd2029c..2825165901 100644 --- a/util/perf_context.cc +++ b/util/perf_context.cc @@ -54,6 +54,10 @@ void PerfContext::Reset() { new_table_iterator_nanos = 0; block_seek_nanos = 0; find_table_nanos = 0; + bloom_memtable_hit_count = 0; + bloom_memtable_miss_count = 0; + bloom_sst_hit_count = 0; + bloom_sst_miss_count = 0; #endif } @@ -80,7 +84,9 @@ std::string PerfContext::ToString() const { << OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time) << OUTPUT(read_index_block_nanos) << OUTPUT(read_filter_block_nanos) << OUTPUT(new_table_block_iter_nanos) << OUTPUT(new_table_iterator_nanos) - << OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos); + << OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos) + << OUTPUT(bloom_memtable_hit_count) << OUTPUT(bloom_memtable_miss_count) + << OUTPUT(bloom_sst_hit_count) << OUTPUT(bloom_sst_miss_count); return ss.str(); #endif }