diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index 950fbe616e..c923ba354e 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -44,12 +44,13 @@ class FilterBitsBuilder { // The ownership of actual data is set to buf virtual Slice Finish(std::unique_ptr* buf) = 0; - // Calculate num of entries fit into a space. + // Calculate num of keys that can be added and generate a filter + // <= the specified number of bytes. #if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4702) // unreachable code #endif - virtual int CalculateNumEntry(const uint32_t /*space*/) { + virtual int CalculateNumEntry(const uint32_t /*bytes*/) { #ifndef ROCKSDB_LITE throw std::runtime_error("CalculateNumEntry not Implemented"); #else @@ -119,13 +120,13 @@ class FilterPolicy { // list, but it should aim to return false with a high probability. virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; - // Get the FilterBitsBuilder, which is ONLY used for full filter block - // It contains interface to take individual key, then generate filter + // Return a new FilterBitsBuilder for full or partitioned filter blocks, or + // nullptr if using block-based filter. virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; } - // Get the FilterBitsReader, which is ONLY used for full filter block - // It contains interface to tell if key can be in filter - // The input slice should NOT be deleted by FilterPolicy + // Return a new FilterBitsReader for full or partitioned filter blocks, or + // nullptr if using block-based filter. + // As here, the input slice should NOT be deleted by FilterPolicy. virtual FilterBitsReader* GetFilterBitsReader( const Slice& /*contents*/) const { return nullptr; diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 5eef86be29..c2acc570ae 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -103,16 +103,16 @@ char* FullFilterBitsBuilder::ReserveSpace(const int num_entry, return data; } -int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) { +int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t bytes) { assert(bits_per_key_); - assert(space > 0); + assert(bytes > 0); uint32_t dont_care1, dont_care2; - int high = static_cast(space * 8 / bits_per_key_ + 1); + int high = static_cast(bytes * 8 / bits_per_key_ + 1); int low = 1; int n = high; for (; n >= low; n--) { uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2); - if (sz <= space) { + if (sz <= bytes) { break; } } diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 681d20dac4..458a344127 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -28,7 +28,7 @@ class FullFilterBitsBuilder : public FilterBitsBuilder { ~FullFilterBitsBuilder(); - virtual void AddKey(const Slice& key) override; + void AddKey(const Slice& key) override; // Create a filter that for hashes [0, n-1], the filter is allocated here // When creating filter, it is ensured that @@ -44,12 +44,13 @@ class FullFilterBitsBuilder : public FilterBitsBuilder { // +----------------------------------------------------------------+ // | ... | num_probes : 1 byte | num_lines : 4 bytes | // +----------------------------------------------------------------+ - virtual Slice Finish(std::unique_ptr* buf) override; + Slice Finish(std::unique_ptr* buf) override; - // Calculate num of entries fit into a space. - virtual int CalculateNumEntry(const uint32_t space) override; + int CalculateNumEntry(const uint32_t bytes) override; - // Calculate space for new filter. This is reverse of CalculateNumEntry. + // Calculate number of bytes needed for a new filter, including + // metadata. Passing the result to CalculateNumEntry should + // return >= the num_entry passed in. uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits, uint32_t* num_lines); diff --git a/table/block_based/full_filter_block_test.cc b/table/block_based/full_filter_block_test.cc index 0f6a5cdab8..3d9655644e 100644 --- a/table/block_based/full_filter_block_test.cc +++ b/table/block_based/full_filter_block_test.cc @@ -3,6 +3,8 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). +#include + #include "table/block_based/full_filter_block.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" @@ -205,24 +207,62 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) { /*lookup_context=*/nullptr)); } +class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder { + std::unique_ptr b_; + std::set uniq_; + + public: + explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {} + + ~CountUniqueFilterBitsBuilderWrapper() override {} + + void AddKey(const Slice& key) override { + b_->AddKey(key); + uniq_.insert(key.ToString()); + } + + Slice Finish(std::unique_ptr* buf) override { + Slice rv = b_->Finish(buf); + uniq_.clear(); + return rv; + } + + int CalculateNumEntry(const uint32_t bytes) override { + return b_->CalculateNumEntry(bytes); + } + + size_t CountUnique() { return uniq_.size(); } +}; + TEST_F(FullFilterBlockTest, DuplicateEntries) { { // empty prefixes std::unique_ptr prefix_extractor( NewFixedPrefixTransform(0)); - auto bits_builder = dynamic_cast( + auto bits_builder = new CountUniqueFilterBitsBuilderWrapper( table_options_.filter_policy->GetFilterBitsBuilder()); const bool WHOLE_KEY = true; FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, bits_builder); ASSERT_EQ(0, builder.NumAdded()); - builder.Add("key"); // test with empty prefix - ASSERT_EQ(2, bits_builder->hash_entries_.size()); + ASSERT_EQ(0, bits_builder->CountUnique()); + // adds key and empty prefix; both abstractions count them + builder.Add("key1"); + ASSERT_EQ(2, builder.NumAdded()); + ASSERT_EQ(2, bits_builder->CountUnique()); + // Add different key (unique) and also empty prefix (not unique). + // From here in this test, it's immaterial whether the block builder + // can count unique keys. + builder.Add("key2"); + ASSERT_EQ(3, bits_builder->CountUnique()); + // Empty key -> nothing unique + builder.Add(""); + ASSERT_EQ(3, bits_builder->CountUnique()); } // mix of empty and non-empty std::unique_ptr prefix_extractor( NewFixedPrefixTransform(7)); - auto bits_builder = dynamic_cast( + auto bits_builder = new CountUniqueFilterBitsBuilderWrapper( table_options_.filter_policy->GetFilterBitsBuilder()); const bool WHOLE_KEY = true; FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, @@ -234,8 +274,8 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) { builder.Add("prefix1key2"); builder.Add("prefix1key3"); builder.Add("prefix2key4"); - // two prefix adn 4 keys - ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size()); + // 1 empty, 2 non-empty prefixes, and 4 non-empty keys + ASSERT_EQ(1 + 2 + 4, bits_builder->CountUnique()); } TEST_F(FullFilterBlockTest, SingleChunk) { diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index 21583f3e04..999dbb7f9a 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -65,12 +65,15 @@ class PartitionedFilterBlockTest InternalKeyComparator icomp_; std::unique_ptr table_; std::shared_ptr cache_; + int bits_per_key_; PartitionedFilterBlockTest() : ioptions_(options_), env_options_(options_), - icomp_(options_.comparator) { - table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false)); + icomp_(options_.comparator), + bits_per_key_(10) { + table_options_.filter_policy.reset( + NewBloomFilterPolicy(bits_per_key_, false)); table_options_.format_version = GetParam(); table_options_.index_block_restart_interval = 3; } @@ -91,16 +94,9 @@ class PartitionedFilterBlockTest } uint64_t MaxFilterSize() { - uint32_t dont_care1, dont_care2; int num_keys = sizeof(keys) / sizeof(*keys); - auto filter_bits_reader = dynamic_cast( - table_options_.filter_policy->GetFilterBitsBuilder()); - assert(filter_bits_reader); - auto partition_size = - filter_bits_reader->CalculateSpace(num_keys, &dont_care1, &dont_care2); - delete filter_bits_reader; - return partition_size + - partition_size * table_options_.block_size_deviation / 100; + // General, rough over-approximation + return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5); } uint64_t last_offset = 10; diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 76a0541c89..9e9b794f19 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -324,6 +324,7 @@ class FullBloomTest : public testing::Test { TEST_F(FullBloomTest, FilterSize) { uint32_t dont_care1, dont_care2; auto full_bits_builder = GetFullFilterBitsBuilder(); + ASSERT_TRUE(full_bits_builder != nullptr); for (int n = 1; n < 100; n++) { auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2); auto n2 = full_bits_builder->CalculateNumEntry(space);