Clean up some filter tests and comments (#5960)

Summary:
Some filtering tests were unfriendly to new implementations of
FilterBitsBuilder because of dynamic_cast to FullFilterBitsBuilder. Most
of those have now been cleaned up, worked around, or at least changed
from crash on dynamic_cast failure to individual test failure.

Also put some clarifying comments on filter-related APIs.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5960

Test Plan: make check

Differential Revision: D18121223

Pulled By: pdillinger

fbshipit-source-id: e83827d9d5d96315d96f8e25a99cd70f497d802c
This commit is contained in:
Peter Dillinger 2019-10-24 18:46:48 -07:00 committed by Facebook Github Bot
parent 2309fd63bf
commit 013babc685
6 changed files with 72 additions and 33 deletions

View File

@ -44,12 +44,13 @@ class FilterBitsBuilder {
// The ownership of actual data is set to buf // The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0; virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
// Calculate num of entries fit into a space. // Calculate num of keys that can be added and generate a filter
// <= the specified number of bytes.
#if defined(_MSC_VER) #if defined(_MSC_VER)
#pragma warning(push) #pragma warning(push)
#pragma warning(disable : 4702) // unreachable code #pragma warning(disable : 4702) // unreachable code
#endif #endif
virtual int CalculateNumEntry(const uint32_t /*space*/) { virtual int CalculateNumEntry(const uint32_t /*bytes*/) {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
throw std::runtime_error("CalculateNumEntry not Implemented"); throw std::runtime_error("CalculateNumEntry not Implemented");
#else #else
@ -119,13 +120,13 @@ class FilterPolicy {
// list, but it should aim to return false with a high probability. // list, but it should aim to return false with a high probability.
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
// Get the FilterBitsBuilder, which is ONLY used for full filter block // Return a new FilterBitsBuilder for full or partitioned filter blocks, or
// It contains interface to take individual key, then generate filter // nullptr if using block-based filter.
virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; } virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
// Get the FilterBitsReader, which is ONLY used for full filter block // Return a new FilterBitsReader for full or partitioned filter blocks, or
// It contains interface to tell if key can be in filter // nullptr if using block-based filter.
// The input slice should NOT be deleted by FilterPolicy // As here, the input slice should NOT be deleted by FilterPolicy.
virtual FilterBitsReader* GetFilterBitsReader( virtual FilterBitsReader* GetFilterBitsReader(
const Slice& /*contents*/) const { const Slice& /*contents*/) const {
return nullptr; return nullptr;

View File

@ -103,16 +103,16 @@ char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
return data; return data;
} }
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) { int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
assert(bits_per_key_); assert(bits_per_key_);
assert(space > 0); assert(bytes > 0);
uint32_t dont_care1, dont_care2; uint32_t dont_care1, dont_care2;
int high = static_cast<int>(space * 8 / bits_per_key_ + 1); int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
int low = 1; int low = 1;
int n = high; int n = high;
for (; n >= low; n--) { for (; n >= low; n--) {
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2); uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
if (sz <= space) { if (sz <= bytes) {
break; break;
} }
} }

View File

@ -28,7 +28,7 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
~FullFilterBitsBuilder(); ~FullFilterBitsBuilder();
virtual void AddKey(const Slice& key) override; void AddKey(const Slice& key) override;
// Create a filter that for hashes [0, n-1], the filter is allocated here // Create a filter that for hashes [0, n-1], the filter is allocated here
// When creating filter, it is ensured that // When creating filter, it is ensured that
@ -44,12 +44,13 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
// +----------------------------------------------------------------+ // +----------------------------------------------------------------+
// | ... | num_probes : 1 byte | num_lines : 4 bytes | // | ... | num_probes : 1 byte | num_lines : 4 bytes |
// +----------------------------------------------------------------+ // +----------------------------------------------------------------+
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override; Slice Finish(std::unique_ptr<const char[]>* buf) override;
// Calculate num of entries fit into a space. int CalculateNumEntry(const uint32_t bytes) override;
virtual int CalculateNumEntry(const uint32_t space) override;
// Calculate space for new filter. This is reverse of CalculateNumEntry. // Calculate number of bytes needed for a new filter, including
// metadata. Passing the result to CalculateNumEntry should
// return >= the num_entry passed in.
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits, uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines); uint32_t* num_lines);

View File

@ -3,6 +3,8 @@
// COPYING file in the root directory) and Apache 2.0 License // COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#include <set>
#include "table/block_based/full_filter_block.h" #include "table/block_based/full_filter_block.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader.h"
@ -205,24 +207,62 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
/*lookup_context=*/nullptr)); /*lookup_context=*/nullptr));
} }
class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder {
std::unique_ptr<FilterBitsBuilder> b_;
std::set<std::string> uniq_;
public:
explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {}
~CountUniqueFilterBitsBuilderWrapper() override {}
void AddKey(const Slice& key) override {
b_->AddKey(key);
uniq_.insert(key.ToString());
}
Slice Finish(std::unique_ptr<const char[]>* buf) override {
Slice rv = b_->Finish(buf);
uniq_.clear();
return rv;
}
int CalculateNumEntry(const uint32_t bytes) override {
return b_->CalculateNumEntry(bytes);
}
size_t CountUnique() { return uniq_.size(); }
};
TEST_F(FullFilterBlockTest, DuplicateEntries) { TEST_F(FullFilterBlockTest, DuplicateEntries) {
{ // empty prefixes { // empty prefixes
std::unique_ptr<const SliceTransform> prefix_extractor( std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(0)); NewFixedPrefixTransform(0));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>( auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
table_options_.filter_policy->GetFilterBitsBuilder()); table_options_.filter_policy->GetFilterBitsBuilder());
const bool WHOLE_KEY = true; const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder); bits_builder);
ASSERT_EQ(0, builder.NumAdded()); ASSERT_EQ(0, builder.NumAdded());
builder.Add("key"); // test with empty prefix ASSERT_EQ(0, bits_builder->CountUnique());
ASSERT_EQ(2, bits_builder->hash_entries_.size()); // adds key and empty prefix; both abstractions count them
builder.Add("key1");
ASSERT_EQ(2, builder.NumAdded());
ASSERT_EQ(2, bits_builder->CountUnique());
// Add different key (unique) and also empty prefix (not unique).
// From here in this test, it's immaterial whether the block builder
// can count unique keys.
builder.Add("key2");
ASSERT_EQ(3, bits_builder->CountUnique());
// Empty key -> nothing unique
builder.Add("");
ASSERT_EQ(3, bits_builder->CountUnique());
} }
// mix of empty and non-empty // mix of empty and non-empty
std::unique_ptr<const SliceTransform> prefix_extractor( std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(7)); NewFixedPrefixTransform(7));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>( auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
table_options_.filter_policy->GetFilterBitsBuilder()); table_options_.filter_policy->GetFilterBitsBuilder());
const bool WHOLE_KEY = true; const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
@ -234,8 +274,8 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
builder.Add("prefix1key2"); builder.Add("prefix1key2");
builder.Add("prefix1key3"); builder.Add("prefix1key3");
builder.Add("prefix2key4"); builder.Add("prefix2key4");
// two prefix adn 4 keys // 1 empty, 2 non-empty prefixes, and 4 non-empty keys
ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size()); ASSERT_EQ(1 + 2 + 4, bits_builder->CountUnique());
} }
TEST_F(FullFilterBlockTest, SingleChunk) { TEST_F(FullFilterBlockTest, SingleChunk) {

View File

@ -65,12 +65,15 @@ class PartitionedFilterBlockTest
InternalKeyComparator icomp_; InternalKeyComparator icomp_;
std::unique_ptr<BlockBasedTable> table_; std::unique_ptr<BlockBasedTable> table_;
std::shared_ptr<Cache> cache_; std::shared_ptr<Cache> cache_;
int bits_per_key_;
PartitionedFilterBlockTest() PartitionedFilterBlockTest()
: ioptions_(options_), : ioptions_(options_),
env_options_(options_), env_options_(options_),
icomp_(options_.comparator) { icomp_(options_.comparator),
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false)); bits_per_key_(10) {
table_options_.filter_policy.reset(
NewBloomFilterPolicy(bits_per_key_, false));
table_options_.format_version = GetParam(); table_options_.format_version = GetParam();
table_options_.index_block_restart_interval = 3; table_options_.index_block_restart_interval = 3;
} }
@ -91,16 +94,9 @@ class PartitionedFilterBlockTest
} }
uint64_t MaxFilterSize() { uint64_t MaxFilterSize() {
uint32_t dont_care1, dont_care2;
int num_keys = sizeof(keys) / sizeof(*keys); int num_keys = sizeof(keys) / sizeof(*keys);
auto filter_bits_reader = dynamic_cast<rocksdb::FullFilterBitsBuilder*>( // General, rough over-approximation
table_options_.filter_policy->GetFilterBitsBuilder()); return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5);
assert(filter_bits_reader);
auto partition_size =
filter_bits_reader->CalculateSpace(num_keys, &dont_care1, &dont_care2);
delete filter_bits_reader;
return partition_size +
partition_size * table_options_.block_size_deviation / 100;
} }
uint64_t last_offset = 10; uint64_t last_offset = 10;

View File

@ -324,6 +324,7 @@ class FullBloomTest : public testing::Test {
TEST_F(FullBloomTest, FilterSize) { TEST_F(FullBloomTest, FilterSize) {
uint32_t dont_care1, dont_care2; uint32_t dont_care1, dont_care2;
auto full_bits_builder = GetFullFilterBitsBuilder(); auto full_bits_builder = GetFullFilterBitsBuilder();
ASSERT_TRUE(full_bits_builder != nullptr);
for (int n = 1; n < 100; n++) { for (int n = 1; n < 100; n++) {
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2); auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
auto n2 = full_bits_builder->CalculateNumEntry(space); auto n2 = full_bits_builder->CalculateNumEntry(space);