mirror of https://github.com/facebook/rocksdb.git
Clean up some filter tests and comments (#5960)
Summary: Some filtering tests were unfriendly to new implementations of FilterBitsBuilder because of dynamic_cast to FullFilterBitsBuilder. Most of those have now been cleaned up, worked around, or at least changed from crash on dynamic_cast failure to individual test failure. Also put some clarifying comments on filter-related APIs. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5960 Test Plan: make check Differential Revision: D18121223 Pulled By: pdillinger fbshipit-source-id: e83827d9d5d96315d96f8e25a99cd70f497d802c
This commit is contained in:
parent
2309fd63bf
commit
013babc685
|
@ -44,12 +44,13 @@ class FilterBitsBuilder {
|
||||||
// The ownership of actual data is set to buf
|
// The ownership of actual data is set to buf
|
||||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
|
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
|
||||||
|
|
||||||
// Calculate num of entries fit into a space.
|
// Calculate num of keys that can be added and generate a filter
|
||||||
|
// <= the specified number of bytes.
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#pragma warning(push)
|
#pragma warning(push)
|
||||||
#pragma warning(disable : 4702) // unreachable code
|
#pragma warning(disable : 4702) // unreachable code
|
||||||
#endif
|
#endif
|
||||||
virtual int CalculateNumEntry(const uint32_t /*space*/) {
|
virtual int CalculateNumEntry(const uint32_t /*bytes*/) {
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
throw std::runtime_error("CalculateNumEntry not Implemented");
|
throw std::runtime_error("CalculateNumEntry not Implemented");
|
||||||
#else
|
#else
|
||||||
|
@ -119,13 +120,13 @@ class FilterPolicy {
|
||||||
// list, but it should aim to return false with a high probability.
|
// list, but it should aim to return false with a high probability.
|
||||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
|
||||||
|
|
||||||
// Get the FilterBitsBuilder, which is ONLY used for full filter block
|
// Return a new FilterBitsBuilder for full or partitioned filter blocks, or
|
||||||
// It contains interface to take individual key, then generate filter
|
// nullptr if using block-based filter.
|
||||||
virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
|
virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
|
||||||
|
|
||||||
// Get the FilterBitsReader, which is ONLY used for full filter block
|
// Return a new FilterBitsReader for full or partitioned filter blocks, or
|
||||||
// It contains interface to tell if key can be in filter
|
// nullptr if using block-based filter.
|
||||||
// The input slice should NOT be deleted by FilterPolicy
|
// As here, the input slice should NOT be deleted by FilterPolicy.
|
||||||
virtual FilterBitsReader* GetFilterBitsReader(
|
virtual FilterBitsReader* GetFilterBitsReader(
|
||||||
const Slice& /*contents*/) const {
|
const Slice& /*contents*/) const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
|
@ -103,16 +103,16 @@ char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) {
|
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
|
||||||
assert(bits_per_key_);
|
assert(bits_per_key_);
|
||||||
assert(space > 0);
|
assert(bytes > 0);
|
||||||
uint32_t dont_care1, dont_care2;
|
uint32_t dont_care1, dont_care2;
|
||||||
int high = static_cast<int>(space * 8 / bits_per_key_ + 1);
|
int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
|
||||||
int low = 1;
|
int low = 1;
|
||||||
int n = high;
|
int n = high;
|
||||||
for (; n >= low; n--) {
|
for (; n >= low; n--) {
|
||||||
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
|
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
|
||||||
if (sz <= space) {
|
if (sz <= bytes) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
|
||||||
|
|
||||||
~FullFilterBitsBuilder();
|
~FullFilterBitsBuilder();
|
||||||
|
|
||||||
virtual void AddKey(const Slice& key) override;
|
void AddKey(const Slice& key) override;
|
||||||
|
|
||||||
// Create a filter that for hashes [0, n-1], the filter is allocated here
|
// Create a filter that for hashes [0, n-1], the filter is allocated here
|
||||||
// When creating filter, it is ensured that
|
// When creating filter, it is ensured that
|
||||||
|
@ -44,12 +44,13 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
|
||||||
// +----------------------------------------------------------------+
|
// +----------------------------------------------------------------+
|
||||||
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
|
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
|
||||||
// +----------------------------------------------------------------+
|
// +----------------------------------------------------------------+
|
||||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override;
|
Slice Finish(std::unique_ptr<const char[]>* buf) override;
|
||||||
|
|
||||||
// Calculate num of entries fit into a space.
|
int CalculateNumEntry(const uint32_t bytes) override;
|
||||||
virtual int CalculateNumEntry(const uint32_t space) override;
|
|
||||||
|
|
||||||
// Calculate space for new filter. This is reverse of CalculateNumEntry.
|
// Calculate number of bytes needed for a new filter, including
|
||||||
|
// metadata. Passing the result to CalculateNumEntry should
|
||||||
|
// return >= the num_entry passed in.
|
||||||
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
|
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
|
||||||
uint32_t* num_lines);
|
uint32_t* num_lines);
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
// COPYING file in the root directory) and Apache 2.0 License
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
// (found in the LICENSE.Apache file in the root directory).
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#include <set>
|
||||||
|
|
||||||
#include "table/block_based/full_filter_block.h"
|
#include "table/block_based/full_filter_block.h"
|
||||||
#include "rocksdb/filter_policy.h"
|
#include "rocksdb/filter_policy.h"
|
||||||
#include "table/block_based/block_based_table_reader.h"
|
#include "table/block_based/block_based_table_reader.h"
|
||||||
|
@ -205,24 +207,62 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
|
||||||
/*lookup_context=*/nullptr));
|
/*lookup_context=*/nullptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder {
|
||||||
|
std::unique_ptr<FilterBitsBuilder> b_;
|
||||||
|
std::set<std::string> uniq_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {}
|
||||||
|
|
||||||
|
~CountUniqueFilterBitsBuilderWrapper() override {}
|
||||||
|
|
||||||
|
void AddKey(const Slice& key) override {
|
||||||
|
b_->AddKey(key);
|
||||||
|
uniq_.insert(key.ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
Slice Finish(std::unique_ptr<const char[]>* buf) override {
|
||||||
|
Slice rv = b_->Finish(buf);
|
||||||
|
uniq_.clear();
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
int CalculateNumEntry(const uint32_t bytes) override {
|
||||||
|
return b_->CalculateNumEntry(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t CountUnique() { return uniq_.size(); }
|
||||||
|
};
|
||||||
|
|
||||||
TEST_F(FullFilterBlockTest, DuplicateEntries) {
|
TEST_F(FullFilterBlockTest, DuplicateEntries) {
|
||||||
{ // empty prefixes
|
{ // empty prefixes
|
||||||
std::unique_ptr<const SliceTransform> prefix_extractor(
|
std::unique_ptr<const SliceTransform> prefix_extractor(
|
||||||
NewFixedPrefixTransform(0));
|
NewFixedPrefixTransform(0));
|
||||||
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
|
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
|
||||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
table_options_.filter_policy->GetFilterBitsBuilder());
|
||||||
const bool WHOLE_KEY = true;
|
const bool WHOLE_KEY = true;
|
||||||
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
||||||
bits_builder);
|
bits_builder);
|
||||||
ASSERT_EQ(0, builder.NumAdded());
|
ASSERT_EQ(0, builder.NumAdded());
|
||||||
builder.Add("key"); // test with empty prefix
|
ASSERT_EQ(0, bits_builder->CountUnique());
|
||||||
ASSERT_EQ(2, bits_builder->hash_entries_.size());
|
// adds key and empty prefix; both abstractions count them
|
||||||
|
builder.Add("key1");
|
||||||
|
ASSERT_EQ(2, builder.NumAdded());
|
||||||
|
ASSERT_EQ(2, bits_builder->CountUnique());
|
||||||
|
// Add different key (unique) and also empty prefix (not unique).
|
||||||
|
// From here in this test, it's immaterial whether the block builder
|
||||||
|
// can count unique keys.
|
||||||
|
builder.Add("key2");
|
||||||
|
ASSERT_EQ(3, bits_builder->CountUnique());
|
||||||
|
// Empty key -> nothing unique
|
||||||
|
builder.Add("");
|
||||||
|
ASSERT_EQ(3, bits_builder->CountUnique());
|
||||||
}
|
}
|
||||||
|
|
||||||
// mix of empty and non-empty
|
// mix of empty and non-empty
|
||||||
std::unique_ptr<const SliceTransform> prefix_extractor(
|
std::unique_ptr<const SliceTransform> prefix_extractor(
|
||||||
NewFixedPrefixTransform(7));
|
NewFixedPrefixTransform(7));
|
||||||
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
|
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
|
||||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
table_options_.filter_policy->GetFilterBitsBuilder());
|
||||||
const bool WHOLE_KEY = true;
|
const bool WHOLE_KEY = true;
|
||||||
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
||||||
|
@ -234,8 +274,8 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
|
||||||
builder.Add("prefix1key2");
|
builder.Add("prefix1key2");
|
||||||
builder.Add("prefix1key3");
|
builder.Add("prefix1key3");
|
||||||
builder.Add("prefix2key4");
|
builder.Add("prefix2key4");
|
||||||
// two prefix adn 4 keys
|
// 1 empty, 2 non-empty prefixes, and 4 non-empty keys
|
||||||
ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size());
|
ASSERT_EQ(1 + 2 + 4, bits_builder->CountUnique());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(FullFilterBlockTest, SingleChunk) {
|
TEST_F(FullFilterBlockTest, SingleChunk) {
|
||||||
|
|
|
@ -65,12 +65,15 @@ class PartitionedFilterBlockTest
|
||||||
InternalKeyComparator icomp_;
|
InternalKeyComparator icomp_;
|
||||||
std::unique_ptr<BlockBasedTable> table_;
|
std::unique_ptr<BlockBasedTable> table_;
|
||||||
std::shared_ptr<Cache> cache_;
|
std::shared_ptr<Cache> cache_;
|
||||||
|
int bits_per_key_;
|
||||||
|
|
||||||
PartitionedFilterBlockTest()
|
PartitionedFilterBlockTest()
|
||||||
: ioptions_(options_),
|
: ioptions_(options_),
|
||||||
env_options_(options_),
|
env_options_(options_),
|
||||||
icomp_(options_.comparator) {
|
icomp_(options_.comparator),
|
||||||
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
bits_per_key_(10) {
|
||||||
|
table_options_.filter_policy.reset(
|
||||||
|
NewBloomFilterPolicy(bits_per_key_, false));
|
||||||
table_options_.format_version = GetParam();
|
table_options_.format_version = GetParam();
|
||||||
table_options_.index_block_restart_interval = 3;
|
table_options_.index_block_restart_interval = 3;
|
||||||
}
|
}
|
||||||
|
@ -91,16 +94,9 @@ class PartitionedFilterBlockTest
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t MaxFilterSize() {
|
uint64_t MaxFilterSize() {
|
||||||
uint32_t dont_care1, dont_care2;
|
|
||||||
int num_keys = sizeof(keys) / sizeof(*keys);
|
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||||
auto filter_bits_reader = dynamic_cast<rocksdb::FullFilterBitsBuilder*>(
|
// General, rough over-approximation
|
||||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5);
|
||||||
assert(filter_bits_reader);
|
|
||||||
auto partition_size =
|
|
||||||
filter_bits_reader->CalculateSpace(num_keys, &dont_care1, &dont_care2);
|
|
||||||
delete filter_bits_reader;
|
|
||||||
return partition_size +
|
|
||||||
partition_size * table_options_.block_size_deviation / 100;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t last_offset = 10;
|
uint64_t last_offset = 10;
|
||||||
|
|
|
@ -324,6 +324,7 @@ class FullBloomTest : public testing::Test {
|
||||||
TEST_F(FullBloomTest, FilterSize) {
|
TEST_F(FullBloomTest, FilterSize) {
|
||||||
uint32_t dont_care1, dont_care2;
|
uint32_t dont_care1, dont_care2;
|
||||||
auto full_bits_builder = GetFullFilterBitsBuilder();
|
auto full_bits_builder = GetFullFilterBitsBuilder();
|
||||||
|
ASSERT_TRUE(full_bits_builder != nullptr);
|
||||||
for (int n = 1; n < 100; n++) {
|
for (int n = 1; n < 100; n++) {
|
||||||
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
|
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
|
||||||
auto n2 = full_bits_builder->CalculateNumEntry(space);
|
auto n2 = full_bits_builder->CalculateNumEntry(space);
|
||||||
|
|
Loading…
Reference in New Issue