2013-10-16 21:59:46 +00:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
2012-04-17 15:36:46 +00:00
|
|
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
2014-05-09 15:34:18 +00:00
|
|
|
#ifndef GFLAGS
|
|
|
|
#include <cstdio>
|
|
|
|
// Fallback entry point compiled when GFLAGS is not defined: the real tests
// need gflags, so just report that on stderr and exit with a failure status.
int main() {
  fputs("Please install gflags to run rocksdb tools\n", stderr);
  return 1;
}
|
|
|
|
#else
|
|
|
|
|
2013-11-27 22:27:02 +00:00
|
|
|
#include <gflags/gflags.h>
|
2014-09-08 17:37:05 +00:00
|
|
|
#include <vector>
|
2013-11-27 22:27:02 +00:00
|
|
|
|
2013-08-23 15:38:13 +00:00
|
|
|
#include "rocksdb/filter_policy.h"
|
2012-04-17 15:36:46 +00:00
|
|
|
#include "util/logging.h"
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
2014-09-08 17:37:05 +00:00
|
|
|
#include "util/arena.h"
|
2012-04-17 15:36:46 +00:00
|
|
|
|
2014-05-09 15:34:18 +00:00
|
|
|
using GFLAGS::ParseCommandLineFlags;
|
|
|
|
|
2013-11-27 22:27:02 +00:00
|
|
|
// Bits of filter space per key. Both test fixtures in this file pass this
// value to NewBloomFilterPolicy(). The description is what gflags prints for
// the flag in --help output.
DEFINE_int32(bits_per_key, 10,
             "Number of bits per key used to build the bloom filters under "
             "test");
|
|
|
|
|
2013-10-04 04:49:15 +00:00
|
|
|
namespace rocksdb {
|
2012-04-17 15:36:46 +00:00
|
|
|
|
|
|
|
// Diagnostic output level for the tests in this file:
//   >= 1  print the false positive rate for every tested length plus the
//         final good/mediocre summary;
//   >= 2  additionally dump the bits of every filter BloomTest builds.
static constexpr int kVerbose = 1;
|
|
|
|
|
|
|
|
// Copies the raw bytes of `i` into `buffer` and returns a Slice built over
// that buffer with length sizeof(int).
//
// `buffer` must provide at least sizeof(int) bytes. Callers in this file reuse
// one buffer for many keys, so treat the returned Slice as valid only until
// the buffer is overwritten (presumably Slice references the bytes rather
// than copying them -- confirm against the Slice definition).
static Slice Key(int i, char* buffer) {
  const size_t width = sizeof(i);
  memcpy(buffer, &i, width);
  return Slice(buffer, width);
}
|
|
|
|
|
2014-09-08 17:37:05 +00:00
|
|
|
// Returns the key count to test after `length`. The stride grows with the
// magnitude of `length`: +1 below 10, +10 below 100, +100 below 1000 and
// +1000 from there on.
static int NextLength(int length) {
  if (length < 10) return length + 1;
  if (length < 100) return length + 10;
  if (length < 1000) return length + 100;
  return length + 1000;
}
|
|
|
|
|
2012-04-17 15:36:46 +00:00
|
|
|
class BloomTest {
|
|
|
|
private:
|
|
|
|
const FilterPolicy* policy_;
|
|
|
|
std::string filter_;
|
|
|
|
std::vector<std::string> keys_;
|
|
|
|
|
|
|
|
public:
|
2014-09-08 17:37:05 +00:00
|
|
|
BloomTest() : policy_(
|
|
|
|
NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
|
2012-04-17 15:36:46 +00:00
|
|
|
|
|
|
|
~BloomTest() {
|
|
|
|
delete policy_;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Reset() {
|
|
|
|
keys_.clear();
|
|
|
|
filter_.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Add(const Slice& s) {
|
|
|
|
keys_.push_back(s.ToString());
|
|
|
|
}
|
|
|
|
|
|
|
|
void Build() {
|
|
|
|
std::vector<Slice> key_slices;
|
|
|
|
for (size_t i = 0; i < keys_.size(); i++) {
|
|
|
|
key_slices.push_back(Slice(keys_[i]));
|
|
|
|
}
|
|
|
|
filter_.clear();
|
2014-11-11 21:47:22 +00:00
|
|
|
policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
|
|
|
|
&filter_);
|
2012-04-17 15:36:46 +00:00
|
|
|
keys_.clear();
|
|
|
|
if (kVerbose >= 2) DumpFilter();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t FilterSize() const {
|
|
|
|
return filter_.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
void DumpFilter() {
|
|
|
|
fprintf(stderr, "F(");
|
|
|
|
for (size_t i = 0; i+1 < filter_.size(); i++) {
|
|
|
|
const unsigned int c = static_cast<unsigned int>(filter_[i]);
|
|
|
|
for (int j = 0; j < 8; j++) {
|
|
|
|
fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fprintf(stderr, ")\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Matches(const Slice& s) {
|
|
|
|
if (!keys_.empty()) {
|
|
|
|
Build();
|
|
|
|
}
|
|
|
|
return policy_->KeyMayMatch(s, filter_);
|
|
|
|
}
|
|
|
|
|
|
|
|
double FalsePositiveRate() {
|
|
|
|
char buffer[sizeof(int)];
|
|
|
|
int result = 0;
|
|
|
|
for (int i = 0; i < 10000; i++) {
|
|
|
|
if (Matches(Key(i + 1000000000, buffer))) {
|
|
|
|
result++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result / 10000.0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// With no keys added, Matches() never builds a filter, so lookups run against
// the empty filter string and must be rejected.
TEST(BloomTest, EmptyFilter) {
  ASSERT_TRUE(!Matches("hello"));
  ASSERT_TRUE(!Matches("world"));
}
|
|
|
|
|
|
|
|
// Two-key filter: both inserted keys must be reported as possible matches,
// and the two probe keys that were never added are expected to be rejected.
TEST(BloomTest, Small) {
  Add("hello");
  Add("world");

  // Keys that were inserted.
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));

  // Keys that were not inserted.
  ASSERT_TRUE(!Matches("x"));
  ASSERT_TRUE(!Matches("foo"));
}
|
|
|
|
|
|
|
|
// Builds filters over 1..10000 integer keys (stepping via NextLength) and
// checks for each size: the filter's byte size, that every added key matches,
// and the measured false positive rate.
TEST(BloomTest, VaryingLengths) {
  char buffer[sizeof(int)];

  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;

  for (int length = 1; length <= 10000; length = NextLength(length)) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();

    // The size budget follows the configured bits per key instead of a
    // hard-coded 10, so running with a larger --bits_per_key does not trip
    // this check. With the default flag value the bound is unchanged.
    const size_t max_filter_bytes =
        static_cast<size_t>(length * FLAGS_bits_per_key / 8 + 40);
    ASSERT_LE(FilterSize(), max_filter_bytes) << length;

    // All added keys must match
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }

    // Check false positive rate
    // NOTE(review): the 2% / 1.25% thresholds below look calibrated for the
    // default of 10 bits per key -- confirm before running with fewer bits.
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate * 100.0, length, static_cast<int>(FilterSize()));
    }
    ASSERT_LE(rate, 0.02);  // Must not be over 2%
    if (rate > 0.0125) {
      mediocre_filters++;  // Allowed, but not too often
    } else {
      good_filters++;
    }
  }
  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
            mediocre_filters);
  }
  // At most one mediocre filter is tolerated per five good ones.
  ASSERT_LE(mediocre_filters, good_filters / 5);
}
|
|
|
|
|
|
|
|
// Different bits-per-byte
|
|
|
|
|
2014-09-08 17:37:05 +00:00
|
|
|
class FullBloomTest {
|
|
|
|
private:
|
|
|
|
const FilterPolicy* policy_;
|
|
|
|
std::unique_ptr<FilterBitsBuilder> bits_builder_;
|
|
|
|
std::unique_ptr<FilterBitsReader> bits_reader_;
|
|
|
|
std::unique_ptr<const char[]> buf_;
|
|
|
|
size_t filter_size_;
|
|
|
|
|
|
|
|
public:
|
|
|
|
FullBloomTest() :
|
|
|
|
policy_(NewBloomFilterPolicy(FLAGS_bits_per_key, false)),
|
|
|
|
filter_size_(0) {
|
|
|
|
Reset();
|
|
|
|
}
|
|
|
|
|
|
|
|
~FullBloomTest() {
|
|
|
|
delete policy_;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Reset() {
|
|
|
|
bits_builder_.reset(policy_->GetFilterBitsBuilder());
|
|
|
|
bits_reader_.reset(nullptr);
|
|
|
|
buf_.reset(nullptr);
|
|
|
|
filter_size_ = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Add(const Slice& s) {
|
|
|
|
bits_builder_->AddKey(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
void Build() {
|
|
|
|
Slice filter = bits_builder_->Finish(&buf_);
|
|
|
|
bits_reader_.reset(policy_->GetFilterBitsReader(filter));
|
|
|
|
filter_size_ = filter.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t FilterSize() const {
|
|
|
|
return filter_size_;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Matches(const Slice& s) {
|
|
|
|
if (bits_reader_ == nullptr) {
|
|
|
|
Build();
|
|
|
|
}
|
|
|
|
return bits_reader_->MayMatch(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
double FalsePositiveRate() {
|
|
|
|
char buffer[sizeof(int)];
|
|
|
|
int result = 0;
|
|
|
|
for (int i = 0; i < 10000; i++) {
|
|
|
|
if (Matches(Key(i + 1000000000, buffer))) {
|
|
|
|
result++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result / 10000.0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// A filter finished without any added keys must reject every lookup.
TEST(FullBloomTest, FullEmptyFilter) {
  // Empty filter is not match, at this level
  ASSERT_TRUE(!Matches("hello"));
  ASSERT_TRUE(!Matches("world"));
}
|
|
|
|
|
|
|
|
// Two-key filter: both inserted keys must be reported as possible matches,
// and the two probe keys that were never added are expected to be rejected.
TEST(FullBloomTest, FullSmall) {
  Add("hello");
  Add("world");

  // Keys that were inserted.
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));

  // Keys that were not inserted.
  ASSERT_TRUE(!Matches("x"));
  ASSERT_TRUE(!Matches("foo"));
}
|
|
|
|
|
|
|
|
// Builds filters over 1..10000 integer keys (stepping via NextLength) and
// checks for each size: the filter's byte size, that every added key matches,
// and the measured false positive rate.
TEST(FullBloomTest, FullVaryingLengths) {
  char buffer[sizeof(int)];

  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;

  for (int length = 1; length <= 10000; length = NextLength(length)) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();

    // The size budget follows the configured bits per key instead of a
    // hard-coded 10, so running with a larger --bits_per_key does not trip
    // this check. With the default flag value the bound is unchanged. The
    // fixed 128 + 5 bytes of slack are kept as they were.
    const size_t max_filter_bytes =
        static_cast<size_t>(length * FLAGS_bits_per_key / 8 + 128 + 5);
    ASSERT_LE(FilterSize(), max_filter_bytes) << length;

    // All added keys must match
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }

    // Check false positive rate
    // NOTE(review): the 2% / 1.25% thresholds below look calibrated for the
    // default of 10 bits per key -- confirm before running with fewer bits.
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate * 100.0, length, static_cast<int>(FilterSize()));
    }
    ASSERT_LE(rate, 0.02);  // Must not be over 2%
    if (rate > 0.0125) {
      mediocre_filters++;  // Allowed, but not too often
    } else {
      good_filters++;
    }
  }
  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
            mediocre_filters);
  }
  // At most one mediocre filter is tolerated per five good ones.
  ASSERT_LE(mediocre_filters, good_filters / 5);
}
|
|
|
|
|
2013-10-04 04:49:15 +00:00
|
|
|
} // namespace rocksdb
|
2012-04-17 15:36:46 +00:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2014-05-09 15:34:18 +00:00
|
|
|
ParseCommandLineFlags(&argc, &argv, true);
|
2013-11-27 22:27:02 +00:00
|
|
|
|
2013-10-04 04:49:15 +00:00
|
|
|
return rocksdb::test::RunAllTests();
|
2012-04-17 15:36:46 +00:00
|
|
|
}
|
2014-05-09 15:34:18 +00:00
|
|
|
|
|
|
|
#endif // GFLAGS
|