2012-04-17 15:36:46 +00:00
|
|
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "leveldb/filter_policy.h"
|
|
|
|
|
|
|
|
#include "leveldb/slice.h"
|
|
|
|
#include "util/hash.h"
|
|
|
|
|
|
|
|
namespace leveldb {
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
static uint32_t BloomHash(const Slice& key) {
|
|
|
|
return Hash(key.data(), key.size(), 0xbc9f1d34);
|
|
|
|
}
|
|
|
|
|
|
|
|
class BloomFilterPolicy : public FilterPolicy {
|
|
|
|
private:
|
|
|
|
size_t bits_per_key_;
|
|
|
|
size_t k_;
|
2012-07-10 00:31:12 +00:00
|
|
|
uint32_t (*hash_func_)(const Slice& key);
|
2012-04-17 15:36:46 +00:00
|
|
|
|
2012-07-10 00:31:12 +00:00
|
|
|
void initialize() {
|
2012-04-17 15:36:46 +00:00
|
|
|
// We intentionally round down to reduce probing cost a little bit
|
2012-07-10 00:31:12 +00:00
|
|
|
k_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
|
2012-04-17 15:36:46 +00:00
|
|
|
if (k_ < 1) k_ = 1;
|
|
|
|
if (k_ > 30) k_ = 30;
|
|
|
|
}
|
|
|
|
|
2012-07-10 00:31:12 +00:00
|
|
|
public:
|
|
|
|
explicit BloomFilterPolicy(int bits_per_key,
|
|
|
|
uint32_t (*hash_func)(const Slice& key))
|
|
|
|
: bits_per_key_(bits_per_key), hash_func_(hash_func) {
|
|
|
|
initialize();
|
|
|
|
}
|
|
|
|
explicit BloomFilterPolicy(int bits_per_key)
|
|
|
|
: bits_per_key_(bits_per_key) {
|
|
|
|
hash_func_ = BloomHash;
|
|
|
|
initialize();
|
|
|
|
}
|
|
|
|
|
2012-04-17 15:36:46 +00:00
|
|
|
virtual const char* Name() const {
|
|
|
|
return "leveldb.BuiltinBloomFilter";
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
|
|
|
// Compute bloom filter size (in both bits and bytes)
|
|
|
|
size_t bits = n * bits_per_key_;
|
|
|
|
|
|
|
|
// For small n, we can see a very high false positive rate. Fix it
|
|
|
|
// by enforcing a minimum bloom filter length.
|
|
|
|
if (bits < 64) bits = 64;
|
|
|
|
|
|
|
|
size_t bytes = (bits + 7) / 8;
|
|
|
|
bits = bytes * 8;
|
|
|
|
|
|
|
|
const size_t init_size = dst->size();
|
|
|
|
dst->resize(init_size + bytes, 0);
|
|
|
|
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
|
|
|
|
char* array = &(*dst)[init_size];
|
2012-11-06 20:02:18 +00:00
|
|
|
for (size_t i = 0; i < (size_t)n; i++) {
|
2012-04-17 15:36:46 +00:00
|
|
|
// Use double-hashing to generate a sequence of hash values.
|
|
|
|
// See analysis in [Kirsch,Mitzenmacher 2006].
|
2012-07-10 00:31:12 +00:00
|
|
|
uint32_t h = hash_func_(keys[i]);
|
2012-04-17 15:36:46 +00:00
|
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
|
|
|
for (size_t j = 0; j < k_; j++) {
|
|
|
|
const uint32_t bitpos = h % bits;
|
|
|
|
array[bitpos/8] |= (1 << (bitpos % 8));
|
|
|
|
h += delta;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
|
|
|
|
const size_t len = bloom_filter.size();
|
|
|
|
if (len < 2) return false;
|
|
|
|
|
|
|
|
const char* array = bloom_filter.data();
|
|
|
|
const size_t bits = (len - 1) * 8;
|
|
|
|
|
|
|
|
// Use the encoded k so that we can read filters generated by
|
|
|
|
// bloom filters created using different parameters.
|
|
|
|
const size_t k = array[len-1];
|
|
|
|
if (k > 30) {
|
|
|
|
// Reserved for potentially new encodings for short bloom filters.
|
|
|
|
// Consider it a match.
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-07-10 00:31:12 +00:00
|
|
|
uint32_t h = hash_func_(key);
|
2012-04-17 15:36:46 +00:00
|
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
|
|
|
for (size_t j = 0; j < k; j++) {
|
|
|
|
const uint32_t bitpos = h % bits;
|
|
|
|
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
|
|
|
|
h += delta;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
|
|
|
|
return new BloomFilterPolicy(bits_per_key);
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace leveldb
|