mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-29 18:33:58 +00:00
d29f181923
Summary: Scripted and removed all trailing spaces and converted all tabs to spaces. Also fixed other lint errors. All lint errors from this point of time should be taken seriously. Test Plan: make all check Reviewers: dhruba Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D7059
107 lines
3.2 KiB
C++
107 lines
3.2 KiB
C++
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "leveldb/filter_policy.h"
|
|
|
|
#include "leveldb/slice.h"
|
|
#include "util/hash.h"
|
|
|
|
namespace leveldb {
|
|
|
|
namespace {
|
|
static uint32_t BloomHash(const Slice& key) {
|
|
return Hash(key.data(), key.size(), 0xbc9f1d34);
|
|
}
|
|
|
|
class BloomFilterPolicy : public FilterPolicy {
|
|
private:
|
|
size_t bits_per_key_;
|
|
size_t k_;
|
|
uint32_t (*hash_func_)(const Slice& key);
|
|
|
|
void initialize() {
|
|
// We intentionally round down to reduce probing cost a little bit
|
|
k_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
|
|
if (k_ < 1) k_ = 1;
|
|
if (k_ > 30) k_ = 30;
|
|
}
|
|
|
|
public:
|
|
explicit BloomFilterPolicy(int bits_per_key,
|
|
uint32_t (*hash_func)(const Slice& key))
|
|
: bits_per_key_(bits_per_key), hash_func_(hash_func) {
|
|
initialize();
|
|
}
|
|
explicit BloomFilterPolicy(int bits_per_key)
|
|
: bits_per_key_(bits_per_key) {
|
|
hash_func_ = BloomHash;
|
|
initialize();
|
|
}
|
|
|
|
virtual const char* Name() const {
|
|
return "leveldb.BuiltinBloomFilter";
|
|
}
|
|
|
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
|
// Compute bloom filter size (in both bits and bytes)
|
|
size_t bits = n * bits_per_key_;
|
|
|
|
// For small n, we can see a very high false positive rate. Fix it
|
|
// by enforcing a minimum bloom filter length.
|
|
if (bits < 64) bits = 64;
|
|
|
|
size_t bytes = (bits + 7) / 8;
|
|
bits = bytes * 8;
|
|
|
|
const size_t init_size = dst->size();
|
|
dst->resize(init_size + bytes, 0);
|
|
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
|
|
char* array = &(*dst)[init_size];
|
|
for (size_t i = 0; i < (size_t)n; i++) {
|
|
// Use double-hashing to generate a sequence of hash values.
|
|
// See analysis in [Kirsch,Mitzenmacher 2006].
|
|
uint32_t h = hash_func_(keys[i]);
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
|
for (size_t j = 0; j < k_; j++) {
|
|
const uint32_t bitpos = h % bits;
|
|
array[bitpos/8] |= (1 << (bitpos % 8));
|
|
h += delta;
|
|
}
|
|
}
|
|
}
|
|
|
|
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
|
|
const size_t len = bloom_filter.size();
|
|
if (len < 2) return false;
|
|
|
|
const char* array = bloom_filter.data();
|
|
const size_t bits = (len - 1) * 8;
|
|
|
|
// Use the encoded k so that we can read filters generated by
|
|
// bloom filters created using different parameters.
|
|
const size_t k = array[len-1];
|
|
if (k > 30) {
|
|
// Reserved for potentially new encodings for short bloom filters.
|
|
// Consider it a match.
|
|
return true;
|
|
}
|
|
|
|
uint32_t h = hash_func_(key);
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
|
for (size_t j = 0; j < k; j++) {
|
|
const uint32_t bitpos = h % bits;
|
|
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
|
|
h += delta;
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
}
|
|
|
|
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
|
|
return new BloomFilterPolicy(bits_per_key);
|
|
}
|
|
|
|
} // namespace leveldb
|