Refactor some confusing logic in PlainTableReader

Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/5780

Test Plan: existing plain table unit test

Differential Revision: D17368629

Pulled By: pdillinger

fbshipit-source-id: f25409cdc2f39ebe8d5cbb599cf820270e6b5d26
This commit is contained in:
Peter Dillinger 2019-09-13 10:24:38 -07:00 committed by Facebook Github Bot
parent 1a928c22a0
commit aa2486b23c
12 changed files with 295 additions and 215 deletions

View File

@ -332,6 +332,11 @@ ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
endif endif
endif endif
ifdef TEST_CACHE_LINE_SIZE
PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
endif
# This (the first rule) must depend on "all". # This (the first rule) must depend on "all".
default: all default: all

View File

@ -212,7 +212,8 @@ void VerifySimilar(uint64_t a, uint64_t b, double bias) {
void VerifyTableProperties(const TableProperties& base_tp, void VerifyTableProperties(const TableProperties& base_tp,
const TableProperties& new_tp, const TableProperties& new_tp,
double filter_size_bias = 0.1, double filter_size_bias =
CACHE_LINE_SIZE >= 256 ? 0.15 : 0.1,
double index_size_bias = 0.1, double index_size_bias = 0.1,
double data_size_bias = 0.1, double data_size_bias = 0.1,
double num_data_blocks_bias = 0.05) { double num_data_blocks_bias = 0.05) {
@ -266,7 +267,8 @@ void GetExpectedTableProperties(
// discount 1 byte as value size is not encoded in value delta encoding // discount 1 byte as value size is not encoded in value delta encoding
(value_delta_encoding ? 1 : 0)); (value_delta_encoding ? 1 : 0));
expected_tp->filter_size = expected_tp->filter_size =
kTableCount * (kKeysPerTable * kBloomBitsPerKey / 8); kTableCount * ((kKeysPerTable * kBloomBitsPerKey + 7) / 8 +
/*average-ish overhead*/ CACHE_LINE_SIZE / 2);
} }
} // anonymous namespace } // anonymous namespace

View File

@ -178,22 +178,31 @@ typedef pthread_once_t OnceType;
extern void InitOnce(OnceType* once, void (*initializer)()); extern void InitOnce(OnceType* once, void (*initializer)());
#ifndef CACHE_LINE_SIZE #ifndef CACHE_LINE_SIZE
#if defined(__s390__) // To test behavior with non-native cache line size, e.g. for
#define CACHE_LINE_SIZE 256U // Bloom filters, set TEST_CACHE_LINE_SIZE to the desired test size.
#elif defined(__powerpc__) || defined(__aarch64__) // This disables ALIGN_AS to keep it from failing compilation.
#define CACHE_LINE_SIZE 128U #ifdef TEST_CACHE_LINE_SIZE
#define CACHE_LINE_SIZE TEST_CACHE_LINE_SIZE
#define ALIGN_AS(n) /*empty*/
#else #else
#define CACHE_LINE_SIZE 64U #if defined(__s390__)
#define CACHE_LINE_SIZE 256U
#elif defined(__powerpc__) || defined(__aarch64__)
#define CACHE_LINE_SIZE 128U
#else
#define CACHE_LINE_SIZE 64U
#endif
#define ALIGN_AS(n) alignas(n)
#endif #endif
#endif #endif
static_assert((CACHE_LINE_SIZE & (CACHE_LINE_SIZE - 1)) == 0,
"Cache line size must be a power of 2 number of bytes");
extern void *cacheline_aligned_alloc(size_t size); extern void *cacheline_aligned_alloc(size_t size);
extern void cacheline_aligned_free(void *memblock); extern void cacheline_aligned_free(void *memblock);
#define ALIGN_AS(n) alignas(n)
#define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
extern void Crash(const std::string& srcfile, int srcline); extern void Crash(const std::string& srcfile, int srcline);

View File

@ -20,8 +20,8 @@ class Slice;
class FullFilterBitsBuilder : public FilterBitsBuilder { class FullFilterBitsBuilder : public FilterBitsBuilder {
public: public:
explicit FullFilterBitsBuilder(const size_t bits_per_key, explicit FullFilterBitsBuilder(const int bits_per_key,
const size_t num_probes); const int num_probes);
// No Copy allowed // No Copy allowed
FullFilterBitsBuilder(const FullFilterBitsBuilder&) = delete; FullFilterBitsBuilder(const FullFilterBitsBuilder&) = delete;
@ -56,8 +56,8 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
private: private:
friend class FullFilterBlockTest_DuplicateEntries_Test; friend class FullFilterBlockTest_DuplicateEntries_Test;
size_t bits_per_key_; int bits_per_key_;
size_t num_probes_; int num_probes_;
std::vector<uint32_t> hash_entries_; std::vector<uint32_t> hash_entries_;
// Get totalbits that optimized for cpu cache line // Get totalbits that optimized for cpu cache line

View File

@ -33,9 +33,9 @@ uint32_t GetTotalBitsForLocality(uint32_t total_bits) {
PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes) PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes)
: kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {} : kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {}
void PlainTableBloomV1::SetRawData(unsigned char* raw_data, uint32_t total_bits, void PlainTableBloomV1::SetRawData(char* raw_data, uint32_t total_bits,
uint32_t num_blocks) { uint32_t num_blocks) {
data_ = reinterpret_cast<uint8_t*>(raw_data); data_ = raw_data;
kTotalBits = total_bits; kTotalBits = total_bits;
kNumBlocks = num_blocks; kNumBlocks = num_blocks;
} }
@ -63,7 +63,7 @@ void PlainTableBloomV1::SetTotalBits(Allocator* allocator,
if (kNumBlocks > 0 && cache_line_offset > 0) { if (kNumBlocks > 0 && cache_line_offset > 0) {
raw += CACHE_LINE_SIZE - cache_line_offset; raw += CACHE_LINE_SIZE - cache_line_offset;
} }
data_ = reinterpret_cast<uint8_t*>(raw); data_ = raw;
} }
void BloomBlockBuilder::AddKeysHashes(const std::vector<uint32_t>& keys_hashes) { void BloomBlockBuilder::AddKeysHashes(const std::vector<uint32_t>& keys_hashes) {

View File

@ -10,8 +10,11 @@
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "port/port.h" #include "port/port.h"
#include "util/bloom_impl.h"
#include "util/hash.h" #include "util/hash.h"
#include "third-party/folly/folly/ConstexprMath.h"
#include <memory> #include <memory>
namespace rocksdb { namespace rocksdb {
@ -51,10 +54,10 @@ class PlainTableBloomV1 {
uint32_t GetNumBlocks() const { return kNumBlocks; } uint32_t GetNumBlocks() const { return kNumBlocks; }
Slice GetRawData() const { Slice GetRawData() const {
return Slice(reinterpret_cast<char*>(data_), GetTotalBits() / 8); return Slice(data_, GetTotalBits() / 8);
} }
void SetRawData(unsigned char* raw_data, uint32_t total_bits, void SetRawData(char* raw_data, uint32_t total_bits,
uint32_t num_blocks = 0); uint32_t num_blocks = 0);
uint32_t GetTotalBits() const { return kTotalBits; } uint32_t GetTotalBits() const { return kTotalBits; }
@ -66,7 +69,10 @@ class PlainTableBloomV1 {
uint32_t kNumBlocks; uint32_t kNumBlocks;
const uint32_t kNumProbes; const uint32_t kNumProbes;
uint8_t* data_; char* data_;
static constexpr int LOG2_CACHE_LINE_SIZE =
folly::constexpr_log2(CACHE_LINE_SIZE);
}; };
#if defined(_MSC_VER) #if defined(_MSC_VER)
@ -76,8 +82,9 @@ class PlainTableBloomV1 {
#endif #endif
inline void PlainTableBloomV1::Prefetch(uint32_t h) { inline void PlainTableBloomV1::Prefetch(uint32_t h) {
if (kNumBlocks != 0) { if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8); uint32_t ignored;
PREFETCH(&(data_[b / 8]), 0, 3); LegacyLocalityBloomImpl</*ExtraRotates*/true>::PrepareHashMayMatch(
h, kNumBlocks, data_, &ignored, LOG2_CACHE_LINE_SIZE);
} }
} }
#if defined(_MSC_VER) #if defined(_MSC_VER)
@ -86,54 +93,22 @@ inline void PlainTableBloomV1::Prefetch(uint32_t h) {
inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const { inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const {
assert(IsInitialized()); assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) { if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8); return LegacyLocalityBloomImpl<true>::HashMayMatch(
for (uint32_t i = 0; i < kNumProbes; ++i) { h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE);
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
return false;
}
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
} else { } else {
for (uint32_t i = 0; i < kNumProbes; ++i) { return LegacyNoLocalityBloomImpl::HashMayMatch(
const uint32_t bitpos = h % kTotalBits; h, kTotalBits, kNumProbes, data_);
if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
} }
return true;
} }
inline void PlainTableBloomV1::AddHash(uint32_t h) { inline void PlainTableBloomV1::AddHash(uint32_t h) {
assert(IsInitialized()); assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) { if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8); LegacyLocalityBloomImpl<true>::AddHash(
for (uint32_t i = 0; i < kNumProbes; ++i) { h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE);
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
data_[bitpos / 8] |= (1 << (bitpos % 8));
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
} else { } else {
for (uint32_t i = 0; i < kNumProbes; ++i) { LegacyNoLocalityBloomImpl::AddHash(h, kTotalBits, kNumProbes, data_);
const uint32_t bitpos = h % kTotalBits;
data_[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
} }
} }

View File

@ -260,23 +260,19 @@ Status PlainTableReader::PopulateIndexRecordList(
return s; return s;
} }
void PlainTableReader::AllocateAndFillBloom( void PlainTableReader::AllocateBloom(int bloom_bits_per_key, int num_keys,
int bloom_bits_per_key, int num_prefixes, size_t huge_page_tlb_size, size_t huge_page_tlb_size) {
std::vector<uint32_t>* prefix_hashes) { uint32_t bloom_total_bits = num_keys * bloom_bits_per_key;
if (!IsTotalOrderMode()) { if (bloom_total_bits > 0) {
uint32_t bloom_total_bits = num_prefixes * bloom_bits_per_key; enable_bloom_ = true;
if (bloom_total_bits > 0) { bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
enable_bloom_ = true; huge_page_tlb_size, ioptions_.info_log);
bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
huge_page_tlb_size, ioptions_.info_log);
FillBloom(prefix_hashes);
}
} }
} }
void PlainTableReader::FillBloom(std::vector<uint32_t>* prefix_hashes) { void PlainTableReader::FillBloom(const std::vector<uint32_t>& prefix_hashes) {
assert(bloom_.IsInitialized()); assert(bloom_.IsInitialized());
for (auto prefix_hash : *prefix_hashes) { for (const auto prefix_hash : prefix_hashes) {
bloom_.AddHash(prefix_hash); bloom_.AddHash(prefix_hash);
} }
} }
@ -354,14 +350,9 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
if (!index_in_file) { if (!index_in_file) {
// Allocate bloom filter here for total order mode. // Allocate bloom filter here for total order mode.
if (IsTotalOrderMode()) { if (IsTotalOrderMode()) {
uint32_t num_bloom_bits = AllocateBloom(bloom_bits_per_key,
static_cast<uint32_t>(table_properties_->num_entries) * static_cast<uint32_t>(table_properties_->num_entries),
bloom_bits_per_key; huge_page_tlb_size);
if (num_bloom_bits > 0) {
enable_bloom_ = true;
bloom_.SetTotalBits(&arena_, num_bloom_bits, ioptions_.bloom_locality,
huge_page_tlb_size, ioptions_.info_log);
}
} }
} else if (bloom_in_file) { } else if (bloom_in_file) {
enable_bloom_ = true; enable_bloom_ = true;
@ -377,8 +368,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
} }
// cast away const qualifier, because bloom_ won't be changed // cast away const qualifier, because bloom_ won't be changed
bloom_.SetRawData( bloom_.SetRawData(
const_cast<unsigned char*>( const_cast<char*>(bloom_block->data()),
reinterpret_cast<const unsigned char*>(bloom_block->data())),
static_cast<uint32_t>(bloom_block->size()) * 8, num_blocks); static_cast<uint32_t>(bloom_block->size()) * 8, num_blocks);
} else { } else {
// Index in file but no bloom in file. Disable bloom filter in this case. // Index in file but no bloom in file. Disable bloom filter in this case.
@ -392,6 +382,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
std::vector<uint32_t> prefix_hashes; std::vector<uint32_t> prefix_hashes;
if (!index_in_file) { if (!index_in_file) {
// Populates bloom_ if enabled (total order mode)
s = PopulateIndexRecordList(&index_builder, &prefix_hashes); s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -404,10 +395,15 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
} }
if (!index_in_file) { if (!index_in_file) {
// Calculated bloom filter size and allocate memory for if (!IsTotalOrderMode()) {
// bloom filter based on the number of prefixes, then fill it. // Calculated bloom filter size and allocate memory for
AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(), // bloom filter based on the number of prefixes, then fill it.
huge_page_tlb_size, &prefix_hashes); AllocateBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
huge_page_tlb_size);
if (enable_bloom_) {
FillBloom(prefix_hashes);
}
}
} }
// Fill two table properties. // Fill two table properties.

View File

@ -209,12 +209,11 @@ class PlainTableReader: public TableReader {
Status PopulateIndexRecordList(PlainTableIndexBuilder* index_builder, Status PopulateIndexRecordList(PlainTableIndexBuilder* index_builder,
std::vector<uint32_t>* prefix_hashes); std::vector<uint32_t>* prefix_hashes);
// Internal helper function to allocate memory for bloom filter and fill it // Internal helper function to allocate memory for bloom filter
void AllocateAndFillBloom(int bloom_bits_per_key, int num_prefixes, void AllocateBloom(int bloom_bits_per_key, int num_prefixes,
size_t huge_page_tlb_size, size_t huge_page_tlb_size);
std::vector<uint32_t>* prefix_hashes);
void FillBloom(std::vector<uint32_t>* prefix_hashes); void FillBloom(const std::vector<uint32_t>& prefix_hashes);
// Read the key and value at `offset` to parameters for keys, the and // Read the key and value at `offset` to parameters for keys, the and
// `seekable`. // `seekable`.

View File

@ -14,4 +14,32 @@ template <typename T, typename... Ts>
constexpr T constexpr_max(T a, T b, Ts... ts) { constexpr T constexpr_max(T a, T b, Ts... ts) {
return b < a ? constexpr_max(a, ts...) : constexpr_max(b, ts...); return b < a ? constexpr_max(a, ts...) : constexpr_max(b, ts...);
} }
namespace detail {

// Recursive worker behind constexpr_log2. `a` counts how many times `e` has
// been halved so far; recursion stops once `e` has been reduced to 1, at
// which point `a` is the result. Written as a single return expression so it
// stays a valid C++11 constexpr function.
template <typename T>
constexpr T constexpr_log2_(T a, T e) {
  return e != T(1) ? constexpr_log2_(a + T(1), e / T(2)) : a;
}

// Worker behind constexpr_log2_ceil. Given `l2` (a floor log2 of `t`),
// returns `l2` bumped by one when 2^l2 is still strictly less than `t`, and
// `l2` unchanged otherwise.
template <typename T>
constexpr T constexpr_log2_ceil_(T l2, T t) {
  return (T(1) << l2) < t ? l2 + T(1) : l2;
}

// Returns `t` multiplied by itself.
template <typename T>
constexpr T constexpr_square_(T t) {
  return t * t;
}

}  // namespace detail
template <typename T>
constexpr T constexpr_log2(T t) {
return detail::constexpr_log2_(T(0), t);
}
template <typename T>
constexpr T constexpr_log2_ceil(T t) {
return detail::constexpr_log2_ceil_(constexpr_log2(t), t);
}
} // namespace folly } // namespace folly

View File

@ -13,16 +13,19 @@
#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_filter_block.h"
#include "table/block_based/full_filter_block.h" #include "table/block_based/full_filter_block.h"
#include "table/full_filter_bits_builder.h" #include "table/full_filter_bits_builder.h"
#include "third-party/folly/folly/ConstexprMath.h"
#include "util/bloom_impl.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/hash.h" #include "util/hash.h"
namespace rocksdb { namespace rocksdb {
typedef LegacyLocalityBloomImpl</*ExtraRotates*/false> LegacyFullFilterImpl;
class BlockBasedFilterBlockBuilder; class BlockBasedFilterBlockBuilder;
class FullFilterBlockBuilder; class FullFilterBlockBuilder;
FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key, FullFilterBitsBuilder::FullFilterBitsBuilder(const int bits_per_key,
const size_t num_probes) const int num_probes)
: bits_per_key_(bits_per_key), num_probes_(num_probes) { : bits_per_key_(bits_per_key), num_probes_(num_probes) {
assert(bits_per_key_); assert(bits_per_key_);
} }
@ -74,7 +77,7 @@ uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry,
uint32_t* num_lines) { uint32_t* num_lines) {
assert(bits_per_key_); assert(bits_per_key_);
if (num_entry != 0) { if (num_entry != 0) {
uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_); uint32_t total_bits_tmp = static_cast<uint32_t>(num_entry * bits_per_key_);
*total_bits = GetTotalBitsForLocality(total_bits_tmp); *total_bits = GetTotalBitsForLocality(total_bits_tmp);
*num_lines = *total_bits / (CACHE_LINE_SIZE * 8); *num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
@ -124,24 +127,16 @@ inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
#endif #endif
assert(num_lines > 0 && total_bits > 0); assert(num_lines > 0 && total_bits > 0);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits LegacyFullFilterImpl::AddHash(
uint32_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8); h, num_lines, num_probes_, data,
folly::constexpr_log2(CACHE_LINE_SIZE));
for (uint32_t i = 0; i < num_probes_; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
data[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
} }
namespace { namespace {
class FullFilterBitsReader : public FilterBitsReader { class FullFilterBitsReader : public FilterBitsReader {
public: public:
explicit FullFilterBitsReader(const Slice& contents) explicit FullFilterBitsReader(const Slice& contents)
: data_(const_cast<char*>(contents.data())), : data_(contents.data()),
data_len_(static_cast<uint32_t>(contents.size())), data_len_(static_cast<uint32_t>(contents.size())),
num_probes_(0), num_probes_(0),
num_lines_(0), num_lines_(0),
@ -177,16 +172,23 @@ class FullFilterBitsReader : public FilterBitsReader {
~FullFilterBitsReader() override {} ~FullFilterBitsReader() override {}
bool MayMatch(const Slice& entry) override { // "contents" contains the data built by a preceding call to
// FilterBitsBuilder::Finish. MayMatch must return true if the key was
// passed to FilterBitsBuilder::AddKey. This method may return true or false
// if the key was not on the list, but it should aim to return false with a
// high probability.
bool MayMatch(const Slice& key) override {
if (data_len_ <= 5) { // remain same with original filter if (data_len_ <= 5) { // remain same with original filter
return false; return false;
} }
// Other Error params, including a broken filter, regarded as match // Other Error params, including a broken filter, regarded as match
if (num_probes_ == 0 || num_lines_ == 0) return true; if (num_probes_ == 0 || num_lines_ == 0) return true;
uint32_t hash = BloomHash(entry); uint32_t hash = BloomHash(key);
uint32_t bit_offset; uint32_t byte_offset;
FilterPrepare(hash, Slice(data_, data_len_), num_lines_, &bit_offset); LegacyFullFilterImpl::PrepareHashMayMatch(
return HashMayMatch(hash, Slice(data_, data_len_), num_probes_, bit_offset); hash, num_lines_, data_, /*out*/&byte_offset, log2_cache_line_size_);
return LegacyFullFilterImpl::HashMayMatchPrepared(
hash, num_probes_, data_ + byte_offset, log2_cache_line_size_);
} }
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override { virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
@ -202,16 +204,18 @@ class FullFilterBitsReader : public FilterBitsReader {
// Other Error params, including a broken filter, regarded as match // Other Error params, including a broken filter, regarded as match
if (num_probes_ == 0 || num_lines_ == 0) return; if (num_probes_ == 0 || num_lines_ == 0) return;
uint32_t hashes[MultiGetContext::MAX_BATCH_SIZE]; uint32_t hashes[MultiGetContext::MAX_BATCH_SIZE];
uint32_t bit_offsets[MultiGetContext::MAX_BATCH_SIZE]; uint32_t byte_offsets[MultiGetContext::MAX_BATCH_SIZE];
for (int i = 0; i < num_keys; ++i) { for (int i = 0; i < num_keys; ++i) {
hashes[i] = BloomHash(*keys[i]); hashes[i] = BloomHash(*keys[i]);
FilterPrepare(hashes[i], Slice(data_, data_len_), num_lines_, LegacyFullFilterImpl::PrepareHashMayMatch(
&bit_offsets[i]); hashes[i], num_lines_, data_,
/*out*/&byte_offsets[i], log2_cache_line_size_);
} }
for (int i = 0; i < num_keys; ++i) { for (int i = 0; i < num_keys; ++i) {
if (!HashMayMatch(hashes[i], Slice(data_, data_len_), num_probes_, if (!LegacyFullFilterImpl::HashMayMatchPrepared(
bit_offsets[i])) { hashes[i], num_probes_,
data_ + byte_offsets[i], log2_cache_line_size_)) {
may_match[i] = false; may_match[i] = false;
} }
} }
@ -219,38 +223,20 @@ class FullFilterBitsReader : public FilterBitsReader {
private: private:
// Filter meta data // Filter meta data
char* data_; const char* data_;
uint32_t data_len_; uint32_t data_len_;
size_t num_probes_; int num_probes_;
uint32_t num_lines_; uint32_t num_lines_;
uint32_t log2_cache_line_size_; uint32_t log2_cache_line_size_;
// Get num_probes, and num_lines from filter // Get num_probes, and num_lines from filter
// If filter format broken, set both to 0. // If filter format broken, set both to 0.
void GetFilterMeta(const Slice& filter, size_t* num_probes, void GetFilterMeta(const Slice& filter, int* num_probes,
uint32_t* num_lines); uint32_t* num_lines);
// "filter" contains the data appended by a preceding call to
// FilterBitsBuilder::Finish. This method must return true if the key was
// passed to FilterBitsBuilder::AddKey. This method may return true or false
// if the key was not on the list, but it should aim to return false with a
// high probability.
//
// hash: target to be checked
// filter: the whole filter, including meta data bytes
// num_probes: number of probes, read before hand
// num_lines: filter metadata, read before hand
// Before calling this function, need to ensure the input meta data
// is valid.
bool HashMayMatch(const uint32_t& hash, const Slice& filter,
const size_t& num_probes, const uint32_t& bit_offset);
void FilterPrepare(const uint32_t& hash, const Slice& filter,
const uint32_t& num_lines, uint32_t* bit_offset);
}; };
void FullFilterBitsReader::GetFilterMeta(const Slice& filter, void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
size_t* num_probes, uint32_t* num_lines) { int* num_probes, uint32_t* num_lines) {
uint32_t len = static_cast<uint32_t>(filter.size()); uint32_t len = static_cast<uint32_t>(filter.size());
if (len <= 5) { if (len <= 5) {
// filter is empty or broken // filter is empty or broken
@ -263,54 +249,6 @@ void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
*num_lines = DecodeFixed32(filter.data() + len - 4); *num_lines = DecodeFixed32(filter.data() + len - 4);
} }
void FullFilterBitsReader::FilterPrepare(const uint32_t& hash,
const Slice& filter,
const uint32_t& num_lines,
uint32_t* bit_offset) {
uint32_t len = static_cast<uint32_t>(filter.size());
if (len <= 5) return; // remain the same with original filter
// It is ensured the params are valid before calling it
assert(num_lines != 0 && (len - 5) % num_lines == 0);
uint32_t h = hash;
// Left shift by an extra 3 to convert bytes to bits
uint32_t b = (h % num_lines) << (log2_cache_line_size_ + 3);
PREFETCH(&filter.data()[b / 8], 0 /* rw */, 1 /* locality */);
PREFETCH(&filter.data()[b / 8 + (1 << log2_cache_line_size_) - 1],
0 /* rw */, 1 /* locality */);
*bit_offset = b;
}
bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
const Slice& filter,
const size_t& num_probes,
const uint32_t& bit_offset) {
uint32_t len = static_cast<uint32_t>(filter.size());
if (len <= 5) return false; // remain the same with original filter
// It is ensured the params are valid before calling it
assert(num_probes != 0);
const char* data = filter.data();
uint32_t h = hash;
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (uint32_t i = 0; i < num_probes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos =
bit_offset + (h & ((1 << (log2_cache_line_size_ + 3)) - 1));
if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
return true;
}
// An implementation of filter policy // An implementation of filter policy
class BloomFilterPolicy : public FilterPolicy { class BloomFilterPolicy : public FilterPolicy {
public: public:
@ -326,56 +264,43 @@ class BloomFilterPolicy : public FilterPolicy {
void CreateFilter(const Slice* keys, int n, std::string* dst) const override { void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
// Compute bloom filter size (in both bits and bytes) // Compute bloom filter size (in both bits and bytes)
size_t bits = n * bits_per_key_; uint32_t bits = static_cast<uint32_t>(n * bits_per_key_);
// For small n, we can see a very high false positive rate. Fix it // For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length. // by enforcing a minimum bloom filter length.
if (bits < 64) bits = 64; if (bits < 64) bits = 64;
size_t bytes = (bits + 7) / 8; uint32_t bytes = (bits + 7) / 8;
bits = bytes * 8; bits = bytes * 8;
const size_t init_size = dst->size(); const size_t init_size = dst->size();
dst->resize(init_size + bytes, 0); dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(num_probes_)); // Remember # of probes dst->push_back(static_cast<char>(num_probes_)); // Remember # of probes
char* array = &(*dst)[init_size]; char* array = &(*dst)[init_size];
for (size_t i = 0; i < static_cast<size_t>(n); i++) { for (int i = 0; i < n; i++) {
// Use double-hashing to generate a sequence of hash values. LegacyNoLocalityBloomImpl::AddHash(hash_func_(keys[i]), bits,
// See analysis in [Kirsch,Mitzenmacher 2006]. num_probes_, array);
uint32_t h = hash_func_(keys[i]);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < num_probes_; j++) {
const uint32_t bitpos = h % bits;
array[bitpos/8] |= (1 << (bitpos % 8));
h += delta;
}
} }
} }
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override { bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
const size_t len = bloom_filter.size(); const size_t len = bloom_filter.size();
if (len < 2) return false; if (len < 2 || len > 0xffffffffU) { return false; }
const char* array = bloom_filter.data(); const char* array = bloom_filter.data();
const size_t bits = (len - 1) * 8; const uint32_t bits = static_cast<uint32_t>(len - 1) * 8;
// Use the encoded k so that we can read filters generated by // Use the encoded k so that we can read filters generated by
// bloom filters created using different parameters. // bloom filters created using different parameters.
const size_t k = array[len-1]; const int k = static_cast<uint8_t>(array[len-1]);
if (k > 30) { if (k > 30) {
// Reserved for potentially new encodings for short bloom filters. // Reserved for potentially new encodings for short bloom filters.
// Consider it a match. // Consider it a match.
return true; return true;
} }
// NB: using k not num_probes_
uint32_t h = hash_func_(key); return LegacyNoLocalityBloomImpl::HashMayMatch(hash_func_(key), bits,
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits k, array);
for (size_t j = 0; j < k; j++) {
const uint32_t bitpos = h % bits;
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
h += delta;
}
return true;
} }
FilterBitsBuilder* GetFilterBitsBuilder() const override { FilterBitsBuilder* GetFilterBitsBuilder() const override {
@ -394,15 +319,15 @@ class BloomFilterPolicy : public FilterPolicy {
bool UseBlockBasedBuilder() { return use_block_based_builder_; } bool UseBlockBasedBuilder() { return use_block_based_builder_; }
private: private:
size_t bits_per_key_; int bits_per_key_;
size_t num_probes_; int num_probes_;
uint32_t (*hash_func_)(const Slice& key); uint32_t (*hash_func_)(const Slice& key);
const bool use_block_based_builder_; const bool use_block_based_builder_;
void initialize() { void initialize() {
// We intentionally round down to reduce probing cost a little bit // We intentionally round down to reduce probing cost a little bit
num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2) num_probes_ = static_cast<int>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
if (num_probes_ < 1) num_probes_ = 1; if (num_probes_ < 1) num_probes_ = 1;
if (num_probes_ > 30) num_probes_ = 30; if (num_probes_ > 30) num_probes_ = 30;
} }

140
util/bloom_impl.h Normal file
View File

@ -0,0 +1,140 @@
// Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Implementation details of various Bloom filter implementations used in
// RocksDB. (DynamicBloom is in a separate file for now because it
// supports concurrent write.)
#pragma once
#include <stddef.h>
#include <stdint.h>
#include "rocksdb/slice.h"
namespace rocksdb {
// Legacy Bloom filter logic whose probes have no locality (slow): each probe
// may land anywhere in the bit array. The probe sequence comes from double
// hashing. Asymptotic analysis is in [Kirsch,Mitzenmacher 2006], but the
// scheme is known to have subtle accuracy flaws for practical sizes
// [Dillinger,Manolios 2004].
//
// DO NOT REUSE - faster and more predictably accurate implementations
// are available at
// https://github.com/pdillinger/wormhashing/blob/master/bloom_simulation_tests/foo.cc
// See e.g. RocksDB DynamicBloom.
//
class LegacyNoLocalityBloomImpl {
 public:
  // Sets, in the bit array starting at `data`, the `num_probes` bits chosen
  // by hash `h`. Every bit index is reduced modulo `total_bits`, so
  // `total_bits` must be non-zero.
  static inline void AddHash(uint32_t h, uint32_t total_bits, int num_probes,
                             char *data) {
    // Per-probe increment: h rotated right by 17 bits
    const uint32_t step = (h >> 17) | (h << 15);
    for (int remaining = num_probes; remaining > 0; --remaining) {
      const uint32_t bit_index = h % total_bits;
      char &target_byte = data[bit_index >> 3];
      target_byte |= (1 << (bit_index & 7));
      h += step;
    }
  }

  // Returns false as soon as one of the `num_probes` bits chosen by hash `h`
  // is found clear in the bit array at `data`; returns true when every
  // probed bit is set (including when `num_probes` is not positive).
  // `total_bits` must be non-zero because it is used as a modulus.
  static inline bool HashMayMatch(uint32_t h, uint32_t total_bits,
                                  int num_probes, const char *data) {
    // Per-probe increment: h rotated right by 17 bits
    const uint32_t step = (h >> 17) | (h << 15);
    for (int remaining = num_probes; remaining > 0; --remaining) {
      const uint32_t bit_index = h % total_bits;
      const int bit_mask = 1 << (bit_index & 7);
      if (!(data[bit_index >> 3] & bit_mask)) {
        return false;
      }
      h += step;
    }
    return true;
  }
};
// Legacy Bloom filter logic in which all probes for a key fall inside one
// cache line (fast). SST files might be transported between platforms, so
// the cache line size is passed in as a parameter instead of being hard
// coded. (When it is passed as a constant, an optimizing compiler should
// still take advantage of that.)
//
// With ExtraRotates == false this implementation is notably deficient in
// accuracy. Specifically, it uses double hashing with a 1/512 chance of the
// increment being zero (when cache line size is 512 bits). Thus, there's a
// 1/512 chance of probing only one index, which we'd expect to incur about
// a 1/2 * 1/512 or absolute 0.1% FP rate penalty. More detail at
// https://github.com/facebook/rocksdb/issues/4120
//
// DO NOT REUSE - faster and more predictably accurate implementations
// are available at
// https://github.com/pdillinger/wormhashing/blob/master/bloom_simulation_tests/foo.cc
// See e.g. RocksDB DynamicBloom.
//
// NOTE(review): PREFETCH is a macro that none of this header's visible
// includes obviously defines; presumably every includer pulls in
// port/port.h first - confirm.
//
template <bool ExtraRotates>
class LegacyLocalityBloomImpl {
 private:
  // Selects the cache line index, in [0, num_lines), for hash `h`. With
  // ExtraRotates the hash is first rotated right by 11 bits. `num_lines`
  // must be non-zero because it is used as a modulus.
  static inline uint32_t GetLine(uint32_t h, uint32_t num_lines) {
    if (ExtraRotates) {
      h = (h >> 11) | (h << 21);
    }
    return h % num_lines;
  }

 public:
  // Sets the `num_probes` bits chosen by hash `h`, all within the single
  // cache line that GetLine() selects out of the `num_lines` lines starting
  // at `data`. `log2_cache_line_bytes` is log2 of the line size in bytes.
  static inline void AddHash(uint32_t h, uint32_t num_lines, int num_probes,
                             char *data, int log2_cache_line_bytes) {
    // +3 converts a byte count exponent into a bit count exponent
    const int line_bits_shift = log2_cache_line_bytes + 3;
    // Mask to bit-within-cache-line address
    const uint32_t in_line_mask = (1 << line_bits_shift) - 1;
    // Per-probe increment: h rotated right by 17 bits
    const uint32_t step = (h >> 17) | (h << 15);
    char *const line = data + (GetLine(h, num_lines) << log2_cache_line_bytes);
    for (int remaining = num_probes; remaining > 0; --remaining) {
      const uint32_t bit_index = h & in_line_mask;
      line[bit_index >> 3] |= (1 << (bit_index & 7));
      if (ExtraRotates) {
        // Rotate right by one line's worth of address bits before stepping
        h = (h >> line_bits_shift) | (h << (32 - line_bits_shift));
      }
      h += step;
    }
  }

  // Computes the byte offset, relative to `data`, of the cache line that
  // hash `h` maps to, issues prefetches for the first and last byte of that
  // line, and stores the offset in `*byte_offset` for a later call to
  // HashMayMatchPrepared(). Prefetching both ends presumably covers filter
  // data that is not cache-line aligned - TODO confirm.
  static inline void PrepareHashMayMatch(uint32_t h, uint32_t num_lines,
                                         const char *data,
                                         uint32_t /*out*/ *byte_offset,
                                         int log2_cache_line_bytes) {
    const uint32_t line_start = GetLine(h, num_lines) << log2_cache_line_bytes;
    const char *const first_byte = data + line_start;
    PREFETCH(first_byte, 0 /* rw */, 1 /* locality */);
    PREFETCH(first_byte + ((1 << log2_cache_line_bytes) - 1),
             0 /* rw */, 1 /* locality */);
    *byte_offset = line_start;
  }

  // One-shot query: locates the cache line for hash `h` (without
  // prefetching) and checks its probe bits via HashMayMatchPrepared().
  static inline bool HashMayMatch(uint32_t h, uint32_t num_lines,
                                  int num_probes, const char *data,
                                  int log2_cache_line_bytes) {
    const uint32_t line_start = GetLine(h, num_lines) << log2_cache_line_bytes;
    const char *const line = data + line_start;
    return HashMayMatchPrepared(h, num_probes, line, log2_cache_line_bytes);
  }

  // Checks the `num_probes` bits chosen by hash `h` inside the cache line
  // that begins at `data_at_offset` (i.e. the filter data pointer already
  // advanced by the line's byte offset). Returns false at the first clear
  // bit and true when every probed bit is set.
  static inline bool HashMayMatchPrepared(uint32_t h, int num_probes,
                                          const char *data_at_offset,
                                          int log2_cache_line_bytes) {
    // +3 converts a byte count exponent into a bit count exponent
    const int line_bits_shift = log2_cache_line_bytes + 3;
    // Mask to bit-within-cache-line address
    const uint32_t in_line_mask = (1 << line_bits_shift) - 1;
    // Per-probe increment: h rotated right by 17 bits
    const uint32_t step = (h >> 17) | (h << 15);
    for (int remaining = num_probes; remaining > 0; --remaining) {
      const uint32_t bit_index = h & in_line_mask;
      const int bit_mask = 1 << (bit_index & 7);
      if (!(data_at_offset[bit_index >> 3] & bit_mask)) {
        return false;
      }
      if (ExtraRotates) {
        // Rotate right by one line's worth of address bits before stepping
        h = (h >> line_bits_shift) | (h << (32 - line_bits_shift));
      }
      h += step;
    }
    return true;
  }
};
} // namespace rocksdb

View File

@ -352,7 +352,8 @@ TEST_F(FullBloomTest, FullVaryingLengths) {
} }
Build(); Build();
ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5)) << length; ASSERT_LE(FilterSize(),
(size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
// All added keys must match // All added keys must match
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {