diff --git a/Makefile b/Makefile
index dc90bdc9ae..898570f84a 100644
--- a/Makefile
+++ b/Makefile
@@ -332,6 +332,11 @@ ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
 endif
 endif
 
+ifdef TEST_CACHE_LINE_SIZE
+  PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
+  PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
+endif
+
 # This (the first rule) must depend on "all".
 default: all
 
diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc
index 956accef82..0c3bb89146 100644
--- a/db/db_properties_test.cc
+++ b/db/db_properties_test.cc
@@ -212,7 +212,8 @@ void VerifySimilar(uint64_t a, uint64_t b, double bias) {
 
 void VerifyTableProperties(const TableProperties& base_tp,
                            const TableProperties& new_tp,
-                           double filter_size_bias = 0.1,
+                           double filter_size_bias =
+                               CACHE_LINE_SIZE >= 256 ? 0.15 : 0.1,
                            double index_size_bias = 0.1,
                            double data_size_bias = 0.1,
                            double num_data_blocks_bias = 0.05) {
@@ -266,7 +267,8 @@ void GetExpectedTableProperties(
         // discount 1 byte as value size is not encoded in value delta encoding
         (value_delta_encoding ? 1 : 0));
     expected_tp->filter_size =
-        kTableCount * (kKeysPerTable * kBloomBitsPerKey / 8);
+        kTableCount * ((kKeysPerTable * kBloomBitsPerKey + 7) / 8 +
+                       /*average-ish overhead*/ CACHE_LINE_SIZE / 2);
   }
 }  // anonymous namespace
 
diff --git a/port/port_posix.h b/port/port_posix.h
index 51eb241629..49d2b9ae85 100644
--- a/port/port_posix.h
+++ b/port/port_posix.h
@@ -178,22 +178,31 @@ typedef pthread_once_t OnceType;
 extern void InitOnce(OnceType* once, void (*initializer)());
 
 #ifndef CACHE_LINE_SIZE
-  #if defined(__s390__)
-    #define CACHE_LINE_SIZE 256U
-  #elif defined(__powerpc__) || defined(__aarch64__)
-    #define CACHE_LINE_SIZE 128U
+  // To test behavior with non-native cache line size, e.g. for
+  // Bloom filters, set TEST_CACHE_LINE_SIZE to the desired test size.
+  // This disables ALIGN_AS to keep it from failing compilation.
+  #ifdef TEST_CACHE_LINE_SIZE
+    #define CACHE_LINE_SIZE TEST_CACHE_LINE_SIZE
+    #define ALIGN_AS(n) /*empty*/
   #else
-    #define CACHE_LINE_SIZE 64U
+    #if defined(__s390__)
+      #define CACHE_LINE_SIZE 256U
+    #elif defined(__powerpc__) || defined(__aarch64__)
+      #define CACHE_LINE_SIZE 128U
+    #else
+      #define CACHE_LINE_SIZE 64U
+    #endif
+    #define ALIGN_AS(n) alignas(n)
   #endif
 #endif
 
+static_assert((CACHE_LINE_SIZE & (CACHE_LINE_SIZE - 1)) == 0,
+              "Cache line size must be a power of 2 number of bytes");
+
 extern void *cacheline_aligned_alloc(size_t size);
 extern void cacheline_aligned_free(void *memblock);
 
-#define ALIGN_AS(n) alignas(n)
-
 #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
 
 extern void Crash(const std::string& srcfile, int srcline);
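The TEST_CACHE_LINE_SIZE hook added in the Makefile feeds the port_posix.h change above: it overrides CACHE_LINE_SIZE and turns ALIGN_AS into a no-op so over-aligned types still compile. A build would typically pass it as something like `make TEST_CACHE_LINE_SIZE=256 bloom_test` (target name assumed). The standalone sketch below is illustrative only, not RocksDB code; it mirrors the macro logic and the new power-of-two static_assert, and the PerCoreCounter struct is a hypothetical user of ALIGN_AS.

```cpp
// Compile normally for the native path, or with -DTEST_CACHE_LINE_SIZE=256
// to simulate a platform with larger cache lines.
#include <cstdio>

#ifndef CACHE_LINE_SIZE
#ifdef TEST_CACHE_LINE_SIZE
// Alignment is disabled in this mode, echoing the comment in port_posix.h:
// requesting alignment beyond what the build target supports can fail to
// compile, and the override is only about Bloom filter bit layout.
#define CACHE_LINE_SIZE TEST_CACHE_LINE_SIZE
#define ALIGN_AS(n) /*empty*/
#else
#define CACHE_LINE_SIZE 64U
#define ALIGN_AS(n) alignas(n)
#endif
#endif

static_assert((CACHE_LINE_SIZE & (CACHE_LINE_SIZE - 1)) == 0,
              "Cache line size must be a power of 2 number of bytes");

struct ALIGN_AS(CACHE_LINE_SIZE) PerCoreCounter {  // hypothetical user
  long value = 0;
};

int main() {
  std::printf("CACHE_LINE_SIZE = %u, alignof(PerCoreCounter) = %zu\n",
              static_cast<unsigned>(CACHE_LINE_SIZE),
              alignof(PerCoreCounter));
  return 0;
}
```

With the override in place, Bloom filter code that partitions bits by cache line can be exercised with 128- or 256-byte lines on a 64-byte machine, which is what the relaxed bias in db_properties_test.cc accounts for.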
diff --git a/table/full_filter_bits_builder.h b/table/full_filter_bits_builder.h
index c719c698aa..5e0b9bb194 100644
--- a/table/full_filter_bits_builder.h
+++ b/table/full_filter_bits_builder.h
@@ -20,8 +20,8 @@ class Slice;
 
 class FullFilterBitsBuilder : public FilterBitsBuilder {
  public:
-  explicit FullFilterBitsBuilder(const size_t bits_per_key,
-                                 const size_t num_probes);
+  explicit FullFilterBitsBuilder(const int bits_per_key,
+                                 const int num_probes);
 
   // No Copy allowed
   FullFilterBitsBuilder(const FullFilterBitsBuilder&) = delete;
@@ -56,8 +56,8 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
  private:
  friend class FullFilterBlockTest_DuplicateEntries_Test;
 
-  size_t bits_per_key_;
-  size_t num_probes_;
+  int bits_per_key_;
+  int num_probes_;
   std::vector<uint32_t> hash_entries_;
 
   // Get totalbits that optimized for cpu cache line
diff --git a/table/plain/plain_table_bloom.cc b/table/plain/plain_table_bloom.cc
index 778b3b558a..542010169d 100644
--- a/table/plain/plain_table_bloom.cc
+++ b/table/plain/plain_table_bloom.cc
@@ -33,9 +33,9 @@ uint32_t GetTotalBitsForLocality(uint32_t total_bits) {
 PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes)
     : kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {}
 
-void PlainTableBloomV1::SetRawData(unsigned char* raw_data, uint32_t total_bits,
+void PlainTableBloomV1::SetRawData(char* raw_data, uint32_t total_bits,
                                    uint32_t num_blocks) {
-  data_ = reinterpret_cast<uint8_t*>(raw_data);
+  data_ = raw_data;
   kTotalBits = total_bits;
   kNumBlocks = num_blocks;
 }
@@ -63,7 +63,7 @@ void PlainTableBloomV1::SetTotalBits(Allocator* allocator,
   if (kNumBlocks > 0 && cache_line_offset > 0) {
     raw += CACHE_LINE_SIZE - cache_line_offset;
   }
-  data_ = reinterpret_cast<uint8_t*>(raw);
+  data_ = raw;
 }
 
 void BloomBlockBuilder::AddKeysHashes(const std::vector<uint32_t>& keys_hashes) {
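The SetTotalBits context above keeps the bit array cache-line aligned by bumping the raw arena pointer to the next CACHE_LINE_SIZE boundary. A minimal standalone sketch of that adjustment follows; the helper name and the 64-byte constant are invented for illustration, and the surrounding allocation (not shown in the hunk) is assumed to reserve enough slack to absorb the bump.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr std::uintptr_t kCacheLineSize = 64;  // assumed; RocksDB uses CACHE_LINE_SIZE

char* AlignUpToCacheLine(char* raw) {
  std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(raw);
  std::uintptr_t offset = addr % kCacheLineSize;
  if (offset > 0) {
    raw += kCacheLineSize - offset;  // same adjustment as in SetTotalBits
  }
  return raw;
}

int main() {
  alignas(64) static char buffer[3 * kCacheLineSize];
  char* unaligned = buffer + 10;  // pretend the allocator returned this
  char* aligned = AlignUpToCacheLine(unaligned);
  assert(reinterpret_cast<std::uintptr_t>(aligned) % kCacheLineSize == 0);
  std::printf("bumped pointer by %td bytes\n", aligned - unaligned);
  return 0;
}
```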
diff --git a/table/plain/plain_table_bloom.h b/table/plain/plain_table_bloom.h
index b9248cdaf1..271aa8f4fe 100644
--- a/table/plain/plain_table_bloom.h
+++ b/table/plain/plain_table_bloom.h
@@ -10,8 +10,11 @@
 #include "rocksdb/slice.h"
 
 #include "port/port.h"
+#include "util/bloom_impl.h"
 #include "util/hash.h"
 
+#include "third-party/folly/folly/ConstexprMath.h"
+
 #include <memory>
 
 namespace rocksdb {
@@ -51,10 +54,10 @@ class PlainTableBloomV1 {
   uint32_t GetNumBlocks() const { return kNumBlocks; }
 
   Slice GetRawData() const {
-    return Slice(reinterpret_cast<const char*>(data_), GetTotalBits() / 8);
+    return Slice(data_, GetTotalBits() / 8);
   }
 
-  void SetRawData(unsigned char* raw_data, uint32_t total_bits,
+  void SetRawData(char* raw_data, uint32_t total_bits,
                   uint32_t num_blocks = 0);
 
   uint32_t GetTotalBits() const { return kTotalBits; }
@@ -66,7 +69,10 @@ class PlainTableBloomV1 {
   uint32_t kNumBlocks;
   const uint32_t kNumProbes;
 
-  uint8_t* data_;
+  char* data_;
+
+  static constexpr int LOG2_CACHE_LINE_SIZE =
+      folly::constexpr_log2(CACHE_LINE_SIZE);
 };
 
 #if defined(_MSC_VER)
@@ -76,8 +82,9 @@ class PlainTableBloomV1 {
 #endif
 inline void PlainTableBloomV1::Prefetch(uint32_t h) {
   if (kNumBlocks != 0) {
-    uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
-    PREFETCH(&(data_[b / 8]), 0, 3);
+    uint32_t ignored;
+    LegacyLocalityBloomImpl</*ExtraRotates*/ true>::PrepareHashMayMatch(
+        h, kNumBlocks, data_, &ignored, LOG2_CACHE_LINE_SIZE);
   }
 }
 #if defined(_MSC_VER)
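Prefetch now funnels into LegacyLocalityBloomImpl's PrepareHashMayMatch, discarding the returned byte offset and keeping only the prefetch side effect. The sketch below shows a hypothetical caller (all names invented) using the intended prefetch-then-probe pattern; both calls must receive the same hash so they resolve to the same cache line.

```cpp
#include <cstdint>
#include <vector>

struct BloomLike {  // stand-in for PlainTableBloomV1, not the real class
  void Prefetch(uint32_t /*h*/) {}
  bool MayContainHash(uint32_t /*h*/) const { return true; }
};

// Returns how many hashes reported "may contain".
int CountPossibleMatches(const std::vector<uint32_t>& hashes,
                         BloomLike& bloom) {
  // First pass: start pulling each target cache line toward the CPU.
  for (uint32_t h : hashes) {
    bloom.Prefetch(h);
  }
  // Second pass: by now the lines are (hopefully) resident, so the probes
  // avoid stalling on memory.
  int possible = 0;
  for (uint32_t h : hashes) {
    possible += bloom.MayContainHash(h) ? 1 : 0;
  }
  return possible;
}

int main() {
  BloomLike bloom;
  std::vector<uint32_t> hashes = {1u, 2u, 3u};
  return CountPossibleMatches(hashes, bloom) == 3 ? 0 : 1;
}
```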
@@ -86,54 +93,22 @@ inline void PlainTableBloomV1::Prefetch(uint32_t h) {
 
 inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const {
   assert(IsInitialized());
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
   if (kNumBlocks != 0) {
-    uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-      // to a simple and operation by compiler.
-      const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
-      if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
-        return false;
-      }
-      // Rotate h so that we don't reuse the same bytes.
-      h = h / (CACHE_LINE_SIZE * 8) +
-          (h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
-      h += delta;
-    }
+    return LegacyLocalityBloomImpl</*ExtraRotates*/ true>::HashMayMatch(
+        h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE);
   } else {
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      const uint32_t bitpos = h % kTotalBits;
-      if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
-        return false;
-      }
-      h += delta;
-    }
+    return LegacyNoLocalityBloomImpl::HashMayMatch(
+        h, kTotalBits, kNumProbes, data_);
   }
-  return true;
 }
 
 inline void PlainTableBloomV1::AddHash(uint32_t h) {
   assert(IsInitialized());
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
   if (kNumBlocks != 0) {
-    uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-      // to a simple and operation by compiler.
-      const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
-      data_[bitpos / 8] |= (1 << (bitpos % 8));
-      // Rotate h so that we don't reuse the same bytes.
-      h = h / (CACHE_LINE_SIZE * 8) +
-          (h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
-      h += delta;
-    }
+    LegacyLocalityBloomImpl</*ExtraRotates*/ true>::AddHash(
+        h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE);
   } else {
-    for (uint32_t i = 0; i < kNumProbes; ++i) {
-      const uint32_t bitpos = h % kTotalBits;
-      data_[bitpos / 8] |= (1 << (bitpos % 8));
-      h += delta;
-    }
+    LegacyNoLocalityBloomImpl::AddHash(h, kTotalBits, kNumProbes, data_);
   }
 }
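The deleted "rotate h" arithmetic and the ExtraRotates path of LegacyLocalityBloomImpl (see the new util/bloom_impl.h later in this patch) are meant to be the same bit mixing, so existing plain-table SST files keep matching. A quick standalone check of that equivalence for a 64-byte cache line; the constants here are assumed for the check and are not part of the change.

```cpp
#include <cassert>
#include <cstdint>

constexpr uint32_t kCacheLineSize = 64;  // assumed
constexpr int kLog2CacheLineBits = 9;    // log2(64 * 8)

uint32_t OldRotate(uint32_t h) {
  // Exactly the expression removed above.
  return h / (kCacheLineSize * 8) +
         (h % (kCacheLineSize * 8)) * (0x20000000U / kCacheLineSize);
}

uint32_t NewRotate(uint32_t h) {
  // The shift/or form used by LegacyLocalityBloomImpl when ExtraRotates is on.
  return (h >> kLog2CacheLineBits) | (h << (32 - kLog2CacheLineBits));
}

int main() {
  uint32_t h = 0x9e3779b9U;  // arbitrary starting pattern
  for (int i = 0; i < 1000; ++i) {
    assert(OldRotate(h) == NewRotate(h));
    h = h * 2654435761U + 12345;  // march through assorted values
  }
  return 0;
}
```

Both forms are a 32-bit rotate right by log2(CACHE_LINE_SIZE * 8) bits: the division keeps the high bits and the multiplication by 0x20000000U / CACHE_LINE_SIZE shifts the low bits up to the top.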
diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc
index 2ac7cf2e33..ed3be5b933 100644
--- a/table/plain/plain_table_reader.cc
+++ b/table/plain/plain_table_reader.cc
@@ -260,23 +260,19 @@ Status PlainTableReader::PopulateIndexRecordList(
   return s;
 }
 
-void PlainTableReader::AllocateAndFillBloom(
-    int bloom_bits_per_key, int num_prefixes, size_t huge_page_tlb_size,
-    std::vector<uint32_t>* prefix_hashes) {
-  if (!IsTotalOrderMode()) {
-    uint32_t bloom_total_bits = num_prefixes * bloom_bits_per_key;
-    if (bloom_total_bits > 0) {
-      enable_bloom_ = true;
-      bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
-                          huge_page_tlb_size, ioptions_.info_log);
-      FillBloom(prefix_hashes);
-    }
+void PlainTableReader::AllocateBloom(int bloom_bits_per_key, int num_keys,
+                                     size_t huge_page_tlb_size) {
+  uint32_t bloom_total_bits = num_keys * bloom_bits_per_key;
+  if (bloom_total_bits > 0) {
+    enable_bloom_ = true;
+    bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
+                        huge_page_tlb_size, ioptions_.info_log);
   }
 }
 
-void PlainTableReader::FillBloom(std::vector<uint32_t>* prefix_hashes) {
+void PlainTableReader::FillBloom(const std::vector<uint32_t>& prefix_hashes) {
   assert(bloom_.IsInitialized());
-  for (auto prefix_hash : *prefix_hashes) {
+  for (const auto prefix_hash : prefix_hashes) {
     bloom_.AddHash(prefix_hash);
   }
 }
@@ -354,14 +350,9 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
   if (!index_in_file) {
     // Allocate bloom filter here for total order mode.
     if (IsTotalOrderMode()) {
-      uint32_t num_bloom_bits =
-          static_cast<uint32_t>(table_properties_->num_entries) *
-          bloom_bits_per_key;
-      if (num_bloom_bits > 0) {
-        enable_bloom_ = true;
-        bloom_.SetTotalBits(&arena_, num_bloom_bits, ioptions_.bloom_locality,
-                            huge_page_tlb_size, ioptions_.info_log);
-      }
+      AllocateBloom(bloom_bits_per_key,
+                    static_cast<int>(table_properties_->num_entries),
+                    huge_page_tlb_size);
     }
   } else if (bloom_in_file) {
     enable_bloom_ = true;
@@ -377,8 +368,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
       }
       // cast away const qualifier, because bloom_ won't be changed
       bloom_.SetRawData(
-          const_cast<unsigned char*>(
-              reinterpret_cast<const unsigned char*>(bloom_block->data())),
+          const_cast<char*>(bloom_block->data()),
           static_cast<uint32_t>(bloom_block->size()) * 8, num_blocks);
     } else {
       // Index in file but no bloom in file. Disable bloom filter in this case.
@@ -392,6 +382,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
   std::vector<uint32_t> prefix_hashes;
   if (!index_in_file) {
+    // Populates bloom_ if enabled (total order mode)
     s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
     if (!s.ok()) {
       return s;
     }
@@ -404,10 +395,15 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
   }
 
   if (!index_in_file) {
-    // Calculated bloom filter size and allocate memory for
-    // bloom filter based on the number of prefixes, then fill it.
-    AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
-                         huge_page_tlb_size, &prefix_hashes);
+    if (!IsTotalOrderMode()) {
+      // Calculate bloom filter size and allocate memory for the
+      // bloom filter based on the number of prefixes, then fill it.
+      AllocateBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
+                    huge_page_tlb_size);
+      if (enable_bloom_) {
+        FillBloom(prefix_hashes);
+      }
+    }
   }
 
   // Fill two table properties.
diff --git a/table/plain/plain_table_reader.h b/table/plain/plain_table_reader.h
index 02539cc697..f95616cc57 100644
--- a/table/plain/plain_table_reader.h
+++ b/table/plain/plain_table_reader.h
@@ -209,12 +209,11 @@ class PlainTableReader: public TableReader {
   Status PopulateIndexRecordList(PlainTableIndexBuilder* index_builder,
                                  std::vector<uint32_t>* prefix_hashes);
 
-  // Internal helper function to allocate memory for bloom filter and fill it
-  void AllocateAndFillBloom(int bloom_bits_per_key, int num_prefixes,
-                            size_t huge_page_tlb_size,
-                            std::vector<uint32_t>* prefix_hashes);
+  // Internal helper function to allocate memory for bloom filter
+  void AllocateBloom(int bloom_bits_per_key, int num_prefixes,
+                     size_t huge_page_tlb_size);
 
-  void FillBloom(std::vector<uint32_t>* prefix_hashes);
+  void FillBloom(const std::vector<uint32_t>& prefix_hashes);
 
   // Read the key and value at `offset` to parameters for keys, the and
   // `seekable`.
diff --git a/third-party/folly/folly/ConstexprMath.h b/third-party/folly/folly/ConstexprMath.h
index b125c5f423..6e3a9966e3 100644
--- a/third-party/folly/folly/ConstexprMath.h
+++ b/third-party/folly/folly/ConstexprMath.h
@@ -14,4 +14,32 @@ template <typename T, typename... Ts>
 constexpr T constexpr_max(T a, T b, Ts... ts) {
   return b < a ? constexpr_max(a, ts...) : constexpr_max(b, ts...);
 }
+
+namespace detail {
+template <typename T>
+constexpr T constexpr_log2_(T a, T e) {
+  return e == T(1) ? a : constexpr_log2_(a + T(1), e / T(2));
+}
+
+template <typename T>
+constexpr T constexpr_log2_ceil_(T l2, T t) {
+  return l2 + T(T(1) << l2 < t ? 1 : 0);
+}
+
+template <typename T>
+constexpr T constexpr_square_(T t) {
+  return t * t;
+}
+} // namespace detail
+
+template <typename T>
+constexpr T constexpr_log2(T t) {
+  return detail::constexpr_log2_(T(0), t);
+}
+
+template <typename T>
+constexpr T constexpr_log2_ceil(T t) {
+  return detail::constexpr_log2_ceil_(constexpr_log2(t), t);
+}
+
 } // namespace folly
diff --git a/util/bloom.cc b/util/bloom.cc
index 23607a51e4..1548f7c251 100644
--- a/util/bloom.cc
+++ b/util/bloom.cc
@@ -13,16 +13,19 @@
 #include "table/block_based/block_based_filter_block.h"
 #include "table/block_based/full_filter_block.h"
 #include "table/full_filter_bits_builder.h"
+#include "third-party/folly/folly/ConstexprMath.h"
+#include "util/bloom_impl.h"
 #include "util/coding.h"
 #include "util/hash.h"
 
 namespace rocksdb {
 
+typedef LegacyLocalityBloomImpl</*ExtraRotates*/ false> LegacyFullFilterImpl;
 class BlockBasedFilterBlockBuilder;
 class FullFilterBlockBuilder;
 
-FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key,
-                                             const size_t num_probes)
+FullFilterBitsBuilder::FullFilterBitsBuilder(const int bits_per_key,
+                                             const int num_probes)
     : bits_per_key_(bits_per_key), num_probes_(num_probes) {
   assert(bits_per_key_);
 }
@@ -74,7 +77,7 @@ uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry,
                                                uint32_t* num_lines) {
   assert(bits_per_key_);
   if (num_entry != 0) {
-    uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_);
+    uint32_t total_bits_tmp = static_cast<uint32_t>(num_entry * bits_per_key_);
 
     *total_bits = GetTotalBitsForLocality(total_bits_tmp);
     *num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
@@ -124,24 +127,16 @@ inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
 #endif
   assert(num_lines > 0 && total_bits > 0);
 
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-  uint32_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8);
-
-  for (uint32_t i = 0; i < num_probes_; ++i) {
-    // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-    // to a simple operation by compiler.
-    const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
-    data[bitpos / 8] |= (1 << (bitpos % 8));
-
-    h += delta;
-  }
+  LegacyFullFilterImpl::AddHash(
+      h, num_lines, num_probes_, data,
+      folly::constexpr_log2(CACHE_LINE_SIZE));
 }
 
 namespace {
 class FullFilterBitsReader : public FilterBitsReader {
  public:
   explicit FullFilterBitsReader(const Slice& contents)
-      : data_(const_cast<char*>(contents.data())),
+      : data_(contents.data()),
         data_len_(static_cast<uint32_t>(contents.size())),
         num_probes_(0),
         num_lines_(0),
@@ -177,16 +172,23 @@ class FullFilterBitsReader : public FilterBitsReader {
 
   ~FullFilterBitsReader() override {}
 
-  bool MayMatch(const Slice& entry) override {
+  // "contents" contains the data built by a preceding call to
+  // FilterBitsBuilder::Finish. MayMatch must return true if the key was
+  // passed to FilterBitsBuilder::AddKey. This method may return true or false
+  // if the key was not on the list, but it should aim to return false with a
+  // high probability.
+  bool MayMatch(const Slice& key) override {
     if (data_len_ <= 5) {  // remain same with original filter
       return false;
     }
     // Other Error params, including a broken filter, regarded as match
     if (num_probes_ == 0 || num_lines_ == 0) return true;
-    uint32_t hash = BloomHash(entry);
-    uint32_t bit_offset;
-    FilterPrepare(hash, Slice(data_, data_len_), num_lines_, &bit_offset);
-    return HashMayMatch(hash, Slice(data_, data_len_), num_probes_, bit_offset);
+    uint32_t hash = BloomHash(key);
+    uint32_t byte_offset;
+    LegacyFullFilterImpl::PrepareHashMayMatch(
+        hash, num_lines_, data_, /*out*/&byte_offset, log2_cache_line_size_);
+    return LegacyFullFilterImpl::HashMayMatchPrepared(
+        hash, num_probes_, data_ + byte_offset, log2_cache_line_size_);
   }
 
   virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
@@ -202,16 +204,18 @@ class FullFilterBitsReader : public FilterBitsReader {
     // Other Error params, including a broken filter, regarded as match
     if (num_probes_ == 0 || num_lines_ == 0) return;
     uint32_t hashes[MultiGetContext::MAX_BATCH_SIZE];
-    uint32_t bit_offsets[MultiGetContext::MAX_BATCH_SIZE];
+    uint32_t byte_offsets[MultiGetContext::MAX_BATCH_SIZE];
     for (int i = 0; i < num_keys; ++i) {
       hashes[i] = BloomHash(*keys[i]);
-      FilterPrepare(hashes[i], Slice(data_, data_len_), num_lines_,
-                    &bit_offsets[i]);
+      LegacyFullFilterImpl::PrepareHashMayMatch(
+          hashes[i], num_lines_, data_,
+          /*out*/&byte_offsets[i], log2_cache_line_size_);
     }
 
     for (int i = 0; i < num_keys; ++i) {
-      if (!HashMayMatch(hashes[i], Slice(data_, data_len_), num_probes_,
-                        bit_offsets[i])) {
+      if (!LegacyFullFilterImpl::HashMayMatchPrepared(
+              hashes[i], num_probes_,
+              data_ + byte_offsets[i], log2_cache_line_size_)) {
         may_match[i] = false;
       }
     }
@@ -219,38 +223,20 @@
 
  private:
   // Filter meta data
-  char* data_;
+  const char* data_;
   uint32_t data_len_;
-  size_t num_probes_;
+  int num_probes_;
   uint32_t num_lines_;
   uint32_t log2_cache_line_size_;
 
   // Get num_probes, and num_lines from filter
   // If filter format broken, set both to 0.
-  void GetFilterMeta(const Slice& filter, size_t* num_probes,
+  void GetFilterMeta(const Slice& filter, int* num_probes,
                      uint32_t* num_lines);
-
-  // "filter" contains the data appended by a preceding call to
-  // FilterBitsBuilder::Finish. This method must return true if the key was
-  // passed to FilterBitsBuilder::AddKey. This method may return true or false
-  // if the key was not on the list, but it should aim to return false with a
-  // high probability.
-  //
-  // hash: target to be checked
-  // filter: the whole filter, including meta data bytes
-  // num_probes: number of probes, read before hand
-  // num_lines: filter metadata, read before hand
-  // Before calling this function, need to ensure the input meta data
-  // is valid.
-  bool HashMayMatch(const uint32_t& hash, const Slice& filter,
-                    const size_t& num_probes, const uint32_t& bit_offset);
-
-  void FilterPrepare(const uint32_t& hash, const Slice& filter,
-                     const uint32_t& num_lines, uint32_t* bit_offset);
 };
 
 void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
-                                         size_t* num_probes, uint32_t* num_lines) {
+                                         int* num_probes, uint32_t* num_lines) {
   uint32_t len = static_cast<uint32_t>(filter.size());
   if (len <= 5) {
     // filter is empty or broken
@@ -263,54 +249,6 @@ void FullFilterBitsReader::GetFilterMeta(const Slice& filter,
   *num_lines = DecodeFixed32(filter.data() + len - 4);
 }
 
-void FullFilterBitsReader::FilterPrepare(const uint32_t& hash,
-                                         const Slice& filter,
-                                         const uint32_t& num_lines,
-                                         uint32_t* bit_offset) {
-  uint32_t len = static_cast<uint32_t>(filter.size());
-  if (len <= 5) return;  // remain the same with original filter
-
-  // It is ensured the params are valid before calling it
-  assert(num_lines != 0 && (len - 5) % num_lines == 0);
-
-  uint32_t h = hash;
-  // Left shift by an extra 3 to convert bytes to bits
-  uint32_t b = (h % num_lines) << (log2_cache_line_size_ + 3);
-  PREFETCH(&filter.data()[b / 8], 0 /* rw */, 1 /* locality */);
-  PREFETCH(&filter.data()[b / 8 + (1 << log2_cache_line_size_) - 1],
-           0 /* rw */, 1 /* locality */);
-  *bit_offset = b;
-}
-
-bool FullFilterBitsReader::HashMayMatch(const uint32_t& hash,
-                                        const Slice& filter,
-                                        const size_t& num_probes,
-                                        const uint32_t& bit_offset) {
-  uint32_t len = static_cast<uint32_t>(filter.size());
-  if (len <= 5) return false;  // remain the same with original filter
-
-  // It is ensured the params are valid before calling it
-  assert(num_probes != 0);
-  const char* data = filter.data();
-
-  uint32_t h = hash;
-  const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-
-  for (uint32_t i = 0; i < num_probes; ++i) {
-    // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
-    // to a simple and operation by compiler.
-    const uint32_t bitpos =
-        bit_offset + (h & ((1 << (log2_cache_line_size_ + 3)) - 1));
-    if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
-      return false;
-    }
-
-    h += delta;
-  }
-
-  return true;
-}
-
 // An implementation of filter policy
 class BloomFilterPolicy : public FilterPolicy {
  public:
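The removed FilterPrepare/HashMayMatch pair is replaced by LegacyFullFilterImpl::PrepareHashMayMatch and HashMayMatchPrepared, keeping the same two-phase idea: resolve and prefetch each key's cache line first, then probe bits within it. Below is a simplified standalone sketch of that batched pattern; it mirrors the ExtraRotates=false probing loop, is not the RocksDB implementation, and calls the GCC/Clang __builtin_prefetch directly instead of the PREFETCH macro.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

struct PreparedProbe {
  uint32_t hash;
  uint32_t byte_offset;  // start of the hash's cache line within the filter
};

std::vector<bool> BatchedMayMatch(const std::vector<uint32_t>& hashes,
                                  const char* filter_data, uint32_t num_lines,
                                  int num_probes, int log2_cache_line_bytes) {
  std::vector<PreparedProbe> prepared;
  prepared.reserve(hashes.size());
  // Phase 1: resolve cache lines and start the memory loads early.
  for (uint32_t h : hashes) {
    uint32_t byte_offset = (h % num_lines) << log2_cache_line_bytes;
    __builtin_prefetch(filter_data + byte_offset, 0, 1);
    prepared.push_back({h, byte_offset});
  }
  // Phase 2: probe bits within each (hopefully resident) line.
  const int log2_line_bits = log2_cache_line_bytes + 3;
  std::vector<bool> result;
  result.reserve(prepared.size());
  for (const PreparedProbe& p : prepared) {
    uint32_t h = p.hash;
    const uint32_t delta = (h >> 17) | (h << 15);  // double hashing increment
    bool may_match = true;
    for (int i = 0; i < num_probes; ++i) {
      uint32_t bitpos = h & ((1u << log2_line_bits) - 1);
      if ((filter_data[p.byte_offset + bitpos / 8] & (1 << (bitpos % 8))) == 0) {
        may_match = false;
        break;
      }
      h += delta;
    }
    result.push_back(may_match);
  }
  return result;
}

int main() {
  std::vector<char> filter(4 * 64, 0);  // 4 empty 64-byte cache lines
  std::vector<uint32_t> hashes = {0x12345678u, 0x9abcdef0u};
  // With an all-zero filter, every probe should miss.
  for (bool m : BatchedMayMatch(hashes, filter.data(), /*num_lines=*/4,
                                /*num_probes=*/6,
                                /*log2_cache_line_bytes=*/6)) {
    assert(!m);
  }
  return 0;
}
```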
@@ -326,56 +264,43 @@ class BloomFilterPolicy : public FilterPolicy {
   void CreateFilter(const Slice* keys, int n,
                     std::string* dst) const override {
     // Compute bloom filter size (in both bits and bytes)
-    size_t bits = n * bits_per_key_;
+    uint32_t bits = static_cast<uint32_t>(n * bits_per_key_);
 
     // For small n, we can see a very high false positive rate.  Fix it
     // by enforcing a minimum bloom filter length.
     if (bits < 64) bits = 64;
 
-    size_t bytes = (bits + 7) / 8;
+    uint32_t bytes = (bits + 7) / 8;
     bits = bytes * 8;
 
     const size_t init_size = dst->size();
     dst->resize(init_size + bytes, 0);
     dst->push_back(static_cast<char>(num_probes_));  // Remember # of probes
     char* array = &(*dst)[init_size];
-    for (size_t i = 0; i < static_cast<size_t>(n); i++) {
-      // Use double-hashing to generate a sequence of hash values.
-      // See analysis in [Kirsch,Mitzenmacher 2006].
-      uint32_t h = hash_func_(keys[i]);
-      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-      for (size_t j = 0; j < num_probes_; j++) {
-        const uint32_t bitpos = h % bits;
-        array[bitpos/8] |= (1 << (bitpos % 8));
-        h += delta;
-      }
+    for (int i = 0; i < n; i++) {
+      LegacyNoLocalityBloomImpl::AddHash(hash_func_(keys[i]), bits,
+                                         num_probes_, array);
     }
   }
 
   bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
     const size_t len = bloom_filter.size();
-    if (len < 2) return false;
+    if (len < 2 || len > 0xffffffffU) { return false; }
 
     const char* array = bloom_filter.data();
-    const size_t bits = (len - 1) * 8;
+    const uint32_t bits = static_cast<uint32_t>(len - 1) * 8;
 
     // Use the encoded k so that we can read filters generated by
     // bloom filters created using different parameters.
-    const size_t k = array[len-1];
+    const int k = static_cast<int>(array[len-1]);
     if (k > 30) {
       // Reserved for potentially new encodings for short bloom filters.
       // Consider it a match.
       return true;
     }
-
-    uint32_t h = hash_func_(key);
-    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
-    for (size_t j = 0; j < k; j++) {
-      const uint32_t bitpos = h % bits;
-      if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
-      h += delta;
-    }
-    return true;
+    // NB: using k not num_probes_
+    return LegacyNoLocalityBloomImpl::HashMayMatch(hash_func_(key), bits,
+                                                   k, array);
   }
 
   FilterBitsBuilder* GetFilterBitsBuilder() const override {
@@ -394,15 +319,15 @@ class BloomFilterPolicy : public FilterPolicy {
   bool UseBlockBasedBuilder() { return use_block_based_builder_; }
 
  private:
-  size_t bits_per_key_;
-  size_t num_probes_;
+  int bits_per_key_;
+  int num_probes_;
   uint32_t (*hash_func_)(const Slice& key);
   const bool use_block_based_builder_;
 
   void initialize() {
     // We intentionally round down to reduce probing cost a little bit
-    num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69);  // 0.69 =~ ln(2)
+    num_probes_ = static_cast<int>(bits_per_key_ * 0.69);  // 0.69 =~ ln(2)
     if (num_probes_ < 1) num_probes_ = 1;
     if (num_probes_ > 30) num_probes_ = 30;
   }
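CreateFilter and KeyMayMatch above maintain the legacy block-based filter format: a bit array of `bytes` bytes followed by a single trailing byte holding k, the probe count, which KeyMayMatch reads back so it can interpret filters built with different parameters (k > 30 is reserved and treated as a match). The sketch below rebuilds that layout on top of the new LegacyNoLocalityBloomImpl; the hash function is a stand-in, not RocksDB's BloomHash, and the snippet assumes the RocksDB root is on the include path.

```cpp
#include <cassert>
#include <cstdint>
#include <string>

#include "util/bloom_impl.h"

uint32_t StandInHash(const std::string& key) {  // not RocksDB's BloomHash
  uint32_t h = 2166136261u;
  for (char c : key) { h = (h ^ static_cast<uint8_t>(c)) * 16777619u; }
  return h;
}

std::string BuildLegacyFilter(const std::string* keys, int n,
                              int bits_per_key, int num_probes) {
  uint32_t bits = static_cast<uint32_t>(n * bits_per_key);
  if (bits < 64) bits = 64;           // minimum length, as in CreateFilter
  uint32_t bytes = (bits + 7) / 8;
  bits = bytes * 8;
  std::string dst(bytes, '\0');
  dst.push_back(static_cast<char>(num_probes));  // trailing k byte
  char* array = &dst[0];
  for (int i = 0; i < n; i++) {
    rocksdb::LegacyNoLocalityBloomImpl::AddHash(StandInHash(keys[i]), bits,
                                                num_probes, array);
  }
  return dst;
}

bool LegacyKeyMayMatch(const std::string& key, const std::string& filter) {
  if (filter.size() < 2) return false;
  const char* array = filter.data();
  const uint32_t bits = static_cast<uint32_t>(filter.size() - 1) * 8;
  const int k = static_cast<int>(array[filter.size() - 1]);
  if (k > 30) return true;  // reserved encodings: treat as a match
  return rocksdb::LegacyNoLocalityBloomImpl::HashMayMatch(StandInHash(key),
                                                          bits, k, array);
}

int main() {
  std::string keys[] = {"k1", "k2", "k3"};
  std::string filter =
      BuildLegacyFilter(keys, 3, /*bits_per_key=*/10, /*num_probes=*/6);
  for (const std::string& k : keys) {
    assert(LegacyKeyMayMatch(k, filter));  // added keys must always match
  }
  return 0;
}
```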
diff --git a/util/bloom_impl.h b/util/bloom_impl.h
new file mode 100644
index 0000000000..13c7a7ec6b
--- /dev/null
+++ b/util/bloom_impl.h
@@ -0,0 +1,140 @@
+// Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Implementation details of various Bloom filter implementations used in
+// RocksDB. (DynamicBloom is in a separate file for now because it
+// supports concurrent write.)
+
+#pragma once
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rocksdb/slice.h"
+
+namespace rocksdb {
+
+// A legacy Bloom filter implementation with no locality of probes (slow).
+// It uses double hashing to generate a sequence of hash values.
+// Asymptotic analysis is in [Kirsch,Mitzenmacher 2006], but known to have
+// subtle accuracy flaws for practical sizes [Dillinger,Manolios 2004].
+//
+// DO NOT REUSE - faster and more predictably accurate implementations
+// are available at
+// https://github.com/pdillinger/wormhashing/blob/master/bloom_simulation_tests/foo.cc
+// See e.g. RocksDB DynamicBloom.
+//
+class LegacyNoLocalityBloomImpl {
+public:
+  static inline void AddHash(uint32_t h, uint32_t total_bits,
+                             int num_probes, char *data) {
+    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
+    for (int i = 0; i < num_probes; i++) {
+      const uint32_t bitpos = h % total_bits;
+      data[bitpos/8] |= (1 << (bitpos % 8));
+      h += delta;
+    }
+  }
+
+  static inline bool HashMayMatch(uint32_t h, uint32_t total_bits,
+                                  int num_probes, const char *data) {
+    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
+    for (int i = 0; i < num_probes; i++) {
+      const uint32_t bitpos = h % total_bits;
+      if ((data[bitpos/8] & (1 << (bitpos % 8))) == 0) {
+        return false;
+      }
+      h += delta;
+    }
+    return true;
+  }
+};
+
+
+// A legacy Bloom filter implementation with probes local to a single
+// cache line (fast). Because SST files might be transported between
+// platforms, the cache line size is a parameter rather than hard coded.
+// (But if specified as a constant parameter, an optimizing compiler
+// should take advantage of that.)
+//
+// When ExtraRotates is false, this implementation is notably deficient in
+// accuracy. Specifically, it uses double hashing with a 1/512 chance of the
+// increment being zero (when cache line size is 512 bits). Thus, there's a
+// 1/512 chance of probing only one index, which we'd expect to incur about
+// a 1/2 * 1/512 or absolute 0.1% FP rate penalty. More detail at
+// https://github.com/facebook/rocksdb/issues/4120
+//
+// DO NOT REUSE - faster and more predictably accurate implementations
+// are available at
+// https://github.com/pdillinger/wormhashing/blob/master/bloom_simulation_tests/foo.cc
+// See e.g. RocksDB DynamicBloom.
+//
+template <bool ExtraRotates>
+class LegacyLocalityBloomImpl {
+private:
+  static inline uint32_t GetLine(uint32_t h, uint32_t num_lines) {
+    uint32_t offset_h = ExtraRotates ? (h >> 11) | (h << 21) : h;
+    return offset_h % num_lines;
+  }
+public:
+  static inline void AddHash(uint32_t h, uint32_t num_lines,
+                             int num_probes, char *data,
+                             int log2_cache_line_bytes) {
+    const int log2_cache_line_bits = log2_cache_line_bytes + 3;
+
+    char *data_at_offset =
+        data + (GetLine(h, num_lines) << log2_cache_line_bytes);
+    const uint32_t delta = (h >> 17) | (h << 15);
+    for (int i = 0; i < num_probes; ++i) {
+      // Mask to bit-within-cache-line address
+      const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1);
+      data_at_offset[bitpos / 8] |= (1 << (bitpos % 8));
+      if (ExtraRotates) {
+        h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits));
+      }
+      h += delta;
+    }
+  }
+
+  static inline void PrepareHashMayMatch(uint32_t h, uint32_t num_lines,
+                                         const char *data,
+                                         uint32_t /*out*/*byte_offset,
+                                         int log2_cache_line_bytes) {
+    uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes;
+    PREFETCH(data + b, 0 /* rw */, 1 /* locality */);
+    PREFETCH(data + b + ((1 << log2_cache_line_bytes) - 1),
+             0 /* rw */, 1 /* locality */);
+    *byte_offset = b;
+  }
+
+  static inline bool HashMayMatch(uint32_t h, uint32_t num_lines,
+                                  int num_probes, const char *data,
+                                  int log2_cache_line_bytes) {
+    uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes;
+    return HashMayMatchPrepared(h, num_probes,
+                                data + b, log2_cache_line_bytes);
+  }
+
+  static inline bool HashMayMatchPrepared(uint32_t h, int num_probes,
+                                          const char *data_at_offset,
+                                          int log2_cache_line_bytes) {
+    const int log2_cache_line_bits = log2_cache_line_bytes + 3;
+
+    const uint32_t delta = (h >> 17) | (h << 15);
+    for (int i = 0; i < num_probes; ++i) {
+      // Mask to bit-within-cache-line address
+      const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1);
+      if (((data_at_offset[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
+        return false;
+      }
+      if (ExtraRotates) {
+        h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits));
+      }
+      h += delta;
+    }
+    return true;
+  }
+};
+
+}  // namespace rocksdb
diff --git a/util/bloom_test.cc b/util/bloom_test.cc
index b759303996..2614615210 100644
--- a/util/bloom_test.cc
+++ b/util/bloom_test.cc
@@ -352,7 +352,8 @@ TEST_F(FullBloomTest, FullVaryingLengths) {
     }
     Build();
 
-    ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5)) << length;
+    ASSERT_LE(FilterSize(),
+              (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
 
     // All added keys must match
     for (int i = 0; i < length; i++) {
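The bound asserted in FullVaryingLengths reflects the full filter layout read by FullFilterBitsReader: num_lines whole cache lines of bit data followed by 5 metadata bytes (one byte of num_probes plus a 32-bit num_lines, as GetFilterMeta decodes them). The factor of two cache lines presumably leaves room for rounding the data up to a whole line plus whatever extra line adjustment GetTotalBitsForLocality applies. A rough size model illustrating why the bound holds; the cache line size is assumed and the arithmetic is simplified relative to CalculateSpace.

```cpp
#include <cassert>
#include <cstdint>

constexpr uint32_t kCacheLineSize = 64;  // assumed for the arithmetic

uint32_t ApproxFullFilterBytes(uint32_t num_keys, uint32_t bits_per_key) {
  uint32_t total_bits = num_keys * bits_per_key;
  // Round up to a whole number of cache lines. (The real
  // GetTotalBitsForLocality may also adjust the line count; omitted here.)
  uint32_t line_bits = kCacheLineSize * 8;
  uint32_t num_lines = (total_bits + line_bits - 1) / line_bits;
  return num_lines * kCacheLineSize + 5;  // + num_probes byte + num_lines u32
}

int main() {
  // Mirrors the test bound: data stays within two extra cache lines of
  // length * 10 / 8 bytes, plus the 5 metadata bytes.
  for (uint32_t length = 1; length <= 10000; length *= 10) {
    uint32_t approx = ApproxFullFilterBytes(length, 10);
    assert(approx <= (length * 10 / 8) + kCacheLineSize * 2 + 5);
  }
  return 0;
}
```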