2013-11-27 22:27:02 +00:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2014-06-12 17:06:18 +00:00
|
|
|
#include <util/arena.h>
|
|
|
|
#include <port/port_posix.h>
|
|
|
|
|
2013-11-27 22:27:02 +00:00
|
|
|
#include <atomic>
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class Slice;
|
2014-05-04 22:52:23 +00:00
|
|
|
class Logger;
|
2013-11-27 22:27:02 +00:00
|
|
|
|
|
|
|
class DynamicBloom {
|
|
|
|
public:
|
2014-06-30 22:54:31 +00:00
|
|
|
// arena: pass arena to bloom filter, hence trace the usage of memory
|
2013-11-27 22:27:02 +00:00
|
|
|
// total_bits: fixed total bits for the bloom
|
|
|
|
// num_probes: number of hash probes for a single key
|
2014-06-02 23:52:29 +00:00
|
|
|
// locality: If positive, optimize for cache line locality, 0 otherwise.
|
2014-03-28 16:21:20 +00:00
|
|
|
// hash_func: customized hash function
|
2014-05-04 20:55:53 +00:00
|
|
|
// huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
|
|
|
|
// withi this page size. Need to reserve huge pages for
|
|
|
|
// it to be allocated, like:
|
|
|
|
// sysctl -w vm.nr_hugepages=20
|
|
|
|
// See linux doc Documentation/vm/hugetlbpage.txt
|
2014-06-30 22:54:31 +00:00
|
|
|
explicit DynamicBloom(Arena* arena,
|
|
|
|
uint32_t total_bits, uint32_t locality = 0,
|
2014-05-04 20:55:53 +00:00
|
|
|
uint32_t num_probes = 6,
|
|
|
|
uint32_t (*hash_func)(const Slice& key) = nullptr,
|
2014-05-04 22:52:23 +00:00
|
|
|
size_t huge_page_tlb_size = 0,
|
|
|
|
Logger* logger = nullptr);
|
2013-11-27 22:27:02 +00:00
|
|
|
|
2014-06-09 19:30:19 +00:00
|
|
|
explicit DynamicBloom(uint32_t num_probes = 6,
|
|
|
|
uint32_t (*hash_func)(const Slice& key) = nullptr);
|
|
|
|
|
2014-06-30 22:54:31 +00:00
|
|
|
void SetTotalBits(Arena* arena, uint32_t total_bits, uint32_t locality,
|
2014-06-09 19:30:19 +00:00
|
|
|
size_t huge_page_tlb_size, Logger* logger);
|
|
|
|
|
2014-05-04 20:55:53 +00:00
|
|
|
~DynamicBloom() {}
|
2013-11-27 22:27:02 +00:00
|
|
|
|
2014-01-17 20:22:39 +00:00
|
|
|
// Assuming single threaded access to this function.
|
|
|
|
void Add(const Slice& key);
|
2013-11-27 22:27:02 +00:00
|
|
|
|
2014-01-17 20:22:39 +00:00
|
|
|
// Assuming single threaded access to this function.
|
2013-12-20 17:35:24 +00:00
|
|
|
void AddHash(uint32_t hash);
|
|
|
|
|
2014-01-17 20:22:39 +00:00
|
|
|
// Multithreaded access to this function is OK
|
2014-06-09 19:30:19 +00:00
|
|
|
bool MayContain(const Slice& key) const;
|
2013-11-27 22:27:02 +00:00
|
|
|
|
2014-01-17 20:22:39 +00:00
|
|
|
// Multithreaded access to this function is OK
|
2014-06-09 19:30:19 +00:00
|
|
|
bool MayContainHash(uint32_t hash) const;
|
2013-11-27 22:27:02 +00:00
|
|
|
|
2014-06-12 17:06:18 +00:00
|
|
|
void Prefetch(uint32_t h);
|
|
|
|
|
2013-11-27 22:27:02 +00:00
|
|
|
private:
|
2014-06-09 19:30:19 +00:00
|
|
|
uint32_t kTotalBits;
|
|
|
|
uint32_t kNumBlocks;
|
2014-01-15 05:30:13 +00:00
|
|
|
const uint32_t kNumProbes;
|
2014-03-28 16:21:20 +00:00
|
|
|
|
|
|
|
uint32_t (*hash_func_)(const Slice& key);
|
|
|
|
unsigned char* data_;
|
|
|
|
unsigned char* raw_;
|
2013-11-27 22:27:02 +00:00
|
|
|
};
|
|
|
|
|
2014-01-17 20:22:39 +00:00
|
|
|
// Hashes key with hash_func_ and records the resulting hash via AddHash().
inline void DynamicBloom::Add(const Slice& key) {
  const uint32_t key_hash = hash_func_(key);
  AddHash(key_hash);
}
|
|
|
|
|
2014-06-09 19:30:19 +00:00
|
|
|
inline bool DynamicBloom::MayContain(const Slice& key) const {
|
2014-01-15 05:30:13 +00:00
|
|
|
return (MayContainHash(hash_func_(key)));
|
|
|
|
}
|
|
|
|
|
2014-06-12 17:06:18 +00:00
|
|
|
inline void DynamicBloom::Prefetch(uint32_t h) {
|
|
|
|
if (kNumBlocks != 0) {
|
|
|
|
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
|
|
|
|
PREFETCH(&(data_[b]), 0, 3);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-09 19:30:19 +00:00
|
|
|
inline bool DynamicBloom::MayContainHash(uint32_t h) const {
|
|
|
|
assert(kNumBlocks > 0 || kTotalBits > 0);
|
2014-01-15 05:30:13 +00:00
|
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
2014-06-02 23:52:29 +00:00
|
|
|
if (kNumBlocks != 0) {
|
|
|
|
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
|
2014-03-28 16:21:20 +00:00
|
|
|
for (uint32_t i = 0; i < kNumProbes; ++i) {
|
2014-06-02 23:52:29 +00:00
|
|
|
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
|
|
|
|
// to a simple and operation by compiler.
|
|
|
|
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
|
2014-03-28 16:21:20 +00:00
|
|
|
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
|
|
|
|
return false;
|
|
|
|
}
|
2014-06-02 23:52:29 +00:00
|
|
|
// Rotate h so that we don't reuse the same bytes.
|
|
|
|
h = h / (CACHE_LINE_SIZE * 8) +
|
|
|
|
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
|
2014-03-28 16:21:20 +00:00
|
|
|
h += delta;
|
|
|
|
}
|
|
|
|
} else {
|
2014-06-09 19:30:19 +00:00
|
|
|
if (kTotalBits == 0) {
|
|
|
|
// Not initialized.
|
|
|
|
return true;
|
|
|
|
}
|
2014-03-28 16:21:20 +00:00
|
|
|
for (uint32_t i = 0; i < kNumProbes; ++i) {
|
|
|
|
const uint32_t bitpos = h % kTotalBits;
|
|
|
|
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
h += delta;
|
2014-01-15 05:30:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
2013-11-27 22:27:02 +00:00
|
|
|
}
|
2014-01-15 05:30:13 +00:00
|
|
|
|
|
|
|
inline void DynamicBloom::AddHash(uint32_t h) {
|
2014-06-09 19:30:19 +00:00
|
|
|
assert(kNumBlocks > 0 || kTotalBits > 0);
|
2014-01-15 05:30:13 +00:00
|
|
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
2014-06-02 23:52:29 +00:00
|
|
|
if (kNumBlocks != 0) {
|
|
|
|
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
|
2014-03-28 16:21:20 +00:00
|
|
|
for (uint32_t i = 0; i < kNumProbes; ++i) {
|
2014-06-02 23:52:29 +00:00
|
|
|
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
|
|
|
|
// to a simple and operation by compiler.
|
|
|
|
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
|
2014-03-28 16:21:20 +00:00
|
|
|
data_[bitpos / 8] |= (1 << (bitpos % 8));
|
2014-06-02 23:52:29 +00:00
|
|
|
// Rotate h so that we don't reuse the same bytes.
|
|
|
|
h = h / (CACHE_LINE_SIZE * 8) +
|
|
|
|
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
|
2014-03-28 16:21:20 +00:00
|
|
|
h += delta;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (uint32_t i = 0; i < kNumProbes; ++i) {
|
|
|
|
const uint32_t bitpos = h % kTotalBits;
|
|
|
|
data_[bitpos / 8] |= (1 << (bitpos % 8));
|
|
|
|
h += delta;
|
|
|
|
}
|
2014-01-15 05:30:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} // rocksdb
|