mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-25 22:44:05 +00:00
Cut filter partition based on metadata_block_size
Summary: Currently metadata_block_size controls only index partition size. With this patch a partition is cut after any of index or filter partitions reaches metadata_block_size. Closes https://github.com/facebook/rocksdb/pull/2452 Differential Revision: D5275651 Pulled By: maysamyabandeh fbshipit-source-id: 5057e4424b4c8902043782e6bf8c38f0c4f25160
This commit is contained in:
parent
f4ae1bab02
commit
45b9bb0331
|
@ -20,8 +20,10 @@
|
|||
#ifndef STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
|
||||
#define STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
|
@ -41,6 +43,16 @@ class FilterBitsBuilder {
|
|||
// The return value of this function would be the filter bits,
|
||||
// The ownership of actual data is set to buf
|
||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
|
||||
|
||||
// Calculate num of entries fit into a space.
|
||||
virtual int CalculateNumEntry(const uint32_t space) {
|
||||
#ifndef ROCKSDB_LITE
|
||||
throw std::runtime_error("CalculateNumEntry not Implemented");
|
||||
#else
|
||||
abort();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// A class that checks if a key can be in filter
|
||||
|
|
|
@ -76,10 +76,18 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
|
|||
} else {
|
||||
if (table_opt.partition_filters) {
|
||||
assert(p_index_builder != nullptr);
|
||||
// Since after partition cut request from filter builder it takes time
|
||||
// until index builder actully cuts the partition, we take the lower bound
|
||||
// as partition size.
|
||||
assert(table_opt.block_size_deviation <= 100);
|
||||
auto partition_size =
|
||||
(const uint32_t)(table_opt.metadata_block_size *
|
||||
(100 - table_opt.block_size_deviation));
|
||||
partition_size = std::max(partition_size, (const uint32_t)1);
|
||||
return new PartitionedFilterBlockBuilder(
|
||||
opt.prefix_extractor, table_opt.whole_key_filtering,
|
||||
filter_bits_builder, table_opt.index_block_restart_interval,
|
||||
p_index_builder);
|
||||
p_index_builder, partition_size);
|
||||
} else {
|
||||
return new FullFilterBlockBuilder(opt.prefix_extractor,
|
||||
table_opt.whole_key_filtering,
|
||||
|
|
73
table/full_filter_bits_builder.h
Normal file
73
table/full_filter_bits_builder.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "rocksdb/filter_policy.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class Slice;
|
||||
|
||||
class FullFilterBitsBuilder : public FilterBitsBuilder {
|
||||
public:
|
||||
explicit FullFilterBitsBuilder(const size_t bits_per_key,
|
||||
const size_t num_probes);
|
||||
|
||||
~FullFilterBitsBuilder();
|
||||
|
||||
virtual void AddKey(const Slice& key) override;
|
||||
|
||||
// Create a filter that for hashes [0, n-1], the filter is allocated here
|
||||
// When creating filter, it is ensured that
|
||||
// total_bits = num_lines * CACHE_LINE_SIZE * 8
|
||||
// dst len is >= 5, 1 for num_probes, 4 for num_lines
|
||||
// Then total_bits = (len - 5) * 8, and cache_line_size could be calculated
|
||||
// +----------------------------------------------------------------+
|
||||
// | filter data with length total_bits/8 |
|
||||
// +----------------------------------------------------------------+
|
||||
// | |
|
||||
// | ... |
|
||||
// | |
|
||||
// +----------------------------------------------------------------+
|
||||
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
|
||||
// +----------------------------------------------------------------+
|
||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override;
|
||||
|
||||
// Calculate num of entries fit into a space.
|
||||
virtual int CalculateNumEntry(const uint32_t space) override;
|
||||
|
||||
// Calculate space for new filter. This is reverse of CalculateNumEntry.
|
||||
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
|
||||
uint32_t* num_lines);
|
||||
|
||||
private:
|
||||
size_t bits_per_key_;
|
||||
size_t num_probes_;
|
||||
std::vector<uint32_t> hash_entries_;
|
||||
|
||||
// Get totalbits that optimized for cpu cache line
|
||||
uint32_t GetTotalBitsForLocality(uint32_t total_bits);
|
||||
|
||||
// Reserve space for new filter
|
||||
char* ReserveSpace(const int num_entry, uint32_t* total_bits,
|
||||
uint32_t* num_lines);
|
||||
|
||||
// Assuming single threaded access to this function.
|
||||
void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits);
|
||||
|
||||
// No Copy allowed
|
||||
FullFilterBitsBuilder(const FullFilterBitsBuilder&);
|
||||
void operator=(const FullFilterBitsBuilder&);
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
|
@ -77,6 +77,11 @@ void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
|
|||
flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
|
||||
table_opt_.metadata_block_size, table_opt_.block_size_deviation,
|
||||
sub_index_builder_->index_block_builder_));
|
||||
partition_cut_requested_ = false;
|
||||
}
|
||||
|
||||
void PartitionedIndexBuilder::RequestPartitionCut() {
|
||||
partition_cut_requested_ = true;
|
||||
}
|
||||
|
||||
void PartitionedIndexBuilder::AddIndexEntry(
|
||||
|
@ -102,6 +107,7 @@ void PartitionedIndexBuilder::AddIndexEntry(
|
|||
std::string handle_encoding;
|
||||
block_handle.EncodeTo(&handle_encoding);
|
||||
bool do_flush =
|
||||
partition_cut_requested_ ||
|
||||
flush_policy_->Update(*last_key_in_current_block, handle_encoding);
|
||||
if (do_flush) {
|
||||
entries_.push_back(
|
||||
|
|
|
@ -314,6 +314,10 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|||
|
||||
std::string& GetPartitionKey() { return sub_index_last_key_; }
|
||||
|
||||
// Called when an external entity (such as filter partition builder) request
|
||||
// cutting the next partition
|
||||
void RequestPartitionCut();
|
||||
|
||||
private:
|
||||
void MakeNewSubIndexBuilder();
|
||||
|
||||
|
@ -331,6 +335,9 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|||
// true if Finish is called once but not complete yet.
|
||||
bool finishing_indexes = false;
|
||||
const BlockBasedTableOptions& table_opt_;
|
||||
// true if an external entity (such as filter partition builder) request
|
||||
// cutting the next partition
|
||||
bool partition_cut_requested_ = true;
|
||||
// true if it should cut the next filter partition block
|
||||
bool cut_filter_block = false;
|
||||
};
|
||||
|
|
|
@ -20,15 +20,25 @@ namespace rocksdb {
|
|||
PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder(
|
||||
const SliceTransform* prefix_extractor, bool whole_key_filtering,
|
||||
FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval,
|
||||
PartitionedIndexBuilder* const p_index_builder)
|
||||
PartitionedIndexBuilder* const p_index_builder,
|
||||
const uint32_t partition_size)
|
||||
: FullFilterBlockBuilder(prefix_extractor, whole_key_filtering,
|
||||
filter_bits_builder),
|
||||
index_on_filter_block_builder_(index_block_restart_interval),
|
||||
p_index_builder_(p_index_builder) {}
|
||||
p_index_builder_(p_index_builder) {
|
||||
filters_per_partition_ =
|
||||
filter_bits_builder_->CalculateNumEntry(partition_size);
|
||||
}
|
||||
|
||||
PartitionedFilterBlockBuilder::~PartitionedFilterBlockBuilder() {}
|
||||
|
||||
void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
|
||||
// Use == to send the request only once
|
||||
if (filters_in_partition_ == filters_per_partition_) {
|
||||
// Currently only index builder is in charge of cutting a partition. We keep
|
||||
// requesting until it is granted.
|
||||
p_index_builder_->RequestPartitionCut();
|
||||
}
|
||||
if (!p_index_builder_->ShouldCutFilterBlock()) {
|
||||
return;
|
||||
}
|
||||
|
@ -36,11 +46,13 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
|
|||
Slice filter = filter_bits_builder_->Finish(&filter_gc.back());
|
||||
std::string& index_key = p_index_builder_->GetPartitionKey();
|
||||
filters.push_back({index_key, filter});
|
||||
filters_in_partition_ = 0;
|
||||
}
|
||||
|
||||
void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {
|
||||
MaybeCutAFilterBlock();
|
||||
filter_bits_builder_->AddKey(key);
|
||||
filters_in_partition_++;
|
||||
}
|
||||
|
||||
Slice PartitionedFilterBlockBuilder::Finish(
|
||||
|
|
|
@ -28,7 +28,8 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||
explicit PartitionedFilterBlockBuilder(
|
||||
const SliceTransform* prefix_extractor, bool whole_key_filtering,
|
||||
FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval,
|
||||
PartitionedIndexBuilder* const p_index_builder);
|
||||
PartitionedIndexBuilder* const p_index_builder,
|
||||
const uint32_t partition_size);
|
||||
|
||||
virtual ~PartitionedFilterBlockBuilder();
|
||||
|
||||
|
@ -51,7 +52,15 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||
false; // true if Finish is called once but not complete yet.
|
||||
// The policy of when cut a filter block and Finish it
|
||||
void MaybeCutAFilterBlock();
|
||||
// Currently we keep the same number of partitions for filters and indexes.
|
||||
// This would allow for some potentioal optimizations in future. If such
|
||||
// optimizations did not realize we can use different number of partitions and
|
||||
// eliminate p_index_builder_
|
||||
PartitionedIndexBuilder* const p_index_builder_;
|
||||
// The desired number of filters per partition
|
||||
uint32_t filters_per_partition_;
|
||||
// The current number of filters in the last partition
|
||||
uint32_t filters_in_partition_;
|
||||
};
|
||||
|
||||
class PartitionedFilterBlockReader : public FilterBlockReader {
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "rocksdb/filter_policy.h"
|
||||
|
||||
#include "table/full_filter_bits_builder.h"
|
||||
#include "table/index_builder.h"
|
||||
#include "table/partitioned_filter_block.h"
|
||||
#include "util/coding.h"
|
||||
|
@ -64,6 +65,16 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
return max_index_size;
|
||||
}
|
||||
|
||||
uint64_t MaxFilterSize() {
|
||||
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||
auto filter_bits_reader = dynamic_cast<rocksdb::FullFilterBitsBuilder*>(
|
||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
||||
uint32_t dont_care1, dont_care2;
|
||||
auto partition_size =
|
||||
filter_bits_reader->CalculateSpace(num_keys, &dont_care1, &dont_care2);
|
||||
return partition_size + table_options_.block_size_deviation;
|
||||
}
|
||||
|
||||
int last_offset = 10;
|
||||
BlockHandle Write(const Slice& slice) {
|
||||
BlockHandle bh(last_offset + 1, slice.size());
|
||||
|
@ -78,10 +89,17 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
|
||||
PartitionedFilterBlockBuilder* NewBuilder(
|
||||
PartitionedIndexBuilder* const p_index_builder) {
|
||||
uint32_t partition_size =
|
||||
table_options_.metadata_block_size >
|
||||
(uint64_t)table_options_.block_size_deviation
|
||||
? table_options_.metadata_block_size -
|
||||
table_options_.block_size_deviation
|
||||
: 1;
|
||||
return new PartitionedFilterBlockBuilder(
|
||||
nullptr, table_options_.whole_key_filtering,
|
||||
table_options_.filter_policy->GetFilterBitsBuilder(),
|
||||
table_options_.index_block_restart_interval, p_index_builder);
|
||||
table_options_.index_block_restart_interval, p_index_builder,
|
||||
partition_size);
|
||||
}
|
||||
|
||||
std::unique_ptr<MockedBlockBasedTable> table;
|
||||
|
@ -261,7 +279,8 @@ TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
|
|||
|
||||
TEST_F(PartitionedFilterBlockTest, PartitionCount) {
|
||||
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||
table_options_.metadata_block_size = MaxIndexSize();
|
||||
table_options_.metadata_block_size =
|
||||
std::max(MaxIndexSize(), MaxFilterSize());
|
||||
int partitions = TestBlockPerKey();
|
||||
ASSERT_EQ(partitions, 1);
|
||||
// A low number ensures cutting a block after each key
|
||||
|
|
|
@ -13,49 +13,32 @@
|
|||
|
||||
#include "rocksdb/slice.h"
|
||||
#include "table/block_based_filter_block.h"
|
||||
#include "table/full_filter_bits_builder.h"
|
||||
#include "table/full_filter_block.h"
|
||||
#include "util/hash.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/hash.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class BlockBasedFilterBlockBuilder;
|
||||
class FullFilterBlockBuilder;
|
||||
|
||||
namespace {
|
||||
class FullFilterBitsBuilder : public FilterBitsBuilder {
|
||||
public:
|
||||
explicit FullFilterBitsBuilder(const size_t bits_per_key,
|
||||
const size_t num_probes)
|
||||
: bits_per_key_(bits_per_key),
|
||||
num_probes_(num_probes) {
|
||||
assert(bits_per_key_);
|
||||
FullFilterBitsBuilder::FullFilterBitsBuilder(const size_t bits_per_key,
|
||||
const size_t num_probes)
|
||||
: bits_per_key_(bits_per_key), num_probes_(num_probes) {
|
||||
assert(bits_per_key_);
|
||||
}
|
||||
|
||||
~FullFilterBitsBuilder() {}
|
||||
FullFilterBitsBuilder::~FullFilterBitsBuilder() {}
|
||||
|
||||
virtual void AddKey(const Slice& key) override {
|
||||
void FullFilterBitsBuilder::AddKey(const Slice& key) {
|
||||
uint32_t hash = BloomHash(key);
|
||||
if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
|
||||
hash_entries_.push_back(hash);
|
||||
}
|
||||
}
|
||||
|
||||
// Create a filter that for hashes [0, n-1], the filter is allocated here
|
||||
// When creating filter, it is ensured that
|
||||
// total_bits = num_lines * CACHE_LINE_SIZE * 8
|
||||
// dst len is >= 5, 1 for num_probes, 4 for num_lines
|
||||
// Then total_bits = (len - 5) * 8, and cache_line_size could be calculated
|
||||
// +----------------------------------------------------------------+
|
||||
// | filter data with length total_bits/8 |
|
||||
// +----------------------------------------------------------------+
|
||||
// | |
|
||||
// | ... |
|
||||
// | |
|
||||
// +----------------------------------------------------------------+
|
||||
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
|
||||
// +----------------------------------------------------------------+
|
||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
|
||||
Slice FullFilterBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) {
|
||||
uint32_t total_bits, num_lines;
|
||||
char* data = ReserveSpace(static_cast<int>(hash_entries_.size()),
|
||||
&total_bits, &num_lines);
|
||||
|
@ -76,27 +59,6 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
|
|||
return Slice(data, total_bits / 8 + 5);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t bits_per_key_;
|
||||
size_t num_probes_;
|
||||
std::vector<uint32_t> hash_entries_;
|
||||
|
||||
// Get totalbits that optimized for cpu cache line
|
||||
uint32_t GetTotalBitsForLocality(uint32_t total_bits);
|
||||
|
||||
// Reserve space for new filter
|
||||
char* ReserveSpace(const int num_entry, uint32_t* total_bits,
|
||||
uint32_t* num_lines);
|
||||
|
||||
// Assuming single threaded access to this function.
|
||||
void AddHash(uint32_t h, char* data, uint32_t num_lines,
|
||||
uint32_t total_bits);
|
||||
|
||||
// No Copy allowed
|
||||
FullFilterBitsBuilder(const FullFilterBitsBuilder&);
|
||||
void operator=(const FullFilterBitsBuilder&);
|
||||
};
|
||||
|
||||
uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
|
||||
uint32_t num_lines =
|
||||
(total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
|
||||
|
@ -109,10 +71,10 @@ uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
|
|||
return num_lines * (CACHE_LINE_SIZE * 8);
|
||||
}
|
||||
|
||||
char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
|
||||
uint32_t* total_bits, uint32_t* num_lines) {
|
||||
uint32_t FullFilterBitsBuilder::CalculateSpace(const int num_entry,
|
||||
uint32_t* total_bits,
|
||||
uint32_t* num_lines) {
|
||||
assert(bits_per_key_);
|
||||
char* data = nullptr;
|
||||
if (num_entry != 0) {
|
||||
uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_);
|
||||
|
||||
|
@ -128,12 +90,35 @@ char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
|
|||
// Reserve space for Filter
|
||||
uint32_t sz = *total_bits / 8;
|
||||
sz += 5; // 4 bytes for num_lines, 1 byte for num_probes
|
||||
return sz;
|
||||
}
|
||||
|
||||
data = new char[sz];
|
||||
char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
|
||||
uint32_t* total_bits,
|
||||
uint32_t* num_lines) {
|
||||
uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines);
|
||||
char* data = new char[sz];
|
||||
memset(data, 0, sz);
|
||||
return data;
|
||||
}
|
||||
|
||||
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) {
|
||||
assert(bits_per_key_);
|
||||
assert(space > 0);
|
||||
uint32_t dont_care1, dont_care2;
|
||||
int high = (int) (space * 8 / bits_per_key_ + 1);
|
||||
int low = 1;
|
||||
int n = high;
|
||||
for (; n >= low; n--) {
|
||||
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
|
||||
if (sz <= space) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(n < high); // High should be an overestimation
|
||||
return n;
|
||||
}
|
||||
|
||||
inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
|
||||
uint32_t num_lines, uint32_t total_bits) {
|
||||
assert(num_lines > 0 && total_bits > 0);
|
||||
|
@ -151,6 +136,7 @@ inline void FullFilterBitsBuilder::AddHash(uint32_t h, char* data,
|
|||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
class FullFilterBitsReader : public FilterBitsReader {
|
||||
public:
|
||||
explicit FullFilterBitsReader(const Slice& contents)
|
||||
|
|
|
@ -21,10 +21,11 @@ int main() {
|
|||
#include <vector>
|
||||
|
||||
#include "rocksdb/filter_policy.h"
|
||||
#include "table/full_filter_bits_builder.h"
|
||||
#include "util/arena.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
#include "util/arena.h"
|
||||
|
||||
using GFLAGS::ParseCommandLineFlags;
|
||||
|
||||
|
@ -197,6 +198,10 @@ class FullBloomTest : public testing::Test {
|
|||
delete policy_;
|
||||
}
|
||||
|
||||
FullFilterBitsBuilder* GetFullFilterBitsBuilder() {
|
||||
return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get());
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
bits_builder_.reset(policy_->GetFilterBitsBuilder());
|
||||
bits_reader_.reset(nullptr);
|
||||
|
@ -237,6 +242,19 @@ class FullBloomTest : public testing::Test {
|
|||
}
|
||||
};
|
||||
|
||||
TEST_F(FullBloomTest, FilterSize) {
|
||||
uint32_t dont_care1, dont_care2;
|
||||
auto full_bits_builder = GetFullFilterBitsBuilder();
|
||||
for (int n = 1; n < 100; n++) {
|
||||
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
|
||||
auto n2 = full_bits_builder->CalculateNumEntry(space);
|
||||
ASSERT_GE(n2, n);
|
||||
auto space2 =
|
||||
full_bits_builder->CalculateSpace(n2, &dont_care1, &dont_care2);
|
||||
ASSERT_EQ(space, space2);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(FullBloomTest, FullEmptyFilter) {
|
||||
// Empty filter is not match, at this level
|
||||
ASSERT_TRUE(!Matches("hello"));
|
||||
|
|
Loading…
Reference in a new issue