mirror of https://github.com/facebook/rocksdb.git
Extend format 3 to partitioned index/filters (#3958)
Summary: format_version 3 changes the format of index blocks by storing user keys instead of internal keys, which saves 8 bytes per key. This patch extends that format to the top-level indexes of partitioned indexes and partitioned filters. Closes https://github.com/facebook/rocksdb/pull/3958 Differential Revision: D8294615 Pulled By: maysamyabandeh fbshipit-source-id: 17666cc16b8076c363972e2308e31547e835f0fe
This commit is contained in:
parent
5504a056f8
commit
b73652169e
|
@ -449,12 +449,14 @@ Options DBTestBase::GetOptions(
|
|||
break;
|
||||
}
|
||||
case kBlockBasedTableWithPartitionedIndexFormat3: {
|
||||
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
|
||||
options.prefix_extractor.reset(NewNoopTransform());
|
||||
table_options.format_version = 3;
|
||||
// Format 3 changes the binary index format. Since partitioned index is a
|
||||
// super-set of simple indexes, we are also using kTwoLevelIndexSearch to
|
||||
// test this format.
|
||||
table_options.format_version = 3;
|
||||
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
|
||||
// The top-level index in partition filters is also affected by format 3.
|
||||
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||
table_options.partition_filters = true;
|
||||
break;
|
||||
}
|
||||
case kBlockBasedTableWithIndexRestartInterval: {
|
||||
|
|
|
@ -237,16 +237,18 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
|
|||
return NewTwoLevelIterator(
|
||||
new BlockBasedTable::PartitionedIndexIteratorState(
|
||||
table_, &partition_map_, index_key_includes_seq_),
|
||||
index_block_->NewIterator(
|
||||
icomparator_, icomparator_->user_comparator(), nullptr, true));
|
||||
index_block_->NewIterator(icomparator_,
|
||||
icomparator_->user_comparator(), nullptr,
|
||||
true, nullptr, index_key_includes_seq_));
|
||||
} else {
|
||||
auto ro = ReadOptions();
|
||||
ro.fill_cache = fill_cache;
|
||||
bool kIsIndex = true;
|
||||
return new BlockBasedTableIterator(
|
||||
table_, ro, *icomparator_,
|
||||
index_block_->NewIterator(
|
||||
icomparator_, icomparator_->user_comparator(), nullptr, true),
|
||||
index_block_->NewIterator(icomparator_,
|
||||
icomparator_->user_comparator(), nullptr,
|
||||
true, nullptr, index_key_includes_seq_),
|
||||
false,
|
||||
/* prefix_extractor */ nullptr, kIsIndex, index_key_includes_seq_);
|
||||
}
|
||||
|
@ -262,7 +264,7 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
|
|||
BlockIter biter;
|
||||
BlockHandle handle;
|
||||
index_block_->NewIterator(icomparator_, icomparator_->user_comparator(),
|
||||
&biter, true);
|
||||
&biter, true, nullptr, index_key_includes_seq_);
|
||||
// Index partitions are assumed to be consecutive. Prefetch them all.
|
||||
// Read the first block offset
|
||||
biter.SeekToFirst();
|
||||
|
@ -1308,7 +1310,9 @@ FilterBlockReader* BlockBasedTable::ReadFilter(
|
|||
return new PartitionedFilterBlockReader(
|
||||
rep->prefix_filtering ? prefix_extractor : nullptr,
|
||||
rep->whole_key_filtering, std::move(block), nullptr,
|
||||
rep->ioptions.statistics, rep->internal_comparator, this);
|
||||
rep->ioptions.statistics, rep->internal_comparator, this,
|
||||
rep_->table_properties == nullptr ||
|
||||
!rep_->table_properties->index_key_is_user_key);
|
||||
}
|
||||
|
||||
case Rep::FilterType::kBlockFilter:
|
||||
|
|
|
@ -66,6 +66,8 @@ PartitionedIndexBuilder::PartitionedIndexBuilder(
|
|||
: IndexBuilder(comparator),
|
||||
index_block_builder_(table_opt.index_block_restart_interval,
|
||||
table_opt.format_version),
|
||||
index_block_builder_without_seq_(table_opt.index_block_restart_interval,
|
||||
table_opt.format_version),
|
||||
sub_index_builder_(nullptr),
|
||||
table_opt_(table_opt),
|
||||
seperator_is_key_plus_seq_(false) {}
|
||||
|
@ -149,11 +151,20 @@ Status PartitionedIndexBuilder::Finish(
|
|||
std::string handle_encoding;
|
||||
last_partition_block_handle.EncodeTo(&handle_encoding);
|
||||
index_block_builder_.Add(last_entry.key, handle_encoding);
|
||||
if (!seperator_is_key_plus_seq_) {
|
||||
index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
|
||||
handle_encoding);
|
||||
}
|
||||
entries_.pop_front();
|
||||
}
|
||||
// If there is no sub_index left, then return the 2nd level index.
|
||||
if (UNLIKELY(entries_.empty())) {
|
||||
index_blocks->index_block_contents = index_block_builder_.Finish();
|
||||
if (seperator_is_key_plus_seq_) {
|
||||
index_blocks->index_block_contents = index_block_builder_.Finish();
|
||||
} else {
|
||||
index_blocks->index_block_contents =
|
||||
index_block_builder_without_seq_.Finish();
|
||||
}
|
||||
return Status::OK();
|
||||
} else {
|
||||
// Finish the next partition index in line and Incomplete() to indicate we
|
||||
|
@ -192,7 +203,9 @@ size_t PartitionedIndexBuilder::EstimateTopLevelIndexSize(
|
|||
uint64_t size = it->value->EstimatedSize();
|
||||
BlockHandle tmp_block_handle(offset, size);
|
||||
tmp_block_handle.EncodeTo(&tmp_handle_encoding);
|
||||
tmp_builder.Add(it->key, tmp_handle_encoding);
|
||||
tmp_builder.Add(
|
||||
seperator_is_key_plus_seq_ ? it->key : ExtractUserKey(it->key),
|
||||
tmp_handle_encoding);
|
||||
offset += size;
|
||||
}
|
||||
return tmp_builder.CurrentSizeEstimate();
|
||||
|
|
|
@ -368,6 +368,7 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|||
};
|
||||
std::list<Entry> entries_; // list of partitioned indexes and their keys
|
||||
BlockBuilder index_block_builder_; // top-level index builder
|
||||
BlockBuilder index_block_builder_without_seq_; // same for user keys
|
||||
// the active partition index builder
|
||||
ShortenedIndexBuilder* sub_index_builder_;
|
||||
// the last key in the active partition index builder
|
||||
|
|
|
@ -24,6 +24,7 @@ PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder(
|
|||
: FullFilterBlockBuilder(prefix_extractor, whole_key_filtering,
|
||||
filter_bits_builder),
|
||||
index_on_filter_block_builder_(index_block_restart_interval),
|
||||
index_on_filter_block_builder_without_seq_(index_block_restart_interval),
|
||||
p_index_builder_(p_index_builder),
|
||||
filters_in_partition_(0),
|
||||
num_added_(0) {
|
||||
|
@ -65,6 +66,10 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||
std::string handle_encoding;
|
||||
last_partition_block_handle.EncodeTo(&handle_encoding);
|
||||
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding);
|
||||
if (!p_index_builder_->seperator_is_key_plus_seq()) {
|
||||
index_on_filter_block_builder_without_seq_.Add(
|
||||
ExtractUserKey(last_entry.key), handle_encoding);
|
||||
}
|
||||
filters.pop_front();
|
||||
} else {
|
||||
MaybeCutAFilterBlock();
|
||||
|
@ -74,7 +79,11 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||
if (UNLIKELY(filters.empty())) {
|
||||
*status = Status::OK();
|
||||
if (finishing_filters) {
|
||||
return index_on_filter_block_builder_.Finish();
|
||||
if (p_index_builder_->seperator_is_key_plus_seq()) {
|
||||
return index_on_filter_block_builder_.Finish();
|
||||
} else {
|
||||
return index_on_filter_block_builder_without_seq_.Finish();
|
||||
}
|
||||
} else {
|
||||
// This is the rare case where no key was added to the filter
|
||||
return Slice();
|
||||
|
@ -91,12 +100,13 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||
PartitionedFilterBlockReader::PartitionedFilterBlockReader(
|
||||
const SliceTransform* prefix_extractor, bool _whole_key_filtering,
|
||||
BlockContents&& contents, FilterBitsReader* /*filter_bits_reader*/,
|
||||
Statistics* stats, const Comparator& comparator,
|
||||
const BlockBasedTable* table)
|
||||
Statistics* stats, const InternalKeyComparator comparator,
|
||||
const BlockBasedTable* table, const bool index_key_includes_seq)
|
||||
: FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
|
||||
prefix_extractor_(prefix_extractor),
|
||||
comparator_(comparator),
|
||||
table_(table) {
|
||||
table_(table),
|
||||
index_key_includes_seq_(index_key_includes_seq) {
|
||||
idx_on_fltr_blk_.reset(new Block(std::move(contents),
|
||||
kDisableGlobalSequenceNumber,
|
||||
0 /* read_amp_bytes_per_bit */, stats));
|
||||
|
@ -113,7 +123,8 @@ PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
|
|||
char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
BlockIter biter;
|
||||
BlockHandle handle;
|
||||
idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &biter, true);
|
||||
idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(),
|
||||
&biter, true, nullptr, index_key_includes_seq_);
|
||||
biter.SeekToFirst();
|
||||
for (; biter.Valid(); biter.Next()) {
|
||||
auto input = biter.value();
|
||||
|
@ -207,7 +218,8 @@ bool PartitionedFilterBlockReader::PrefixMayMatch(
|
|||
Slice PartitionedFilterBlockReader::GetFilterPartitionHandle(
|
||||
const Slice& entry) {
|
||||
BlockIter iter;
|
||||
idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &iter, true);
|
||||
idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(),
|
||||
&iter, true, nullptr, index_key_includes_seq_);
|
||||
iter.Seek(entry);
|
||||
if (UNLIKELY(!iter.Valid())) {
|
||||
return Slice();
|
||||
|
@ -269,7 +281,8 @@ void PartitionedFilterBlockReader::CacheDependencies(
|
|||
auto rep = table_->rep_;
|
||||
BlockIter biter;
|
||||
BlockHandle handle;
|
||||
idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &biter, true);
|
||||
idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(),
|
||||
&biter, true, nullptr, index_key_includes_seq_);
|
||||
// Index partitions are assumed to be consecutive. Prefetch them all.
|
||||
// Read the first block offset
|
||||
biter.SeekToFirst();
|
||||
|
|
|
@ -41,6 +41,8 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||
private:
|
||||
// Filter data
|
||||
BlockBuilder index_on_filter_block_builder_; // top-level index builder
|
||||
BlockBuilder
|
||||
index_on_filter_block_builder_without_seq_; // same for user keys
|
||||
struct FilterEntry {
|
||||
std::string key;
|
||||
Slice filter;
|
||||
|
@ -68,13 +70,11 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||
class PartitionedFilterBlockReader : public FilterBlockReader,
|
||||
public Cleanable {
|
||||
public:
|
||||
explicit PartitionedFilterBlockReader(const SliceTransform* prefix_extractor,
|
||||
bool whole_key_filtering,
|
||||
BlockContents&& contents,
|
||||
FilterBitsReader* filter_bits_reader,
|
||||
Statistics* stats,
|
||||
const Comparator& comparator,
|
||||
const BlockBasedTable* table);
|
||||
explicit PartitionedFilterBlockReader(
|
||||
const SliceTransform* prefix_extractor, bool whole_key_filtering,
|
||||
BlockContents&& contents, FilterBitsReader* filter_bits_reader,
|
||||
Statistics* stats, const InternalKeyComparator comparator,
|
||||
const BlockBasedTable* table, const bool index_key_includes_seq);
|
||||
virtual ~PartitionedFilterBlockReader();
|
||||
|
||||
virtual bool IsBlockBased() override { return false; }
|
||||
|
@ -98,8 +98,9 @@ class PartitionedFilterBlockReader : public FilterBlockReader,
|
|||
|
||||
const SliceTransform* prefix_extractor_;
|
||||
std::unique_ptr<Block> idx_on_fltr_blk_;
|
||||
const Comparator& comparator_;
|
||||
const InternalKeyComparator comparator_;
|
||||
const BlockBasedTable* table_;
|
||||
const bool index_key_includes_seq_;
|
||||
std::unordered_map<uint64_t,
|
||||
BlockBasedTable::CachableEntry<FilterBlockReader>>
|
||||
filter_map_;
|
||||
|
|
|
@ -111,7 +111,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
std::unique_ptr<MockedBlockBasedTable> table;
|
||||
|
||||
PartitionedFilterBlockReader* NewReader(
|
||||
PartitionedFilterBlockBuilder* builder) {
|
||||
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) {
|
||||
BlockHandle bh;
|
||||
Status status;
|
||||
Slice slice;
|
||||
|
@ -127,13 +127,14 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
ioptions, env_options, table_options_, icomp, false)));
|
||||
auto reader = new PartitionedFilterBlockReader(
|
||||
nullptr, true, BlockContents(slice, false, kNoCompression), nullptr,
|
||||
nullptr, *icomp.user_comparator(), table.get());
|
||||
nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
|
||||
return reader;
|
||||
}
|
||||
|
||||
void VerifyReader(PartitionedFilterBlockBuilder* builder,
|
||||
bool empty = false) {
|
||||
std::unique_ptr<PartitionedFilterBlockReader> reader(NewReader(builder));
|
||||
PartitionedIndexBuilder* pib, bool empty = false) {
|
||||
std::unique_ptr<PartitionedFilterBlockReader> reader(
|
||||
NewReader(builder, pib));
|
||||
// Querying added keys
|
||||
const bool no_io = true;
|
||||
for (auto key : keys) {
|
||||
|
@ -182,7 +183,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
builder->Add(keys[i]);
|
||||
CutABlock(pib.get(), keys[i]);
|
||||
|
||||
VerifyReader(builder.get());
|
||||
VerifyReader(builder.get(), pib.get());
|
||||
return CountNumOfIndexPartitions(pib.get());
|
||||
}
|
||||
|
||||
|
@ -202,7 +203,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
builder->Add(keys[i]);
|
||||
CutABlock(pib.get(), keys[i]);
|
||||
|
||||
VerifyReader(builder.get());
|
||||
VerifyReader(builder.get(), pib.get());
|
||||
}
|
||||
|
||||
void TestBlockPerAllKeys() {
|
||||
|
@ -220,7 +221,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
builder->Add(keys[i]);
|
||||
CutABlock(pib.get(), keys[i]);
|
||||
|
||||
VerifyReader(builder.get());
|
||||
VerifyReader(builder.get(), pib.get());
|
||||
}
|
||||
|
||||
void CutABlock(PartitionedIndexBuilder* builder,
|
||||
|
@ -261,7 +262,7 @@ TEST_F(PartitionedFilterBlockTest, EmptyBuilder) {
|
|||
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||
std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
|
||||
const bool empty = true;
|
||||
VerifyReader(builder.get(), empty);
|
||||
VerifyReader(builder.get(), pib.get(), empty);
|
||||
}
|
||||
|
||||
TEST_F(PartitionedFilterBlockTest, OneBlock) {
|
||||
|
|
Loading…
Reference in New Issue