mirror of https://github.com/facebook/rocksdb.git
Fix bug in partition filters with format_version=4 (#4381)
Summary: Value delta encoding in format_version 4 requires the differences between the size of two consecutive handles to be sent to BlockBuilder::Add. This applies not only to indexes on blocks but also the indexes on indexes and filters in partitioned indexes and filters respectively. The patch fixes a bug where the partitioned filters would encode the entire size of the handle rather than the difference of the size with the last size. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4381 Differential Revision: D9879505 Pulled By: maysamyabandeh fbshipit-source-id: 27a22e49b482b927fbd5629dc310c46d63d4b6d1
This commit is contained in:
parent
1626f6ab6b
commit
65ac72edd9
|
@ -22,11 +22,12 @@ class DBBloomFilterTest : public DBTestBase {
|
|||
|
||||
class DBBloomFilterTestWithParam
|
||||
: public DBTestBase,
|
||||
public testing::WithParamInterface<std::tuple<bool, bool>> {
|
||||
public testing::WithParamInterface<std::tuple<bool, bool, uint32_t>> {
|
||||
// public testing::WithParamInterface<bool> {
|
||||
protected:
|
||||
bool use_block_based_filter_;
|
||||
bool partition_filters_;
|
||||
uint32_t format_version_;
|
||||
|
||||
public:
|
||||
DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
|
||||
|
@ -36,9 +37,12 @@ class DBBloomFilterTestWithParam
|
|||
void SetUp() override {
|
||||
use_block_based_filter_ = std::get<0>(GetParam());
|
||||
partition_filters_ = std::get<1>(GetParam());
|
||||
format_version_ = std::get<2>(GetParam());
|
||||
}
|
||||
};
|
||||
|
||||
class DBBloomFilterTestDefFormatVersion : public DBBloomFilterTestWithParam {};
|
||||
|
||||
class SliceTransformLimitedDomainGeneric : public SliceTransform {
|
||||
const char* Name() const override {
|
||||
return "SliceTransformLimitedDomainGeneric";
|
||||
|
@ -62,7 +66,7 @@ class SliceTransformLimitedDomainGeneric : public SliceTransform {
|
|||
// KeyMayExist can lead to a few false positives, but not false negatives.
|
||||
// To make test deterministic, use a much larger number of bits per key-20 than
|
||||
// bits in the key, so that false positives are eliminated
|
||||
TEST_P(DBBloomFilterTestWithParam, KeyMayExist) {
|
||||
TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
|
||||
do {
|
||||
ReadOptions ropts;
|
||||
std::string value;
|
||||
|
@ -401,6 +405,11 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
|||
table_options.index_type =
|
||||
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
|
||||
}
|
||||
table_options.format_version = format_version_;
|
||||
if (format_version_ >= 4) {
|
||||
// value delta encoding challenged more with index interval > 1
|
||||
table_options.index_block_restart_interval = 8;
|
||||
}
|
||||
table_options.metadata_block_size = 32;
|
||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||
|
||||
|
@ -456,10 +465,26 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
|
|||
} while (ChangeCompactOptions());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(DBBloomFilterTestWithParam, DBBloomFilterTestWithParam,
|
||||
::testing::Values(std::make_tuple(true, false),
|
||||
std::make_tuple(false, true),
|
||||
std::make_tuple(false, false)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
FormatDef, DBBloomFilterTestDefFormatVersion,
|
||||
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
|
||||
std::make_tuple(false, true, test::kDefaultFormatVersion),
|
||||
std::make_tuple(false, false,
|
||||
test::kDefaultFormatVersion)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
FormatDef, DBBloomFilterTestWithParam,
|
||||
::testing::Values(std::make_tuple(true, false, test::kDefaultFormatVersion),
|
||||
std::make_tuple(false, true, test::kDefaultFormatVersion),
|
||||
std::make_tuple(false, false,
|
||||
test::kDefaultFormatVersion)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
FormatLatest, DBBloomFilterTestWithParam,
|
||||
::testing::Values(std::make_tuple(true, false, test::kLatestFormatVersion),
|
||||
std::make_tuple(false, true, test::kLatestFormatVersion),
|
||||
std::make_tuple(false, false,
|
||||
test::kLatestFormatVersion)));
|
||||
|
||||
TEST_F(DBBloomFilterTest, BloomFilterRate) {
|
||||
while (ChangeFilterOptions()) {
|
||||
|
|
|
@ -451,13 +451,14 @@ Options DBTestBase::GetOptions(
|
|||
}
|
||||
case kBlockBasedTableWithPartitionedIndexFormat4: {
|
||||
table_options.format_version = 4;
|
||||
// Format 3 changes the binary index format. Since partitioned index is a
|
||||
// Format 4 changes the binary index format. Since partitioned index is a
|
||||
// super-set of simple indexes, we are also using kTwoLevelIndexSearch to
|
||||
// test this format.
|
||||
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
|
||||
// The top-level index in partition filters are also affected by format 3.
|
||||
// The top-level index in partition filters are also affected by format 4.
|
||||
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||
table_options.partition_filters = true;
|
||||
table_options.index_block_restart_interval = 8;
|
||||
break;
|
||||
}
|
||||
case kBlockBasedTableWithIndexRestartInterval: {
|
||||
|
|
|
@ -109,8 +109,6 @@ struct OptionsOverride {
|
|||
// These will be used only if filter_policy is set
|
||||
bool partition_filters = false;
|
||||
uint64_t metadata_block_size = 1024;
|
||||
BlockBasedTableOptions::IndexType index_type =
|
||||
BlockBasedTableOptions::IndexType::kBinarySearch;
|
||||
|
||||
// Used as a bit mask of individual enums in which to skip an XF test point
|
||||
int skip_policy = 0;
|
||||
|
|
|
@ -54,13 +54,6 @@ void BlockHandle::EncodeTo(std::string* dst) const {
|
|||
PutVarint64Varint64(dst, offset_, size_);
|
||||
}
|
||||
|
||||
void BlockHandle::EncodeSizeTo(std::string* dst) const {
|
||||
// Sanity check that all fields have been set
|
||||
assert(offset_ != ~static_cast<uint64_t>(0));
|
||||
assert(size_ != ~static_cast<uint64_t>(0));
|
||||
PutVarint64(dst, size_);
|
||||
}
|
||||
|
||||
Status BlockHandle::DecodeFrom(Slice* input) {
|
||||
if (GetVarint64(input, &offset_) &&
|
||||
GetVarint64(input, &size_)) {
|
||||
|
|
|
@ -55,7 +55,6 @@ class BlockHandle {
|
|||
void EncodeTo(std::string* dst) const;
|
||||
Status DecodeFrom(Slice* input);
|
||||
Status DecodeSizeFrom(uint64_t offset, Slice* input);
|
||||
void EncodeSizeTo(std::string* dst) const;
|
||||
|
||||
// Return a string that contains the copy of handle.
|
||||
std::string ToString(bool hex = true) const;
|
||||
|
|
|
@ -79,7 +79,10 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||
std::string handle_encoding;
|
||||
last_partition_block_handle.EncodeTo(&handle_encoding);
|
||||
std::string handle_delta_encoding;
|
||||
last_partition_block_handle.EncodeSizeTo(&handle_delta_encoding);
|
||||
PutVarsignedint64(
|
||||
&handle_delta_encoding,
|
||||
last_partition_block_handle.size() - last_encoded_handle_.size());
|
||||
last_encoded_handle_ = last_partition_block_handle;
|
||||
const Slice handle_delta_encoding_slice(handle_delta_encoding);
|
||||
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
|
||||
&handle_delta_encoding_slice);
|
||||
|
|
|
@ -66,6 +66,7 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||
uint32_t filters_in_partition_;
|
||||
// Number of keys added
|
||||
size_t num_added_;
|
||||
BlockHandle last_encoded_handle_;
|
||||
};
|
||||
|
||||
class PartitionedFilterBlockReader : public FilterBlockReader,
|
||||
|
|
|
@ -50,7 +50,9 @@ class MockedBlockBasedTable : public BlockBasedTable {
|
|||
}
|
||||
};
|
||||
|
||||
class PartitionedFilterBlockTest : public testing::Test {
|
||||
class PartitionedFilterBlockTest
|
||||
: public testing::Test,
|
||||
virtual public ::testing::WithParamInterface<uint32_t> {
|
||||
public:
|
||||
BlockBasedTableOptions table_options_;
|
||||
InternalKeyComparator icomp = InternalKeyComparator(BytewiseComparator());
|
||||
|
@ -60,6 +62,8 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
table_options_.no_block_cache = true; // Otherwise BlockBasedTable::Close
|
||||
// will access variable that are not
|
||||
// initialized in our mocked version
|
||||
table_options_.format_version = GetParam();
|
||||
table_options_.index_block_restart_interval = 3;
|
||||
}
|
||||
|
||||
std::shared_ptr<Cache> cache_;
|
||||
|
@ -279,14 +283,19 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||
}
|
||||
};
|
||||
|
||||
TEST_F(PartitionedFilterBlockTest, EmptyBuilder) {
|
||||
INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest,
|
||||
testing::Values(test::kDefaultFormatVersion));
|
||||
INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest,
|
||||
testing::Values(test::kLatestFormatVersion));
|
||||
|
||||
TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
|
||||
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||
std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
|
||||
const bool empty = true;
|
||||
VerifyReader(builder.get(), pib.get(), empty);
|
||||
}
|
||||
|
||||
TEST_F(PartitionedFilterBlockTest, OneBlock) {
|
||||
TEST_P(PartitionedFilterBlockTest, OneBlock) {
|
||||
uint64_t max_index_size = MaxIndexSize();
|
||||
for (uint64_t i = 1; i < max_index_size + 1; i++) {
|
||||
table_options_.metadata_block_size = i;
|
||||
|
@ -294,7 +303,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlock) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
|
||||
TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) {
|
||||
uint64_t max_index_size = MaxIndexSize();
|
||||
for (uint64_t i = 1; i < max_index_size + 1; i++) {
|
||||
table_options_.metadata_block_size = i;
|
||||
|
@ -304,7 +313,7 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
|
|||
|
||||
// This reproduces the bug that a prefix is the same among multiple consecutive
|
||||
// blocks but the bug would add it only to the first block.
|
||||
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
|
||||
TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
|
||||
// some small number to cause partition cuts
|
||||
table_options_.metadata_block_size = 1;
|
||||
std::unique_ptr<const SliceTransform> prefix_extractor
|
||||
|
@ -330,7 +339,7 @@ TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
|
||||
TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) {
|
||||
uint64_t max_index_size = MaxIndexSize();
|
||||
for (uint64_t i = 1; i < max_index_size + 1; i++) {
|
||||
table_options_.metadata_block_size = i;
|
||||
|
@ -338,7 +347,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(PartitionedFilterBlockTest, PartitionCount) {
|
||||
TEST_P(PartitionedFilterBlockTest, PartitionCount) {
|
||||
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||
table_options_.metadata_block_size =
|
||||
std::max(MaxIndexSize(), MaxFilterSize());
|
||||
|
|
Loading…
Reference in New Issue