From 9e4913ce9d5113f205f8d56fa4490d503ab61472 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 2 Oct 2019 15:31:54 -0700 Subject: [PATCH] Add FullBloomTest.CorruptFilters,RawSchema (#5834) Summary: There was significant untested logic in FullFilterBitsReader in the handling of serialized Bloom filter bits that cannot be generated by FullFilterBitsBuilder in the current compilation. These now test many of those corner-case behaviors, including bad metadata or filters created with different cache line size than the current compiled-in value. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5834 Test Plan: thisisthetest Differential Revision: D17726372 Pulled By: pdillinger fbshipit-source-id: fb7b8003b5a8e6fb4666fe95206128f3d5835fc7 --- util/bloom.cc | 2 +- util/bloom_test.cc | 185 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 1 deletion(-) diff --git a/util/bloom.cc b/util/bloom.cc index 56eef320d1..d59e29b51b 100644 --- a/util/bloom.cc +++ b/util/bloom.cc @@ -153,7 +153,7 @@ class FullFilterBitsReader : public FilterBitsReader { if (num_lines_at_curr_cache_size == 0) { // The cache line size seems not a power of two. It's not supported // and indicates a corruption so disable using this filter. - assert(false); + // Removed for unit testing corruption: assert(false); num_lines_ = 0; num_probes_ = 0; break; diff --git a/util/bloom_test.cc b/util/bloom_test.cc index cda5e73e12..f57d101bd9 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -274,6 +274,10 @@ class FullBloomTest : public testing::Test { bits_builder_->AddKey(s); } + void OpenRaw(const Slice& s) { + bits_reader_.reset(policy_->GetFilterBitsReader(s)); + } + void Build() { Slice filter = bits_builder_->Finish(&buf_); bits_reader_.reset(policy_->GetFilterBitsReader(filter)); @@ -293,6 +297,17 @@ class FullBloomTest : public testing::Test { return bits_reader_->MayMatch(s); } + uint64_t PackedMatches() { + char buffer[sizeof(int)]; + uint64_t result = 0; + for (int i = 0; i < 64; i++) { + if (Matches(Key(i + 12345, buffer))) { + result |= 1 << i; + } + } + return result; + } + double FalsePositiveRate() { char buffer[sizeof(int)]; int result = 0; @@ -452,6 +467,176 @@ TEST_F(FullBloomTest, Schema) { ResetPolicy(); } +// A helper class for testing custom or corrupt filter bits as read by +// FullFilterBitsReader. +struct RawFilterTester { + // Buffer, from which we always return a tail Slice, so the + // last five bytes are always the metadata bytes. + std::array data_; + // Points five bytes from the end + char* metadata_ptr_; + + RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {} + + Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines, + uint32_t num_probes) { + metadata_ptr_[0] = static_cast(num_probes); + EncodeFixed32(metadata_ptr_ + 1, num_lines); + uint32_t len = len_without_metadata + /*metadata*/ 5; + assert(len <= data_.size()); + return Slice(metadata_ptr_ - len_without_metadata, len); + } + + Slice Reset(uint32_t len_without_metadata, uint32_t num_lines, + uint32_t num_probes, bool fill_ones) { + data_.fill(fill_ones ? 0xff : 0); + return ResetNoFill(len_without_metadata, num_lines, num_probes); + } + + Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines, + uint32_t num_probes) { + for (uint32_t i = 0; i < data_.size(); ++i) { + data_[i] = static_cast(0x7b7b >> (i % 7)); + } + return ResetNoFill(len_without_metadata, num_lines, num_probes); + } +}; + +TEST_F(FullBloomTest, RawSchema) { + RawFilterTester cft; + // Two probes, about 3/4 bits set: ~50% "FP" rate + // One 256-byte cache line. + OpenRaw(cft.ResetWeirdFill(256, 1, 2)); + ASSERT_EQ(18446744073675927543ULL, PackedMatches()); + + // Two 128-byte cache lines. + OpenRaw(cft.ResetWeirdFill(256, 2, 2)); + ASSERT_EQ(18446744073407559151ULL, PackedMatches()); + + // Four 64-byte cache lines. + OpenRaw(cft.ResetWeirdFill(256, 4, 2)); + ASSERT_EQ(18446744073441107966ULL, PackedMatches()); +} + +TEST_F(FullBloomTest, CorruptFilters) { + RawFilterTester cft; + + for (bool fill : {false, true}) { + // Good filter bits - returns same as fill + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + // 256 is unusual but legal cache line size + OpenRaw(cft.Reset(256 * 3, 3, 6, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + // 30 should be max num_probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + // 1 should be min num_probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Type 1 trivial filter bits - returns true as if FP by zero probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Type 2 trivial filter bits - returns false as if built from zero keys + OpenRaw(cft.Reset(0, 0, 6, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Type 2 trivial filter bits - returns false as if built from zero keys + OpenRaw(cft.Reset(0, 37, 6, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes + OpenRaw(cft.Reset(0, 0, 0, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Bad filter bits - returns true for safety + // No solution to 0 * x == CACHE_LINE_SIZE + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad filter bits - returns true for safety + // Can't have 3 * x == 4 for integer x + OpenRaw(cft.Reset(4, 3, 6, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad filter bits - returns true for safety + // 97 bytes is not a power of two, so not a legal cache line size + OpenRaw(cft.Reset(97 * 3, 3, 6, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad filter bits + // 65 bytes is not a power of two, so not a legal cache line size + OpenRaw(cft.Reset(65 * 3, 3, 6, fill)); + // ASSERT_TRUE(Matches("hello")); + // ASSERT_TRUE(Matches("world")); + // NB: NOT PROPERLY CHECKED in implementation + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Bad filter bits - returns false as if built from zero keys + // < 5 bytes overall means missing even metadata + OpenRaw(cft.Reset(-1, 3, 6, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + OpenRaw(cft.Reset(-5, 3, 6, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Dubious filter bits - returns same as fill (for now) + // 31 is not a useful num_probes, nor generated by RocksDB unless directly + // using filter bits API without BloomFilterPolicy. + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Dubious filter bits - returns same as fill (for now) + // Similar, with 127, largest positive char + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Dubious filter bits - returns true (for now) + // num_probes set to 128 / -128, lowest negative char + // NB: Bug in implementation interprets this as negative and has same + // effect as zero probes, but effectively reserves negative char values + // for future use. + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Dubious filter bits - returns true (for now) + // Similar, with 255 / -1 + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + } +} + } // namespace rocksdb int main(int argc, char** argv) {