Improve bloom_test bits_per_key flag (#8093)

Summary:
Improved handling of -bits_per_key other than 10, but at least
the OptimizeForMemory test is simply not designed for generally handling
other settings. (ribbon_test does have a statistical framework for this
kind of testing, but it's not important to do that same for Bloom right
now.)

Closes https://github.com/facebook/rocksdb/issues/7019

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8093

Test Plan: for I in `seq 1 20`; do ./bloom_test --gtest_filter=-*OptimizeForMemory* --bits_per_key=$I &> /dev/null || echo FAILED; done

Reviewed By: mrambacher

Differential Revision: D27275875

Pulled By: pdillinger

fbshipit-source-id: 7362e8ac2c41ea11f639412e4f30c8b375f04388
This commit is contained in:
Peter Dillinger 2021-03-23 21:41:15 -07:00 committed by Facebook GitHub Bot
parent 41e554da2b
commit da6b90ab48
1 changed files with 28 additions and 14 deletions

View File

@ -30,6 +30,9 @@ int main() {
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// The test is not fully designed for bits_per_key other than 10, but with
// this parameter you can easily explore the behavior of other bits_per_key.
// See also filter_bench.
DEFINE_int32(bits_per_key, 10, "");
namespace ROCKSDB_NAMESPACE {
@ -158,7 +161,8 @@ TEST_F(BlockBasedBloomTest, VaryingLengths) {
}
Build();
ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length;
ASSERT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) + 40))
<< length;
// All added keys must match
for (int i = 0; i < length; i++) {
@ -172,11 +176,16 @@ TEST_F(BlockBasedBloomTest, VaryingLengths) {
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
rate*100.0, length, static_cast<int>(FilterSize()));
}
ASSERT_LE(rate, 0.02); // Must not be over 2%
if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
else good_filters++;
if (FLAGS_bits_per_key == 10) {
ASSERT_LE(rate, 0.02); // Must not be over 2%
if (rate > 0.0125) {
mediocre_filters++; // Allowed, but not too often
} else {
good_filters++;
}
}
}
if (kVerbose >= 1) {
if (FLAGS_bits_per_key == 10 && kVerbose >= 1) {
fprintf(stderr, "Filters: %d good, %d mediocre\n",
good_filters, mediocre_filters);
}
@ -481,8 +490,8 @@ TEST_P(FullBloomTest, FullVaryingLengths) {
}
Build();
EXPECT_LE(FilterSize(),
(size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
CACHE_LINE_SIZE * 2 + 5));
// All added keys must match
for (int i = 0; i < length; i++) {
@ -496,11 +505,14 @@ TEST_P(FullBloomTest, FullVaryingLengths) {
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
rate*100.0, length, static_cast<int>(FilterSize()));
}
EXPECT_LE(rate, 0.02); // Must not be over 2%
if (rate > 0.0125)
mediocre_filters++; // Allowed, but not too often
else
good_filters++;
if (FLAGS_bits_per_key == 10) {
EXPECT_LE(rate, 0.02); // Must not be over 2%
if (rate > 0.0125) {
mediocre_filters++; // Allowed, but not too often
} else {
good_filters++;
}
}
}
if (kVerbose >= 1) {
fprintf(stderr, "Filters: %d good, %d mediocre\n",
@ -538,8 +550,10 @@ TEST_P(FullBloomTest, OptimizeForMemory) {
total_keys += nkeys;
total_fp_rate += FalsePositiveRate();
}
EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
EXPECT_GE(total_fp_rate / double{nfilters}, 0.008);
if (FLAGS_bits_per_key == 10) {
EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
EXPECT_GE(total_fp_rate / double{nfilters}, 0.008);
}
int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {