Turn on memtable bloom filter by default. (#6584)

Summary:
Memtable bloom filter is useful in many use cases. Turning it on by default with a conservative 1.5% of memtable memory should benefit more use cases than it hurts.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/6584

Test Plan: Run all existing tests.

Reviewed By: pdillinger

Differential Revision: D20626739

fbshipit-source-id: 1dd45532b932139552519b8c2682bd954550c2f9
This commit is contained in:
sdong 2021-02-05 12:57:44 -08:00 committed by Facebook GitHub Bot
parent cbf38af705
commit ee79a28963
13 changed files with 102 additions and 7 deletions

View File

@ -1,7 +1,8 @@
# Rocksdb Change Log
## Unreleased
### Behavior Changes
* When a retryable IO error occurs during compaction, it is mapped to a soft error and the BG error is set. However, auto resume is not called to clear the soft error, since compaction will reschedule by itself. With this change, when a retryable IO error occurs during compaction, the BG error is not set. The user is informed of the error via EventHelper.
### Default Option Change
* Change default memtable_prefix_bloom_size_ratio from 0 to 0.015 and memtable_whole_key_filtering from false to true. It means that memtable bloom filter will be on, which uses up to 1.5% of memtable space.
### New Features
* Add support for key-value integrity protection in live updates from the user buffers provided to `WriteBatch` through the write to RocksDB's in-memory update buffer (memtable). This is intended to detect some cases of in-memory data corruption, due to either software or hardware errors. Users can enable protection by constructing their `WriteBatch` with `protection_bytes_per_key == 8`.

View File

@ -44,6 +44,8 @@ class CuckooTableDBTest : public testing::Test {
options.allow_mmap_reads = true;
options.create_if_missing = true;
options.allow_concurrent_memtable_write = false;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
return options;
}

View File

@ -1867,6 +1867,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
options.disable_auto_compactions = true;
options.statistics = CreateDBStatistics();
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
// Enable prefix bloom for SST files
BlockBasedTableOptions table_options;
table_options.filter_policy.reset(new BFP(10, bfp_impl));

View File

@ -4001,6 +4001,8 @@ TEST_F(DBTest, DynamicMemtableOptions) {
options.level0_file_num_compaction_trigger = 1024;
options.level0_slowdown_writes_trigger = 1024;
options.level0_stop_writes_trigger = 1024;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
DestroyAndReopen(options);
auto gen_l0_kb = [this](int size) {

View File

@ -156,6 +156,8 @@ class PartitionedIndexTestListener : public EventListener {
TEST_F(DBTest2, PartitionedIndexUserToInternalKey) {
BlockBasedTableOptions table_options;
Options options = CurrentOptions();
// Hold all data until manual flush.
options.memtable_factory.reset(new SpecialSkipListFactory(5000));
table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
PartitionedIndexTestListener* listener = new PartitionedIndexTestListener();
options.table_factory.reset(NewBlockBasedTableFactory(table_options));

View File

@ -344,6 +344,8 @@ Options DBTestBase::GetDefaultOptions() const {
if (!env_->skip_fsync_) {
options.track_and_verify_wals_in_manifest = true;
}
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
return options;
}

View File

@ -110,6 +110,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSingleSortedRun) {
options.write_buffer_size = 105 << 10; // 105KB
options.arena_block_size = 4 << 10;
options.target_file_size_base = 32 << 10; // 32KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
// trigger compaction if there are >= 4 files
KeepFilterFactory* filter = new KeepFilterFactory(true);
filter->expect_manual_compaction_.store(false);
@ -144,6 +146,8 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) {
options.target_file_size_base = 32 << 10; // 32KB
// trigger compaction if there are >= 4 files
options.level0_file_num_compaction_trigger = 4;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
BlockBasedTableOptions bbto;
bbto.cache_index_and_filter_blocks = true;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
@ -213,6 +217,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrigger) {
options.target_file_size_base = 32 << 10; // 32KB
// trigger compaction if there are >= 4 files
options.level0_file_num_compaction_trigger = 4;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
KeepFilterFactory* filter = new KeepFilterFactory(true);
filter->expect_manual_compaction_.store(false);
options.compaction_filter_factory.reset(filter);
@ -317,6 +323,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSizeAmplification) {
options.write_buffer_size = 100 << 10; // 100KB
options.target_file_size_base = 32 << 10; // 32KB
options.level0_file_num_compaction_trigger = 3;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
DestroyAndReopen(options);
CreateAndReopenWithCF({"pikachu"}, options);
@ -359,6 +367,8 @@ TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionSizeAmplification) {
options.write_buffer_size = 100 << 10; // 100KB
options.target_file_size_base = 32 << 10; // 32KB
options.level0_file_num_compaction_trigger = 3;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
// Initial setup of compaction_options_universal will prevent universal
// compaction from happening
options.compaction_options_universal.size_ratio = 100;
@ -439,6 +449,8 @@ TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionReadAmplification) {
options.write_buffer_size = 100 << 10; // 100KB
options.target_file_size_base = 32 << 10; // 32KB
options.level0_file_num_compaction_trigger = 3;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
// Initial setup of compaction_options_universal will prevent universal
// compaction from happening
options.compaction_options_universal.max_size_amplification_percent = 2000;
@ -540,6 +552,8 @@ TEST_P(DBTestUniversalCompaction, CompactFilesOnUniversalCompaction) {
options.create_if_missing = true;
options.compaction_style = kCompactionStyleLevel;
options.num_levels = 1;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.target_file_size_base = options.write_buffer_size;
options.compression = kNoCompression;
options = CurrentOptions(options);
@ -605,6 +619,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTargetLevel) {
options.compaction_style = kCompactionStyleUniversal;
options.write_buffer_size = 100 << 10; // 100KB
options.num_levels = 7;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.disable_auto_compactions = true;
DestroyAndReopen(options);
@ -651,6 +667,8 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) {
options.write_buffer_size = 100 << 10; // 100KB
options.level0_file_num_compaction_trigger = 8;
options.max_background_compactions = 3;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.target_file_size_base = 32 * 1024;
CreateAndReopenWithCF({"pikachu"}, options);
@ -696,6 +714,8 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionTrivialMove) {
options.level0_file_num_compaction_trigger = 3;
options.max_background_compactions = 2;
options.target_file_size_base = 32 * 1024;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
DestroyAndReopen(options);
CreateAndReopenWithCF({"pikachu"}, options);
@ -741,6 +761,8 @@ TEST_P(DBTestUniversalCompactionParallel, UniversalCompactionParallel) {
options.max_background_compactions = 3;
options.max_background_flushes = 3;
options.target_file_size_base = 1 * 1024;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.compaction_options_universal.max_size_amplification_percent = 110;
DestroyAndReopen(options);
CreateAndReopenWithCF({"pikachu"}, options);
@ -800,6 +822,8 @@ TEST_P(DBTestUniversalCompactionParallel, PickByFileNumberBug) {
options.level0_file_num_compaction_trigger = 7;
options.max_background_compactions = 2;
options.target_file_size_base = 1024 * 1024; // 1MB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
// Disable size amplification compaction
options.compaction_options_universal.max_size_amplification_percent =
@ -916,6 +940,8 @@ INSTANTIATE_TEST_CASE_P(Parallel, DBTestUniversalCompactionParallel,
TEST_P(DBTestUniversalCompaction, UniversalCompactionOptions) {
Options options = CurrentOptions();
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.compaction_style = kCompactionStyleUniversal;
options.write_buffer_size = 105 << 10; // 105KB
options.arena_block_size = 4 << 10; // 4KB
@ -951,6 +977,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionStopStyleSimilarSize) {
options.compaction_style = kCompactionStyleUniversal;
options.write_buffer_size = 105 << 10; // 105KB
options.arena_block_size = 4 << 10; // 4KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.target_file_size_base = 32 << 10; // 32KB
// trigger compaction if there are >= 4 files
options.level0_file_num_compaction_trigger = 4;
@ -1037,7 +1065,9 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio1) {
Options options = CurrentOptions();
options.compaction_style = kCompactionStyleUniversal;
options.write_buffer_size = 100 << 10; // 100KB
options.write_buffer_size = 100 << 10; // 100KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.target_file_size_base = 32 << 10; // 32KB
options.level0_file_num_compaction_trigger = 2;
options.num_levels = num_levels_;
@ -1105,6 +1135,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio2) {
Options options = CurrentOptions();
options.compaction_style = kCompactionStyleUniversal;
options.write_buffer_size = 100 << 10; // 100KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.target_file_size_base = 32 << 10; // 32KB
options.level0_file_num_compaction_trigger = 2;
options.num_levels = num_levels_;
@ -1150,6 +1182,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest1) {
options.compaction_options_universal.allow_trivial_move = true;
options.num_levels = 2;
options.write_buffer_size = 100 << 10; // 100KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.level0_file_num_compaction_trigger = 3;
options.max_background_compactions = 1;
options.target_file_size_base = 32 * 1024;
@ -1196,6 +1230,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest2) {
options.compaction_options_universal.allow_trivial_move = true;
options.num_levels = 15;
options.write_buffer_size = 100 << 10; // 100KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.level0_file_num_compaction_trigger = 8;
options.max_background_compactions = 2;
options.target_file_size_base = 64 * 1024;
@ -1235,6 +1271,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionFourPaths) {
options.compaction_options_universal.size_ratio = 5;
options.write_buffer_size = 111 << 10; // 114KB
options.arena_block_size = 4 << 10;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.level0_file_num_compaction_trigger = 2;
options.num_levels = 1;
@ -1339,6 +1377,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCFPathUse) {
options.compaction_options_universal.size_ratio = 10;
options.write_buffer_size = 111 << 10; // 114KB
options.arena_block_size = 4 << 10;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.level0_file_num_compaction_trigger = 2;
options.num_levels = 1;
@ -1497,6 +1537,8 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) {
options.compaction_style = kCompactionStyleUniversal;
options.num_levels = 1;
options.write_buffer_size = 200 << 10; // 200KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.level0_file_num_compaction_trigger = 3;
options.memtable_factory.reset(new SpecialSkipListFactory(KNumKeysPerFile));
options = CurrentOptions(options);
@ -1576,6 +1618,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSecondPathRatio) {
options.compaction_options_universal.size_ratio = 5;
options.write_buffer_size = 111 << 10; // 114KB
options.arena_block_size = 4 << 10;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.level0_file_num_compaction_trigger = 2;
options.num_levels = 1;
options.memtable_factory.reset(
@ -1679,6 +1723,8 @@ TEST_P(DBTestUniversalCompaction, ConcurrentBottomPriLowPriCompactions) {
options.compaction_style = kCompactionStyleUniversal;
options.num_levels = num_levels_;
options.write_buffer_size = 100 << 10; // 100KB
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
options.target_file_size_base = 32 << 10; // 32KB
options.level0_file_num_compaction_trigger = kNumFilesTrigger;
// Trigger compaction if size amplification exceeds 110%
@ -1736,6 +1782,8 @@ TEST_P(DBTestUniversalCompaction, RecalculateScoreAfterPicking) {
options.compaction_style = kCompactionStyleUniversal;
options.level0_file_num_compaction_trigger = kNumFilesTrigger;
options.num_levels = num_levels_;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
Reopen(options);
std::atomic<int> num_compactions_attempted(0);
@ -1836,6 +1884,8 @@ TEST_P(DBTestUniversalManualCompactionOutputPathId,
options.num_levels = num_levels_;
options.target_file_size_base = 1 << 30; // Big size
options.level0_file_num_compaction_trigger = 10;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
Destroy(options);
DestroyAndReopen(options);
CreateAndReopenWithCF({"pikachu"}, options);
@ -1903,6 +1953,8 @@ TEST_F(DBTestUniversalCompaction2, BasicL0toL1) {
opts.compaction_options_universal.size_ratio = 10;
opts.compaction_options_universal.min_merge_width = 2;
opts.compaction_options_universal.max_size_amplification_percent = 200;
opts.memtable_whole_key_filtering = false;
opts.memtable_prefix_bloom_size_ratio = 0;
Reopen(opts);
// add an L1 file to prevent tombstones from dropping due to obsolescence
@ -1946,6 +1998,8 @@ TEST_F(DBTestUniversalCompaction2, SingleLevel) {
opts.compaction_options_universal.size_ratio = 10;
opts.compaction_options_universal.min_merge_width = 2;
opts.compaction_options_universal.max_size_amplification_percent = 200;
opts.memtable_whole_key_filtering = false;
opts.memtable_prefix_bloom_size_ratio = 0;
Reopen(opts);
// add an L1 file to prevent tombstones from dropping due to obsolescence
@ -1984,6 +2038,8 @@ TEST_F(DBTestUniversalCompaction2, MultipleLevels) {
opts.compaction_options_universal.size_ratio = 10;
opts.compaction_options_universal.min_merge_width = 2;
opts.compaction_options_universal.max_size_amplification_percent = 200;
opts.memtable_whole_key_filtering = false;
opts.memtable_prefix_bloom_size_ratio = 0;
Reopen(opts);
// add an L1 file to prevent tombstones from dropping due to obsolescence
@ -2056,6 +2112,8 @@ TEST_F(DBTestUniversalCompaction2, OverlappingL0) {
opts.compaction_options_universal.size_ratio = 10;
opts.compaction_options_universal.min_merge_width = 2;
opts.compaction_options_universal.max_size_amplification_percent = 200;
opts.memtable_whole_key_filtering = false;
opts.memtable_prefix_bloom_size_ratio = 0;
Reopen(opts);
// add an L1 file to prevent tombstones from dropping due to obsolescence
@ -2098,6 +2156,8 @@ TEST_F(DBTestUniversalCompaction2, IngestBehind) {
opts.compaction_options_universal.size_ratio = 10;
opts.compaction_options_universal.min_merge_width = 2;
opts.compaction_options_universal.max_size_amplification_percent = 200;
opts.memtable_whole_key_filtering = false;
opts.memtable_prefix_bloom_size_ratio = 0;
Reopen(opts);
// add an L1 file to prevent tombstones from dropping due to obsolescence
@ -2160,6 +2220,8 @@ TEST_F(DBTestUniversalCompaction2, PeriodicCompaction) {
opts.compaction_options_universal.min_merge_width = 2;
opts.compaction_options_universal.max_size_amplification_percent = 200;
opts.periodic_compaction_seconds = 48 * 60 * 60; // 2 days
opts.memtable_whole_key_filtering = false;
opts.memtable_prefix_bloom_size_ratio = 0;
opts.num_levels = 5;
env_->SetMockSleep();
Reopen(opts);

View File

@ -30,6 +30,8 @@ class MemTableListTest : public testing::Test {
// Fixture constructor: starts with no open DB (db == nullptr) and file_number
// at 1, then prepares the options and database path used by the tests.
MemTableListTest() : db(nullptr), file_number(1) {
dbname = test::PerThreadDBPath("memtable_list_test");
options.create_if_missing = true;
// Explicitly switch the memtable bloom filter off (whole-key filtering
// disabled, bloom size ratio 0) instead of relying on the library defaults.
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
// Presumably removes any database left at dbname by an earlier run; the
// returned status is checked with EXPECT_OK.
EXPECT_OK(DestroyDB(dbname, options));
}

View File

@ -300,19 +300,19 @@ struct AdvancedColumnFamilyOptions {
// write_buffer_size * memtable_prefix_bloom_size_ratio.
// If it is larger than 0.25, it is sanitized to 0.25.
//
// Default: 0 (disable)
// Default: 0.015
//
// Dynamically changeable through SetOptions() API
double memtable_prefix_bloom_size_ratio = 0.0;
double memtable_prefix_bloom_size_ratio = 0.015;
// Enable whole key bloom filter in memtable. Note this will only take effect
// if memtable_prefix_bloom_size_ratio is not 0. Enabling whole key filtering
// can potentially reduce CPU usage for point-look-ups.
//
// Default: false (disable)
// Default: true (enable)
//
// Dynamically changeable through SetOptions() API
bool memtable_whole_key_filtering = false;
bool memtable_whole_key_filtering = true;
// Page size for huge page for the arena used by the memtable. If <=0, it
// won't allocate from huge page but from malloc.

View File

@ -484,6 +484,11 @@ DBOptions* DBOptions::OldDefaults(int rocksdb_major_version,
ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
int rocksdb_major_version, int rocksdb_minor_version) {
if (rocksdb_major_version < 6 ||
(rocksdb_major_version == 6 && rocksdb_minor_version < 18)) {
memtable_prefix_bloom_size_ratio = 0;
memtable_whole_key_filtering = false;
}
if (rocksdb_major_version < 5 ||
(rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) {
compaction_pri = CompactionPri::kByCompensatedSize;
@ -501,7 +506,6 @@ ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
} else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) {
level0_stop_writes_trigger = 30;
}
return this;
}

View File

@ -3052,6 +3052,15 @@ TEST_F(OptionsParserTest, DifferentDefault) {
old_default_opts.OldDefaults(5, 18);
ASSERT_TRUE(old_default_opts.compaction_pri ==
CompactionPri::kByCompensatedSize);
ASSERT_EQ(0, old_default_opts.memtable_prefix_bloom_size_ratio);
ASSERT_FALSE(old_default_opts.memtable_whole_key_filtering);
}
{
Options old_default_opts;
old_default_opts.OldDefaults(6, 17);
ASSERT_EQ(0, old_default_opts.memtable_prefix_bloom_size_ratio);
ASSERT_FALSE(old_default_opts.memtable_whole_key_filtering);
}
Options small_opts;

View File

@ -319,6 +319,8 @@ TEST_P(OptimisticTransactionTest, CheckKeySkipOldMemtable) {
for (int attempt = kAttemptHistoryMemtable; attempt <= kAttemptImmMemTable;
attempt++) {
options.max_write_buffer_number_to_maintain = 3;
options.memtable_whole_key_filtering = false;
options.memtable_prefix_bloom_size_ratio = 0;
Reopen();
WriteOptions write_options;

View File

@ -5672,6 +5672,10 @@ TEST_P(TransactionTest, DuplicateKeys) {
ASSERT_OK(ReOpen());
std::unique_ptr<const Comparator> comp_gc(new ThreeBytewiseComparator());
cf_options.comparator = comp_gc.get();
// ThreeBytewiseComparator won't work with bloom filters.
cf_options.memtable_whole_key_filtering = false;
cf_options.memtable_prefix_bloom_size_ratio = 0;
ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle));
WriteOptions write_options;
WriteBatch batch;