diff --git a/HISTORY.md b/HISTORY.md index c03180849c..694b90a53b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,7 +1,8 @@ # Rocksdb Change Log ## Unreleased -### Behavior Changes * When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper. +### Default Option Change +* Change the default of `memtable_prefix_bloom_size_ratio` from 0 to 0.015 and the default of `memtable_whole_key_filtering` from false to true. As a result, the memtable bloom filter is now enabled by default and uses up to 1.5% of the memtable space (`write_buffer_size`). ### New Features * Add support for key-value integrity protection in live updates from the user buffers provided to `WriteBatch` through the write to RocksDB's in-memory update buffer (memtable). This is intended to detect some cases of in-memory data corruption, due to either software or hardware errors. Users can enable protection by constructing their `WriteBatch` with `protection_bytes_per_key == 8`. 
diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc index 9b76c03d5c..87120d1476 100644 --- a/db/cuckoo_table_db_test.cc +++ b/db/cuckoo_table_db_test.cc @@ -44,6 +44,8 @@ class CuckooTableDBTest : public testing::Test { options.allow_mmap_reads = true; options.create_if_missing = true; options.allow_concurrent_memtable_write = false; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; return options; } diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 7c9277c141..c83b503b49 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -1867,6 +1867,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.disable_auto_compactions = true; options.statistics = CreateDBStatistics(); + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; + // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.filter_policy.reset(new BFP(10, bfp_impl)); diff --git a/db/db_test.cc b/db/db_test.cc index dbe4a161b9..df231ccbc7 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -4001,6 +4001,8 @@ TEST_F(DBTest, DynamicMemtableOptions) { options.level0_file_num_compaction_trigger = 1024; options.level0_slowdown_writes_trigger = 1024; options.level0_stop_writes_trigger = 1024; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; DestroyAndReopen(options); auto gen_l0_kb = [this](int size) { diff --git a/db/db_test2.cc b/db/db_test2.cc index 33c13e69c3..3a1bf71fdd 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -156,6 +156,8 @@ class PartitionedIndexTestListener : public EventListener { TEST_F(DBTest2, PartitionedIndexUserToInternalKey) { BlockBasedTableOptions table_options; Options options = CurrentOptions(); + // Hold all data until manual flush. 
+ options.memtable_factory.reset(new SpecialSkipListFactory(5000)); table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; PartitionedIndexTestListener* listener = new PartitionedIndexTestListener(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 3a104346dd..46d7c44788 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -344,6 +344,8 @@ Options DBTestBase::GetDefaultOptions() const { if (!env_->skip_fsync_) { options.track_and_verify_wals_in_manifest = true; } + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; return options; } diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 548b8ae0e0..f669c1d4a8 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -110,6 +110,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSingleSortedRun) { options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; options.target_file_size_base = 32 << 10; // 32KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // trigger compaction if there are >= 4 files KeepFilterFactory* filter = new KeepFilterFactory(true); filter->expect_manual_compaction_.store(false); @@ -144,6 +146,8 @@ TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files options.level0_file_num_compaction_trigger = 4; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; BlockBasedTableOptions bbto; bbto.cache_index_and_filter_blocks = true; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); @@ -213,6 +217,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrigger) { options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files 
options.level0_file_num_compaction_trigger = 4; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; KeepFilterFactory* filter = new KeepFilterFactory(true); filter->expect_manual_compaction_.store(false); options.compaction_filter_factory.reset(filter); @@ -317,6 +323,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSizeAmplification) { options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -359,6 +367,8 @@ TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionSizeAmplification) { options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // Initial setup of compaction_options_universal will prevent universal // compaction from happening options.compaction_options_universal.size_ratio = 100; @@ -439,6 +449,8 @@ TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionReadAmplification) { options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // Initial setup of compaction_options_universal will prevent universal // compaction from happening options.compaction_options_universal.max_size_amplification_percent = 2000; @@ -540,6 +552,8 @@ TEST_P(DBTestUniversalCompaction, CompactFilesOnUniversalCompaction) { options.create_if_missing = true; options.compaction_style = kCompactionStyleLevel; options.num_levels = 1; + options.memtable_whole_key_filtering = false; + 
options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = options.write_buffer_size; options.compression = kNoCompression; options = CurrentOptions(options); @@ -605,6 +619,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTargetLevel) { options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 100 << 10; // 100KB options.num_levels = 7; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.disable_auto_compactions = true; DestroyAndReopen(options); @@ -651,6 +667,8 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) { options.write_buffer_size = 100 << 10; // 100KB options.level0_file_num_compaction_trigger = 8; options.max_background_compactions = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 * 1024; CreateAndReopenWithCF({"pikachu"}, options); @@ -696,6 +714,8 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionTrivialMove) { options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 2; options.target_file_size_base = 32 * 1024; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -741,6 +761,8 @@ TEST_P(DBTestUniversalCompactionParallel, UniversalCompactionParallel) { options.max_background_compactions = 3; options.max_background_flushes = 3; options.target_file_size_base = 1 * 1024; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.compaction_options_universal.max_size_amplification_percent = 110; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -800,6 +822,8 @@ TEST_P(DBTestUniversalCompactionParallel, PickByFileNumberBug) { options.level0_file_num_compaction_trigger = 7; options.max_background_compactions = 2; 
options.target_file_size_base = 1024 * 1024; // 1MB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; // Disable size amplifiction compaction options.compaction_options_universal.max_size_amplification_percent = @@ -916,6 +940,8 @@ INSTANTIATE_TEST_CASE_P(Parallel, DBTestUniversalCompactionParallel, TEST_P(DBTestUniversalCompaction, UniversalCompactionOptions) { Options options = CurrentOptions(); + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; // 4KB @@ -951,6 +977,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionStopStyleSimilarSize) { options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; // 4KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files options.level0_file_num_compaction_trigger = 4; @@ -1037,7 +1065,9 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio1) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 100 << 10; // 100KB + options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 2; options.num_levels = num_levels_; @@ -1105,6 +1135,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio2) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + 
options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 2; options.num_levels = num_levels_; @@ -1150,6 +1182,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest1) { options.compaction_options_universal.allow_trivial_move = true; options.num_levels = 2; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 1; options.target_file_size_base = 32 * 1024; @@ -1196,6 +1230,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest2) { options.compaction_options_universal.allow_trivial_move = true; options.num_levels = 15; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 8; options.max_background_compactions = 2; options.target_file_size_base = 64 * 1024; @@ -1235,6 +1271,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionFourPaths) { options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; @@ -1339,6 +1377,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionCFPathUse) { options.compaction_options_universal.size_ratio = 10; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; @@ -1497,6 +1537,8 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { 
options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; options.write_buffer_size = 200 << 10; // 200KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 3; options.memtable_factory.reset(new SpecialSkipListFactory(KNumKeysPerFile)); options = CurrentOptions(options); @@ -1576,6 +1618,8 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSecondPathRatio) { options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; options.memtable_factory.reset( @@ -1679,6 +1723,8 @@ TEST_P(DBTestUniversalCompaction, ConcurrentBottomPriLowPriCompactions) { options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.write_buffer_size = 100 << 10; // 100KB + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = kNumFilesTrigger; // Trigger compaction if size amplification exceeds 110% @@ -1736,6 +1782,8 @@ TEST_P(DBTestUniversalCompaction, RecalculateScoreAfterPicking) { options.compaction_style = kCompactionStyleUniversal; options.level0_file_num_compaction_trigger = kNumFilesTrigger; options.num_levels = num_levels_; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; Reopen(options); std::atomic num_compactions_attempted(0); @@ -1836,6 +1884,8 @@ TEST_P(DBTestUniversalManualCompactionOutputPathId, options.num_levels = num_levels_; options.target_file_size_base = 1 << 30; // Big size options.level0_file_num_compaction_trigger = 10; + options.memtable_whole_key_filtering = false; + 
options.memtable_prefix_bloom_size_ratio = 0; Destroy(options); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -1903,6 +1953,8 @@ TEST_F(DBTestUniversalCompaction2, BasicL0toL1) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -1946,6 +1998,8 @@ TEST_F(DBTestUniversalCompaction2, SingleLevel) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -1984,6 +2038,8 @@ TEST_F(DBTestUniversalCompaction2, MultipleLevels) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -2056,6 +2112,8 @@ TEST_F(DBTestUniversalCompaction2, OverlappingL0) { opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -2098,6 +2156,8 @@ TEST_F(DBTestUniversalCompaction2, IngestBehind) { opts.compaction_options_universal.size_ratio = 10; 
opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence @@ -2160,6 +2220,8 @@ TEST_F(DBTestUniversalCompaction2, PeriodicCompaction) { opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; opts.periodic_compaction_seconds = 48 * 60 * 60; // 2 days + opts.memtable_whole_key_filtering = false; + opts.memtable_prefix_bloom_size_ratio = 0; opts.num_levels = 5; env_->SetMockSleep(); Reopen(opts); diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 7c6d15bfdb..af42f8dee2 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -30,6 +30,8 @@ class MemTableListTest : public testing::Test { MemTableListTest() : db(nullptr), file_number(1) { dbname = test::PerThreadDBPath("memtable_list_test"); options.create_if_missing = true; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; EXPECT_OK(DestroyDB(dbname, options)); } diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index a7d9f542f5..f6c0123738 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -300,19 +300,19 @@ struct AdvancedColumnFamilyOptions { // write_buffer_size * memtable_prefix_bloom_size_ratio. // If it is larger than 0.25, it is sanitized to 0.25. // - // Default: 0 (disable) + // Default: 0.015 // // Dynamically changeable through SetOptions() API - double memtable_prefix_bloom_size_ratio = 0.0; + double memtable_prefix_bloom_size_ratio = 0.015; // Enable whole key bloom filter in memtable. Note this will only take effect // if memtable_prefix_bloom_size_ratio is not 0. 
Enabling whole key filtering // can potentially reduce CPU usage for point-look-ups. // - // Default: false (disable) + // Default: true (enable) // // Dynamically changeable through SetOptions() API - bool memtable_whole_key_filtering = false; + bool memtable_whole_key_filtering = true; // Page size for huge page for the arena used by the memtable. If <=0, it // won't allocate from huge page but from malloc. diff --git a/options/options.cc b/options/options.cc index d76a154417..eb0e6c0ed5 100644 --- a/options/options.cc +++ b/options/options.cc @@ -484,6 +484,11 @@ DBOptions* DBOptions::OldDefaults(int rocksdb_major_version, ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults( int rocksdb_major_version, int rocksdb_minor_version) { + if (rocksdb_major_version < 6 || + (rocksdb_major_version == 6 && rocksdb_minor_version < 18)) { + memtable_prefix_bloom_size_ratio = 0; + memtable_whole_key_filtering = false; + } if (rocksdb_major_version < 5 || (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) { compaction_pri = CompactionPri::kByCompensatedSize; @@ -501,7 +506,6 @@ ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults( } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) { level0_stop_writes_trigger = 30; } - return this; } diff --git a/options/options_test.cc b/options/options_test.cc index b15be02066..9296959215 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -3052,6 +3052,15 @@ TEST_F(OptionsParserTest, DifferentDefault) { old_default_opts.OldDefaults(5, 18); ASSERT_TRUE(old_default_opts.compaction_pri == CompactionPri::kByCompensatedSize); + ASSERT_EQ(0, old_default_opts.memtable_prefix_bloom_size_ratio); + ASSERT_FALSE(old_default_opts.memtable_whole_key_filtering); + } + + { + Options old_default_opts; + old_default_opts.OldDefaults(6, 17); + ASSERT_EQ(0, old_default_opts.memtable_prefix_bloom_size_ratio); + ASSERT_FALSE(old_default_opts.memtable_whole_key_filtering); } Options small_opts; diff --git 
a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index ad27bd9643..ac0b9165bb 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -319,6 +319,8 @@ TEST_P(OptimisticTransactionTest, CheckKeySkipOldMemtable) { for (int attempt = kAttemptHistoryMemtable; attempt <= kAttemptImmMemTable; attempt++) { options.max_write_buffer_number_to_maintain = 3; + options.memtable_whole_key_filtering = false; + options.memtable_prefix_bloom_size_ratio = 0; Reopen(); WriteOptions write_options; diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 9c4ce5604a..06e31a8cd4 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -5672,6 +5672,10 @@ TEST_P(TransactionTest, DuplicateKeys) { ASSERT_OK(ReOpen()); std::unique_ptr comp_gc(new ThreeBytewiseComparator()); cf_options.comparator = comp_gc.get(); + // ThreeBytewiseComparator won't work with bloom filters. + cf_options.memtable_whole_key_filtering = false; + cf_options.memtable_prefix_bloom_size_ratio = 0; + ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle)); WriteOptions write_options; WriteBatch batch;