Make option `level_compaction_dynamic_level_bytes` true by default (#11525)

Summary:
After https://github.com/facebook/rocksdb/issues/11321 and https://github.com/facebook/rocksdb/issues/11340 (both included in RocksDB v8.2), migration from `level_compaction_dynamic_level_bytes=false` to `level_compaction_dynamic_level_bytes=true` is handled automatically by RocksDB and requires no manual compaction from the user. This PR makes the option true by default, since it has several advantages: 1. a better space amplification guarantee (a more stable LSM shape); 2. compaction that is more adaptive to write traffic; 3. automatic draining of unneeded levels. The wiki is updated with more detail: https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#option-level_compaction_dynamic_level_bytes-and-levels-target-size.
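For downstream users, no code change should be required: reopening an existing leveled-compaction DB with a build that includes this change picks up the new default, and the migration happens automatically. A minimal sketch (the DB path here is illustrative, not from this PR):

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // With a build that includes this change, leveled compaction defaults to
  // dynamic level sizing; no manual CompactRange() is needed on reopen.
  assert(options.level_compaction_dynamic_level_bytes);
  // Users who prefer the previous behavior can still opt out explicitly:
  // options.level_compaction_dynamic_level_bytes = false;
  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/example_db", &db);
  delete db;  // db remains nullptr if Open() failed, so this is safe
  return s.ok() ? 0 : 1;
}
```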

The PR mostly contains fixes for unit tests that assumed `level_compaction_dynamic_level_bytes=false`. The most notable changes are commits f742be330c and b1928e42b3, which override the option in DBTestBase so that unit tests still run with `level_compaction_dynamic_level_bytes=false` by default. This reduces the number of changes needed in unit tests. I think this default override in unit tests is okay since the behavior of `level_compaction_dynamic_level_bytes=true` is tested by setting the option explicitly. Also, `level_compaction_dynamic_level_bytes=false` may be preferable in unit tests, as it makes it easier to construct a desired LSM shape.
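For illustration, a hedged sketch of how a test opts in to the new behavior while DBTestBase keeps the legacy default (the test name is hypothetical; `CurrentOptions()` and `DestroyAndReopen()` are the existing DBTestBase helpers):

```cpp
// Hypothetical test body, not part of this PR.
TEST_F(DBCompactionTest, DynamicLevelBytesSketch) {
  Options options = CurrentOptions();  // stays false via the OptionsOverride default
  options.level_compaction_dynamic_level_bytes = true;  // opt in explicitly
  options.num_levels = 7;
  DestroyAndReopen(options);
  // ... write data and verify the LSM shape under dynamic level targets ...
}
```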

The comment for option `level_compaction_dynamic_level_bytes` is updated to reflect this change and the change made in https://github.com/facebook/rocksdb/issues/10057.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11525

Test Plan: ran `make -j32 J=32 check` several times to try to catch tests made flaky by this option change.

Reviewed By: ajkr

Differential Revision: D46654256

Pulled By: cbi42

fbshipit-source-id: 6b5827dae124f6f1fdc8cca2ac6f6fcd878830e1
Changyu Bi, 2023-06-15 21:12:39 -07:00, committed by Facebook GitHub Bot
parent 253bc91953
commit bc04ec85db
23 changed files with 170 additions and 135 deletions


@ -1255,6 +1255,7 @@ TEST_P(ColumnFamilyTest, DifferentCompactionStyles) {
ColumnFamilyOptions default_cf, one, two;
db_options_.max_open_files = 20; // only 10 files in file cache
default_cf.level_compaction_dynamic_level_bytes = false;
default_cf.compaction_style = kCompactionStyleLevel;
default_cf.num_levels = 3;
default_cf.write_buffer_size = 64 << 10; // 64KB
@ -1272,6 +1273,7 @@ TEST_P(ColumnFamilyTest, DifferentCompactionStyles) {
one.level0_file_num_compaction_trigger = 4;
one.write_buffer_size = 120000;
two.level_compaction_dynamic_level_bytes = false;
two.compaction_style = kCompactionStyleLevel;
two.num_levels = 4;
two.level0_file_num_compaction_trigger = 3;
@ -1326,6 +1328,7 @@ TEST_P(ColumnFamilyTest, MultipleManualCompactions) {
db_options_.max_open_files = 20; // only 10 files in file cache
db_options_.max_background_compactions = 3;
default_cf.level_compaction_dynamic_level_bytes = false;
default_cf.compaction_style = kCompactionStyleLevel;
default_cf.num_levels = 3;
default_cf.write_buffer_size = 64 << 10; // 64KB
@ -1342,6 +1345,7 @@ TEST_P(ColumnFamilyTest, MultipleManualCompactions) {
one.level0_file_num_compaction_trigger = 4;
one.write_buffer_size = 120000;
two.level_compaction_dynamic_level_bytes = false;
two.compaction_style = kCompactionStyleLevel;
two.num_levels = 4;
two.level0_file_num_compaction_trigger = 3;
@ -1424,13 +1428,14 @@ TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) {
db_options_.max_open_files = 20; // only 10 files in file cache
db_options_.max_background_compactions = 3;
default_cf.level_compaction_dynamic_level_bytes = false;
default_cf.compaction_style = kCompactionStyleLevel;
default_cf.num_levels = 3;
default_cf.write_buffer_size = 64 << 10; // 64KB
default_cf.target_file_size_base = 30 << 10;
default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
;
table_options.no_block_cache = true;
default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));
@ -1441,6 +1446,7 @@ TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) {
one.level0_file_num_compaction_trigger = 4;
one.write_buffer_size = 120000;
two.level_compaction_dynamic_level_bytes = false;
two.compaction_style = kCompactionStyleLevel;
two.num_levels = 4;
two.level0_file_num_compaction_trigger = 3;
@ -1519,13 +1525,14 @@ TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) {
db_options_.max_open_files = 20; // only 10 files in file cache
db_options_.max_background_compactions = 3;
default_cf.level_compaction_dynamic_level_bytes = false;
default_cf.compaction_style = kCompactionStyleLevel;
default_cf.num_levels = 3;
default_cf.write_buffer_size = 64 << 10; // 64KB
default_cf.target_file_size_base = 30 << 10;
default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
;
table_options.no_block_cache = true;
default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));
@ -1536,6 +1543,7 @@ TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) {
one.level0_file_num_compaction_trigger = 4;
one.write_buffer_size = 120000;
two.level_compaction_dynamic_level_bytes = false;
two.compaction_style = kCompactionStyleLevel;
two.num_levels = 4;
two.level0_file_num_compaction_trigger = 3;


@ -66,6 +66,7 @@ TEST_F(CompactFilesTest, L0ConflictsFiles) {
const int kWriteBufferSize = 10000;
const int kLevel0Trigger = 2;
options.create_if_missing = true;
options.level_compaction_dynamic_level_bytes = false;
options.compaction_style = kCompactionStyleLevel;
// Small slowdown and stop trigger for experimental purpose.
options.level0_slowdown_writes_trigger = 20;
@ -359,6 +360,7 @@ TEST_F(CompactFilesTest, CompactionFilterWithGetSv) {
std::shared_ptr<FilterWithGet> cf(new FilterWithGet());
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.create_if_missing = true;
options.compaction_filter = cf.get();
@ -401,6 +403,7 @@ TEST_F(CompactFilesTest, SentinelCompressionType) {
CompactionStyle::kCompactionStyleNone}) {
ASSERT_OK(DestroyDB(db_name_, Options()));
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.compaction_style = compaction_style;
// L0: Snappy, L1: ZSTD, L2: Snappy
options.compression_per_level = {CompressionType::kSnappyCompression,


@ -616,6 +616,7 @@ TEST_P(CompactionJobStatsTest, CompactionJobStatsTest) {
// via AddExpectedStats().
auto* stats_checker = new CompactionJobStatsChecker();
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.listeners.emplace_back(stats_checker);
options.create_if_missing = true;
// just enough setting to hold off auto-compaction.
@ -815,6 +816,7 @@ TEST_P(CompactionJobStatsTest, DeletionStatsTest) {
// what we expect.
auto* stats_checker = new CompactionJobDeletionStatsChecker();
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.listeners.emplace_back(stats_checker);
options.create_if_missing = true;
options.level0_file_num_compaction_trigger = kTestScale + 1;


@ -70,6 +70,11 @@ class CompactionPickerTestBase : public testing::Test {
mutable_cf_options_.RefreshDerivedOptions(ioptions_);
ioptions_.cf_paths.emplace_back("dummy",
std::numeric_limits<uint64_t>::max());
// When the default value of this option is true, universal compaction
// tests can encounter assertion failure since SanitizeOption() is
// not run to set this option to false. So we do the sanitization
// here. Tests that test this option set this option to true explicitly.
ioptions_.level_compaction_dynamic_level_bytes = false;
}
~CompactionPickerTestBase() override {}


@ -450,6 +450,7 @@ TEST_F(CorruptionTest, TableFile) {
TEST_F(CorruptionTest, VerifyChecksumReadahead) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
SpecialEnv senv(base_env_);
options.env = &senv;
// Disable block cache as we are going to check checksum for
@ -503,6 +504,7 @@ TEST_F(CorruptionTest, VerifyChecksumReadahead) {
TEST_F(CorruptionTest, TableFileIndexData) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
// very big, we'll trigger flushes manually
options.write_buffer_size = 100 * 1024 * 1024;
Reopen(&options);
@ -659,6 +661,7 @@ TEST_F(CorruptionTest, CorruptedDescriptor) {
TEST_F(CorruptionTest, CompactionInputError) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
Reopen(&options);
Build(10);
@ -680,6 +683,7 @@ TEST_F(CorruptionTest, CompactionInputError) {
TEST_F(CorruptionTest, CompactionInputErrorParanoid) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.paranoid_checks = true;
options.write_buffer_size = 131072;
@ -777,6 +781,7 @@ TEST_F(CorruptionTest, RangeDeletionCorrupted) {
TEST_F(CorruptionTest, FileSystemStateCorrupted) {
for (int iter = 0; iter < 2; ++iter) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.paranoid_checks = true;
options.create_if_missing = true;
@ -816,6 +821,7 @@ static const auto& corruption_modes = {
TEST_F(CorruptionTest, ParanoidFileChecksOnFlush) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.check_flush_compaction_key_order = false;
options.paranoid_file_checks = true;
@ -844,6 +850,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksOnFlush) {
TEST_F(CorruptionTest, ParanoidFileChecksOnCompact) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.paranoid_file_checks = true;
options.create_if_missing = true;
@ -877,6 +884,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksOnCompact) {
TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeFirst) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.check_flush_compaction_key_order = false;
options.paranoid_file_checks = true;
@ -913,6 +921,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeFirst) {
TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRange) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.check_flush_compaction_key_order = false;
options.paranoid_file_checks = true;
@ -952,6 +961,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRange) {
TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeLast) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.check_flush_compaction_key_order = false;
options.paranoid_file_checks = true;
@ -988,6 +998,7 @@ TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeLast) {
TEST_F(CorruptionTest, LogCorruptionErrorsInCompactionIterator) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.create_if_missing = true;
options.allow_data_in_errors = true;
@ -1017,6 +1028,7 @@ TEST_F(CorruptionTest, LogCorruptionErrorsInCompactionIterator) {
TEST_F(CorruptionTest, CompactionKeyOrderCheck) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.paranoid_file_checks = false;
options.create_if_missing = true;
@ -1044,6 +1056,7 @@ TEST_F(CorruptionTest, CompactionKeyOrderCheck) {
TEST_F(CorruptionTest, FlushKeyOrderCheck) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
options.paranoid_file_checks = false;
options.create_if_missing = true;
@ -1097,6 +1110,7 @@ TEST_F(CorruptionTest, DisableKeyOrderCheck) {
TEST_F(CorruptionTest, VerifyWholeTableChecksum) {
CloseDb();
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_.get();
ASSERT_OK(DestroyDB(dbname_, options));
options.create_if_missing = true;
@ -1182,6 +1196,7 @@ INSTANTIATE_TEST_CASE_P(CorruptionTest, CrashDuringRecoveryWithCorruptionTest,
TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecovery) {
CloseDb();
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.track_and_verify_wals_in_manifest =
track_and_verify_wals_in_manifest_;
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
@ -1354,6 +1369,7 @@ TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecovery) {
TEST_P(CrashDuringRecoveryWithCorruptionTest, TxnDbCrashDuringRecovery) {
CloseDb();
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
options.track_and_verify_wals_in_manifest =
track_and_verify_wals_in_manifest_;
@ -1551,6 +1567,7 @@ TEST_P(CrashDuringRecoveryWithCorruptionTest, TxnDbCrashDuringRecovery) {
TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecoveryWithFlush) {
CloseDb();
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
options.avoid_flush_during_recovery = false;
options.env = env_.get();


@ -39,6 +39,7 @@ class CuckooTableDBTest : public testing::Test {
Options CurrentOptions() {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.table_factory.reset(NewCuckooTableFactory());
options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true));
options.allow_mmap_reads = true;


@ -8001,10 +8001,8 @@ TEST_F(DBCompactionTest, ChangeLevelErrorPathTest) {
}
TEST_F(DBCompactionTest, CompactionWithBlob) {
Options options;
options.env = env_;
Options options = CurrentOptions();
options.disable_auto_compactions = true;
Reopen(options);
constexpr char first_key[] = "first_key";
@ -8096,10 +8094,8 @@ INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobError, DBCompactionTestBlobError,
"BlobFileBuilder::WriteBlobToFile:AppendFooter"}));
TEST_P(DBCompactionTestBlobError, CompactionError) {
Options options;
Options options = CurrentOptions();
options.disable_auto_compactions = true;
options.env = env_;
Reopen(options);
constexpr char first_key[] = "first_key";
@ -8265,8 +8261,7 @@ TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGCOverrides) {
}
TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGC) {
Options options;
options.env = env_;
Options options = CurrentOptions();
options.disable_auto_compactions = true;
options.enable_blob_files = true;
options.blob_file_size = 32; // one blob per file


@ -49,10 +49,8 @@ TEST_F(DBMergeOperandTest, CacheEvictedMergeOperandReadAfterFreeBug) {
// There was a bug of reading merge operands after they are mistakely freed
// in DB::GetMergeOperands, which is surfaced by cache full.
// See PR#9507 for more.
Options options;
options.create_if_missing = true;
Options options = CurrentOptions();
options.merge_operator = MergeOperators::CreateStringAppendOperator();
options.env = env_;
BlockBasedTableOptions table_options;
// Small cache to simulate cache full
@ -121,11 +119,9 @@ TEST_F(DBMergeOperandTest, FlushedMergeOperandReadAfterFreeBug) {
}
TEST_F(DBMergeOperandTest, GetMergeOperandsBasic) {
Options options;
options.create_if_missing = true;
Options options = CurrentOptions();
// Use only the latest two merge operands.
options.merge_operator = std::make_shared<LimitedStringAppendMergeOp>(2, ',');
options.env = env_;
Reopen(options);
int num_records = 4;
int number_of_operands = 0;
@ -309,13 +305,11 @@ TEST_F(DBMergeOperandTest, GetMergeOperandsBasic) {
}
TEST_F(DBMergeOperandTest, BlobDBGetMergeOperandsBasic) {
Options options;
options.create_if_missing = true;
Options options = CurrentOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
// Use only the latest two merge operands.
options.merge_operator = std::make_shared<LimitedStringAppendMergeOp>(2, ',');
options.env = env_;
Reopen(options);
int num_records = 4;
int number_of_operands = 0;
@ -401,8 +395,7 @@ TEST_F(DBMergeOperandTest, GetMergeOperandsLargeResultOptimization) {
const int kNumOperands = 1024;
const int kOperandLen = 1024;
Options options;
options.create_if_missing = true;
Options options = CurrentOptions();
options.merge_operator = MergeOperators::CreateStringAppendOperator();
DestroyAndReopen(options);


@ -81,7 +81,7 @@ TEST_F(DBMergeOperatorTest, LimitMergeOperands) {
size_t limit_ = 0;
};
Options options;
Options options = CurrentOptions();
options.create_if_missing = true;
// Use only the latest two merge operands.
options.merge_operator = std::make_shared<LimitedStringAppendMergeOp>(2, ',');
@ -134,7 +134,7 @@ TEST_F(DBMergeOperatorTest, LimitMergeOperands) {
}
TEST_F(DBMergeOperatorTest, MergeErrorOnRead) {
Options options;
Options options = CurrentOptions();
options.create_if_missing = true;
options.merge_operator.reset(new TestPutOperator());
options.env = env_;
@ -147,7 +147,7 @@ TEST_F(DBMergeOperatorTest, MergeErrorOnRead) {
}
TEST_F(DBMergeOperatorTest, MergeErrorOnWrite) {
Options options;
Options options = CurrentOptions();
options.create_if_missing = true;
options.merge_operator.reset(new TestPutOperator());
options.max_successive_merges = 3;
@ -163,7 +163,7 @@ TEST_F(DBMergeOperatorTest, MergeErrorOnWrite) {
}
TEST_F(DBMergeOperatorTest, MergeErrorOnIteration) {
Options options;
Options options = CurrentOptions();
options.create_if_missing = true;
options.merge_operator.reset(new TestPutOperator());
options.env = env_;
@ -221,7 +221,7 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
// expect "k0" and "k2" to always be readable. "k1" is expected to be readable
// only by APIs that do not require merging, such as `GetMergeOperands()`.
const int kNumOperands = 3;
Options options;
Options options = CurrentOptions();
options.merge_operator.reset(new TestPutOperator());
options.env = env_;
Reopen(options);
@ -361,7 +361,7 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) {
TEST_F(DBMergeOperatorTest, DataBlockBinaryAndHash) {
// Basic test to check that merge operator works with data block index type
// DataBlockBinaryAndHash.
Options options;
Options options = CurrentOptions();
options.create_if_missing = true;
options.merge_operator.reset(new TestPutOperator());
options.env = env_;


@ -582,6 +582,7 @@ TEST_F(DBOptionsTest, EnableAutoCompactionAndTriggerStall) {
TEST_F(DBOptionsTest, SetOptionsMayTriggerCompaction) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.create_if_missing = true;
options.level0_file_num_compaction_trigger = 1000;
options.env = env_;


@ -282,6 +282,7 @@ TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) {
Random rnd(301);
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.create_if_missing = true;
options.write_buffer_size = 4096;
options.max_write_buffer_number = 2;


@ -5272,6 +5272,7 @@ TEST_F(DBTest, DynamicCompactionOptions) {
const uint64_t k1MB = 1 << 20;
const uint64_t k4KB = 1 << 12;
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = env_;
options.create_if_missing = true;
options.compression = kNoCompression;


@ -324,6 +324,12 @@ Options DBTestBase::GetDefaultOptions() const {
options.max_open_files = 5000;
options.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords;
options.compaction_pri = CompactionPri::kByCompensatedSize;
// The original default value for this option is false,
// and many unit tests assume this value. It also makes
// it easier to create a desired LSM shape in unit tests.
// Unit tests for this option set level_compaction_dynamic_level_bytes=true
// explicitly.
options.level_compaction_dynamic_level_bytes = false;
options.env = env_;
if (!env_->skip_fsync_) {
options.track_and_verify_wals_in_manifest = true;
@ -569,6 +575,8 @@ Options DBTestBase::GetOptions(
if (set_block_based_table_factory) {
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
}
options.level_compaction_dynamic_level_bytes =
options_override.level_compaction_dynamic_level_bytes;
options.env = env_;
options.create_if_missing = true;
options.fail_if_options_file_error = true;


@ -114,6 +114,12 @@ struct OptionsOverride {
// Used as a bit mask of individual enums in which to skip an XF test point
int skip_policy = 0;
// The default value for this option is changed from false to true.
// Keeping the default as false for unit tests since old unit tests assume
// this behavior. Tests for level_compaction_dynamic_level_bytes
// will set the option to true explicitly.
bool level_compaction_dynamic_level_bytes = false;
};
} // namespace anon


@ -551,6 +551,7 @@ class TestCompactionReasonListener : public EventListener {
TEST_F(EventListenerTest, CompactionReasonLevel) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.env = CurrentOptions().env;
options.create_if_missing = true;
options.memtable_factory.reset(test::NewSpecialSkipListFactory(


@ -190,6 +190,7 @@ TEST_F(ManualCompactionTest, Test) {
TEST_F(ManualCompactionTest, SkipLevel) {
DB* db;
Options options;
options.level_compaction_dynamic_level_bytes = false;
options.num_levels = 3;
// Initially, flushed L0 files won't exceed 100.
options.level0_file_num_compaction_trigger = 100;


@ -123,6 +123,7 @@ class PlainTableDBTest : public testing::Test,
// Return the current option configuration.
Options CurrentOptions() {
Options options;
options.level_compaction_dynamic_level_bytes = false;
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;


@ -3460,12 +3460,11 @@ void VersionStorageInfo::ComputeCompactionScore(
// Level-based involves L0->L0 compactions that can lead to oversized
// L0 files. Take into account size as well to avoid later giant
// compactions to the base level.
// If score in L0 is always too high, L0->L1 will always be
// prioritized over L1->L2 compaction and L1 will accumulate to
// too large. But if L0 score isn't high enough, L0 will accumulate
// and data is not moved to L1 fast enough. With potential L0->L0
// compaction, number of L0 files aren't always an indication of
// L0 oversizing, and we also need to consider total size of L0.
// If score in L0 is always too high, L0->LBase will always be
// prioritized over LBase->LBase+1 compaction and LBase will
// accumulate to too large. But if L0 score isn't high enough, L0 will
// accumulate and data is not moved to LBase fast enough. The score
// calculation below takes into account L0 size vs LBase size.
if (immutable_options.level_compaction_dynamic_level_bytes) {
if (total_size >= mutable_cf_options.max_bytes_for_level_base) {
// When calculating estimated_compaction_needed_bytes, we assume
@ -3477,10 +3476,13 @@ void VersionStorageInfo::ComputeCompactionScore(
score = std::max(score, 1.01);
}
if (total_size > level_max_bytes_[base_level_]) {
// In this case, we compare L0 size with actual L1 size and make
// sure score is more than 1.0 (10.0 after scaled) if L0 is larger
// than L1. Since in this case L1 score is lower than 10.0, L0->L1
// is prioritized over L1->L2.
// In this case, we compare L0 size with actual LBase size and
// make sure score is more than 1.0 (10.0 after scaled) if L0 is
// larger than LBase. Since LBase score = LBase size /
// (target size + total_downcompact_bytes) where
// total_downcompact_bytes = total_size > LBase size,
// LBase score is lower than 10.0. So L0->LBase is prioritized
// over LBase -> LBase+1.
uint64_t base_level_size = 0;
for (auto f : files_[base_level_]) {
base_level_size += f->compensated_file_size;
@ -4703,7 +4705,7 @@ void VersionStorageInfo::CalculateBaseBytes(const ImmutableOptions& ioptions,
assert(base_level_ == 1);
base_level_size = base_bytes_max;
} else {
base_level_size = cur_level_size;
base_level_size = std::max(static_cast<uint64_t>(1), cur_level_size);
}
}


@ -3547,6 +3547,7 @@ INSTANTIATE_TEST_CASE_P(
TEST_P(ChargeFileMetadataTestWithParam, Basic) {
Options options;
options.level_compaction_dynamic_level_bytes = false;
BlockBasedTableOptions table_options;
CacheEntryRoleOptions::Decision charge_file_metadata = GetParam();
table_options.cache_usage_options.options_overrides.insert(


@ -600,11 +600,11 @@ struct AdvancedColumnFamilyOptions {
// 1. target size is in the range of
// (max_bytes_for_level_base / max_bytes_for_level_multiplier,
// max_bytes_for_level_base]
// 2. target size of the last level (level num_levels-1) equals to extra size
// of the level.
// At the same time max_bytes_for_level_multiplier and
// max_bytes_for_level_multiplier_additional are still satisfied.
// (When L0 is too large, we make some adjustment. See below.)
// 2. target size of the last level (level num_levels-1) equals to the max
// size of a level in the LSM (typically the last level).
// At the same time max_bytes_for_level_multiplier is still satisfied.
// Note that max_bytes_for_level_multiplier_additional is ignored with this
// flag on.
//
// With this option on, from an empty DB, we make last level the base level,
// which means merging L0 data into the last level, until it exceeds
@ -642,60 +642,37 @@ struct AdvancedColumnFamilyOptions {
// By doing it, we give max_bytes_for_level_multiplier a priority against
// max_bytes_for_level_base, for a more predictable LSM tree shape. It is
// useful to limit worse case space amplification.
//
//
// If the compaction from L0 is lagged behind, a special mode will be turned
// on to prioritize write amplification against max_bytes_for_level_multiplier
// or max_bytes_for_level_base. The L0 compaction is lagged behind by looking
// at number of L0 files and total L0 size. If number of L0 files is at least
// the double of level0_file_num_compaction_trigger, or the total size is
// at least max_bytes_for_level_base, this mode is on. The target of L1 grows
// to the actual data size in L0, and then determine the target for each level
// so that each level will have the same level multiplier.
//
// For example, when L0 size is 100MB, the size of last level is 1600MB,
// max_bytes_for_level_base = 80MB, and max_bytes_for_level_multiplier = 10.
// Since L0 size is larger than max_bytes_for_level_base, this is a L0
// compaction backlogged mode. So that the L1 size is determined to be 100MB.
// Based on max_bytes_for_level_multiplier = 10, at least 3 non-0 levels will
// be needed. The level multiplier will be calculated to be 4 and the three
// levels' target to be [100MB, 400MB, 1600MB].
//
// In this mode, The number of levels will be no more than the normal mode,
// and the level multiplier will be lower. The write amplification will
// likely to be reduced.
//
//
// max_bytes_for_level_multiplier_additional is ignored with this flag on.
//
// To make the migration easier, when turning this feature on, files in the
// LSM will be trivially moved down to fill the LSM starting from the
// bottommost level during DB open. For example, if the LSM looks like:
// L0: f0, f1
// L1: f2, f3
// L2: f4
// L3:
// L4: f5
// and the DB is opened with num_levels = 7 with this feature turned on,
// new LSM after DB open looks like the following:
// L0: f0, f1, (and possibly data flushed from WAL)
// L4: f2, f3
// L5: f4
// L6: f5
//
// If `allow_ingest_behind=true` or `preclude_last_level_data_seconds > 0`,
// then the last level is reserved, and we will start filling LSM from the
// second last level (L5 in the above example).
// second last level.
//
// With this option on, compaction is more adaptive to write traffic:
// Compaction priority will take into account estimated bytes to be compacted
// down to a level and favors compacting lower levels when there is a write
// traffic spike (and hence more compaction debt). Refer to
// https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#option-level_compaction_dynamic_level_bytes-and-levels-target-size
// for more detailed description. See more implementation detail in:
// VersionStorageInfo::ComputeCompactionScore().
//
// With this option on, unneeded levels will be drained automatically:
// Note that there may be excessive levels (where target level size is 0 when
// computed based on this feature) in the LSM after a user migrates to turn
// this feature on. This is especially likely when a user migrates from
// leveled compaction with a smaller multiplier or from universal compaction.
// RocksDB will gradually drain these unnecessary levels by compacting files
// down the LSM.
// computed based on this feature) in the LSM. This can happen after a user
// migrates to turn this feature on or deletes a lot of data. This is
// especially likely when a user migrates from leveled compaction with a
// smaller multiplier or from universal compaction. RocksDB will gradually
// drain these unnecessary levels by compacting files down the LSM. Smaller
// number of levels should help to reduce read amplification.
//
// Default: false
bool level_compaction_dynamic_level_bytes = false;
// Migration to turn on this option:
// - Before RocksDB v8.2, users are expected to do a full manual compaction
// and then restart DB to turn on this option.
// - Since RocksDB v8.2, users can just restart DB with this option on, as
// long as num_levels is no smaller than number of non-empty levels in the
// LSM. Migration will be done automatically by RocksDB. See more in
// https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#migrating-from-level_compaction_dynamic_level_bytesfalse-to-level_compaction_dynamic_level_bytestrue
//
// Default: true
bool level_compaction_dynamic_level_bytes = true;
// Allows RocksDB to generate files that are not exactly the target_file_size
// only for the non-bottommost files. Which can reduce the write-amplification
@ -714,6 +691,8 @@ struct AdvancedColumnFamilyOptions {
// Different max-size multipliers for different levels.
// These are multiplied by max_bytes_for_level_multiplier to arrive
// at the max-size of each level.
// This option only applies to leveled compaction with
// `level_compaction_dynamic_level_bytes = false`.
//
// Default: 1
//


@ -818,25 +818,27 @@ public class RocksDBTest {
final int NUM_L0_FILES = 10;
final int TEST_SCALE = 5;
final int KEY_INTERVAL = 100;
try (final Options opt = new Options().
setCreateIfMissing(true).
setCompactionStyle(CompactionStyle.LEVEL).
setNumLevels(5).
// a slightly bigger write buffer than L0 file
// so that we can ensure manual flush always
// go before background flush happens.
setWriteBufferSize(L0_FILE_SIZE * 2).
// Disable auto L0 -> L1 compaction
setLevelZeroFileNumCompactionTrigger(20).
setTargetFileSizeBase(L0_FILE_SIZE * 100).
setTargetFileSizeMultiplier(1).
// To disable auto compaction
setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100).
setMaxBytesForLevelMultiplier(2).
setDisableAutoCompactions(true);
final RocksDB db = RocksDB.open(opt,
dbFolder.getRoot().getAbsolutePath())
) {
try (final Options opt = new Options()
.setCreateIfMissing(true)
.setCompactionStyle(CompactionStyle.LEVEL)
.setLevelCompactionDynamicLevelBytes(false)
.setNumLevels(5)
.
// a slightly bigger write buffer than L0 file
// so that we can ensure manual flush always
// go before background flush happens.
setWriteBufferSize(L0_FILE_SIZE * 2)
.
// Disable auto L0 -> L1 compaction
setLevelZeroFileNumCompactionTrigger(20)
.setTargetFileSizeBase(L0_FILE_SIZE * 100)
.setTargetFileSizeMultiplier(1)
.
// To disable auto compaction
setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100)
.setMaxBytesForLevelMultiplier(2)
.setDisableAutoCompactions(true);
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
// fill database with key/value pairs
final byte[] value = new byte[VALUE_SIZE];
int int_key = 0;
@ -904,7 +906,8 @@ public class RocksDBTest {
.setCompressionType(CompressionType.NO_COMPRESSION)
.setTargetFileSizeBase(FILE_SIZE)
.setWriteBufferSize(FILE_SIZE / 2)
.setDisableAutoCompactions(true);
.setDisableAutoCompactions(true)
.setLevelCompactionDynamicLevelBytes(false);
final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
final int records = FILE_SIZE / (KEY_SIZE + VALUE_SIZE);
@ -954,25 +957,28 @@ public class RocksDBTest {
final int TEST_SCALE = 5;
final int KEY_INTERVAL = 100;
try (final DBOptions opt = new DBOptions().
setCreateIfMissing(true).
setCreateMissingColumnFamilies(true);
final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions().
setCompactionStyle(CompactionStyle.LEVEL).
setNumLevels(5).
try (final DBOptions opt =
new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true);
final ColumnFamilyOptions new_cf_opts =
new ColumnFamilyOptions()
.setCompactionStyle(CompactionStyle.LEVEL)
.setLevelCompactionDynamicLevelBytes(false)
.setNumLevels(5)
.
// a slightly bigger write buffer than L0 file
// so that we can ensure manual flush always
// go before background flush happens.
setWriteBufferSize(L0_FILE_SIZE * 2).
setWriteBufferSize(L0_FILE_SIZE * 2)
.
// Disable auto L0 -> L1 compaction
setLevelZeroFileNumCompactionTrigger(20).
setTargetFileSizeBase(L0_FILE_SIZE * 100).
setTargetFileSizeMultiplier(1).
setLevelZeroFileNumCompactionTrigger(20)
.setTargetFileSizeBase(L0_FILE_SIZE * 100)
.setTargetFileSizeMultiplier(1)
.
// To disable auto compaction
setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100).
setMaxBytesForLevelMultiplier(2).
setDisableAutoCompactions(true)
) {
setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100)
.setMaxBytesForLevelMultiplier(2)
.setDisableAutoCompactions(true)) {
final List<ColumnFamilyDescriptor> columnFamilyDescriptors =
Arrays.asList(
new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
@ -1267,15 +1273,16 @@ public class RocksDBTest {
final byte[] cfName = "pikachu".getBytes(UTF_8);
try (final Options options = new Options()
.setCreateIfMissing(true)
.setWriteBufferSize(writeBufferSize)
.setCompactionStyle(CompactionStyle.LEVEL)
.setTargetFileSizeBase(writeBufferSize)
.setMaxBytesForLevelBase(writeBufferSize * 2)
.setLevel0StopWritesTrigger(2)
.setMaxBytesForLevelMultiplier(2)
.setCompressionType(CompressionType.NO_COMPRESSION)
.setMaxSubcompactions(4)) {
.setCreateIfMissing(true)
.setWriteBufferSize(writeBufferSize)
.setCompactionStyle(CompactionStyle.LEVEL)
.setLevelCompactionDynamicLevelBytes(false)
.setTargetFileSizeBase(writeBufferSize)
.setMaxBytesForLevelBase(writeBufferSize * 2)
.setLevel0StopWritesTrigger(2)
.setMaxBytesForLevelMultiplier(2)
.setCompressionType(CompressionType.NO_COMPRESSION)
.setMaxSubcompactions(4)) {
final String dbPath = dbFolder.getRoot().getAbsolutePath();
try (final RocksDB db = RocksDB.open(options, dbPath);
final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(options)) {


@ -81,6 +81,7 @@ class ReduceLevelTest : public testing::Test {
Status ReduceLevelTest::OpenDB(bool create_if_missing, int num_levels) {
ROCKSDB_NAMESPACE::Options opt;
opt.level_compaction_dynamic_level_bytes = false;
opt.num_levels = num_levels;
opt.create_if_missing = create_if_missing;
ROCKSDB_NAMESPACE::Status st =


@ -0,0 +1 @@
Change the default value for option `level_compaction_dynamic_level_bytes` to true. This affects users who use leveled compaction and do not set this option explicitly. These users may see additional background compactions following DB open. These compactions help to shape the LSM according to `level_compaction_dynamic_level_bytes`, such that the size of each level Ln is approximately the size of Ln-1 multiplied by `max_bytes_for_level_multiplier`. Turning on this option has other benefits too; see more detail in the wiki: https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#option-level_compaction_dynamic_level_bytes-and-levels-target-size and in the option comment in advanced_options.h (#11525).
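As a rough illustration of the resulting level targets (a simplified sketch with made-up sizes, not the actual VersionStorageInfo::CalculateBaseBytes() code): targets are assigned bottom-up from the current size of the last level, dividing by `max_bytes_for_level_multiplier` at each step; the first level whose target fits under `max_bytes_for_level_base` becomes the base level, and levels above it stay empty.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const uint64_t last_level_size = 1600ull << 20;  // ~1.6 GB in the last level
  const uint64_t base_bytes_max = 256ull << 20;    // max_bytes_for_level_base
  const double multiplier = 10.0;                  // max_bytes_for_level_multiplier
  const int num_levels = 7;

  std::vector<uint64_t> target(num_levels, 0);  // 0 means the level stays empty
  uint64_t cur = last_level_size;
  int level = num_levels - 1;
  // Assign targets bottom-up; stop at the first level whose target fits under
  // max_bytes_for_level_base -- that level becomes the base level.
  while (level > 1 && cur > base_bytes_max) {
    target[level--] = cur;
    cur = static_cast<uint64_t>(cur / multiplier);
  }
  target[level] = cur;  // base level
  for (int l = 1; l < num_levels; ++l) {
    std::cout << "L" << l << " target: " << (target[l] >> 20) << " MB\n";
  }
  return 0;
}
```

With these numbers the targets come out to L5 ≈ 160 MB and L6 = 1600 MB, with L1–L4 left empty, so each non-empty level is roughly `max_bytes_for_level_multiplier` times the level above it.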