diff --git a/HISTORY.md b/HISTORY.md index 1559bc312f..adf43773f3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -17,6 +17,7 @@ * When options.soft_rate_limit or options.level0_slowdown_writes_trigger is triggered, the way to slow down writes is changed to: write rate to DB is limited to to options.delayed_write_rate. * DB::GetApproximateSizes() adds a parameter to allow the estimation to include data in mem table, with default to be not to include. It is now only supported in skip list mem table. * DB::CompactRange() now accept CompactRangeOptions instead of multiple paramters. CompactRangeOptions is defined in include/rocksdb/options.h. +* Add force_bottommost_level_compaction option to CompactRangeOptions, which prevents compaction from skipping the bottommost level. ## 3.11.0 (5/19/2015) ### New Features diff --git a/db/compaction_job_stats_test.cc b/db/compaction_job_stats_test.cc index 3fe4afcd7b..2d71eb3fa2 100644 --- a/db/compaction_job_stats_test.cc +++ b/db/compaction_job_stats_test.cc @@ -668,17 +668,9 @@ TEST_F(CompactionJobStatsTest, CompactionJobStatsTest) { 1, num_keys_per_L0_file * 2, compression_ratio, num_keys_per_L0_file)); - // In the second sub-compaction, we expect L1 compaction. 
- stats_checker->AddExpectedStats( - NewManualCompactionJobStats( - smallest_key, largest_key, - 4, 4, num_keys_per_L0_file * 8, - kKeySize, kValueSize, - 1, num_keys_per_L0_file * 8, - compression_ratio, 0)); - ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 2U); + ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U); Compact(1, smallest_key, largest_key); - ASSERT_EQ("0,1", FilesPerLevel(1)); + ASSERT_EQ("0,4", FilesPerLevel(1)); options.compression = GetAnyCompression(); if (options.compression == kNoCompression) { break; diff --git a/db/db_impl.cc b/db/db_impl.cc index 680f5dd50f..1688912933 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1372,8 +1372,18 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options, // level 0 can never be the bottommost level (i.e. if all files are in // level 0, we will compact to level 1) if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal || - cfd->ioptions()->compaction_style == kCompactionStyleFIFO || - (level == max_level_with_files && level > 0)) { + cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { + output_level = level; + } else if (level == max_level_with_files && level > 0) { + if (options.force_bottommost_level_compaction == false && + cfd->ioptions()->compaction_filter == nullptr && + cfd->ioptions()->compaction_filter_factory == nullptr && + cfd->ioptions()->compaction_filter_factory_v2 == nullptr) { + // If we are not forced to compact the bottommost level and there is + // no compaction filter we can skip the compaction of + // the bottommost level + continue; + } output_level = level; } else { output_level = level + 1; diff --git a/db/db_test.cc b/db/db_test.cc index ac6f8ec196..f81b438a6b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -3964,6 +3964,64 @@ TEST_F(DBTest, TrivialMoveTargetLevel) { } } +TEST_F(DBTest, TrivialMoveToLastLevelWithFiles) { + int32_t trivial_move = 0; + int32_t non_trivial_move = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + 
"DBImpl::BackgroundCompaction:TrivialMove", + [&](void* arg) { trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:NonTrivial", + [&](void* arg) { non_trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + Options options; + options.write_buffer_size = 100000000; + options = CurrentOptions(options); + DestroyAndReopen(options); + + int32_t value_size = 10 * 1024; // 10 KB + + Random rnd(301); + std::vector values; + // File with keys [ 0 => 99 ] + for (int i = 0; i < 100; i++) { + values.push_back(RandomString(&rnd, value_size)); + ASSERT_OK(Put(Key(i), values[i])); + } + ASSERT_OK(Flush()); + + ASSERT_EQ("1", FilesPerLevel(0)); + // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 + CompactRangeOptions compact_options; + compact_options.change_level = true; + compact_options.target_level = 3; + ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); + ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); + ASSERT_EQ(trivial_move, 1); + ASSERT_EQ(non_trivial_move, 0); + + // File with keys [ 100 => 199 ] + for (int i = 100; i < 200; i++) { + values.push_back(RandomString(&rnd, value_size)); + ASSERT_OK(Put(Key(i), values[i])); + } + ASSERT_OK(Flush()); + + ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); + // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("0,0,0,2", FilesPerLevel(0)); + ASSERT_EQ(trivial_move, 4); + ASSERT_EQ(non_trivial_move, 0); + + for (int i = 0; i < 200; i++) { + ASSERT_EQ(Get(Key(i)), values[i]); + } + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + TEST_F(DBTest, CompactionTrigger) { Options options; options.write_buffer_size = 100<<10; //100KB @@ -5431,6 +5489,7 @@ TEST_F(DBTest, ConvertCompactionStyle) { CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 0; + 
compact_options.force_bottommost_level_compaction = true; dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr); // Only 1 file in L0 @@ -13739,6 +13798,67 @@ TEST_F(DBTest, SoftLimit) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(DBTest, ForceBottommostLevelCompaction) { + int32_t trivial_move = 0; + int32_t non_trivial_move = 0; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:TrivialMove", + [&](void* arg) { trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:NonTrivial", + [&](void* arg) { non_trivial_move++; }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + Options options; + options.write_buffer_size = 100000000; + options = CurrentOptions(options); + DestroyAndReopen(options); + + int32_t value_size = 10 * 1024; // 10 KB + + Random rnd(301); + std::vector values; + // File with keys [ 0 => 99 ] + for (int i = 0; i < 100; i++) { + values.push_back(RandomString(&rnd, value_size)); + ASSERT_OK(Put(Key(i), values[i])); + } + ASSERT_OK(Flush()); + + ASSERT_EQ("1", FilesPerLevel(0)); + // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 + CompactRangeOptions compact_options; + compact_options.change_level = true; + compact_options.target_level = 3; + ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); + ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); + ASSERT_EQ(trivial_move, 1); + ASSERT_EQ(non_trivial_move, 0); + + // File with keys [ 100 => 199 ] + for (int i = 100; i < 200; i++) { + values.push_back(RandomString(&rnd, value_size)); + ASSERT_OK(Put(Key(i), values[i])); + } + ASSERT_OK(Flush()); + + ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); + // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) + // then compact the bottommost level L3=>L3 (non trivial move) + compact_options = CompactRangeOptions(); + compact_options.force_bottommost_level_compaction = true; + 
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); + ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); + ASSERT_EQ(trivial_move, 4); + ASSERT_EQ(non_trivial_move, 1); + + for (int i = 0; i < 200; i++) { + ASSERT_EQ(Get(Key(i)), values[i]); + } + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 24927f2014..f36892d809 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1251,6 +1251,10 @@ struct CompactRangeOptions { // Compaction outputs will be placed in options.db_paths[target_path_id]. // Behavior is undefined if target_path_id is out of range. uint32_t target_path_id = 0; + // By default compaction will try to skip compacting bottommost level if + // possible, setting this flag to true will force compaction to compact + // the bottommost level. + bool force_bottommost_level_compaction = false; }; } // namespace rocksdb