Disallow refitting more than 1 file from non-L0 to L0 (#12481)

Summary:
**Context/Summary:**
We recently discovered that `CompactRange(change_level=true, target_level=0)` can possibly refit more than 1 file to L0. This refitting can cause read performance regression as we need to go through every file in L0, corruption in some edge cases, and false positive corruption caught by the force consistency check. We decided to explicitly disallow such behavior.

A related change to OptionChangeMigration():
- When migrating to FIFO with `compaction_options_fifo.max_table_files_size > 0`, RocksDB will [CompactRange() all the to-be-migrate data into a couple L0 files](https://github.com/facebook/rocksdb/blob/main/utilities/option_change_migration/option_change_migration.cc#L164-L169) to avoid dropping all the data upon migration finishes when the migrated data is larger than max_table_files_size. This is achieved by first compacting all the data into a couple non-L0 files and refitting those files from non-L0 to L0 if needed. In that way, only some data instead of all data will be dropped immediately after migration to FIFO with a max_table_files_size.
- Since this type of refitting behavior is disallowed from now on, we won't do this trick anymore and explicitly state such risk in API comment.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12481

Test Plan:
- New UT
- Modified UT

Reviewed By: cbi42

Differential Revision: D55351178

Pulled By: hx235

fbshipit-source-id: 9d8854f2f81d7e8aff859c3a4e53b7d688048e80
This commit is contained in:
Hui Xiao 2024-03-29 10:52:36 -07:00 committed by Facebook GitHub Bot
parent 4e226c97b8
commit d985902ef4
8 changed files with 102 additions and 29 deletions

View File

@ -9890,6 +9890,62 @@ TEST_F(DBCompactionTest, TurnOnLevelCompactionDynamicLevelBytesUCToLC) {
ASSERT_EQ(expected_lsm, FilesPerLevel(1));
}
// Verifies that CompactRange() with change_level=true and target_level=0
// returns Status::Aborted() when it would move more than one non-L0 file
// down to L0 (such a refit hurts read performance and can cause corruption).
TEST_F(DBCompactionTest, DisallowRefitFilesFromNonL0ToL02) {
Options options = CurrentOptions();
options.compaction_style = CompactionStyle::kCompactionStyleLevel;
options.num_levels = 3;
DestroyAndReopen(options);
// To set up LSM shape:
// L0
// L1
// L2:[a@1, k@3], [k@2, z@4] (sorted by ascending smallest key)
// Both of these 2 files have epoch number = 1
// Snapshots s1/s2 pin the older sequence numbers so both versions of key
// "k" survive the compaction below instead of being collapsed into one.
const Snapshot* s1 = db_->GetSnapshot();
ASSERT_OK(Put("a", "@1"));
ASSERT_OK(Put("k", "@2"));
const Snapshot* s2 = db_->GetSnapshot();
ASSERT_OK(Put("k", "@3"));
ASSERT_OK(Put("z", "v3"));
ASSERT_OK(Flush());
// Cut file between k@3 and k@2
// The SyncPoint callback forces the compaction to close the current output
// file right before writing k@2, yielding exactly 2 adjacent L2 files.
SyncPoint::GetInstance()->SetCallBack(
"CompactionOutputs::ShouldStopBefore::manual_decision",
[options](void* p) {
auto* pair = (std::pair<bool*, const Slice>*)p;
if ((options.comparator->Compare(ExtractUserKey(pair->second), "k") ==
0) &&
(GetInternalKeySeqno(pair->second) == 2)) {
*(pair->first) = true;
}
});
SyncPoint::GetInstance()->EnableProcessing();
// Compact everything into the bottommost level (L2); with the file cut
// above this produces the 2-file shape asserted below.
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
cro.change_level = true;
cro.target_level = 2;
Status s = dbfull()->CompactRange(cro, nullptr, nullptr);
ASSERT_OK(s);
ASSERT_EQ("0,0,2", FilesPerLevel());
std::vector<LiveFileMetaData> files;
dbfull()->GetLiveFilesMetaData(&files);
ASSERT_EQ(files.size(), 2);
ASSERT_EQ(files[0].smallestkey, "a");
ASSERT_EQ(files[0].largestkey, "k");
ASSERT_EQ(files[1].smallestkey, "k");
ASSERT_EQ(files[1].largestkey, "z");
// Disallow moving 2 non-L0 files to L0
// Refitting both L2 files to L0 must be rejected with Aborted().
CompactRangeOptions cro2;
cro2.change_level = true;
cro2.target_level = 0;
s = dbfull()->CompactRange(cro2, nullptr, nullptr);
ASSERT_TRUE(s.IsAborted());
db_->ReleaseSnapshot(s1);
db_->ReleaseSnapshot(s2);
}
TEST_F(DBCompactionTest, DrainUnnecessaryLevelsAfterMultiplierChanged) {
// When the level size multiplier increases such that fewer levels become
// necessary, unnecessary levels should be drained.

View File

@ -1774,6 +1774,8 @@ void DBImpl::NotifyOnCompactionCompleted(
// REQUIREMENT: block all background work by calling PauseBackgroundWork()
// before calling this function
// TODO (hx235): Replace Status::NotSupported() with Status::Aborted() for
// better semantics like CompactFiles()
Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
assert(level < cfd->NumberLevels());
if (target_level >= cfd->NumberLevels()) {
@ -1809,6 +1811,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
if (to_level != level) {
std::vector<CompactionInputFiles> input(1);
input[0].level = level;
// TODO (hx235): Only refit the output files in the current manual
// compaction instead of all the files in the output level
for (auto& f : vstorage->LevelFiles(level)) {
input[0].files.push_back(f);
}
@ -1840,6 +1844,12 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
}
} else {
// to_level < level
if (to_level == 0 && input[0].files.size() > 1) {
refitting_level_ = false;
return Status::Aborted(
"Moving more than 1 file from non-L0 to L0 is not allowed as it "
"does not bring any benefit to read nor write throughput.");
}
// Check levels are empty for a trivial move
for (int l = to_level; l < level; l++) {
if (vstorage->NumLevelFiles(l) > 0) {

View File

@ -2537,7 +2537,8 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
bool non_ok_status_allowed =
status.IsManualCompactionPaused() ||
(status.getState() && std::strstr(status.getState(), "injected")) ||
status.IsInvalidArgument() || status.IsNotSupported();
status.IsAborted() || status.IsInvalidArgument() ||
status.IsNotSupported();
fprintf(non_ok_status_allowed ? stdout : stderr,
"Unable to perform CompactRange(): %s under specified "
"CompactRangeOptions: %s (Empty string or "

View File

@ -15,10 +15,10 @@ namespace ROCKSDB_NAMESPACE {
// Multiple column families is not supported.
// It is best-effort. No guarantee to succeed.
// A full compaction may be executed.
// If the target options use FIFO compaction, the FIFO condition might be
// sacrificed: for data migrated, data inserted later might be dropped
// earlier. This is to guarantee FIFO compaction won't drop all the
// migrated data to fit max_table_files_size.
// WARNING: using this to migrate from non-FIFO to FIFO compaction
// with `Options::compaction_options_fifo.max_table_files_size` > 0 can cause
// the whole DB to be dropped right after migration if the migrated data is
// larger than `max_table_files_size`
Status OptionChangeMigration(std::string dbname, const Options& old_opts,
const Options& new_opts);
} // namespace ROCKSDB_NAMESPACE

View File

@ -0,0 +1 @@
`CompactRange()` with `CompactRangeOptions::change_level = true` and `CompactRangeOptions::target_level = 0` that ends up moving more than 1 file from non-L0 to L0 will return `Status::Aborted()`.

View File

@ -0,0 +1,4 @@
Using `OptionChangeMigration()` to migrate from non-FIFO to FIFO compaction
with `Options::compaction_options_fifo.max_table_files_size` > 0 can cause
the whole DB to be dropped right after migration if the migrated data is larger than
`max_table_files_size`

View File

@ -160,14 +160,7 @@ Status OptionChangeMigration(std::string dbname, const Options& old_opts,
return MigrateToLevelBase(dbname, old_opts, new_opts);
} else if (new_opts.compaction_style ==
CompactionStyle::kCompactionStyleFIFO) {
uint64_t l0_file_size = 0;
if (new_opts.compaction_options_fifo.max_table_files_size > 0) {
// Create at least 8 files when max_table_files_size hits, so that the DB
// doesn't just disappear. This in fact violates the FIFO condition, but
// otherwise, the migrated DB is unlikely to be usable.
l0_file_size = new_opts.compaction_options_fifo.max_table_files_size / 8;
}
return CompactToLevel(old_opts, dbname, 0, l0_file_size, true);
return CompactToLevel(old_opts, dbname, 0, 0 /* l0_file_size */, true);
} else {
return Status::NotSupported(
"Do not how to migrate to this compaction style");

View File

@ -9,6 +9,8 @@
#include "rocksdb/utilities/option_change_migration.h"
#include <cstdint>
#include <limits>
#include <set>
#include "db/db_test_util.h"
@ -31,6 +33,8 @@ class DBOptionChangeMigrationTests
level2_ = std::get<3>(GetParam());
compaction_style2_ = std::get<4>(GetParam());
is_dynamic2_ = std::get<5>(GetParam());
// This is set to be extremely large if not zero to avoid dropping any data
// right after migration, which makes test verification difficult
fifo_max_table_files_size_ = std::get<6>(GetParam());
}
@ -457,26 +461,30 @@ INSTANTIATE_TEST_CASE_P(
false /* is dynamic leveling in old option */,
4 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */, 0),
std::make_tuple(4 /* old num_levels */, 0 /* old compaction style */,
false /* is dynamic leveling in old option */,
1 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */,
5 * 1024 * 1024 /*fifo max_table_files_size*/),
std::make_tuple(3 /* old num_levels */, 0 /* old compaction style */,
true /* is dynamic leveling in old option */,
2 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */,
5 * 1024 * 1024 /*fifo max_table_files_size*/),
std::make_tuple(3 /* old num_levels */, 1 /* old compaction style */,
false /* is dynamic leveling in old option */,
3 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */,
5 * 1024 * 1024 /*fifo max_table_files_size*/),
std::make_tuple(
4 /* old num_levels */, 0 /* old compaction style */,
false /* is dynamic leveling in old option */,
1 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */,
std::numeric_limits<uint64_t>::max() /*fifo max_table_files_size*/),
std::make_tuple(
3 /* old num_levels */, 0 /* old compaction style */,
true /* is dynamic leveling in old option */,
2 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */,
std::numeric_limits<uint64_t>::max() /*fifo max_table_files_size*/),
std::make_tuple(
3 /* old num_levels */, 1 /* old compaction style */,
false /* is dynamic leveling in old option */,
3 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */,
std::numeric_limits<uint64_t>::max() /*fifo max_table_files_size*/),
std::make_tuple(1 /* old num_levels */, 1 /* old compaction style */,
false /* is dynamic leveling in old option */,
4 /* old num_levels */, 2 /* new compaction style */,
false /* is dynamic leveling in new option */,
5 * 1024 * 1024 /*fifo max_table_files_size*/)));
std::numeric_limits<
uint64_t>::max() /*fifo max_table_files_size*/)));
class DBOptionChangeMigrationTest : public DBTestBase {
public: