mirror of https://github.com/facebook/rocksdb.git
Disallow refitting more than 1 file from non-L0 to L0 (#12481)
Summary: **Context/Summary:** We recently discovered that `CompactRange(change_level=true, target_level=0)` can possibly refit more than 1 files to L0. This refitting can cause read performance regression as we need to go through every file in L0, corruption in some edge case and false positive corruption caught by force consistency check. We decided to explicitly disallow such behavior. A related change to OptionChangeMigration(): - When migrating to FIFO with `compaction_options_fifo.max_table_files_size > 0`, RocksDB will [CompactRange() all the to-be-migrate data into a couple L0 files](https://github.com/facebook/rocksdb/blob/main/utilities/option_change_migration/option_change_migration.cc#L164-L169) to avoid dropping all the data upon migration finishes when the migrated data is larger than max_table_files_size. This is achieved by first compacting all the data into a couple non-L0 files and refitting those files from non-L0 to L0 if needed. In that way, only some data instead of all data will be dropped immediately after migration to FIFO with a max_table_files_size. - Since this type of refitting behavior is disallowed from now on, we won't do this trick anymore and explicitly state such risk in API comment. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12481 Test Plan: - New UT - Modified UT Reviewed By: cbi42 Differential Revision: D55351178 Pulled By: hx235 fbshipit-source-id: 9d8854f2f81d7e8aff859c3a4e53b7d688048e80
This commit is contained in:
parent
4e226c97b8
commit
d985902ef4
|
@ -9890,6 +9890,62 @@ TEST_F(DBCompactionTest, TurnOnLevelCompactionDynamicLevelBytesUCToLC) {
|
|||
ASSERT_EQ(expected_lsm, FilesPerLevel(1));
|
||||
}
|
||||
|
||||
// Verifies that CompactRange(change_level=true, target_level=0) refuses
// (returns Status::Aborted) to refit more than one non-L0 file into L0.
// Such a refit would regress reads (every L0 file must be checked) and can
// corrupt the LSM in edge cases, so it is explicitly disallowed.
TEST_F(DBCompactionTest, DisallowRefitFilesFromNonL0ToL02) {
  Options options = CurrentOptions();
  options.compaction_style = CompactionStyle::kCompactionStyleLevel;
  options.num_levels = 3;
  DestroyAndReopen(options);

  // To set up LSM shape:
  // L0
  // L1
  // L2:[a@1, k@3], [k@2, z@4] (sorted by ascending smallest key)
  // Both of these 2 files have epoch number = 1
  //
  // Snapshots pin the individual sequence numbers so the two "k" versions
  // cannot be collapsed by compaction.
  const Snapshot* s1 = db_->GetSnapshot();
  ASSERT_OK(Put("a", "@1"));
  ASSERT_OK(Put("k", "@2"));
  const Snapshot* s2 = db_->GetSnapshot();
  ASSERT_OK(Put("k", "@3"));
  ASSERT_OK(Put("z", "v3"));
  ASSERT_OK(Flush());

  // Cut file between k@3 and k@2 so the manual compaction below produces
  // exactly two output files in L2.
  SyncPoint::GetInstance()->SetCallBack(
      "CompactionOutputs::ShouldStopBefore::manual_decision",
      [options](void* p) {
        auto* pair = (std::pair<bool*, const Slice>*)p;
        if ((options.comparator->Compare(ExtractUserKey(pair->second), "k") ==
             0) &&
            (GetInternalKeySeqno(pair->second) == 2)) {
          *(pair->first) = true;
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();

  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  cro.change_level = true;
  cro.target_level = 2;
  Status s = dbfull()->CompactRange(cro, nullptr, nullptr);
  ASSERT_OK(s);
  ASSERT_EQ("0,0,2", FilesPerLevel());
  std::vector<LiveFileMetaData> files;
  dbfull()->GetLiveFilesMetaData(&files);
  ASSERT_EQ(files.size(), 2);
  ASSERT_EQ(files[0].smallestkey, "a");
  ASSERT_EQ(files[0].largestkey, "k");
  ASSERT_EQ(files[1].smallestkey, "k");
  ASSERT_EQ(files[1].largestkey, "z");

  // Disallow moving 2 non-L0 files to L0
  CompactRangeOptions cro2;
  cro2.change_level = true;
  cro2.target_level = 0;
  s = dbfull()->CompactRange(cro2, nullptr, nullptr);
  ASSERT_TRUE(s.IsAborted());

  // Tear down the sync point so the callback (which captures a local copy of
  // `options`) cannot fire in later tests sharing this process.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();

  db_->ReleaseSnapshot(s1);
  db_->ReleaseSnapshot(s2);
}
|
||||
|
||||
TEST_F(DBCompactionTest, DrainUnnecessaryLevelsAfterMultiplierChanged) {
|
||||
// When the level size multiplier increases such that fewer levels become
|
||||
// necessary, unnecessary levels should be drained.
|
||||
|
|
|
@ -1774,6 +1774,8 @@ void DBImpl::NotifyOnCompactionCompleted(
|
|||
|
||||
// REQUIREMENT: block all background work by calling PauseBackgroundWork()
|
||||
// before calling this function
|
||||
// TODO (hx235): Replace Status::NotSupported() with Status::Aborted() for
|
||||
// better semantics like CompactFiles()
|
||||
Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
||||
assert(level < cfd->NumberLevels());
|
||||
if (target_level >= cfd->NumberLevels()) {
|
||||
|
@ -1809,6 +1811,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|||
if (to_level != level) {
|
||||
std::vector<CompactionInputFiles> input(1);
|
||||
input[0].level = level;
|
||||
// TODO (hx235): Only refit the output files in the current manual
|
||||
// compaction instead of all the files in the output level
|
||||
for (auto& f : vstorage->LevelFiles(level)) {
|
||||
input[0].files.push_back(f);
|
||||
}
|
||||
|
@ -1840,6 +1844,12 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|||
}
|
||||
} else {
|
||||
// to_level < level
|
||||
if (to_level == 0 && input[0].files.size() > 1) {
|
||||
refitting_level_ = false;
|
||||
return Status::Aborted(
|
||||
"Moving more than 1 file from non-L0 to L0 is not allowed as it "
|
||||
"does not bring any benefit to read nor write throughput.");
|
||||
}
|
||||
// Check levels are empty for a trivial move
|
||||
for (int l = to_level; l < level; l++) {
|
||||
if (vstorage->NumLevelFiles(l) > 0) {
|
||||
|
|
|
@ -2537,7 +2537,8 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
|
|||
bool non_ok_status_allowed =
|
||||
status.IsManualCompactionPaused() ||
|
||||
(status.getState() && std::strstr(status.getState(), "injected")) ||
|
||||
status.IsInvalidArgument() || status.IsNotSupported();
|
||||
status.IsAborted() || status.IsInvalidArgument() ||
|
||||
status.IsNotSupported();
|
||||
fprintf(non_ok_status_allowed ? stdout : stderr,
|
||||
"Unable to perform CompactRange(): %s under specified "
|
||||
"CompactRangeOptions: %s (Empty string or "
|
||||
|
|
|
@ -15,10 +15,10 @@ namespace ROCKSDB_NAMESPACE {
|
|||
// Multiple column families is not supported.
|
||||
// It is best-effort. No guarantee to succeed.
|
||||
// A full compaction may be executed.
|
||||
// If the target options use FIFO compaction, the FIFO condition might be
|
||||
// sacrificed: for data migrated, data inserted later might be dropped
|
||||
// earlier. This is to guarantee FIFO compaction won't drop all the
|
||||
// migrated data to fit max_table_files_size.
|
||||
// WARNING: using this to migrate from non-FIFO to FIFO compaction
|
||||
// with `Options::compaction_options_fifo.max_table_files_size` > 0 can cause
|
||||
// the whole DB to be dropped right after migration if the migrated data is
|
||||
// larger than `max_table_files_size`
|
||||
Status OptionChangeMigration(std::string dbname, const Options& old_opts,
|
||||
const Options& new_opts);
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
`CompactRange()` with `CompactRangeOptions::change_level = true` and `CompactRangeOptions::target_level = 0` that ends up moving more than 1 file from non-L0 to L0 will return `Status::Aborted()`.
|
|
@ -0,0 +1,4 @@
|
|||
Using `OptionChangeMigration()` to migrate from non-FIFO to FIFO compaction
|
||||
with `Options::compaction_options_fifo.max_table_files_size` > 0 can cause
|
||||
the whole DB to be dropped right after migration if the migrated data is larger than
|
||||
`max_table_files_size`
|
|
@ -160,14 +160,7 @@ Status OptionChangeMigration(std::string dbname, const Options& old_opts,
|
|||
return MigrateToLevelBase(dbname, old_opts, new_opts);
|
||||
} else if (new_opts.compaction_style ==
|
||||
CompactionStyle::kCompactionStyleFIFO) {
|
||||
uint64_t l0_file_size = 0;
|
||||
if (new_opts.compaction_options_fifo.max_table_files_size > 0) {
|
||||
// Create at least 8 files when max_table_files_size hits, so that the DB
|
||||
// doesn't just disappear. This in fact violates the FIFO condition, but
|
||||
// otherwise, the migrated DB is unlikely to be usable.
|
||||
l0_file_size = new_opts.compaction_options_fifo.max_table_files_size / 8;
|
||||
}
|
||||
return CompactToLevel(old_opts, dbname, 0, l0_file_size, true);
|
||||
return CompactToLevel(old_opts, dbname, 0, 0 /* l0_file_size */, true);
|
||||
} else {
|
||||
return Status::NotSupported(
|
||||
"Do not how to migrate to this compaction style");
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
#include "rocksdb/utilities/option_change_migration.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <set>
|
||||
|
||||
#include "db/db_test_util.h"
|
||||
|
@ -31,6 +33,8 @@ class DBOptionChangeMigrationTests
|
|||
level2_ = std::get<3>(GetParam());
|
||||
compaction_style2_ = std::get<4>(GetParam());
|
||||
is_dynamic2_ = std::get<5>(GetParam());
|
||||
// This is set to be extremely large if not zero to avoid dropping any data
|
||||
// right after migration, which makes test verification difficult
|
||||
fifo_max_table_files_size_ = std::get<6>(GetParam());
|
||||
}
|
||||
|
||||
|
@ -457,26 +461,30 @@ INSTANTIATE_TEST_CASE_P(
|
|||
false /* is dynamic leveling in old option */,
|
||||
4 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */, 0),
|
||||
std::make_tuple(4 /* old num_levels */, 0 /* old compaction style */,
|
||||
false /* is dynamic leveling in old option */,
|
||||
1 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */,
|
||||
5 * 1024 * 1024 /*fifo max_table_files_size*/),
|
||||
std::make_tuple(3 /* old num_levels */, 0 /* old compaction style */,
|
||||
true /* is dynamic leveling in old option */,
|
||||
2 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */,
|
||||
5 * 1024 * 1024 /*fifo max_table_files_size*/),
|
||||
std::make_tuple(3 /* old num_levels */, 1 /* old compaction style */,
|
||||
false /* is dynamic leveling in old option */,
|
||||
3 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */,
|
||||
5 * 1024 * 1024 /*fifo max_table_files_size*/),
|
||||
std::make_tuple(
|
||||
4 /* old num_levels */, 0 /* old compaction style */,
|
||||
false /* is dynamic leveling in old option */,
|
||||
1 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */,
|
||||
std::numeric_limits<uint64_t>::max() /*fifo max_table_files_size*/),
|
||||
std::make_tuple(
|
||||
3 /* old num_levels */, 0 /* old compaction style */,
|
||||
true /* is dynamic leveling in old option */,
|
||||
2 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */,
|
||||
std::numeric_limits<uint64_t>::max() /*fifo max_table_files_size*/),
|
||||
std::make_tuple(
|
||||
3 /* old num_levels */, 1 /* old compaction style */,
|
||||
false /* is dynamic leveling in old option */,
|
||||
3 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */,
|
||||
std::numeric_limits<uint64_t>::max() /*fifo max_table_files_size*/),
|
||||
std::make_tuple(1 /* old num_levels */, 1 /* old compaction style */,
|
||||
false /* is dynamic leveling in old option */,
|
||||
4 /* old num_levels */, 2 /* new compaction style */,
|
||||
false /* is dynamic leveling in new option */,
|
||||
5 * 1024 * 1024 /*fifo max_table_files_size*/)));
|
||||
std::numeric_limits<
|
||||
uint64_t>::max() /*fifo max_table_files_size*/)));
|
||||
|
||||
class DBOptionChangeMigrationTest : public DBTestBase {
|
||||
public:
|
||||
|
|
Loading…
Reference in New Issue