mirror of https://github.com/facebook/rocksdb.git
Properly determine a truncated CompactRange stop key (#4496)
Summary: When a CompactRange() call for a level is truncated before the end key is reached, because it exceeds max_compaction_bytes, we need to properly set the compaction_end parameter to indicate the stop key. The next CompactRange will use that as the begin key. We set it to the smallest key of the next file in the level after expanding inputs to get a clean cut. Previously, we were setting it before expanding inputs. So we could end up recompacting some files. In a pathological case, where a single key has many entries spanning all the files in the level (possibly due to merge operands without a partial merge operator, thus resulting in compaction output identical to the input), this would result in an endless loop over the same set of files. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4496 Differential Revision: D10395026 Pulled By: anand1976 fbshipit-source-id: f0c2f89fee29b4b3be53b6467b53abba8e9146a9
This commit is contained in:
parent
e633983cf1
commit
1e3845805d
|
@ -6,6 +6,7 @@
|
|||
### Bug Fixes
|
||||
* Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set.
|
||||
* Fix in-memory range tombstone truncation to avoid erroneously covering newer keys at a lower level, and include range tombstones in compacted files whose largest key is the range tombstone's start key.
|
||||
* Properly set the stop key for a truncated manual CompactRange
|
||||
* Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds.
|
||||
|
||||
## 5.17.0 (10/05/2018)
|
||||
|
|
|
@ -219,7 +219,8 @@ void CompactionPicker::GetRange(const std::vector<CompactionInputFiles>& inputs,
|
|||
|
||||
bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
|
||||
VersionStorageInfo* vstorage,
|
||||
CompactionInputFiles* inputs) {
|
||||
CompactionInputFiles* inputs,
|
||||
InternalKey** next_smallest) {
|
||||
// This isn't good compaction
|
||||
assert(!inputs->empty());
|
||||
|
||||
|
@ -242,7 +243,8 @@ bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
|
|||
GetRange(*inputs, &smallest, &largest);
|
||||
inputs->clear();
|
||||
vstorage->GetOverlappingInputs(level, &smallest, &largest, &inputs->files,
|
||||
hint_index, &hint_index);
|
||||
hint_index, &hint_index, true,
|
||||
next_smallest);
|
||||
} while (inputs->size() > old_size);
|
||||
|
||||
// we started off with inputs non-empty and the previous loop only grew
|
||||
|
@ -649,7 +651,6 @@ Compaction* CompactionPicker::CompactRange(
|
|||
uint64_t s = inputs[i]->compensated_file_size;
|
||||
total += s;
|
||||
if (total >= limit) {
|
||||
**compaction_end = inputs[i + 1]->smallest;
|
||||
covering_the_whole_range = false;
|
||||
inputs.files.resize(i + 1);
|
||||
break;
|
||||
|
@ -658,7 +659,10 @@ Compaction* CompactionPicker::CompactRange(
|
|||
}
|
||||
assert(output_path_id < static_cast<uint32_t>(ioptions_.cf_paths.size()));
|
||||
|
||||
if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs) == false) {
|
||||
InternalKey key_storage;
|
||||
InternalKey* next_smallest = &key_storage;
|
||||
if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs, &next_smallest) ==
|
||||
false) {
|
||||
// manual compaction is now multi-threaded, so it can
|
||||
// happen that ExpandWhileOverlapping fails
|
||||
// we handle it higher in RunManualCompaction
|
||||
|
@ -666,8 +670,10 @@ Compaction* CompactionPicker::CompactRange(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
if (covering_the_whole_range) {
|
||||
if (covering_the_whole_range || !next_smallest) {
|
||||
*compaction_end = nullptr;
|
||||
} else {
|
||||
**compaction_end = *next_smallest;
|
||||
}
|
||||
|
||||
CompactionInputFiles output_level_inputs;
|
||||
|
|
|
@ -151,7 +151,8 @@ class CompactionPicker {
|
|||
// Will return false if it is impossible to apply this compaction.
|
||||
bool ExpandInputsToCleanCut(const std::string& cf_name,
|
||||
VersionStorageInfo* vstorage,
|
||||
CompactionInputFiles* inputs);
|
||||
CompactionInputFiles* inputs,
|
||||
InternalKey** next_smallest = nullptr);
|
||||
|
||||
// Returns true if any one of the parent files are being compacted
|
||||
bool IsRangeInCompaction(VersionStorageInfo* vstorage,
|
||||
|
|
|
@ -3961,6 +3961,50 @@ INSTANTIATE_TEST_CASE_P(
|
|||
CompactionPri::kOldestSmallestSeqFirst,
|
||||
CompactionPri::kMinOverlappingRatio));
|
||||
|
||||
class NoopMergeOperator : public MergeOperator {
|
||||
public:
|
||||
NoopMergeOperator() {}
|
||||
|
||||
virtual bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
|
||||
MergeOperationOutput* merge_out) const override {
|
||||
std::string val("bar");
|
||||
merge_out->new_value = val;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual const char* Name() const override { return "Noop"; }
|
||||
};
|
||||
|
||||
TEST_F(DBCompactionTest, PartialManualCompaction) {
|
||||
Options opts = CurrentOptions();
|
||||
opts.num_levels = 3;
|
||||
opts.level0_file_num_compaction_trigger = 10;
|
||||
opts.compression = kNoCompression;
|
||||
opts.merge_operator.reset(new NoopMergeOperator());
|
||||
opts.target_file_size_base = 10240;
|
||||
DestroyAndReopen(opts);
|
||||
|
||||
Random rnd(301);
|
||||
for (auto i = 0; i < 8; ++i) {
|
||||
for (auto j = 0; j < 10; ++j) {
|
||||
Merge("foo", RandomString(&rnd, 1024));
|
||||
}
|
||||
Flush();
|
||||
}
|
||||
|
||||
MoveFilesToLevel(2);
|
||||
|
||||
std::string prop;
|
||||
EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop));
|
||||
uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2;
|
||||
ASSERT_OK(dbfull()->SetOptions(
|
||||
{{"max_compaction_bytes", std::to_string(max_compaction_bytes)}}));
|
||||
|
||||
CompactRangeOptions cro;
|
||||
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
|
||||
dbfull()->CompactRange(cro, nullptr, nullptr);
|
||||
}
|
||||
|
||||
#endif // !defined(ROCKSDB_LITE)
|
||||
} // namespace rocksdb
|
||||
|
||||
|
|
|
@ -2046,7 +2046,7 @@ bool VersionStorageInfo::OverlapInLevel(int level,
|
|||
void VersionStorageInfo::GetOverlappingInputs(
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
|
||||
bool expand_range) const {
|
||||
bool expand_range, InternalKey** next_smallest) const {
|
||||
if (level >= num_non_empty_levels_) {
|
||||
// this level is empty, no overlapping inputs
|
||||
return;
|
||||
|
@ -2058,11 +2058,17 @@ void VersionStorageInfo::GetOverlappingInputs(
|
|||
}
|
||||
const Comparator* user_cmp = user_comparator_;
|
||||
if (level > 0) {
|
||||
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
|
||||
hint_index, file_index);
|
||||
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index,
|
||||
file_index, false, next_smallest);
|
||||
return;
|
||||
}
|
||||
|
||||
if (next_smallest) {
|
||||
// next_smallest key only makes sense for non-level 0, where files are
|
||||
// non-overlapping
|
||||
*next_smallest = nullptr;
|
||||
}
|
||||
|
||||
Slice user_begin, user_end;
|
||||
if (begin != nullptr) {
|
||||
user_begin = begin->user_key();
|
||||
|
@ -2162,7 +2168,7 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
|
|||
void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
|
||||
bool within_interval) const {
|
||||
bool within_interval, InternalKey** next_smallest) const {
|
||||
assert(level > 0);
|
||||
int min = 0;
|
||||
int mid = 0;
|
||||
|
@ -2198,6 +2204,9 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
|||
|
||||
// If there were no overlapping files, return immediately.
|
||||
if (!foundOverlap) {
|
||||
if (next_smallest) {
|
||||
next_smallest = nullptr;
|
||||
}
|
||||
return;
|
||||
}
|
||||
// returns the index where an overlap is found
|
||||
|
@ -2218,6 +2227,15 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
|||
for (int i = start_index; i <= end_index; i++) {
|
||||
inputs->push_back(files_[level][i]);
|
||||
}
|
||||
|
||||
if (next_smallest != nullptr) {
|
||||
// Provide the next key outside the range covered by inputs
|
||||
if (++end_index < static_cast<int>(files_[level].size())) {
|
||||
**next_smallest = files_[level][end_index]->smallest;
|
||||
} else {
|
||||
*next_smallest = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Store in *start_index and *end_index the range of all files in
|
||||
|
|
|
@ -188,9 +188,11 @@ class VersionStorageInfo {
|
|||
std::vector<FileMetaData*>* inputs,
|
||||
int hint_index = -1, // index of overlap file
|
||||
int* file_index = nullptr, // return index of overlap file
|
||||
bool expand_range = true) // if set, returns files which overlap the
|
||||
const; // range and overlap each other. If false,
|
||||
bool expand_range = true, // if set, returns files which overlap the
|
||||
// range and overlap each other. If false,
|
||||
// then just files intersecting the range
|
||||
InternalKey** next_smallest = nullptr) // if non-null, returns the
|
||||
const; // smallest key of next file not included
|
||||
void GetCleanInputsWithinInterval(
|
||||
int level, const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
|
@ -200,14 +202,15 @@ class VersionStorageInfo {
|
|||
const;
|
||||
|
||||
void GetOverlappingInputsRangeBinarySearch(
|
||||
int level, // level > 0
|
||||
int level, // level > 0
|
||||
const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
std::vector<FileMetaData*>* inputs,
|
||||
int hint_index, // index of overlap file
|
||||
int* file_index, // return index of overlap file
|
||||
bool within_interval = false) // if set, force the inputs within interval
|
||||
const;
|
||||
bool within_interval = false, // if set, force the inputs within interval
|
||||
InternalKey** next_smallest = nullptr) // if non-null, returns the
|
||||
const; // smallest key of next file not included
|
||||
|
||||
void ExtendFileRangeOverlappingInterval(
|
||||
int level,
|
||||
|
|
Loading…
Reference in New Issue