Properly determine a truncated CompactRange stop key (#4496)

Summary:
When a CompactRange() call for a level is truncated before the end key
is reached, because it exceeds max_compaction_bytes, we need to properly
set the compaction_end parameter to indicate the stop key. The next
CompactRange will use that as the begin key. We set it to the smallest
key of the next file in the level after expanding inputs to get a clean
cut.

Previously, we were setting it before expanding inputs. So we could end
up recompacting some files. In a pathological case, where a single key
has many entries spanning all the files in the level (possibly due to
merge operands without a partial merge operator, thus resulting in
compaction output identical to the input), this would result in
an endless loop over the same set of files.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4496

Differential Revision: D10395026

Pulled By: anand1976

fbshipit-source-id: f0c2f89fee29b4b3be53b6467b53abba8e9146a9
This commit is contained in:
anand1976 2018-10-15 23:20:15 -07:00 committed by Facebook Github Bot
parent e633983cf1
commit 1e3845805d
6 changed files with 88 additions and 15 deletions

View File

@ -6,6 +6,7 @@
### Bug Fixes
* Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set.
* Fix in-memory range tombstone truncation to avoid erroneously covering newer keys at a lower level, and include range tombstones in compacted files whose largest key is the range tombstone's start key.
* Properly set the stop key for a truncated manual CompactRange
* Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds.
## 5.17.0 (10/05/2018)

View File

@ -219,7 +219,8 @@ void CompactionPicker::GetRange(const std::vector<CompactionInputFiles>& inputs,
bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
VersionStorageInfo* vstorage,
CompactionInputFiles* inputs) {
CompactionInputFiles* inputs,
InternalKey** next_smallest) {
// This isn't good compaction
assert(!inputs->empty());
@ -242,7 +243,8 @@ bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
GetRange(*inputs, &smallest, &largest);
inputs->clear();
vstorage->GetOverlappingInputs(level, &smallest, &largest, &inputs->files,
hint_index, &hint_index);
hint_index, &hint_index, true,
next_smallest);
} while (inputs->size() > old_size);
// we started off with inputs non-empty and the previous loop only grew
@ -649,7 +651,6 @@ Compaction* CompactionPicker::CompactRange(
uint64_t s = inputs[i]->compensated_file_size;
total += s;
if (total >= limit) {
**compaction_end = inputs[i + 1]->smallest;
covering_the_whole_range = false;
inputs.files.resize(i + 1);
break;
@ -658,7 +659,10 @@ Compaction* CompactionPicker::CompactRange(
}
assert(output_path_id < static_cast<uint32_t>(ioptions_.cf_paths.size()));
if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs) == false) {
InternalKey key_storage;
InternalKey* next_smallest = &key_storage;
if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs, &next_smallest) ==
false) {
// manual compaction is now multi-threaded, so it can
// happen that ExpandWhileOverlapping fails
// we handle it higher in RunManualCompaction
@ -666,8 +670,10 @@ Compaction* CompactionPicker::CompactRange(
return nullptr;
}
if (covering_the_whole_range) {
if (covering_the_whole_range || !next_smallest) {
*compaction_end = nullptr;
} else {
**compaction_end = *next_smallest;
}
CompactionInputFiles output_level_inputs;

View File

@ -151,7 +151,8 @@ class CompactionPicker {
// Will return false if it is impossible to apply this compaction.
bool ExpandInputsToCleanCut(const std::string& cf_name,
VersionStorageInfo* vstorage,
CompactionInputFiles* inputs);
CompactionInputFiles* inputs,
InternalKey** next_smallest = nullptr);
// Returns true if any one of the parent files are being compacted
bool IsRangeInCompaction(VersionStorageInfo* vstorage,

View File

@ -3961,6 +3961,50 @@ INSTANTIATE_TEST_CASE_P(
CompactionPri::kOldestSmallestSeqFirst,
CompactionPri::kMinOverlappingRatio));
class NoopMergeOperator : public MergeOperator {
public:
NoopMergeOperator() {}
virtual bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
MergeOperationOutput* merge_out) const override {
std::string val("bar");
merge_out->new_value = val;
return true;
}
virtual const char* Name() const override { return "Noop"; }
};
TEST_F(DBCompactionTest, PartialManualCompaction) {
Options opts = CurrentOptions();
opts.num_levels = 3;
opts.level0_file_num_compaction_trigger = 10;
opts.compression = kNoCompression;
opts.merge_operator.reset(new NoopMergeOperator());
opts.target_file_size_base = 10240;
DestroyAndReopen(opts);
Random rnd(301);
for (auto i = 0; i < 8; ++i) {
for (auto j = 0; j < 10; ++j) {
Merge("foo", RandomString(&rnd, 1024));
}
Flush();
}
MoveFilesToLevel(2);
std::string prop;
EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop));
uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2;
ASSERT_OK(dbfull()->SetOptions(
{{"max_compaction_bytes", std::to_string(max_compaction_bytes)}}));
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
dbfull()->CompactRange(cro, nullptr, nullptr);
}
#endif // !defined(ROCKSDB_LITE)
} // namespace rocksdb

View File

@ -2046,7 +2046,7 @@ bool VersionStorageInfo::OverlapInLevel(int level,
void VersionStorageInfo::GetOverlappingInputs(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
bool expand_range) const {
bool expand_range, InternalKey** next_smallest) const {
if (level >= num_non_empty_levels_) {
// this level is empty, no overlapping inputs
return;
@ -2058,11 +2058,17 @@ void VersionStorageInfo::GetOverlappingInputs(
}
const Comparator* user_cmp = user_comparator_;
if (level > 0) {
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
hint_index, file_index);
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index,
file_index, false, next_smallest);
return;
}
if (next_smallest) {
// next_smallest key only makes sense for non-level 0, where files are
// non-overlapping
*next_smallest = nullptr;
}
Slice user_begin, user_end;
if (begin != nullptr) {
user_begin = begin->user_key();
@ -2162,7 +2168,7 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
int level, const InternalKey* begin, const InternalKey* end,
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
bool within_interval) const {
bool within_interval, InternalKey** next_smallest) const {
assert(level > 0);
int min = 0;
int mid = 0;
@ -2198,6 +2204,9 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
// If there were no overlapping files, return immediately.
if (!foundOverlap) {
if (next_smallest) {
next_smallest = nullptr;
}
return;
}
// returns the index where an overlap is found
@ -2218,6 +2227,15 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
for (int i = start_index; i <= end_index; i++) {
inputs->push_back(files_[level][i]);
}
if (next_smallest != nullptr) {
// Provide the next key outside the range covered by inputs
if (++end_index < static_cast<int>(files_[level].size())) {
**next_smallest = files_[level][end_index]->smallest;
} else {
*next_smallest = nullptr;
}
}
}
// Store in *start_index and *end_index the range of all files in

View File

@ -188,9 +188,11 @@ class VersionStorageInfo {
std::vector<FileMetaData*>* inputs,
int hint_index = -1, // index of overlap file
int* file_index = nullptr, // return index of overlap file
bool expand_range = true) // if set, returns files which overlap the
const; // range and overlap each other. If false,
bool expand_range = true, // if set, returns files which overlap the
// range and overlap each other. If false,
// then just files intersecting the range
InternalKey** next_smallest = nullptr) // if non-null, returns the
const; // smallest key of next file not included
void GetCleanInputsWithinInterval(
int level, const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
@ -200,14 +202,15 @@ class VersionStorageInfo {
const;
void GetOverlappingInputsRangeBinarySearch(
int level, // level > 0
int level, // level > 0
const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
std::vector<FileMetaData*>* inputs,
int hint_index, // index of overlap file
int* file_index, // return index of overlap file
bool within_interval = false) // if set, force the inputs within interval
const;
bool within_interval = false, // if set, force the inputs within interval
InternalKey** next_smallest = nullptr) // if non-null, returns the
const; // smallest key of next file not included
void ExtendFileRangeOverlappingInterval(
int level,