mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 16:30:56 +00:00
VersionSet: optmize GetOverlappingInputsRangeBinarySearch (#4987)
Summary: `GetOverlappingInputsRangeBinarySearch` firstly use binary search to find a index in the given range `[begin, end]`. But after find the index, then use linear search to find the `start_index` and `end_index`. So the search process degraded to linear time. Here optmize the search process with below changes: - use `std::lower_bound` and `std::upper_bound` to get `lg(n)` search complexity. - use uniformed lambda for search process. - simplify process for `within_interval` true or false. - remove function `ExtendFileRangeWithinInterval` and `ExtendFileRangeOverlappingInterval`. Signed-off-by: JiYou <jiyou09@gmail.com> Pull Request resolved: https://github.com/facebook/rocksdb/pull/4987 Differential Revision: D14984192 Pulled By: riversand963 fbshipit-source-id: fae4b8e59a21b7e350718d60cdc94dd55ac81e89
This commit is contained in:
parent
248b6b551e
commit
5b7e09bd6f
|
@ -2772,14 +2772,6 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
|
|||
return;
|
||||
}
|
||||
|
||||
const auto& level_files = level_files_brief_[level];
|
||||
if (begin == nullptr) {
|
||||
begin = &level_files.files[0].file_metadata->smallest;
|
||||
}
|
||||
if (end == nullptr) {
|
||||
end = &level_files.files[level_files.num_files - 1].file_metadata->largest;
|
||||
}
|
||||
|
||||
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
|
||||
hint_index, file_index,
|
||||
true /* within_interval */);
|
||||
|
@ -2797,67 +2789,94 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
|||
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
|
||||
bool within_interval, InternalKey** next_smallest) const {
|
||||
assert(level > 0);
|
||||
int min = 0;
|
||||
int mid = 0;
|
||||
int max = static_cast<int>(files_[level].size()) - 1;
|
||||
bool foundOverlap = false;
|
||||
|
||||
auto user_cmp = user_comparator_;
|
||||
const FdWithKeyRange* files = level_files_brief_[level].files;
|
||||
const int num_files = static_cast<int>(level_files_brief_[level].num_files);
|
||||
|
||||
// if the caller already knows the index of a file that has overlap,
|
||||
// then we can skip the binary search.
|
||||
if (hint_index != -1) {
|
||||
mid = hint_index;
|
||||
foundOverlap = true;
|
||||
// begin to use binary search to find lower bound
|
||||
// and upper bound.
|
||||
int start_index = 0;
|
||||
int end_index = num_files;
|
||||
|
||||
if (begin != nullptr) {
|
||||
// if within_interval is true, with file_key would find
|
||||
// not overlapping ranges in std::lower_bound.
|
||||
auto cmp = [&user_cmp, &within_interval](const FdWithKeyRange& f,
|
||||
const InternalKey* k) {
|
||||
auto& file_key = within_interval ? f.file_metadata->smallest
|
||||
: f.file_metadata->largest;
|
||||
return sstableKeyCompare(user_cmp, file_key, *k) < 0;
|
||||
};
|
||||
|
||||
start_index = static_cast<int>(
|
||||
std::lower_bound(files,
|
||||
files + (hint_index == -1 ? num_files : hint_index),
|
||||
begin, cmp) -
|
||||
files);
|
||||
|
||||
if (start_index > 0 && within_interval) {
|
||||
bool is_overlapping = true;
|
||||
while (is_overlapping && start_index < num_files) {
|
||||
auto& pre_limit = files[start_index - 1].file_metadata->largest;
|
||||
auto& cur_start = files[start_index].file_metadata->smallest;
|
||||
is_overlapping = sstableKeyCompare(user_cmp, pre_limit, cur_start) == 0;
|
||||
start_index += is_overlapping;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (!foundOverlap && min <= max) {
|
||||
mid = (min + max)/2;
|
||||
FdWithKeyRange* f = &(level_files_brief_[level].files[mid]);
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
auto& largest = f->file_metadata->largest;
|
||||
if ((!within_interval && sstableKeyCompare(user_cmp, begin, largest) > 0) ||
|
||||
(within_interval && sstableKeyCompare(user_cmp, begin, smallest) > 0)) {
|
||||
min = mid + 1;
|
||||
} else if ((!within_interval &&
|
||||
sstableKeyCompare(user_cmp, smallest, end) > 0) ||
|
||||
(within_interval &&
|
||||
sstableKeyCompare(user_cmp, largest, end) > 0)) {
|
||||
max = mid - 1;
|
||||
} else {
|
||||
foundOverlap = true;
|
||||
break;
|
||||
if (end != nullptr) {
|
||||
// if within_interval is true, with file_key would find
|
||||
// not overlapping ranges in std::upper_bound.
|
||||
auto cmp = [&user_cmp, &within_interval](const InternalKey* k,
|
||||
const FdWithKeyRange& f) {
|
||||
auto& file_key = within_interval ? f.file_metadata->largest
|
||||
: f.file_metadata->smallest;
|
||||
return sstableKeyCompare(user_cmp, *k, file_key) < 0;
|
||||
};
|
||||
|
||||
end_index = static_cast<int>(
|
||||
std::upper_bound(files + start_index, files + num_files, end, cmp) -
|
||||
files);
|
||||
|
||||
if (end_index < num_files && within_interval) {
|
||||
bool is_overlapping = true;
|
||||
while (is_overlapping && end_index > start_index) {
|
||||
auto& next_start = files[end_index].file_metadata->smallest;
|
||||
auto& cur_limit = files[end_index - 1].file_metadata->largest;
|
||||
is_overlapping =
|
||||
sstableKeyCompare(user_cmp, cur_limit, next_start) == 0;
|
||||
end_index -= is_overlapping;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(start_index <= end_index);
|
||||
|
||||
// If there were no overlapping files, return immediately.
|
||||
if (!foundOverlap) {
|
||||
if (start_index == end_index) {
|
||||
if (next_smallest) {
|
||||
next_smallest = nullptr;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
assert(start_index < end_index);
|
||||
|
||||
// returns the index where an overlap is found
|
||||
if (file_index) {
|
||||
*file_index = mid;
|
||||
*file_index = start_index;
|
||||
}
|
||||
|
||||
int start_index, end_index;
|
||||
if (within_interval) {
|
||||
ExtendFileRangeWithinInterval(level, begin, end, mid,
|
||||
&start_index, &end_index);
|
||||
} else {
|
||||
ExtendFileRangeOverlappingInterval(level, begin, end, mid,
|
||||
&start_index, &end_index);
|
||||
assert(end_index >= start_index);
|
||||
}
|
||||
// insert overlapping files into vector
|
||||
for (int i = start_index; i <= end_index; i++) {
|
||||
for (int i = start_index; i < end_index; i++) {
|
||||
inputs->push_back(files_[level][i]);
|
||||
}
|
||||
|
||||
if (next_smallest != nullptr) {
|
||||
// Provide the next key outside the range covered by inputs
|
||||
if (++end_index < static_cast<int>(files_[level].size())) {
|
||||
if (end_index < static_cast<int>(files_[level].size())) {
|
||||
**next_smallest = files_[level][end_index]->smallest;
|
||||
} else {
|
||||
*next_smallest = nullptr;
|
||||
|
@ -2865,136 +2884,6 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
|||
}
|
||||
}
|
||||
|
||||
// Store in *start_index and *end_index the range of all files in
|
||||
// "level" that overlap [begin,end]
|
||||
// The mid_index specifies the index of at least one file that
|
||||
// overlaps the specified range. From that file, iterate backward
|
||||
// and forward to find all overlapping files.
|
||||
// Use FileLevel in searching, make it faster
|
||||
void VersionStorageInfo::ExtendFileRangeOverlappingInterval(
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
unsigned int mid_index, int* start_index, int* end_index) const {
|
||||
auto user_cmp = user_comparator_;
|
||||
const FdWithKeyRange* files = level_files_brief_[level].files;
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
// assert that the file at mid_index overlaps with the range
|
||||
assert(mid_index < level_files_brief_[level].num_files);
|
||||
const FdWithKeyRange* f = &files[mid_index];
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
auto& largest = f->file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, begin, smallest) <= 0) {
|
||||
assert(sstableKeyCompare(user_cmp, smallest, end) <= 0);
|
||||
} else {
|
||||
// fprintf(stderr, "ExtendFileRangeOverlappingInterval\n%s - %s\n%s - %s\n%d %d\n",
|
||||
// begin ? begin->DebugString().c_str() : "(null)",
|
||||
// end ? end->DebugString().c_str() : "(null)",
|
||||
// smallest->DebugString().c_str(),
|
||||
// largest->DebugString().c_str(),
|
||||
// sstableKeyCompare(user_cmp, smallest, begin),
|
||||
// sstableKeyCompare(user_cmp, largest, begin));
|
||||
assert(sstableKeyCompare(user_cmp, begin, largest) <= 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
*start_index = mid_index + 1;
|
||||
*end_index = mid_index;
|
||||
int count __attribute__((__unused__));
|
||||
count = 0;
|
||||
|
||||
// check backwards from 'mid' to lower indices
|
||||
for (int i = mid_index; i >= 0 ; i--) {
|
||||
const FdWithKeyRange* f = &files[i];
|
||||
auto& largest = f->file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, begin, largest) <= 0) {
|
||||
*start_index = i;
|
||||
assert((count++, true));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// check forward from 'mid+1' to higher indices
|
||||
for (unsigned int i = mid_index+1;
|
||||
i < level_files_brief_[level].num_files; i++) {
|
||||
const FdWithKeyRange* f = &files[i];
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
if (sstableKeyCompare(user_cmp, smallest, end) <= 0) {
|
||||
assert((count++, true));
|
||||
*end_index = i;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(count == *end_index - *start_index + 1);
|
||||
}
|
||||
|
||||
// Store in *start_index and *end_index the clean range of all files in
|
||||
// "level" within [begin,end]
|
||||
// The mid_index specifies the index of at least one file within
|
||||
// the specified range. From that file, iterate backward
|
||||
// and forward to find all overlapping files and then "shrink" to
|
||||
// the clean range required.
|
||||
// Use FileLevel in searching, make it faster
|
||||
void VersionStorageInfo::ExtendFileRangeWithinInterval(
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
unsigned int mid_index, int* start_index, int* end_index) const {
|
||||
assert(level != 0);
|
||||
auto* user_cmp = user_comparator_;
|
||||
const FdWithKeyRange* files = level_files_brief_[level].files;
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
// assert that the file at mid_index is within the range
|
||||
assert(mid_index < level_files_brief_[level].num_files);
|
||||
const FdWithKeyRange* f = &files[mid_index];
|
||||
auto& smallest = f->file_metadata->smallest;
|
||||
auto& largest = f->file_metadata->largest;
|
||||
assert(sstableKeyCompare(user_cmp, begin, smallest) <= 0 &&
|
||||
sstableKeyCompare(user_cmp, largest, end) <= 0);
|
||||
}
|
||||
#endif
|
||||
ExtendFileRangeOverlappingInterval(level, begin, end, mid_index,
|
||||
start_index, end_index);
|
||||
int left = *start_index;
|
||||
int right = *end_index;
|
||||
// shrink from left to right
|
||||
while (left <= right) {
|
||||
auto& smallest = files[left].file_metadata->smallest;
|
||||
if (sstableKeyCompare(user_cmp, begin, smallest) > 0) {
|
||||
left++;
|
||||
continue;
|
||||
}
|
||||
if (left > 0) { // If not first file
|
||||
auto& largest = files[left - 1].file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
|
||||
left++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
// shrink from right to left
|
||||
while (left <= right) {
|
||||
auto& largest = files[right].file_metadata->largest;
|
||||
if (sstableKeyCompare(user_cmp, largest, end) > 0) {
|
||||
right--;
|
||||
continue;
|
||||
}
|
||||
if (right < static_cast<int>(level_files_brief_[level].num_files) -
|
||||
1) { // If not the last file
|
||||
auto& smallest = files[right + 1].file_metadata->smallest;
|
||||
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
|
||||
// The last user key in range overlaps with the next file's first key
|
||||
right--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
*start_index = left;
|
||||
*end_index = right;
|
||||
}
|
||||
|
||||
uint64_t VersionStorageInfo::NumLevelBytes(int level) const {
|
||||
assert(level >= 0);
|
||||
assert(level < num_levels());
|
||||
|
|
|
@ -220,24 +220,6 @@ class VersionStorageInfo {
|
|||
InternalKey** next_smallest = nullptr) // if non-null, returns the
|
||||
const; // smallest key of next file not included
|
||||
|
||||
void ExtendFileRangeOverlappingInterval(
|
||||
int level,
|
||||
const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
unsigned int index, // start extending from this index
|
||||
int* startIndex, // return the startIndex of input range
|
||||
int* endIndex) // return the endIndex of input range
|
||||
const;
|
||||
|
||||
void ExtendFileRangeWithinInterval(
|
||||
int level,
|
||||
const InternalKey* begin, // nullptr means before all keys
|
||||
const InternalKey* end, // nullptr means after all keys
|
||||
unsigned int index, // start extending from this index
|
||||
int* startIndex, // return the startIndex of input range
|
||||
int* endIndex) // return the endIndex of input range
|
||||
const;
|
||||
|
||||
// Returns true iff some file in the specified level overlaps
|
||||
// some part of [*smallest_user_key,*largest_user_key].
|
||||
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
|
||||
|
|
Loading…
Reference in a new issue