mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 16:30:56 +00:00
VersionSet: optimize GetOverlappingInputsRangeBinarySearch (#4987)
Summary: `GetOverlappingInputsRangeBinarySearch` first uses binary search to find an index in the given range `[begin, end]`. But after finding that index, it uses a linear search to find the `start_index` and `end_index`, so the search process degrades to linear time. This change optimizes the search process as follows: - use `std::lower_bound` and `std::upper_bound` to get `lg(n)` search complexity. - use a uniform lambda for the search process. - simplify the handling of `within_interval` being true or false. - remove the functions `ExtendFileRangeWithinInterval` and `ExtendFileRangeOverlappingInterval`. Signed-off-by: JiYou <jiyou09@gmail.com> Pull Request resolved: https://github.com/facebook/rocksdb/pull/4987 Differential Revision: D14984192 Pulled By: riversand963 fbshipit-source-id: fae4b8e59a21b7e350718d60cdc94dd55ac81e89
This commit is contained in:
parent
248b6b551e
commit
5b7e09bd6f
|
@ -2772,14 +2772,6 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& level_files = level_files_brief_[level];
|
|
||||||
if (begin == nullptr) {
|
|
||||||
begin = &level_files.files[0].file_metadata->smallest;
|
|
||||||
}
|
|
||||||
if (end == nullptr) {
|
|
||||||
end = &level_files.files[level_files.num_files - 1].file_metadata->largest;
|
|
||||||
}
|
|
||||||
|
|
||||||
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
|
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
|
||||||
hint_index, file_index,
|
hint_index, file_index,
|
||||||
true /* within_interval */);
|
true /* within_interval */);
|
||||||
|
@ -2797,67 +2789,94 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||||
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
|
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
|
||||||
bool within_interval, InternalKey** next_smallest) const {
|
bool within_interval, InternalKey** next_smallest) const {
|
||||||
assert(level > 0);
|
assert(level > 0);
|
||||||
int min = 0;
|
|
||||||
int mid = 0;
|
|
||||||
int max = static_cast<int>(files_[level].size()) - 1;
|
|
||||||
bool foundOverlap = false;
|
|
||||||
auto user_cmp = user_comparator_;
|
auto user_cmp = user_comparator_;
|
||||||
|
const FdWithKeyRange* files = level_files_brief_[level].files;
|
||||||
|
const int num_files = static_cast<int>(level_files_brief_[level].num_files);
|
||||||
|
|
||||||
// if the caller already knows the index of a file that has overlap,
|
// begin to use binary search to find lower bound
|
||||||
// then we can skip the binary search.
|
// and upper bound.
|
||||||
if (hint_index != -1) {
|
int start_index = 0;
|
||||||
mid = hint_index;
|
int end_index = num_files;
|
||||||
foundOverlap = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (!foundOverlap && min <= max) {
|
if (begin != nullptr) {
|
||||||
mid = (min + max)/2;
|
// if within_interval is true, with file_key would find
|
||||||
FdWithKeyRange* f = &(level_files_brief_[level].files[mid]);
|
// not overlapping ranges in std::lower_bound.
|
||||||
auto& smallest = f->file_metadata->smallest;
|
auto cmp = [&user_cmp, &within_interval](const FdWithKeyRange& f,
|
||||||
auto& largest = f->file_metadata->largest;
|
const InternalKey* k) {
|
||||||
if ((!within_interval && sstableKeyCompare(user_cmp, begin, largest) > 0) ||
|
auto& file_key = within_interval ? f.file_metadata->smallest
|
||||||
(within_interval && sstableKeyCompare(user_cmp, begin, smallest) > 0)) {
|
: f.file_metadata->largest;
|
||||||
min = mid + 1;
|
return sstableKeyCompare(user_cmp, file_key, *k) < 0;
|
||||||
} else if ((!within_interval &&
|
};
|
||||||
sstableKeyCompare(user_cmp, smallest, end) > 0) ||
|
|
||||||
(within_interval &&
|
start_index = static_cast<int>(
|
||||||
sstableKeyCompare(user_cmp, largest, end) > 0)) {
|
std::lower_bound(files,
|
||||||
max = mid - 1;
|
files + (hint_index == -1 ? num_files : hint_index),
|
||||||
} else {
|
begin, cmp) -
|
||||||
foundOverlap = true;
|
files);
|
||||||
break;
|
|
||||||
|
if (start_index > 0 && within_interval) {
|
||||||
|
bool is_overlapping = true;
|
||||||
|
while (is_overlapping && start_index < num_files) {
|
||||||
|
auto& pre_limit = files[start_index - 1].file_metadata->largest;
|
||||||
|
auto& cur_start = files[start_index].file_metadata->smallest;
|
||||||
|
is_overlapping = sstableKeyCompare(user_cmp, pre_limit, cur_start) == 0;
|
||||||
|
start_index += is_overlapping;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (end != nullptr) {
|
||||||
|
// if within_interval is true, with file_key would find
|
||||||
|
// not overlapping ranges in std::upper_bound.
|
||||||
|
auto cmp = [&user_cmp, &within_interval](const InternalKey* k,
|
||||||
|
const FdWithKeyRange& f) {
|
||||||
|
auto& file_key = within_interval ? f.file_metadata->largest
|
||||||
|
: f.file_metadata->smallest;
|
||||||
|
return sstableKeyCompare(user_cmp, *k, file_key) < 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
end_index = static_cast<int>(
|
||||||
|
std::upper_bound(files + start_index, files + num_files, end, cmp) -
|
||||||
|
files);
|
||||||
|
|
||||||
|
if (end_index < num_files && within_interval) {
|
||||||
|
bool is_overlapping = true;
|
||||||
|
while (is_overlapping && end_index > start_index) {
|
||||||
|
auto& next_start = files[end_index].file_metadata->smallest;
|
||||||
|
auto& cur_limit = files[end_index - 1].file_metadata->largest;
|
||||||
|
is_overlapping =
|
||||||
|
sstableKeyCompare(user_cmp, cur_limit, next_start) == 0;
|
||||||
|
end_index -= is_overlapping;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(start_index <= end_index);
|
||||||
|
|
||||||
// If there were no overlapping files, return immediately.
|
// If there were no overlapping files, return immediately.
|
||||||
if (!foundOverlap) {
|
if (start_index == end_index) {
|
||||||
if (next_smallest) {
|
if (next_smallest) {
|
||||||
next_smallest = nullptr;
|
next_smallest = nullptr;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(start_index < end_index);
|
||||||
|
|
||||||
// returns the index where an overlap is found
|
// returns the index where an overlap is found
|
||||||
if (file_index) {
|
if (file_index) {
|
||||||
*file_index = mid;
|
*file_index = start_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
int start_index, end_index;
|
|
||||||
if (within_interval) {
|
|
||||||
ExtendFileRangeWithinInterval(level, begin, end, mid,
|
|
||||||
&start_index, &end_index);
|
|
||||||
} else {
|
|
||||||
ExtendFileRangeOverlappingInterval(level, begin, end, mid,
|
|
||||||
&start_index, &end_index);
|
|
||||||
assert(end_index >= start_index);
|
|
||||||
}
|
|
||||||
// insert overlapping files into vector
|
// insert overlapping files into vector
|
||||||
for (int i = start_index; i <= end_index; i++) {
|
for (int i = start_index; i < end_index; i++) {
|
||||||
inputs->push_back(files_[level][i]);
|
inputs->push_back(files_[level][i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (next_smallest != nullptr) {
|
if (next_smallest != nullptr) {
|
||||||
// Provide the next key outside the range covered by inputs
|
// Provide the next key outside the range covered by inputs
|
||||||
if (++end_index < static_cast<int>(files_[level].size())) {
|
if (end_index < static_cast<int>(files_[level].size())) {
|
||||||
**next_smallest = files_[level][end_index]->smallest;
|
**next_smallest = files_[level][end_index]->smallest;
|
||||||
} else {
|
} else {
|
||||||
*next_smallest = nullptr;
|
*next_smallest = nullptr;
|
||||||
|
@ -2865,136 +2884,6 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store in *start_index and *end_index the range of all files in
|
|
||||||
// "level" that overlap [begin,end]
|
|
||||||
// The mid_index specifies the index of at least one file that
|
|
||||||
// overlaps the specified range. From that file, iterate backward
|
|
||||||
// and forward to find all overlapping files.
|
|
||||||
// Use FileLevel in searching, make it faster
|
|
||||||
void VersionStorageInfo::ExtendFileRangeOverlappingInterval(
|
|
||||||
int level, const InternalKey* begin, const InternalKey* end,
|
|
||||||
unsigned int mid_index, int* start_index, int* end_index) const {
|
|
||||||
auto user_cmp = user_comparator_;
|
|
||||||
const FdWithKeyRange* files = level_files_brief_[level].files;
|
|
||||||
#ifndef NDEBUG
|
|
||||||
{
|
|
||||||
// assert that the file at mid_index overlaps with the range
|
|
||||||
assert(mid_index < level_files_brief_[level].num_files);
|
|
||||||
const FdWithKeyRange* f = &files[mid_index];
|
|
||||||
auto& smallest = f->file_metadata->smallest;
|
|
||||||
auto& largest = f->file_metadata->largest;
|
|
||||||
if (sstableKeyCompare(user_cmp, begin, smallest) <= 0) {
|
|
||||||
assert(sstableKeyCompare(user_cmp, smallest, end) <= 0);
|
|
||||||
} else {
|
|
||||||
// fprintf(stderr, "ExtendFileRangeOverlappingInterval\n%s - %s\n%s - %s\n%d %d\n",
|
|
||||||
// begin ? begin->DebugString().c_str() : "(null)",
|
|
||||||
// end ? end->DebugString().c_str() : "(null)",
|
|
||||||
// smallest->DebugString().c_str(),
|
|
||||||
// largest->DebugString().c_str(),
|
|
||||||
// sstableKeyCompare(user_cmp, smallest, begin),
|
|
||||||
// sstableKeyCompare(user_cmp, largest, begin));
|
|
||||||
assert(sstableKeyCompare(user_cmp, begin, largest) <= 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
*start_index = mid_index + 1;
|
|
||||||
*end_index = mid_index;
|
|
||||||
int count __attribute__((__unused__));
|
|
||||||
count = 0;
|
|
||||||
|
|
||||||
// check backwards from 'mid' to lower indices
|
|
||||||
for (int i = mid_index; i >= 0 ; i--) {
|
|
||||||
const FdWithKeyRange* f = &files[i];
|
|
||||||
auto& largest = f->file_metadata->largest;
|
|
||||||
if (sstableKeyCompare(user_cmp, begin, largest) <= 0) {
|
|
||||||
*start_index = i;
|
|
||||||
assert((count++, true));
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// check forward from 'mid+1' to higher indices
|
|
||||||
for (unsigned int i = mid_index+1;
|
|
||||||
i < level_files_brief_[level].num_files; i++) {
|
|
||||||
const FdWithKeyRange* f = &files[i];
|
|
||||||
auto& smallest = f->file_metadata->smallest;
|
|
||||||
if (sstableKeyCompare(user_cmp, smallest, end) <= 0) {
|
|
||||||
assert((count++, true));
|
|
||||||
*end_index = i;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert(count == *end_index - *start_index + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store in *start_index and *end_index the clean range of all files in
|
|
||||||
// "level" within [begin,end]
|
|
||||||
// The mid_index specifies the index of at least one file within
|
|
||||||
// the specified range. From that file, iterate backward
|
|
||||||
// and forward to find all overlapping files and then "shrink" to
|
|
||||||
// the clean range required.
|
|
||||||
// Use FileLevel in searching, make it faster
|
|
||||||
void VersionStorageInfo::ExtendFileRangeWithinInterval(
|
|
||||||
int level, const InternalKey* begin, const InternalKey* end,
|
|
||||||
unsigned int mid_index, int* start_index, int* end_index) const {
|
|
||||||
assert(level != 0);
|
|
||||||
auto* user_cmp = user_comparator_;
|
|
||||||
const FdWithKeyRange* files = level_files_brief_[level].files;
|
|
||||||
#ifndef NDEBUG
|
|
||||||
{
|
|
||||||
// assert that the file at mid_index is within the range
|
|
||||||
assert(mid_index < level_files_brief_[level].num_files);
|
|
||||||
const FdWithKeyRange* f = &files[mid_index];
|
|
||||||
auto& smallest = f->file_metadata->smallest;
|
|
||||||
auto& largest = f->file_metadata->largest;
|
|
||||||
assert(sstableKeyCompare(user_cmp, begin, smallest) <= 0 &&
|
|
||||||
sstableKeyCompare(user_cmp, largest, end) <= 0);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
ExtendFileRangeOverlappingInterval(level, begin, end, mid_index,
|
|
||||||
start_index, end_index);
|
|
||||||
int left = *start_index;
|
|
||||||
int right = *end_index;
|
|
||||||
// shrink from left to right
|
|
||||||
while (left <= right) {
|
|
||||||
auto& smallest = files[left].file_metadata->smallest;
|
|
||||||
if (sstableKeyCompare(user_cmp, begin, smallest) > 0) {
|
|
||||||
left++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (left > 0) { // If not first file
|
|
||||||
auto& largest = files[left - 1].file_metadata->largest;
|
|
||||||
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
|
|
||||||
left++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// shrink from right to left
|
|
||||||
while (left <= right) {
|
|
||||||
auto& largest = files[right].file_metadata->largest;
|
|
||||||
if (sstableKeyCompare(user_cmp, largest, end) > 0) {
|
|
||||||
right--;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (right < static_cast<int>(level_files_brief_[level].num_files) -
|
|
||||||
1) { // If not the last file
|
|
||||||
auto& smallest = files[right + 1].file_metadata->smallest;
|
|
||||||
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
|
|
||||||
// The last user key in range overlaps with the next file's first key
|
|
||||||
right--;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
*start_index = left;
|
|
||||||
*end_index = right;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t VersionStorageInfo::NumLevelBytes(int level) const {
|
uint64_t VersionStorageInfo::NumLevelBytes(int level) const {
|
||||||
assert(level >= 0);
|
assert(level >= 0);
|
||||||
assert(level < num_levels());
|
assert(level < num_levels());
|
||||||
|
|
|
@ -220,24 +220,6 @@ class VersionStorageInfo {
|
||||||
InternalKey** next_smallest = nullptr) // if non-null, returns the
|
InternalKey** next_smallest = nullptr) // if non-null, returns the
|
||||||
const; // smallest key of next file not included
|
const; // smallest key of next file not included
|
||||||
|
|
||||||
void ExtendFileRangeOverlappingInterval(
|
|
||||||
int level,
|
|
||||||
const InternalKey* begin, // nullptr means before all keys
|
|
||||||
const InternalKey* end, // nullptr means after all keys
|
|
||||||
unsigned int index, // start extending from this index
|
|
||||||
int* startIndex, // return the startIndex of input range
|
|
||||||
int* endIndex) // return the endIndex of input range
|
|
||||||
const;
|
|
||||||
|
|
||||||
void ExtendFileRangeWithinInterval(
|
|
||||||
int level,
|
|
||||||
const InternalKey* begin, // nullptr means before all keys
|
|
||||||
const InternalKey* end, // nullptr means after all keys
|
|
||||||
unsigned int index, // start extending from this index
|
|
||||||
int* startIndex, // return the startIndex of input range
|
|
||||||
int* endIndex) // return the endIndex of input range
|
|
||||||
const;
|
|
||||||
|
|
||||||
// Returns true iff some file in the specified level overlaps
|
// Returns true iff some file in the specified level overlaps
|
||||||
// some part of [*smallest_user_key,*largest_user_key].
|
// some part of [*smallest_user_key,*largest_user_key].
|
||||||
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
|
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
|
||||||
|
|
Loading…
Reference in a new issue