diff --git a/db/version_set.cc b/db/version_set.cc index 50dfbe3c18..56cbffa854 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2772,14 +2772,6 @@ void VersionStorageInfo::GetCleanInputsWithinInterval( return; } - const auto& level_files = level_files_brief_[level]; - if (begin == nullptr) { - begin = &level_files.files[0].file_metadata->smallest; - } - if (end == nullptr) { - end = &level_files.files[level_files.num_files - 1].file_metadata->largest; - } - GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index, file_index, true /* within_interval */); @@ -2797,67 +2789,94 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch( std::vector* inputs, int hint_index, int* file_index, bool within_interval, InternalKey** next_smallest) const { assert(level > 0); - int min = 0; - int mid = 0; - int max = static_cast(files_[level].size()) - 1; - bool foundOverlap = false; + auto user_cmp = user_comparator_; + const FdWithKeyRange* files = level_files_brief_[level].files; + const int num_files = static_cast(level_files_brief_[level].num_files); - // if the caller already knows the index of a file that has overlap, - // then we can skip the binary search. - if (hint_index != -1) { - mid = hint_index; - foundOverlap = true; - } + // begin to use binary search to find lower bound + // and upper bound. + int start_index = 0; + int end_index = num_files; - while (!foundOverlap && min <= max) { - mid = (min + max)/2; - FdWithKeyRange* f = &(level_files_brief_[level].files[mid]); - auto& smallest = f->file_metadata->smallest; - auto& largest = f->file_metadata->largest; - if ((!within_interval && sstableKeyCompare(user_cmp, begin, largest) > 0) || - (within_interval && sstableKeyCompare(user_cmp, begin, smallest) > 0)) { - min = mid + 1; - } else if ((!within_interval && - sstableKeyCompare(user_cmp, smallest, end) > 0) || - (within_interval && - sstableKeyCompare(user_cmp, largest, end) > 0)) { - max = mid - 1; - } else { - foundOverlap = true; - break; + if (begin != nullptr) { + // if within_interval is true, with file_key would find + // not overlapping ranges in std::lower_bound. + auto cmp = [&user_cmp, &within_interval](const FdWithKeyRange& f, + const InternalKey* k) { + auto& file_key = within_interval ? f.file_metadata->smallest + : f.file_metadata->largest; + return sstableKeyCompare(user_cmp, file_key, *k) < 0; + }; + + start_index = static_cast( + std::lower_bound(files, + files + (hint_index == -1 ? num_files : hint_index), + begin, cmp) - + files); + + if (start_index > 0 && within_interval) { + bool is_overlapping = true; + while (is_overlapping && start_index < num_files) { + auto& pre_limit = files[start_index - 1].file_metadata->largest; + auto& cur_start = files[start_index].file_metadata->smallest; + is_overlapping = sstableKeyCompare(user_cmp, pre_limit, cur_start) == 0; + start_index += is_overlapping; + } } } + if (end != nullptr) { + // if within_interval is true, with file_key would find + // not overlapping ranges in std::upper_bound. + auto cmp = [&user_cmp, &within_interval](const InternalKey* k, + const FdWithKeyRange& f) { + auto& file_key = within_interval ? f.file_metadata->largest + : f.file_metadata->smallest; + return sstableKeyCompare(user_cmp, *k, file_key) < 0; + }; + + end_index = static_cast( + std::upper_bound(files + start_index, files + num_files, end, cmp) - + files); + + if (end_index < num_files && within_interval) { + bool is_overlapping = true; + while (is_overlapping && end_index > start_index) { + auto& next_start = files[end_index].file_metadata->smallest; + auto& cur_limit = files[end_index - 1].file_metadata->largest; + is_overlapping = + sstableKeyCompare(user_cmp, cur_limit, next_start) == 0; + end_index -= is_overlapping; + } + } + } + + assert(start_index <= end_index); + // If there were no overlapping files, return immediately. - if (!foundOverlap) { + if (start_index == end_index) { if (next_smallest) { next_smallest = nullptr; } return; } + + assert(start_index < end_index); + // returns the index where an overlap is found if (file_index) { - *file_index = mid; + *file_index = start_index; } - int start_index, end_index; - if (within_interval) { - ExtendFileRangeWithinInterval(level, begin, end, mid, - &start_index, &end_index); - } else { - ExtendFileRangeOverlappingInterval(level, begin, end, mid, - &start_index, &end_index); - assert(end_index >= start_index); - } // insert overlapping files into vector - for (int i = start_index; i <= end_index; i++) { + for (int i = start_index; i < end_index; i++) { inputs->push_back(files_[level][i]); } if (next_smallest != nullptr) { // Provide the next key outside the range covered by inputs - if (++end_index < static_cast(files_[level].size())) { + if (end_index < static_cast(files_[level].size())) { **next_smallest = files_[level][end_index]->smallest; } else { *next_smallest = nullptr; @@ -2865,136 +2884,6 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch( } } -// Store in *start_index and *end_index the range of all files in -// "level" that overlap [begin,end] -// The mid_index specifies the index of at least one file that -// overlaps the specified range. From that file, iterate backward -// and forward to find all overlapping files. -// Use FileLevel in searching, make it faster -void VersionStorageInfo::ExtendFileRangeOverlappingInterval( - int level, const InternalKey* begin, const InternalKey* end, - unsigned int mid_index, int* start_index, int* end_index) const { - auto user_cmp = user_comparator_; - const FdWithKeyRange* files = level_files_brief_[level].files; -#ifndef NDEBUG - { - // assert that the file at mid_index overlaps with the range - assert(mid_index < level_files_brief_[level].num_files); - const FdWithKeyRange* f = &files[mid_index]; - auto& smallest = f->file_metadata->smallest; - auto& largest = f->file_metadata->largest; - if (sstableKeyCompare(user_cmp, begin, smallest) <= 0) { - assert(sstableKeyCompare(user_cmp, smallest, end) <= 0); - } else { - // fprintf(stderr, "ExtendFileRangeOverlappingInterval\n%s - %s\n%s - %s\n%d %d\n", - // begin ? begin->DebugString().c_str() : "(null)", - // end ? end->DebugString().c_str() : "(null)", - // smallest->DebugString().c_str(), - // largest->DebugString().c_str(), - // sstableKeyCompare(user_cmp, smallest, begin), - // sstableKeyCompare(user_cmp, largest, begin)); - assert(sstableKeyCompare(user_cmp, begin, largest) <= 0); - } - } -#endif - *start_index = mid_index + 1; - *end_index = mid_index; - int count __attribute__((__unused__)); - count = 0; - - // check backwards from 'mid' to lower indices - for (int i = mid_index; i >= 0 ; i--) { - const FdWithKeyRange* f = &files[i]; - auto& largest = f->file_metadata->largest; - if (sstableKeyCompare(user_cmp, begin, largest) <= 0) { - *start_index = i; - assert((count++, true)); - } else { - break; - } - } - // check forward from 'mid+1' to higher indices - for (unsigned int i = mid_index+1; - i < level_files_brief_[level].num_files; i++) { - const FdWithKeyRange* f = &files[i]; - auto& smallest = f->file_metadata->smallest; - if (sstableKeyCompare(user_cmp, smallest, end) <= 0) { - assert((count++, true)); - *end_index = i; - } else { - break; - } - } - assert(count == *end_index - *start_index + 1); -} - -// Store in *start_index and *end_index the clean range of all files in -// "level" within [begin,end] -// The mid_index specifies the index of at least one file within -// the specified range. From that file, iterate backward -// and forward to find all overlapping files and then "shrink" to -// the clean range required. -// Use FileLevel in searching, make it faster -void VersionStorageInfo::ExtendFileRangeWithinInterval( - int level, const InternalKey* begin, const InternalKey* end, - unsigned int mid_index, int* start_index, int* end_index) const { - assert(level != 0); - auto* user_cmp = user_comparator_; - const FdWithKeyRange* files = level_files_brief_[level].files; -#ifndef NDEBUG - { - // assert that the file at mid_index is within the range - assert(mid_index < level_files_brief_[level].num_files); - const FdWithKeyRange* f = &files[mid_index]; - auto& smallest = f->file_metadata->smallest; - auto& largest = f->file_metadata->largest; - assert(sstableKeyCompare(user_cmp, begin, smallest) <= 0 && - sstableKeyCompare(user_cmp, largest, end) <= 0); - } -#endif - ExtendFileRangeOverlappingInterval(level, begin, end, mid_index, - start_index, end_index); - int left = *start_index; - int right = *end_index; - // shrink from left to right - while (left <= right) { - auto& smallest = files[left].file_metadata->smallest; - if (sstableKeyCompare(user_cmp, begin, smallest) > 0) { - left++; - continue; - } - if (left > 0) { // If not first file - auto& largest = files[left - 1].file_metadata->largest; - if (sstableKeyCompare(user_cmp, smallest, largest) == 0) { - left++; - continue; - } - } - break; - } - // shrink from right to left - while (left <= right) { - auto& largest = files[right].file_metadata->largest; - if (sstableKeyCompare(user_cmp, largest, end) > 0) { - right--; - continue; - } - if (right < static_cast(level_files_brief_[level].num_files) - - 1) { // If not the last file - auto& smallest = files[right + 1].file_metadata->smallest; - if (sstableKeyCompare(user_cmp, smallest, largest) == 0) { - // The last user key in range overlaps with the next file's first key - right--; - continue; - } - } - break; - } - - *start_index = left; - *end_index = right; -} - uint64_t VersionStorageInfo::NumLevelBytes(int level) const { assert(level >= 0); assert(level < num_levels()); diff --git a/db/version_set.h b/db/version_set.h index b2b6cc863d..d82c5b4729 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -220,24 +220,6 @@ class VersionStorageInfo { InternalKey** next_smallest = nullptr) // if non-null, returns the const; // smallest key of next file not included - void ExtendFileRangeOverlappingInterval( - int level, - const InternalKey* begin, // nullptr means before all keys - const InternalKey* end, // nullptr means after all keys - unsigned int index, // start extending from this index - int* startIndex, // return the startIndex of input range - int* endIndex) // return the endIndex of input range - const; - - void ExtendFileRangeWithinInterval( - int level, - const InternalKey* begin, // nullptr means before all keys - const InternalKey* end, // nullptr means after all keys - unsigned int index, // start extending from this index - int* startIndex, // return the startIndex of input range - int* endIndex) // return the endIndex of input range - const; - // Returns true iff some file in the specified level overlaps // some part of [*smallest_user_key,*largest_user_key]. // smallest_user_key==NULL represents a key smaller than all keys in the DB.