mirror of https://github.com/facebook/rocksdb.git
No need for files_by_size_ in universal compaction
Summary: files_by_size_ is sorted by time in case of universal compaction. However,
Version::files_ is also sorted by time, so there is no need for files_by_size_.

Test Plan:
1) make check with the change
2) make check with `assert(last_index == c->input_version_->files_[level].size() - 1);` in compaction picker

Reviewers: dhruba, haobo, yhchiang, sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D19125
parent 5656367416
commit a2e0d890ed
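The reasoning behind the change, as a minimal hypothetical sketch (the `FileMeta` struct below is a simplified stand-in, not RocksDB's `FileMetaData`): under universal compaction the level-0 file list is kept newest-first by sequence number, so an index vector sorted with a seqno-descending comparator, like the `CompareSeqnoDescending` removed in this diff, ends up being the identity permutation over `files_[level]`, and the files can be walked directly.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

// Simplified stand-in for RocksDB's FileMetaData, just enough to show
// the ordering invariant the commit relies on.
struct FileMeta {
  uint64_t smallest_seqno;
  uint64_t largest_seqno;
};

int main() {
  // Under universal compaction, files_[level] is maintained newest-first,
  // i.e. in descending sequence-number order.
  std::vector<FileMeta> files = {{900, 950}, {700, 800}, {400, 500}, {100, 200}};

  // The old code built an index vector and sorted it by descending seqno
  // (what CompareSeqnoDescending did).
  std::vector<int> file_by_time(files.size());
  std::iota(file_by_time.begin(), file_by_time.end(), 0);
  std::sort(file_by_time.begin(), file_by_time.end(), [&](int a, int b) {
    return files[a].smallest_seqno > files[b].smallest_seqno;
  });

  // Because files is already in that order, the sorted index vector is the
  // identity permutation: indexing through it changes nothing, which is why
  // the commit drops it and walks files_[level] directly.
  for (size_t i = 0; i < file_by_time.size(); i++) {
    assert(file_by_time[i] == static_cast<int>(i));
  }

  // The earliest (oldest) file is now simply the last element.
  assert(files.back().smallest_seqno == 100);
  return 0;
}
```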
@@ -585,15 +585,9 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version,
     newerfile = f;
   }
 
-  // The files are sorted from newest first to oldest last.
-  std::vector<int>& file_by_time = c->input_version_->files_by_size_[level];
-
   // Is the earliest file part of this compaction?
-  int last_index = file_by_time[file_by_time.size()-1];
-  FileMetaData* last_file = c->input_version_->files_[level][last_index];
-  if (c->inputs_[0][c->inputs_[0].size()-1] == last_file) {
-    c->bottommost_level_ = true;
-  }
+  FileMetaData* last_file = c->input_version_->files_[level].back();
+  c->bottommost_level_ = c->inputs_[0].back() == last_file;
 
   // update statistics
   MeasureTime(options_->statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION,
@@ -628,12 +622,12 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
       options_->compaction_options_universal.max_merge_width;
 
   // The files are sorted from newest first to oldest last.
-  std::vector<int>& file_by_time = version->files_by_size_[level];
+  const auto& files = version->files_[level];
+
   FileMetaData* f = nullptr;
   bool done = false;
   int start_index = 0;
   unsigned int candidate_count = 0;
-  assert(file_by_time.size() == version->files_[level].size());
 
   unsigned int max_files_to_compact = std::min(max_merge_width,
                                        max_number_of_files_to_compact);
@@ -641,14 +635,13 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
 
   // Considers a candidate file only if it is smaller than the
   // total size accumulated so far.
-  for (unsigned int loop = 0; loop < file_by_time.size(); loop++) {
+  for (unsigned int loop = 0; loop < files.size(); loop++) {
 
    candidate_count = 0;
 
    // Skip files that are already being compacted
-    for (f = nullptr; loop < file_by_time.size(); loop++) {
-      int index = file_by_time[loop];
-      f = version->files_[level][index];
+    for (f = nullptr; loop < files.size(); loop++) {
+      f = files[loop];
 
      if (!f->being_compacted) {
        candidate_count = 1;
@@ -670,11 +663,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
    }
 
    // Check if the suceeding files need compaction.
-    for (unsigned int i = loop+1;
-         candidate_count < max_files_to_compact && i < file_by_time.size();
-         i++) {
-      int index = file_by_time[i];
-      FileMetaData* f = version->files_[level][index];
+    for (unsigned int i = loop + 1;
+         candidate_count < max_files_to_compact && i < files.size(); i++) {
+      FileMetaData* f = files[i];
      if (f->being_compacted) {
        break;
      }
@@ -713,14 +704,14 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
       break;
     } else {
       for (unsigned int i = loop;
-           i < loop + candidate_count && i < file_by_time.size(); i++) {
-        int index = file_by_time[i];
-        FileMetaData* f = version->files_[level][index];
-        LogToBuffer(log_buffer,
-            "[%s] Universal: Skipping file %" PRIu64 "[%d] "
-            "with size %" PRIu64 " (compensated size %" PRIu64 ") %d\n",
-            version->cfd_->GetName().c_str(), f->fd.GetNumber(),
-            i, f->fd.GetFileSize(), f->compensated_file_size, f->being_compacted);
+           i < loop + candidate_count && i < files.size(); i++) {
+        FileMetaData* f = files[i];
+        LogToBuffer(log_buffer, "[%s] Universal: Skipping file %" PRIu64
+                                "[%d] with size %" PRIu64
+                                " (compensated size %" PRIu64 ") %d\n",
+                    version->cfd_->GetName().c_str(), f->fd.GetNumber(), i,
+                    f->fd.GetFileSize(), f->compensated_file_size,
+                    f->being_compacted);
       }
     }
   }
@@ -736,10 +727,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
    if (ratio_to_compress >= 0) {
      uint64_t total_size = version->NumLevelBytes(level);
      uint64_t older_file_size = 0;
-      for (unsigned int i = file_by_time.size() - 1; i >= first_index_after;
-           i--) {
-        older_file_size +=
-            version->files_[level][file_by_time[i]]->fd.GetFileSize();
+      for (unsigned int i = files.size() - 1;
+           i >= first_index_after; i--) {
+        older_file_size += files[i]->fd.GetFileSize();
        if (older_file_size * 100L >= total_size * (long) ratio_to_compress) {
          enable_compression = false;
          break;
@@ -752,8 +742,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
   c->score_ = score;
 
   for (unsigned int i = start_index; i < first_index_after; i++) {
-    int index = file_by_time[i];
-    FileMetaData* f = c->input_version_->files_[level][index];
+    FileMetaData* f = c->input_version_->files_[level][i];
     c->inputs_[0].push_back(f);
     LogToBuffer(log_buffer,
                 "[%s] Universal: Picking file %" PRIu64 "[%d] "
@@ -780,8 +769,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
       max_size_amplification_percent;
 
   // The files are sorted from newest first to oldest last.
-  std::vector<int>& file_by_time = version->files_by_size_[level];
-  assert(file_by_time.size() == version->files_[level].size());
+  const auto& files = version->files_[level];
 
   unsigned int candidate_count = 0;
   uint64_t candidate_size = 0;
@@ -789,9 +777,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
   FileMetaData* f = nullptr;
 
   // Skip files that are already being compacted
-  for (unsigned int loop = 0; loop < file_by_time.size() - 1; loop++) {
-    int index = file_by_time[loop];
-    f = version->files_[level][index];
+  for (unsigned int loop = 0; loop < files.size() - 1; loop++) {
+    f = files[loop];
    if (!f->being_compacted) {
      start_index = loop; // Consider this as the first candidate.
      break;
@@ -812,10 +799,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
                " to reduce size amp.\n");
 
  // keep adding up all the remaining files
-  for (unsigned int loop = start_index; loop < file_by_time.size() - 1;
-       loop++) {
-    int index = file_by_time[loop];
-    f = version->files_[level][index];
+  for (unsigned int loop = start_index; loop < files.size() - 1; loop++) {
+    f = files[loop];
    if (f->being_compacted) {
      LogToBuffer(
          log_buffer,
@@ -832,8 +817,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
  }
 
  // size of earliest file
-  int index = file_by_time[file_by_time.size() - 1];
-  uint64_t earliest_file_size = version->files_[level][index]->fd.GetFileSize();
+  uint64_t earliest_file_size = files.back()->fd.GetFileSize();
 
  // size amplification = percentage of additional size
  if (candidate_size * 100 < ratio * earliest_file_size) {
@@ -850,7 +834,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
                "earliest-file-size %" PRIu64,
                version->cfd_->GetName().c_str(), candidate_size, earliest_file_size);
  }
-  assert(start_index >= 0 && start_index < file_by_time.size() - 1);
+  assert(start_index >= 0 && start_index < files.size() - 1);
 
  // create a compaction request
  // We always compact all the files, so always compress.
@@ -858,9 +842,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
      new Compaction(version, level, level, MaxFileSizeForLevel(level),
                     LLONG_MAX, false, true);
  c->score_ = score;
-  for (unsigned int loop = start_index; loop < file_by_time.size(); loop++) {
-    int index = file_by_time[loop];
-    f = c->input_version_->files_[level][index];
+  for (unsigned int loop = start_index; loop < files.size(); loop++) {
+    f = c->input_version_->files_[level][loop];
    c->inputs_[0].push_back(f);
    LogToBuffer(log_buffer,
                "[%s] Universal: size amp picking file %" PRIu64 "[%d] "

@@ -861,7 +861,6 @@ void Version::ComputeCompactionScore(
 }
 
 namespace {
-
 // Compator that is used to sort files based on their size
 // In normal mode: descending size
 bool CompareCompensatedSizeDescending(const Version::Fsize& first,
@@ -869,18 +868,6 @@ bool CompareCompensatedSizeDescending(const Version::Fsize& first,
   return (first.file->compensated_file_size >
       second.file->compensated_file_size);
 }
-// A static compator used to sort files based on their seqno
-// In universal style : descending seqno
-bool CompareSeqnoDescending(const Version::Fsize& first,
-                            const Version::Fsize& second) {
-  if (first.file->smallest_seqno > second.file->smallest_seqno) {
-    assert(first.file->largest_seqno > second.file->largest_seqno);
-    return true;
-  }
-  assert(first.file->largest_seqno <= second.file->largest_seqno);
-  return false;
-}
-
 } // anonymous namespace
 
 void Version::UpdateNumNonEmptyLevels() {
@@ -895,19 +882,15 @@ void Version::UpdateNumNonEmptyLevels() {
 }
 
 void Version::UpdateFilesBySize() {
-  if (cfd_->options()->compaction_style == kCompactionStyleFIFO) {
+  if (cfd_->options()->compaction_style == kCompactionStyleFIFO ||
+      cfd_->options()->compaction_style == kCompactionStyleUniversal) {
    // don't need this
    return;
  }
  // No need to sort the highest level because it is never compacted.
-  int max_level =
-      (cfd_->options()->compaction_style == kCompactionStyleUniversal)
-          ? NumberLevels()
-          : NumberLevels() - 1;
-
-  for (int level = 0; level < max_level; level++) {
+  for (int level = 0; level < NumberLevels() - 1; level++) {
    const std::vector<FileMetaData*>& files = files_[level];
-    std::vector<int>& files_by_size = files_by_size_[level];
+    auto& files_by_size = files_by_size_[level];
    assert(files_by_size.size() == 0);
 
    // populate a temp vector for sorting based on size
@@ -918,18 +901,12 @@ void Version::UpdateFilesBySize() {
    }
 
    // sort the top number_of_files_to_sort_ based on file size
-    if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
-      int num = temp.size();
-      std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
-                        CompareSeqnoDescending);
-    } else {
-      int num = Version::number_of_files_to_sort_;
-      if (num > (int)temp.size()) {
-        num = temp.size();
-      }
-      std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
-                        CompareCompensatedSizeDescending);
-    }
+    size_t num = Version::number_of_files_to_sort_;
+    if (num > temp.size()) {
+      num = temp.size();
+    }
+    std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
+                      CompareCompensatedSizeDescending);
    assert(temp.size() == files.size());
 
    // initialize files_by_size_

@@ -294,7 +294,7 @@ class Version {
   // that on a running system, we need to look at only the first
   // few largest files because a new version is created every few
   // seconds/minutes (because of concurrent compactions).
-  static const int number_of_files_to_sort_ = 50;
+  static const size_t number_of_files_to_sort_ = 50;
 
   // Level that should be compacted next and its compaction score.
   // Score < 1 means compaction is not strictly needed. These fields