FindObsoleteFiles() to directly check whether candidate files are live (#10040)

Summary:
Right now, in FindObsoleteFiles() we build a list of all live SST files from all existing Versions. This is all done in DB mutex, and is O(m*n) where m is number of versions and n is number of files. In some extereme cases, it can take very long. The list is used to see whether a candidate file still shows up in a version. With this commit, every candidate file is directly check against all the versions for file existance. This operation would be O(m*k) where k is number of candidate files. Since is usually small (except perhaps full compaction in universal compaction), this is usually much faster than the previous solution.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10040

Test Plan: TBD

Reviewed By: riversand963

Differential Revision: D36613391

fbshipit-source-id: 3f13b090f755d9b3ae417faec62cd6e798bac1eb
This commit is contained in:
sdong 2022-05-25 12:43:48 -07:00 committed by Facebook GitHub Bot
parent b0e190604b
commit 356f8c5d81
4 changed files with 123 additions and 46 deletions

View File

@ -49,6 +49,7 @@
### Performance Improvements
* Avoid calling malloc_usable_size() in LRU Cache's mutex.
* Reduce DB mutex holding time when finding obsolete files to delete. When a file is trivial moved to another level, the internal files will be referenced twice internally and sometimes opened twice too. If a deletion candidate file is not the last reference, we need to destroy the reference and close the file but not deleting the file. Right now we determine it by building a set of all live files. With the improvement, we check the file against all live LSM-tree versions instead.
## 7.2.0 (04/15/2022)
### Bug Fixes

View File

@ -166,8 +166,8 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
job_context->log_number = MinLogNumberToKeep();
job_context->prev_log_number = versions_->prev_log_number();
versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live);
if (doing_the_full_scan) {
versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live);
InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
dbname_);
std::set<std::string> paths;
@ -242,6 +242,14 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
log_file, immutable_db_options_.db_log_dir);
}
}
} else {
// Instead of filling ob_context->sst_live and job_context->blob_live,
// directly remove files that show up in any Version. This is because
// candidate files tend to be a small percentage of all files, so it is
// usually cheaper to check them against every version, compared to
// building a map for all files.
versions_->RemoveLiveFiles(job_context->sst_delete_files,
job_context->blob_delete_files);
}
// logs_ is empty when called during recovery, in which case there can't yet
@ -395,8 +403,10 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
state.manifest_delete_files.size());
// We may ignore the dbname when generating the file names.
for (auto& file : state.sst_delete_files) {
candidate_files.emplace_back(
MakeTableFileName(file.metadata->fd.GetNumber()), file.path);
if (!file.only_delete_metadata) {
candidate_files.emplace_back(
MakeTableFileName(file.metadata->fd.GetNumber()), file.path);
}
if (file.metadata->table_reader_handle) {
table_cache_->Release(file.metadata->table_reader_handle);
}

View File

@ -3941,6 +3941,26 @@ void Version::AddLiveFiles(std::vector<uint64_t>* live_table_files,
}
}
void Version::RemoveLiveFiles(
std::vector<ObsoleteFileInfo>& sst_delete_candidates,
std::vector<ObsoleteBlobFileInfo>& blob_delete_candidates) const {
for (ObsoleteFileInfo& fi : sst_delete_candidates) {
if (!fi.only_delete_metadata &&
storage_info()->GetFileLocation(fi.metadata->fd.GetNumber()) !=
VersionStorageInfo::FileLocation::Invalid()) {
fi.only_delete_metadata = true;
}
}
blob_delete_candidates.erase(
std::remove_if(
blob_delete_candidates.begin(), blob_delete_candidates.end(),
[this](ObsoleteBlobFileInfo& x) {
return storage_info()->GetBlobFileMetaData(x.GetBlobFileNumber());
}),
blob_delete_candidates.end());
}
std::string Version::DebugString(bool hex, bool print_stats) const {
std::string r;
for (int level = 0; level < storage_info_.num_levels_; level++) {
@ -5745,6 +5765,38 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f,
v->GetMutableCFOptions().prefix_extractor);
}
void VersionSet::RemoveLiveFiles(
std::vector<ObsoleteFileInfo>& sst_delete_candidates,
std::vector<ObsoleteBlobFileInfo>& blob_delete_candidates) const {
assert(column_family_set_);
for (auto cfd : *column_family_set_) {
assert(cfd);
if (!cfd->initialized()) {
continue;
}
auto* current = cfd->current();
bool found_current = false;
Version* const dummy_versions = cfd->dummy_versions();
assert(dummy_versions);
for (Version* v = dummy_versions->next_; v != dummy_versions;
v = v->next_) {
v->RemoveLiveFiles(sst_delete_candidates, blob_delete_candidates);
if (v == current) {
found_current = true;
}
}
if (!found_current && current != nullptr) {
// Should never happen unless it is a bug.
assert(false);
current->RemoveLiveFiles(sst_delete_candidates, blob_delete_candidates);
}
}
}
void VersionSet::AddLiveFiles(std::vector<uint64_t>* live_table_files,
std::vector<uint64_t>* live_blob_files) const {
assert(live_table_files);

View File

@ -688,6 +688,53 @@ class VersionStorageInfo {
friend class VersionSet;
};
struct ObsoleteFileInfo {
FileMetaData* metadata;
std::string path;
// If true, the FileMataData should be destroyed but the file should
// not be deleted. This is because another FileMetaData still references
// the file, usually because the file is trivial moved so two FileMetadata
// is managing the file.
bool only_delete_metadata = false;
ObsoleteFileInfo() noexcept
: metadata(nullptr), only_delete_metadata(false) {}
ObsoleteFileInfo(FileMetaData* f, const std::string& file_path)
: metadata(f), path(file_path), only_delete_metadata(false) {}
ObsoleteFileInfo(const ObsoleteFileInfo&) = delete;
ObsoleteFileInfo& operator=(const ObsoleteFileInfo&) = delete;
ObsoleteFileInfo(ObsoleteFileInfo&& rhs) noexcept : ObsoleteFileInfo() {
*this = std::move(rhs);
}
ObsoleteFileInfo& operator=(ObsoleteFileInfo&& rhs) noexcept {
path = std::move(rhs.path);
metadata = rhs.metadata;
rhs.metadata = nullptr;
return *this;
}
void DeleteMetadata() {
delete metadata;
metadata = nullptr;
}
};
class ObsoleteBlobFileInfo {
public:
ObsoleteBlobFileInfo(uint64_t blob_file_number, std::string path)
: blob_file_number_(blob_file_number), path_(std::move(path)) {}
uint64_t GetBlobFileNumber() const { return blob_file_number_; }
const std::string& GetPath() const { return path_; }
private:
uint64_t blob_file_number_;
std::string path_;
};
using MultiGetRange = MultiGetContext::Range;
// A column family's version consists of the table and blob files owned by
// the column family at a certain point in time.
@ -789,6 +836,11 @@ class Version {
void AddLiveFiles(std::vector<uint64_t>* live_table_files,
std::vector<uint64_t>* live_blob_files) const;
// Remove live files that are in the delete candidate lists.
void RemoveLiveFiles(
std::vector<ObsoleteFileInfo>& sst_delete_candidates,
std::vector<ObsoleteBlobFileInfo>& blob_delete_candidates) const;
// Return a human readable string that describes this version's contents.
std::string DebugString(bool hex = false, bool print_stats = false) const;
@ -927,49 +979,6 @@ class Version {
void operator=(const Version&) = delete;
};
struct ObsoleteFileInfo {
FileMetaData* metadata;
std::string path;
ObsoleteFileInfo() noexcept : metadata(nullptr) {}
ObsoleteFileInfo(FileMetaData* f, const std::string& file_path)
: metadata(f), path(file_path) {}
ObsoleteFileInfo(const ObsoleteFileInfo&) = delete;
ObsoleteFileInfo& operator=(const ObsoleteFileInfo&) = delete;
ObsoleteFileInfo(ObsoleteFileInfo&& rhs) noexcept :
ObsoleteFileInfo() {
*this = std::move(rhs);
}
ObsoleteFileInfo& operator=(ObsoleteFileInfo&& rhs) noexcept {
path = std::move(rhs.path);
metadata = rhs.metadata;
rhs.metadata = nullptr;
return *this;
}
void DeleteMetadata() {
delete metadata;
metadata = nullptr;
}
};
class ObsoleteBlobFileInfo {
public:
ObsoleteBlobFileInfo(uint64_t blob_file_number, std::string path)
: blob_file_number_(blob_file_number), path_(std::move(path)) {}
uint64_t GetBlobFileNumber() const { return blob_file_number_; }
const std::string& GetPath() const { return path_; }
private:
uint64_t blob_file_number_;
std::string path_;
};
class BaseReferencedVersionBuilder;
class AtomicGroupReadBuffer {
@ -1287,6 +1296,11 @@ class VersionSet {
void AddLiveFiles(std::vector<uint64_t>* live_table_files,
std::vector<uint64_t>* live_blob_files) const;
// Remove live files that are in the delete candidate lists.
void RemoveLiveFiles(
std::vector<ObsoleteFileInfo>& sst_delete_candidates,
std::vector<ObsoleteBlobFileInfo>& blob_delete_candidates) const;
// Return the approximate size of data to be scanned for range [start, end)
// in levels [start_level, end_level). If end_level == -1 it will search
// through all non-empty levels