Avoid create directory for every column families (#6358)

Summary:
A relatively recent regression causes for every CF, create and open directory is called for the DB directory, unless CF has a private directory. This doesn't scale well with large number of column families.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6358

Test Plan: Run all existing tests and see it pass. strace with db_bench --num_column_families and observe it doesn't open directory for number of column families.

Differential Revision: D19675141

fbshipit-source-id: da01d9216f1dae3f03d4064fbd88ce71245bd9be
This commit is contained in:
sdong 2020-02-03 14:11:50 -08:00 committed by Facebook Github Bot
parent eb4d6af5ae
commit 36c504be17
5 changed files with 25 additions and 12 deletions

View File

@ -1320,17 +1320,26 @@ Env::WriteLifeTimeHint ColumnFamilyData::CalculateSSTWriteHint(int level) {
static_cast<int>(Env::WLTH_MEDIUM));
}
Status ColumnFamilyData::AddDirectories() {
Status ColumnFamilyData::AddDirectories(
std::map<std::string, std::shared_ptr<Directory>>* created_dirs) {
Status s;
assert(created_dirs != nullptr);
assert(data_dirs_.empty());
for (auto& p : ioptions_.cf_paths) {
std::unique_ptr<Directory> path_directory;
s = DBImpl::CreateAndNewDirectory(ioptions_.env, p.path, &path_directory);
if (!s.ok()) {
return s;
auto existing_dir = created_dirs->find(p.path);
if (existing_dir == created_dirs->end()) {
std::unique_ptr<Directory> path_directory;
s = DBImpl::CreateAndNewDirectory(ioptions_.env, p.path, &path_directory);
if (!s.ok()) {
return s;
}
assert(path_directory != nullptr);
data_dirs_.emplace_back(path_directory.release());
(*created_dirs)[p.path] = data_dirs_.back();
} else {
data_dirs_.emplace_back(existing_dir->second);
}
assert(path_directory != nullptr);
data_dirs_.emplace_back(path_directory.release());
}
assert(data_dirs_.size() == ioptions_.cf_paths.size());
return s;

View File

@ -497,7 +497,10 @@ class ColumnFamilyData {
Env::WriteLifeTimeHint CalculateSSTWriteHint(int level);
Status AddDirectories();
// created_dirs remembers directory created, so that we don't need to call
// the same data creation operation again.
Status AddDirectories(
std::map<std::string, std::shared_ptr<Directory>>* created_dirs);
Directory* GetDataDir(size_t path_id) const;
@ -589,7 +592,7 @@ class ColumnFamilyData {
std::atomic<uint64_t> last_memtable_id_;
// Directories corresponding to cf_paths.
std::vector<std::unique_ptr<Directory>> data_dirs_;
std::vector<std::shared_ptr<Directory>> data_dirs_;
};
// ColumnFamilySet has interesting thread-safety requirements

View File

@ -2301,7 +2301,8 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options,
auto* cfd =
versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name);
assert(cfd != nullptr);
s = cfd->AddDirectories();
std::map<std::string, std::shared_ptr<Directory>> dummy_created_dirs;
s = cfd->AddDirectories(&dummy_created_dirs);
}
if (s.ok()) {
single_column_family_mode_ = false;

View File

@ -826,7 +826,6 @@ class DBImpl : public DB {
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
const bool seq_per_batch, const bool batch_per_txn);
static Status CreateAndNewDirectory(Env* env, const std::string& dirname,
std::unique_ptr<Directory>* directory);

View File

@ -454,8 +454,9 @@ Status DBImpl::Recover(
s = CheckConsistency();
}
if (s.ok() && !read_only) {
std::map<std::string, std::shared_ptr<Directory>> created_dirs;
for (auto cfd : *versions_->GetColumnFamilySet()) {
s = cfd->AddDirectories();
s = cfd->AddDirectories(&created_dirs);
if (!s.ok()) {
return s;
}