Options for file temperature for more files (#12957)

Summary:
We have a request to use the cold tier as primary source of truth for the DB, and to best support such use cases and to complement the existing options controlling SST file temperatures, we add two new DB options:
* `metadata_write_temperature` for DB "small" files that don't contain much user data
* `wal_write_temperature` for WALs.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12957

Test Plan: Unit test included, though it's hard to be sure we've covered all the places

Reviewed By: jowlyzhang

Differential Revision: D61664815

Pulled By: pdillinger

fbshipit-source-id: 8e19c9dd8fd2db059bb15f74938d6bc12002e82b
This commit is contained in:
Peter Dillinger 2024-08-23 19:49:25 -07:00 committed by Facebook GitHub Bot
parent d6aed64de4
commit 96340dbce2
19 changed files with 382 additions and 78 deletions

View File

@ -552,7 +552,8 @@ class CompactionJobTestBase : public testing::Test {
/*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"",
/*error_handler=*/nullptr, /*read_only=*/false)); /*error_handler=*/nullptr, /*read_only=*/false));
compaction_job_stats_.Reset(); compaction_job_stats_.Reset();
ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); ASSERT_OK(
SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
VersionEdit new_db; VersionEdit new_db;
new_db.SetLogNumber(0); new_db.SetLogNumber(0);
@ -575,7 +576,8 @@ class CompactionJobTestBase : public testing::Test {
} }
ASSERT_OK(s); ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file. // Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
Temperature::kUnknown, nullptr);
ASSERT_OK(s); ASSERT_OK(s);

View File

@ -970,7 +970,9 @@ Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
} }
// Persist it to IDENTITY file if allowed // Persist it to IDENTITY file if allowed
if (!read_only) { if (!read_only) {
s = SetIdentityFile(write_options, env_, dbname_, db_id_); s = SetIdentityFile(write_options, env_, dbname_,
immutable_db_options_.metadata_write_temperature,
db_id_);
} }
return s; return s;
} }

View File

@ -295,7 +295,8 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) {
Status DBImpl::NewDB(std::vector<std::string>* new_filenames) { Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
VersionEdit new_db; VersionEdit new_db;
const WriteOptions write_options(Env::IOActivity::kDBOpen); const WriteOptions write_options(Env::IOActivity::kDBOpen);
Status s = SetIdentityFile(write_options, env_, dbname_); Status s = SetIdentityFile(write_options, env_, dbname_,
immutable_db_options_.metadata_write_temperature);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -319,6 +320,12 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
} }
std::unique_ptr<FSWritableFile> file; std::unique_ptr<FSWritableFile> file;
FileOptions file_options = fs_->OptimizeForManifestWrite(file_options_); FileOptions file_options = fs_->OptimizeForManifestWrite(file_options_);
// DB option takes precedence when not kUnknown
if (immutable_db_options_.metadata_write_temperature !=
Temperature::kUnknown) {
file_options.temperature =
immutable_db_options_.metadata_write_temperature;
}
s = NewWritableFile(fs_.get(), manifest, &file, file_options); s = NewWritableFile(fs_.get(), manifest, &file, file_options);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -344,6 +351,7 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
if (s.ok()) { if (s.ok()) {
// Make "CURRENT" file that points to the new manifest file. // Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(write_options, fs_.get(), dbname_, 1, s = SetCurrentFile(write_options, fs_.get(), dbname_, 1,
immutable_db_options_.metadata_write_temperature,
directories_.GetDbDir()); directories_.GetDbDir());
if (new_filenames) { if (new_filenames) {
new_filenames->emplace_back( new_filenames->emplace_back(
@ -1936,6 +1944,10 @@ IOStatus DBImpl::CreateWAL(const WriteOptions& write_options,
BuildDBOptions(immutable_db_options_, mutable_db_options_); BuildDBOptions(immutable_db_options_, mutable_db_options_);
FileOptions opt_file_options = FileOptions opt_file_options =
fs_->OptimizeForLogWrite(file_options_, db_options); fs_->OptimizeForLogWrite(file_options_, db_options);
// DB option takes precedence when not kUnknown
if (immutable_db_options_.wal_write_temperature != Temperature::kUnknown) {
opt_file_options.temperature = immutable_db_options_.wal_write_temperature;
}
std::string wal_dir = immutable_db_options_.GetWalDir(); std::string wal_dir = immutable_db_options_.GetWalDir();
std::string log_fname = LogFileName(wal_dir, log_file_num); std::string log_fname = LogFileName(wal_dir, log_file_num);

View File

@ -10,6 +10,7 @@
#include <atomic> #include <atomic>
#include <cstdlib> #include <cstdlib>
#include <functional> #include <functional>
#include <iostream>
#include <memory> #include <memory>
#include "db/db_test_util.h" #include "db/db_test_util.h"
@ -26,6 +27,7 @@
#include "rocksdb/utilities/replayer.h" #include "rocksdb/utilities/replayer.h"
#include "rocksdb/wal_filter.h" #include "rocksdb/wal_filter.h"
#include "test_util/testutil.h" #include "test_util/testutil.h"
#include "util/defer.h"
#include "util/random.h" #include "util/random.h"
#include "utilities/fault_injection_env.h" #include "utilities/fault_injection_env.h"
@ -6544,6 +6546,234 @@ TEST_P(RenameCurrentTest, Compaction) {
ASSERT_EQ("d_value", Get("d")); ASSERT_EQ("d_value", Get("d"));
} }
TEST_F(DBTest2, VariousFileTemperatures) {
constexpr size_t kNumberFileTypes = static_cast<size_t>(kBlobFile) + 1U;
struct MyTestFS : public FileTemperatureTestFS {
explicit MyTestFS(const std::shared_ptr<FileSystem>& fs)
: FileTemperatureTestFS(fs) {
Reset();
}
IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts,
std::unique_ptr<FSWritableFile>* result,
IODebugContext* dbg) override {
IOStatus ios =
FileTemperatureTestFS::NewWritableFile(fname, opts, result, dbg);
if (ios.ok()) {
uint64_t number;
FileType type;
if (ParseFileName(GetFileName(fname), &number, "LOG", &type)) {
if (type == kTableFile) {
// Not checked here
} else if (type == kWalFile) {
if (opts.temperature != expected_wal_temperature) {
std::cerr << "Attempt to open " << fname << " with temperature "
<< temperature_to_string[opts.temperature]
<< " rather than "
<< temperature_to_string[expected_wal_temperature]
<< std::endl;
assert(false);
}
} else if (type == kDescriptorFile) {
if (opts.temperature != expected_manifest_temperature) {
std::cerr << "Attempt to open " << fname << " with temperature "
<< temperature_to_string[opts.temperature]
<< " rather than "
<< temperature_to_string[expected_wal_temperature]
<< std::endl;
assert(false);
}
} else if (opts.temperature != expected_other_metadata_temperature) {
std::cerr << "Attempt to open " << fname << " with temperature "
<< temperature_to_string[opts.temperature]
<< " rather than "
<< temperature_to_string[expected_wal_temperature]
<< std::endl;
assert(false);
}
UpdateCount(type, 1);
}
}
return ios;
}
IOStatus RenameFile(const std::string& src, const std::string& dst,
const IOOptions& options,
IODebugContext* dbg) override {
IOStatus ios = FileTemperatureTestFS::RenameFile(src, dst, options, dbg);
if (ios.ok()) {
uint64_t number;
FileType src_type;
FileType dst_type;
assert(ParseFileName(GetFileName(src), &number, "LOG", &src_type));
assert(ParseFileName(GetFileName(dst), &number, "LOG", &dst_type));
UpdateCount(src_type, -1);
UpdateCount(dst_type, 1);
}
return ios;
}
void UpdateCount(FileType type, int delta) {
size_t i = static_cast<size_t>(type);
assert(i < kNumberFileTypes);
counts[i].FetchAddRelaxed(delta);
}
std::map<FileType, size_t> PopCounts() {
std::map<FileType, size_t> ret;
for (size_t i = 0; i < kNumberFileTypes; ++i) {
int c = counts[i].ExchangeRelaxed(0);
if (c > 0) {
ret[static_cast<FileType>(i)] = c;
}
}
return ret;
}
FileOptions OptimizeForLogWrite(
const FileOptions& file_options,
const DBOptions& /*db_options*/) const override {
FileOptions opts = file_options;
if (optimize_wal_temperature != Temperature::kUnknown) {
opts.temperature = optimize_wal_temperature;
}
return opts;
}
FileOptions OptimizeForManifestWrite(
const FileOptions& file_options) const override {
FileOptions opts = file_options;
if (optimize_manifest_temperature != Temperature::kUnknown) {
opts.temperature = optimize_manifest_temperature;
}
return opts;
}
void Reset() {
optimize_manifest_temperature = Temperature::kUnknown;
optimize_wal_temperature = Temperature::kUnknown;
expected_manifest_temperature = Temperature::kUnknown;
expected_other_metadata_temperature = Temperature::kUnknown;
expected_wal_temperature = Temperature::kUnknown;
for (auto& c : counts) {
c.StoreRelaxed(0);
}
}
Temperature optimize_manifest_temperature;
Temperature optimize_wal_temperature;
Temperature expected_manifest_temperature;
Temperature expected_other_metadata_temperature;
Temperature expected_wal_temperature;
std::array<RelaxedAtomic<int>, kNumberFileTypes> counts;
};
// We don't have enough non-unknown temps to confidently distinguish that
// a specific setting caused a specific outcome, in a single run. This is a
// reasonable work-around without blowing up test time. Only returns
// non-unknown temperatures.
auto RandomTemp = [] {
static std::vector<Temperature> temps = {
Temperature::kHot, Temperature::kWarm, Temperature::kCold};
return temps[Random::GetTLSInstance()->Uniform(
static_cast<int>(temps.size()))];
};
auto test_fs = std::make_shared<MyTestFS>(env_->GetFileSystem());
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
for (bool use_optimize : {false, true}) {
std::cerr << "use_optimize: " << std::to_string(use_optimize) << std::endl;
for (bool use_temp_options : {false, true}) {
std::cerr << "use_temp_options: " << std::to_string(use_temp_options)
<< std::endl;
Options options = CurrentOptions();
// Currently require for last level temperature
options.compaction_style = kCompactionStyleUniversal;
options.env = env.get();
test_fs->Reset();
if (use_optimize) {
test_fs->optimize_manifest_temperature = RandomTemp();
test_fs->expected_manifest_temperature =
test_fs->optimize_manifest_temperature;
test_fs->optimize_wal_temperature = RandomTemp();
test_fs->expected_wal_temperature = test_fs->optimize_wal_temperature;
}
if (use_temp_options) {
options.metadata_write_temperature = RandomTemp();
test_fs->expected_manifest_temperature =
options.metadata_write_temperature;
test_fs->expected_other_metadata_temperature =
options.metadata_write_temperature;
options.wal_write_temperature = RandomTemp();
test_fs->expected_wal_temperature = options.wal_write_temperature;
options.last_level_temperature = RandomTemp();
options.default_write_temperature = RandomTemp();
}
DestroyAndReopen(options);
Defer closer([&] { Close(); });
using FTC = std::map<FileType, size_t>;
// Files on DB startup
ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
{kDescriptorFile, 2},
{kCurrentFile, 2},
{kIdentityFile, 1},
{kOptionsFile, 1}}));
// Temperature count map
using TCM = std::map<Temperature, size_t>;
ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), TCM({}));
ASSERT_OK(Put("foo", "1"));
ASSERT_OK(Put("bar", "1"));
ASSERT_OK(Flush());
ASSERT_OK(Put("foo", "2"));
ASSERT_OK(Put("bar", "2"));
ASSERT_OK(Flush());
ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
TCM({{options.default_write_temperature, 2}}));
ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
TCM({{options.last_level_temperature, 1}}));
ASSERT_OK(Put("foo", "3"));
ASSERT_OK(Put("bar", "3"));
ASSERT_OK(Flush());
// Just in memtable/WAL
ASSERT_OK(Put("dog", "3"));
{
TCM expected;
expected[options.default_write_temperature] += 1;
expected[options.last_level_temperature] += 1;
ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), expected);
}
// New files during operation
ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 3}, {kTableFile, 4}}));
Reopen(options);
// New files during re-open/recovery
ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
{kTableFile, 1},
{kDescriptorFile, 1},
{kCurrentFile, 1},
{kOptionsFile, 1}}));
Destroy(options);
}
}
}
TEST_F(DBTest2, LastLevelTemperature) { TEST_F(DBTest2, LastLevelTemperature) {
class TestListener : public EventListener { class TestListener : public EventListener {
public: public:

View File

@ -831,6 +831,15 @@ class FileTemperatureTestFS : public FileSystemWrapper {
return count; return count;
} }
std::map<Temperature, size_t> CountCurrentSstFilesByTemp() {
MutexLock lock(&mu_);
std::map<Temperature, size_t> ret;
for (const auto& e : current_sst_file_temperatures_) {
ret[e.second]++;
}
return ret;
}
void OverrideSstFileTemperature(uint64_t number, Temperature temp) { void OverrideSstFileTemperature(uint64_t number, Temperature temp) {
MutexLock lock(&mu_); MutexLock lock(&mu_);
current_sst_file_temperatures_[number] = temp; current_sst_file_temperatures_[number] = temp;
@ -842,7 +851,7 @@ class FileTemperatureTestFS : public FileSystemWrapper {
requested_sst_file_temperatures_; requested_sst_file_temperatures_;
std::map<uint64_t, Temperature> current_sst_file_temperatures_; std::map<uint64_t, Temperature> current_sst_file_temperatures_;
std::string GetFileName(const std::string& fname) { static std::string GetFileName(const std::string& fname) {
auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1); auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1);
// workaround only for Windows that the file path could contain both Windows // workaround only for Windows that the file path could contain both Windows
// FilePathSeparator and '/' // FilePathSeparator and '/'

View File

@ -68,7 +68,8 @@ class FlushJobTestBase : public testing::Test {
} }
void NewDB() { void NewDB() {
ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); ASSERT_OK(
SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
VersionEdit new_db; VersionEdit new_db;
new_db.SetLogNumber(0); new_db.SetLogNumber(0);
@ -114,7 +115,8 @@ class FlushJobTestBase : public testing::Test {
} }
ASSERT_OK(s); ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file. // Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
Temperature::kUnknown, nullptr);
ASSERT_OK(s); ASSERT_OK(s);
} }

View File

@ -5511,6 +5511,10 @@ Status VersionSet::ProcessManifestWrites(
std::unique_ptr<log::Writer> new_desc_log_ptr; std::unique_ptr<log::Writer> new_desc_log_ptr;
{ {
FileOptions opt_file_opts = fs_->OptimizeForManifestWrite(file_options_); FileOptions opt_file_opts = fs_->OptimizeForManifestWrite(file_options_);
// DB option (in file_options_) takes precedence when not kUnknown
if (file_options_.temperature != Temperature::kUnknown) {
opt_file_opts.temperature = file_options_.temperature;
}
mu->Unlock(); mu->Unlock();
TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestStart"); TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestStart");
TEST_SYNC_POINT_CALLBACK("VersionSet::LogAndApply:WriteManifest", nullptr); TEST_SYNC_POINT_CALLBACK("VersionSet::LogAndApply:WriteManifest", nullptr);
@ -5637,9 +5641,9 @@ Status VersionSet::ProcessManifestWrites(
assert(manifest_io_status.ok()); assert(manifest_io_status.ok());
} }
if (s.ok() && new_descriptor_log) { if (s.ok() && new_descriptor_log) {
io_s = SetCurrentFile(write_options, fs_.get(), dbname_, io_s = SetCurrentFile(
pending_manifest_file_number_, write_options, fs_.get(), dbname_, pending_manifest_file_number_,
dir_contains_current_file); file_options_.temperature, dir_contains_current_file);
if (!io_s.ok()) { if (!io_s.ok()) {
s = io_s; s = io_s;
// Quarantine old manifest file in case new manifest file's CURRENT file // Quarantine old manifest file in case new manifest file's CURRENT file

View File

@ -1415,16 +1415,22 @@ class VersionSetTestBase {
} }
} }
void CreateCurrentFile() {
// Make "CURRENT" file point to the new manifest file.
ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
Temperature::kUnknown,
/* dir_contains_current_file */ nullptr));
}
// Create DB with 3 column families. // Create DB with 3 column families.
void NewDB() { void NewDB() {
SequenceNumber last_seqno; SequenceNumber last_seqno;
std::unique_ptr<log::Writer> log_writer; std::unique_ptr<log::Writer> log_writer;
ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); ASSERT_OK(
SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
PrepareManifest(&column_families_, &last_seqno, &log_writer); PrepareManifest(&column_families_, &last_seqno, &log_writer);
log_writer.reset(); log_writer.reset();
// Make "CURRENT" file point to the new manifest file. CreateCurrentFile();
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
EXPECT_OK(versions_->Recover(column_families_, false)); EXPECT_OK(versions_->Recover(column_families_, false));
EXPECT_EQ(column_families_.size(), EXPECT_EQ(column_families_.size(),
@ -2600,7 +2606,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
edits_[i].MarkAtomicGroup(--remaining); edits_[i].MarkAtomicGroup(--remaining);
edits_[i].SetLastSequence(last_seqno_++); edits_[i].SetLastSequence(last_seqno_++);
} }
ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); CreateCurrentFile();
} }
void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) { void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) {
@ -2612,7 +2618,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
edits_[i].MarkAtomicGroup(--remaining); edits_[i].MarkAtomicGroup(--remaining);
edits_[i].SetLastSequence(last_seqno_++); edits_[i].SetLastSequence(last_seqno_++);
} }
ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); CreateCurrentFile();
} }
void SetupCorruptedAtomicGroup(int atomic_group_size) { void SetupCorruptedAtomicGroup(int atomic_group_size) {
@ -2626,7 +2632,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
} }
edits_[i].SetLastSequence(last_seqno_++); edits_[i].SetLastSequence(last_seqno_++);
} }
ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); CreateCurrentFile();
} }
void SetupIncorrectAtomicGroup(int atomic_group_size) { void SetupIncorrectAtomicGroup(int atomic_group_size) {
@ -2642,7 +2648,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
} }
edits_[i].SetLastSequence(last_seqno_++); edits_[i].SetLastSequence(last_seqno_++);
} }
ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); CreateCurrentFile();
} }
void SetupTestSyncPoints() { void SetupTestSyncPoints() {
@ -3408,8 +3414,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
SequenceNumber last_seqno; SequenceNumber last_seqno;
std::unique_ptr<log::Writer> log_writer; std::unique_ptr<log::Writer> log_writer;
PrepareManifest(&column_families, &last_seqno, &log_writer); PrepareManifest(&column_families, &last_seqno, &log_writer);
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); CreateCurrentFile();
ASSERT_OK(s);
EXPECT_OK(versions_->Recover(column_families, false /* read_only */)); EXPECT_OK(versions_->Recover(column_families, false /* read_only */));
EXPECT_EQ(column_families.size(), EXPECT_EQ(column_families.size(),
@ -3431,7 +3436,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
cfd_to_drop->Ref(); cfd_to_drop->Ref();
drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID()); drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID());
mutex_.Lock(); mutex_.Lock();
s = versions_->LogAndApply( Status s = versions_->LogAndApply(
cfd_to_drop, *cfd_to_drop->GetLatestMutableCFOptions(), read_options, cfd_to_drop, *cfd_to_drop->GetLatestMutableCFOptions(), read_options,
write_options, &drop_cf_edit, &mutex_, nullptr); write_options, &drop_cf_edit, &mutex_, nullptr);
mutex_.Unlock(); mutex_.Unlock();
@ -3541,9 +3546,7 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase,
TEST_F(EmptyDefaultCfNewManifest, Recover) { TEST_F(EmptyDefaultCfNewManifest, Recover) {
PrepareManifest(nullptr, nullptr, &log_writer_); PrepareManifest(nullptr, nullptr, &log_writer_);
log_writer_.reset(); log_writer_.reset();
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, CreateCurrentFile();
/* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
std::vector<ColumnFamilyDescriptor> column_families; std::vector<ColumnFamilyDescriptor> column_families;
@ -3552,7 +3555,7 @@ TEST_F(EmptyDefaultCfNewManifest, Recover) {
cf_options_); cf_options_);
std::string db_id; std::string db_id;
bool has_missing_table_file = false; bool has_missing_table_file = false;
s = versions_->TryRecoverFromOneManifest( Status s = versions_->TryRecoverFromOneManifest(
manifest_path, column_families, false, &db_id, &has_missing_table_file); manifest_path, column_families, false, &db_id, &has_missing_table_file);
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_FALSE(has_missing_table_file); ASSERT_FALSE(has_missing_table_file);
@ -3573,7 +3576,8 @@ class VersionSetTestEmptyDb
assert(nullptr != log_writer); assert(nullptr != log_writer);
VersionEdit new_db; VersionEdit new_db;
if (db_options_.write_dbid_to_manifest) { if (db_options_.write_dbid_to_manifest) {
ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_,
Temperature::kUnknown));
DBOptions tmp_db_options; DBOptions tmp_db_options;
tmp_db_options.env = env_; tmp_db_options.env = env_;
std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_)); std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
@ -3606,9 +3610,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) {
db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
PrepareManifest(nullptr, nullptr, &log_writer_); PrepareManifest(nullptr, nullptr, &log_writer_);
log_writer_.reset(); log_writer_.reset();
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, CreateCurrentFile();
/* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
@ -3623,8 +3625,8 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) {
std::string db_id; std::string db_id;
bool has_missing_table_file = false; bool has_missing_table_file = false;
s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, Status s = versions_->TryRecoverFromOneManifest(
read_only, &db_id, manifest_path, column_families, read_only, &db_id,
&has_missing_table_file); &has_missing_table_file);
auto iter = auto iter =
std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName);
@ -3651,9 +3653,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) {
ASSERT_OK(s); ASSERT_OK(s);
} }
log_writer_.reset(); log_writer_.reset();
s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, CreateCurrentFile();
/* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
@ -3699,9 +3699,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) {
ASSERT_OK(s); ASSERT_OK(s);
} }
log_writer_.reset(); log_writer_.reset();
s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, CreateCurrentFile();
/* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
@ -3758,9 +3756,7 @@ TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) {
ASSERT_OK(s); ASSERT_OK(s);
} }
log_writer_.reset(); log_writer_.reset();
s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, CreateCurrentFile();
/* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
@ -3816,9 +3812,7 @@ TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) {
ASSERT_OK(s); ASSERT_OK(s);
} }
log_writer_.reset(); log_writer_.reset();
s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, CreateCurrentFile();
/* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
@ -4025,15 +4019,14 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
WriteFileAdditionAndDeletionToManifest( WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files); /*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
log_writer_.reset(); log_writer_.reset();
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); CreateCurrentFile();
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
std::string db_id; std::string db_id;
bool has_missing_table_file = false; bool has_missing_table_file = false;
s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, Status s = versions_->TryRecoverFromOneManifest(
/*read_only=*/false, &db_id, manifest_path, column_families_,
&has_missing_table_file); /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_TRUE(has_missing_table_file); ASSERT_TRUE(has_missing_table_file);
for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
@ -4083,15 +4076,14 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
WriteFileAdditionAndDeletionToManifest( WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>()); /*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>());
log_writer_.reset(); log_writer_.reset();
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); CreateCurrentFile();
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
std::string db_id; std::string db_id;
bool has_missing_table_file = false; bool has_missing_table_file = false;
s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, Status s = versions_->TryRecoverFromOneManifest(
/*read_only=*/false, &db_id, manifest_path, column_families_,
&has_missing_table_file); /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_TRUE(has_missing_table_file); ASSERT_TRUE(has_missing_table_file);
for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
@ -4137,15 +4129,14 @@ TEST_F(VersionSetTestMissingFiles, NoFileMissing) {
WriteFileAdditionAndDeletionToManifest( WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files); /*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
log_writer_.reset(); log_writer_.reset();
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); CreateCurrentFile();
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
std::string db_id; std::string db_id;
bool has_missing_table_file = false; bool has_missing_table_file = false;
s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, Status s = versions_->TryRecoverFromOneManifest(
/*read_only=*/false, &db_id, manifest_path, column_families_,
&has_missing_table_file); /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_FALSE(has_missing_table_file); ASSERT_FALSE(has_missing_table_file);
for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
@ -4266,15 +4257,14 @@ class BestEffortsRecoverIncompleteVersionTest
/*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>(), /*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>(),
blob_files); blob_files);
log_writer_.reset(); log_writer_.reset();
Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); CreateCurrentFile();
ASSERT_OK(s);
std::string manifest_path; std::string manifest_path;
VerifyManifest(&manifest_path); VerifyManifest(&manifest_path);
std::string db_id; std::string db_id;
bool has_missing_table_file = false; bool has_missing_table_file = false;
s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, Status s = versions_->TryRecoverFromOneManifest(
/*read_only=*/false, &db_id, manifest_path, column_families_,
&has_missing_table_file); /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_TRUE(has_missing_table_file); ASSERT_TRUE(has_missing_table_file);
} }

6
env/file_system.cc vendored
View File

@ -181,10 +181,10 @@ FileOptions FileSystem::OptimizeForBlobFileRead(
IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
const std::string& fname, bool should_sync, const std::string& fname, bool should_sync,
const IOOptions& io_options) { const IOOptions& io_options,
const FileOptions& file_options) {
std::unique_ptr<FSWritableFile> file; std::unique_ptr<FSWritableFile> file;
EnvOptions soptions; IOStatus s = fs->NewWritableFile(fname, file_options, &file, nullptr);
IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

View File

@ -388,6 +388,7 @@ bool ParseFileName(const std::string& fname, uint64_t* number,
IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs, IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
const std::string& dbname, uint64_t descriptor_number, const std::string& dbname, uint64_t descriptor_number,
Temperature temp,
FSDirectory* dir_contains_current_file) { FSDirectory* dir_contains_current_file) {
// Remove leading "dbname/" and add newline to manifest file name // Remove leading "dbname/" and add newline to manifest file name
std::string manifest = DescriptorFileName(dbname, descriptor_number); std::string manifest = DescriptorFileName(dbname, descriptor_number);
@ -397,8 +398,11 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
std::string tmp = TempFileName(dbname, descriptor_number); std::string tmp = TempFileName(dbname, descriptor_number);
IOOptions opts; IOOptions opts;
IOStatus s = PrepareIOFromWriteOptions(write_options, opts); IOStatus s = PrepareIOFromWriteOptions(write_options, opts);
FileOptions file_opts;
file_opts.temperature = temp;
if (s.ok()) { if (s.ok()) {
s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts); s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts,
file_opts);
} }
TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s); TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s);
if (s.ok()) { if (s.ok()) {
@ -423,7 +427,8 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
} }
Status SetIdentityFile(const WriteOptions& write_options, Env* env, Status SetIdentityFile(const WriteOptions& write_options, Env* env,
const std::string& dbname, const std::string& db_id) { const std::string& dbname, Temperature temp,
const std::string& db_id) {
std::string id; std::string id;
if (db_id.empty()) { if (db_id.empty()) {
id = env->GenerateUniqueId(); id = env->GenerateUniqueId();
@ -437,8 +442,11 @@ Status SetIdentityFile(const WriteOptions& write_options, Env* env,
Status s; Status s;
IOOptions opts; IOOptions opts;
s = PrepareIOFromWriteOptions(write_options, opts); s = PrepareIOFromWriteOptions(write_options, opts);
FileOptions file_opts;
file_opts.temperature = temp;
if (s.ok()) { if (s.ok()) {
s = WriteStringToFile(env, id, tmp, true, &opts); s = WriteStringToFile(env->GetFileSystem().get(), id, tmp,
/*should_sync=*/true, opts, file_opts);
} }
if (s.ok()) { if (s.ok()) {
s = env->RenameFile(tmp, identify_file_name); s = env->RenameFile(tmp, identify_file_name);

View File

@ -161,11 +161,12 @@ bool ParseFileName(const std::string& filename, uint64_t* number,
// when // when
IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs, IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
const std::string& dbname, uint64_t descriptor_number, const std::string& dbname, uint64_t descriptor_number,
Temperature temp,
FSDirectory* dir_contains_current_file); FSDirectory* dir_contains_current_file);
// Make the IDENTITY file for the db // Make the IDENTITY file for the db
Status SetIdentityFile(const WriteOptions& write_options, Env* env, Status SetIdentityFile(const WriteOptions& write_options, Env* env,
const std::string& dbname, const std::string& dbname, Temperature temp,
const std::string& db_id = {}); const std::string& db_id = {});
// Sync manifest file `file`. // Sync manifest file `file`.

View File

@ -813,7 +813,7 @@ struct AdvancedColumnFamilyOptions {
// If this option is set, when creating the last level files, pass this // If this option is set, when creating the last level files, pass this
// temperature to FileSystem used. Should be no-op for default FileSystem // temperature to FileSystem used. Should be no-op for default FileSystem
// and users need to plug in their own FileSystem to take advantage of it. // and users need to plug in their own FileSystem to take advantage of it.
// When using FIFO compaction, this option is ignored. // Currently only compatible with universal compaction.
// //
// Dynamically changeable through the SetOptions() API // Dynamically changeable through the SetOptions() API
Temperature last_level_temperature = Temperature::kUnknown; Temperature last_level_temperature = Temperature::kUnknown;

View File

@ -195,7 +195,9 @@ struct FileOptions : EnvOptions {
FileOptions() : EnvOptions(), handoff_checksum_type(ChecksumType::kCRC32c) {} FileOptions() : EnvOptions(), handoff_checksum_type(ChecksumType::kCRC32c) {}
FileOptions(const DBOptions& opts) FileOptions(const DBOptions& opts)
: EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {} : EnvOptions(opts),
temperature(opts.metadata_write_temperature),
handoff_checksum_type(ChecksumType::kCRC32c) {}
FileOptions(const EnvOptions& opts) FileOptions(const EnvOptions& opts)
: EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {} : EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {}
@ -1952,7 +1954,8 @@ class FSDirectoryWrapper : public FSDirectory {
// A utility routine: write "data" to the named file. // A utility routine: write "data" to the named file.
IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
const std::string& fname, bool should_sync = false, const std::string& fname, bool should_sync = false,
const IOOptions& io_options = IOOptions()); const IOOptions& io_options = IOOptions(),
const FileOptions& file_options = FileOptions());
// A utility routine: read contents of named file into *data // A utility routine: read contents of named file into *data
IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,

View File

@ -1580,6 +1580,16 @@ struct DBOptions {
// Default 100ms // Default 100ms
uint64_t follower_catchup_retry_wait_ms = 100; uint64_t follower_catchup_retry_wait_ms = 100;
// When DB files other than SST, blob and WAL files are created, use this
// filesystem temperature. (See also `wal_write_temperature` and various
// `*_temperature` CF options.) When not `kUnknown`, this overrides any
// temperature set by OptimizeForManifestWrite functions.
Temperature metadata_write_temperature = Temperature::kUnknown;
// Use this filesystem temperature when creating WAL files. When not
// `kUnknown`, this overrides any temperature set by OptimizeForLogWrite
// functions.
Temperature wal_write_temperature = Temperature::kUnknown;
// End EXPERIMENTAL // End EXPERIMENTAL
}; };

View File

@ -576,6 +576,14 @@ static std::unordered_map<std::string, OptionTypeInfo>
{offsetof(struct ImmutableDBOptions, follower_catchup_retry_wait_ms), {offsetof(struct ImmutableDBOptions, follower_catchup_retry_wait_ms),
OptionType::kUInt64T, OptionVerificationType::kNormal, OptionType::kUInt64T, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}}, OptionTypeFlags::kNone}},
{"metadata_write_temperature",
{offsetof(struct ImmutableDBOptions, metadata_write_temperature),
OptionType::kTemperature, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
{"wal_write_temperature",
{offsetof(struct ImmutableDBOptions, wal_write_temperature),
OptionType::kTemperature, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
}; };
const std::string OptionsHelper::kDBOptionsName = "DBOptions"; const std::string OptionsHelper::kDBOptionsName = "DBOptions";
@ -778,7 +786,9 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
follower_refresh_catchup_period_ms( follower_refresh_catchup_period_ms(
options.follower_refresh_catchup_period_ms), options.follower_refresh_catchup_period_ms),
follower_catchup_retry_count(options.follower_catchup_retry_count), follower_catchup_retry_count(options.follower_catchup_retry_count),
follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms) { follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms),
metadata_write_temperature(options.metadata_write_temperature),
wal_write_temperature(options.wal_write_temperature) {
fs = env->GetFileSystem(); fs = env->GetFileSystem();
clock = env->GetSystemClock().get(); clock = env->GetSystemClock().get();
logger = info_log.get(); logger = info_log.get();
@ -956,6 +966,10 @@ void ImmutableDBOptions::Dump(Logger* log) const {
db_host_id.c_str()); db_host_id.c_str());
ROCKS_LOG_HEADER(log, " Options.enforce_single_del_contracts: %s", ROCKS_LOG_HEADER(log, " Options.enforce_single_del_contracts: %s",
enforce_single_del_contracts ? "true" : "false"); enforce_single_del_contracts ? "true" : "false");
ROCKS_LOG_HEADER(log, " Options.metadata_write_temperature: %s",
temperature_to_string[metadata_write_temperature].c_str());
ROCKS_LOG_HEADER(log, " Options.wal_write_temperature: %s",
temperature_to_string[wal_write_temperature].c_str());
} }
bool ImmutableDBOptions::IsWalDirSameAsDBPath() const { bool ImmutableDBOptions::IsWalDirSameAsDBPath() const {

View File

@ -103,6 +103,8 @@ struct ImmutableDBOptions {
uint64_t follower_refresh_catchup_period_ms; uint64_t follower_refresh_catchup_period_ms;
uint64_t follower_catchup_retry_count; uint64_t follower_catchup_retry_count;
uint64_t follower_catchup_retry_wait_ms; uint64_t follower_catchup_retry_wait_ms;
Temperature metadata_write_temperature;
Temperature wal_write_temperature;
// Beginning convenience/helper objects that are not part of the base // Beginning convenience/helper objects that are not part of the base
// DBOptions // DBOptions

View File

@ -180,6 +180,15 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
options.enforce_single_del_contracts = options.enforce_single_del_contracts =
immutable_db_options.enforce_single_del_contracts; immutable_db_options.enforce_single_del_contracts;
options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc; options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc;
options.follower_refresh_catchup_period_ms =
immutable_db_options.follower_refresh_catchup_period_ms;
options.follower_catchup_retry_count =
immutable_db_options.follower_catchup_retry_count;
options.follower_catchup_retry_wait_ms =
immutable_db_options.follower_catchup_retry_wait_ms;
options.metadata_write_temperature =
immutable_db_options.metadata_write_temperature;
options.wal_write_temperature = immutable_db_options.wal_write_temperature;
return options; return options;
} }

View File

@ -69,8 +69,9 @@ Status PersistRocksDBOptions(const WriteOptions& write_options,
} }
std::unique_ptr<FSWritableFile> wf; std::unique_ptr<FSWritableFile> wf;
Status s = FileOptions file_options;
fs->NewWritableFile(file_name, FileOptions(), &wf, nullptr); file_options.temperature = db_opt.metadata_write_temperature;
Status s = fs->NewWritableFile(file_name, file_options, &wf, nullptr);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

View File

@ -367,7 +367,12 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
"lowest_used_cache_tier=kNonVolatileBlockTier;" "lowest_used_cache_tier=kNonVolatileBlockTier;"
"allow_data_in_errors=false;" "allow_data_in_errors=false;"
"enforce_single_del_contracts=false;" "enforce_single_del_contracts=false;"
"daily_offpeak_time_utc=08:30-19:00;", "daily_offpeak_time_utc=08:30-19:00;"
"follower_refresh_catchup_period_ms=123;"
"follower_catchup_retry_count=456;"
"follower_catchup_retry_wait_ms=789;"
"metadata_write_temperature=kCold;"
"wal_write_temperature=kHot;",
new_options)); new_options));
ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),