mirror of https://github.com/facebook/rocksdb.git
Smooth the deletion of WAL files (#5116)
Summary: WAL files are currently not subject to deletion rate limiting by DeleteScheduler. If the size of the WAL files is significant, this can cause a high delete rate on SSDs that may affect other operations. To fix it, force WAL file deletions to go through the SstFileManager. Original PR for this is #2768 Pull Request resolved: https://github.com/facebook/rocksdb/pull/5116 Differential Revision: D14669437 Pulled By: anand1976 fbshipit-source-id: c5f62d0640cebaa1574de841a1d01e4ce2faadf0
This commit is contained in:
parent
a98317f555
commit
dae3b5545c
|
@ -2878,8 +2878,8 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
|||
std::string path_to_delete = dbname + "/" + fname;
|
||||
if (type == kMetaDatabase) {
|
||||
del = DestroyDB(path_to_delete, options);
|
||||
} else if (type == kTableFile) {
|
||||
del = DeleteSSTFile(&soptions, path_to_delete, dbname);
|
||||
} else if (type == kTableFile || type == kLogFile) {
|
||||
del = DeleteDBFile(&soptions, path_to_delete, dbname);
|
||||
} else {
|
||||
del = env->DeleteFile(path_to_delete);
|
||||
}
|
||||
|
@ -2913,7 +2913,7 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
|||
if (ParseFileName(fname, &number, &type) &&
|
||||
type == kTableFile) { // Lock file will be deleted at end
|
||||
std::string table_path = path + "/" + fname;
|
||||
Status del = DeleteSSTFile(&soptions, table_path, dbname);
|
||||
Status del = DeleteDBFile(&soptions, table_path, dbname);
|
||||
if (result.ok() && !del.ok()) {
|
||||
result = del;
|
||||
}
|
||||
|
@ -2939,7 +2939,8 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
|||
// Delete archival files.
|
||||
for (const auto& file : archiveFiles) {
|
||||
if (ParseFileName(file, &number, &type) && type == kLogFile) {
|
||||
Status del = env->DeleteFile(archivedir + "/" + file);
|
||||
Status del =
|
||||
DeleteDBFile(&soptions, archivedir + "/" + file, archivedir);
|
||||
if (result.ok() && !del.ok()) {
|
||||
result = del;
|
||||
}
|
||||
|
@ -2952,7 +2953,9 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
|||
if (wal_dir_exists) {
|
||||
for (const auto& file : walDirFiles) {
|
||||
if (ParseFileName(file, &number, &type) && type == kLogFile) {
|
||||
Status del = env->DeleteFile(LogFileName(soptions.wal_dir, number));
|
||||
Status del =
|
||||
DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number),
|
||||
soptions.wal_dir);
|
||||
if (result.ok() && !del.ok()) {
|
||||
result = del;
|
||||
}
|
||||
|
|
|
@ -259,9 +259,9 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname,
|
|||
const std::string& path_to_sync,
|
||||
FileType type, uint64_t number) {
|
||||
Status file_deletion_status;
|
||||
if (type == kTableFile) {
|
||||
if (type == kTableFile || type == kLogFile) {
|
||||
file_deletion_status =
|
||||
DeleteSSTFile(&immutable_db_options_, fname, path_to_sync);
|
||||
DeleteDBFile(&immutable_db_options_, fname, path_to_sync);
|
||||
} else {
|
||||
file_deletion_status = env_->DeleteFile(fname);
|
||||
}
|
||||
|
|
|
@ -367,14 +367,16 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
|
|||
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
|
||||
sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
|
||||
|
||||
WriteOptions wo;
|
||||
wo.disableWAL = true;
|
||||
ASSERT_OK(TryReopen(options));
|
||||
// Create 4 files in L0
|
||||
for (char v = 'a'; v <= 'd'; v++) {
|
||||
ASSERT_OK(Put("Key2", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key3", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key4", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key1", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key4", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key2", DummyString(1024, v), wo));
|
||||
ASSERT_OK(Put("Key3", DummyString(1024, v), wo));
|
||||
ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
|
||||
ASSERT_OK(Put("Key1", DummyString(1024, v), wo));
|
||||
ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
// We created 4 sst files in L0
|
||||
|
@ -408,6 +410,55 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
|
|||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||
}
|
||||
|
||||
TEST_F(DBSSTTest, RateLimitedWALDelete) {
|
||||
Destroy(last_options_);
|
||||
|
||||
std::vector<uint64_t> penalties;
|
||||
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||
"DeleteScheduler::BackgroundEmptyTrash:Wait",
|
||||
[&](void* arg) { penalties.push_back(*(static_cast<uint64_t*>(arg))); });
|
||||
|
||||
env_->no_slowdown_ = true;
|
||||
env_->time_elapse_only_sleep_ = true;
|
||||
Options options = CurrentOptions();
|
||||
options.disable_auto_compactions = true;
|
||||
options.env = env_;
|
||||
|
||||
int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
|
||||
Status s;
|
||||
options.sst_file_manager.reset(
|
||||
NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
|
||||
ASSERT_OK(s);
|
||||
options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
|
||||
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
|
||||
sfm->delete_scheduler()->SetMaxTrashDBRatio(2.1);
|
||||
|
||||
ASSERT_OK(TryReopen(options));
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
// Create 4 files in L0
|
||||
for (char v = 'a'; v <= 'd'; v++) {
|
||||
ASSERT_OK(Put("Key2", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key3", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key4", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key1", DummyString(1024, v)));
|
||||
ASSERT_OK(Put("Key4", DummyString(1024, v)));
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
// We created 4 sst files in L0
|
||||
ASSERT_EQ("4", FilesPerLevel(0));
|
||||
|
||||
// Compaction will move the 4 files in L0 to trash and create 1 L1 file
|
||||
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
||||
ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
|
||||
ASSERT_EQ("0,1", FilesPerLevel(0));
|
||||
|
||||
sfm->WaitForEmptyTrash();
|
||||
ASSERT_EQ(penalties.size(), 8);
|
||||
|
||||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||
}
|
||||
|
||||
TEST_F(DBSSTTest, OpenDBWithExistingTrash) {
|
||||
Options options = CurrentOptions();
|
||||
|
||||
|
@ -446,7 +497,6 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
|
|||
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||
"DeleteScheduler::DeleteFile",
|
||||
[&](void* /*arg*/) { bg_delete_file++; });
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
Options options = CurrentOptions();
|
||||
options.disable_auto_compactions = true;
|
||||
|
@ -464,10 +514,14 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
|
|||
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
|
||||
|
||||
DestroyAndReopen(options);
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
WriteOptions wo;
|
||||
wo.disableWAL = true;
|
||||
|
||||
// Create 4 files in L0
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
|
||||
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'), wo));
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
// We created 4 sst files in L0
|
||||
|
@ -483,7 +537,7 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
|
|||
|
||||
// Create 4 files in L0
|
||||
for (int i = 4; i < 8; i++) {
|
||||
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B')));
|
||||
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'), wo));
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
ASSERT_EQ("4,1", FilesPerLevel(0));
|
||||
|
@ -538,14 +592,27 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
|
|||
// Close DB and destroy it using DeleteScheduler
|
||||
Close();
|
||||
|
||||
int num_sst_files = 0;
|
||||
int num_wal_files = 0;
|
||||
std::vector<std::string> db_files;
|
||||
env_->GetChildren(dbname_, &db_files);
|
||||
for (std::string f : db_files) {
|
||||
if (f.substr(f.find_last_of(".") + 1) == "sst") {
|
||||
num_sst_files++;
|
||||
} else if (f.substr(f.find_last_of(".") + 1) == "log") {
|
||||
num_wal_files++;
|
||||
}
|
||||
}
|
||||
ASSERT_GT(num_sst_files, 0);
|
||||
ASSERT_GT(num_wal_files, 0);
|
||||
|
||||
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
|
||||
|
||||
sfm->SetDeleteRateBytesPerSecond(1024 * 1024);
|
||||
sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
|
||||
ASSERT_OK(DestroyDB(dbname_, options));
|
||||
sfm->WaitForEmptyTrash();
|
||||
// We have deleted the 4 sst files in the delete_scheduler
|
||||
ASSERT_EQ(bg_delete_file, 4);
|
||||
ASSERT_EQ(bg_delete_file, num_sst_files + num_wal_files);
|
||||
}
|
||||
|
||||
TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) {
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "util/cast_util.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/file_reader_writer.h"
|
||||
#include "util/file_util.h"
|
||||
#include "util/filename.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/mutexlock.h"
|
||||
|
@ -190,7 +191,7 @@ void WalManager::PurgeObsoleteWALFiles() {
|
|||
continue;
|
||||
}
|
||||
if (now_seconds - file_m_time > db_options_.wal_ttl_seconds) {
|
||||
s = env_->DeleteFile(file_path);
|
||||
s = DeleteDBFile(&db_options_, file_path, archival_dir, false);
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s",
|
||||
file_path.c_str(), s.ToString().c_str());
|
||||
|
@ -216,7 +217,7 @@ void WalManager::PurgeObsoleteWALFiles() {
|
|||
log_file_size = std::max(log_file_size, file_size);
|
||||
++log_files_num;
|
||||
} else {
|
||||
s = env_->DeleteFile(file_path);
|
||||
s = DeleteDBFile(&db_options_, file_path, archival_dir, false);
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_WARN(db_options_.info_log,
|
||||
"Unable to delete file: %s: %s", file_path.c_str(),
|
||||
|
@ -255,7 +256,8 @@ void WalManager::PurgeObsoleteWALFiles() {
|
|||
|
||||
for (size_t i = 0; i < files_del_num; ++i) {
|
||||
std::string const file_path = archived_logs[i]->PathName();
|
||||
s = env_->DeleteFile(db_options_.wal_dir + "/" + file_path);
|
||||
s = DeleteDBFile(&db_options_, db_options_.wal_dir + "/" + file_path,
|
||||
db_options_.wal_dir, false);
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s",
|
||||
file_path.c_str(), s.ToString().c_str());
|
||||
|
|
|
@ -83,6 +83,8 @@ class SstFileManager {
|
|||
|
||||
// Create a new SstFileManager that can be shared among multiple RocksDB
|
||||
// instances to track SST file and control there deletion rate.
|
||||
// Even though SstFileManager don't track WAL files but it still control
|
||||
// there deletion rate.
|
||||
//
|
||||
// @param env: Pointer to Env object, please see "rocksdb/env.h".
|
||||
// @param info_log: If not nullptr, info_log will be used to log errors.
|
||||
|
@ -93,6 +95,7 @@ class SstFileManager {
|
|||
// this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb
|
||||
// in 1 second, we will wait for another 3 seconds before we delete other
|
||||
// files, Set to 0 to disable deletion rate limiting.
|
||||
// This option also affect the delete rate of WAL files in the DB.
|
||||
// @param delete_existing_trash: Deprecated, this argument have no effect, but
|
||||
// if user provide trash_dir we will schedule deletes for files in the dir
|
||||
// @param status: If not nullptr, status will contain any errors that happened
|
||||
|
|
|
@ -87,16 +87,11 @@ Status CreateFile(Env* env, const std::string& destination,
|
|||
return dest_writer->Sync(use_fsync);
|
||||
}
|
||||
|
||||
Status DeleteSSTFile(const ImmutableDBOptions* db_options,
|
||||
const std::string& fname, const std::string& dir_to_sync) {
|
||||
return DeleteDBFile(db_options, fname, dir_to_sync, false);
|
||||
}
|
||||
|
||||
Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
||||
const std::string& fname, const std::string& dir_to_sync,
|
||||
const bool force_bg) {
|
||||
#ifndef ROCKSDB_LITE
|
||||
auto sfm =
|
||||
SstFileManagerImpl* sfm =
|
||||
static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get());
|
||||
if (sfm) {
|
||||
return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
|
||||
|
@ -107,6 +102,7 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
|||
(void)dir_to_sync;
|
||||
(void)force_bg;
|
||||
// SstFileManager is not supported in ROCKSDB_LITE
|
||||
// Delete file immediately
|
||||
return db_options->env->DeleteFile(fname);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/status.h"
|
||||
#include "rocksdb/types.h"
|
||||
#include "util/filename.h"
|
||||
|
||||
namespace rocksdb {
|
||||
// use_fsync maps to options.use_fsync, which determines the way that
|
||||
|
@ -21,13 +22,9 @@ extern Status CopyFile(Env* env, const std::string& source,
|
|||
extern Status CreateFile(Env* env, const std::string& destination,
|
||||
const std::string& contents, bool use_fsync);
|
||||
|
||||
extern Status DeleteSSTFile(const ImmutableDBOptions* db_options,
|
||||
const std::string& fname,
|
||||
const std::string& path_to_sync);
|
||||
|
||||
extern Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
||||
const std::string& fname,
|
||||
const std::string& path_to_sync,
|
||||
const bool force_bg);
|
||||
const std::string& fname,
|
||||
const std::string& path_to_sync,
|
||||
const bool force_bg = false);
|
||||
|
||||
} // namespace rocksdb
|
||||
|
|
|
@ -812,7 +812,11 @@ TEST_F(BlobDBTest, SstFileManager) {
|
|||
Destroy();
|
||||
// Make sure that DestroyBlobDB() also goes through delete scheduler.
|
||||
ASSERT_GE(files_scheduled_to_delete, 2);
|
||||
ASSERT_EQ(0, files_deleted_directly);
|
||||
// Due to a timing issue, the WAL may or may not be deleted directly. The
|
||||
// blob file is first scheduled, followed by WAL. If the background trash
|
||||
// thread does not wake up on time, the WAL file will be directly
|
||||
// deleted as the trash size will be > DB size
|
||||
ASSERT_LE(files_deleted_directly, 1);
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
sfm->WaitForEmptyTrash();
|
||||
}
|
||||
|
@ -855,7 +859,8 @@ TEST_F(BlobDBTest, SstFileManagerRestart) {
|
|||
// Make sure that reopening the DB rescan the existing trash files
|
||||
Open(bdb_options, db_options);
|
||||
ASSERT_GE(files_scheduled_to_delete, 3);
|
||||
ASSERT_EQ(0, files_deleted_directly);
|
||||
// Depending on timing, the WAL file may or may not be directly deleted
|
||||
ASSERT_LE(files_deleted_directly, 1);
|
||||
|
||||
sfm->WaitForEmptyTrash();
|
||||
|
||||
|
|
Loading…
Reference in New Issue