mirror of https://github.com/facebook/rocksdb.git
Fix file deletions in DestroyDB not rate limited (#12891)
Summary: Make `DestroyDB` slowly delete files if it's configured and enabled via `SstFileManager`. It's currently not available mainly because of DeleteScheduler's logic related to tracked total_size_ and total_trash_size_. These accounting and logic should not be applied to `DestroyDB`. This PR adds a `DeleteUnaccountedDBFile` util for this purpose which deletes files without accounting it. This util also supports assigning a file to a specified trash bucket so that user can later wait for a specific trash bucket to be empty. For `DestroyDB`, files with more than 1 hard links will be deleted immediately. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12891 Test Plan: Added unit tests, existing tests. Reviewed By: anand1976 Differential Revision: D60300220 Pulled By: jowlyzhang fbshipit-source-id: 8b18109a177a3a9532f6dc2e40e08310c08ca3c7
This commit is contained in:
parent
9d5c8c89a1
commit
d12aaf23ca
|
@ -17,6 +17,7 @@
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
@ -5240,6 +5241,14 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
||||||
Env* env = soptions.env;
|
Env* env = soptions.env;
|
||||||
std::vector<std::string> filenames;
|
std::vector<std::string> filenames;
|
||||||
bool wal_in_db_path = soptions.IsWalDirSameAsDBPath();
|
bool wal_in_db_path = soptions.IsWalDirSameAsDBPath();
|
||||||
|
auto sfm = static_cast_with_check<SstFileManagerImpl>(
|
||||||
|
options.sst_file_manager.get());
|
||||||
|
// Allocate a separate trash bucket to be used by all the to be deleted
|
||||||
|
// files, so we can later wait for this bucket to be empty before return.
|
||||||
|
std::optional<int32_t> bucket;
|
||||||
|
if (sfm) {
|
||||||
|
bucket = sfm->NewTrashBucket();
|
||||||
|
}
|
||||||
|
|
||||||
// Reset the logger because it holds a handle to the
|
// Reset the logger because it holds a handle to the
|
||||||
// log file and prevents cleanup and directory removal
|
// log file and prevents cleanup and directory removal
|
||||||
|
@ -5251,6 +5260,7 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
||||||
/*IODebugContext*=*/nullptr)
|
/*IODebugContext*=*/nullptr)
|
||||||
.PermitUncheckedError();
|
.PermitUncheckedError();
|
||||||
|
|
||||||
|
std::set<std::string> paths_to_delete;
|
||||||
FileLock* lock;
|
FileLock* lock;
|
||||||
const std::string lockname = LockFileName(dbname);
|
const std::string lockname = LockFileName(dbname);
|
||||||
Status result = env->LockFile(lockname, &lock);
|
Status result = env->LockFile(lockname, &lock);
|
||||||
|
@ -5267,10 +5277,9 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
||||||
del = DestroyDB(path_to_delete, options);
|
del = DestroyDB(path_to_delete, options);
|
||||||
} else if (type == kTableFile || type == kWalFile ||
|
} else if (type == kTableFile || type == kWalFile ||
|
||||||
type == kBlobFile) {
|
type == kBlobFile) {
|
||||||
del = DeleteDBFile(
|
del = DeleteUnaccountedDBFile(&soptions, path_to_delete, dbname,
|
||||||
&soptions, path_to_delete, dbname,
|
/*force_bg=*/false,
|
||||||
/*force_bg=*/false,
|
/*force_fg=*/false, bucket);
|
||||||
/*force_fg=*/(type == kWalFile) ? !wal_in_db_path : false);
|
|
||||||
} else {
|
} else {
|
||||||
del = env->DeleteFile(path_to_delete);
|
del = env->DeleteFile(path_to_delete);
|
||||||
}
|
}
|
||||||
|
@ -5279,6 +5288,7 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
paths_to_delete.insert(dbname);
|
||||||
|
|
||||||
std::set<std::string> paths;
|
std::set<std::string> paths;
|
||||||
for (const DbPath& db_path : options.db_paths) {
|
for (const DbPath& db_path : options.db_paths) {
|
||||||
|
@ -5300,18 +5310,19 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
||||||
(type == kTableFile ||
|
(type == kTableFile ||
|
||||||
type == kBlobFile)) { // Lock file will be deleted at end
|
type == kBlobFile)) { // Lock file will be deleted at end
|
||||||
std::string file_path = path + "/" + fname;
|
std::string file_path = path + "/" + fname;
|
||||||
Status del = DeleteDBFile(&soptions, file_path, dbname,
|
Status del = DeleteUnaccountedDBFile(&soptions, file_path, dbname,
|
||||||
/*force_bg=*/false, /*force_fg=*/false);
|
/*force_bg=*/false,
|
||||||
|
/*force_fg=*/false, bucket);
|
||||||
if (!del.ok() && result.ok()) {
|
if (!del.ok() && result.ok()) {
|
||||||
result = del;
|
result = del;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// TODO: Should we return an error if we cannot delete the directory?
|
|
||||||
env->DeleteDir(path).PermitUncheckedError();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
paths_to_delete.merge(paths);
|
||||||
|
|
||||||
std::vector<std::string> walDirFiles;
|
std::vector<std::string> walDirFiles;
|
||||||
std::string archivedir = ArchivalDirectory(dbname);
|
std::string archivedir = ArchivalDirectory(dbname);
|
||||||
bool wal_dir_exists = false;
|
bool wal_dir_exists = false;
|
||||||
|
@ -5335,46 +5346,49 @@ Status DestroyDB(const std::string& dbname, const Options& options,
|
||||||
// Delete archival files.
|
// Delete archival files.
|
||||||
for (const auto& file : archiveFiles) {
|
for (const auto& file : archiveFiles) {
|
||||||
if (ParseFileName(file, &number, &type) && type == kWalFile) {
|
if (ParseFileName(file, &number, &type) && type == kWalFile) {
|
||||||
Status del =
|
Status del = DeleteUnaccountedDBFile(
|
||||||
DeleteDBFile(&soptions, archivedir + "/" + file, archivedir,
|
&soptions, archivedir + "/" + file, archivedir,
|
||||||
/*force_bg=*/false, /*force_fg=*/!wal_in_db_path);
|
/*force_bg=*/false, /*force_fg=*/!wal_in_db_path, bucket);
|
||||||
if (!del.ok() && result.ok()) {
|
if (!del.ok() && result.ok()) {
|
||||||
result = del;
|
result = del;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Ignore error in case dir contains other files
|
paths_to_delete.insert(archivedir);
|
||||||
env->DeleteDir(archivedir).PermitUncheckedError();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete log files in the WAL dir
|
// Delete log files in the WAL dir
|
||||||
if (wal_dir_exists) {
|
if (wal_dir_exists) {
|
||||||
for (const auto& file : walDirFiles) {
|
for (const auto& file : walDirFiles) {
|
||||||
if (ParseFileName(file, &number, &type) && type == kWalFile) {
|
if (ParseFileName(file, &number, &type) && type == kWalFile) {
|
||||||
Status del =
|
Status del = DeleteUnaccountedDBFile(
|
||||||
DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number),
|
&soptions, LogFileName(soptions.wal_dir, number),
|
||||||
soptions.wal_dir, /*force_bg=*/false,
|
soptions.wal_dir, /*force_bg=*/false,
|
||||||
/*force_fg=*/!wal_in_db_path);
|
/*force_fg=*/!wal_in_db_path, bucket);
|
||||||
if (!del.ok() && result.ok()) {
|
if (!del.ok() && result.ok()) {
|
||||||
result = del;
|
result = del;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Ignore error in case dir contains other files
|
paths_to_delete.insert(soptions.wal_dir);
|
||||||
env->DeleteDir(soptions.wal_dir).PermitUncheckedError();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ignore error since state is already gone
|
// Ignore error since state is already gone
|
||||||
env->UnlockFile(lock).PermitUncheckedError();
|
env->UnlockFile(lock).PermitUncheckedError();
|
||||||
env->DeleteFile(lockname).PermitUncheckedError();
|
env->DeleteFile(lockname).PermitUncheckedError();
|
||||||
|
|
||||||
|
// Make sure trash files are all cleared before return.
|
||||||
|
if (sfm && bucket.has_value()) {
|
||||||
|
sfm->WaitForEmptyTrashBucket(bucket.value());
|
||||||
|
}
|
||||||
// sst_file_manager holds a ref to the logger. Make sure the logger is
|
// sst_file_manager holds a ref to the logger. Make sure the logger is
|
||||||
// gone before trying to remove the directory.
|
// gone before trying to remove the directory.
|
||||||
soptions.sst_file_manager.reset();
|
soptions.sst_file_manager.reset();
|
||||||
|
|
||||||
// Ignore error in case dir contains other files
|
// Ignore error in case dir contains other files
|
||||||
env->DeleteDir(dbname).PermitUncheckedError();
|
for (const auto& path_to_delete : paths_to_delete) {
|
||||||
;
|
env->DeleteDir(path_to_delete).PermitUncheckedError();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1226,6 +1226,8 @@ class DBImpl : public DB {
|
||||||
return logs_.back().number;
|
return logs_.back().number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TEST_DeleteObsoleteFiles();
|
||||||
|
|
||||||
const std::unordered_set<uint64_t>& TEST_GetFilesGrabbedForPurge() const {
|
const std::unordered_set<uint64_t>& TEST_GetFilesGrabbedForPurge() const {
|
||||||
return files_grabbed_for_purge_;
|
return files_grabbed_for_purge_;
|
||||||
}
|
}
|
||||||
|
|
|
@ -314,6 +314,11 @@ const autovector<uint64_t>& DBImpl::TEST_GetFilesToQuarantine() const {
|
||||||
return error_handler_.GetFilesToQuarantine();
|
return error_handler_.GetFilesToQuarantine();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DBImpl::TEST_DeleteObsoleteFiles() {
|
||||||
|
InstrumentedMutexLock l(&mutex_);
|
||||||
|
DeleteObsoleteFiles();
|
||||||
|
}
|
||||||
|
|
||||||
size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
|
size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
|
||||||
InstrumentedMutexLock l(&const_cast<DBImpl*>(this)->stats_history_mutex_);
|
InstrumentedMutexLock l(&const_cast<DBImpl*>(this)->stats_history_mutex_);
|
||||||
return EstimateInMemoryStatsHistorySize();
|
return EstimateInMemoryStatsHistorySize();
|
||||||
|
|
|
@ -507,6 +507,23 @@ TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFiles) {
|
||||||
ASSERT_EQ(files_deleted, 0);
|
ASSERT_EQ(files_deleted, 0);
|
||||||
ASSERT_EQ(files_scheduled_to_delete, 0);
|
ASSERT_EQ(files_scheduled_to_delete, 0);
|
||||||
Close();
|
Close();
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"SstFileManagerImpl::ScheduleUnaccountedFileDeletion", [&](void* arg) {
|
||||||
|
assert(arg);
|
||||||
|
const std::string* const file_path =
|
||||||
|
static_cast<const std::string*>(arg);
|
||||||
|
if (EndsWith(*file_path, ".blob")) {
|
||||||
|
++files_scheduled_to_delete;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::OnDeleteFile", [&](void* arg) {
|
||||||
|
const std::string* const file_path =
|
||||||
|
static_cast<const std::string*>(arg);
|
||||||
|
if (EndsWith(*file_path, ".blob")) {
|
||||||
|
files_deleted++;
|
||||||
|
}
|
||||||
|
});
|
||||||
ASSERT_OK(DestroyDB(dbname_, options));
|
ASSERT_OK(DestroyDB(dbname_, options));
|
||||||
ASSERT_EQ(files_deleted, blob_files.size());
|
ASSERT_EQ(files_deleted, blob_files.size());
|
||||||
ASSERT_EQ(files_scheduled_to_delete, blob_files.size());
|
ASSERT_EQ(files_scheduled_to_delete, blob_files.size());
|
||||||
|
@ -649,6 +666,23 @@ TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFilesWithGC) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Close();
|
Close();
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"SstFileManagerImpl::ScheduleUnaccountedFileDeletion", [&](void* arg) {
|
||||||
|
assert(arg);
|
||||||
|
const std::string* const file_path =
|
||||||
|
static_cast<const std::string*>(arg);
|
||||||
|
if (EndsWith(*file_path, ".blob")) {
|
||||||
|
++files_scheduled_to_delete;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::OnDeleteFile", [&](void* arg) {
|
||||||
|
const std::string* const file_path =
|
||||||
|
static_cast<const std::string*>(arg);
|
||||||
|
if (EndsWith(*file_path, ".blob")) {
|
||||||
|
files_deleted++;
|
||||||
|
}
|
||||||
|
});
|
||||||
ASSERT_OK(DestroyDB(dbname_, options));
|
ASSERT_OK(DestroyDB(dbname_, options));
|
||||||
sfm->WaitForEmptyTrash();
|
sfm->WaitForEmptyTrash();
|
||||||
ASSERT_EQ(files_deleted, 5);
|
ASSERT_EQ(files_deleted, 5);
|
||||||
|
@ -887,7 +921,7 @@ TEST_P(DBWALTestWithParam, WALTrashCleanupOnOpen) {
|
||||||
// before restarting the DB.
|
// before restarting the DB.
|
||||||
// We have to set this on the 2nd to last file for it to delay deletion
|
// We have to set this on the 2nd to last file for it to delay deletion
|
||||||
// on the last file. (Quirk of DeleteScheduler::BackgroundEmptyTrash())
|
// on the last file. (Quirk of DeleteScheduler::BackgroundEmptyTrash())
|
||||||
options.sst_file_manager->SetDeleteRateBytesPerSecond(1);
|
options.sst_file_manager->SetDeleteRateBytesPerSecond(1024 * 1024);
|
||||||
}
|
}
|
||||||
ASSERT_OK(Put("Key2", DummyString(1024, v)));
|
ASSERT_OK(Put("Key2", DummyString(1024, v)));
|
||||||
ASSERT_OK(Put("Key3", DummyString(1024, v)));
|
ASSERT_OK(Put("Key3", DummyString(1024, v)));
|
||||||
|
@ -1902,6 +1936,24 @@ TEST_F(DBSSTTest, DBWithSFMForBlobFilesAtomicFlush) {
|
||||||
ASSERT_EQ(files_deleted, 1);
|
ASSERT_EQ(files_deleted, 1);
|
||||||
|
|
||||||
Close();
|
Close();
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"SstFileManagerImpl::ScheduleUnaccountedFileDeletion", [&](void* arg) {
|
||||||
|
assert(arg);
|
||||||
|
const std::string* const file_path =
|
||||||
|
static_cast<const std::string*>(arg);
|
||||||
|
if (EndsWith(*file_path, ".blob")) {
|
||||||
|
++files_scheduled_to_delete;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::OnDeleteFile", [&](void* arg) {
|
||||||
|
const std::string* const file_path =
|
||||||
|
static_cast<const std::string*>(arg);
|
||||||
|
if (EndsWith(*file_path, ".blob")) {
|
||||||
|
files_deleted++;
|
||||||
|
}
|
||||||
|
});
|
||||||
ASSERT_OK(DestroyDB(dbname_, options));
|
ASSERT_OK(DestroyDB(dbname_, options));
|
||||||
|
|
||||||
ASSERT_EQ(files_scheduled_to_delete, 4);
|
ASSERT_EQ(files_scheduled_to_delete, 4);
|
||||||
|
|
|
@ -31,6 +31,7 @@ DeleteScheduler::DeleteScheduler(SystemClock* clock, FileSystem* fs,
|
||||||
total_trash_size_(0),
|
total_trash_size_(0),
|
||||||
rate_bytes_per_sec_(rate_bytes_per_sec),
|
rate_bytes_per_sec_(rate_bytes_per_sec),
|
||||||
pending_files_(0),
|
pending_files_(0),
|
||||||
|
next_trash_bucket_(0),
|
||||||
bytes_max_delete_chunk_(bytes_max_delete_chunk),
|
bytes_max_delete_chunk_(bytes_max_delete_chunk),
|
||||||
closing_(false),
|
closing_(false),
|
||||||
cv_(&mu_),
|
cv_(&mu_),
|
||||||
|
@ -66,10 +67,8 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
||||||
total_trash_size_.load() > total_size * max_trash_db_ratio_.load())) {
|
total_trash_size_.load() > total_size * max_trash_db_ratio_.load())) {
|
||||||
// Rate limiting is disabled or trash size makes up more than
|
// Rate limiting is disabled or trash size makes up more than
|
||||||
// max_trash_db_ratio_ (default 25%) of the total DB size
|
// max_trash_db_ratio_ (default 25%) of the total DB size
|
||||||
TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
|
Status s = DeleteFileImmediately(file_path, /*accounted=*/true);
|
||||||
Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = sst_file_manager_->OnDeleteFile(file_path);
|
|
||||||
ROCKS_LOG_INFO(info_log_,
|
ROCKS_LOG_INFO(info_log_,
|
||||||
"Deleted file %s immediately, rate_bytes_per_sec %" PRIi64
|
"Deleted file %s immediately, rate_bytes_per_sec %" PRIi64
|
||||||
", total_trash_size %" PRIu64 ", total_size %" PRIi64
|
", total_trash_size %" PRIu64 ", total_size %" PRIi64
|
||||||
|
@ -77,15 +76,57 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
||||||
file_path.c_str(), rate_bytes_per_sec_.load(),
|
file_path.c_str(), rate_bytes_per_sec_.load(),
|
||||||
total_trash_size_.load(), total_size,
|
total_trash_size_.load(), total_size,
|
||||||
max_trash_db_ratio_.load());
|
max_trash_db_ratio_.load());
|
||||||
InstrumentedMutexLock l(&mu_);
|
|
||||||
RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY);
|
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
return AddFileToDeletionQueue(file_path, dir_to_sync, /*bucket=*/std::nullopt,
|
||||||
|
/*accounted=*/true);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status DeleteScheduler::DeleteUnaccountedFile(const std::string& file_path,
|
||||||
|
const std::string& dir_to_sync,
|
||||||
|
const bool force_bg,
|
||||||
|
std::optional<int32_t> bucket) {
|
||||||
|
uint64_t num_hard_links = 1;
|
||||||
|
fs_->NumFileLinks(file_path, IOOptions(), &num_hard_links, nullptr)
|
||||||
|
.PermitUncheckedError();
|
||||||
|
|
||||||
|
// We can tolerate rare races where we might immediately delete both links
|
||||||
|
// to a file.
|
||||||
|
if (rate_bytes_per_sec_.load() <= 0 || (!force_bg && num_hard_links > 1)) {
|
||||||
|
Status s = DeleteFileImmediately(file_path, /*accounted=*/false);
|
||||||
|
if (s.ok()) {
|
||||||
|
ROCKS_LOG_INFO(info_log_,
|
||||||
|
"Deleted file %s immediately, rate_bytes_per_sec %" PRIi64,
|
||||||
|
file_path.c_str(), rate_bytes_per_sec_.load());
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
return AddFileToDeletionQueue(file_path, dir_to_sync, bucket,
|
||||||
|
/*accounted=*/false);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status DeleteScheduler::DeleteFileImmediately(const std::string& file_path,
|
||||||
|
bool accounted) {
|
||||||
|
TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
|
||||||
|
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::DeleteFile::cb",
|
||||||
|
const_cast<std::string*>(&file_path));
|
||||||
|
Status s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
|
||||||
|
if (s.ok()) {
|
||||||
|
s = OnDeleteFile(file_path, accounted);
|
||||||
|
InstrumentedMutexLock l(&mu_);
|
||||||
|
RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
Status DeleteScheduler::AddFileToDeletionQueue(const std::string& file_path,
|
||||||
|
const std::string& dir_to_sync,
|
||||||
|
std::optional<int32_t> bucket,
|
||||||
|
bool accounted) {
|
||||||
// Move file to trash
|
// Move file to trash
|
||||||
std::string trash_file;
|
std::string trash_file;
|
||||||
Status s = MarkAsTrash(file_path, &trash_file);
|
Status s = MarkAsTrash(file_path, accounted, &trash_file);
|
||||||
ROCKS_LOG_INFO(info_log_, "Mark file: %s as trash -- %s", trash_file.c_str(),
|
ROCKS_LOG_INFO(info_log_, "Mark file: %s as trash -- %s", trash_file.c_str(),
|
||||||
s.ToString().c_str());
|
s.ToString().c_str());
|
||||||
|
|
||||||
|
@ -94,7 +135,7 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
||||||
file_path.c_str(), s.ToString().c_str());
|
file_path.c_str(), s.ToString().c_str());
|
||||||
s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
|
s = fs_->DeleteFile(file_path, IOOptions(), nullptr);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = sst_file_manager_->OnDeleteFile(file_path);
|
s = OnDeleteFile(file_path, accounted);
|
||||||
ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately",
|
ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately",
|
||||||
trash_file.c_str());
|
trash_file.c_str());
|
||||||
InstrumentedMutexLock l(&mu_);
|
InstrumentedMutexLock l(&mu_);
|
||||||
|
@ -104,11 +145,13 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the total trash size
|
// Update the total trash size
|
||||||
uint64_t trash_file_size = 0;
|
if (accounted) {
|
||||||
IOStatus io_s =
|
uint64_t trash_file_size = 0;
|
||||||
fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr);
|
IOStatus io_s =
|
||||||
if (io_s.ok()) {
|
fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr);
|
||||||
total_trash_size_.fetch_add(trash_file_size);
|
if (io_s.ok()) {
|
||||||
|
total_trash_size_.fetch_add(trash_file_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//**TODO: What should we do if we failed to
|
//**TODO: What should we do if we failed to
|
||||||
// get the file size?
|
// get the file size?
|
||||||
|
@ -117,8 +160,15 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
||||||
{
|
{
|
||||||
InstrumentedMutexLock l(&mu_);
|
InstrumentedMutexLock l(&mu_);
|
||||||
RecordTick(stats_.get(), FILES_MARKED_TRASH);
|
RecordTick(stats_.get(), FILES_MARKED_TRASH);
|
||||||
queue_.emplace(trash_file, dir_to_sync);
|
queue_.emplace(trash_file, dir_to_sync, accounted, bucket);
|
||||||
pending_files_++;
|
pending_files_++;
|
||||||
|
if (bucket.has_value()) {
|
||||||
|
auto iter = pending_files_in_buckets_.find(bucket.value());
|
||||||
|
assert(iter != pending_files_in_buckets_.end());
|
||||||
|
if (iter != pending_files_in_buckets_.end()) {
|
||||||
|
iter->second++;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (pending_files_ == 1) {
|
if (pending_files_ == 1) {
|
||||||
cv_.SignalAll();
|
cv_.SignalAll();
|
||||||
}
|
}
|
||||||
|
@ -177,7 +227,7 @@ Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
|
Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
|
||||||
std::string* trash_file) {
|
bool accounted, std::string* trash_file) {
|
||||||
// Sanity check of the path
|
// Sanity check of the path
|
||||||
size_t idx = file_path.rfind('/');
|
size_t idx = file_path.rfind('/');
|
||||||
if (idx == std::string::npos || idx == file_path.size() - 1) {
|
if (idx == std::string::npos || idx == file_path.size() - 1) {
|
||||||
|
@ -211,7 +261,7 @@ Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
|
||||||
}
|
}
|
||||||
cnt++;
|
cnt++;
|
||||||
}
|
}
|
||||||
if (s.ok()) {
|
if (s.ok() && accounted) {
|
||||||
s = sst_file_manager_->OnMoveFile(file_path, *trash_file);
|
s = sst_file_manager_->OnMoveFile(file_path, *trash_file);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
|
@ -235,6 +285,8 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
||||||
uint64_t total_deleted_bytes = 0;
|
uint64_t total_deleted_bytes = 0;
|
||||||
int64_t current_delete_rate = rate_bytes_per_sec_.load();
|
int64_t current_delete_rate = rate_bytes_per_sec_.load();
|
||||||
while (!queue_.empty() && !closing_) {
|
while (!queue_.empty() && !closing_) {
|
||||||
|
// Satisfy static analysis.
|
||||||
|
std::optional<int32_t> bucket = std::nullopt;
|
||||||
if (current_delete_rate != rate_bytes_per_sec_.load()) {
|
if (current_delete_rate != rate_bytes_per_sec_.load()) {
|
||||||
// User changed the delete rate
|
// User changed the delete rate
|
||||||
current_delete_rate = rate_bytes_per_sec_.load();
|
current_delete_rate = rate_bytes_per_sec_.load();
|
||||||
|
@ -247,14 +299,17 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
||||||
// Get new file to delete
|
// Get new file to delete
|
||||||
const FileAndDir& fad = queue_.front();
|
const FileAndDir& fad = queue_.front();
|
||||||
std::string path_in_trash = fad.fname;
|
std::string path_in_trash = fad.fname;
|
||||||
|
std::string dir_to_sync = fad.dir;
|
||||||
|
bool accounted = fad.accounted;
|
||||||
|
bucket = fad.bucket;
|
||||||
|
|
||||||
// We don't need to hold the lock while deleting the file
|
// We don't need to hold the lock while deleting the file
|
||||||
mu_.Unlock();
|
mu_.Unlock();
|
||||||
uint64_t deleted_bytes = 0;
|
uint64_t deleted_bytes = 0;
|
||||||
bool is_complete = true;
|
bool is_complete = true;
|
||||||
// Delete file from trash and update total_penlty value
|
// Delete file from trash and update total_penlty value
|
||||||
Status s =
|
Status s = DeleteTrashFile(path_in_trash, dir_to_sync, accounted,
|
||||||
DeleteTrashFile(path_in_trash, fad.dir, &deleted_bytes, &is_complete);
|
&deleted_bytes, &is_complete);
|
||||||
total_deleted_bytes += deleted_bytes;
|
total_deleted_bytes += deleted_bytes;
|
||||||
mu_.Lock();
|
mu_.Lock();
|
||||||
if (is_complete) {
|
if (is_complete) {
|
||||||
|
@ -288,12 +343,20 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
||||||
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
|
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
|
||||||
&total_penalty);
|
&total_penalty);
|
||||||
|
|
||||||
|
int32_t pending_files_in_bucket = std::numeric_limits<int32_t>::max();
|
||||||
if (is_complete) {
|
if (is_complete) {
|
||||||
pending_files_--;
|
pending_files_--;
|
||||||
|
if (bucket.has_value()) {
|
||||||
|
auto iter = pending_files_in_buckets_.find(bucket.value());
|
||||||
|
assert(iter != pending_files_in_buckets_.end());
|
||||||
|
if (iter != pending_files_in_buckets_.end()) {
|
||||||
|
pending_files_in_bucket = iter->second--;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (pending_files_ == 0) {
|
if (pending_files_ == 0 || pending_files_in_bucket == 0) {
|
||||||
// Unblock WaitForEmptyTrash since there are no more files waiting
|
// Unblock WaitForEmptyTrash or WaitForEmptyTrashBucket since there are
|
||||||
// to be deleted
|
// no more files waiting to be deleted
|
||||||
cv_.SignalAll();
|
cv_.SignalAll();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -302,12 +365,14 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
||||||
|
|
||||||
Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
||||||
const std::string& dir_to_sync,
|
const std::string& dir_to_sync,
|
||||||
uint64_t* deleted_bytes,
|
bool accounted, uint64_t* deleted_bytes,
|
||||||
bool* is_complete) {
|
bool* is_complete) {
|
||||||
uint64_t file_size;
|
uint64_t file_size;
|
||||||
Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr);
|
Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr);
|
||||||
*is_complete = true;
|
*is_complete = true;
|
||||||
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
|
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
|
||||||
|
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::DeleteTrashFile::cb",
|
||||||
|
const_cast<std::string*>(&path_in_trash));
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
bool need_full_delete = true;
|
bool need_full_delete = true;
|
||||||
if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
|
if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
|
||||||
|
@ -374,7 +439,7 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
||||||
}
|
}
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
*deleted_bytes = file_size;
|
*deleted_bytes = file_size;
|
||||||
s = sst_file_manager_->OnDeleteFile(path_in_trash);
|
s = OnDeleteFile(path_in_trash, accounted);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -384,12 +449,24 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
||||||
path_in_trash.c_str(), s.ToString().c_str());
|
path_in_trash.c_str(), s.ToString().c_str());
|
||||||
*deleted_bytes = 0;
|
*deleted_bytes = 0;
|
||||||
} else {
|
} else {
|
||||||
total_trash_size_.fetch_sub(*deleted_bytes);
|
if (accounted) {
|
||||||
|
total_trash_size_.fetch_sub(*deleted_bytes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status DeleteScheduler::OnDeleteFile(const std::string& file_path,
|
||||||
|
bool accounted) {
|
||||||
|
if (accounted) {
|
||||||
|
return sst_file_manager_->OnDeleteFile(file_path);
|
||||||
|
}
|
||||||
|
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::OnDeleteFile",
|
||||||
|
const_cast<std::string*>(&file_path));
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
void DeleteScheduler::WaitForEmptyTrash() {
|
void DeleteScheduler::WaitForEmptyTrash() {
|
||||||
InstrumentedMutexLock l(&mu_);
|
InstrumentedMutexLock l(&mu_);
|
||||||
while (pending_files_ > 0 && !closing_) {
|
while (pending_files_ > 0 && !closing_) {
|
||||||
|
@ -397,6 +474,30 @@ void DeleteScheduler::WaitForEmptyTrash() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<int32_t> DeleteScheduler::NewTrashBucket() {
|
||||||
|
if (rate_bytes_per_sec_.load() <= 0) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
InstrumentedMutexLock l(&mu_);
|
||||||
|
int32_t bucket_number = next_trash_bucket_++;
|
||||||
|
pending_files_in_buckets_.emplace(bucket_number, 0);
|
||||||
|
return bucket_number;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DeleteScheduler::WaitForEmptyTrashBucket(int32_t bucket) {
|
||||||
|
InstrumentedMutexLock l(&mu_);
|
||||||
|
if (bucket >= next_trash_bucket_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto iter = pending_files_in_buckets_.find(bucket);
|
||||||
|
while (iter != pending_files_in_buckets_.end() && iter->second > 0 &&
|
||||||
|
!closing_) {
|
||||||
|
cv_.Wait();
|
||||||
|
iter = pending_files_in_buckets_.find(bucket);
|
||||||
|
}
|
||||||
|
pending_files_in_buckets_.erase(bucket);
|
||||||
|
}
|
||||||
|
|
||||||
void DeleteScheduler::MaybeCreateBackgroundThread() {
|
void DeleteScheduler::MaybeCreateBackgroundThread() {
|
||||||
if (bg_thread_ == nullptr && rate_bytes_per_sec_.load() > 0) {
|
if (bg_thread_ == nullptr && rate_bytes_per_sec_.load() > 0) {
|
||||||
bg_thread_.reset(
|
bg_thread_.reset(
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <optional>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
@ -48,16 +49,45 @@ class DeleteScheduler {
|
||||||
MaybeCreateBackgroundThread();
|
MaybeCreateBackgroundThread();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark file as trash directory and schedule its deletion. If force_bg is
|
// Delete an accounted file that is tracked by `SstFileManager` and should be
|
||||||
// set, it forces the file to always be deleted in the background thread,
|
// tracked by this `DeleteScheduler` when it's deleted.
|
||||||
// except when rate limiting is disabled
|
// The file is deleted immediately if slow deletion is disabled. If force_bg
|
||||||
|
// is not set and trash to db size ratio exceeded the configured threshold,
|
||||||
|
// it is immediately deleted too. In all other cases, the file will be moved
|
||||||
|
// to a trash directory and scheduled for deletion by a background thread.
|
||||||
Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
|
Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
|
||||||
const bool force_bg = false);
|
const bool force_bg = false);
|
||||||
|
|
||||||
// Wait for all files being deleteing in the background to finish or for
|
// Delete an unaccounted file that is not tracked by `SstFileManager` and
|
||||||
|
// should not be tracked by this `DeleteScheduler` when it's deleted.
|
||||||
|
// The file is deleted immediately if slow deletion is disabled. If force_bg
|
||||||
|
// is not set and the file have more than 1 hard link, it is immediately
|
||||||
|
// deleted too. In all other cases, the file will be moved to a trash
|
||||||
|
// directory and scheduled for deletion by a background thread.
|
||||||
|
// This API also supports assign a file to a specified bucket created by
|
||||||
|
// `NewTrashBucket` when delete files in the background. So the caller can
|
||||||
|
// wait for a specific bucket to be empty by checking the
|
||||||
|
// `WaitForEmptyTrashBucket` API.
|
||||||
|
Status DeleteUnaccountedFile(const std::string& file_path,
|
||||||
|
const std::string& dir_to_sync,
|
||||||
|
const bool force_bg = false,
|
||||||
|
std::optional<int32_t> bucket = std::nullopt);
|
||||||
|
|
||||||
|
// Wait for all files being deleted in the background to finish or for
|
||||||
// destructor to be called.
|
// destructor to be called.
|
||||||
void WaitForEmptyTrash();
|
void WaitForEmptyTrash();
|
||||||
|
|
||||||
|
// Creates a new trash bucket. A bucket is only created and returned when slow
|
||||||
|
// deletion is enabled.
|
||||||
|
// For each bucket that is created, the user should also call
|
||||||
|
// `WaitForEmptyTrashBucket` after scheduling file deletions to make sure the
|
||||||
|
// trash files are all cleared.
|
||||||
|
std::optional<int32_t> NewTrashBucket();
|
||||||
|
|
||||||
|
// Wait for all the files in the specified bucket to be deleted in the
|
||||||
|
// background or for the destructor to be called.
|
||||||
|
void WaitForEmptyTrashBucket(int32_t bucket);
|
||||||
|
|
||||||
// Return a map containing errors that happened in BackgroundEmptyTrash
|
// Return a map containing errors that happened in BackgroundEmptyTrash
|
||||||
// file_path => error status
|
// file_path => error status
|
||||||
std::map<std::string, Status> GetBackgroundErrors();
|
std::map<std::string, Status> GetBackgroundErrors();
|
||||||
|
@ -87,12 +117,21 @@ class DeleteScheduler {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);
|
Status DeleteFileImmediately(const std::string& file_path, bool accounted);
|
||||||
|
|
||||||
|
Status AddFileToDeletionQueue(const std::string& file_path,
|
||||||
|
const std::string& dir_to_sync,
|
||||||
|
std::optional<int32_t> bucket, bool accounted);
|
||||||
|
|
||||||
|
Status MarkAsTrash(const std::string& file_path, bool accounted,
|
||||||
|
std::string* path_in_trash);
|
||||||
|
|
||||||
Status DeleteTrashFile(const std::string& path_in_trash,
|
Status DeleteTrashFile(const std::string& path_in_trash,
|
||||||
const std::string& dir_to_sync,
|
const std::string& dir_to_sync, bool accounted,
|
||||||
uint64_t* deleted_bytes, bool* is_complete);
|
uint64_t* deleted_bytes, bool* is_complete);
|
||||||
|
|
||||||
|
Status OnDeleteFile(const std::string& file_path, bool accounted);
|
||||||
|
|
||||||
void BackgroundEmptyTrash();
|
void BackgroundEmptyTrash();
|
||||||
|
|
||||||
void MaybeCreateBackgroundThread();
|
void MaybeCreateBackgroundThread();
|
||||||
|
@ -104,19 +143,28 @@ class DeleteScheduler {
|
||||||
std::atomic<uint64_t> total_trash_size_;
|
std::atomic<uint64_t> total_trash_size_;
|
||||||
// Maximum number of bytes that should be deleted per second
|
// Maximum number of bytes that should be deleted per second
|
||||||
std::atomic<int64_t> rate_bytes_per_sec_;
|
std::atomic<int64_t> rate_bytes_per_sec_;
|
||||||
// Mutex to protect queue_, pending_files_, bg_errors_, closing_, stats_
|
// Mutex to protect queue_, pending_files_, next_trash_bucket_,
|
||||||
|
// pending_files_in_buckets_, bg_errors_, closing_, stats_
|
||||||
InstrumentedMutex mu_;
|
InstrumentedMutex mu_;
|
||||||
|
|
||||||
struct FileAndDir {
|
struct FileAndDir {
|
||||||
FileAndDir(const std::string& f, const std::string& d) : fname(f), dir(d) {}
|
FileAndDir(const std::string& _fname, const std::string& _dir,
|
||||||
|
bool _accounted, std::optional<int32_t> _bucket)
|
||||||
|
: fname(_fname), dir(_dir), accounted(_accounted), bucket(_bucket) {}
|
||||||
std::string fname;
|
std::string fname;
|
||||||
std::string dir; // empty will be skipped.
|
std::string dir; // empty will be skipped.
|
||||||
|
bool accounted;
|
||||||
|
std::optional<int32_t> bucket;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Queue of trash files that need to be deleted
|
// Queue of trash files that need to be deleted
|
||||||
std::queue<FileAndDir> queue_;
|
std::queue<FileAndDir> queue_;
|
||||||
// Number of trash files that are waiting to be deleted
|
// Number of trash files that are waiting to be deleted
|
||||||
int32_t pending_files_;
|
int32_t pending_files_;
|
||||||
|
// Next trash bucket that can be created
|
||||||
|
int32_t next_trash_bucket_;
|
||||||
|
// A mapping from trash bucket to number of pending files in the bucket
|
||||||
|
std::map<int32_t, int32_t> pending_files_in_buckets_;
|
||||||
uint64_t bytes_max_delete_chunk_;
|
uint64_t bytes_max_delete_chunk_;
|
||||||
// Errors that happened in BackgroundEmptyTrash (file_path => error)
|
// Errors that happened in BackgroundEmptyTrash (file_path => error)
|
||||||
std::map<std::string, Status> bg_errors_;
|
std::map<std::string, Status> bg_errors_;
|
||||||
|
@ -127,6 +175,7 @@ class DeleteScheduler {
|
||||||
// Condition variable signaled in these conditions
|
// Condition variable signaled in these conditions
|
||||||
// - pending_files_ value change from 0 => 1
|
// - pending_files_ value change from 0 => 1
|
||||||
// - pending_files_ value change from 1 => 0
|
// - pending_files_ value change from 1 => 0
|
||||||
|
// - a value in pending_files_in_buckets change from 1 => 0
|
||||||
// - closing_ value is set to true
|
// - closing_ value is set to true
|
||||||
InstrumentedCondVar cv_;
|
InstrumentedCondVar cv_;
|
||||||
// Background thread running BackgroundEmptyTrash
|
// Background thread running BackgroundEmptyTrash
|
||||||
|
@ -138,6 +187,10 @@ class DeleteScheduler {
|
||||||
// If the trash size constitutes for more than this fraction of the total DB
|
// If the trash size constitutes for more than this fraction of the total DB
|
||||||
// size we will start deleting new files passed to DeleteScheduler
|
// size we will start deleting new files passed to DeleteScheduler
|
||||||
// immediately
|
// immediately
|
||||||
|
// Unaccounted files passed for deletion will not cause change in
|
||||||
|
// total_trash_size_ or affect the DeleteScheduler::total_trash_size_ over
|
||||||
|
// SstFileManager::total_size_ ratio. Their slow deletion is not subject to
|
||||||
|
// this configured threshold either.
|
||||||
std::atomic<double> max_trash_db_ratio_;
|
std::atomic<double> max_trash_db_ratio_;
|
||||||
static const uint64_t kMicrosInSecond = 1000 * 1000LL;
|
static const uint64_t kMicrosInSecond = 1000 * 1000LL;
|
||||||
std::shared_ptr<Statistics> stats_;
|
std::shared_ptr<Statistics> stats_;
|
||||||
|
|
|
@ -78,7 +78,7 @@ class DeleteSchedulerTest : public testing::Test {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string NewDummyFile(const std::string& file_name, uint64_t size = 1024,
|
std::string NewDummyFile(const std::string& file_name, uint64_t size = 1024,
|
||||||
size_t dummy_files_dirs_idx = 0) {
|
size_t dummy_files_dirs_idx = 0, bool track = true) {
|
||||||
std::string file_path =
|
std::string file_path =
|
||||||
dummy_files_dirs_[dummy_files_dirs_idx] + "/" + file_name;
|
dummy_files_dirs_[dummy_files_dirs_idx] + "/" + file_name;
|
||||||
std::unique_ptr<WritableFile> f;
|
std::unique_ptr<WritableFile> f;
|
||||||
|
@ -86,7 +86,9 @@ class DeleteSchedulerTest : public testing::Test {
|
||||||
std::string data(size, 'A');
|
std::string data(size, 'A');
|
||||||
EXPECT_OK(f->Append(data));
|
EXPECT_OK(f->Append(data));
|
||||||
EXPECT_OK(f->Close());
|
EXPECT_OK(f->Close());
|
||||||
EXPECT_OK(sst_file_mgr_->OnAddFile(file_path));
|
if (track) {
|
||||||
|
EXPECT_OK(sst_file_mgr_->OnAddFile(file_path));
|
||||||
|
}
|
||||||
return file_path;
|
return file_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -353,6 +355,8 @@ TEST_F(DeleteSchedulerTest, DisableRateLimiting) {
|
||||||
ASSERT_EQ(num_files,
|
ASSERT_EQ(num_files,
|
||||||
stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
|
stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
|
||||||
|
|
||||||
|
ASSERT_FALSE(delete_scheduler_->NewTrashBucket().has_value());
|
||||||
|
|
||||||
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -718,6 +722,141 @@ TEST_F(DeleteSchedulerTest, IsTrashCheck) {
|
||||||
ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.trashx"));
|
ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.trashx"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DeleteSchedulerTest, DeleteAccountedAndUnaccountedFiles) {
|
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / s
|
||||||
|
NewDeleteScheduler();
|
||||||
|
|
||||||
|
// Create 100 files, every file is 1 KB
|
||||||
|
int num_files = 100; // 100 files
|
||||||
|
uint64_t file_size = 1024; // 1 KB as a file size
|
||||||
|
std::vector<std::string> generated_files;
|
||||||
|
for (int i = 0; i < num_files; i++) {
|
||||||
|
std::string file_name = "file" + std::to_string(i) + ".data";
|
||||||
|
generated_files.push_back(NewDummyFile(file_name, file_size,
|
||||||
|
/*dummy_files_dirs_idx*/ 0,
|
||||||
|
/*track=*/false));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < num_files; i++) {
|
||||||
|
if (i % 2) {
|
||||||
|
ASSERT_OK(sst_file_mgr_->OnAddFile(generated_files[i], file_size));
|
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], ""));
|
||||||
|
} else {
|
||||||
|
ASSERT_OK(
|
||||||
|
delete_scheduler_->DeleteUnaccountedFile(generated_files[i], ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete_scheduler_->WaitForEmptyTrash();
|
||||||
|
ASSERT_EQ(0, delete_scheduler_->GetTotalTrashSize());
|
||||||
|
ASSERT_EQ(0, sst_file_mgr_->GetTotalSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DeleteSchedulerTest, ConcurrentlyDeleteUnaccountedFilesInBuckets) {
|
||||||
|
int bg_delete_file = 0;
|
||||||
|
int fg_delete_file = 0;
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteTrashFile:DeleteFile",
|
||||||
|
[&](void* /*arg*/) { bg_delete_file++; });
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; });
|
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / s
|
||||||
|
NewDeleteScheduler();
|
||||||
|
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
// Create 1000 files, every file is 1 KB
|
||||||
|
int num_files = 1000;
|
||||||
|
uint64_t file_size = 1024; // 1 KB as a file size
|
||||||
|
std::vector<std::string> generated_files;
|
||||||
|
for (int i = 0; i < num_files; i++) {
|
||||||
|
std::string file_name = "file" + std::to_string(i) + ".data";
|
||||||
|
generated_files.push_back(NewDummyFile(file_name, file_size,
|
||||||
|
/*dummy_files_dirs_idx*/ 0,
|
||||||
|
/*track=*/false));
|
||||||
|
}
|
||||||
|
// Concurrently delete files in different buckets and check all the buckets
|
||||||
|
// are empty.
|
||||||
|
int thread_cnt = 10;
|
||||||
|
int files_per_thread = 100;
|
||||||
|
std::atomic<int> thread_num(0);
|
||||||
|
std::vector<port::Thread> threads;
|
||||||
|
std::function<void()> delete_thread = [&]() {
|
||||||
|
std::optional<int32_t> bucket = delete_scheduler_->NewTrashBucket();
|
||||||
|
ASSERT_TRUE(bucket.has_value());
|
||||||
|
int idx = thread_num.fetch_add(1);
|
||||||
|
int range_start = idx * files_per_thread;
|
||||||
|
int range_end = range_start + files_per_thread;
|
||||||
|
for (int j = range_start; j < range_end; j++) {
|
||||||
|
ASSERT_OK(delete_scheduler_->DeleteUnaccountedFile(
|
||||||
|
generated_files[j], "", /*false_bg=*/false, bucket));
|
||||||
|
}
|
||||||
|
delete_scheduler_->WaitForEmptyTrashBucket(bucket.value());
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int i = 0; i < thread_cnt; i++) {
|
||||||
|
threads.emplace_back(delete_thread);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < threads.size(); i++) {
|
||||||
|
threads[i].join();
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(0, delete_scheduler_->GetTotalTrashSize());
|
||||||
|
ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
|
||||||
|
ASSERT_EQ(1000, stats_->getAndResetTickerCount(FILES_MARKED_TRASH));
|
||||||
|
ASSERT_EQ(0, fg_delete_file);
|
||||||
|
ASSERT_EQ(1000, bg_delete_file);
|
||||||
|
|
||||||
|
// OK to re check an already empty bucket
|
||||||
|
delete_scheduler_->WaitForEmptyTrashBucket(9);
|
||||||
|
// Invalid bucket return too.
|
||||||
|
delete_scheduler_->WaitForEmptyTrashBucket(100);
|
||||||
|
std::optional<int32_t> next_bucket = delete_scheduler_->NewTrashBucket();
|
||||||
|
ASSERT_TRUE(next_bucket.has_value());
|
||||||
|
ASSERT_EQ(10, next_bucket.value());
|
||||||
|
delete_scheduler_->WaitForEmptyTrashBucket(10);
|
||||||
|
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DeleteSchedulerTest,
|
||||||
|
ImmediatelyDeleteUnaccountedFilesWithRemainingLinks) {
|
||||||
|
int bg_delete_file = 0;
|
||||||
|
int fg_delete_file = 0;
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteTrashFile:DeleteFile",
|
||||||
|
[&](void* /*arg*/) { bg_delete_file++; });
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; });
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
|
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec
|
||||||
|
NewDeleteScheduler();
|
||||||
|
|
||||||
|
std::string file1 = NewDummyFile("data_1", 500 * 1024,
|
||||||
|
/*dummy_files_dirs_idx*/ 0, /*track=*/false);
|
||||||
|
std::string file2 = NewDummyFile("data_2", 100 * 1024,
|
||||||
|
/*dummy_files_dirs_idx*/ 0, /*track=*/false);
|
||||||
|
|
||||||
|
ASSERT_OK(env_->LinkFile(file1, dummy_files_dirs_[0] + "/data_1b"));
|
||||||
|
ASSERT_OK(env_->LinkFile(file2, dummy_files_dirs_[0] + "/data_2b"));
|
||||||
|
|
||||||
|
// Should delete in 4 batch if there is no hardlink
|
||||||
|
ASSERT_OK(
|
||||||
|
delete_scheduler_->DeleteUnaccountedFile(file1, "", /*force_bg=*/false));
|
||||||
|
ASSERT_OK(
|
||||||
|
delete_scheduler_->DeleteUnaccountedFile(file2, "", /*force_bg=*/false));
|
||||||
|
|
||||||
|
delete_scheduler_->WaitForEmptyTrash();
|
||||||
|
|
||||||
|
ASSERT_EQ(0, delete_scheduler_->GetTotalTrashSize());
|
||||||
|
ASSERT_EQ(0, bg_delete_file);
|
||||||
|
ASSERT_EQ(2, fg_delete_file);
|
||||||
|
ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_MARKED_TRASH));
|
||||||
|
ASSERT_EQ(2, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY));
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
|
@ -125,8 +125,8 @@ IOStatus CreateFile(FileSystem* fs, const std::string& destination,
|
||||||
Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
||||||
const std::string& fname, const std::string& dir_to_sync,
|
const std::string& fname, const std::string& dir_to_sync,
|
||||||
const bool force_bg, const bool force_fg) {
|
const bool force_bg, const bool force_fg) {
|
||||||
SstFileManagerImpl* sfm =
|
SstFileManagerImpl* sfm = static_cast_with_check<SstFileManagerImpl>(
|
||||||
static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get());
|
db_options->sst_file_manager.get());
|
||||||
if (sfm && !force_fg) {
|
if (sfm && !force_fg) {
|
||||||
return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
|
return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
|
||||||
} else {
|
} else {
|
||||||
|
@ -134,6 +134,21 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status DeleteUnaccountedDBFile(const ImmutableDBOptions* db_options,
|
||||||
|
const std::string& fname,
|
||||||
|
const std::string& dir_to_sync,
|
||||||
|
const bool force_bg, const bool force_fg,
|
||||||
|
std::optional<int32_t> bucket) {
|
||||||
|
SstFileManagerImpl* sfm = static_cast_with_check<SstFileManagerImpl>(
|
||||||
|
db_options->sst_file_manager.get());
|
||||||
|
if (sfm && !force_fg) {
|
||||||
|
return sfm->ScheduleUnaccountedFileDeletion(fname, dir_to_sync, force_bg,
|
||||||
|
bucket);
|
||||||
|
} else {
|
||||||
|
return db_options->env->DeleteFile(fname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// requested_checksum_func_name brings the function name of the checksum
|
// requested_checksum_func_name brings the function name of the checksum
|
||||||
// generator in checksum_factory. Empty string is permitted, in which case the
|
// generator in checksum_factory. Empty string is permitted, in which case the
|
||||||
// name of the generator created by the factory is unchecked. When
|
// name of the generator created by the factory is unchecked. When
|
||||||
|
|
|
@ -55,6 +55,16 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
||||||
const std::string& fname, const std::string& path_to_sync,
|
const std::string& fname, const std::string& path_to_sync,
|
||||||
const bool force_bg, const bool force_fg);
|
const bool force_bg, const bool force_fg);
|
||||||
|
|
||||||
|
// Delete an unaccounted DB file that is not tracked by SstFileManager and will
|
||||||
|
// not be tracked by its DeleteScheduler when getting deleted.
|
||||||
|
// If a legitimate bucket is provided and this file is scheduled for slow
|
||||||
|
// deletion, it will be assigned to the specified trash bucket.
|
||||||
|
Status DeleteUnaccountedDBFile(const ImmutableDBOptions* db_options,
|
||||||
|
const std::string& fname,
|
||||||
|
const std::string& dir_to_sync,
|
||||||
|
const bool force_bg, const bool force_fg,
|
||||||
|
std::optional<int32_t> bucket);
|
||||||
|
|
||||||
// TODO(hx235): pass the whole DBOptions intead of its individual fields
|
// TODO(hx235): pass the whole DBOptions intead of its individual fields
|
||||||
IOStatus GenerateOneFileChecksum(
|
IOStatus GenerateOneFileChecksum(
|
||||||
FileSystem* fs, const std::string& file_path,
|
FileSystem* fs, const std::string& file_path,
|
||||||
|
|
|
@ -421,10 +421,28 @@ Status SstFileManagerImpl::ScheduleFileDeletion(const std::string& file_path,
|
||||||
return delete_scheduler_.DeleteFile(file_path, path_to_sync, force_bg);
|
return delete_scheduler_.DeleteFile(file_path, path_to_sync, force_bg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status SstFileManagerImpl::ScheduleUnaccountedFileDeletion(
|
||||||
|
const std::string& file_path, const std::string& dir_to_sync,
|
||||||
|
const bool force_bg, std::optional<int32_t> bucket) {
|
||||||
|
TEST_SYNC_POINT_CALLBACK(
|
||||||
|
"SstFileManagerImpl::ScheduleUnaccountedFileDeletion",
|
||||||
|
const_cast<std::string*>(&file_path));
|
||||||
|
return delete_scheduler_.DeleteUnaccountedFile(file_path, dir_to_sync,
|
||||||
|
force_bg, bucket);
|
||||||
|
}
|
||||||
|
|
||||||
void SstFileManagerImpl::WaitForEmptyTrash() {
|
void SstFileManagerImpl::WaitForEmptyTrash() {
|
||||||
delete_scheduler_.WaitForEmptyTrash();
|
delete_scheduler_.WaitForEmptyTrash();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<int32_t> SstFileManagerImpl::NewTrashBucket() {
|
||||||
|
return delete_scheduler_.NewTrashBucket();
|
||||||
|
}
|
||||||
|
|
||||||
|
void SstFileManagerImpl::WaitForEmptyTrashBucket(int32_t bucket) {
|
||||||
|
delete_scheduler_.WaitForEmptyTrashBucket(bucket);
|
||||||
|
}
|
||||||
|
|
||||||
void SstFileManagerImpl::OnAddFileImpl(const std::string& file_path,
|
void SstFileManagerImpl::OnAddFileImpl(const std::string& file_path,
|
||||||
uint64_t file_size) {
|
uint64_t file_size) {
|
||||||
auto tracked_file = tracked_files_.find(file_path);
|
auto tracked_file = tracked_files_.find(file_path);
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "db/compaction/compaction.h"
|
#include "db/compaction/compaction.h"
|
||||||
|
@ -118,17 +118,40 @@ class SstFileManagerImpl : public SstFileManager {
|
||||||
// not guaranteed
|
// not guaranteed
|
||||||
bool CancelErrorRecovery(ErrorHandler* db);
|
bool CancelErrorRecovery(ErrorHandler* db);
|
||||||
|
|
||||||
// Mark file as trash and schedule it's deletion. If force_bg is set, it
|
// Mark a file as trash and schedule its deletion. If force_bg is set, it
|
||||||
// forces the file to be deleting in the background regardless of DB size,
|
// forces the file to be deleting in the background regardless of DB size,
|
||||||
// except when rate limited delete is disabled
|
// except when rate limited delete is disabled.
|
||||||
virtual Status ScheduleFileDeletion(const std::string& file_path,
|
virtual Status ScheduleFileDeletion(const std::string& file_path,
|
||||||
const std::string& dir_to_sync,
|
const std::string& dir_to_sync,
|
||||||
const bool force_bg = false);
|
const bool force_bg = false);
|
||||||
|
|
||||||
// Wait for all files being deleteing in the background to finish or for
|
// Delete an unaccounted file. The file is deleted immediately if slow
|
||||||
|
// deletion is disabled. A file with more than 1 hard links will be deleted
|
||||||
|
// immediately unless force_bg is set. In other cases, files will be scheduled
|
||||||
|
// for slow deletion, and assigned to the specified bucket if a legitimate one
|
||||||
|
// is provided. A legitimate bucket is one that is created with the
|
||||||
|
// `NewTrashBucket` API, and for which `WaitForEmptyTrashBucket` hasn't been
|
||||||
|
// called yet.
|
||||||
|
virtual Status ScheduleUnaccountedFileDeletion(
|
||||||
|
const std::string& file_path, const std::string& dir_to_sync,
|
||||||
|
const bool force_bg = false,
|
||||||
|
std::optional<int32_t> bucket = std::nullopt);
|
||||||
|
|
||||||
|
// Wait for all files being deleted in the background to finish or for
|
||||||
// destructor to be called.
|
// destructor to be called.
|
||||||
virtual void WaitForEmptyTrash();
|
virtual void WaitForEmptyTrash();
|
||||||
|
|
||||||
|
// Creates a new trash bucket. A legitimate bucket is only created and
|
||||||
|
// returned when slow deletion is enabled.
|
||||||
|
// For each bucket that is created and used, the user should also call
|
||||||
|
// `WaitForEmptyTrashBucket` after scheduling file deletions to make sure all
|
||||||
|
// the trash files are cleared.
|
||||||
|
std::optional<int32_t> NewTrashBucket();
|
||||||
|
|
||||||
|
// Wait for all the files in the specified bucket to be deleted in the
|
||||||
|
// background or for destructor to be called.
|
||||||
|
virtual void WaitForEmptyTrashBucket(int32_t bucket);
|
||||||
|
|
||||||
DeleteScheduler* delete_scheduler() { return &delete_scheduler_; }
|
DeleteScheduler* delete_scheduler() { return &delete_scheduler_; }
|
||||||
|
|
||||||
// Stop the error recovery background thread. This should be called only
|
// Stop the error recovery background thread. This should be called only
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
*Make DestroyDB supports slow deletion when it's configured in `SstFileManager`. The slow deletion is subject to the configured `rate_bytes_per_sec`, but not subject to the `max_trash_db_ratio`.
|
|
@ -760,7 +760,7 @@ TEST_F(BlobDBTest, SstFileManager) {
|
||||||
// run the same test for Get(), MultiGet() and Iterator each.
|
// run the same test for Get(), MultiGet() and Iterator each.
|
||||||
std::shared_ptr<SstFileManager> sst_file_manager(
|
std::shared_ptr<SstFileManager> sst_file_manager(
|
||||||
NewSstFileManager(mock_env_.get()));
|
NewSstFileManager(mock_env_.get()));
|
||||||
sst_file_manager->SetDeleteRateBytesPerSecond(1);
|
sst_file_manager->SetDeleteRateBytesPerSecond(1024 * 1024);
|
||||||
SstFileManagerImpl *sfm =
|
SstFileManagerImpl *sfm =
|
||||||
static_cast<SstFileManagerImpl *>(sst_file_manager.get());
|
static_cast<SstFileManagerImpl *>(sst_file_manager.get());
|
||||||
|
|
||||||
|
@ -818,7 +818,7 @@ TEST_F(BlobDBTest, SstFileManagerRestart) {
|
||||||
// run the same test for Get(), MultiGet() and Iterator each.
|
// run the same test for Get(), MultiGet() and Iterator each.
|
||||||
std::shared_ptr<SstFileManager> sst_file_manager(
|
std::shared_ptr<SstFileManager> sst_file_manager(
|
||||||
NewSstFileManager(mock_env_.get()));
|
NewSstFileManager(mock_env_.get()));
|
||||||
sst_file_manager->SetDeleteRateBytesPerSecond(1);
|
sst_file_manager->SetDeleteRateBytesPerSecond(1024 * 1024);
|
||||||
SstFileManagerImpl *sfm =
|
SstFileManagerImpl *sfm =
|
||||||
static_cast<SstFileManagerImpl *>(sst_file_manager.get());
|
static_cast<SstFileManagerImpl *>(sst_file_manager.get());
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "rocksdb/rocksdb_namespace.h"
|
#include "rocksdb/rocksdb_namespace.h"
|
||||||
|
#include "rocksdb/sst_file_manager.h"
|
||||||
#include "rocksdb/utilities/transaction_db.h"
|
#include "rocksdb/utilities/transaction_db.h"
|
||||||
#include "test_util/sync_point.h"
|
#include "test_util/sync_point.h"
|
||||||
#include "test_util/testharness.h"
|
#include "test_util/testharness.h"
|
||||||
|
@ -1031,6 +1032,100 @@ TEST_F(CheckpointTest, CheckpointWithArchievedLog) {
|
||||||
delete snapshot_db;
|
delete snapshot_db;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class CheckpointDestroyTest : public CheckpointTest,
|
||||||
|
public testing::WithParamInterface<bool> {};
|
||||||
|
|
||||||
|
TEST_P(CheckpointDestroyTest, DisableEnableSlowDeletion) {
|
||||||
|
bool slow_deletion = GetParam();
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.num_levels = 2;
|
||||||
|
options.disable_auto_compactions = true;
|
||||||
|
Status s;
|
||||||
|
options.sst_file_manager.reset(NewSstFileManager(
|
||||||
|
options.env, options.info_log, "", slow_deletion ? 1024 * 1024 : 0,
|
||||||
|
false /* delete_existing_trash */, &s, 1));
|
||||||
|
ASSERT_OK(s);
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
ASSERT_OK(Put("foo", "a"));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
ASSERT_OK(Put("bar", "b"));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
ASSERT_OK(Put("bar", "val" + std::to_string(i)));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
}
|
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(0), 10);
|
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(1), 2);
|
||||||
|
|
||||||
|
Checkpoint* checkpoint;
|
||||||
|
ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
|
||||||
|
ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name_));
|
||||||
|
|
||||||
|
delete checkpoint;
|
||||||
|
checkpoint = nullptr;
|
||||||
|
|
||||||
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||||
|
ASSERT_EQ(NumTableFilesAtLevel(1), 2);
|
||||||
|
|
||||||
|
DB* snapshot_db;
|
||||||
|
ASSERT_OK(DB::Open(options, snapshot_name_, &snapshot_db));
|
||||||
|
ReadOptions read_opts;
|
||||||
|
std::string get_result;
|
||||||
|
ASSERT_OK(snapshot_db->Get(read_opts, "foo", &get_result));
|
||||||
|
ASSERT_EQ("a", get_result);
|
||||||
|
ASSERT_OK(snapshot_db->Get(read_opts, "bar", &get_result));
|
||||||
|
ASSERT_EQ("val9", get_result);
|
||||||
|
delete snapshot_db;
|
||||||
|
|
||||||
|
// Make sure original obsolete files for hard linked files are all deleted.
|
||||||
|
DBImpl* db_impl = static_cast_with_check<DBImpl>(db_);
|
||||||
|
db_impl->TEST_DeleteObsoleteFiles();
|
||||||
|
auto sfm = static_cast_with_check<SstFileManagerImpl>(
|
||||||
|
options.sst_file_manager.get());
|
||||||
|
ASSERT_NE(nullptr, sfm);
|
||||||
|
sfm->WaitForEmptyTrash();
|
||||||
|
// SST file 2-12 for "bar" will be compacted into one file on L1 during the
|
||||||
|
// compaction after checkpoint is created. SST file 1 on L1: foo, seq:
|
||||||
|
// 1 (hard links is 1 after checkpoint destroy)
|
||||||
|
std::atomic<int> bg_delete_sst{0};
|
||||||
|
std::atomic<int> fg_delete_sst{0};
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteFile::cb", [&](void* arg) {
|
||||||
|
ASSERT_NE(nullptr, arg);
|
||||||
|
auto file_name = *static_cast<std::string*>(arg);
|
||||||
|
if (file_name.size() >= 4 &&
|
||||||
|
file_name.compare(file_name.size() - 4, 4, ".sst") == 0) {
|
||||||
|
fg_delete_sst.fetch_add(1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteTrashFile::cb", [&](void* arg) {
|
||||||
|
ASSERT_NE(nullptr, arg);
|
||||||
|
auto file_name = *static_cast<std::string*>(arg);
|
||||||
|
if (file_name.size() >= 10 &&
|
||||||
|
file_name.compare(file_name.size() - 10, 10, ".sst.trash") == 0) {
|
||||||
|
bg_delete_sst.fetch_add(1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
ASSERT_OK(DestroyDB(snapshot_name_, options));
|
||||||
|
if (slow_deletion) {
|
||||||
|
ASSERT_EQ(fg_delete_sst, 1);
|
||||||
|
ASSERT_EQ(bg_delete_sst, 11);
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(fg_delete_sst, 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ("a", Get("foo"));
|
||||||
|
ASSERT_EQ("val9", Get("bar"));
|
||||||
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
}
|
||||||
|
INSTANTIATE_TEST_CASE_P(CheckpointDestroyTest, CheckpointDestroyTest,
|
||||||
|
::testing::Values(true, false));
|
||||||
|
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
Loading…
Reference in New Issue