PurgeObsoleteFiles() unittest

Summary:
Created a unittest that verifies that automatic deletion performed by PurgeObsoleteFiles() works correctly.

Also, few small fixes on the logic part -- call version_set_->GetObsoleteFiles() in FindObsoleteFiles() instead of on some arbitrary positions.

Test Plan: Created a unit test

Reviewers: dhruba, haobo, nkg-

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D14079
This commit is contained in:
Igor Canadi 2013-11-14 18:03:57 -08:00
parent 94dde686bb
commit a0ce3fd00a
4 changed files with 146 additions and 67 deletions

View file

@ -249,7 +249,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
manual_compaction_(nullptr),
logger_(nullptr),
disable_delete_obsolete_files_(false),
delete_obsolete_files_last_run_(0),
delete_obsolete_files_last_run_(options.env->NowMicros()),
purge_wal_files_last_run_(0),
last_stats_dump_time_microsec_(0),
default_interval_to_delete_obsolete_WAL_(600),
@ -437,7 +437,13 @@ void DBImpl::MaybeDumpStats() {
// Returns the list of live files in 'sst_live' and the list
// of all files in the filesystem in 'all_files'.
void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
// no_full_scan = true -- never do the full scan using GetChildren()
// force = false -- don't force the full scan, except every
// options_.delete_obsolete_files_period_micros
// force = true -- force the full scan
void DBImpl::FindObsoleteFiles(DeletionState& deletion_state,
bool force,
bool no_full_scan) {
mutex_.AssertHeld();
// if deletion is disabled, do nothing
@ -445,14 +451,30 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
return;
}
// get obsolete files
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
// store the current filenum, lognum, etc
deletion_state.manifest_file_number = versions_->ManifestFileNumber();
deletion_state.log_number = versions_->LogNumber();
deletion_state.prev_log_number = versions_->PrevLogNumber();
// This method is costly when the number of files is large.
// Do not allow it to trigger more often than once in
// delete_obsolete_files_period_micros.
// TODO we should not be catching live files here,
// version_->GetObsoleteFiles() should tell us the truth, which
// files are to be deleted. However, it does not, so we do
// this to be safe, i.e. never delete files that could be
// live
deletion_state.sst_live.assign(pending_outputs_.begin(),
pending_outputs_.end());
versions_->AddLiveFiles(&deletion_state.sst_live);
// if no_full_scan, never do the full scan
if (no_full_scan) {
return;
}
// if force == true, always fall through and do the full scan
// if force == false, do the full scan only every
// options_.delete_obsolete_files_period_micros
if (!force && options_.delete_obsolete_files_period_micros != 0) {
const uint64_t now_micros = env_->NowMicros();
if (delete_obsolete_files_last_run_ +
@ -462,12 +484,6 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
delete_obsolete_files_last_run_ = now_micros;
}
// Make a list of all of the live files; set is slow, should not
// be used.
deletion_state.sst_live.assign(pending_outputs_.begin(),
pending_outputs_.end());
versions_->AddLiveFiles(&deletion_state.sst_live);
// set of all files in the directory
env_->GetChildren(dbname_, &deletion_state.all_files); // Ignore errors
@ -488,8 +504,10 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
// files in sst_delete_files and log_delete_files.
// It is not necessary to hold the mutex when invoking this method.
void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
// if deletion is disabled, do nothing
if (disable_delete_obsolete_files_) {
// this checks if FindObsoleteFiles() was run before. If not, don't do
// PurgeObsoleteFiles(). If FindObsoleteFiles() was run, we need to also
// run PurgeObsoleteFiles(), even if disable_delete_obsolete_files_ is true
if (state.manifest_file_number == 0) {
return;
}
@ -1791,7 +1809,6 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
CleanupCompaction(compact, status);
versions_->ReleaseCompactionFiles(c.get(), status);
c->ReleaseInputs();
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
*madeProgress = true;
}
c.reset();
@ -2454,6 +2471,7 @@ struct IterState {
port::Mutex* mu;
Version* version;
std::vector<MemTable*> mem; // includes both mem_ and imm_
DBImpl *db;
};
static void CleanupIteratorState(void* arg1, void* arg2) {
@ -2463,7 +2481,12 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
state->mem[i]->Unref();
}
state->version->Unref();
// delete only the sst obsolete files
DBImpl::DeletionState deletion_state;
// fast path FindObsoleteFiles
state->db->FindObsoleteFiles(deletion_state, false, true);
state->mu->Unlock();
state->db->PurgeObsoleteFiles(deletion_state);
delete state;
}
} // namespace
@ -2498,6 +2521,7 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
versions_->current()->Ref();
cleanup->mu = &mutex_;
cleanup->db = this;
cleanup->version = versions_->current();
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);
@ -3375,9 +3399,6 @@ Status DBImpl::DeleteFile(std::string name) {
}
edit.DeleteFile(level, number);
status = versions_->LogAndApply(&edit, &mutex_);
if (status.ok()) {
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
}
FindObsoleteFiles(deletion_state, false);
} // lock released here
LogFlush(options_.info_log);

View file

@ -123,6 +123,54 @@ class DBImpl : public DB {
default_interval_to_delete_obsolete_WAL_ = default_interval_to_delete_obsolete_WAL;
}
// needed for CleanupIteratorState
struct DeletionState {
inline bool HaveSomethingToDelete() const {
return all_files.size() ||
sst_delete_files.size() ||
log_delete_files.size();
}
// a list of all files that we'll consider deleting
// (every once in a while this is filled up with all files
// in the DB directory)
std::vector<std::string> all_files;
// the list of all live sst files that cannot be deleted
std::vector<uint64_t> sst_live;
// a list of sst files that we need to delete
std::vector<FileMetaData*> sst_delete_files;
// a list of log files that we need to delete
std::vector<uint64_t> log_delete_files;
// the current manifest_file_number, log_number and prev_log_number
// that corresponds to the set of files in 'live'.
uint64_t manifest_file_number, log_number, prev_log_number;
DeletionState() {
manifest_file_number = 0;
log_number = 0;
prev_log_number = 0;
}
};
// Returns the list of live files in 'live' and the list
// of all files in the filesystem in 'all_files'.
// If force == false and the last call was less than
// options_.delete_obsolete_files_period_micros microseconds ago,
// it will not fill up the deletion_state
void FindObsoleteFiles(DeletionState& deletion_state,
bool force,
bool no_full_scan = false);
// Diffs the files listed in filenames and those that do not
// belong to live files are posibly removed. Also, removes all the
// files in sst_delete_files and log_delete_files.
// It is not necessary to hold the mutex when invoking this method.
void PurgeObsoleteFiles(DeletionState& deletion_state);
protected:
Env* const env_;
const std::string dbname_;
@ -158,38 +206,6 @@ class DBImpl : public DB {
const Status CreateArchivalDirectory();
struct DeletionState {
inline bool HaveSomethingToDelete() const {
return all_files.size() ||
sst_delete_files.size() ||
log_delete_files.size();
}
// a list of all files that we'll consider deleting
// (every once in a while this is filled up with all files
// in the DB directory)
std::vector<std::string> all_files;
// the list of all live sst files that cannot be deleted
std::vector<uint64_t> sst_live;
// a list of sst files that we need to delete
std::vector<FileMetaData*> sst_delete_files;
// a list of log files that we need to delete
std::vector<uint64_t> log_delete_files;
// the current manifest_file_number, log_number and prev_log_number
// that corresponds to the set of files in 'live'.
uint64_t manifest_file_number, log_number, prev_log_number;
DeletionState() {
manifest_file_number = 0;
log_number = 0;
prev_log_number = 0;
}
};
// Delete any unneeded files and stale in-memory entries.
void DeleteObsoleteFiles();
@ -243,19 +259,6 @@ class DBImpl : public DB {
void AllocateCompactionOutputFileNumbers(CompactionState* compact);
void ReleaseCompactionUnusedFileNumbers(CompactionState* compact);
// Returns the list of live files in 'live' and the list
// of all files in the filesystem in 'all_files'.
// If force == false and the last call was less than
// options_.delete_obsolete_files_period_micros microseconds ago,
// it will not fill up the deletion_state
void FindObsoleteFiles(DeletionState& deletion_state, bool force);
// Diffs the files listed in filenames and those that do not
// belong to live files are posibly removed. Also, removes all the
// files in sst_delete_files and log_delete_files.
// It is not necessary to hold the mutex when invoking this method.
void PurgeObsoleteFiles(DeletionState& deletion_state);
void PurgeObsoleteWALFiles();
Status AppendSortedWalsOfType(const std::string& path,

View file

@ -40,6 +40,7 @@ class DeleteFileTest {
options_.WAL_ttl_seconds = 300; // Used to test log files
options_.WAL_size_limit_MB = 1024; // Used to test log files
dbname_ = test::TmpDir() + "/deletefile_test";
options_.wal_dir = dbname_ + "/wal_files";
DestroyDB(dbname_, options_);
numlevels_ = 7;
ASSERT_OK(ReopenDB(true));
@ -107,6 +108,28 @@ class DeleteFileTest {
ASSERT_OK(dbi->TEST_WaitForFlushMemTable());
}
void CheckFileTypeCounts(std::string& dir,
int required_log,
int required_sst,
int required_manifest) {
std::vector<std::string> filenames;
env_->GetChildren(dir, &filenames);
int log_cnt = 0, sst_cnt = 0, manifest_cnt = 0;
for (auto file : filenames) {
uint64_t number;
FileType type;
if (ParseFileName(file, &number, &type)) {
log_cnt += (type == kLogFile);
sst_cnt += (type == kTableFile);
manifest_cnt += (type == kDescriptorFile);
}
}
ASSERT_EQ(required_log, log_cnt);
ASSERT_EQ(required_sst, sst_cnt);
ASSERT_EQ(required_manifest, manifest_cnt);
}
};
TEST(DeleteFileTest, AddKeysAndQueryLevels) {
@ -156,6 +179,34 @@ TEST(DeleteFileTest, AddKeysAndQueryLevels) {
CloseDB();
}
TEST(DeleteFileTest, PurgeObsoleteFilesTest) {
CreateTwoLevels();
// there should be only one (empty) log file because CreateTwoLevels()
// flushes the memtables to disk
CheckFileTypeCounts(options_.wal_dir, 1, 0, 0);
// 2 ssts, 1 manifest
CheckFileTypeCounts(dbname_, 0, 2, 1);
std::string first("0"), last("999999");
Slice first_slice(first), last_slice(last);
db_->CompactRange(&first_slice, &last_slice, true, 2);
// 1 sst after compaction
CheckFileTypeCounts(dbname_, 0, 1, 1);
// this time, we keep an iterator alive
ReopenDB(true);
Iterator *itr = 0;
CreateTwoLevels();
itr = db_->NewIterator(ReadOptions());
db_->CompactRange(&first_slice, &last_slice, true, 2);
// 3 sst after compaction with live iterator
CheckFileTypeCounts(dbname_, 0, 3, 1);
delete itr;
// 1 sst after iterator deletion
CheckFileTypeCounts(dbname_, 0, 1, 1);
CloseDB();
}
TEST(DeleteFileTest, DeleteFileWithIterator) {
CreateTwoLevels();
ReadOptions options;
@ -196,11 +247,11 @@ TEST(DeleteFileTest, DeleteLogFiles) {
// Should not succeed because live logs are not allowed to be deleted
std::unique_ptr<LogFile> alive_log = std::move(logfiles.back());
ASSERT_EQ(alive_log->Type(), kAliveLogFile);
ASSERT_TRUE(env_->FileExists(dbname_ + "/" + alive_log->PathName()));
ASSERT_TRUE(env_->FileExists(options_.wal_dir + "/" + alive_log->PathName()));
fprintf(stdout, "Deleting alive log file %s\n",
alive_log->PathName().c_str());
ASSERT_TRUE(!db_->DeleteFile(alive_log->PathName()).ok());
ASSERT_TRUE(env_->FileExists(dbname_ + "/" + alive_log->PathName()));
ASSERT_TRUE(env_->FileExists(options_.wal_dir + "/" + alive_log->PathName()));
logfiles.clear();
// Call Flush to bring about a new working log file and add more keys
@ -214,11 +265,13 @@ TEST(DeleteFileTest, DeleteLogFiles) {
ASSERT_GT(logfiles.size(), 0UL);
std::unique_ptr<LogFile> archived_log = std::move(logfiles.front());
ASSERT_EQ(archived_log->Type(), kArchivedLogFile);
ASSERT_TRUE(env_->FileExists(dbname_ + "/" + archived_log->PathName()));
ASSERT_TRUE(env_->FileExists(options_.wal_dir + "/" +
archived_log->PathName()));
fprintf(stdout, "Deleting archived log file %s\n",
archived_log->PathName().c_str());
ASSERT_OK(db_->DeleteFile(archived_log->PathName()));
ASSERT_TRUE(!env_->FileExists(dbname_ + "/" + archived_log->PathName()));
ASSERT_TRUE(!env_->FileExists(options_.wal_dir + "/" +
archived_log->PathName()));
CloseDB();
}

View file

@ -387,7 +387,9 @@ struct Options {
bool disable_seek_compaction;
// The periodicity when obsolete files get deleted. The default
// value is 6 hours.
// value is 6 hours. The files that get out of scope by compaction
// process will still get automatically delete on every compaction,
// regardless of this setting
uint64_t delete_obsolete_files_period_micros;
// Maximum number of concurrent background jobs, submitted to