From 6f101303542f2259075a9abe88d836c792ead411 Mon Sep 17 00:00:00 2001
From: Igor Canadi
Date: Thu, 5 Feb 2015 20:09:42 -0800
Subject: [PATCH 1/3] Fix DestroyDB

Summary:
When DestroyDB() finds a WAL file in the DB directory, it assumes the file
actually lives in the WAL directory. This is confusing, because DestroyDB()
then reports an IO error when it tries to delete the WAL file from the DB
directory. For example:
https://ci-builds.fb.com/job/rocksdb_clang_build/296/console

This change fixes our unit tests.

Test Plan: unit tests work

Reviewers: yhchiang, rven, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D32907
---
 db/db_impl.cc | 33 ++++++++++++++++++---------------
 db/db_test.cc |  7 ++++---
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/db/db_impl.cc b/db/db_impl.cc
index dd627313b9..3365e18a46 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -3885,23 +3885,10 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
   const Options& soptions(SanitizeOptions(dbname, &comparator, options));
   Env* env = soptions.env;
   std::vector<std::string> filenames;
-  std::vector<std::string> archiveFiles;
 
-  std::string archivedir = ArchivalDirectory(dbname);
   // Ignore error in case directory does not exist
   env->GetChildren(dbname, &filenames);
 
-  if (dbname != soptions.wal_dir) {
-    std::vector<std::string> logfilenames;
-    env->GetChildren(soptions.wal_dir, &logfilenames);
-    filenames.insert(filenames.end(), logfilenames.begin(), logfilenames.end());
-    archivedir = ArchivalDirectory(soptions.wal_dir);
-  }
-
-  if (filenames.empty()) {
-    return Status::OK();
-  }
-
   FileLock* lock;
   const std::string lockname = LockFileName(dbname);
   Status result = env->LockFile(lockname, &lock);
@@ -3915,8 +3902,6 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
         Status del;
         if (type == kMetaDatabase) {
           del = DestroyDB(dbname + "/" + filenames[i], options);
-        } else if (type == kLogFile) {
-          del = env->DeleteFile(soptions.wal_dir + "/" + filenames[i]);
         } else {
           del = env->DeleteFile(dbname + "/" + filenames[i]);
         }
@@ -3939,6 +3924,24 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
       }
     }
 
+    std::vector<std::string> walDirFiles;
+    std::string archivedir = ArchivalDirectory(dbname);
+    if (dbname != soptions.wal_dir) {
+      env->GetChildren(soptions.wal_dir, &walDirFiles);
+      archivedir = ArchivalDirectory(soptions.wal_dir);
+    }
+
+    // Delete log files in the WAL dir
+    for (const auto& file : walDirFiles) {
+      if (ParseFileName(file, &number, &type) && type == kLogFile) {
+        Status del = env->DeleteFile(soptions.wal_dir + "/" + file);
+        if (result.ok() && !del.ok()) {
+          result = del;
+        }
+      }
+    }
+
+    std::vector<std::string> archiveFiles;
     env->GetChildren(archivedir, &archiveFiles);
    // Delete archival files.
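For context on what patch 1 fixes, here is a minimal standalone sketch (not part of the patch; the /tmp paths are illustrative) of the configuration involved: a database whose WAL files live outside the DB directory. Before this change, DestroyDB() looked for those WAL files in the DB directory and reported an IO error; afterwards it deletes DB-directory files and wal_dir log files independently.

// Sketch only: exercises DestroyDB() with a separate WAL directory.
// Error handling is reduced to asserts for brevity.
#include <cassert>
#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // WAL lives outside the DB directory -- the case this patch fixes.
  options.wal_dir = "/tmp/destroydb_example_wal";

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/destroydb_example", &db);
  assert(s.ok());
  delete db;

  // With the patch, log files are deleted from wal_dir rather than being
  // looked up (and failing) in the DB directory.
  s = rocksdb::DestroyDB("/tmp/destroydb_example", options);
  assert(s.ok());
  return 0;
}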
 for (size_t i = 0; i < archiveFiles.size(); ++i) {
diff --git a/db/db_test.cc b/db/db_test.cc
index 720978021c..b19d2550b2 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -624,7 +624,7 @@ class DBTest {
       options.db_log_dir = test::TmpDir(env_);
       break;
     case kWalDirAndMmapReads:
-      options.wal_dir = test::TmpDir(env_) + "/wal";
+      options.wal_dir = dbname_ + "/wal";
       // mmap reads should be orthogonal to WalDir setting, so we piggyback to
       // this option config to test mmap reads as well
       options.allow_mmap_reads = true;
@@ -2595,8 +2595,9 @@ TEST(DBTest, IgnoreRecoveredLog) {
   Options options = CurrentOptions();
   options.create_if_missing = true;
   options.merge_operator = MergeOperators::CreateUInt64AddOperator();
-  options.wal_dir = dbname_ + "/logs";
-  DestroyAndReopen(options);
+  options.wal_dir = dbname_ + "/wal";
+  Destroy(options);
+  Reopen(options);
 
   // fill up the DB
   std::string one, two;

From 8e83a9d3153e36ab0dadf433aa795d9cff124886 Mon Sep 17 00:00:00 2001
From: Yueh-Hsuan Chiang
Date: Fri, 6 Feb 2015 02:38:14 -0800
Subject: [PATCH 2/3] Add a missing field for STATE_MUTEX_WAIT to global_state_table

Summary:
Add the missing STATE_MUTEX_WAIT entry to global_state_table. This fixes
the thread_list_test failure.

Test Plan: thread_list_test
---
 util/thread_operation.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/thread_operation.h b/util/thread_operation.h
index b4326f5bd8..45521e227f 100644
--- a/util/thread_operation.h
+++ b/util/thread_operation.h
@@ -54,6 +54,7 @@ struct StateInfo {
 // rows in this global table.
 static StateInfo global_state_table[] = {
   {ThreadStatus::STATE_UNKNOWN, ""},
+  {ThreadStatus::STATE_MUTEX_WAIT, "Mutex Wait"},
 };
 
 #else
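Patch 2 is a one-line fix for a classic failure mode: a lookup table maintained in parallel with an enum falls out of sync when the enum grows. The sketch below is a hypothetical illustration, not RocksDB's actual thread_operation.h; it shows the pre-patch failure mode and a compile-time guard that would catch it.

// Hypothetical sketch: a parallel table missing a row for a new enum value.
#include <cassert>
#include <cstddef>

enum StateType { STATE_UNKNOWN = 0, STATE_MUTEX_WAIT, NUM_STATE_TYPES };

struct StateInfo {
  StateType type;
  const char* name;
};

static StateInfo global_state_table[] = {
    {STATE_UNKNOWN, ""},
    {STATE_MUTEX_WAIT, "Mutex Wait"},  // the row this patch adds
};

const char* GetStateName(StateType type) {
  for (const auto& row : global_state_table) {
    if (row.type == type) {
      return row.name;
    }
  }
  return nullptr;  // missing row: the pre-patch failure mode
}

int main() {
  // A static size check like this catches a missing row at compile time.
  static_assert(sizeof(global_state_table) / sizeof(StateInfo) ==
                    NUM_STATE_TYPES,
                "global_state_table must cover every StateType");
  assert(GetStateName(STATE_MUTEX_WAIT) != nullptr);
  return 0;
}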
From 2a979822b6f72a4c563e2c23f32b5c8523584c91 Mon Sep 17 00:00:00 2001
From: Igor Canadi
Date: Fri, 6 Feb 2015 08:44:30 -0800
Subject: [PATCH 3/3] Fix deleting obsolete files

Summary:
This diff basically reverts D30249 and also adds a unit test that was
failing before this patch.

I have no idea how I didn't catch this terrible bug when writing the diff,
sorry about that :(

I think we should redesign our system of keeping track of and deleting
files. This is already the second bug in this critical piece of code. I'll
think of a few ideas.

BTW, this diff is also a performance regression when running lots of column
families. I plan to revisit this separately.

Test Plan: added a unit test

Reviewers: yhchiang, rven, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D33045
---
 db/db_impl.cc         | 17 +++------
 db/db_test.cc         | 74 +++++++++++++++++++++++++++++++++++++++++++
 db/job_context.h      |  3 +-
 db/version_builder.cc |  1 -
 db/version_edit.h     |  7 +---
 db/version_set.cc     |  8 +----
 util/options.cc       |  4 +++
 7 files changed, 86 insertions(+), 28 deletions(-)

diff --git a/db/db_impl.cc b/db/db_impl.cc
index 3365e18a46..be1f7037f3 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -430,9 +430,10 @@ void DBImpl::MaybeDumpStats() {
   }
 }
 
+// * Returns the list of live files in 'sst_live'
 // If it's doing full scan:
-// * Returns the list of live files in 'full_scan_sst_live' and the list
-//   of all files in the filesystem in 'full_scan_candidate_files'.
+// * Returns the list of all files in the filesystem in
+//   'full_scan_candidate_files'.
 // Otherwise, gets obsolete files from VersionSet.
 // no_full_scan = true -- never do the full scan using GetChildren()
 // force = false -- don't force the full scan, except every
@@ -440,7 +441,6 @@
 // force = true -- force the full scan
 void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
                                bool no_full_scan) {
-  // TODO(icanadi) clean up FindObsoleteFiles, no need to do full scans anymore
   mutex_.AssertHeld();
 
   // if deletion is disabled, do nothing
@@ -482,13 +482,8 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
     job_context->min_pending_output = std::numeric_limits<uint64_t>::max();
   }
 
+  versions_->AddLiveFiles(&job_context->sst_live);
   if (doing_the_full_scan) {
-    // Here we find all files in the DB directory and all the live files. In the
-    // DeleteObsoleteFiles(), we will calculate a set difference (all_files -
-    // live_files) and delete all files in that difference. If we're not doing
-    // the full scan we don't need to get live files, because all files returned
-    // by GetObsoleteFiles() will be dead (and need to be deleted)
-    versions_->AddLiveFiles(&job_context->full_scan_sst_live);
     for (uint32_t path_id = 0; path_id < db_options_.db_paths.size();
          path_id++) {
       // set of all files in the directory. We'll exclude files that are still
@@ -554,7 +549,7 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
   // Now, convert live list to an unordered map, WITHOUT mutex held;
   // set is slow.
   std::unordered_map<uint64_t, const FileDescriptor*> sst_live_map;
-  for (const FileDescriptor& fd : state.full_scan_sst_live) {
+  for (const FileDescriptor& fd : state.sst_live) {
     sst_live_map[fd.GetNumber()] = &fd;
   }
 
@@ -1566,7 +1561,6 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
     VersionEdit edit;
     edit.SetColumnFamily(cfd->GetID());
     for (const auto& f : cfd->current()->storage_info()->LevelFiles(level)) {
-      f->moved = true;
       edit.DeleteFile(level, f->fd.GetNumber());
       edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(),
                    f->fd.GetFileSize(), f->smallest, f->largest,
@@ -2223,7 +2217,6 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, JobContext* job_context,
     // Move file to next level
     assert(c->num_input_files(0) == 1);
     FileMetaData* f = c->input(0, 0);
-    f->moved = true;
     c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
     c->edit()->AddFile(c->level() + 1, f->fd.GetNumber(), f->fd.GetPathId(),
                        f->fd.GetFileSize(), f->smallest, f->largest,
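The db_impl.cc hunks above change FindObsoleteFiles() to populate sst_live unconditionally, so PurgeObsoleteFiles() always subtracts live files from its deletion candidates rather than trusting GetObsoleteFiles() alone. A simplified sketch of that set-difference idea follows (illustrative only: the FilesToDelete name and string-based file lists are made up; the real code works with FileDescriptor and candidate-file structs):

// Sketch: obsolete-file purging as "candidates minus live files".
#include <string>
#include <unordered_set>
#include <vector>

std::vector<std::string> FilesToDelete(
    const std::vector<std::string>& candidate_files,
    const std::vector<std::string>& live_files) {
  std::unordered_set<std::string> live(live_files.begin(), live_files.end());
  std::vector<std::string> to_delete;
  for (const auto& f : candidate_files) {
    if (live.find(f) == live.end()) {
      to_delete.push_back(f);  // not referenced by any live Version
    }
  }
  return to_delete;
}

Because the live set is now always filled in, a file referenced by a live Version can never look "dead" to the purge pass, which is the invariant the D30249 optimization broke.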
diff --git a/db/db_test.cc b/db/db_test.cc
index b19d2550b2..715d63970a 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -10256,6 +10256,80 @@ TEST(DBTest, DontDeleteMovedFile) {
   Reopen(options);
 }
 
+TEST(DBTest, DeleteMovedFileAfterCompaction) {
+  // iter 1 -- delete_obsolete_files_period_micros == 0
+  for (int iter = 0; iter < 2; ++iter) {
+    // This test triggers move compaction and verifies that the file is not
+    // deleted when it's part of move compaction
+    Options options = CurrentOptions();
+    options.env = env_;
+    if (iter == 1) {
+      options.delete_obsolete_files_period_micros = 0;
+    }
+    options.create_if_missing = true;
+    options.level0_file_num_compaction_trigger =
+        2;  // trigger compaction when we have 2 files
+    DestroyAndReopen(options);
+
+    Random rnd(301);
+    // Create two 1MB sst files
+    for (int i = 0; i < 2; ++i) {
+      // Create 1MB sst file
+      for (int j = 0; j < 100; ++j) {
+        ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
+      }
+      ASSERT_OK(Flush());
+    }
+    // this should execute L0->L1
+    dbfull()->TEST_WaitForCompact();
+    ASSERT_EQ("0,1", FilesPerLevel(0));
+
+    // block compactions
+    SleepingBackgroundTask sleeping_task;
+    env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task,
+                   Env::Priority::LOW);
+
+    options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
+    Reopen(options);
+    std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
+    ASSERT_EQ("0,1", FilesPerLevel(0));
+    // let compactions go
+    sleeping_task.WakeUp();
+    sleeping_task.WaitUntilDone();
+
+    // this should execute L1->L2 (move)
+    dbfull()->TEST_WaitForCompact();
+
+    ASSERT_EQ("0,0,1", FilesPerLevel(0));
+
+    std::vector<LiveFileMetaData> metadata;
+    db_->GetLiveFilesMetaData(&metadata);
+    ASSERT_EQ(metadata.size(), 1U);
+    auto moved_file_name = metadata[0].name;
+
+    // Create two more 1MB sst files
+    for (int i = 0; i < 2; ++i) {
+      // Create 1MB sst file
+      for (int j = 0; j < 100; ++j) {
+        ASSERT_OK(Put(Key(i * 50 + j + 100), RandomString(&rnd, 10 * 1024)));
+      }
+      ASSERT_OK(Flush());
+    }
+    // this should execute both L0->L1 and L1->L2 (merge with previous file)
+    dbfull()->TEST_WaitForCompact();
+
+    ASSERT_EQ("0,0,2", FilesPerLevel(0));
+
+    // iterator is holding the file
+    ASSERT_TRUE(env_->FileExists(dbname_ + "/" + moved_file_name));
+
+    iterator.reset();
+
+    // this file should have been compacted away
+    ASSERT_TRUE(!env_->FileExists(dbname_ + "/" + moved_file_name));
+  }
+}
+
 TEST(DBTest, EncodeDecompressedBlockSizeTest) {
   // iter 0 -- zlib
   // iter 1 -- bzip2
diff --git a/db/job_context.h b/db/job_context.h
index 01c868c037..d3aa9b215a 100644
--- a/db/job_context.h
+++ b/db/job_context.h
@@ -43,8 +43,7 @@ struct JobContext {
   std::vector<CandidateFileInfo> full_scan_candidate_files;
 
   // the list of all live sst files that cannot be deleted
-  // (filled only if we're doing full scan)
-  std::vector<FileDescriptor> full_scan_sst_live;
+  std::vector<FileDescriptor> sst_live;
 
   // a list of sst files that we need to delete
   std::vector<FileMetaData*> sst_delete_files;
diff --git a/db/version_builder.cc b/db/version_builder.cc
index 3a4143b9e8..c010ee4294 100644
--- a/db/version_builder.cc
+++ b/db/version_builder.cc
@@ -215,7 +215,6 @@ class VersionBuilder::Rep {
       const int level = new_file.first;
       FileMetaData* f = new FileMetaData(new_file.second);
       f->refs = 1;
-      f->moved = false;
 
       assert(levels_[level].added_files.find(f->fd.GetNumber()) ==
              levels_[level].added_files.end());
diff --git a/db/version_edit.h b/db/version_edit.h
index 6f7a692f32..004855ff9b 100644
--- a/db/version_edit.h
+++ b/db/version_edit.h
@@ -87,10 +87,6 @@ struct FileMetaData {
   bool init_stats_from_file;  // true if the data-entry stats of this file
                               // has initialized from file.
 
-  // Always false for new files. Set to true if the file was part of move
-  // compaction. Can only be mutated from the compaction process, under DB mutex
-  bool moved;
-
   FileMetaData()
       : refs(0),
         being_compacted(false),
@@ -100,8 +96,7 @@ struct FileMetaData {
         num_deletions(0),
         raw_key_size(0),
         raw_value_size(0),
-        init_stats_from_file(false),
-        moved(false) {}
+        init_stats_from_file(false) {}
 };
 
 // A compressed copy of file meta data that just contain
diff --git a/db/version_set.cc b/db/version_set.cc
index 211ee3fda9..6ec6f1d9eb 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -309,13 +309,7 @@ Version::~Version() {
         cfd_->table_cache()->ReleaseHandle(f->table_reader_handle);
         f->table_reader_handle = nullptr;
       }
-      if (!f->moved) {
-        vset_->obsolete_files_.push_back(f);
-      } else {
-        // moved!
-        // TODO(icanadi) delete this outside of mutex
-        delete f;
-      }
+      vset_->obsolete_files_.push_back(f);
     }
   }
 }
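With the version_set.cc revert above, every file dropped from a Version goes back onto obsolete_files_, and correctness relies on Version reference counting plus the live-file check in PurgeObsoleteFiles(). The new DeleteMovedFileAfterCompaction test pins a file with an iterator to exercise exactly that; here is a usage-level sketch of the pinning behavior (illustrative, not the test code itself):

// Sketch: an open iterator pins the SST files backing its snapshot, so even
// a file that was "moved" and later compacted away must survive on disk
// until the iterator is released.
#include <memory>
#include "rocksdb/db.h"

void PinFilesWithIterator(rocksdb::DB* db) {
  std::unique_ptr<rocksdb::Iterator> it(
      db->NewIterator(rocksdb::ReadOptions()));
  // While 'it' is alive, the Version it reads from stays referenced;
  // Version::~Version pushes dropped files to obsolete_files_ only once the
  // last reference goes away.
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // read it->key() / it->value()
  }
  // it.reset() (or scope exit) releases the reference; the files then become
  // eligible for deletion by PurgeObsoleteFiles().
}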
diff --git a/util/options.cc b/util/options.cc
index 69aca5ab1b..fbfa74ccc3 100644
--- a/util/options.cc
+++ b/util/options.cc
@@ -204,7 +204,11 @@ DBOptions::DBOptions()
       env(Env::Default()),
       rate_limiter(nullptr),
       info_log(nullptr),
+#ifdef NDEBUG
       info_log_level(INFO_LEVEL),
+#else
+      info_log_level(DEBUG_LEVEL),
+#endif  // NDEBUG
       max_open_files(5000),
       max_total_wal_size(0),
       statistics(nullptr),
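Finally, the util/options.cc hunk makes the default info_log_level build-dependent: DEBUG_LEVEL in debug builds, INFO_LEVEL when NDEBUG is defined. Applications that want a fixed verbosity regardless of build type can still set the field explicitly; a minimal sketch (the MakeQuietOptions helper is made up for illustration):

#include "rocksdb/options.h"

rocksdb::Options MakeQuietOptions() {
  rocksdb::Options options;
  // Pin the log level so debug and release builds behave identically.
  options.info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
  return options;
}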