diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc index 2aaf2c50de..7bb9478ac3 100644 --- a/db/cuckoo_table_db_test.cc +++ b/db/cuckoo_table_db_test.cc @@ -63,6 +63,15 @@ class CuckooTableDBTest : public testing::Test { ASSERT_OK(DB::Open(opts, dbname_, &db_)); } + void DestroyAndReopen(Options* options) { + assert(options); + ASSERT_OK(db_->Close()); + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, *options)); + Reopen(options); + } + Status Put(const Slice& k, const Slice& v) { return db_->Put(WriteOptions(), k, v); } @@ -205,7 +214,7 @@ static std::string Uint64Key(uint64_t i) { TEST_F(CuckooTableDBTest, Uint64Comparator) { Options options = CurrentOptions(); options.comparator = test::Uint64Comparator(); - Reopen(&options); + DestroyAndReopen(&options); ASSERT_OK(Put(Uint64Key(1), "v1")); ASSERT_OK(Put(Uint64Key(2), "v2")); diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 377952b2c7..31200ee8a1 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -2279,6 +2279,43 @@ class TableFileListener : public EventListener { }; } // namespace +TEST_F(DBBasicTest, LastSstFileNotInManifest) { + // If the last sst file is not tracked in MANIFEST, + // or the VersionEdit for the last sst file is not synced, + // on recovery, the last sst file should be deleted, + // and new sst files shouldn't reuse its file number. + Options options = CurrentOptions(); + DestroyAndReopen(options); + Close(); + + // Manually add a sst file. + constexpr uint64_t kSstFileNumber = 100; + const std::string kSstFile = MakeTableFileName(dbname_, kSstFileNumber); + ASSERT_OK(WriteStringToFile(env_, /* data = */ "bad sst file content", + /* fname = */ kSstFile, + /* should_sync = */ true)); + ASSERT_OK(env_->FileExists(kSstFile)); + + TableFileListener* listener = new TableFileListener(); + options.listeners.emplace_back(listener); + Reopen(options); + // kSstFile should already be deleted. + ASSERT_TRUE(env_->FileExists(kSstFile).IsNotFound()); + + ASSERT_OK(Put("k", "v")); + ASSERT_OK(Flush()); + // New sst file should have file number > kSstFileNumber. + std::vector& files = + listener->GetFiles(kDefaultColumnFamilyName); + ASSERT_EQ(files.size(), 1); + const std::string fname = files[0].erase(0, (dbname_ + "/").size()); + uint64_t number = 0; + FileType type = kTableFile; + ASSERT_TRUE(ParseFileName(fname, &number, &type)); + ASSERT_EQ(type, kTableFile); + ASSERT_GT(number, kSstFileNumber); +} + TEST_F(DBBasicTest, RecoverWithMissingFiles) { Options options = CurrentOptions(); DestroyAndReopen(options); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 1ffff8fa69..2f48c3c266 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1215,14 +1215,22 @@ class DBImpl : public DB { virtual bool OwnTablesAndLogs() const { return true; } + // Set DB identity file, and write DB ID to manifest if necessary. + Status SetDBId(); + // REQUIRES: db mutex held when calling this function, but the db mutex can // be released and re-acquired. Db mutex will be held when the function // returns. - // After best-efforts recovery, there may be SST files in db/cf paths that are - // not referenced in the MANIFEST. We delete these SST files. In the + // After recovery, there may be SST files in db/cf paths that are + // not referenced in the MANIFEST (e.g. + // 1. It's best effort recovery; + // 2. The VersionEdits referencing the SST files are appended to + // MANIFEST, DB crashes when syncing the MANIFEST, the VersionEdits are + // still not synced to MANIFEST during recovery.) + // We delete these SST files. In the // meantime, we find out the largest file number present in the paths, and // bump up the version set's next_file_number_ to be 1 + largest_file_number. - Status FinishBestEffortsRecovery(); + Status DeleteUnreferencedSstFiles(); // SetDbSessionId() should be called in the constuctor DBImpl() // to ensure that db_session_id_ gets updated every time the DB is opened diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index d825c406ca..f67e8f748a 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -751,7 +751,43 @@ uint64_t PrecomputeMinLogNumberToKeep2PC( return min_log_number_to_keep; } -Status DBImpl::FinishBestEffortsRecovery() { +Status DBImpl::SetDBId() { + Status s; + // Happens when immutable_db_options_.write_dbid_to_manifest is set to true + // the very first time. + if (db_id_.empty()) { + // Check for the IDENTITY file and create it if not there. + s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr); + // Typically Identity file is created in NewDB() and for some reason if + // it is no longer available then at this point DB ID is not in Identity + // file or Manifest. + if (s.IsNotFound()) { + s = SetIdentityFile(env_, dbname_); + if (!s.ok()) { + return s; + } + } else if (!s.ok()) { + assert(s.IsIOError()); + return s; + } + s = GetDbIdentityFromIdentityFile(&db_id_); + if (immutable_db_options_.write_dbid_to_manifest && s.ok()) { + VersionEdit edit; + edit.SetDBId(db_id_); + Options options; + MutableCFOptions mutable_cf_options(options); + versions_->db_id_ = db_id_; + s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), + mutable_cf_options, &edit, &mutex_, nullptr, + /* new_descriptor_log */ false); + } + } else { + s = SetIdentityFile(env_, dbname_, db_id_); + } + return s; +} + +Status DBImpl::DeleteUnreferencedSstFiles() { mutex_.AssertHeld(); std::vector paths; paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator))); @@ -807,8 +843,6 @@ Status DBImpl::FinishBestEffortsRecovery() { assert(versions_->GetColumnFamilySet()); ColumnFamilyData* default_cfd = versions_->GetColumnFamilySet()->GetDefault(); assert(default_cfd); - // Even if new_descriptor_log is false, we will still switch to a new - // MANIFEST and update CURRENT file, since this is in recovery. s = versions_->LogAndApply( default_cfd, *default_cfd->GetLatestMutableCFOptions(), &edit, &mutex_, directories_.GetDbDir(), /*new_descriptor_log*/ false); diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 90759e9c5b..247985d003 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -479,42 +479,14 @@ Status DBImpl::Recover( // TryRecover may delete previous column_family_set_. column_family_memtables_.reset( new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet())); - s = FinishBestEffortsRecovery(); } } if (!s.ok()) { return s; } - // Happens when immutable_db_options_.write_dbid_to_manifest is set to true - // the very first time. - if (db_id_.empty()) { - // Check for the IDENTITY file and create it if not there. - s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr); - // Typically Identity file is created in NewDB() and for some reason if - // it is no longer available then at this point DB ID is not in Identity - // file or Manifest. - if (s.IsNotFound()) { - s = SetIdentityFile(env_, dbname_); - if (!s.ok()) { - return s; - } - } else if (!s.ok()) { - assert(s.IsIOError()); - return s; - } - s = GetDbIdentityFromIdentityFile(&db_id_); - if (immutable_db_options_.write_dbid_to_manifest && s.ok()) { - VersionEdit edit; - edit.SetDBId(db_id_); - Options options; - MutableCFOptions mutable_cf_options(options); - versions_->db_id_ = db_id_; - s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), - mutable_cf_options, &edit, &mutex_, nullptr, - false); - } - } else { - s = SetIdentityFile(env_, dbname_, db_id_); + s = SetDBId(); + if (s.ok() && !read_only) { + s = DeleteUnreferencedSstFiles(); } if (immutable_db_options_.paranoid_checks && s.ok()) { diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 403b221e68..3c36ecd1a4 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -269,7 +269,7 @@ TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) { const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250; Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); - Reopen(opts); + DestroyAndReopen(opts); // Write a third before snapshot, a third between snapshot and tombstone, and // a third after the tombstone. Keys older than snapshot or newer than the @@ -309,7 +309,7 @@ TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) { opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); opts.num_levels = 2; opts.statistics = CreateDBStatistics(); - Reopen(opts); + DestroyAndReopen(opts); for (int i = 0; i < kNumFiles; ++i) { if (i > 0) { @@ -603,7 +603,7 @@ TEST_F(DBRangeDelTest, TableEvictedDuringScan) { bbto.cache_index_and_filter_blocks = true; bbto.block_cache = NewLRUCache(8 << 20); opts.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(opts); + DestroyAndReopen(opts); // Hold a snapshot so range deletions can't become obsolete during compaction // to bottommost level (i.e., L1). @@ -761,7 +761,7 @@ TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) { Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); - Reopen(opts); + DestroyAndReopen(opts); // Write half of the keys before the tombstone and half after the tombstone. // Only covered keys (i.e., within the range and older than the tombstone) @@ -794,7 +794,7 @@ TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) { Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); - Reopen(opts); + DestroyAndReopen(opts); const Snapshot* snapshot = nullptr; // Put a snapshot before the range tombstone, verify an iterator using that diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index da1098e274..fe90a07960 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2066,7 +2066,6 @@ void StressTest::Open() { FLAGS_level_compaction_dynamic_level_bytes; options_.file_checksum_gen_factory = GetFileChecksumImpl(FLAGS_file_checksum_impl); - options_.track_and_verify_wals_in_manifest = true; } else { #ifdef ROCKSDB_LITE fprintf(stderr, "--options_file not supported in lite mode\n"); diff --git a/env/mock_env.cc b/env/mock_env.cc index d24557e5d1..44c8a01b98 100644 --- a/env/mock_env.cc +++ b/env/mock_env.cc @@ -601,7 +601,7 @@ class MockFileSystem : public FileSystem { std::string NormalizeMockPath(const std::string& path) { std::string p = NormalizePath(path); - if (p.back() == '/' && p.size() > 1) { + if (p.back() == kFilePathSeparator && p.size() > 1) { p.pop_back(); } return p; diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index fc62dc80e1..46f6414c86 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1456,11 +1456,11 @@ public class RocksDBTest { try (final RocksDB db = RocksDB.open(options, dbPath)) { final RocksDB.LiveFiles livefiles = db.getLiveFiles(true); assertThat(livefiles).isNotNull(); - assertThat(livefiles.manifestFileSize).isEqualTo(13); + assertThat(livefiles.manifestFileSize).isEqualTo(57); assertThat(livefiles.files.size()).isEqualTo(3); assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT"); - assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000001"); - assertThat(livefiles.files.get(2)).isEqualTo("/OPTIONS-000005"); + assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000003"); + assertThat(livefiles.files.get(2)).isEqualTo("/OPTIONS-000006"); } } } diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 0cb9ee3da2..8d8ca95f85 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -2500,19 +2500,19 @@ TEST_F(BackupableDBTest, GarbageCollectionBeforeBackup) { OpenDBAndBackupEngine(true); backup_chroot_env_->CreateDirIfMissing(backupdir_ + "/shared"); - std::string file_five = backupdir_ + "/shared/000007.sst"; + std::string file_five = backupdir_ + "/shared/000008.sst"; std::string file_five_contents = "I'm not really a sst file"; - // this depends on the fact that 00007.sst is the first file created by the DB + // this depends on the fact that 00008.sst is the first file created by the DB ASSERT_OK(file_manager_->WriteToFile(file_five, file_five_contents)); FillDB(db_.get(), 0, 100); - // backup overwrites file 000007.sst + // backup overwrites file 000008.sst ASSERT_TRUE(backup_engine_->CreateNewBackup(db_.get(), true).ok()); std::string new_file_five_contents; ASSERT_OK(ReadFileToString(backup_chroot_env_.get(), file_five, &new_file_five_contents)); - // file 000007.sst was overwritten + // file 000008.sst was overwritten ASSERT_TRUE(new_file_five_contents != file_five_contents); CloseDBAndBackupEngine();