From fde0cd7ced0035f7c3ca276c372a28c55775d7c3 Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Tue, 3 Nov 2020 20:33:45 -0800 Subject: [PATCH] Add API to verify whole sst file checksum (#7578) Summary: Existing API `VerifyChecksum()` allows application to verify sst files' block checksums. Since whole file, user-specified checksum is tracked in MANIFEST, we can expose a new API to verify sst files' file checksums. ``` // Compute table file checksums if applicable and compare with MANIFEST. // Returns OK if no file has mismatching whole-file checksum. Status DB::VerifyFileChecksums(const ReadOptions& /*read_options*/); ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/7578 Test Plan: make check Reviewed By: pdillinger Differential Revision: D24436783 Pulled By: riversand963 fbshipit-source-id: 52b51519b842f2b3c4e3351998a97c86cbec85b3 --- HISTORY.md | 1 + db/corruption_test.cc | 46 ++++++++++++++++- db/db_basic_test.cc | 22 ++++++++ db/db_impl/db_impl.cc | 66 ++++++++++++++++++++++-- db/db_impl/db_impl.h | 20 +++++++ include/rocksdb/db.h | 8 +++ include/rocksdb/utilities/stackable_db.h | 5 ++ 7 files changed, 162 insertions(+), 6 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index eff2f0f1c7..b4fc6df1b6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -14,6 +14,7 @@ ### Public API Change * Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options. +* Add new API `DB::VerifyFileChecksums` to verify SST file checksum with corresponding entries in the MANIFEST if present. Current implementation requires scanning and recomputing file checksums. ### Behavior Changes * The dictionary compression settings specified in `ColumnFamilyOptions::compression_opts` now additionally affect files generated by flush and compaction to non-bottommost level. Previously those settings at most affected files generated by compaction to bottommost level, depending on whether `ColumnFamilyOptions::bottommost_compression_opts` overrode them. Users who relied on dictionary compression settings in `ColumnFamilyOptions::compression_opts` affecting only the bottommost level can keep the behavior by moving their dictionary settings to `ColumnFamilyOptions::bottommost_compression_opts` and setting its `enabled` flag. diff --git a/db/corruption_test.cc b/db/corruption_test.cc index cb7cfef348..86d79f8877 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -37,7 +37,7 @@ namespace ROCKSDB_NAMESPACE { -static const int kValueSize = 1000; +static constexpr int kValueSize = 1000; class CorruptionTest : public testing::Test { public: @@ -68,9 +68,16 @@ class CorruptionTest : public testing::Test { } ~CorruptionTest() override { + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->LoadDependency({}); + SyncPoint::GetInstance()->ClearAllCallBacks(); delete db_; db_ = nullptr; - DestroyDB(dbname_, Options()); + if (getenv("KEEP_DB")) { + fprintf(stdout, "db is still at %s\n", dbname_.c_str()); + } else { + EXPECT_OK(DestroyDB(dbname_, Options())); + } } void CloseDb() { @@ -825,6 +832,41 @@ TEST_F(CorruptionTest, DisableKeyOrderCheck) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } +TEST_F(CorruptionTest, VerifyWholeTableChecksum) { + CloseDb(); + Options options; + options.env = &env_; + ASSERT_OK(DestroyDB(dbname_, options)); + options.create_if_missing = true; + options.file_checksum_gen_factory = + ROCKSDB_NAMESPACE::GetFileChecksumGenCrc32cFactory(); + Reopen(&options); + + Build(10, 5); + + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); + CloseDb(); + + // Corrupt the first byte of each table file, this must be data block. + Corrupt(kTableFile, 0, 1); + + ASSERT_OK(TryReopen(&options)); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + int count{0}; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::VerifySstFileChecksum:mismatch", [&](void* arg) { + auto* s = reinterpret_cast(arg); + assert(s); + ++count; + ASSERT_NOK(*s); + }); + SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsCorruption()); + ASSERT_EQ(1, count); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index dda217db60..377952b2c7 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -3304,6 +3304,28 @@ TEST_F(DBBasicTest, ManifestWriteFailure) { Reopen(options); } +#ifndef ROCKSDB_LITE +TEST_F(DBBasicTest, VerifyFileChecksums) { + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.env = env_; + DestroyAndReopen(options); + ASSERT_OK(Put("a", "value")); + ASSERT_OK(Flush()); + ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsInvalidArgument()); + + options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); + Reopen(options); + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); + + // Write an L0 with checksum computed. + ASSERT_OK(Put("b", "value")); + ASSERT_OK(Flush()); + + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); +} +#endif // !ROCKSDB_LITE + // A test class for intercepting random reads and injecting artificial // delays. Used for testing the deadline/timeout feature class DBBasicTestDeadline diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index f7a074d680..f0737cafb4 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -4727,8 +4728,29 @@ Status DBImpl::CreateColumnFamilyWithImport( return status; } +Status DBImpl::VerifyFileChecksums(const ReadOptions& read_options) { + return VerifyChecksumInternal(read_options, /*use_file_checksum=*/true); +} + Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { + return VerifyChecksumInternal(read_options, /*use_file_checksum=*/false); +} + +Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, + bool use_file_checksum) { Status s; + + if (use_file_checksum) { + FileChecksumGenFactory* const file_checksum_gen_factory = + immutable_db_options_.file_checksum_gen_factory.get(); + if (!file_checksum_gen_factory) { + s = Status::InvalidArgument( + "Cannot verify file checksum if options.file_checksum_gen_factory is " + "null"); + return s; + } + } + std::vector cfd_list; { InstrumentedMutexLock l(&mutex_); @@ -4743,11 +4765,12 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { for (auto cfd : cfd_list) { sv_list.push_back(cfd->GetReferencedSuperVersion(this)); } + for (auto& sv : sv_list) { VersionStorageInfo* vstorage = sv->current->storage_info(); ColumnFamilyData* cfd = sv->current->cfd(); Options opts; - { + if (!use_file_checksum) { InstrumentedMutexLock l(&mutex_); opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), cfd->GetLatestCFOptions()); @@ -4755,11 +4778,18 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) { for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok(); j++) { - const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd; + const auto& fd_with_krange = vstorage->LevelFilesBrief(i).files[j]; + const auto& fd = fd_with_krange.fd; + const FileMetaData* fmeta = fd_with_krange.file_metadata; + assert(fmeta); std::string fname = TableFileName(cfd->ioptions()->cf_paths, fd.GetNumber(), fd.GetPathId()); - s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, - read_options, fname); + if (use_file_checksum) { + s = VerifySstFileChecksum(*fmeta, fname, read_options); + } else { + s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, + read_options, fname); + } } } if (!s.ok()) { @@ -4790,6 +4820,34 @@ Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { return s; } +Status DBImpl::VerifySstFileChecksum(const FileMetaData& fmeta, + const std::string& fname, + const ReadOptions& read_options) { + Status s; + if (fmeta.file_checksum == kUnknownFileChecksum) { + return s; + } + std::string file_checksum; + std::string func_name; + s = ROCKSDB_NAMESPACE::GenerateOneFileChecksum( + fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(), + fmeta.file_checksum_func_name, &file_checksum, &func_name, + read_options.readahead_size, immutable_db_options_.allow_mmap_reads, + io_tracer_); + if (s.ok()) { + assert(fmeta.file_checksum_func_name == func_name); + if (file_checksum != fmeta.file_checksum) { + std::ostringstream oss; + oss << fname << " file checksum mismatch, "; + oss << "expecting " << Slice(fmeta.file_checksum).ToString(/*hex=*/true); + oss << ", but actual " << Slice(file_checksum).ToString(/*hex=*/true); + s = Status::Corruption(oss.str()); + TEST_SYNC_POINT_CALLBACK("DBImpl::VerifySstFileChecksum:mismatch", &s); + } + } + return s; +} + void DBImpl::NotifyOnExternalFileIngested( ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) { if (immutable_db_options_.listeners.empty()) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index b843093db8..68b053f312 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -431,8 +431,28 @@ class DBImpl : public DB { const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) override; + using DB::VerifyFileChecksums; + Status VerifyFileChecksums(const ReadOptions& read_options) override; + using DB::VerifyChecksum; virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override; + // Verify the checksums of files in db. Currently only tables are checked. + // + // read_options: controls file I/O behavior, e.g. read ahead size while + // reading all the live table files. + // + // use_file_checksum: if false, verify the block checksums of all live table + // in db. Otherwise, obtain the file checksums and compare + // with the MANIFEST. Currently, file checksums are + // recomputed by reading all table files. + // + // Returns: OK if there is no file whose file or block checksum mismatches. + Status VerifyChecksumInternal(const ReadOptions& read_options, + bool use_file_checksum); + + Status VerifySstFileChecksum(const FileMetaData& fmeta, + const std::string& fpath, + const ReadOptions& read_options); using DB::StartTrace; virtual Status StartTrace( diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index c933e3860f..840e1b4ecf 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1443,6 +1443,14 @@ class DB { const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) = 0; + // Verify the checksums of files in db. Currently the whole-file checksum of + // table files are checked. + virtual Status VerifyFileChecksums(const ReadOptions& /*read_options*/) { + return Status::NotSupported("File verification not supported"); + } + + // Verify the block checksums of files in db. The block checksums of table + // files are checked. virtual Status VerifyChecksum(const ReadOptions& read_options) = 0; virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index 8f74c60035..686452bdde 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -141,6 +141,11 @@ class StackableDB : public DB { import_options, metadata, handle); } + using DB::VerifyFileChecksums; + Status VerifyFileChecksums(const ReadOptions& read_opts) override { + return db_->VerifyFileChecksums(read_opts); + } + virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); } virtual Status VerifyChecksum(const ReadOptions& options) override {