// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "db/version_builder.h" #include "db/version_edit.h" #include "db/version_set.h" namespace ROCKSDB_NAMESPACE { struct FileMetaData; class VersionEditHandlerBase { public: explicit VersionEditHandlerBase(const ReadOptions& read_options) : read_options_(read_options), max_manifest_read_size_(std::numeric_limits::max()) {} virtual ~VersionEditHandlerBase() {} void Iterate(log::Reader& reader, Status* log_read_status); const Status& status() const { return status_; } AtomicGroupReadBuffer& GetReadBuffer() { return read_buffer_; } protected: explicit VersionEditHandlerBase(const ReadOptions& read_options, uint64_t max_read_size) : read_options_(read_options), max_manifest_read_size_(max_read_size) {} virtual Status Initialize() { return Status::OK(); } virtual Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) = 0; virtual Status OnAtomicGroupReplayBegin() { return Status::OK(); } virtual Status OnAtomicGroupReplayEnd() { return Status::OK(); } virtual void CheckIterationResult(const log::Reader& /*reader*/, Status* /*s*/) {} void ClearReadBuffer() { read_buffer_.Clear(); } Status status_; const ReadOptions& read_options_; private: AtomicGroupReadBuffer read_buffer_; const uint64_t max_manifest_read_size_; }; class ListColumnFamiliesHandler : public VersionEditHandlerBase { public: explicit ListColumnFamiliesHandler(const ReadOptions& read_options) : VersionEditHandlerBase(read_options) {} ~ListColumnFamiliesHandler() override {} const std::map GetColumnFamilyNames() const { return column_family_names_; } protected: Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** /*unused*/) override; private: // default column family is always implicitly there std::map column_family_names_{ {0, kDefaultColumnFamilyName}}; }; class FileChecksumRetriever : public VersionEditHandlerBase { public: FileChecksumRetriever(const ReadOptions& read_options, uint64_t max_read_size, FileChecksumList& file_checksum_list) : VersionEditHandlerBase(read_options, max_read_size), file_checksum_list_(file_checksum_list) {} ~FileChecksumRetriever() override {} protected: Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** /*unused*/) override; private: FileChecksumList& file_checksum_list_; }; using VersionBuilderUPtr = std::unique_ptr; // A class used for scanning MANIFEST file. // VersionEditHandler reads a MANIFEST file, parses the version edits, and // builds the version set's in-memory state, e.g. the version storage info for // the versions of column families. It replays all the version edits in one // MANIFEST file to build the end version. // // To use this class and its subclasses, // 1. Create an object of VersionEditHandler or its subclasses. // VersionEditHandler handler(read_only, column_families, version_set, // track_found_and_missing_files, // no_error_if_files_missing); // 2. Status s = handler.Iterate(reader, &db_id); // 3. Check s and handle possible errors. // // Not thread-safe, external synchronization is necessary if an object of // VersionEditHandler is shared by multiple threads. class VersionEditHandler : public VersionEditHandlerBase { public: explicit VersionEditHandler( bool read_only, const std::vector& column_families, VersionSet* version_set, bool track_found_and_missing_files, bool no_error_if_files_missing, const std::shared_ptr& io_tracer, const ReadOptions& read_options, bool allow_incomplete_valid_version, EpochNumberRequirement epoch_number_requirement = EpochNumberRequirement::kMustPresent) : VersionEditHandler(read_only, column_families, version_set, track_found_and_missing_files, no_error_if_files_missing, io_tracer, read_options, /*skip_load_table_files=*/false, allow_incomplete_valid_version, epoch_number_requirement) {} ~VersionEditHandler() override {} const VersionEditParams& GetVersionEditParams() const { return version_edit_params_; } void GetDbId(std::string* db_id) const { if (db_id && version_edit_params_.HasDbId()) { *db_id = version_edit_params_.GetDbId(); } } virtual Status VerifyFile(ColumnFamilyData* /*cfd*/, const std::string& /*fpath*/, int /*level*/, const FileMetaData& /*fmeta*/) { return Status::OK(); } virtual Status VerifyBlobFile(ColumnFamilyData* /*cfd*/, uint64_t /*blob_file_num*/, const BlobFileAddition& /*blob_addition*/) { return Status::OK(); } protected: explicit VersionEditHandler( bool read_only, std::vector column_families, VersionSet* version_set, bool track_found_and_missing_files, bool no_error_if_files_missing, const std::shared_ptr& io_tracer, const ReadOptions& read_options, bool skip_load_table_files, bool allow_incomplete_valid_version, EpochNumberRequirement epoch_number_requirement = EpochNumberRequirement::kMustPresent); Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override; virtual Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd); Status OnColumnFamilyDrop(VersionEdit& edit, ColumnFamilyData** cfd); Status OnNonCfOperation(VersionEdit& edit, ColumnFamilyData** cfd); Status OnWalAddition(VersionEdit& edit); Status OnWalDeletion(VersionEdit& edit); Status Initialize() override; void CheckColumnFamilyId(const VersionEdit& edit, bool* do_not_open_cf, bool* cf_in_builders) const; void CheckIterationResult(const log::Reader& reader, Status* s) override; ColumnFamilyData* CreateCfAndInit(const ColumnFamilyOptions& cf_options, const VersionEdit& edit); virtual ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit); virtual Status MaybeCreateVersionBeforeApplyEdit(const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version); virtual Status LoadTables(ColumnFamilyData* cfd, bool prefetch_index_and_filter_in_cache, bool is_initial_load); virtual bool MustOpenAllColumnFamilies() const { return !read_only_; } const bool read_only_; std::vector column_families_; VersionSet* version_set_; std::unordered_map builders_; std::unordered_map name_to_options_; const bool track_found_and_missing_files_; // Keeps track of column families in manifest that were not found in // column families parameters. Namely, the user asks to not open these column // families. In non read only mode, if those column families are not dropped // by subsequent manifest records, Recover() will return failure status. std::unordered_map do_not_open_column_families_; VersionEditParams version_edit_params_; bool no_error_if_files_missing_; std::shared_ptr io_tracer_; bool skip_load_table_files_; bool initialized_; std::unique_ptr> cf_to_cmp_names_; // If false, only a complete Version for which all files consisting it can be // found is considered a valid Version. If true, besides complete Version, an // incomplete Version with only a suffix of L0 files missing is also // considered valid if the Version is never edited in an atomic group. const bool allow_incomplete_valid_version_; EpochNumberRequirement epoch_number_requirement_; std::unordered_set cfds_to_mark_no_udt_; private: Status ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, const VersionEdit& edit); // When `FileMetaData.user_defined_timestamps_persisted` is false and // user-defined timestamp size is non-zero. User-defined timestamps are // stripped from file boundaries: `smallest`, `largest` in // `VersionEdit.DecodeFrom` before they were written to Manifest. // This is the mirroring change to handle file boundaries on the Manifest read // path for this scenario: to pad a minimum timestamp to the user key in // `smallest` and `largest` so their format are consistent with the running // user comparator. Status MaybeHandleFileBoundariesForNewFiles(VersionEdit& edit, const ColumnFamilyData* cfd); }; // A class similar to its base class, i.e. VersionEditHandler. // Unlike VersionEditHandler that only aims to build the end version, this class // supports building the most recent point in time version. A point in time // version is a version for which no files are missing, or if // `allow_incomplete_valid_version` is true, only a suffix of L0 files (and // their associated blob files) are missing. // // Building a point in time version when end version is not available can // be useful for best efforts recovery (options.best_efforts_recovery), which // uses this class and sets `allow_incomplete_valid_version` to true. // It's also useful for secondary instances/follower instances for which end // version could be transiently unavailable. These two cases use subclass // `ManifestTailer` and sets `allow_incomplete_valid_version` to false. // // Not thread-safe, external synchronization is necessary if an object of // VersionEditHandlerPointInTime is shared by multiple threads. class VersionEditHandlerPointInTime : public VersionEditHandler { public: VersionEditHandlerPointInTime( bool read_only, std::vector column_families, VersionSet* version_set, const std::shared_ptr& io_tracer, const ReadOptions& read_options, bool allow_incomplete_valid_version, EpochNumberRequirement epoch_number_requirement = EpochNumberRequirement::kMustPresent); ~VersionEditHandlerPointInTime() override; bool HasMissingFiles() const; virtual Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath, int level, const FileMetaData& fmeta) override; virtual Status VerifyBlobFile(ColumnFamilyData* cfd, uint64_t blob_file_num, const BlobFileAddition& blob_addition) override; protected: Status OnAtomicGroupReplayBegin() override; Status OnAtomicGroupReplayEnd() override; void CheckIterationResult(const log::Reader& reader, Status* s) override; ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit) override; // `MaybeCreateVersionBeforeApplyEdit(..., false)` creates a version upon a // negative edge trigger (transition from valid to invalid). // // `MaybeCreateVersionBeforeApplyEdit(..., true)` creates a version on a // positive level trigger (state is valid). Status MaybeCreateVersionBeforeApplyEdit(const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) override; Status LoadTables(ColumnFamilyData* cfd, bool prefetch_index_and_filter_in_cache, bool is_initial_load) override; std::unordered_map versions_; // `atomic_update_versions_` is for ensuring all-or-nothing AtomicGroup // recoveries. When `atomic_update_versions_` is nonempty, it serves as a // barrier to updating `versions_` until all its values are populated. std::unordered_map atomic_update_versions_; // `atomic_update_versions_missing_` counts the nullptr values in // `atomic_update_versions_`. size_t atomic_update_versions_missing_; bool in_atomic_group_ = false; private: bool AtomicUpdateVersionsCompleted(); bool AtomicUpdateVersionsContains(uint32_t cfid); void AtomicUpdateVersionsDropCf(uint32_t cfid); // This function is called for `Version*` updates for column families in an // incomplete atomic update. It buffers `Version*` updates in // `atomic_update_versions_`. void AtomicUpdateVersionsPut(Version* version); // This function is called upon completion of an atomic update. It applies // `Version*` updates in `atomic_update_versions_` to `versions_`. void AtomicUpdateVersionsApply(); }; // A class similar to `VersionEditHandlerPointInTime` that parse MANIFEST and // builds point in time version. // `ManifestTailer` supports reading one MANIFEST file in multiple tailing // attempts and supports switching to a different MANIFEST after // `PrepareToReadNewManifest` is called. This class is used by secondary and // follower instance. class ManifestTailer : public VersionEditHandlerPointInTime { public: explicit ManifestTailer(std::vector column_families, VersionSet* version_set, const std::shared_ptr& io_tracer, const ReadOptions& read_options, EpochNumberRequirement epoch_number_requirement = EpochNumberRequirement::kMustPresent) : VersionEditHandlerPointInTime(/*read_only=*/false, column_families, version_set, io_tracer, read_options, /*allow_incomplete_valid_version=*/false, epoch_number_requirement), mode_(Mode::kRecovery) {} Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath, int level, const FileMetaData& fmeta) override; void PrepareToReadNewManifest() { initialized_ = false; ClearReadBuffer(); } std::unordered_set& GetUpdatedColumnFamilies() { return cfds_changed_; } std::vector GetAndClearIntermediateFiles(); protected: Status Initialize() override; bool MustOpenAllColumnFamilies() const override { return false; } Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override; Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd) override; void CheckIterationResult(const log::Reader& reader, Status* s) override; enum Mode : uint8_t { kRecovery = 0, kCatchUp = 1, }; Mode mode_; std::unordered_set cfds_changed_; }; class DumpManifestHandler : public VersionEditHandler { public: DumpManifestHandler(std::vector column_families, VersionSet* version_set, const std::shared_ptr& io_tracer, const ReadOptions& read_options, bool verbose, bool hex, bool json) : VersionEditHandler( /*read_only=*/true, column_families, version_set, /*track_found_and_missing_files=*/false, /*no_error_if_files_missing=*/false, io_tracer, read_options, /*skip_load_table_files=*/true, /*allow_incomplete_valid_version=*/false, /*epoch_number_requirement=*/EpochNumberRequirement::kMustPresent), verbose_(verbose), hex_(hex), json_(json), count_(0) { cf_to_cmp_names_.reset(new std::unordered_map()); } ~DumpManifestHandler() override {} Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override { // Write out each individual edit if (json_) { // Print out DebugStrings. Can include non-terminating null characters. std::string edit_dump_str = edit.DebugJSON(count_, hex_); fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout); fwrite("\n", sizeof(char), 1, stdout); } else if (verbose_) { // Print out DebugStrings. Can include non-terminating null characters. std::string edit_dump_str = edit.DebugString(hex_); fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout); } ++count_; return VersionEditHandler::ApplyVersionEdit(edit, cfd); } void CheckIterationResult(const log::Reader& reader, Status* s) override; private: const bool verbose_; const bool hex_; const bool json_; int count_; }; } // namespace ROCKSDB_NAMESPACE