Postponing custom checksum support in BackupEngine (#7411)

Summary:
This change reverts BackupEngine to 6.12 state to accommodate a
higher-priority fix that does not easily merge with this custom checksum
support. We intend to reinstate this support soon, by merging a revert
of this change.

For backupable_db_test, I've removed the tests depending on this
feature.

I've also removed the relevant HISTORY.md entry.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7411

Test Plan: unit tests

Reviewed By: ajkr

Differential Revision: D23793835

Pulled By: pdillinger

fbshipit-source-id: 7e861436539584799b13d1a8ae559b81b6d08052
This commit is contained in:
Peter Dillinger 2020-09-18 15:25:34 -07:00
parent 2dbb90a064
commit fb98398ca9
5 changed files with 103 additions and 894 deletions

View file

@ -14,7 +14,6 @@
* Fix a bug in which bottommost compaction continues to advance the underlying InternalIterator to skip tombstones even after shutdown.
### New Features
* A new option `std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory` is added to `BackupableDBOptions`. The default value for this option is `nullptr`. If this option is null, the default backup engine checksum function (crc32c) will be used for creating, verifying, or restoring backups. If it is not null and is set to the DB custom checksum factory, the custom checksum function used in DB will also be used for creating, verifying, or restoring backups, in addition to the default checksum function (crc32c). If it is not null and is set to a custom checksum factory different than the DB custom checksum factory (which may be null), BackupEngine will return `Status::InvalidArgument()`.
* A new field `std::string requested_checksum_func_name` is added to `FileChecksumGenContext`, which enables the checksum factory to create generators for a suite of different functions.
* Added a new subcommand, `ldb unsafe_remove_sst_file`, which removes a lost or corrupt SST file from a DB's metadata. This command involves data loss and must not be used on a live DB.

View file

@ -1223,9 +1223,6 @@ Status StressTest::TestBackupRestore(
// For debugging, get info_log from live options
backup_opts.info_log = db_->GetDBOptions().info_log.get();
assert(backup_opts.info_log);
if (thread->rand.OneIn(2)) {
backup_opts.file_checksum_gen_factory = options_.file_checksum_gen_factory;
}
if (thread->rand.OneIn(10)) {
backup_opts.share_table_files = false;
} else {

View file

@ -24,8 +24,10 @@
namespace ROCKSDB_NAMESPACE {
// The default DB file checksum function name.
constexpr char kDbFileChecksumFuncName[] = "FileChecksumCrc32c";
// The default BackupEngine file checksum function name.
constexpr char kDefaultBackupFileChecksumFuncName[] = "crc32c";
constexpr char kBackupFileChecksumFuncName[] = "crc32c";
struct BackupableDBOptions {
// Where to keep the backup files. Has to be different than dbname_
@ -194,33 +196,6 @@ struct BackupableDBOptions {
// and share_table_files are true.
ShareFilesNaming share_files_with_checksum_naming;
// Option for custom checksum functions.
// When this option is nullptr, BackupEngine will use its default crc32c as
// the checksum function.
//
// When it is not nullptr, BackupEngine will try to find in the factory the
// checksum function that DB used to calculate the file checksums. If such a
// function is found, BackupEngine will use it to create, verify, or restore
// backups, in addition to the default crc32c checksum function. If such a
// function is not found, BackupEngine will return Status::InvalidArgument().
// Therefore, this option comes into effect only if DB has a custom checksum
// factory and this option is set to the same factory.
//
//
// Note: If share_files_with_checksum and share_table_files are true,
// the <checksum> appeared in the table filenames will be the custom checksum
// value if db session ids are available (namely, table file naming options
// is kOptionalChecksumAndDbSessionId and the db session ids obtained from
// the table files are nonempty).
//
// Note: We do not require the same setting to this option for backup
// restoration or verification as was set during backup creation but we
// strongly recommend setting it to the same as the DB file checksum function
// for all BackupEngine interactions when practical.
//
// Default: nullptr
std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory;
void Dump(Logger* logger) const;
explicit BackupableDBOptions(
@ -233,9 +208,7 @@ struct BackupableDBOptions {
int _max_valid_backups_to_open = INT_MAX,
ShareFilesNaming _share_files_with_checksum_naming =
static_cast<ShareFilesNaming>(kUseDbSessionId | kFlagIncludeFileSize |
kFlagMatchInterimNaming),
std::shared_ptr<FileChecksumGenFactory> _file_checksum_gen_factory =
nullptr)
kFlagMatchInterimNaming))
: backup_dir(_backup_dir),
backup_env(_backup_env),
share_table_files(_share_table_files),
@ -249,8 +222,7 @@ struct BackupableDBOptions {
max_background_operations(_max_background_operations),
callback_trigger_interval_size(_callback_trigger_interval_size),
max_valid_backups_to_open(_max_valid_backups_to_open),
share_files_with_checksum_naming(_share_files_with_checksum_naming),
file_checksum_gen_factory(_file_checksum_gen_factory) {
share_files_with_checksum_naming(_share_files_with_checksum_naming) {
assert(share_table_files || !share_files_with_checksum);
assert((share_files_with_checksum_naming & kMaskNoNamingFlags) != 0);
}
@ -407,18 +379,16 @@ class BackupEngineReadOnly {
}
// If verify_with_checksum is true, this function
// inspects the default crc32c checksums and file sizes of backup files to
// see if they match our expectation. This function further inspects the
// custom checksums if BackupableDBOptions::file_checksum_gen_factory is
// the same as DBOptions::file_checksum_gen_factory.
// inspects the current checksums and file sizes of backup files to see if
// they match our expectation.
//
// If verify_with_checksum is false, this function
// checks that each file exists and that the size of the file matches our
// expectation. It does not check file checksum.
//
// If this BackupEngine created the backup, it compares the files' current
// sizes (and current checksums) against the number of bytes written to
// them (and the checksums calculated) during creation.
// sizes (and current checksum) against the number of bytes written to
// them (and the checksum calculated) during creation.
// Otherwise, it compares the files' current sizes (and checksums) against
// their sizes (and checksums) when the BackupEngine was opened.
//
@ -538,9 +508,7 @@ class BackupEngine {
// If verify_with_checksum is true, this function
// inspects the current checksums and file sizes of backup files to see if
// they match our expectation. It further inspects the custom checksums
// if BackupableDBOptions::file_checksum_gen_factory is the same as
// DBOptions::file_checksum_gen_factory.
// they match our expectation.
//
// If verify_with_checksum is false, this function
// checks that each file exists and that the size of the file matches our

View file

@ -28,7 +28,6 @@
#include <unordered_set>
#include <vector>
#include "db/log_reader.h"
#include "env/composite_env_wrapper.h"
#include "file/filename.h"
#include "file/sequence_file_reader.h"
@ -42,7 +41,6 @@
#include "util/channel.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/file_checksum_helper.h"
#include "util/string_util.h"
#include "utilities/checkpoint/checkpoint_impl.h"
@ -64,22 +62,6 @@ inline std::string ChecksumInt32ToHex(const uint32_t& checksum_value) {
PutFixed32(&checksum_str, EndianSwapValue(checksum_value));
return ChecksumStrToHex(checksum_str);
}
// Reports whether two checksum function names refer to the same function.
// The backup default checksum function and the db standard checksum function
// are both crc32c but are registered under different names, so that pair of
// names is treated as equivalent, in either order.
inline bool IsSameChecksumFunc(const std::string& dst_checksum_func_name,
                               const std::string& src_checksum_func_name) {
  // Identical names always match.
  if (dst_checksum_func_name == src_checksum_func_name) {
    return true;
  }
  // Otherwise the only accepted combination is backup-default paired with
  // db-standard.
  if (dst_checksum_func_name == kDefaultBackupFileChecksumFuncName) {
    return src_checksum_func_name == kStandardDbFileChecksumFuncName;
  }
  if (src_checksum_func_name == kDefaultBackupFileChecksumFuncName) {
    return dst_checksum_func_name == kStandardDbFileChecksumFuncName;
  }
  return false;
}
// Returns true when `fname` ends with ".sst" and has at least one character
// before that suffix (a name that is exactly ".sst" is rejected).
inline bool IsSstFile(const std::string& fname) {
  const std::string::size_type kSuffixLen = 4;  // strlen(".sst")
  if (fname.size() <= kSuffixLen) {
    return false;
  }
  return fname.compare(fname.size() - kSuffixLen, kSuffixLen, ".sst") == 0;
}
} // namespace
void BackupStatistics::IncrementNumberSuccessBackup() {
@ -190,15 +172,11 @@ class BackupEngineImpl : public BackupEngine {
struct FileInfo {
FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum,
const std::string& custom_checksum,
const std::string& checksum_name, const std::string& id = "",
const std::string& sid = "")
const std::string& id = "", const std::string& sid = "")
: refs(0),
filename(fname),
size(sz),
checksum_hex(checksum),
custom_checksum_hex(custom_checksum),
checksum_func_name(checksum_name),
db_id(id),
db_session_id(sid) {}
@ -209,8 +187,6 @@ class BackupEngineImpl : public BackupEngine {
const std::string filename;
const uint64_t size;
const std::string checksum_hex;
const std::string custom_checksum_hex;
const std::string checksum_func_name;
// DB identities
// db_id is obtained for potential usage in the future but not used
// currently
@ -399,78 +375,6 @@ class BackupEngineImpl : public BackupEngine {
return GetBackupMetaDir() + "/" + (tmp ? "." : "") +
ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? ".tmp" : "");
}
// Derives the plain file name, file number and file type from a
// backup-relative path.
//
// `file` is expected to be one of:
//   shared/<file>
//   shared_checksum/<file_crc32c_size>
//   shared_checksum/<file_session>
//   shared_checksum/<file_crc32c_session>
//   private/<number>/<file>
//
// On success `local_name`, `number` and `type` are filled in and OK is
// returned. If the resulting name cannot be parsed, Corruption is returned.
inline Status GetFileNameInfo(const std::string& file,
                              std::string& local_name, uint64_t& number,
                              FileType& type) const {
  // Everything after the final '/' is the stored file name.
  const size_t sep_pos = file.find_last_of('/');
  assert(sep_pos != std::string::npos);
  local_name = file.substr(sep_pos + 1);

  // Files under shared_checksum are stored as
  // <number>_<checksum>_<size>.<type>, <number>_<session>.<type>, or
  // <number>_<checksum>_<session>.<type>; map them back to the real name.
  const bool in_shared_checksum_dir =
      file.substr(0, sep_pos) == GetSharedChecksumDirRel();
  if (in_shared_checksum_dir) {
    local_name = GetFileFromChecksumFile(local_name);
  }

  // Determine the file number and type from the recovered name.
  if (ParseFileName(local_name, &number, &type)) {
    return Status::OK();
  }
  return Status::Corruption("Backup corrupted: Fail to parse filename " +
                            local_name);
}
// True when the backup options carry a non-null file checksum generator
// factory.
inline bool HasCustomChecksumGenFactory() const {
  return static_cast<bool>(options_.file_checksum_gen_factory);
}
// Returns nullptr if file_checksum_gen_factory is not set or
// file_checksum_gen_factory is not able to create a generator with
// name being requested_checksum_func_name
inline std::unique_ptr<FileChecksumGenerator> GetCustomChecksumGenerator(
const std::string& requested_checksum_func_name = "") const {
std::shared_ptr<FileChecksumGenFactory> checksum_factory =
options_.file_checksum_gen_factory;
if (checksum_factory == nullptr) {
return nullptr;
} else {
FileChecksumGenContext gen_context;
gen_context.requested_checksum_func_name = requested_checksum_func_name;
return checksum_factory->CreateFileChecksumGenerator(gen_context);
}
}
// Sets `checksum_func` to a generator for `requested_checksum_func_name`.
//
// `checksum_func` is left untouched when:
//   * the requested function is the default backup checksum function, or
//   * no custom checksum factory is configured, which indicates users would
//     like to use the backup default checksum function and accept the
//     degraded data integrity checking.
//
// Otherwise `checksum_func` is assigned the result of looking the name up in
// the configured factory. A failed lookup (nullptr) is tolerated only for the
// db standard checksum function, in which case the default backup checksum
// function is used instead; for any other name Status::InvalidArgument() is
// returned.
inline Status SetChecksumGenerator(
    const std::string& requested_checksum_func_name,
    std::unique_ptr<FileChecksumGenerator>& checksum_func) {
  // The requested checksum function is the default backup checksum function.
  if (requested_checksum_func_name == kDefaultBackupFileChecksumFuncName) {
    return Status::OK();
  }
  // No factory to consult: fall back to the backup default.
  if (!HasCustomChecksumGenFactory()) {
    return Status::OK();
  }

  checksum_func = GetCustomChecksumGenerator(requested_checksum_func_name);
  const bool lookup_failed = (checksum_func == nullptr);
  if (lookup_failed &&
      requested_checksum_func_name != kStandardDbFileChecksumFuncName) {
    return Status::InvalidArgument("Checksum function " +
                                   requested_checksum_func_name +
                                   " not found");
  }
  return Status::OK();
}
// If size_limit == 0, there is no size limit, copy everything.
//
@ -478,48 +382,27 @@ class BackupEngineImpl : public BackupEngine {
//
// @param src If non-empty, the file is copied from this pathname.
// @param contents If non-empty, the file will be created with these contents.
Status CopyOrCreateFile(
const std::string& src, const std::string& dst,
const std::string& contents, Env* src_env, Env* dst_env,
const EnvOptions& src_env_options, bool sync, RateLimiter* rate_limiter,
const std::string& backup_checksum_func_name, uint64_t* size = nullptr,
std::string* checksum_hex = nullptr,
std::string* custom_checksum_hex = nullptr, uint64_t size_limit = 0,
std::function<void()> progress_callback = []() {});
Status CopyOrCreateFile(const std::string& src, const std::string& dst,
const std::string& contents, Env* src_env,
Env* dst_env, const EnvOptions& src_env_options,
bool sync, RateLimiter* rate_limiter,
uint64_t* size = nullptr,
std::string* checksum_hex = nullptr,
uint64_t size_limit = 0,
std::function<void()> progress_callback = []() {});
Status CalculateChecksum(
const std::string& src, Env* src_env, const EnvOptions& src_env_options,
uint64_t size_limit, std::string* checksum_hex,
const std::unique_ptr<FileChecksumGenerator>& checksum_func = nullptr,
std::string* custom_checksum_hex = nullptr);
Status CalculateChecksum(const std::string& src, Env* src_env,
const EnvOptions& src_env_options,
uint64_t size_limit, std::string* checksum_hex);
// Obtain db_id and db_session_id from the table properties of file_path
Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options,
const std::string& file_path, std::string* db_id,
std::string* db_session_id);
Status GetFileChecksumsFromManifestInBackup(Env* src_env,
const BackupID& backup_id,
const BackupMeta* backup,
FileChecksumList* checksum_list);
Status VerifyFileWithCrc32c(Env* src_env, const BackupMeta* backup,
const std::string& rel_path);
// log::Reader::Reporter implementation that records the first corruption
// reported to it. `status` is dereferenced unconditionally, so it must be
// set to a valid Status before any corruption is reported.
struct LogReporter : public log::Reader::Reporter {
  // Destination for the first reported error; not owned.
  Status* status;

  virtual void Corruption(size_t /*bytes*/, const Status& s) override {
    // Keep only the first error: once a failure is stored, later reports are
    // ignored.
    if (!status->ok()) {
      return;
    }
    *status = s;
  }
};
struct CopyOrCreateResult {
uint64_t size;
std::string checksum_hex;
std::string custom_checksum_hex;
std::string checksum_func_name;
std::string db_id;
std::string db_session_id;
Status status;
@ -543,7 +426,6 @@ class BackupEngineImpl : public BackupEngine {
bool verify_checksum_after_work;
std::string src_checksum_func_name;
std::string src_checksum_hex;
std::string backup_checksum_func_name;
std::string db_id;
std::string db_session_id;
@ -560,7 +442,6 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work(false),
src_checksum_func_name(kUnknownFileChecksumFuncName),
src_checksum_hex(""),
backup_checksum_func_name(kUnknownFileChecksumFuncName),
db_id(""),
db_session_id("") {}
@ -586,7 +467,6 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work = o.verify_checksum_after_work;
src_checksum_func_name = std::move(o.src_checksum_func_name);
src_checksum_hex = std::move(o.src_checksum_hex);
backup_checksum_func_name = std::move(o.backup_checksum_func_name);
db_id = std::move(o.db_id);
db_session_id = std::move(o.db_session_id);
return *this;
@ -601,8 +481,6 @@ class BackupEngineImpl : public BackupEngine {
const std::string& _src_checksum_func_name =
kUnknownFileChecksumFuncName,
const std::string& _src_checksum_hex = "",
const std::string& _backup_checksum_func_name =
kUnknownFileChecksumFuncName,
const std::string& _db_id = "", const std::string& _db_session_id = "")
: src_path(std::move(_src_path)),
dst_path(std::move(_dst_path)),
@ -617,7 +495,6 @@ class BackupEngineImpl : public BackupEngine {
verify_checksum_after_work(_verify_checksum_after_work),
src_checksum_func_name(_src_checksum_func_name),
src_checksum_hex(_src_checksum_hex),
backup_checksum_func_name(_backup_checksum_func_name),
db_id(_db_id),
db_session_id(_db_session_id) {}
};
@ -999,49 +876,33 @@ Status BackupEngineImpl::Initialize() {
result.status = CopyOrCreateFile(
work_item.src_path, work_item.dst_path, work_item.contents,
work_item.src_env, work_item.dst_env, work_item.src_env_options,
work_item.sync, work_item.rate_limiter,
work_item.backup_checksum_func_name, &result.size,
&result.checksum_hex, &result.custom_checksum_hex,
work_item.size_limit, work_item.progress_callback);
result.checksum_func_name = work_item.backup_checksum_func_name;
work_item.sync, work_item.rate_limiter, &result.size,
&result.checksum_hex, work_item.size_limit,
work_item.progress_callback);
result.db_id = work_item.db_id;
result.db_session_id = work_item.db_session_id;
if (result.status.ok() && work_item.verify_checksum_after_work) {
// work_item.verify_checksum_after_work being true means backup engine
// has obtained its crc32c and/or custom checksum for the table file.
// Therefore, we can try to compare the checksums if possible.
// unknown checksum function name implies no db table file checksum in
// db manifest; work_item.verify_checksum_after_work being true means
// backup engine has calculated its crc32c checksum for the table
// file; therefore, we are able to compare the checksums.
if (work_item.src_checksum_func_name ==
kUnknownFileChecksumFuncName ||
IsSameChecksumFunc(result.checksum_func_name,
work_item.src_checksum_func_name)) {
std::string checksum_to_compare;
std::string checksum_func_name_used;
if (work_item.src_checksum_func_name ==
kUnknownFileChecksumFuncName ||
work_item.src_checksum_func_name ==
kStandardDbFileChecksumFuncName) {
// kUnknownFileChecksumFuncName implies no table file checksums in
// db manifest, but we can compare using the crc32c checksum
checksum_to_compare = result.checksum_hex;
checksum_func_name_used = kStandardDbFileChecksumFuncName;
} else {
checksum_to_compare = result.custom_checksum_hex;
checksum_func_name_used = work_item.src_checksum_func_name;
}
if (work_item.src_checksum_hex != checksum_to_compare) {
work_item.src_checksum_func_name == kDbFileChecksumFuncName) {
if (work_item.src_checksum_hex != result.checksum_hex) {
std::string checksum_info(
"Expected checksum is " + work_item.src_checksum_hex +
" while computed checksum is " + checksum_to_compare);
result.status = Status::Corruption(
checksum_func_name_used + " mismatch after copying to " +
work_item.dst_path + ": " + checksum_info);
" while computed checksum is " + result.checksum_hex);
result.status =
Status::Corruption("Checksum mismatch after copying to " +
work_item.dst_path + ": " + checksum_info);
}
} else {
std::string checksum_function_info(
"Existing checksum function is " +
work_item.src_checksum_func_name +
" while provided checksum function is " +
result.checksum_func_name);
kBackupFileChecksumFuncName);
ROCKS_LOG_INFO(
options_.info_log,
"Unable to verify checksum after copying to %s: %s\n",
@ -1129,6 +990,15 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
CheckpointImpl checkpoint(db);
uint64_t sequence_number = 0;
DBOptions db_options = db->GetDBOptions();
FileChecksumGenFactory* db_checksum_factory =
db_options.file_checksum_gen_factory.get();
const std::string kFileChecksumGenFactoryName =
"FileChecksumGenCrc32cFactory";
bool compare_checksum =
db_checksum_factory != nullptr &&
db_checksum_factory->Name() == kFileChecksumGenFactoryName
? true
: false;
EnvOptions src_raw_env_options(db_options);
s = checkpoint.CreateCustomCheckpoint(
db_options,
@ -1194,14 +1064,12 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
options.progress_callback, contents);
} /* create_file_cb */,
&sequence_number, options.flush_before_backup ? 0 : port::kMaxUint64,
db_options.file_checksum_gen_factory == nullptr ? false : true);
compare_checksum);
if (s.ok()) {
new_backup->SetSequenceNumber(sequence_number);
}
}
ROCKS_LOG_INFO(options_.info_log,
"add files for backup done (%s), wait finish.",
s.ok() ? "OK" : "not OK");
ROCKS_LOG_INFO(options_.info_log, "add files for backup done, wait finish.");
Status item_status;
for (auto& item : backup_items_to_finish) {
item.result.wait();
@ -1213,11 +1081,10 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
}
if (item_status.ok()) {
item_status = new_backup.get()->AddFile(std::make_shared<FileInfo>(
item.dst_relative, result.size, result.checksum_hex,
result.custom_checksum_hex, result.checksum_func_name, result.db_id,
item.dst_relative, result.size, result.checksum_hex, result.db_id,
result.db_session_id));
}
if (s.ok() && !item_status.ok()) {
if (!item_status.ok()) {
s = item_status;
}
}
@ -1475,74 +1342,50 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
DeleteChildren(db_dir);
}
Status s;
// Try to obtain checksum info from backuped DB MANIFEST
// The checksum info will be used for validating the checksums of the table
// files after restoration, in addition to the default backup engine crc32c
// checksums.
std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id, backup.get(),
checksum_list.get());
if (!s.ok()) {
return s;
}
RateLimiter* rate_limiter = options_.restore_rate_limiter.get();
if (rate_limiter) {
copy_file_buffer_size_ =
static_cast<size_t>(rate_limiter->GetSingleBurstBytes());
}
Status s;
std::vector<RestoreAfterCopyOrCreateWorkItem> restore_items_to_finish;
for (const auto& file_info : backup->GetFiles()) {
const std::string& file = file_info->filename;
std::string dst;
// 1. extract the filename
size_t slash = file.find_last_of('/');
// file will either be shared/<file>, shared_checksum/<file_crc32c_size>,
// shared_checksum/<file_session>, shared_checksum/<file_crc32c_session>,
// or private/<number>/<file>
assert(slash != std::string::npos);
dst = file.substr(slash + 1);
// if the file was in shared_checksum, extract the real file name
// in this case the file is <number>_<checksum>_<size>.<type>,
// <number>_<session>.<type>, or <number>_<checksum>_<session>.<type>
if (file.substr(0, slash) == GetSharedChecksumDirRel()) {
dst = GetFileFromChecksumFile(dst);
}
// 2. find the filetype
uint64_t number;
FileType type;
s = GetFileNameInfo(file, dst, number, type);
if (!s.ok()) {
return s;
bool ok = ParseFileName(dst, &number, &type);
if (!ok) {
return Status::Corruption("Backup corrupted: Fail to parse filename " +
dst);
}
std::string src_checksum_func_name = kUnknownFileChecksumFuncName;
std::string src_checksum_str = kUnknownFileChecksum;
std::string src_checksum_hex;
bool has_manifest_checksum = false;
if (type == kTableFile) {
Status file_checksum_status = checksum_list->SearchOneFileChecksum(
number, &src_checksum_str, &src_checksum_func_name);
if (file_checksum_status.ok() &&
src_checksum_str != kUnknownFileChecksum &&
src_checksum_func_name != kUnknownFileChecksumFuncName) {
src_checksum_hex = ChecksumStrToHex(src_checksum_str);
has_manifest_checksum = true;
}
}
// Construct the final path
// 3. Construct the final path
// kLogFile lives in wal_dir and all the rest live in db_dir
dst = ((type == kLogFile) ? wal_dir : db_dir) +
"/" + dst;
ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(),
dst.c_str());
std::string backup_checksum_func_name = file_info->checksum_func_name;
std::unique_ptr<FileChecksumGenerator> checksum_func;
if (src_checksum_func_name != kUnknownFileChecksumFuncName) {
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
if (checksum_func != nullptr) {
backup_checksum_func_name = checksum_func->Name();
}
}
CopyOrCreateWorkItem copy_or_create_work_item(
GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_,
EnvOptions() /* src_env_options */, false, rate_limiter,
0 /* size_limit */, []() {} /* progress_callback */,
has_manifest_checksum, src_checksum_func_name, src_checksum_hex,
backup_checksum_func_name);
0 /* size_limit */);
RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
copy_or_create_work_item.result.get_future(), file_info->checksum_hex);
files_to_copy_or_create_.write(std::move(copy_or_create_work_item));
@ -1560,11 +1403,7 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
s = item_status;
break;
} else if (item.checksum_hex != result.checksum_hex) {
// Compare crc32c checksums (especially for non-table files)
std::string checksum_info("Expected checksum is " + item.checksum_hex +
" while computed checksum is " +
result.checksum_hex);
s = Status::Corruption("Crc32c checksum check failed: " + checksum_info);
s = Status::Corruption("Checksum check failed");
break;
}
}
@ -1603,17 +1442,6 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size);
}
Status s;
std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
if (verify_with_checksum) {
// Try to obtain checksum info from backuped DB MANIFEST
s = GetFileChecksumsFromManifestInBackup(backup_env_, backup_id,
backup.get(), checksum_list.get());
if (!s.ok()) {
return s;
}
}
// For all files registered in backup
for (const auto& file_info : backup->GetFiles()) {
const auto abs_path = GetAbsolutePath(file_info->filename);
@ -1632,68 +1460,27 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
}
if (verify_with_checksum) {
// verify file checksum
// try setting checksum_func
std::unique_ptr<FileChecksumGenerator> checksum_func;
std::string src_checksum_func_name = kUnknownFileChecksumFuncName;
std::string src_checksum_str = kUnknownFileChecksum;
std::string src_checksum_hex;
if (IsSstFile(file_info->filename)) {
const std::string& file = file_info->filename;
std::string local_name;
uint64_t number;
FileType type;
s = GetFileNameInfo(file, local_name, number, type);
if (!s.ok()) {
return s;
}
assert(type == kTableFile);
// Try to get checksum for the table file
Status file_checksum_status = checksum_list->SearchOneFileChecksum(
number, &src_checksum_str, &src_checksum_func_name);
if (file_checksum_status.ok() &&
src_checksum_str != kUnknownFileChecksum &&
src_checksum_func_name != kUnknownFileChecksumFuncName) {
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
src_checksum_hex = ChecksumStrToHex(src_checksum_str);
}
}
std::string checksum_hex;
ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n",
abs_path.c_str());
std::string checksum_hex;
std::string custom_checksum_hex;
CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */,
&checksum_hex, checksum_func, &custom_checksum_hex);
&checksum_hex);
if (file_info->checksum_hex != checksum_hex) {
std::string checksum_info(
"Expected checksum is " + file_info->checksum_hex +
" while computed checksum is " + checksum_hex);
return Status::Corruption("File corrupted: crc32c mismatch for " +
return Status::Corruption("File corrupted: Checksum mismatch for " +
abs_path + ": " + checksum_info);
}
if (checksum_func != nullptr && src_checksum_hex != custom_checksum_hex) {
std::string checksum_info("Expected checksum is " + src_checksum_hex +
" while computed checksum is " +
custom_checksum_hex);
return Status::Corruption("File corrupted: " + src_checksum_func_name +
" mismatch for " + abs_path + ": " +
checksum_info);
}
}
}
return Status::OK();
}
Status BackupEngineImpl::CopyOrCreateFile(
const std::string& src, const std::string& dst, const std::string& contents,
Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync,
RateLimiter* rate_limiter, const std::string& backup_checksum_func_name,
uint64_t* size, std::string* checksum_hex, std::string* custom_checksum_hex,
RateLimiter* rate_limiter, uint64_t* size, std::string* checksum_hex,
uint64_t size_limit, std::function<void()> progress_callback) {
assert(src.empty() != contents.empty());
Status s;
@ -1707,13 +1494,6 @@ Status BackupEngineImpl::CopyOrCreateFile(
}
uint32_t checksum_value = 0;
// Get custom checksum function
std::unique_ptr<FileChecksumGenerator> checksum_func;
s = SetChecksumGenerator(backup_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
// Check if size limit is set. if not, set it to very big number
if (size_limit == 0) {
size_limit = std::numeric_limits<uint64_t>::max();
@ -1768,10 +1548,6 @@ Status BackupEngineImpl::CopyOrCreateFile(
if (checksum_hex != nullptr) {
checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
}
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Update(data.data(), data.size());
}
s = dest_writer->Append(data);
if (rate_limiter != nullptr) {
rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */,
@ -1784,14 +1560,10 @@ Status BackupEngineImpl::CopyOrCreateFile(
}
} while (s.ok() && contents.empty() && data.size() > 0 && size_limit > 0);
// Convert uint32_t checksum to hex checksum
if (checksum_hex != nullptr) {
// Convert uint32_t checksum to hex checksum
checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
}
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Finalize();
custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum()));
}
if (s.ok() && sync) {
s = dest_writer->Sync(false);
@ -1819,50 +1591,27 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
std::string dst_relative_tmp;
Status s;
std::string checksum_hex;
std::string custom_checksum_hex;
// The function name of backup checksum function.
std::string backup_checksum_func_name = kDefaultBackupFileChecksumFuncName;
std::string db_id;
std::string db_session_id;
// whether a default or custom checksum for a table file is available
// whether the checksum for a table file is available
bool has_checksum = false;
// Set up the custom checksum function.
// A nullptr checksum_func indicates the default backup checksum function
// will be used. If checksum_func is not nullptr, then both the default
// backup checksum function and checksum_func will be used.
std::unique_ptr<FileChecksumGenerator> checksum_func;
if (src_checksum_func_name != kUnknownFileChecksumFuncName) {
// DB files have checksum functions
s = SetChecksumGenerator(src_checksum_func_name, checksum_func);
if (!s.ok()) {
return s;
}
if (checksum_func != nullptr) {
backup_checksum_func_name = checksum_func->Name();
}
}
// Whenever the db checksum function name matches the backup engine custom
// checksum function name, we will compare the checksum values after copying.
// Note that only table files may have a known checksum name passed in.
// Whenever a default checksum function name is passed in, we will compare
// the corresponding checksum values after copying. Note that only table files
// may have a known checksum function name passed in.
//
// If the checksum function names do not match and db session id is not
// If no default checksum function name is passed in and db session id is not
// available, we will calculate the checksum *before* copying in two cases
// (we always calculate checksums when copying or creating for any file types):
// a) share_files_with_checksum is true and file type is table;
// b) share_table_files is true and the file exists already.
//
// Step 0: Check if a known checksum function name is passed in
if (IsSameChecksumFunc(backup_checksum_func_name, src_checksum_func_name)) {
// Step 0: Check if default checksum function name is passed in
if (kDbFileChecksumFuncName == src_checksum_func_name) {
if (src_checksum_str == kUnknownFileChecksum) {
return Status::Aborted("Unknown checksum value for " + fname);
}
if (checksum_func == nullptr) {
checksum_hex = ChecksumStrToHex(src_checksum_str);
} else {
custom_checksum_hex = ChecksumStrToHex(src_checksum_str);
}
checksum_hex = ChecksumStrToHex(src_checksum_str);
has_checksum = true;
}
@ -1881,8 +1630,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// the shared_checksum directory.
if (!has_checksum && db_session_id.empty()) {
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
size_limit, &checksum_hex, checksum_func,
&custom_checksum_hex);
size_limit, &checksum_hex);
if (!s.ok()) {
return s;
}
@ -1901,14 +1649,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// shared_checksum/<file_number>_<db_session_id>.sst
// Otherwise, dst_relative is of the form
// shared_checksum/<file_number>_<checksum>_<size>.sst
//
// Also, we display custom checksums in the name if possible.
dst_relative = GetSharedFileWithChecksum(
dst_relative, has_checksum,
checksum_func == nullptr || UseLegacyNaming(db_session_id)
? checksum_hex
: custom_checksum_hex,
size_bytes, db_session_id);
dst_relative, has_checksum, checksum_hex, size_bytes, db_session_id);
dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
dst_relative = GetSharedFileWithChecksumRel(dst_relative, false);
} else if (shared) {
@ -1973,14 +1715,10 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
backup_env_->DeleteFile(final_dest_path);
} else {
// file exists and referenced
if (!has_checksum || checksum_hex.empty()) {
// Either both checksum_hex and custom_checksum_hex need recalculating
// or only checksum_hex needs recalculating
if (!has_checksum) {
// FIXME(peterd): extra I/O
s = CalculateChecksum(
src_dir + fname, db_env_, src_env_options, size_limit,
&checksum_hex, checksum_func,
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
size_limit, &checksum_hex);
if (!s.ok()) {
return s;
}
@ -1999,11 +1737,6 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
fname.c_str(), checksum_hex.c_str(), size_bytes);
}
}
if (checksum_func != nullptr) {
ROCKS_LOG_INFO(options_.info_log, "%s checksum is %s",
backup_checksum_func_name.c_str(),
custom_checksum_hex.c_str());
}
} else if (backuped_file_infos_.find(dst_relative) ==
backuped_file_infos_.end() &&
!same_path) {
@ -2020,14 +1753,10 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// the file is present and referenced by a backup
ROCKS_LOG_INFO(options_.info_log,
"%s already present, calculate checksum", fname.c_str());
if (!has_checksum || checksum_hex.empty()) {
// Either both checksum_hex and custom_checksum_hex need recalculating
// or only checksum_hex needs recalculating
if (!has_checksum) {
// FIXME(peterd): extra I/O
s = CalculateChecksum(
src_dir + fname, db_env_, src_env_options, size_limit,
&checksum_hex, checksum_func,
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
size_limit, &checksum_hex);
if (!s.ok()) {
return s;
}
@ -2045,8 +1774,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
src_dir.empty() ? "" : src_dir + fname, *copy_dest_path, contents,
db_env_, backup_env_, src_env_options, options_.sync, rate_limiter,
size_limit, progress_callback, has_checksum, src_checksum_func_name,
checksum_func == nullptr ? checksum_hex : custom_checksum_hex,
backup_checksum_func_name, db_id, db_session_id);
checksum_hex, db_id, db_session_id);
BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
copy_or_create_work_item.result.get_future(), shared, need_to_copy,
backup_env_, temp_dest_path, final_dest_path, dst_relative);
@ -2062,8 +1790,6 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
result.status = s;
result.size = size_bytes;
result.checksum_hex = std::move(checksum_hex);
result.custom_checksum_hex = std::move(custom_checksum_hex);
result.checksum_func_name = std::move(backup_checksum_func_name);
result.db_id = std::move(db_id);
result.db_session_id = std::move(db_session_id);
promise_result.set_value(std::move(result));
@ -2071,16 +1797,14 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
return s;
}
Status BackupEngineImpl::CalculateChecksum(
const std::string& src, Env* src_env, const EnvOptions& src_env_options,
uint64_t size_limit, std::string* checksum_hex,
const std::unique_ptr<FileChecksumGenerator>& checksum_func,
std::string* custom_checksum_hex) {
Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env,
const EnvOptions& src_env_options,
uint64_t size_limit,
std::string* checksum_hex) {
if (checksum_hex == nullptr) {
return Status::InvalidArgument("Checksum pointer is null");
return Status::Aborted("Checksum pointer is null");
}
uint32_t checksum_value = 0;
if (size_limit == 0) {
size_limit = std::numeric_limits<uint64_t>::max();
}
@ -2109,18 +1833,10 @@ Status BackupEngineImpl::CalculateChecksum(
}
size_limit -= data.size();
checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Update(data.data(), data.size());
}
} while (data.size() > 0 && size_limit > 0);
checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
if (checksum_func != nullptr && custom_checksum_hex != nullptr) {
checksum_func->Finalize();
custom_checksum_hex->assign(ChecksumStrToHex(checksum_func->GetChecksum()));
}
return s;
}
@ -2179,77 +1895,6 @@ Status BackupEngineImpl::GetFileDbIdentities(Env* src_env,
}
}
// Fills `checksum_list` with the file checksum information recorded in the
// DB MANIFEST stored in backup `backup_id`.
//
// The backup's CURRENT file is read to find the MANIFEST file name. Both
// CURRENT and the MANIFEST are checked with VerifyFileWithCrc32c() before
// their contents are used.
//
// Returns InvalidArgument when `checksum_list` is null; otherwise returns the
// first non-OK status from a verification or read step, or the status of
// GetFileChecksumsFromManifest().
Status BackupEngineImpl::GetFileChecksumsFromManifestInBackup(
    Env* src_env, const BackupID& backup_id, const BackupMeta* backup,
    FileChecksumList* checksum_list) {
  if (checksum_list == nullptr) {
    return Status::InvalidArgument("checksum_list is nullptr");
  }
  checksum_list->reset();

  // Step 1: verify CURRENT and read the MANIFEST file name out of it.
  const std::string current_rel_path =
      GetPrivateFileRel(backup_id, false /* tmp */, "CURRENT");
  Status s = VerifyFileWithCrc32c(src_env, backup, current_rel_path);
  if (!s.ok()) {
    return s;
  }
  std::string manifest_filename;
  s = ReadFileToString(src_env, GetAbsolutePath(current_rel_path),
                       &manifest_filename);
  if (!s.ok()) {
    return s;
  }

  // Drop any trailing '\n' characters from the name read out of CURRENT.
  const std::string::size_type last_kept =
      manifest_filename.find_last_not_of('\n');
  if (last_kept == std::string::npos) {
    manifest_filename.clear();
  } else {
    manifest_filename.erase(last_kept + 1);
  }

  // Step 2: verify the MANIFEST itself.
  const std::string manifest_rel_path =
      GetPrivateFileRel(backup_id, false /* tmp */, manifest_filename);
  s = VerifyFileWithCrc32c(src_env, backup, manifest_rel_path);
  if (!s.ok()) {
    return s;
  }

  // Step 3: read the whole MANIFEST file in the backup.
  return GetFileChecksumsFromManifest(
      src_env, GetAbsolutePath(manifest_rel_path),
      std::numeric_limits<uint64_t>::max() /*manifest_file_size*/,
      checksum_list);
}
// Recomputes the checksum of the backup file at `rel_path` with
// CalculateChecksum() and compares it with the `checksum_hex` recorded for
// that file in `backup`.
//
// Returns Corruption if `backup` has no entry for `rel_path` or if the
// recomputed value differs from the recorded one; returns the failing status
// if the checksum calculation itself fails.
Status BackupEngineImpl::VerifyFileWithCrc32c(Env* src_env,
                                              const BackupMeta* backup,
                                              const std::string& rel_path) {
  const std::shared_ptr<FileInfo> recorded = backup->GetFile(rel_path);
  if (!recorded) {
    return Status::Corruption(rel_path + " is missing");
  }

  std::string computed_hex;
  Status s =
      CalculateChecksum(GetAbsolutePath(rel_path), src_env, EnvOptions(),
                        0 /* size_limit */, &computed_hex);
  if (!s.ok()) {
    return s;
  }

  const std::string& recorded_hex = recorded->checksum_hex;
  if (computed_hex == recorded_hex) {
    return s;
  }

  std::string detail("Expected checksum is ");
  detail += recorded_hex;
  detail += " while computed checksum is ";
  detail += computed_hex;
  return Status::Corruption("crc32c mismatch for " + rel_path + ": " + detail);
}
void BackupEngineImpl::DeleteChildren(const std::string& dir,
uint32_t file_type_filter) {
std::vector<std::string> children;
@ -2423,14 +2068,6 @@ Status BackupEngineImpl::BackupMeta::AddFile(
return Status::Corruption(
"Checksum mismatch for existing backup file. Delete old backups and "
"try again.");
} else if (IsSameChecksumFunc(itr->second->checksum_func_name,
file_info->checksum_func_name) &&
!itr->second->custom_checksum_hex.empty() &&
itr->second->custom_checksum_hex !=
file_info->custom_checksum_hex) {
return Status::Corruption(
"Custom checksum mismatch for existing backup file. Delete old "
"backups and try again.");
}
++itr->second->refs; // increase refcount if already present
}
@ -2544,14 +2181,12 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
}
uint32_t checksum_value = 0;
std::string checksum_func_name = kUnknownFileChecksumFuncName;
if (line.starts_with(checksum_prefix)) {
line.remove_prefix(checksum_prefix.size());
checksum_func_name = kDefaultBackupFileChecksumFuncName;
checksum_value = static_cast<uint32_t>(strtoul(line.data(), nullptr, 10));
if (line != ROCKSDB_NAMESPACE::ToString(checksum_value)) {
return Status::Corruption("Invalid crc32c checksum value for " +
filename + " in " + meta_filename_);
return Status::Corruption("Invalid checksum value for " + filename +
" in " + meta_filename_);
}
} else {
return Status::Corruption("Unknown checksum type for " + filename +
@ -2559,8 +2194,7 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
}
files.emplace_back(
new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value),
"" /* custom_checksum_hex */, checksum_func_name));
new FileInfo(filename, size, ChecksumInt32ToHex(checksum_value)));
}
if (s.ok() && data.size() > 0) {

View file

@ -48,148 +48,6 @@ const auto kFlagMatchInterimNaming =
const auto kNamingDefault =
kUseDbSessionId | kFlagIncludeFileSize | kFlagMatchInterimNaming;
// Test-only checksum generator whose result does not depend on file content:
// Update() ignores its input and the checksum is a constant selected by the
// `state` flag passed at construction (0 when `state` is true, 1 otherwise).
class DummyFileChecksumGen : public FileChecksumGenerator {
 public:
  explicit DummyFileChecksumGen(const FileChecksumGenContext& /* context */,
                                bool state)
      : checksum_(state ? 0u : 1u) {}

  // File content is deliberately ignored.
  void Update(const char* /* data */, size_t /* n */) override {}

  // Serializes the constant checksum; asserts it has not been called before.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Store as big endian raw bytes
    const uint32_t swapped = EndianSwapValue(checksum_);
    PutFixed32(&checksum_str_, swapped);
  }

  // Returns the serialized checksum; asserts Finalize() has produced one.
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "DummyFileChecksum"; }

 private:
  uint32_t checksum_;
  std::string checksum_str_;
};
// Factory for DummyFileChecksumGen. The `state` flag given at construction is
// forwarded to every generator it creates, so two factories built with
// different flags yield generators with the same name but different checksums.
class DummyFileChecksumGenFactory : public FileChecksumGenFactory {
 public:
  explicit DummyFileChecksumGenFactory(bool state = false) : state_(state) {}

  // Creates a generator when no specific function is requested or when
  // "DummyFileChecksum" is requested; returns nullptr for any other name.
  std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
      const FileChecksumGenContext& context) override {
    const auto& requested = context.requested_checksum_func_name;
    if (!requested.empty() && !(requested == "DummyFileChecksum")) {
      return nullptr;
    }
    return std::unique_ptr<FileChecksumGenerator>(
        new DummyFileChecksumGen(context, state_));
  }

  const char* Name() const override { return "DummyFileChecksumGenFactory"; }

 private:
  bool state_;
};
// Test-only checksum generator that buffers every byte passed to Update() and
// computes a 32-bit Hash() (seed 1) over the buffered bytes in Finalize().
class FileHash32Gen : public FileChecksumGenerator {
 public:
  explicit FileHash32Gen(const FileChecksumGenContext& /*context*/)
      : checksum_(0) {}

  // Appends the next `n` bytes of file content to the internal buffer.
  void Update(const char* data, size_t n) override { content_.append(data, n); }

  // Hashes the buffered content and serializes the result. Asserts that it
  // has not been called before.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Hash the buffer by its explicit length. The buffered bytes are raw file
    // content and may contain '\0'; measuring them with strlen() would stop
    // at the first NUL and leave the rest of the file out of the checksum.
    checksum_ = Hash(content_.data(), content_.size(), 1);
    // Store as big endian raw bytes
    PutFixed32(&checksum_str_, EndianSwapValue(checksum_));
  }

  // Returns the serialized checksum; asserts Finalize() has produced one.
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "FileHash32"; }

 private:
  std::string content_;
  uint32_t checksum_;
  std::string checksum_str_;
};
// Test-only checksum generator that buffers every byte passed to Update() and
// computes a 64-bit Hash64() (seed 1) over the buffered bytes in Finalize().
class FileHash64Gen : public FileChecksumGenerator {
 public:
  explicit FileHash64Gen(const FileChecksumGenContext& /*context*/)
      : checksum_(0) {}

  // Appends the next `n` bytes of file content to the internal buffer.
  void Update(const char* data, size_t n) override { content_.append(data, n); }

  // Hashes the buffered content and serializes the result. Asserts that it
  // has not been called before.
  void Finalize() override {
    assert(checksum_str_.empty());
    // Hash the buffer by its explicit length. The buffered bytes are raw file
    // content and may contain '\0'; measuring them with strlen() would stop
    // at the first NUL and leave the rest of the file out of the checksum.
    checksum_ = Hash64(content_.data(), content_.size(), 1);
    // Store as big endian raw bytes
    PutFixed64(&checksum_str_, EndianSwapValue(checksum_));
  }

  // Returns the serialized checksum; asserts Finalize() has produced one.
  std::string GetChecksum() const override {
    assert(!checksum_str_.empty());
    return checksum_str_;
  }

  const char* Name() const override { return "FileHash64"; }

 private:
  std::string content_;
  uint64_t checksum_;
  std::string checksum_str_;
};
// Factory that only knows the "FileHash32" checksum function.
class FileHash32GenFactory : public FileChecksumGenFactory {
 public:
  // Returns a FileHash32Gen when no specific function is requested or when
  // "FileHash32" is requested; returns nullptr for any other name.
  std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
      const FileChecksumGenContext& context) override {
    const auto& requested = context.requested_checksum_func_name;
    const bool supported = requested.empty() || requested == "FileHash32";
    if (!supported) {
      return nullptr;
    }
    return std::unique_ptr<FileChecksumGenerator>(new FileHash32Gen(context));
  }

  const char* Name() const override { return "FileHash32GenFactory"; }
};
class FileHashGenFactory : public FileChecksumGenFactory {
public:
std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& context) override {
if (context.requested_checksum_func_name.empty() ||
context.requested_checksum_func_name == "FileHash64") {
return std::unique_ptr<FileChecksumGenerator>(new FileHash64Gen(context));
} else if (context.requested_checksum_func_name == "FileHash32") {
return std::unique_ptr<FileChecksumGenerator>(new FileHash32Gen(context));
} else {
return nullptr;
}
}
const char* Name() const override { return "FileHashGenFactory"; }
};
class DummyDB : public StackableDB {
public:
/* implicit */
@ -992,253 +850,6 @@ class BackupableDBTestWithParam : public BackupableDBTest,
}
};
// Exercises backup creation, verification and restore when the DB and the
// backup engine are configured with checksum factories, for every share
// option in kAllShareOptions:
//   1. DB uses the crc32c factory, backup engine factory left as configured
//      by the fixture.
//   2. DB and backup engine both use the crc32c factory.
//   3. DB and backup engine both use FileHashGenFactory.
//   4. DB and backup engine use DummyFileChecksumGenFactory instances that
//      share a generator name but were built with different `state` flags,
//      which is expected to make backup creation / verification fail.
TEST_F(BackupableDBTest, DbAndBackupSameCustomChecksum) {
const int keys_iteration = 5000;
options_.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
// backup uses its default crc32c
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// Verify backup 1 both without and with checksum verification.
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// backup uses db crc32c
backupable_options_->file_checksum_gen_factory =
GetFileChecksumGenCrc32cFactory();
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// DB and backup engine share one FileHashGenFactory instance.
std::shared_ptr<FileChecksumGenFactory> hash_factory =
std::make_shared<FileHashGenFactory>();
options_.file_checksum_gen_factory = hash_factory;
backupable_options_->file_checksum_gen_factory = hash_factory;
for (const auto& sopt : kAllShareOptions) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration + 1);
// delete old data
DestroyDB(dbname_, options_);
}
// Mimic a checksum mismatch for custom checksum function by using a dummy
// checksum function with a state
// (DummyFileChecksumGen yields checksum 1 for state == false and 0 for
// state == true, so the two factories below disagree on every file.)
std::shared_ptr<FileChecksumGenFactory> dummy_factory_0 =
std::make_shared<DummyFileChecksumGenFactory>(false);
std::shared_ptr<FileChecksumGenFactory> dummy_factory_1 =
std::make_shared<DummyFileChecksumGenFactory>(true);
FileChecksumGenContext context;
// Both factories have the same generator name
std::string dummy_checksum_function_name =
dummy_factory_0->CreateFileChecksumGenerator(context)->Name();
options_.file_checksum_gen_factory = dummy_factory_0;
for (const auto& sopt : kAllShareOptions) {
backupable_options_->file_checksum_gen_factory = dummy_factory_1;
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration);
// DB and backup engine do not have the same custom checksum function
// "state"
Status s = backup_engine_->CreateNewBackup(db_.get());
ASSERT_NOK(s);
// The failure must be reported as a Corruption that names the checksum
// function.
ASSERT_TRUE(
s.ToString().find("Corruption: " + dummy_checksum_function_name +
" mismatch") != std::string::npos);
CloseBackupEngine();
// Change custom checksum function and try again
backupable_options_->file_checksum_gen_factory = dummy_factory_0;
OpenBackupEngine(true /* destroy_old_data */);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
ASSERT_OK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
CloseBackupEngine();
// Try verifying or restoring a backup using a different custom checksum
// function "state"
backupable_options_->file_checksum_gen_factory = dummy_factory_1;
OpenBackupEngine(false /* destroy_old_data */);
ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(1, dbname_, dbname_));
CloseDBAndBackupEngine();
// delete old data
DestroyDB(dbname_, options_);
}
}
// Walks a DB / backup engine pair through a sequence of checksum factory
// configurations, for every share option in kAllShareOptions, and checks
// which backups can be created, verified, restored and deleted at each step.
// `i` is a running index used to derive the backup ids and key ranges being
// checked; the inline notes on its value assume backup ids are assigned
// sequentially starting at 1 -- TODO confirm against BackupEngine.
TEST_F(BackupableDBTest, CustomChecksumTransition) {
const int keys_iteration = 5000;
std::shared_ptr<FileChecksumGenFactory> hash32_factory =
std::make_shared<FileHash32GenFactory>();
std::shared_ptr<FileChecksumGenFactory> hash_factory =
std::make_shared<FileHashGenFactory>();
for (const auto& sopt : kAllShareOptions) {
// 1) with one custom checksum function (FileHash32GenFactory) for both
// db and backup
int i = 0;
options_.file_checksum_gen_factory = hash32_factory;
backupable_options_->file_checksum_gen_factory = hash32_factory;
// open with old backup
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// verify the backup with checksum
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
keys_iteration * (i + 2));
// 2) with two custom checksum functions (FileHashGenFactory) for db
// but one custom checksum function (FileHash32GenFactory) for backup
++i;
// i == 1 from here on.
options_.file_checksum_gen_factory = hash_factory;
backupable_options_->file_checksum_gen_factory = hash32_factory;
// open with old backup
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
// note that the checksum factory for backup does not know the custom
// checksum function used in the db
ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
// but it knows the custom checksum function for the older backup
ASSERT_OK(backup_engine_->VerifyBackup(i, true));
// reset the factory to nullptr and try again
CloseBackupEngine();
backupable_options_->file_checksum_gen_factory = nullptr;
OpenBackupEngine();
// Deleting backup i + 1 is expected to fail here -- presumably because the
// failed CreateNewBackup above left no such backup behind (TODO confirm).
ASSERT_NOK(backup_engine_->DeleteBackup(i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1));
AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
keys_iteration * (i + 2));
// Now set the factory to the same as the one used in the db
backupable_options_->file_checksum_gen_factory = hash_factory;
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
CloseBackupEngine();
++i;
// i == 2 temporarily; i + 1 is used below as the id of the backup that was
// just created with hash_factory.
// Say, we accidentally change the factory
backupable_options_->file_checksum_gen_factory = hash32_factory;
OpenBackupEngine();
// Unable to verify the latest backup.
ASSERT_NOK(backup_engine_->VerifyBackup(i + 1, true));
// Unable to restore the latest backup.
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
CloseBackupEngine();
// Reset the factory to the same as the one used in the db.
backupable_options_->file_checksum_gen_factory = hash_factory;
OpenBackupEngine();
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
ASSERT_OK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
// Remove that backup again and restore the index to its previous value.
ASSERT_OK(backup_engine_->DeleteBackup(i + 1));
--i;
CloseDBAndBackupEngine();
// 3) with one custom checksum function (FileHash32GenFactory) for db
// but two custom checksum functions (FileHashGenFactory) for backup
// note that the checksum factory for backup does know the checksum
// function in the db
++i;
// i == 2 from here on.
options_.file_checksum_gen_factory = hash32_factory;
backupable_options_->file_checksum_gen_factory = hash_factory;
// open with old backup
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// All three backups (i - 1, i, i + 1) must verify with checksums.
ASSERT_OK(backup_engine_->VerifyBackup(i - 1, true));
ASSERT_OK(backup_engine_->VerifyBackup(i, true));
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(i - 1, 0, keys_iteration * (i - 1),
keys_iteration * i);
AssertBackupConsistency(i, 0, keys_iteration * i, keys_iteration * (i + 1));
AssertBackupConsistency(i + 1, 0, keys_iteration * (i + 1),
keys_iteration * (i + 2));
// 4) no custom checksums
++i;
// i == 3 from here on.
options_.file_checksum_gen_factory = nullptr;
backupable_options_->file_checksum_gen_factory = nullptr;
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration * (i + 1));
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
// Every backup id from 1 to i + 1 must verify with checksums.
for (int j = 0; j <= i; ++j) {
ASSERT_OK(backup_engine_->VerifyBackup(j + 1, true));
}
CloseDBAndBackupEngine();
for (int j = 0; j <= i; ++j) {
AssertBackupConsistency(j + 1, 0, keys_iteration * (j + 1),
keys_iteration * (j + 2));
}
// delete old data
DestroyDB(dbname_, options_);
}
}
// Creates several backups in a row without writing to the DB in between and
// checks that each one verifies with checksums. This is repeated for every
// share option in kAllShareOptions and for four factory settings (none, the
// crc32c factory, FileHash32GenFactory, FileHashGenFactory), with the DB and
// the backup engine always using the same setting.
TEST_F(BackupableDBTest, CustomChecksumNoNewDbTables) {
const int keys_iteration = 5000;
std::vector<std::shared_ptr<FileChecksumGenFactory>> checksum_factories{
nullptr, GetFileChecksumGenCrc32cFactory(),
std::make_shared<FileHash32GenFactory>(),
std::make_shared<FileHashGenFactory>()};
for (const auto& sopt : kAllShareOptions) {
for (const auto& f : checksum_factories) {
// Same factory on both the DB side and the backup side.
options_.file_checksum_gen_factory = f;
backupable_options_->file_checksum_gen_factory = f;
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
sopt);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
// No new table files have been created since the last backup.
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(2, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2);
AssertBackupConsistency(2, 0, keys_iteration, keys_iteration * 2);
// Reopen without destroying old data so the existing backups are kept.
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
sopt);
// No new table files have been created since the last backup and backup
// engine opening
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
ASSERT_OK(backup_engine_->VerifyBackup(3, true));
CloseDBAndBackupEngine();
AssertBackupConsistency(3, 0, keys_iteration, keys_iteration * 2);
// delete old data
DestroyDB(dbname_, options_);
}
}
}
TEST_F(BackupableDBTest, FileCollision) {
const int keys_iteration = 5000;
for (const auto& sopt : kAllShareOptions) {