Add share_files_with_checksum option to BackupEngine

Summary: added a new option to BackupEngine: if share_files_with_checksum is set to true, sst files are stored in shared_checksum/ and are identified by the triple (file name, checksum, file size) instead of just the file name. This option is targeted at distributed databases that want to back up their primary replica.

Test Plan: unit tests and tested backup and restore on a distributed rocksdb

Reviewers: igor

Reviewed By: igor

Differential Revision: https://reviews.facebook.net/D18393
This commit is contained in:
Benjamin Renard 2014-05-02 17:08:55 -07:00
parent 77edbfd642
commit 41e5cf2392
3 changed files with 169 additions and 41 deletions

View file

@ -7,16 +7,19 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef ROCKSDB_LITE
#pragma once #pragma once
#include "utilities/stackable_db.h" #ifndef ROCKSDB_LITE
#include "rocksdb/env.h"
#include "rocksdb/status.h"
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <string> #include <string>
#include <map> #include <map>
#include <vector> #include <vector>
#include "utilities/stackable_db.h"
#include "rocksdb/env.h"
#include "rocksdb/status.h"
namespace rocksdb { namespace rocksdb {
struct BackupableDBOptions { struct BackupableDBOptions {
@ -72,6 +75,14 @@ struct BackupableDBOptions {
// Default: 0 // Default: 0
uint64_t restore_rate_limit; uint64_t restore_rate_limit;
// Only used if share_table_files is set to true. If true, will consider that
// backups can come from different databases, hence an sst is not uniquely
// identified by its name, but by the triple (file name, crc32, file length)
// Default: false
// Note: this is an experimental option, and you'll need to set it manually
// *turn it on only if you know what you're doing*
bool share_files_with_checksum;
void Dump(Logger* logger) const; void Dump(Logger* logger) const;
explicit BackupableDBOptions(const std::string& _backup_dir, explicit BackupableDBOptions(const std::string& _backup_dir,
@ -90,7 +101,10 @@ struct BackupableDBOptions {
destroy_old_data(_destroy_old_data), destroy_old_data(_destroy_old_data),
backup_log_files(_backup_log_files), backup_log_files(_backup_log_files),
backup_rate_limit(_backup_rate_limit), backup_rate_limit(_backup_rate_limit),
restore_rate_limit(_restore_rate_limit) {} restore_rate_limit(_restore_rate_limit),
share_files_with_checksum(false) {
assert(share_table_files || !share_files_with_checksum);
}
}; };
struct RestoreOptions { struct RestoreOptions {
@ -233,5 +247,5 @@ class RestoreBackupableDB {
BackupEngine* backup_engine_; BackupEngine* backup_engine_;
}; };
} // rocksdb namespace } // namespace rocksdb
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

View file

@ -186,6 +186,9 @@ class BackupEngineImpl : public BackupEngine {
inline std::string GetPrivateDirRel() const { inline std::string GetPrivateDirRel() const {
return "private"; return "private";
} }
// Name of the directory that holds checksum-named shared files, given
// relative to the backup root (no leading or trailing slash).
inline std::string GetSharedChecksumDirRel() const {
  static const char kSharedChecksumDir[] = "shared_checksum";
  return std::string(kSharedChecksumDir);
}
inline std::string GetPrivateFileRel(BackupID backup_id, inline std::string GetPrivateFileRel(BackupID backup_id,
bool tmp = false, bool tmp = false,
const std::string& file = "") const { const std::string& file = "") const {
@ -198,6 +201,27 @@ class BackupEngineImpl : public BackupEngine {
assert(file.size() == 0 || file[0] != '/'); assert(file.size() == 0 || file[0] != '/');
return "shared/" + file + (tmp ? ".tmp" : ""); return "shared/" + file + (tmp ? ".tmp" : "");
} }
// Returns the relative path of `file` inside the checksum-shared directory:
// "<shared checksum dir>/<file>", with ".tmp" appended when `tmp` is true.
// With the default empty `file` the result is the directory name followed by
// a slash. `file` must not start with '/'.
inline std::string GetSharedFileWithChecksumRel(const std::string& file = "",
                                                bool tmp = false) const {
  assert(file.empty() || file[0] != '/');
  std::string rel_path = GetSharedChecksumDirRel();
  rel_path += "/";
  rel_path += file;
  if (tmp) {
    rel_path += ".tmp";
  }
  return rel_path;
}
// Builds the checksum-qualified name of a shared file by inserting
// "_<checksum_value>_<file_size>" in front of the last '.' of `file`,
// e.g. "12.sst" with checksum 7 and size 100 becomes "12_7_100.sst".
//
// file           -- file name; must not start with '/'
// checksum_value -- checksum to embed in the name
// file_size      -- file size to embed in the name
//
// If `file` contains no '.', the suffix is appended at the end of the name.
inline std::string GetSharedFileWithChecksum(const std::string& file,
                                             const uint32_t checksum_value,
                                             const uint64_t file_size) const {
  assert(file.size() == 0 || file[0] != '/');
  const std::string suffix =
      "_" + std::to_string(checksum_value) + "_" + std::to_string(file_size);
  std::string file_copy = file;
  // std::string::insert throws std::out_of_range when the position is npos,
  // so fall back to the end of the string when there is no extension.
  const size_t last_dot = file_copy.find_last_of('.');
  const size_t insert_at =
      (last_dot == std::string::npos) ? file_copy.size() : last_dot;
  file_copy.insert(insert_at, suffix);
  return file_copy;
}
// Recovers the plain file name from a checksum-qualified one by removing
// everything from the first '_' up to (not including) the last '.',
// e.g. "12_7_100.sst" becomes "12.sst".
//
// file -- checksum-qualified file name; must not start with '/'
//
// A name without any '_' carries nothing to strip and is returned unchanged.
// If there is no '.' after the first '_', everything from that '_' to the
// end of the name is removed.
inline std::string GetFileFromChecksumFile(const std::string& file) const {
  assert(file.size() == 0 || file[0] != '/');
  std::string file_copy = file;
  const size_t first_underscore = file_copy.find_first_of('_');
  if (first_underscore == std::string::npos) {
    // std::string::erase throws std::out_of_range when the position is npos.
    return file_copy;
  }
  const size_t last_dot = file_copy.find_last_of('.');
  if (last_dot == std::string::npos || last_dot < first_underscore) {
    // Spell out the erase-to-end case instead of relying on unsigned
    // wraparound of (last_dot - first_underscore).
    file_copy.erase(first_underscore);
  } else {
    file_copy.erase(first_underscore, last_dot - first_underscore);
  }
  return file_copy;
}
inline std::string GetLatestBackupFile(bool tmp = false) const { inline std::string GetLatestBackupFile(bool tmp = false) const {
return GetAbsolutePath(std::string("LATEST_BACKUP") + (tmp ? ".tmp" : "")); return GetAbsolutePath(std::string("LATEST_BACKUP") + (tmp ? ".tmp" : ""));
} }
@ -227,7 +251,8 @@ class BackupEngineImpl : public BackupEngine {
const std::string& src_dir, const std::string& src_dir,
const std::string& src_fname, // starts with "/" const std::string& src_fname, // starts with "/"
RateLimiter* rate_limiter, RateLimiter* rate_limiter,
uint64_t size_limit = 0); uint64_t size_limit = 0,
bool shared_checksum = false);
Status CalculateChecksum(const std::string& src, Status CalculateChecksum(const std::string& src,
Env* src_env, Env* src_env,
@ -286,10 +311,17 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
backup_env_->CreateDirIfMissing(GetAbsolutePath()); backup_env_->CreateDirIfMissing(GetAbsolutePath());
backup_env_->NewDirectory(GetAbsolutePath(), &backup_directory_); backup_env_->NewDirectory(GetAbsolutePath(), &backup_directory_);
if (options_.share_table_files) { if (options_.share_table_files) {
if (options_.share_files_with_checksum) {
backup_env_->CreateDirIfMissing(GetAbsolutePath(
GetSharedFileWithChecksumRel()));
backup_env_->NewDirectory(GetAbsolutePath(
GetSharedFileWithChecksumRel()), &shared_directory_);
} else {
backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel())); backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel()));
backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()), backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()),
&shared_directory_); &shared_directory_);
} }
}
backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel())); backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel()));
backup_env_->NewDirectory(GetAbsolutePath(GetPrivateDirRel()), backup_env_->NewDirectory(GetAbsolutePath(GetPrivateDirRel()),
&private_directory_); &private_directory_);
@ -436,7 +468,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
type == kCurrentFile); type == kCurrentFile);
// rules: // rules:
// * if it's kTableFile, than it's shared // * if it's kTableFile, then it's shared
// * if it's kDescriptorFile, limit the size to manifest_file_size // * if it's kDescriptorFile, limit the size to manifest_file_size
s = BackupFile(new_backup_id, s = BackupFile(new_backup_id,
&new_backup, &new_backup,
@ -444,7 +476,8 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
db->GetName(), /* src_dir */ db->GetName(), /* src_dir */
live_files[i], /* src_fname */ live_files[i], /* src_fname */
rate_limiter.get(), rate_limiter.get(),
(type == kDescriptorFile) ? manifest_file_size : 0); (type == kDescriptorFile) ? manifest_file_size : 0,
options_.share_files_with_checksum && type == kTableFile);
} }
// copy WAL files // copy WAL files
@ -614,10 +647,17 @@ Status BackupEngineImpl::RestoreDBFromBackup(
std::string dst; std::string dst;
// 1. extract the filename // 1. extract the filename
size_t slash = file.find_last_of('/'); size_t slash = file.find_last_of('/');
// file will either be shared/<file> or private/<number>/<file> // file will either be shared/<file>, shared_checksum/<file_crc32_size>
// or private/<number>/<file>
assert(slash != std::string::npos); assert(slash != std::string::npos);
dst = file.substr(slash + 1); dst = file.substr(slash + 1);
// if the file was in shared_checksum, extract the real file name
// in this case the file is <number>_<checksum>_<size>.<type>
if (file.substr(0, slash) == GetSharedChecksumDirRel()) {
dst = GetFileFromChecksumFile(dst);
}
// 2. find the filetype // 2. find the filetype
uint64_t number; uint64_t number;
FileType type; FileType type;
@ -785,12 +825,33 @@ Status BackupEngineImpl::BackupFile(BackupID backup_id, BackupMeta* backup,
bool shared, const std::string& src_dir, bool shared, const std::string& src_dir,
const std::string& src_fname, const std::string& src_fname,
RateLimiter* rate_limiter, RateLimiter* rate_limiter,
uint64_t size_limit) { uint64_t size_limit,
bool shared_checksum) {
assert(src_fname.size() > 0 && src_fname[0] == '/'); assert(src_fname.size() > 0 && src_fname[0] == '/');
std::string dst_relative = src_fname.substr(1); std::string dst_relative = src_fname.substr(1);
std::string dst_relative_tmp; std::string dst_relative_tmp;
if (shared) { Status s;
uint64_t size;
uint32_t checksum_value = 0;
if (shared && shared_checksum) {
// add checksum and file length to the file name
s = CalculateChecksum(src_dir + src_fname,
db_env_,
size_limit,
&checksum_value);
if (s.ok()) {
s = db_env_->GetFileSize(src_dir + src_fname, &size);
}
if (!s.ok()) {
return s;
}
dst_relative = GetSharedFileWithChecksum(dst_relative, checksum_value,
size);
dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
dst_relative = GetSharedFileWithChecksumRel(dst_relative, false);
} else if (shared) {
dst_relative_tmp = GetSharedFileRel(dst_relative, true); dst_relative_tmp = GetSharedFileRel(dst_relative, true);
dst_relative = GetSharedFileRel(dst_relative, false); dst_relative = GetSharedFileRel(dst_relative, false);
} else { } else {
@ -799,13 +860,15 @@ Status BackupEngineImpl::BackupFile(BackupID backup_id, BackupMeta* backup,
} }
std::string dst_path = GetAbsolutePath(dst_relative); std::string dst_path = GetAbsolutePath(dst_relative);
std::string dst_path_tmp = GetAbsolutePath(dst_relative_tmp); std::string dst_path_tmp = GetAbsolutePath(dst_relative_tmp);
Status s;
uint64_t size;
// if it's shared, we also need to check if it exists -- if it does, // if it's shared, we also need to check if it exists -- if it does,
// no need to copy it again // no need to copy it again
uint32_t checksum_value = 0;
if (shared && backup_env_->FileExists(dst_path)) { if (shared && backup_env_->FileExists(dst_path)) {
if (shared_checksum) {
Log(options_.info_log,
"%s already present, with checksum %u and size %" PRIu64,
src_fname.c_str(), checksum_value, size);
} else {
backup_env_->GetFileSize(dst_path, &size); // Ignore error backup_env_->GetFileSize(dst_path, &size); // Ignore error
Log(options_.info_log, "%s already present, calculate checksum", Log(options_.info_log, "%s already present, calculate checksum",
src_fname.c_str()); src_fname.c_str());
@ -813,6 +876,7 @@ Status BackupEngineImpl::BackupFile(BackupID backup_id, BackupMeta* backup,
db_env_, db_env_,
size_limit, size_limit,
&checksum_value); &checksum_value);
}
} else { } else {
Log(options_.info_log, "Copying %s", src_fname.c_str()); Log(options_.info_log, "Copying %s", src_fname.c_str());
s = CopyFile(src_dir + src_fname, s = CopyFile(src_dir + src_fname,

View file

@ -7,6 +7,10 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <string>
#include <algorithm>
#include <iostream>
#include "rocksdb/types.h" #include "rocksdb/types.h"
#include "rocksdb/transaction_log.h" #include "rocksdb/transaction_log.h"
#include "utilities/utility_db.h" #include "utilities/utility_db.h"
@ -16,9 +20,6 @@
#include "util/testutil.h" #include "util/testutil.h"
#include "util/auto_roll_logger.h" #include "util/auto_roll_logger.h"
#include <string>
#include <algorithm>
namespace rocksdb { namespace rocksdb {
namespace { namespace {
@ -375,7 +376,8 @@ class BackupableDBTest {
} }
void OpenBackupableDB(bool destroy_old_data = false, bool dummy = false, void OpenBackupableDB(bool destroy_old_data = false, bool dummy = false,
bool share_table_files = true) { bool share_table_files = true,
bool share_with_checksums = false) {
// reset all the defaults // reset all the defaults
test_backup_env_->SetLimitWrittenFiles(1000000); test_backup_env_->SetLimitWrittenFiles(1000000);
test_db_env_->SetLimitWrittenFiles(1000000); test_db_env_->SetLimitWrittenFiles(1000000);
@ -390,6 +392,7 @@ class BackupableDBTest {
} }
backupable_options_->destroy_old_data = destroy_old_data; backupable_options_->destroy_old_data = destroy_old_data;
backupable_options_->share_table_files = share_table_files; backupable_options_->share_table_files = share_table_files;
backupable_options_->share_files_with_checksum = share_with_checksums;
db_.reset(new BackupableDB(db, *backupable_options_)); db_.reset(new BackupableDB(db, *backupable_options_));
} }
@ -794,6 +797,53 @@ TEST(BackupableDBTest, NoShareTableFiles) {
} }
} }
// Takes several backups with share_files_with_checksum turned on and then
// checks every one of them with AssertBackupConsistency.
TEST(BackupableDBTest, ShareTableFilesWithChecksums) {
  const int keys_iteration = 5000;
  const int num_backups = 5;

  // arguments: destroy_old_data, dummy, share_table_files,
  // share_with_checksums
  OpenBackupableDB(true, false, true, true);
  for (int round = 0; round < num_backups; ++round) {
    const int range_begin = keys_iteration * round;
    const int range_end = keys_iteration * (round + 1);
    FillDB(db_.get(), range_begin, range_end);
    // the flag passed to CreateNewBackup is true on odd rounds only
    const bool odd_round = (round % 2) != 0;
    ASSERT_OK(db_->CreateNewBackup(odd_round));
  }
  CloseBackupableDB();

  for (int backup_id = 1; backup_id <= num_backups; ++backup_id) {
    AssertBackupConsistency(backup_id, 0, keys_iteration * backup_id,
                            keys_iteration * 6);
  }
}
// Takes and checks backups with share_files_with_checksum set to false, then
// reopens with the option set to true, takes more backups and checks again.
TEST(BackupableDBTest, ShareTableFilesWithChecksumsTransition) {
  const int keys_iteration = 5000;
  const int backups_per_phase = 5;

  // phase 1: share_files_with_checksum is false
  OpenBackupableDB(true, false, true, false);
  for (int step = 0; step < backups_per_phase; ++step) {
    const int range_begin = keys_iteration * step;
    const int range_end = keys_iteration * (step + 1);
    FillDB(db_.get(), range_begin, range_end);
    ASSERT_OK(db_->CreateNewBackup(true));
  }
  CloseBackupableDB();

  for (int backup_id = 1; backup_id <= backups_per_phase; ++backup_id) {
    AssertBackupConsistency(backup_id, 0, keys_iteration * backup_id,
                            keys_iteration * 6);
  }

  // phase 2: share_files_with_checksum is true, keys continue where phase 1
  // stopped
  OpenBackupableDB(true, false, true, true);
  for (int step = backups_per_phase; step < 2 * backups_per_phase; ++step) {
    const int range_begin = keys_iteration * step;
    const int range_end = keys_iteration * (step + 1);
    FillDB(db_.get(), range_begin, range_end);
    ASSERT_OK(db_->CreateNewBackup(true));
  }
  CloseBackupableDB();

  for (int backup_id = 1; backup_id <= backups_per_phase; ++backup_id) {
    AssertBackupConsistency(backup_id, 0, keys_iteration * (backup_id + 5),
                            keys_iteration * 11);
  }
}
TEST(BackupableDBTest, DeleteTmpFiles) { TEST(BackupableDBTest, DeleteTmpFiles) {
OpenBackupableDB(); OpenBackupableDB();
CloseBackupableDB(); CloseBackupableDB();