Limit backups opened

Summary:
This was requested by a customer who wants to proactively monitor whether any valid backups are available. Previously, Open() performed poorly because it serially read every small meta-file (one per backup), which was slow on HDFS.

Now we read only the minimum number of meta-files needed to find `max_valid_backups_to_open` valid backups, starting from the latest backup. The customer mentioned above can simply set it to one.
Closes https://github.com/facebook/rocksdb/pull/2151

Differential Revision: D4882564

Pulled By: ajkr

fbshipit-source-id: cb0edf9e8ac693e4d5f24902e725a011ed8c0c2f
This commit is contained in:
Andrew Kryczka 2017-04-19 13:15:16 -07:00 committed by Facebook Github Bot
parent 1dd7760513
commit df74b775e6
3 changed files with 78 additions and 12 deletions

View file

@ -108,6 +108,11 @@ struct BackupableDBOptions {
// Default: 4194304
uint64_t callback_trigger_interval_size;
// When Open() is called, it will open at most this many of the latest
// non-corrupted backups. If 0, it will open all available backups.
// Default: 0
int max_valid_backups_to_open;
void Dump(Logger* logger) const;
explicit BackupableDBOptions(
@ -116,7 +121,8 @@ struct BackupableDBOptions {
bool _sync = true, bool _destroy_old_data = false,
bool _backup_log_files = true, uint64_t _backup_rate_limit = 0,
uint64_t _restore_rate_limit = 0, int _max_background_operations = 1,
uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024)
uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024,
int _max_valid_backups_to_open = 0)
: backup_dir(_backup_dir),
backup_env(_backup_env),
share_table_files(_share_table_files),
@ -128,7 +134,8 @@ struct BackupableDBOptions {
restore_rate_limit(_restore_rate_limit),
share_files_with_checksum(false),
max_background_operations(_max_background_operations),
callback_trigger_interval_size(_callback_trigger_interval_size) {
callback_trigger_interval_size(_callback_trigger_interval_size),
max_valid_backups_to_open(_max_valid_backups_to_open) {
assert(share_table_files || !share_files_with_checksum);
}
};

View file

@ -612,18 +612,28 @@ Status BackupEngineImpl::Initialize() {
const auto abs_dir = GetAbsolutePath(rel_dir);
InsertPathnameToSizeBytes(abs_dir, backup_env_, &abs_path_to_size);
}
// load the backups if any
for (auto& backup : backups_) {
// load the backups if any, until valid_backups_to_open of the latest
// non-corrupted backups have been successfully opened.
int valid_backups_to_open;
if (options_.max_valid_backups_to_open == 0) {
valid_backups_to_open = INT_MAX;
} else {
valid_backups_to_open = options_.max_valid_backups_to_open;
}
for (auto backup_iter = backups_.rbegin();
backup_iter != backups_.rend() && valid_backups_to_open > 0;
++backup_iter) {
InsertPathnameToSizeBytes(
GetAbsolutePath(GetPrivateFileRel(backup.first)), backup_env_,
GetAbsolutePath(GetPrivateFileRel(backup_iter->first)), backup_env_,
&abs_path_to_size);
Status s =
backup.second->LoadFromFile(options_.backup_dir, abs_path_to_size);
Status s = backup_iter->second->LoadFromFile(options_.backup_dir,
abs_path_to_size);
if (s.IsCorruption()) {
ROCKS_LOG_INFO(options_.info_log, "Backup %u corrupted -- %s",
backup.first, s.ToString().c_str());
corrupt_backups_.insert(std::make_pair(
backup.first, std::make_pair(s, std::move(backup.second))));
backup_iter->first, s.ToString().c_str());
corrupt_backups_.insert(
std::make_pair(backup_iter->first,
std::make_pair(s, std::move(backup_iter->second))));
} else if (!s.ok()) {
// Distinguish corruption errors from errors in the backup Env.
// Errors in the backup Env (i.e., this code path) will cause Open() to
@ -631,14 +641,29 @@ Status BackupEngineImpl::Initialize() {
return s;
} else {
ROCKS_LOG_INFO(options_.info_log, "Loading backup %" PRIu32 " OK:\n%s",
backup.first, backup.second->GetInfoString().c_str());
latest_backup_id_ = std::max(latest_backup_id_, backup.first);
backup_iter->first,
backup_iter->second->GetInfoString().c_str());
latest_backup_id_ = std::max(latest_backup_id_, backup_iter->first);
--valid_backups_to_open;
}
}
for (const auto& corrupt : corrupt_backups_) {
backups_.erase(backups_.find(corrupt.first));
}
// erase the older backups that were left unopened once
// max_valid_backups_to_open valid backups had been loaded
int num_unopened_backups;
if (options_.max_valid_backups_to_open == 0) {
num_unopened_backups = 0;
} else {
num_unopened_backups =
std::max(0, static_cast<int>(backups_.size()) -
options_.max_valid_backups_to_open);
}
for (int i = 0; i < num_unopened_backups; ++i) {
assert(backups_.begin()->second->Empty());
backups_.erase(backups_.begin());
}
}
ROCKS_LOG_INFO(options_.info_log, "Latest backup is %u", latest_backup_id_);

View file

@ -1465,6 +1465,40 @@ TEST_F(BackupableDBTest, MetadataTooLarge) {
CloseDBAndBackupEngine();
DestroyDB(dbname_, options_);
}
TEST_F(BackupableDBTest, LimitBackupsOpened) {
  // Checks that reopening the backup engine with max_valid_backups_to_open
  // set exposes only that many of the newest valid backups, and that a
  // corrupted backup is skipped rather than counted toward the limit.
  //
  // Scenario:
  //   - four backups are created (IDs 1 through 4)
  //   - the meta-file of backup 3 is corrupted right after it is created
  //   - the engine is reopened with max_valid_backups_to_open == 2
  //
  // Expected outcome: only backups 2 and 4 are reported, because they are
  // the two most recent backups that are not corrupt.
  const int kNumKeys = 5000;
  const int kNumBackups = 4;
  const int kCorruptBackupId = 3;

  OpenDBAndBackupEngine(true);
  for (int backup_id = 1; backup_id <= kNumBackups; ++backup_id) {
    // Each round writes a fresh, non-overlapping key range before backing up.
    const int range_begin = kNumKeys * backup_id;
    const int range_end = range_begin + kNumKeys;
    FillDB(db_.get(), range_begin, range_end);
    ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
    if (backup_id == kCorruptBackupId) {
      // Damage the meta-file of the backup that was just written.
      ASSERT_OK(file_manager_->CorruptFile(backupdir_ + "/meta/3", 3));
    }
  }
  CloseDBAndBackupEngine();

  // Reopen with the limit in effect and inspect what the engine reports.
  backupable_options_->max_valid_backups_to_open = 2;
  OpenDBAndBackupEngine();
  std::vector<BackupInfo> opened_backups;
  backup_engine_->GetBackupInfo(&opened_backups);
  // The test expects the surviving backups in ascending ID order.
  ASSERT_EQ(2, opened_backups.size());
  ASSERT_EQ(2, opened_backups[0].backup_id);
  ASSERT_EQ(4, opened_backups[1].backup_id);

  CloseDBAndBackupEngine();
  DestroyDB(dbname_, options_);
}
} // anon namespace
} // namespace rocksdb