Do not swallow error returned from SaveTo() (#6801)

Summary:
With consistency check enabled, VersionBuilder::SaveTo() may return error once
corruption is detected while building versions. We should handle these errors.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6801

Test Plan: make check

Reviewed By: siying

Differential Revision: D21385045

Pulled By: riversand963

fbshipit-source-id: 98f6424e2a4699b62befa21e9fe00e70a771118e
This commit is contained in:
Yanqin Jin 2020-05-05 10:44:12 -07:00 committed by Peter Dillinger
parent ec33e9378c
commit 0ef925153d
5 changed files with 155 additions and 47 deletions

View file

@ -1778,6 +1778,28 @@ TEST_F(DBBasicTest, IncrementalRecoveryNoCorrupt) {
} }
} }
TEST_F(DBBasicTest, BestEffortsRecoveryWithVersionBuildingFailure) {
Options options = CurrentOptions();
DestroyAndReopen(options);
ASSERT_OK(Put("foo", "value"));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->SetCallBack(
"VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) {
ASSERT_NE(nullptr, arg);
*(reinterpret_cast<Status*>(arg)) =
Status::Corruption("Inject corruption");
});
SyncPoint::GetInstance()->EnableProcessing();
options.best_efforts_recovery = true;
Status s = TryReopen(options);
ASSERT_TRUE(s.IsCorruption());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
namespace { namespace {
class TableFileListener : public EventListener { class TableFileListener : public EventListener {

View file

@ -45,6 +45,8 @@ class DBSecondaryTest : public DBTestBase {
void OpenSecondary(const Options& options); void OpenSecondary(const Options& options);
Status TryOpenSecondary(const Options& options);
void OpenSecondaryWithColumnFamilies( void OpenSecondaryWithColumnFamilies(
const std::vector<std::string>& column_families, const Options& options); const std::vector<std::string>& column_families, const Options& options);
@ -70,9 +72,13 @@ class DBSecondaryTest : public DBTestBase {
}; };
void DBSecondaryTest::OpenSecondary(const Options& options) { void DBSecondaryTest::OpenSecondary(const Options& options) {
ASSERT_OK(TryOpenSecondary(options));
}
Status DBSecondaryTest::TryOpenSecondary(const Options& options) {
Status s = Status s =
DB::OpenAsSecondary(options, dbname_, secondary_path_, &db_secondary_); DB::OpenAsSecondary(options, dbname_, secondary_path_, &db_secondary_);
ASSERT_OK(s); return s;
} }
void DBSecondaryTest::OpenSecondaryWithColumnFamilies( void DBSecondaryTest::OpenSecondaryWithColumnFamilies(
@ -858,6 +864,56 @@ TEST_F(DBSecondaryTest, CheckConsistencyWhenOpen) {
thread.join(); thread.join();
ASSERT_TRUE(called); ASSERT_TRUE(called);
} }
TEST_F(DBSecondaryTest, StartFromInconsistent) {
Options options = CurrentOptions();
DestroyAndReopen(options);
ASSERT_OK(Put("foo", "value"));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->SetCallBack(
"VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) {
ASSERT_NE(nullptr, arg);
*(reinterpret_cast<Status*>(arg)) =
Status::Corruption("Inject corruption");
});
SyncPoint::GetInstance()->EnableProcessing();
Options options1;
Status s = TryOpenSecondary(options1);
ASSERT_TRUE(s.IsCorruption());
}
TEST_F(DBSecondaryTest, InconsistencyDuringCatchUp) {
Options options = CurrentOptions();
DestroyAndReopen(options);
ASSERT_OK(Put("foo", "value"));
ASSERT_OK(Flush());
Options options1;
OpenSecondary(options1);
{
std::string value;
ASSERT_OK(db_secondary_->Get(ReadOptions(), "foo", &value));
ASSERT_EQ("value", value);
}
ASSERT_OK(Put("bar", "value1"));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->SetCallBack(
"VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) {
ASSERT_NE(nullptr, arg);
*(reinterpret_cast<Status*>(arg)) =
Status::Corruption("Inject corruption");
});
SyncPoint::GetInstance()->EnableProcessing();
Status s = db_secondary_->TryCatchUpWithPrimary();
ASSERT_TRUE(s.IsCorruption());
}
#endif //! ROCKSDB_LITE #endif //! ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

View file

@ -377,6 +377,7 @@ Status VersionEditHandler::MaybeCreateVersion(const VersionEdit& /*edit*/,
ColumnFamilyData* cfd, ColumnFamilyData* cfd,
bool force_create_version) { bool force_create_version) {
assert(cfd->initialized()); assert(cfd->initialized());
Status s;
if (force_create_version) { if (force_create_version) {
auto builder_iter = builders_.find(cfd->GetID()); auto builder_iter = builders_.find(cfd->GetID());
assert(builder_iter != builders_.end()); assert(builder_iter != builders_.end());
@ -384,13 +385,18 @@ Status VersionEditHandler::MaybeCreateVersion(const VersionEdit& /*edit*/,
auto* v = new Version(cfd, version_set_, version_set_->file_options_, auto* v = new Version(cfd, version_set_, version_set_->file_options_,
*cfd->GetLatestMutableCFOptions(), *cfd->GetLatestMutableCFOptions(),
version_set_->current_version_number_++); version_set_->current_version_number_++);
builder->SaveTo(v->storage_info()); s = builder->SaveTo(v->storage_info());
// Install new version if (s.ok()) {
v->PrepareApply(*cfd->GetLatestMutableCFOptions(), // Install new version
!(version_set_->db_options_->skip_stats_update_on_db_open)); v->PrepareApply(
version_set_->AppendVersion(cfd, v); *cfd->GetLatestMutableCFOptions(),
!(version_set_->db_options_->skip_stats_update_on_db_open));
version_set_->AppendVersion(cfd, v);
} else {
delete v;
}
} }
return Status::OK(); return s;
} }
Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd, Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd,
@ -558,16 +564,20 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
auto* version = new Version(cfd, version_set_, version_set_->file_options_, auto* version = new Version(cfd, version_set_, version_set_->file_options_,
*cfd->GetLatestMutableCFOptions(), *cfd->GetLatestMutableCFOptions(),
version_set_->current_version_number_++); version_set_->current_version_number_++);
builder->SaveTo(version->storage_info()); s = builder->SaveTo(version->storage_info());
version->PrepareApply( if (s.ok()) {
*cfd->GetLatestMutableCFOptions(), version->PrepareApply(
!version_set_->db_options_->skip_stats_update_on_db_open); *cfd->GetLatestMutableCFOptions(),
auto v_iter = versions_.find(cfd->GetID()); !version_set_->db_options_->skip_stats_update_on_db_open);
if (v_iter != versions_.end()) { auto v_iter = versions_.find(cfd->GetID());
delete v_iter->second; if (v_iter != versions_.end()) {
v_iter->second = version; delete v_iter->second;
v_iter->second = version;
} else {
versions_.emplace(cfd->GetID(), version);
}
} else { } else {
versions_.emplace(cfd->GetID(), version); delete version;
} }
} }
return s; return s;

View file

@ -5920,13 +5920,23 @@ Status ReactiveVersionSet::Recover(
Version* v = new Version(cfd, this, file_options_, Version* v = new Version(cfd, this, file_options_,
*cfd->GetLatestMutableCFOptions(), *cfd->GetLatestMutableCFOptions(),
current_version_number_++); current_version_number_++);
builder->SaveTo(v->storage_info()); s = builder->SaveTo(v->storage_info());
// Install recovered version if (s.ok()) {
v->PrepareApply(*cfd->GetLatestMutableCFOptions(), // Install recovered version
!(db_options_->skip_stats_update_on_db_open)); v->PrepareApply(*cfd->GetLatestMutableCFOptions(),
AppendVersion(cfd, v); !(db_options_->skip_stats_update_on_db_open));
AppendVersion(cfd, v);
} else {
ROCKS_LOG_ERROR(db_options_->info_log,
"[%s]: inconsistent version: %s\n",
cfd->GetName().c_str(), s.ToString().c_str());
delete v;
break;
}
} }
}
if (s.ok()) {
next_file_number_.store(version_edit.next_file_number_ + 1); next_file_number_.store(version_edit.next_file_number_ + 1);
last_allocated_sequence_ = version_edit.last_sequence_; last_allocated_sequence_ = version_edit.last_sequence_;
last_published_sequence_ = version_edit.last_sequence_; last_published_sequence_ = version_edit.last_sequence_;
@ -6003,6 +6013,8 @@ Status ReactiveVersionSet::ReadAndApply(
s = ApplyOneVersionEditToBuilder(edit, cfds_changed, &temp_edit); s = ApplyOneVersionEditToBuilder(edit, cfds_changed, &temp_edit);
if (s.ok()) { if (s.ok()) {
applied_edits++; applied_edits++;
} else {
break;
} }
} }
} }
@ -6012,13 +6024,14 @@ Status ReactiveVersionSet::ReadAndApply(
} }
// It's possible that: // It's possible that:
// 1) s.IsCorruption(), indicating the current MANIFEST is corrupted. // 1) s.IsCorruption(), indicating the current MANIFEST is corrupted.
// Or the version(s) rebuilt from tailing the MANIFEST is inconsistent.
// 2) we have finished reading the current MANIFEST. // 2) we have finished reading the current MANIFEST.
// 3) we have encountered an IOError reading the current MANIFEST. // 3) we have encountered an IOError reading the current MANIFEST.
// We need to look for the next MANIFEST and start from there. If we cannot // We need to look for the next MANIFEST and start from there. If we cannot
// find the next MANIFEST, we should exit the loop. // find the next MANIFEST, we should exit the loop.
s = MaybeSwitchManifest(reader->GetReporter(), manifest_reader); Status tmp_s = MaybeSwitchManifest(reader->GetReporter(), manifest_reader);
reader = manifest_reader->get(); reader = manifest_reader->get();
if (s.ok()) { if (tmp_s.ok()) {
if (reader->file()->file_name() == old_manifest_path) { if (reader->file()->file_name() == old_manifest_path) {
// Still processing the same MANIFEST, thus no need to continue this // Still processing the same MANIFEST, thus no need to continue this
// loop since no record is available if we have reached here. // loop since no record is available if we have reached here.
@ -6048,6 +6061,7 @@ Status ReactiveVersionSet::ReadAndApply(
number_of_edits_to_skip_ += 2; number_of_edits_to_skip_ += 2;
} }
} }
s = tmp_s;
} }
} }
} }
@ -6140,12 +6154,16 @@ Status ReactiveVersionSet::ApplyOneVersionEditToBuilder(
auto version = new Version(cfd, this, file_options_, auto version = new Version(cfd, this, file_options_,
*cfd->GetLatestMutableCFOptions(), *cfd->GetLatestMutableCFOptions(),
current_version_number_++); current_version_number_++);
builder->SaveTo(version->storage_info()); s = builder->SaveTo(version->storage_info());
version->PrepareApply(*cfd->GetLatestMutableCFOptions(), true); if (s.ok()) {
AppendVersion(cfd, version); version->PrepareApply(*cfd->GetLatestMutableCFOptions(), true);
active_version_builders_.erase(builder_iter); AppendVersion(cfd, version);
if (cfds_changed->count(cfd) == 0) { active_version_builders_.erase(builder_iter);
cfds_changed->insert(cfd); if (cfds_changed->count(cfd) == 0) {
cfds_changed->insert(cfd);
}
} else {
delete version;
} }
} else if (s.IsPathNotFound()) { } else if (s.IsPathNotFound()) {
s = Status::OK(); s = Status::OK();
@ -6153,23 +6171,25 @@ Status ReactiveVersionSet::ApplyOneVersionEditToBuilder(
// Some other error has occurred during LoadTableHandlers. // Some other error has occurred during LoadTableHandlers.
} }
if (version_edit->HasNextFile()) { if (s.ok()) {
next_file_number_.store(version_edit->next_file_number_ + 1); if (version_edit->HasNextFile()) {
next_file_number_.store(version_edit->next_file_number_ + 1);
}
if (version_edit->has_last_sequence_) {
last_allocated_sequence_ = version_edit->last_sequence_;
last_published_sequence_ = version_edit->last_sequence_;
last_sequence_ = version_edit->last_sequence_;
}
if (version_edit->has_prev_log_number_) {
prev_log_number_ = version_edit->prev_log_number_;
MarkFileNumberUsed(version_edit->prev_log_number_);
}
if (version_edit->has_log_number_) {
MarkFileNumberUsed(version_edit->log_number_);
}
column_family_set_->UpdateMaxColumnFamily(version_edit->max_column_family_);
MarkMinLogNumberToKeep2PC(version_edit->min_log_number_to_keep_);
} }
if (version_edit->has_last_sequence_) {
last_allocated_sequence_ = version_edit->last_sequence_;
last_published_sequence_ = version_edit->last_sequence_;
last_sequence_ = version_edit->last_sequence_;
}
if (version_edit->has_prev_log_number_) {
prev_log_number_ = version_edit->prev_log_number_;
MarkFileNumberUsed(version_edit->prev_log_number_);
}
if (version_edit->has_log_number_) {
MarkFileNumberUsed(version_edit->log_number_);
}
column_family_set_->UpdateMaxColumnFamily(version_edit->max_column_family_);
MarkMinLogNumberToKeep2PC(version_edit->min_log_number_to_keep_);
return s; return s;
} }

View file

@ -1155,7 +1155,7 @@ TEST_F(VersionSetAtomicGroupTest,
// Write the corrupted edits. // Write the corrupted edits.
AddNewEditsToLog(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize);
mu.Lock(); mu.Lock();
EXPECT_OK( EXPECT_NOK(
reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed)); reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed));
mu.Unlock(); mu.Unlock();
EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(), EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(),
@ -1205,7 +1205,7 @@ TEST_F(VersionSetAtomicGroupTest,
&manifest_reader_status)); &manifest_reader_status));
AddNewEditsToLog(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize);
mu.Lock(); mu.Lock();
EXPECT_OK( EXPECT_NOK(
reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed)); reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed));
mu.Unlock(); mu.Unlock();
EXPECT_EQ(edits_[1].DebugString(), EXPECT_EQ(edits_[1].DebugString(),