Fix missing cases of corruption retries (#13122)

Summary:
This PR fixes a few cases where RocksDB did not retry a file read that failed with a checksum mismatch/corruption using the `verify_and_reconstruct_read` IO option. With these cases fixed, we can almost always open the DB and execute reads successfully even if we see transient corruptions, provided the `FileSystem` supports the `verify_and_reconstruct_read` option. The specific cases fixed in this PR are:
1. CURRENT file
2. IDENTITY file
3. OPTIONS file
4. SST footer

Pull Request resolved: https://github.com/facebook/rocksdb/pull/13122

Test Plan: Unit test in `db_io_failure_test.cc` that injects corruption at various stages of DB open and reads

Reviewed By: jaykorean

Differential Revision: D65617982

Pulled By: anand1976

fbshipit-source-id: 4324b88cc7eee5501ab5df20ef7a95bb12ed3ea7
This commit is contained in:
anand76 2024-11-08 12:43:21 -08:00 committed by Facebook GitHub Bot
parent 485ee4f45c
commit ee258619be
15 changed files with 290 additions and 94 deletions

View File

@ -5203,11 +5203,12 @@ Status DBImpl::GetDbIdentity(std::string& identity) const {
return Status::OK(); return Status::OK();
} }
Status DBImpl::GetDbIdentityFromIdentityFile(std::string* identity) const { Status DBImpl::GetDbIdentityFromIdentityFile(const IOOptions& opts,
std::string* identity) const {
std::string idfilename = IdentityFileName(dbname_); std::string idfilename = IdentityFileName(dbname_);
const FileOptions soptions; const FileOptions soptions;
Status s = ReadFileToString(fs_.get(), idfilename, identity); Status s = ReadFileToString(fs_.get(), idfilename, opts, identity);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

View File

@ -482,7 +482,8 @@ class DBImpl : public DB {
Status GetDbIdentity(std::string& identity) const override; Status GetDbIdentity(std::string& identity) const override;
virtual Status GetDbIdentityFromIdentityFile(std::string* identity) const; virtual Status GetDbIdentityFromIdentityFile(const IOOptions& opts,
std::string* identity) const;
Status GetDbSessionId(std::string& session_id) const override; Status GetDbSessionId(std::string& session_id) const override;
@ -1592,7 +1593,7 @@ class DBImpl : public DB {
// Read/create DB identity file (as appropriate), and write DB ID to // Read/create DB identity file (as appropriate), and write DB ID to
// version_edit if provided. // version_edit if provided.
Status SetupDBId(const WriteOptions& write_options, bool read_only, Status SetupDBId(const WriteOptions& write_options, bool read_only,
bool is_new_db, VersionEdit* version_edit); bool is_new_db, bool is_retry, VersionEdit* version_edit);
// Assign db_id_ and write DB ID to version_edit if provided. // Assign db_id_ and write DB ID to version_edit if provided.
void SetDBId(std::string&& id, bool read_only, VersionEdit* version_edit); void SetDBId(std::string&& id, bool read_only, VersionEdit* version_edit);

View File

@ -983,7 +983,8 @@ void DBImpl::SetDBId(std::string&& id, bool read_only,
} }
Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only, Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
bool is_new_db, VersionEdit* version_edit) { bool is_new_db, bool is_retry,
VersionEdit* version_edit) {
Status s; Status s;
if (!is_new_db) { if (!is_new_db) {
// Check for the IDENTITY file and create it if not there or // Check for the IDENTITY file and create it if not there or
@ -991,7 +992,11 @@ Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
std::string db_id_in_file; std::string db_id_in_file;
s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr); s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr);
if (s.ok()) { if (s.ok()) {
s = GetDbIdentityFromIdentityFile(&db_id_in_file); IOOptions opts;
if (is_retry) {
opts.verify_and_reconstruct_read = true;
}
s = GetDbIdentityFromIdentityFile(opts, &db_id_in_file);
if (s.ok() && !db_id_in_file.empty()) { if (s.ok() && !db_id_in_file.empty()) {
if (db_id_.empty()) { if (db_id_.empty()) {
// Loaded from file and wasn't already known from manifest // Loaded from file and wasn't already known from manifest

View File

@ -301,7 +301,7 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
VersionEdit new_db_edit; VersionEdit new_db_edit;
const WriteOptions write_options(Env::IOActivity::kDBOpen); const WriteOptions write_options(Env::IOActivity::kDBOpen);
Status s = SetupDBId(write_options, /*read_only=*/false, /*is_new_db=*/true, Status s = SetupDBId(write_options, /*read_only=*/false, /*is_new_db=*/true,
&new_db_edit); /*is_retry=*/false, &new_db_edit);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -676,11 +676,11 @@ Status DBImpl::Recover(
// Already set up DB ID in NewDB // Already set up DB ID in NewDB
} else if (immutable_db_options_.write_dbid_to_manifest && recovery_ctx) { } else if (immutable_db_options_.write_dbid_to_manifest && recovery_ctx) {
VersionEdit edit; VersionEdit edit;
s = SetupDBId(write_options, read_only, is_new_db, &edit); s = SetupDBId(write_options, read_only, is_new_db, is_retry, &edit);
recovery_ctx->UpdateVersionEdits( recovery_ctx->UpdateVersionEdits(
versions_->GetColumnFamilySet()->GetDefault(), edit); versions_->GetColumnFamilySet()->GetDefault(), edit);
} else { } else {
s = SetupDBId(write_options, read_only, is_new_db, nullptr); s = SetupDBId(write_options, read_only, is_new_db, is_retry, nullptr);
} }
assert(!s.ok() || !db_id_.empty()); assert(!s.ok() || !db_id_.empty());
ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB ID: %s\n", db_id_.c_str()); ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB ID: %s\n", db_id_.c_str());

View File

@ -265,7 +265,8 @@ Status OpenForReadOnlyCheckExistence(const DBOptions& db_options,
const std::shared_ptr<FileSystem>& fs = db_options.env->GetFileSystem(); const std::shared_ptr<FileSystem>& fs = db_options.env->GetFileSystem();
std::string manifest_path; std::string manifest_path;
uint64_t manifest_file_number; uint64_t manifest_file_number;
s = VersionSet::GetCurrentManifestPath(dbname, fs.get(), &manifest_path, s = VersionSet::GetCurrentManifestPath(dbname, fs.get(), /*is_retry=*/false,
&manifest_path,
&manifest_file_number); &manifest_file_number);
} else { } else {
// Historic behavior that doesn't necessarily make sense // Historic behavior that doesn't necessarily make sense

View File

@ -27,12 +27,14 @@ class CorruptionFS : public FileSystemWrapper {
num_writable_file_errors_(0), num_writable_file_errors_(0),
corruption_trigger_(INT_MAX), corruption_trigger_(INT_MAX),
read_count_(0), read_count_(0),
corrupt_offset_(0),
corrupt_len_(0),
rnd_(300), rnd_(300),
fs_buffer_(fs_buffer), fs_buffer_(fs_buffer),
verify_read_(verify_read) {} verify_read_(verify_read) {}
~CorruptionFS() override { ~CorruptionFS() override {
// Assert that the corruption was reset, which means it got triggered // Assert that the corruption was reset, which means it got triggered
assert(corruption_trigger_ == INT_MAX); assert(corruption_trigger_ == INT_MAX || corrupt_len_ > 0);
} }
const char* Name() const override { return "ErrorEnv"; } const char* Name() const override { return "ErrorEnv"; }
@ -48,8 +50,10 @@ class CorruptionFS : public FileSystemWrapper {
} }
void SetCorruptionTrigger(const int trigger) { void SetCorruptionTrigger(const int trigger) {
MutexLock l(&mutex_);
corruption_trigger_ = trigger; corruption_trigger_ = trigger;
read_count_ = 0; read_count_ = 0;
corrupt_fname_.clear();
} }
IOStatus NewRandomAccessFile(const std::string& fname, IOStatus NewRandomAccessFile(const std::string& fname,
@ -58,25 +62,31 @@ class CorruptionFS : public FileSystemWrapper {
IODebugContext* dbg) override { IODebugContext* dbg) override {
class CorruptionRandomAccessFile : public FSRandomAccessFileOwnerWrapper { class CorruptionRandomAccessFile : public FSRandomAccessFileOwnerWrapper {
public: public:
CorruptionRandomAccessFile(CorruptionFS& fs, CorruptionRandomAccessFile(CorruptionFS& fs, const std::string& fname,
std::unique_ptr<FSRandomAccessFile>& file) std::unique_ptr<FSRandomAccessFile>& file)
: FSRandomAccessFileOwnerWrapper(std::move(file)), fs_(fs) {} : FSRandomAccessFileOwnerWrapper(std::move(file)),
fs_(fs),
fname_(fname) {}
IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts, IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts,
Slice* result, char* scratch, Slice* result, char* scratch,
IODebugContext* dbg) const override { IODebugContext* dbg) const override {
IOStatus s = target()->Read(offset, len, opts, result, scratch, dbg); IOStatus s = target()->Read(offset, len, opts, result, scratch, dbg);
if (opts.verify_and_reconstruct_read) { if (opts.verify_and_reconstruct_read) {
fs_.MaybeResetOverlapWithCorruptedChunk(fname_, offset,
result->size());
return s; return s;
} }
MutexLock l(&fs_.mutex_);
if (s.ok() && ++fs_.read_count_ >= fs_.corruption_trigger_) { if (s.ok() && ++fs_.read_count_ >= fs_.corruption_trigger_) {
fs_.read_count_ = 0;
fs_.corruption_trigger_ = INT_MAX; fs_.corruption_trigger_ = INT_MAX;
char* data = const_cast<char*>(result->data()); char* data = const_cast<char*>(result->data());
std::memcpy( std::memcpy(
data, data,
fs_.rnd_.RandomString(static_cast<int>(result->size())).c_str(), fs_.rnd_.RandomString(static_cast<int>(result->size())).c_str(),
result->size()); result->size());
fs_.SetCorruptedChunk(fname_, offset, result->size());
} }
return s; return s;
} }
@ -101,14 +111,76 @@ class CorruptionFS : public FileSystemWrapper {
return IOStatus::OK(); return IOStatus::OK();
} }
// Always reports prefetch as unsupported; every argument is ignored.
// NOTE(review): presumably this keeps data flowing through Read(), which is
// where this wrapper injects corruption -- confirm against the callers.
IOStatus Prefetch(uint64_t /*offset*/, size_t /*n*/,
const IOOptions& /*options*/,
IODebugContext* /*dbg*/) override {
return IOStatus::NotSupported("Prefetch");
}
private: private:
CorruptionFS& fs_; CorruptionFS& fs_;
std::string fname_;
}; };
std::unique_ptr<FSRandomAccessFile> file; std::unique_ptr<FSRandomAccessFile> file;
IOStatus s = target()->NewRandomAccessFile(fname, opts, &file, dbg); IOStatus s = target()->NewRandomAccessFile(fname, opts, &file, dbg);
EXPECT_OK(s); EXPECT_OK(s);
result->reset(new CorruptionRandomAccessFile(*this, file)); result->reset(new CorruptionRandomAccessFile(*this, fname, file));
return s;
}
IOStatus NewSequentialFile(const std::string& fname,
const FileOptions& file_opts,
std::unique_ptr<FSSequentialFile>* result,
IODebugContext* dbg) override {
// Sequential-file wrapper handed out by CorruptionFS::NewSequentialFile.
//
// It forwards every read to the wrapped file and then does one of:
//  - nothing, for empty reads and for any file whose name contains
//    "IDENTITY";
//  - for reads issued with opts.verify_and_reconstruct_read set, tells the
//    owning CorruptionFS which byte range was re-read so that it can clear
//    the recorded corrupted chunk if the ranges overlap;
//  - otherwise counts the read and, once the count reaches the file
//    system's corruption trigger, overwrites the returned bytes with random
//    data and records the corrupted (file, offset, length).
class CorruptionSequentialFile : public FSSequentialFileOwnerWrapper {
 public:
  // fs    - owning CorruptionFS; holds the trigger/counter/corrupted-chunk
  //         state shared by all files it creates.
  // fname - name of the file being wrapped, used to identify the chunk.
  // file  - the real sequential file; ownership is moved into the wrapper.
  CorruptionSequentialFile(CorruptionFS& fs, const std::string& fname,
                           std::unique_ptr<FSSequentialFile>& file)
      : FSSequentialFileOwnerWrapper(std::move(file)),
        fs_(fs),
        fname_(fname),
        offset_(0) {}

  // Reads up to len bytes through the wrapped file and returns its status.
  // The bytes referenced by *result may be replaced with random data when
  // the corruption trigger fires (see the class comment).
  IOStatus Read(size_t len, const IOOptions& opts, Slice* result,
                char* scratch, IODebugContext* dbg) override {
    IOStatus s = target()->Read(len, opts, result, scratch, dbg);
    if (result->size() == 0 ||
        fname_.find("IDENTITY") != std::string::npos) {
      return s;
    }

    // Remember where this read started and advance the running offset
    // before branching, so that retry reads (which return early below)
    // report their real position in the file rather than a stale one.
    const size_t read_offset = offset_;
    offset_ += result->size();

    if (opts.verify_and_reconstruct_read) {
      fs_.MaybeResetOverlapWithCorruptedChunk(fname_, read_offset,
                                              result->size());
      return s;
    }

    MutexLock l(&fs_.mutex_);
    if (s.ok() && ++fs_.read_count_ >= fs_.corruption_trigger_) {
      // One-shot: disarm the trigger before corrupting this read.
      fs_.corruption_trigger_ = INT_MAX;
      char* data = const_cast<char*>(result->data());
      std::memcpy(
          data,
          fs_.rnd_.RandomString(static_cast<int>(result->size())).c_str(),
          result->size());
      fs_.SetCorruptedChunk(fname_, read_offset, result->size());
    }
    return s;
  }

 private:
  CorruptionFS& fs_;
  std::string fname_;
  // Number of bytes returned so far by non-empty reads of this file (not
  // maintained for files whose name contains "IDENTITY").
  size_t offset_;
};
std::unique_ptr<FSSequentialFile> file;
IOStatus s = target()->NewSequentialFile(fname, file_opts, &file, dbg);
EXPECT_OK(s);
result->reset(new CorruptionSequentialFile(*this, fname, file));
return s; return s;
} }
@ -123,12 +195,40 @@ class CorruptionFS : public FileSystemWrapper {
} }
} }
// Records the byte range [offset, offset + len) of file `fname` as the chunk
// that has just been corrupted. Asserts that no other chunk is currently
// recorded.
void SetCorruptedChunk(const std::string& fname, size_t offset, size_t len) {
  assert(corrupt_fname_.empty());
  corrupt_len_ = len;
  corrupt_offset_ = offset;
  corrupt_fname_ = fname;
}
// Forgets the recorded corrupted chunk (by clearing its file name) when a
// read of `len` bytes at `offset` in `fname` touches it. Reads of any other
// file leave the record untouched.
void MaybeResetOverlapWithCorruptedChunk(const std::string& fname,
                                         size_t offset, size_t len) {
  if (fname != corrupt_fname_) {
    return;
  }
  // The read begins at or before the chunk and extends past its first byte.
  const bool reaches_into_chunk =
      offset <= corrupt_offset_ && (offset + len) > corrupt_offset_;
  // The read begins somewhere inside the chunk.
  const bool begins_in_chunk =
      offset >= corrupt_offset_ && offset < (corrupt_offset_ + corrupt_len_);
  if (reaches_into_chunk || begins_in_chunk) {
    corrupt_fname_.clear();
  }
}
bool VerifyRetry() { return corrupt_len_ > 0 && corrupt_fname_.empty(); }
int read_count() { return read_count_; }
int corruption_trigger() { return corruption_trigger_; }
private: private:
int corruption_trigger_; int corruption_trigger_;
int read_count_; int read_count_;
std::string corrupt_fname_;
size_t corrupt_offset_;
size_t corrupt_len_;
Random rnd_; Random rnd_;
bool fs_buffer_; bool fs_buffer_;
bool verify_read_; bool verify_read_;
port::Mutex mutex_;
}; };
} // anonymous namespace } // anonymous namespace
@ -717,6 +817,7 @@ class DBIOCorruptionTest
bbto.num_file_reads_for_auto_readahead = 0; bbto.num_file_reads_for_auto_readahead = 0;
options_.table_factory.reset(NewBlockBasedTableFactory(bbto)); options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
options_.disable_auto_compactions = true; options_.disable_auto_compactions = true;
options_.max_file_opening_threads = 0;
Reopen(options_); Reopen(options_);
} }
@ -857,8 +958,8 @@ TEST_P(DBIOCorruptionTest, FlushReadCorruptionRetry) {
Status s = Flush(); Status s = Flush();
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1); ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT), ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1); 1);
std::string val; std::string val;
@ -885,8 +986,8 @@ TEST_P(DBIOCorruptionTest, ManifestCorruptionRetry) {
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_OK(ReopenDB()); ASSERT_OK(ReopenDB());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1); ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT), ASSERT_GT(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1); 1);
} else { } else {
ASSERT_EQ(ReopenDB(), Status::Corruption()); ASSERT_EQ(ReopenDB(), Status::Corruption());
@ -970,6 +1071,57 @@ TEST_P(DBIOCorruptionTest, TablePropertiesCorruptionRetry) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
} }
// Injects a single read corruption at progressively later points of DB open
// and of the point lookups that follow it, and checks after every round that
// the DB opened, every key read back correctly, and the corrupted chunk was
// re-read with verify_and_reconstruct_read.
TEST_P(DBIOCorruptionTest, DBOpenReadCorruptionRetry) {
  // The third test parameter enables retry with verify_and_reconstruct_read;
  // there is nothing to verify without it.
  if (!std::get<2>(GetParam())) {
    return;
  }
  CorruptionFS* fs =
      static_cast<CorruptionFS*>(env_guard_->GetFileSystem().get());

  // Write three SST files of 100 keys each.
  for (int sst = 0; sst < 3; ++sst) {
    for (int key = 0; key < 100; ++key) {
      std::stringstream ss;
      ss << std::setw(3) << 100 * sst + key;
      ASSERT_OK(Put("key" + ss.str(), "val" + ss.str()));
    }
    ASSERT_OK(Flush());
  }
  Close();

  // DB open will create table readers unless we reduce the table cache
  // capacity.
  // SanitizeOptions will set max_open_files to minimum of 20. Table cache
  // is allocated with max_open_files - 10 as capacity. So override
  // max_open_files to 11 so table cache capacity will become 1. This will
  // prevent file open during DB open and force the files to be opened
  // during the Get calls below.
  SyncPoint::GetInstance()->SetCallBack(
      "SanitizeOptions::AfterChangeMaxOpenFiles", [](void* arg) {
        int* max_open_files = static_cast<int*>(arg);
        *max_open_files = 11;
      });
  SyncPoint::GetInstance()->EnableProcessing();

  // Progressively increase the IO count trigger for corruption, and verify
  // that it was retried. The loop ends with the first round in which the
  // trigger was never reached, i.e. it is still armed afterwards.
  int corruption_trigger = 1;
  do {
    fs->SetCorruptionTrigger(corruption_trigger);
    ASSERT_OK(ReopenDB());
    for (int sst = 0; sst < 3; ++sst) {
      for (int key = 0; key < 100; ++key) {
        std::stringstream ss;
        ss << std::setw(3) << 100 * sst + key;
        ASSERT_EQ(Get("key" + ss.str()), "val" + ss.str());
      }
    }
    // Verify that the injected corruption was repaired
    ASSERT_TRUE(fs->VerifyRetry());
    corruption_trigger++;
  } while (fs->corruption_trigger() == INT_MAX);

  // Do not leave the max_open_files override installed once this test is
  // done.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}
// The parameters are - 1. Use FS provided buffer, 2. Use async IO ReadOption, // The parameters are - 1. Use FS provided buffer, 2. Use async IO ReadOption,
// 3. Retry with verify_and_reconstruct_read IOOption // 3. Retry with verify_and_reconstruct_read IOOption
INSTANTIATE_TEST_CASE_P(DBIOCorruptionTest, DBIOCorruptionTest, INSTANTIATE_TEST_CASE_P(DBIOCorruptionTest, DBIOCorruptionTest,

View File

@ -6012,15 +6012,19 @@ Status VersionSet::LogAndApplyHelper(ColumnFamilyData* cfd,
} }
Status VersionSet::GetCurrentManifestPath(const std::string& dbname, Status VersionSet::GetCurrentManifestPath(const std::string& dbname,
FileSystem* fs, FileSystem* fs, bool is_retry,
std::string* manifest_path, std::string* manifest_path,
uint64_t* manifest_file_number) { uint64_t* manifest_file_number) {
assert(fs != nullptr); assert(fs != nullptr);
assert(manifest_path != nullptr); assert(manifest_path != nullptr);
assert(manifest_file_number != nullptr); assert(manifest_file_number != nullptr);
IOOptions opts;
std::string fname; std::string fname;
Status s = ReadFileToString(fs, CurrentFileName(dbname), &fname); if (is_retry) {
opts.verify_and_reconstruct_read = true;
}
Status s = ReadFileToString(fs, CurrentFileName(dbname), opts, &fname);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -6050,8 +6054,8 @@ Status VersionSet::Recover(
// Read "CURRENT" file, which contains a pointer to the current manifest // Read "CURRENT" file, which contains a pointer to the current manifest
// file // file
std::string manifest_path; std::string manifest_path;
Status s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path, Status s = GetCurrentManifestPath(dbname_, fs_.get(), is_retry,
&manifest_file_number_); &manifest_path, &manifest_file_number_);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -6296,8 +6300,8 @@ Status VersionSet::ListColumnFamilies(std::vector<std::string>* column_families,
// Read "CURRENT" file, which contains a pointer to the current manifest file // Read "CURRENT" file, which contains a pointer to the current manifest file
std::string manifest_path; std::string manifest_path;
uint64_t manifest_file_number; uint64_t manifest_file_number;
Status s = Status s = GetCurrentManifestPath(dbname, fs, /*is_retry=*/false,
GetCurrentManifestPath(dbname, fs, &manifest_path, &manifest_file_number); &manifest_path, &manifest_file_number);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -7495,8 +7499,8 @@ Status ReactiveVersionSet::MaybeSwitchManifest(
assert(manifest_reader != nullptr); assert(manifest_reader != nullptr);
Status s; Status s;
std::string manifest_path; std::string manifest_path;
s = GetCurrentManifestPath(dbname_, fs_.get(), &manifest_path, s = GetCurrentManifestPath(dbname_, fs_.get(), /*is_retry=*/false,
&manifest_file_number_); &manifest_path, &manifest_file_number_);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

View File

@ -1278,7 +1278,7 @@ class VersionSet {
{}); {});
static Status GetCurrentManifestPath(const std::string& dbname, static Status GetCurrentManifestPath(const std::string& dbname,
FileSystem* fs, FileSystem* fs, bool is_retry,
std::string* manifest_filename, std::string* manifest_filename,
uint64_t* manifest_file_number); uint64_t* manifest_file_number);
void WakeUpWaitingManifestWriters(); void WakeUpWaitingManifestWriters();

View File

@ -1223,7 +1223,7 @@ class VersionSetTestBase {
tmp_db_options.env = env_; tmp_db_options.env = env_;
std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_)); std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
std::string db_id; std::string db_id;
ASSERT_OK(impl->GetDbIdentityFromIdentityFile(&db_id)); ASSERT_OK(impl->GetDbIdentityFromIdentityFile(IOOptions(), &db_id));
new_db.SetDBId(db_id); new_db.SetDBId(db_id);
} }
new_db.SetLogNumber(0); new_db.SetLogNumber(0);
@ -1391,7 +1391,8 @@ class VersionSetTestBase {
assert(manifest_path != nullptr); assert(manifest_path != nullptr);
uint64_t manifest_file_number = 0; uint64_t manifest_file_number = 0;
Status s = versions_->GetCurrentManifestPath( Status s = versions_->GetCurrentManifestPath(
dbname_, fs_.get(), manifest_path, &manifest_file_number); dbname_, fs_.get(), /*is_retry=*/false, manifest_path,
&manifest_file_number);
ASSERT_OK(s); ASSERT_OK(s);
} }
@ -1399,7 +1400,8 @@ class VersionSetTestBase {
assert(manifest_path != nullptr); assert(manifest_path != nullptr);
uint64_t manifest_file_number = 0; uint64_t manifest_file_number = 0;
Status s = versions_->GetCurrentManifestPath( Status s = versions_->GetCurrentManifestPath(
dbname_, fs_.get(), manifest_path, &manifest_file_number); dbname_, fs_.get(), /*is_retry=*/false, manifest_path,
&manifest_file_number);
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_EQ(1, manifest_file_number); ASSERT_EQ(1, manifest_file_number);
} }
@ -3515,7 +3517,7 @@ class VersionSetTestEmptyDb
tmp_db_options.env = env_; tmp_db_options.env = env_;
std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_)); std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
std::string db_id; std::string db_id;
ASSERT_OK(impl->GetDbIdentityFromIdentityFile(&db_id)); ASSERT_OK(impl->GetDbIdentityFromIdentityFile(IOOptions(), &db_id));
new_db.SetDBId(db_id); new_db.SetDBId(db_id);
} }
const std::string manifest_path = DescriptorFileName(dbname_, 1); const std::string manifest_path = DescriptorFileName(dbname_, 1);
@ -3839,7 +3841,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
tmp_db_options.env = env_; tmp_db_options.env = env_;
std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_)); std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
std::string db_id; std::string db_id;
ASSERT_OK(impl->GetDbIdentityFromIdentityFile(&db_id)); ASSERT_OK(impl->GetDbIdentityFromIdentityFile(IOOptions(), &db_id));
new_db.SetDBId(db_id); new_db.SetDBId(db_id);
} }
{ {

7
env/file_system.cc vendored
View File

@ -200,6 +200,11 @@ IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
std::string* data) { std::string* data) {
return ReadFileToString(fs, fname, IOOptions(), data);
}
IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
const IOOptions& opts, std::string* data) {
FileOptions soptions; FileOptions soptions;
data->clear(); data->clear();
std::unique_ptr<FSSequentialFile> file; std::unique_ptr<FSSequentialFile> file;
@ -212,7 +217,7 @@ IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
char* space = new char[kBufferSize]; char* space = new char[kBufferSize];
while (true) { while (true) {
Slice fragment; Slice fragment;
s = file->Read(kBufferSize, IOOptions(), &fragment, space, nullptr); s = file->Read(kBufferSize, opts, &fragment, space, nullptr);
if (!s.ok()) { if (!s.ok()) {
break; break;
} }

View File

@ -1961,4 +1961,8 @@ IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
std::string* data); std::string* data);
// A utility routine: read contents of named file into *data, passing opts to
// each read issued on the file
IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
const IOOptions& opts, std::string* data);
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

View File

@ -12,6 +12,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "file/file_util.h"
#include "file/line_file_reader.h" #include "file/line_file_reader.h"
#include "file/writable_file_writer.h" #include "file/writable_file_writer.h"
#include "options/cf_options.h" #include "options/cf_options.h"
@ -268,70 +269,89 @@ Status RocksDBOptionsParser::Parse(const ConfigOptions& config_options_in,
Reset(); Reset();
ConfigOptions config_options = config_options_in; ConfigOptions config_options = config_options_in;
std::unique_ptr<FSSequentialFile> seq_file; Status s;
Status s = fs->NewSequentialFile(file_name, FileOptions(), &seq_file, bool retry = false;
nullptr); do {
if (!s.ok()) { std::unique_ptr<FSSequentialFile> seq_file;
return s; s = fs->NewSequentialFile(file_name, FileOptions(), &seq_file, nullptr);
} if (!s.ok()) {
LineFileReader lf_reader(std::move(seq_file), file_name, return s;
config_options.file_readahead_size);
OptionSection section = kOptionSectionUnknown;
std::string title;
std::string argument;
std::unordered_map<std::string, std::string> opt_map;
std::string line;
// we only support single-lined statement.
while (lf_reader.ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) {
int line_num = static_cast<int>(lf_reader.GetLineNumber());
line = TrimAndRemoveComment(line);
if (line.empty()) {
continue;
} }
if (IsSection(line)) {
LineFileReader lf_reader(
std::move(seq_file), file_name, config_options.file_readahead_size,
nullptr, std::vector<std::shared_ptr<EventListener>>{}, nullptr, retry);
OptionSection section = kOptionSectionUnknown;
std::string title;
std::string argument;
std::unordered_map<std::string, std::string> opt_map;
std::string line;
// we only support single-lined statement.
while (
lf_reader.ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) {
int line_num = static_cast<int>(lf_reader.GetLineNumber());
line = TrimAndRemoveComment(line);
if (line.empty()) {
continue;
}
if (IsSection(line)) {
s = EndSection(config_options, section, title, argument, opt_map);
opt_map.clear();
if (!s.ok()) {
break;
}
// If the option file is not generated by a higher version, unknown
// option should only mean corruption.
if (config_options.ignore_unknown_options &&
section == kOptionSectionVersion) {
using VTuple = std::tuple<int, int, int>;
if (VTuple(db_version[0], db_version[1], db_version[2]) <=
VTuple(ROCKSDB_MAJOR, ROCKSDB_MINOR, ROCKSDB_PATCH)) {
config_options.ignore_unknown_options = false;
}
}
s = ParseSection(&section, &title, &argument, line, line_num);
if (!s.ok()) {
break;
}
} else {
std::string name;
std::string value;
s = ParseStatement(&name, &value, line, line_num);
if (!s.ok()) {
break;
}
opt_map.insert({name, value});
}
}
if (s.ok()) {
s = lf_reader.GetStatus();
}
if (s.ok()) {
s = EndSection(config_options, section, title, argument, opt_map); s = EndSection(config_options, section, title, argument, opt_map);
opt_map.clear(); opt_map.clear();
if (!s.ok()) { }
return s; if (s.ok()) {
} s = ValidityCheck();
}
// If the option file is not generated by a higher version, unknown if (!s.ok()) {
// option should only mean corruption. if ((s.IsCorruption() || s.IsInvalidArgument()) && !retry &&
if (config_options.ignore_unknown_options && CheckFSFeatureSupport(fs,
section == kOptionSectionVersion) { FSSupportedOps::kVerifyAndReconstructRead)) {
using VTuple = std::tuple<int, int, int>; retry = true;
if (VTuple(db_version[0], db_version[1], db_version[2]) <= Reset();
VTuple(ROCKSDB_MAJOR, ROCKSDB_MINOR, ROCKSDB_PATCH)) { } else {
config_options.ignore_unknown_options = false;
}
}
s = ParseSection(&section, &title, &argument, line, line_num);
if (!s.ok()) {
return s; return s;
} }
} else { } else {
std::string name; return s;
std::string value;
s = ParseStatement(&name, &value, line, line_num);
if (!s.ok()) {
return s;
}
opt_map.insert({name, value});
} }
} } while (retry);
s = lf_reader.GetStatus();
if (!s.ok()) {
return s;
}
s = EndSection(config_options, section, title, argument, opt_map); return s;
opt_map.clear();
if (!s.ok()) {
return s;
}
return ValidityCheck();
} }
Status RocksDBOptionsParser::CheckSection(const OptionSection section, Status RocksDBOptionsParser::CheckSection(const OptionSection section,

View File

@ -560,9 +560,9 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
IOOptions new_opts = opts; IOOptions new_opts = opts;
new_opts.verify_and_reconstruct_read = true; new_opts.verify_and_reconstruct_read = true;
footer->Reset(); footer->Reset();
s = ReadFooterFromFileInternal(new_opts, file, fs, prefetch_buffer, s = ReadFooterFromFileInternal(new_opts, file, fs,
file_size, footer, /*prefetch_buffer=*/nullptr, file_size,
enforce_table_magic_number); footer, enforce_table_magic_number);
RecordTick(stats, FILE_READ_CORRUPTION_RETRY_COUNT); RecordTick(stats, FILE_READ_CORRUPTION_RETRY_COUNT);
if (s.ok()) { if (s.ok()) {
RecordTick(stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT); RecordTick(stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);

View File

@ -573,7 +573,7 @@ Status ReadMetaIndexBlockInFile(RandomAccessFileReader* file,
return s; return s;
} }
s = ReadFooterFromFile(opts, file, *ioptions.fs, prefetch_buffer, file_size, s = ReadFooterFromFile(opts, file, *ioptions.fs, prefetch_buffer, file_size,
&footer, table_magic_number); &footer, table_magic_number, ioptions.stats);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }

View File

@ -0,0 +1 @@
Fix missing cases of corruption retry during DB open and read API processing.