mirror of https://github.com/facebook/rocksdb.git
Fix a case of ignored corruption in creating backups (#12200)
Summary: We often need to read the table properties of an SST file when taking a backup. However, we currently do not check checksums for this step, and even with that enabled, we ignore failures. This change ensures we fail creating a backup if corruption is detected in that step of reading table properties. To get this working properly (with existing unit tests), we also add some temperature handling logic like already exists in BackupEngineImpl::ReadFileAndComputeChecksum and elsewhere in BackupEngine. Also, SstFileDumper needed a fix to its error handling logic. This was originally intended to help diagnose some mysterious failures (apparent corruptions) seen in taking backups in the crash test, though that is now fixed in https://github.com/facebook/rocksdb/pull/12206 Pull Request resolved: https://github.com/facebook/rocksdb/pull/12200 Test Plan: unit test added that corrupts table properties, along with existing tests Reviewed By: ajkr Differential Revision: D52520674 Pulled By: pdillinger fbshipit-source-id: 032cfc0791428f3b8147d34c7d424ab128e28f42
This commit is contained in:
parent
5cb2d09d47
commit
5da900f28a
|
@ -92,13 +92,14 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
|
|||
fopts.temperature = file_temp_;
|
||||
Status s = fs->NewRandomAccessFile(file_path, fopts, &file, nullptr);
|
||||
if (s.ok()) {
|
||||
// check empty file
|
||||
// if true, skip further processing of this file
|
||||
s = fs->GetFileSize(file_path, IOOptions(), &file_size, nullptr);
|
||||
}
|
||||
|
||||
// check empty file
|
||||
// if true, skip further processing of this file
|
||||
if (file_size == 0) {
|
||||
return Status::Aborted(file_path, "Empty file");
|
||||
if (s.ok()) {
|
||||
if (file_size == 0) {
|
||||
return Status::Aborted(file_path, "Empty file");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file_.reset(new RandomAccessFileReader(std::move(file), file_path));
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Fixed some cases in which DB file corruption was detected but ignored on creating a backup with BackupEngine.
|
|
@ -1556,11 +1556,10 @@ IOStatus BackupEngineImpl::CreateNewBackupWithMetadata(
|
|||
}
|
||||
ROCKS_LOG_INFO(options_.info_log,
|
||||
"dispatch files for backup done, wait for finish.");
|
||||
IOStatus item_io_status;
|
||||
for (auto& item : backup_items_to_finish) {
|
||||
item.result.wait();
|
||||
auto result = item.result.get();
|
||||
item_io_status = result.io_status;
|
||||
IOStatus item_io_status = result.io_status;
|
||||
Temperature temp = result.expected_src_temperature;
|
||||
if (result.current_src_temperature != Temperature::kUnknown &&
|
||||
(temp == Temperature::kUnknown ||
|
||||
|
@ -1577,7 +1576,8 @@ IOStatus BackupEngineImpl::CreateNewBackupWithMetadata(
|
|||
result.db_session_id, temp));
|
||||
}
|
||||
if (!item_io_status.ok()) {
|
||||
io_s = item_io_status;
|
||||
io_s = std::move(item_io_status);
|
||||
io_s.MustCheck();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2332,11 +2332,20 @@ IOStatus BackupEngineImpl::AddBackupFileWorkItem(
|
|||
if (GetNamingNoFlags() != BackupEngineOptions::kLegacyCrc32cAndFileSize &&
|
||||
file_type != kBlobFile) {
|
||||
// Prepare db_session_id to add to the file name
|
||||
// Ignore the returned status
|
||||
// In the failed cases, db_id and db_session_id will be empty
|
||||
GetFileDbIdentities(db_env_, src_env_options, src_path, src_temperature,
|
||||
rate_limiter, &db_id, &db_session_id)
|
||||
.PermitUncheckedError();
|
||||
Status s = GetFileDbIdentities(db_env_, src_env_options, src_path,
|
||||
src_temperature, rate_limiter, &db_id,
|
||||
&db_session_id);
|
||||
if (s.IsPathNotFound()) {
|
||||
// Retry with any temperature
|
||||
s = GetFileDbIdentities(db_env_, src_env_options, src_path,
|
||||
Temperature::kUnknown, rate_limiter, &db_id,
|
||||
&db_session_id);
|
||||
}
|
||||
if (s.IsNotFound()) {
|
||||
// db_id and db_session_id will be empty, which is OK for old files
|
||||
} else if (!s.ok()) {
|
||||
return status_to_io_status(std::move(s));
|
||||
}
|
||||
}
|
||||
// Calculate checksum if checksum and db session id are not available.
|
||||
// If db session id is available, we will not calculate the checksum
|
||||
|
@ -2594,7 +2603,7 @@ Status BackupEngineImpl::GetFileDbIdentities(
|
|||
SstFileDumper sst_reader(options, file_path, file_temp,
|
||||
2 * 1024 * 1024
|
||||
/* readahead_size */,
|
||||
false /* verify_checksum */, false /* output_hex */,
|
||||
true /* verify_checksum */, false /* output_hex */,
|
||||
false /* decode_blob_index */, src_env_options,
|
||||
true /* silent */);
|
||||
|
||||
|
@ -2605,6 +2614,7 @@ Status BackupEngineImpl::GetFileDbIdentities(
|
|||
if (s.ok()) {
|
||||
// Try to get table properties from the table reader of sst_reader
|
||||
if (!sst_reader.ReadTableProperties(&tp).ok()) {
|
||||
// FIXME (peterd): this logic is untested and seems obsolete.
|
||||
// Try to use table properites from the initialization of sst_reader
|
||||
table_properties = sst_reader.GetInitTableProperties();
|
||||
} else {
|
||||
|
|
|
@ -499,6 +499,24 @@ class FileManager : public EnvWrapper {
|
|||
return WriteToFile(fname, file_contents);
|
||||
}
|
||||
|
||||
Status CorruptFileMiddle(const std::string& fname) {
|
||||
std::string to_xor = "blah";
|
||||
std::string file_contents;
|
||||
Status s = ReadFileToString(this, fname, &file_contents);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
s = DeleteFile(fname);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
size_t middle = file_contents.size() / 2;
|
||||
for (size_t i = 0; i < to_xor.size(); ++i) {
|
||||
file_contents[middle + i] ^= to_xor[i];
|
||||
}
|
||||
return WriteToFile(fname, file_contents);
|
||||
}
|
||||
|
||||
Status CorruptChecksum(const std::string& fname, bool appear_valid) {
|
||||
std::string metadata;
|
||||
Status s = ReadFileToString(this, fname, &metadata);
|
||||
|
@ -2235,6 +2253,33 @@ TEST_F(BackupEngineTest, TableFileCorruptionBeforeIncremental) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(BackupEngineTest, PropertiesBlockCorruptionIncremental) {
|
||||
OpenDBAndBackupEngine(true, false, kShareWithChecksum);
|
||||
DBImpl* dbi = static_cast<DBImpl*>(db_.get());
|
||||
// A small SST file
|
||||
ASSERT_OK(dbi->Put(WriteOptions(), "x", "y"));
|
||||
ASSERT_OK(dbi->Flush(FlushOptions()));
|
||||
ASSERT_OK(dbi->TEST_WaitForFlushMemTable());
|
||||
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
|
||||
CloseBackupEngine();
|
||||
|
||||
std::vector<FileAttributes> table_files;
|
||||
ASSERT_OK(GetDataFilesInDB(kTableFile, &table_files));
|
||||
ASSERT_EQ(table_files.size(), 1);
|
||||
std::string tf = dbname_ + "/" + table_files[0].name;
|
||||
// Properties block should be in the middle of a small file
|
||||
ASSERT_OK(db_file_manager_->CorruptFileMiddle(tf));
|
||||
|
||||
OpenBackupEngine();
|
||||
|
||||
Status s = backup_engine_->CreateNewBackup(db_.get());
|
||||
ASSERT_TRUE(s.IsCorruption());
|
||||
|
||||
CloseDBAndBackupEngine();
|
||||
}
|
||||
|
||||
// Test how naming options interact with detecting file size corruption
|
||||
// between incremental backups
|
||||
TEST_F(BackupEngineTest, FileSizeForIncremental) {
|
||||
|
|
|
@ -452,8 +452,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
|||
void SetRandomReadError(int one_in) { read_error_one_in_ = one_in; }
|
||||
|
||||
bool ShouldInjectRandomReadError() {
|
||||
return read_error_one_in() &&
|
||||
Random::GetTLSInstance()->OneIn(read_error_one_in());
|
||||
auto one_in = read_error_one_in();
|
||||
return one_in > 0 && Random::GetTLSInstance()->OneIn(one_in);
|
||||
}
|
||||
|
||||
// Inject an write error with randomlized parameter and the predefined
|
||||
|
|
Loading…
Reference in New Issue