FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync() to recover… (#8501)

Summary:
… small overwritten files.
If a file is overwritten with renamed and the parent path is not synced, FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync() will delete the file. However, RocksDB relies on file renaming to be atomic no matter whether the parent directory is synced or not, and the current behavior breaks the assumption and caused some false positive: https://github.com/facebook/rocksdb/pull/8489

Since the atomic renaming is used in CURRENT files, to fix the problem, in FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync(), we recover the state of overwritten file if the file is small.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8501

Test Plan: Run stress test for a while and see it doesn't break.

Reviewed By: anand1976

Differential Revision: D29594384

fbshipit-source-id: 589b5c2f0a9d2aca53752d7bdb0231efa5b3ae92
This commit is contained in:
sdong 2021-07-07 16:20:40 -07:00 committed by Facebook GitHub Bot
parent ed8eb436db
commit b1a53db327
3 changed files with 44 additions and 16 deletions

View File

@ -2558,16 +2558,10 @@ void StressTest::Open() {
ingest_read_error = false; ingest_read_error = false;
Random rand(static_cast<uint32_t>(FLAGS_seed)); Random rand(static_cast<uint32_t>(FLAGS_seed));
// TODO: This is disabled for now as deleting the CURRENT file after open
// failure causes the DB to not be reopened again due to the presence of
// WAL files, which DB::Open considers to be a corruption. Re-enable once
// we figure out a proper fix
#if 0
if (rand.OneIn(2)) { if (rand.OneIn(2)) {
fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(),
nullptr); nullptr);
} }
#endif
if (rand.OneIn(3)) { if (rand.OneIn(3)) {
fault_fs_guard->DropUnsyncedFileData(); fault_fs_guard->DropUnsyncedFileData();
} else if (rand.OneIn(2)) { } else if (rand.OneIn(2)) {

View File

@ -30,6 +30,8 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
const std::string kNewFileNoOverwrite = "";
// Assume a filename, and not a directory name like "/foo/bar/" // Assume a filename, and not a directory name like "/foo/bar/"
std::string TestFSGetDirName(const std::string filename) { std::string TestFSGetDirName(const std::string filename) {
size_t found = filename.find_last_of("/\\"); size_t found = filename.find_last_of("/\\");
@ -415,7 +417,10 @@ IOStatus FaultInjectionTestFS::NewWritableFile(
open_files_.insert(fname); open_files_.insert(fname);
auto dir_and_name = TestFSGetDirAndName(fname); auto dir_and_name = TestFSGetDirAndName(fname);
auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first];
list.insert(dir_and_name.second); // The new file could overwrite an old one. Here we simplify
// the implementation by assuming no file of this name after
// dropping unsynced files.
list[dir_and_name.second] = kNewFileNoOverwrite;
} }
{ {
IOStatus in_s = InjectMetadataWriteError(); IOStatus in_s = InjectMetadataWriteError();
@ -454,7 +459,7 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile(
open_files_.insert(fname); open_files_.insert(fname);
auto dir_and_name = TestFSGetDirAndName(fname); auto dir_and_name = TestFSGetDirAndName(fname);
auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first];
list.insert(dir_and_name.second); list[dir_and_name.second] = kNewFileNoOverwrite;
} }
{ {
IOStatus in_s = InjectMetadataWriteError(); IOStatus in_s = InjectMetadataWriteError();
@ -492,7 +497,9 @@ IOStatus FaultInjectionTestFS::NewRandomRWFile(
open_files_.insert(fname); open_files_.insert(fname);
auto dir_and_name = TestFSGetDirAndName(fname); auto dir_and_name = TestFSGetDirAndName(fname);
auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first];
list.insert(dir_and_name.second); // It could be overwriting an old file, but we simplify the
// implementation by ignoring it.
list[dir_and_name.second] = kNewFileNoOverwrite;
} }
{ {
IOStatus in_s = InjectMetadataWriteError(); IOStatus in_s = InjectMetadataWriteError();
@ -579,6 +586,19 @@ IOStatus FaultInjectionTestFS::RenameFile(const std::string& s,
return in_s; return in_s;
} }
} }
// We preserve contents of overwritten files up to a size threshold.
// We could keep previous file in another name, but we need to worry about
// garbage collect the those files. We do it if it is needed later.
// We ignore I/O errors here for simplicity.
std::string previous_contents = kNewFileNoOverwrite;
if (target()->FileExists(t, IOOptions(), nullptr).ok()) {
uint64_t file_size;
if (target()->GetFileSize(t, IOOptions(), &file_size, nullptr).ok() &&
file_size < 1024) {
ReadFileToString(target(), t, &previous_contents).PermitUncheckedError();
}
}
IOStatus io_s = FileSystemWrapper::RenameFile(s, t, options, dbg); IOStatus io_s = FileSystemWrapper::RenameFile(s, t, options, dbg);
if (io_s.ok()) { if (io_s.ok()) {
@ -594,7 +614,7 @@ IOStatus FaultInjectionTestFS::RenameFile(const std::string& s,
if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) {
auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; auto& tlist = dir_to_new_files_since_last_sync_[tdn.first];
assert(tlist.find(tdn.second) == tlist.end()); assert(tlist.find(tdn.second) == tlist.end());
tlist.insert(tdn.second); tlist[tdn.second] = previous_contents;
} }
} }
IOStatus in_s = InjectMetadataWriteError(); IOStatus in_s = InjectMetadataWriteError();
@ -665,7 +685,7 @@ IOStatus FaultInjectionTestFS::DropRandomUnsyncedFileData(Random* rnd) {
IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync( IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync(
const IOOptions& options, IODebugContext* dbg) { const IOOptions& options, IODebugContext* dbg) {
// Because DeleteFile access this container make a copy to avoid deadlock // Because DeleteFile access this container make a copy to avoid deadlock
std::map<std::string, std::set<std::string>> map_copy; std::map<std::string, std::map<std::string, std::string>> map_copy;
{ {
MutexLock l(&mutex_); MutexLock l(&mutex_);
map_copy.insert(dir_to_new_files_since_last_sync_.begin(), map_copy.insert(dir_to_new_files_since_last_sync_.begin(),
@ -673,10 +693,20 @@ IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync(
} }
for (auto& pair : map_copy) { for (auto& pair : map_copy) {
for (std::string name : pair.second) { for (auto& file_pair : pair.second) {
IOStatus io_s = DeleteFile(pair.first + "/" + name, options, dbg); if (file_pair.second == kNewFileNoOverwrite) {
if (!io_s.ok()) { IOStatus io_s =
return io_s; DeleteFile(pair.first + "/" + file_pair.first, options, dbg);
if (!io_s.ok()) {
return io_s;
}
} else {
IOStatus io_s =
WriteStringToFile(target(), file_pair.second,
pair.first + "/" + file_pair.first, true);
if (!io_s.ok()) {
return io_s;
}
} }
} }
} }

View File

@ -480,7 +480,11 @@ class FaultInjectionTestFS : public FileSystemWrapper {
port::Mutex mutex_; port::Mutex mutex_;
std::map<std::string, FSFileState> db_file_state_; std::map<std::string, FSFileState> db_file_state_;
std::set<std::string> open_files_; std::set<std::string> open_files_;
std::unordered_map<std::string, std::set<std::string>> // directory -> (file name -> file contents to recover)
// When data is recovered from unsyned parent directory, the files with
// empty file contents to recover is deleted. Those with non-empty ones
// will be recovered to content accordingly.
std::unordered_map<std::string, std::map<std::string, std::string>>
dir_to_new_files_since_last_sync_; dir_to_new_files_since_last_sync_;
bool filesystem_active_; // Record flushes, syncs, writes bool filesystem_active_; // Record flushes, syncs, writes
bool filesystem_writable_; // Bypass FaultInjectionTestFS and go directly bool filesystem_writable_; // Bypass FaultInjectionTestFS and go directly