rocksdb/utilities/fault_injection_fs.h
Hui Xiao 408e8d4c85 Handle injected write error after successful WAL write in crash test + misc (#12838)
Summary:
**Context/Summary:**
We recently discovered the following false positive in our crash test:
(1) PUT() writes k/v to the WAL but fails in `ApplyWALToManifest()`. The k/v is in the WAL.
(2) The current stress test logic rolls back the expected state of such k/v since PUT() fails.
(3) If the DB crashes before recovery finishes and then reopens, the WAL is replayed, so the k/v is in the DB while its expected state has already been rolled back.

We decided to leave such expected state pending until the loop-write of the same key succeeds, as sketched below.
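
To make the intent concrete, here is a minimal, hypothetical sketch of that pending-until-success logic; the type and helper names (`PendingExpectedValue`, `MarkPending`, `PutOutcome`, etc.) are illustrative stand-ins, not the actual db_stress classes:

```cpp
#include <cstdint>
#include <functional>

// Hypothetical sketch (not the actual db_stress code): the expected state of
// a key stays "pending" across an injected post-WAL failure instead of being
// rolled back, so a crash followed by WAL replay cannot contradict it.
enum class PutOutcome { kOk, kFailedAfterWalWrite, kFailedBeforeWalWrite };

struct PendingExpectedValue {  // hypothetical stand-in for db_stress state
  bool pending = false;
  uint32_t value_base = 0;
  uint32_t committed_value_base = 0;

  void MarkPending(uint32_t v) { pending = true; value_base = v; }
  void Commit() { committed_value_base = value_base; pending = false; }
  void Rollback() { pending = false; }
};

// do_put is whatever issues DB::Put() under fault injection.
void StressPut(PendingExpectedValue& expected, uint32_t new_value_base,
               const std::function<PutOutcome()>& do_put) {
  expected.MarkPending(new_value_base);
  switch (do_put()) {
    case PutOutcome::kOk:
      expected.Commit();  // the value is definitely in the DB
      break;
    case PutOutcome::kFailedAfterWalWrite:
      // Do NOT roll back: the k/v may reappear via WAL replay after a crash.
      // Leave it pending until a later write of the same key succeeds.
      break;
    case PutOutcome::kFailedBeforeWalWrite:
      expected.Rollback();  // nothing reached the WAL; safe to roll back
      break;
  }
}
```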

Bonus: I realized that a write to the manifest can also fail the user write, which faces a problem similar to https://github.com/facebook/rocksdb/pull/12797, so I decided to disable fault injection on user writes per thread (instead of globally) when tracing is needed for prefix recovery, plus some refactoring.
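
As a rough illustration of the per-thread approach, the sketch below uses the thread-local toggles declared in `fault_injection_fs.h` (see the header further down this page); the scope-guard class and `TracedWrite` function are hypothetical, not the stress-test code:

```cpp
// Sketch only: a scope guard that turns write-fault injection off for the
// calling thread, leaving injection enabled on all other stress threads.
#include "utilities/fault_injection_fs.h"

using ROCKSDB_NAMESPACE::FaultInjectionIOType;
using ROCKSDB_NAMESPACE::FaultInjectionTestFS;

class ScopedDisableWriteFaults {  // hypothetical helper, not in RocksDB
 public:
  explicit ScopedDisableWriteFaults(FaultInjectionTestFS* fs) : fs_(fs) {
    fs_->DisableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
    fs_->DisableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
  }
  ~ScopedDisableWriteFaults() {
    // Re-enable on exit (assumes injection was enabled before the guard).
    fs_->EnableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
    fs_->EnableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
  }

 private:
  FaultInjectionTestFS* fs_;
};

// Hypothetical call site: a user write whose trace must survive for prefix
// recovery runs with injection disabled only on this thread.
void TracedWrite(FaultInjectionTestFS* fault_fs /*, DB*, WriteBatch*, ... */) {
  ScopedDisableWriteFaults guard(fault_fs);
  // ... issue the traced write here; other threads keep injecting faults ...
}
```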

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12838

Test Plan:
Rehearsal CI
Run the command below (varying sync_fault_injection between 1 and 0 to verify ExpectedState behavior) for a while to ensure crash recovery validation works fine:

```
python3 tools/db_crashtest.py --simple blackbox --interval=30 --WAL_size_limit_MB=0 --WAL_ttl_seconds=0 --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --adm_policy=1 --advise_random_on_open=0 --allow_concurrent_memtable_write=0 --allow_data_in_errors=True --allow_fallocate=0 --async_io=0 --auto_readahead_size=0 --avoid_flush_during_recovery=0 --avoid_flush_during_shutdown=0 --avoid_unnecessary_blocking_io=0 --backup_max_size=104857600 --backup_one_in=0 --batch_protection_bytes_per_key=0 --bgerror_resume_retry_interval=1000000 --block_align=1 --block_protection_bytes_per_key=4 --block_size=16384 --bloom_before_level=4 --bloom_bits=56.810257702625165 --bottommost_compression_type=none --bottommost_file_compaction_delay=0 --bytes_per_sync=262144 --cache_index_and_filter_blocks=1 --cache_index_and_filter_blocks_with_high_priority=1 --cache_size=8388608 --cache_type=auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=1 --charge_filter_construction=1 --charge_table_reader=0 --check_multiget_consistency=0 --check_multiget_entity_consistency=1 --checkpoint_one_in=10000 --checksum_type=kxxHash --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000 --compact_range_one_in=1000 --compaction_pri=4 --compaction_readahead_size=1048576 --compaction_ttl=10 --compress_format_version=1 --compressed_secondary_cache_ratio=0.0 --compressed_secondary_cache_size=0 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=none --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --daily_offpeak_time_utc=04:00-08:00 --data_block_index_type=1 --db_write_buffer_size=0 --default_temperature=kWarm --default_write_temperature=kCold --delete_obsolete_files_period_micros=30000000 --delpercent=20 --delrangepercent=20 --destroy_db_initially=0 --detect_filter_construct_corruption=0 --disable_file_deletions_one_in=10000 --disable_manual_compaction_one_in=1000000 --disable_wal=0 --dump_malloc_stats=0 --enable_checksum_handoff=1 --enable_compaction_filter=0 --enable_custom_split_merge=0 --enable_do_not_compress_roles=0 --enable_index_compression=1 --enable_memtable_insert_with_hint_prefix_extractor=0 --enable_pipelined_write=0 --enable_sst_partitioner_factory=0 --enable_thread_tracking=0 --enable_write_thread_adaptive_yield=0 --error_recovery_with_no_fault_injection=1 --exclude_wal_from_write_fault_injection=0 --fail_if_options_file_error=1 --fifo_allow_compaction=0 --file_checksum_impl=crc32c --fill_cache=1 --flush_one_in=1000000 --format_version=3 --get_all_column_family_metadata_one_in=1000000 --get_current_wal_file_one_in=0 --get_live_files_apis_one_in=1000000 --get_properties_of_all_tables_one_in=1000000 --get_property_one_in=100000 --get_sorted_wal_files_one_in=0 --hard_pending_compaction_bytes_limit=274877906944 --high_pri_pool_ratio=0.5 --index_block_restart_interval=4 --index_shortening=2 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=16384 --inplace_update_support=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --key_may_exist_one_in=100 --last_level_temperature=kWarm --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=10000 --log_file_time_to_roll=60 --log_readahead_size=16777216 --long_running_snapshots=1 --low_pri_pool_ratio=0 --lowest_used_cache_tier=0 --manifest_preallocation_size=0 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=10 
--max_auto_readahead_size=16384 --max_background_compactions=1 --max_bytes_for_level_base=67108864 --max_key=100000 --max_key_len=3 --max_log_file_size=1048576 --max_manifest_file_size=32768 --max_sequential_skip_in_iterations=1 --max_total_wal_size=0 --max_write_batch_group_size_bytes=16 --max_write_buffer_number=10 --max_write_buffer_size_to_maintain=8388608 --memtable_insert_hint_per_batch=1 --memtable_max_range_deletions=0 --memtable_prefix_bloom_size_ratio=0.01 --memtable_protection_bytes_per_key=1 --memtable_whole_key_filtering=1 --memtablerep=skip_list --metadata_charge_policy=1 --metadata_read_fault_one_in=0 --metadata_write_fault_one_in=8 --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=-1 --open_metadata_read_fault_one_in=0 --open_metadata_write_fault_one_in=8 --open_read_fault_one_in=0 --open_write_fault_one_in=8 --ops_per_thread=100000000 --optimize_filters_for_hits=1 --optimize_filters_for_memory=1 --optimize_multiget_for_io=1 --paranoid_file_checks=0 --partition_filters=0 --partition_pinning=3 --pause_background_one_in=1000000 --periodic_compaction_seconds=2 --prefix_size=7 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=0 --progress_reports=0 --promote_l0_one_in=0 --read_amp_bytes_per_bit=0 --read_fault_one_in=1000 --readahead_size=524288 --readpercent=10 --recycle_log_file_num=1 --reopen=0 --report_bg_io_stats=0 --reset_stats_one_in=1000000 --sample_for_compression=0 --secondary_cache_fault_one_in=0 --set_options_one_in=0 --skip_stats_update_on_db_open=1 --snapshot_hold_ops=100000 --soft_pending_compaction_bytes_limit=68719476736 --sqfc_name=foo --sqfc_version=0 --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=10 --stats_history_buffer_size=0 --strict_bytes_per_sync=1 --subcompactions=4 --sync=1 --sync_fault_injection=0 --table_cache_numshardbits=6 --target_file_size_base=16777216 --target_file_size_multiplier=1 --test_batches_snapshots=0 --top_level_index_pinning=2 --uncache_aggressiveness=239 --universal_max_read_amp=-1 --unpartitioned_pinning=1 --use_adaptive_mutex=1 --use_adaptive_mutex_lru=1 --use_attribute_group=0 --use_delta_encoding=0 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=0 --use_multi_cf_iterator=0 --use_multi_get_entity=0 --use_multiget=0 --use_put_entity_one_in=0 --use_sqfc_for_range_queries=1 --use_timed_put_one_in=0 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_compression=0 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=none --write_buffer_size=33554432 --write_dbid_to_manifest=0 --write_fault_one_in=8 --writepercent=40
```

Reviewed By: cbi42

Differential Revision: D59377075

Pulled By: hx235

fbshipit-source-id: 91f602fd67e2d339d378cd28b982095fd073dcb6
2024-07-29 13:51:49 -07:00

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// This test uses a custom FileSystem to keep track of the state of a file
// system as of the last "Sync". Data being written is cached in a "buffer";
// only when "Sync" is called does the data become persistent. It can simulate
// loss of file data (or entire files) not protected by a "Sync". For any of
// the FileSystem-related operations, by specifying an "IOStatus Error", a
// specific error can be returned when the file system is not active.
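
For orientation, a minimal usage sketch (not part of the RocksDB test suite) might look like the following: wrap the default FileSystem, open a DB through a composite Env, then drop unsynced data to emulate a crash. The DB path and the exact call sequence are illustrative assumptions.

```cpp
// Usage sketch only (not from the RocksDB test suite).
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "utilities/fault_injection_fs.h"

using ROCKSDB_NAMESPACE::DB;
using ROCKSDB_NAMESPACE::FaultInjectionTestFS;
using ROCKSDB_NAMESPACE::FileSystem;
using ROCKSDB_NAMESPACE::Options;
using ROCKSDB_NAMESPACE::WriteOptions;

int main() {
  // Wrap the real file system so all I/O goes through the fault injector.
  auto fault_fs =
      std::make_shared<FaultInjectionTestFS>(FileSystem::Default());
  std::unique_ptr<ROCKSDB_NAMESPACE::Env> env =
      ROCKSDB_NAMESPACE::NewCompositeEnv(fault_fs);

  Options options;
  options.create_if_missing = true;
  options.env = env.get();

  DB* db = nullptr;
  auto s = DB::Open(options, "/tmp/fault_injection_demo", &db);
  if (!s.ok()) {
    return 1;
  }

  // Write without sync: the data is only in the test FS "buffer" so far.
  s = db->Put(WriteOptions(), "k1", "v1");

  // Emulate a crash: freeze the recorded state and drop unsynced file data.
  fault_fs->SetFilesystemActive(false);
  fault_fs->DropUnsyncedFileData().PermitUncheckedError();
  fault_fs->SetFilesystemActive(true);

  delete db;  // on a later reopen, "k1" may be missing since it was not synced
  return 0;
}
```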
#pragma once
#include <algorithm>
#include <map>
#include <set>
#include <string>
#include "file/filename.h"
#include "rocksdb/file_system.h"
#include "util/mutexlock.h"
#include "util/random.h"
#include "util/thread_local.h"
namespace ROCKSDB_NAMESPACE {
class TestFSWritableFile;
class FaultInjectionTestFS;
enum class FaultInjectionIOType {
kRead = 0,
kWrite,
kMetadataRead,
kMetadataWrite,
};
struct FSFileState {
std::string filename_;
uint64_t pos_at_last_append_ = 0;
uint64_t pos_at_last_sync_ = 0;
std::string buffer_;
explicit FSFileState(const std::string& filename = {})
: filename_(filename) {}
bool IsFullySynced() const {
return pos_at_last_append_ == pos_at_last_sync_;
}
IOStatus DropUnsyncedData();
IOStatus DropRandomUnsyncedData(Random* rand);
};
// A wrapper around an FSWritableFile that tracks what data has been written
// to the file and what has been sync'ed, so unsynced data can be dropped.
class TestFSWritableFile : public FSWritableFile {
public:
explicit TestFSWritableFile(const std::string& fname,
const FileOptions& file_opts,
std::unique_ptr<FSWritableFile>&& f,
FaultInjectionTestFS* fs);
virtual ~TestFSWritableFile();
IOStatus Append(const Slice& data, const IOOptions&,
IODebugContext*) override;
IOStatus Append(const Slice& data, const IOOptions& options,
const DataVerificationInfo& verification_info,
IODebugContext* dbg) override;
IOStatus Truncate(uint64_t size, const IOOptions& options,
IODebugContext* dbg) override;
IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
IOStatus Flush(const IOOptions&, IODebugContext*) override;
IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
IOStatus RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/,
const IOOptions& options, IODebugContext* dbg) override;
bool IsSyncThreadSafe() const override { return true; }
IOStatus PositionedAppend(const Slice& data, uint64_t offset,
const IOOptions& options,
IODebugContext* dbg) override;
IOStatus PositionedAppend(const Slice& data, uint64_t offset,
const IOOptions& options,
const DataVerificationInfo& verification_info,
IODebugContext* dbg) override;
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
bool use_direct_io() const override { return target_->use_direct_io(); }
uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override {
MutexLock l(&mutex_);
return target_->GetFileSize(options, dbg);
}
private:
FSFileState state_; // Need protection by mutex_
FileOptions file_opts_;
std::unique_ptr<FSWritableFile> target_;
bool writable_file_opened_;
FaultInjectionTestFS* fs_;
port::Mutex mutex_;
const bool unsync_data_loss_;
};
// A wrapper around an FSRandomRWFile that forwards operations to the target
// file under the control of FaultInjectionTestFS.
class TestFSRandomRWFile : public FSRandomRWFile {
public:
explicit TestFSRandomRWFile(const std::string& fname,
std::unique_ptr<FSRandomRWFile>&& f,
FaultInjectionTestFS* fs);
virtual ~TestFSRandomRWFile();
IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
IODebugContext* dbg) override;
IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
Slice* result, char* scratch,
IODebugContext* dbg) const override;
IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
bool use_direct_io() const override { return target_->use_direct_io(); }
private:
std::unique_ptr<FSRandomRWFile> target_;
bool file_opened_;
FaultInjectionTestFS* fs_;
};
class TestFSRandomAccessFile : public FSRandomAccessFile {
public:
explicit TestFSRandomAccessFile(const std::string& fname,
std::unique_ptr<FSRandomAccessFile>&& f,
FaultInjectionTestFS* fs);
~TestFSRandomAccessFile() override {}
IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
Slice* result, char* scratch,
IODebugContext* dbg) const override;
IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
std::function<void(FSReadRequest&, void*)> cb,
void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
IODebugContext* dbg) override;
IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
const IOOptions& options, IODebugContext* dbg) override;
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetUniqueId(char* id, size_t max_size) const override;
private:
std::unique_ptr<FSRandomAccessFile> target_;
FaultInjectionTestFS* fs_;
};
class TestFSSequentialFile : public FSSequentialFileOwnerWrapper {
public:
explicit TestFSSequentialFile(std::unique_ptr<FSSequentialFile>&& f,
FaultInjectionTestFS* fs, std::string fname)
: FSSequentialFileOwnerWrapper(std::move(f)),
fs_(fs),
fname_(std::move(fname)) {}
IOStatus Read(size_t n, const IOOptions& options, Slice* result,
char* scratch, IODebugContext* dbg) override;
IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
Slice* result, char* scratch,
IODebugContext* dbg) override;
private:
FaultInjectionTestFS* fs_;
std::string fname_;
uint64_t read_pos_ = 0;
uint64_t target_read_pos_ = 0;
};
class TestFSDirectory : public FSDirectory {
public:
explicit TestFSDirectory(FaultInjectionTestFS* fs, std::string dirname,
FSDirectory* dir)
: fs_(fs), dirname_(std::move(dirname)), dir_(dir) {}
~TestFSDirectory() {}
IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
IOStatus FsyncWithDirOptions(
const IOOptions& options, IODebugContext* dbg,
const DirFsyncOptions& dir_fsync_options) override;
private:
FaultInjectionTestFS* fs_;
std::string dirname_;
std::unique_ptr<FSDirectory> dir_;
};
class FaultInjectionTestFS : public FileSystemWrapper {
public:
explicit FaultInjectionTestFS(const std::shared_ptr<FileSystem>& base)
: FileSystemWrapper(base),
filesystem_active_(true),
filesystem_writable_(false),
inject_unsynced_data_loss_(false),
read_unsynced_data_(true),
allow_link_open_file_(false),
injected_thread_local_read_error_(DeleteThreadLocalErrorContext),
injected_thread_local_write_error_(DeleteThreadLocalErrorContext),
injected_thread_local_metadata_read_error_(
DeleteThreadLocalErrorContext),
injected_thread_local_metadata_write_error_(
DeleteThreadLocalErrorContext),
ingest_data_corruption_before_write_(false),
checksum_handoff_func_type_(kCRC32c),
fail_get_file_unique_id_(false) {}
virtual ~FaultInjectionTestFS() override { fs_error_.PermitUncheckedError(); }
static const char* kClassName() { return "FaultInjectionTestFS"; }
const char* Name() const override { return kClassName(); }
static bool IsInjectedError(const Status& s) {
assert(!s.ok());
return std::strstr(s.getState(), kInjected.c_str());
}
static bool IsFailedToWriteToWALError(const Status& s) {
assert(!s.ok());
return std::strstr(s.getState(), kFailedToWriteToWAL.c_str());
}
IOStatus NewDirectory(const std::string& name, const IOOptions& options,
std::unique_ptr<FSDirectory>* result,
IODebugContext* dbg) override;
IOStatus FileExists(const std::string& fname, const IOOptions& options,
IODebugContext* dbg) override;
IOStatus GetChildren(const std::string& dir, const IOOptions& options,
std::vector<std::string>* result,
IODebugContext* dbg) override;
IOStatus GetChildrenFileAttributes(const std::string& dir,
const IOOptions& options,
std::vector<FileAttributes>* result,
IODebugContext* dbg) override;
IOStatus NewWritableFile(const std::string& fname,
const FileOptions& file_opts,
std::unique_ptr<FSWritableFile>* result,
IODebugContext* dbg) override;
IOStatus ReopenWritableFile(const std::string& fname,
const FileOptions& file_opts,
std::unique_ptr<FSWritableFile>* result,
IODebugContext* dbg) override;
IOStatus ReuseWritableFile(const std::string& fname,
const std::string& old_fname,
const FileOptions& file_opts,
std::unique_ptr<FSWritableFile>* result,
IODebugContext* dbg) override;
IOStatus NewRandomRWFile(const std::string& fname,
const FileOptions& file_opts,
std::unique_ptr<FSRandomRWFile>* result,
IODebugContext* dbg) override;
IOStatus NewRandomAccessFile(const std::string& fname,
const FileOptions& file_opts,
std::unique_ptr<FSRandomAccessFile>* result,
IODebugContext* dbg) override;
IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts,
std::unique_ptr<FSSequentialFile>* r,
IODebugContext* dbg) override;
IOStatus DeleteFile(const std::string& f, const IOOptions& options,
IODebugContext* dbg) override;
IOStatus GetFileSize(const std::string& f, const IOOptions& options,
uint64_t* file_size, IODebugContext* dbg) override;
IOStatus GetFileModificationTime(const std::string& fname,
const IOOptions& options,
uint64_t* file_mtime,
IODebugContext* dbg) override;
IOStatus RenameFile(const std::string& s, const std::string& t,
const IOOptions& options, IODebugContext* dbg) override;
IOStatus LinkFile(const std::string& src, const std::string& target,
const IOOptions& options, IODebugContext* dbg) override;
IOStatus NumFileLinks(const std::string& fname, const IOOptions& options,
uint64_t* count, IODebugContext* dbg) override;
IOStatus AreFilesSame(const std::string& first, const std::string& second,
const IOOptions& options, bool* res,
IODebugContext* dbg) override;
IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options,
std::string* output_path,
IODebugContext* dbg) override;
// Undef to eliminate clash on Windows
#undef GetFreeSpace
IOStatus GetFreeSpace(const std::string& path, const IOOptions& options,
uint64_t* disk_free, IODebugContext* dbg) override {
IOStatus io_s;
if (!IsFilesystemActive() &&
fs_error_.subcode() == IOStatus::SubCode::kNoSpace) {
*disk_free = 0;
} else {
io_s = MaybeInjectThreadLocalError(FaultInjectionIOType::kMetadataRead,
options);
if (io_s.ok()) {
io_s = target()->GetFreeSpace(path, options, disk_free, dbg);
}
}
return io_s;
}
IOStatus IsDirectory(const std::string& path, const IOOptions& options,
bool* is_dir, IODebugContext* dgb) override;
IOStatus Poll(std::vector<void*>& io_handles,
size_t min_completions) override;
IOStatus AbortIO(std::vector<void*>& io_handles) override;
void WritableFileClosed(const FSFileState& state);
void WritableFileSynced(const FSFileState& state);
void WritableFileAppended(const FSFileState& state);
IOStatus DropUnsyncedFileData();
IOStatus DropRandomUnsyncedFileData(Random* rnd);
IOStatus DeleteFilesCreatedAfterLastDirSync(const IOOptions& options,
IODebugContext* dbg);
void ResetState();
void UntrackFile(const std::string& f);
void SyncDir(const std::string& dirname) {
MutexLock l(&mutex_);
dir_to_new_files_since_last_sync_.erase(dirname);
}
// Setting the filesystem to inactive is the test equivalent of simulating a
// system reset. Setting it to inactive will freeze our saved filesystem state
// so that it stops being recorded. It can then be reset back to the state at
// the time of the reset.
bool IsFilesystemActive() {
MutexLock l(&mutex_);
return filesystem_active_;
}
// Setting filesystem_writable_ makes NewWritableFile, ReopenWritableFile,
// and NewRandomRWFile bypass FaultInjectionTestFS and go directly to the
// target FS
bool IsFilesystemDirectWritable() {
MutexLock l(&mutex_);
return filesystem_writable_;
}
void SetFilesystemActiveNoLock(
bool active, IOStatus error = IOStatus::Corruption("Not active")) {
error.PermitUncheckedError();
filesystem_active_ = active;
if (!active) {
fs_error_ = error;
}
}
void SetFilesystemActive(
bool active, IOStatus error = IOStatus::Corruption("Not active")) {
MutexLock l(&mutex_);
error.PermitUncheckedError();
SetFilesystemActiveNoLock(active, error);
}
void SetFilesystemDirectWritable(bool writable) {
MutexLock l(&mutex_);
filesystem_writable_ = writable;
}
// If true, we buffer written data in memory so that data loss upon a system
// crash can be simulated using only process crashes
void SetInjectUnsyncedDataLoss(bool inject) {
MutexLock l(&mutex_);
inject_unsynced_data_loss_ = inject;
}
bool InjectUnsyncedDataLoss() {
MutexLock l(&mutex_);
return inject_unsynced_data_loss_;
}
// In places (e.g. GetSortedWals()) RocksDB relies on querying the file size
// or even reading the contents of files currently open for writing, and
// as in POSIX semantics, expects to see the flushed size and contents
// regardless of what has been synced. FaultInjectionTestFS historically
// did not emulate this behavior, only showing synced data from such read
// operations. (Different from FaultInjectionTestEnv--sigh.) Calling this
// function with false restores this historical behavior for testing
// stability, but use of this semantics must be phased out as it is
// inconsistent with expected FileSystem semantics. In other words, this
// functionality is DEPRECATED. Intended to be set after construction and
// unchanged (not thread safe).
void SetReadUnsyncedData(bool read_unsynced_data) {
read_unsynced_data_ = read_unsynced_data;
}
bool ReadUnsyncedData() const { return read_unsynced_data_; }
// FaultInjectionTestFS normally includes a hygiene check for FileSystem
// implementations that only support LinkFile() on closed files (not open
// for write). Setting this to true bypasses the check.
void SetAllowLinkOpenFile(bool allow_link_open_file = true) {
allow_link_open_file_ = allow_link_open_file;
}
bool ShouldIOActivtiesExcludedFromFaultInjection(Env::IOActivity io_activty) {
MutexLock l(&mutex_);
return io_activties_excluded_from_fault_injection.find(io_activty) !=
io_activties_excluded_from_fault_injection.end();
}
void AssertNoOpenFile() { assert(open_managed_files_.empty()); }
IOStatus GetError() { return fs_error_; }
void SetFileSystemIOError(IOStatus io_error) {
MutexLock l(&mutex_);
io_error.PermitUncheckedError();
fs_error_ = io_error;
}
// To simulate data corruption before data is written to the FS
void IngestDataCorruptionBeforeWrite() {
MutexLock l(&mutex_);
ingest_data_corruption_before_write_ = true;
}
void NoDataCorruptionBeforeWrite() {
MutexLock l(&mutex_);
ingest_data_corruption_before_write_ = false;
}
bool ShouldDataCorruptionBeforeWrite() {
MutexLock l(&mutex_);
return ingest_data_corruption_before_write_;
}
void SetChecksumHandoffFuncType(const ChecksumType& func_type) {
MutexLock l(&mutex_);
checksum_handoff_func_type_ = func_type;
}
const ChecksumType& GetChecksumHandoffFuncType() {
MutexLock l(&mutex_);
return checksum_handoff_func_type_;
}
void SetFailGetUniqueId(bool flag) {
MutexLock l(&mutex_);
fail_get_file_unique_id_ = flag;
}
bool ShouldFailGetUniqueId() {
MutexLock l(&mutex_);
return fail_get_file_unique_id_;
}
// Specify what the operation is, so we can inject the right type of error
enum ErrorOperation : char {
kRead = 0,
kMultiReadSingleReq = 1,
kMultiRead = 2,
kOpen,
kAppend,
kPositionedAppend,
kUnknown,
};
void SetThreadLocalErrorContext(FaultInjectionIOType type, uint32_t seed,
int one_in, bool retryable,
bool has_data_loss) {
struct ErrorContext* new_ctx = new ErrorContext(seed);
new_ctx->one_in = one_in;
new_ctx->count = 0;
new_ctx->retryable = retryable;
new_ctx->has_data_loss = has_data_loss;
SetErrorContextOfFaultInjectionIOType(type, new_ctx);
}
static void DeleteThreadLocalErrorContext(void* p) {
ErrorContext* ctx = static_cast<ErrorContext*>(p);
delete ctx;
}
IOStatus MaybeInjectThreadLocalError(
FaultInjectionIOType type, const IOOptions& io_options,
const std::string& file_name = "", ErrorOperation op = kUnknown,
Slice* slice = nullptr, bool direct_io = false, char* scratch = nullptr,
bool need_count_increase = false, bool* fault_injected = nullptr);
int GetAndResetInjectedThreadLocalErrorCount(FaultInjectionIOType type) {
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
int count = 0;
if (ctx) {
count = ctx->count;
ctx->count = 0;
}
return count;
}
void SetIOActivtiesExcludedFromFaultInjection(
const std::set<Env::IOActivity>& io_activties) {
MutexLock l(&mutex_);
io_activties_excluded_from_fault_injection = io_activties;
}
void SetFileTypesExcludedFromWriteFaultInjection(
const std::set<FileType>& types) {
MutexLock l(&mutex_);
file_types_excluded_from_write_fault_injection_ = types;
}
void EnableThreadLocalErrorInjection(FaultInjectionIOType type) {
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
if (ctx) {
ctx->enable_error_injection = true;
}
}
void EnableAllThreadLocalErrorInjection() {
EnableThreadLocalErrorInjection(FaultInjectionIOType::kRead);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataRead);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
}
void DisableThreadLocalErrorInjection(FaultInjectionIOType type) {
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
if (ctx) {
ctx->enable_error_injection = false;
}
}
void DisableAllThreadLocalErrorInjection() {
DisableThreadLocalErrorInjection(FaultInjectionIOType::kRead);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataRead);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
}
void PrintInjectedThreadLocalErrorBacktrace(FaultInjectionIOType type);
// If there is unsynced data in the specified file within the specified
// range [offset, offset + n), return the unsynced data overlapping with
// that range, in a corresponding range of scratch. When known, also return
// the position of the last sync, so that the caller can determine whether
// more data is available from the target file when it is not available from
// the unsynced data.
void ReadUnsynced(const std::string& fname, uint64_t offset, size_t n,
Slice* result, char* scratch, int64_t* pos_at_last_sync);
inline static const std::string kInjected = "injected";
private:
inline static const std::string kFailedToWriteToWAL =
"failed to write to WAL";
port::Mutex mutex_;
std::map<std::string, FSFileState> db_file_state_;
std::set<std::string> open_managed_files_;
// directory -> (file name -> file contents to recover)
// When data is recovered from an unsynced parent directory, files whose
// contents-to-recover are empty are deleted, and those with non-empty
// contents are restored to that content.
std::unordered_map<std::string, std::map<std::string, std::string>>
dir_to_new_files_since_last_sync_;
bool filesystem_active_; // Record flushes, syncs, writes
bool filesystem_writable_; // Bypass FaultInjectionTestFS and go directly
// to underlying FS for writable files
bool inject_unsynced_data_loss_; // See InjectUnsyncedDataLoss()
bool read_unsynced_data_; // See SetReadUnsyncedData()
bool allow_link_open_file_; // See SetAllowLinkOpenFile()
IOStatus fs_error_;
enum ErrorType : int {
kErrorTypeStatus = 0,
kErrorTypeCorruption,
kErrorTypeTruncated,
kErrorTypeMax
};
struct ErrorContext {
Random rand;
int one_in;
int count;
bool enable_error_injection;
void* callstack;
std::string message;
int frames;
ErrorType type;
bool retryable;
bool has_data_loss;
explicit ErrorContext(uint32_t seed)
: rand(seed),
enable_error_injection(false),
callstack(nullptr),
frames(0),
retryable(false),
has_data_loss(false) {}
~ErrorContext() {
if (callstack) {
free(callstack);
}
}
};
std::set<FileType> file_types_excluded_from_write_fault_injection_;
std::set<Env::IOActivity> io_activties_excluded_from_fault_injection;
ThreadLocalPtr injected_thread_local_read_error_;
ThreadLocalPtr injected_thread_local_write_error_;
ThreadLocalPtr injected_thread_local_metadata_read_error_;
ThreadLocalPtr injected_thread_local_metadata_write_error_;
bool ingest_data_corruption_before_write_;
ChecksumType checksum_handoff_func_type_;
bool fail_get_file_unique_id_;
// Inject an error. For a READ operation, an IOError() status, a corruption
// of the contents of scratch, or a truncation of the slice are the possible
// error types, each with equal probability. For OPEN, it is always an
// IOError.
// fault_injected returns whether a fault was injected. It is needed because
// some faults are injected while the returned IOStatus is OK.
IOStatus MaybeInjectThreadLocalReadError(const IOOptions& io_options,
ErrorOperation op, Slice* slice,
bool direct_io, char* scratch,
bool need_count_increase,
bool* fault_injected);
bool ShouldExcludeFromWriteFaultInjection(const std::string& file_name) {
MutexLock l(&mutex_);
FileType file_type = kTempFile;
uint64_t file_number = 0;
if (!TryParseFileName(file_name, &file_number, &file_type)) {
return false;
}
return file_types_excluded_from_write_fault_injection_.find(file_type) !=
file_types_excluded_from_write_fault_injection_.end();
}
// Extract the file number and type from the file name. Return false if we
// fail to find them.
bool TryParseFileName(const std::string& file_name, uint64_t* number,
FileType* type);
ErrorContext* GetErrorContextFromFaultInjectionIOType(
FaultInjectionIOType type) {
ErrorContext* ctx = nullptr;
switch (type) {
case FaultInjectionIOType::kRead:
ctx = static_cast<struct ErrorContext*>(
injected_thread_local_read_error_.Get());
break;
case FaultInjectionIOType::kWrite:
ctx = static_cast<struct ErrorContext*>(
injected_thread_local_write_error_.Get());
break;
case FaultInjectionIOType::kMetadataRead:
ctx = static_cast<struct ErrorContext*>(
injected_thread_local_metadata_read_error_.Get());
break;
case FaultInjectionIOType::kMetadataWrite:
ctx = static_cast<struct ErrorContext*>(
injected_thread_local_metadata_write_error_.Get());
break;
default:
assert(false);
break;
}
return ctx;
}
void SetErrorContextOfFaultInjectionIOType(FaultInjectionIOType type,
ErrorContext* new_ctx) {
ErrorContext* old_ctx = nullptr;
switch (type) {
case FaultInjectionIOType::kRead:
old_ctx = static_cast<struct ErrorContext*>(
injected_thread_local_read_error_.Swap(new_ctx));
break;
case FaultInjectionIOType::kWrite:
old_ctx = static_cast<struct ErrorContext*>(
injected_thread_local_write_error_.Swap(new_ctx));
break;
case FaultInjectionIOType::kMetadataRead:
old_ctx = static_cast<struct ErrorContext*>(
injected_thread_local_metadata_read_error_.Swap(new_ctx));
break;
case FaultInjectionIOType::kMetadataWrite:
old_ctx = static_cast<struct ErrorContext*>(
injected_thread_local_metadata_write_error_.Swap(new_ctx));
break;
default:
assert(false);
break;
}
if (old_ctx) {
DeleteThreadLocalErrorContext(old_ctx);
}
}
std::string GetErrorMessage(FaultInjectionIOType type,
const std::string& file_name, ErrorOperation op) {
std::ostringstream msg;
msg << kInjected << " ";
switch (type) {
case FaultInjectionIOType::kRead:
msg << "read error";
break;
case FaultInjectionIOType::kWrite:
msg << "write error";
break;
case FaultInjectionIOType::kMetadataRead:
msg << "metadata read error";
break;
case FaultInjectionIOType::kMetadataWrite:
msg << "metadata write error";
break;
default:
assert(false);
break;
}
if (type == FaultInjectionIOType::kWrite &&
(op == ErrorOperation::kOpen || op == ErrorOperation::kAppend ||
op == ErrorOperation::kPositionedAppend)) {
FileType file_type = kTempFile;
uint64_t ignore = 0;
if (TryParseFileName(file_name, &ignore, &file_type) &&
file_type == FileType::kWalFile) {
msg << " " << kFailedToWriteToWAL;
}
}
return msg.str();
}
};
} // namespace ROCKSDB_NAMESPACE
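
Finally, a short sketch of how the thread-local error-injection API declared above could be driven from a test; the seed, the `one_in=8` probability (mirroring `--write_fault_one_in=8` from the test plan), and the `work_under_faults` callback are illustrative assumptions:

```cpp
// Sketch only: seed per-thread write-error injection on this thread, run some
// work, then read back how many faults were actually injected.
#include <functional>
#include <iostream>

#include "utilities/fault_injection_fs.h"

using ROCKSDB_NAMESPACE::FaultInjectionIOType;
using ROCKSDB_NAMESPACE::FaultInjectionTestFS;

void RunWithWriteFaults(FaultInjectionTestFS* fault_fs,
                        const std::function<void()>& work_under_faults) {
  // Roughly one injected error per 8 write ops on this thread, retryable,
  // with no simulated data loss.
  fault_fs->SetThreadLocalErrorContext(FaultInjectionIOType::kWrite,
                                       /*seed=*/0xdeadbeef, /*one_in=*/8,
                                       /*retryable=*/true,
                                       /*has_data_loss=*/false);
  fault_fs->EnableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);

  work_under_faults();  // e.g. a batch of DB writes issued on this thread

  fault_fs->DisableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
  int injected = fault_fs->GetAndResetInjectedThreadLocalErrorCount(
      FaultInjectionIOType::kWrite);
  std::cout << "injected write errors: " << injected << std::endl;
}
```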