Handle injected write error after successful WAL write in crash test + misc (#12838)

Summary:
**Context/Summary:**
We discovered the following false positive in our crash test lately:
(1) PUT() writes k/v to WAL but fails in `ApplyWALToManifest()`. The k/v is in the WAL
(2) The current stress test logic will roll back the expected state of such a k/v since PUT() fails
(3) If the DB crashes before recovery finishes and then reopens, the WAL will be replayed and the k/v will be in the DB, while the expected state has already been rolled back.

We decided to leave those expected states pending until a subsequent loop-write of the same key succeeds.

Bonus: I realized that a write to the manifest can also fail the overall write, which faces a problem similar to https://github.com/facebook/rocksdb/pull/12797. So I decided to disable fault injection on user writes per thread (instead of globally) when tracing is needed for prefix recovery, along with some refactoring.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12838

Test Plan:
Rehearsal CI
Run the command below (varying sync_fault_injection between 1 and 0 to verify ExpectedState behavior) for a while to ensure crash recovery validation works fine.

```
python3 tools/db_crashtest.py --simple blackbox --interval=30 --WAL_size_limit_MB=0 --WAL_ttl_seconds=0 --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --adm_policy=1 --advise_random_on_open=0 --allow_concurrent_memtable_write=0 --allow_data_in_errors=True --allow_fallocate=0 --async_io=0 --auto_readahead_size=0 --avoid_flush_during_recovery=0 --avoid_flush_during_shutdown=0 --avoid_unnecessary_blocking_io=0 --backup_max_size=104857600 --backup_one_in=0 --batch_protection_bytes_per_key=0 --bgerror_resume_retry_interval=1000000 --block_align=1 --block_protection_bytes_per_key=4 --block_size=16384 --bloom_before_level=4 --bloom_bits=56.810257702625165 --bottommost_compression_type=none --bottommost_file_compaction_delay=0 --bytes_per_sync=262144 --cache_index_and_filter_blocks=1 --cache_index_and_filter_blocks_with_high_priority=1 --cache_size=8388608 --cache_type=auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=1 --charge_filter_construction=1 --charge_table_reader=0 --check_multiget_consistency=0 --check_multiget_entity_consistency=1 --checkpoint_one_in=10000 --checksum_type=kxxHash --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000 --compact_range_one_in=1000 --compaction_pri=4 --compaction_readahead_size=1048576 --compaction_ttl=10 --compress_format_version=1 --compressed_secondary_cache_ratio=0.0 --compressed_secondary_cache_size=0 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=none --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --daily_offpeak_time_utc=04:00-08:00 --data_block_index_type=1 --db_write_buffer_size=0 --default_temperature=kWarm --default_write_temperature=kCold --delete_obsolete_files_period_micros=30000000 --delpercent=20 --delrangepercent=20 --destroy_db_initially=0 --detect_filter_construct_corruption=0 
--disable_file_deletions_one_in=10000 --disable_manual_compaction_one_in=1000000 --disable_wal=0 --dump_malloc_stats=0 --enable_checksum_handoff=1 --enable_compaction_filter=0 --enable_custom_split_merge=0 --enable_do_not_compress_roles=0 --enable_index_compression=1 --enable_memtable_insert_with_hint_prefix_extractor=0 --enable_pipelined_write=0 --enable_sst_partitioner_factory=0 --enable_thread_tracking=0 --enable_write_thread_adaptive_yield=0 --error_recovery_with_no_fault_injection=1 --exclude_wal_from_write_fault_injection=0 --fail_if_options_file_error=1 --fifo_allow_compaction=0 --file_checksum_impl=crc32c --fill_cache=1 --flush_one_in=1000000 --format_version=3 --get_all_column_family_metadata_one_in=1000000 --get_current_wal_file_one_in=0 --get_live_files_apis_one_in=1000000 --get_properties_of_all_tables_one_in=1000000 --get_property_one_in=100000 --get_sorted_wal_files_one_in=0 --hard_pending_compaction_bytes_limit=274877906944 --high_pri_pool_ratio=0.5 --index_block_restart_interval=4 --index_shortening=2 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=16384 --inplace_update_support=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --key_may_exist_one_in=100 --last_level_temperature=kWarm --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=10000 --log_file_time_to_roll=60 --log_readahead_size=16777216 --long_running_snapshots=1 --low_pri_pool_ratio=0 --lowest_used_cache_tier=0 --manifest_preallocation_size=0 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=10 --max_auto_readahead_size=16384 --max_background_compactions=1 --max_bytes_for_level_base=67108864 --max_key=100000 --max_key_len=3 --max_log_file_size=1048576 --max_manifest_file_size=32768 --max_sequential_skip_in_iterations=1 --max_total_wal_size=0 --max_write_batch_group_size_bytes=16 --max_write_buffer_number=10 --max_write_buffer_size_to_maintain=8388608 --memtable_insert_hint_per_batch=1 --memtable_max_range_deletions=0 
--memtable_prefix_bloom_size_ratio=0.01 --memtable_protection_bytes_per_key=1 --memtable_whole_key_filtering=1 --memtablerep=skip_list --metadata_charge_policy=1 --metadata_read_fault_one_in=0 --metadata_write_fault_one_in=8 --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=-1 --open_metadata_read_fault_one_in=0 --open_metadata_write_fault_one_in=8 --open_read_fault_one_in=0 --open_write_fault_one_in=8 --ops_per_thread=100000000 --optimize_filters_for_hits=1 --optimize_filters_for_memory=1 --optimize_multiget_for_io=1 --paranoid_file_checks=0 --partition_filters=0 --partition_pinning=3 --pause_background_one_in=1000000 --periodic_compaction_seconds=2 --prefix_size=7 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=0 --progress_reports=0 --promote_l0_one_in=0 --read_amp_bytes_per_bit=0 --read_fault_one_in=1000 --readahead_size=524288 --readpercent=10 --recycle_log_file_num=1 --reopen=0 --report_bg_io_stats=0 --reset_stats_one_in=1000000 --sample_for_compression=0 --secondary_cache_fault_one_in=0 --set_options_one_in=0 --skip_stats_update_on_db_open=1 --snapshot_hold_ops=100000 --soft_pending_compaction_bytes_limit=68719476736 --sqfc_name=foo --sqfc_version=0 --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=10 --stats_history_buffer_size=0 --strict_bytes_per_sync=1 --subcompactions=4 --sync=1 --sync_fault_injection=0 --table_cache_numshardbits=6 --target_file_size_base=16777216 --target_file_size_multiplier=1 --test_batches_snapshots=0 --top_level_index_pinning=2 --uncache_aggressiveness=239 --universal_max_read_amp=-1 --unpartitioned_pinning=1 --use_adaptive_mutex=1 --use_adaptive_mutex_lru=1 --use_attribute_group=0 --use_delta_encoding=0 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=0 --use_multi_cf_iterator=0 
--use_multi_get_entity=0 --use_multiget=0 --use_put_entity_one_in=0 --use_sqfc_for_range_queries=1 --use_timed_put_one_in=0 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_compression=0 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=none --write_buffer_size=33554432 --write_dbid_to_manifest=0 --write_fault_one_in=8 --writepercent=40
```

Reviewed By: cbi42

Differential Revision: D59377075

Pulled By: hx235

fbshipit-source-id: 91f602fd67e2d339d378cd28b982095fd073dcb6
This commit is contained in:
Hui Xiao 2024-07-29 13:51:49 -07:00 committed by Facebook GitHub Bot
parent d94c2adc28
commit 408e8d4c85
16 changed files with 524 additions and 394 deletions

View File

@ -1411,6 +1411,7 @@ IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch,
total_log_size_ += log_entry.size();
log_file_number_size.AddSize(*log_size);
log_empty_ = false;
return io_s;
}

View File

@ -55,7 +55,10 @@ IOStatus Writer::WriteBuffer(const WriteOptions& write_options) {
if (dest_->seen_error()) {
#ifndef NDEBUG
if (dest_->seen_injected_error()) {
return IOStatus::IOError("Seen injected error. Skip writing buffer.");
std::stringstream msg;
msg << "Seen " << FaultInjectionTestFS::kInjected
<< " error. Skip writing buffer.";
return IOStatus::IOError(msg.str());
}
#endif // NDEBUG
return IOStatus::IOError("Seen error. Skip writing buffer.");
@ -93,7 +96,10 @@ IOStatus Writer::AddRecord(const WriteOptions& write_options,
if (dest_->seen_error()) {
#ifndef NDEBUG
if (dest_->seen_injected_error()) {
return IOStatus::IOError("Seen injected error. Skip writing buffer.");
std::stringstream msg;
msg << "Seen " << FaultInjectionTestFS::kInjected
<< " error. Skip writing buffer.";
return IOStatus::IOError(msg.str());
}
#endif // NDEBUG
return IOStatus::IOError("Seen error. Skip writing buffer.");
@ -205,7 +211,10 @@ IOStatus Writer::AddCompressionTypeRecord(const WriteOptions& write_options) {
if (dest_->seen_error()) {
#ifndef NDEBUG
if (dest_->seen_injected_error()) {
return IOStatus::IOError("Seen injected error. Skip writing buffer.");
std::stringstream msg;
msg << "Seen " << FaultInjectionTestFS::kInjected
<< " error. Skip writing buffer.";
return IOStatus::IOError(msg.str());
}
#endif // NDEBUG
return IOStatus::IOError("Seen error. Skip writing buffer.");

View File

@ -197,7 +197,6 @@ DECLARE_int64(target_file_size_base);
DECLARE_int32(target_file_size_multiplier);
DECLARE_uint64(max_bytes_for_level_base);
DECLARE_double(max_bytes_for_level_multiplier);
DECLARE_int32(range_deletion_width);
DECLARE_uint64(rate_limiter_bytes_per_sec);
DECLARE_bool(rate_limit_bg_reads);
DECLARE_bool(rate_limit_user_ops);

View File

@ -72,14 +72,7 @@ class DbStressListener : public EventListener {
// pretending doing some work here
RandomSleep();
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
}
@ -180,14 +173,7 @@ class DbStressListener : public EventListener {
void OnSubcompactionCompleted(const SubcompactionJobInfo& /* si */) override {
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
}
@ -274,14 +260,7 @@ class DbStressListener : public EventListener {
bool* /* auto_recovery */) override {
RandomSleep();
if (FLAGS_error_recovery_with_no_fault_injection && fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
// TODO(hx235): only exempt the flush thread during error recovery instead
// of all the flush threads from error injection
fault_fs_guard->SetIOActivtiesExcludedFromFaultInjection(
@ -293,14 +272,7 @@ class DbStressListener : public EventListener {
const BackgroundErrorRecoveryInfo& /*info*/) override {
RandomSleep();
if (FLAGS_error_recovery_with_no_fault_injection && fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
fault_fs_guard->SetIOActivtiesExcludedFromFaultInjection({});
}
}

View File

@ -31,6 +31,7 @@ DECLARE_int32(compaction_thread_pool_adjust_interval);
DECLARE_int32(continuous_verification_interval);
DECLARE_bool(error_recovery_with_no_fault_injection);
DECLARE_bool(sync_fault_injection);
DECLARE_int32(range_deletion_width);
DECLARE_bool(disable_wal);
DECLARE_int32(manual_wal_flush_one_in);
DECLARE_int32(metadata_read_fault_one_in);
@ -147,7 +148,8 @@ class SharedState {
~SharedState() {
#ifndef NDEBUG
if (FLAGS_read_fault_one_in) {
if (FLAGS_read_fault_one_in || FLAGS_write_fault_one_in ||
FLAGS_metadata_write_fault_one_in) {
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
}
@ -260,10 +262,14 @@ class SharedState {
// This is useful for crash-recovery testing when the process may crash
// before updating the corresponding expected value
//
// Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`.
PendingExpectedValue PreparePut(int cf, int64_t key) {
return expected_state_manager_->PreparePut(cf, key);
// It can fail and `*prepared` will be set to false if the previous write or
// delete is still in pending state (e.g, still in recovery for retryable IO
// errors). If succeeds,`*prepared` will be set to true
//
// Requires external locking covering `key` in `cf` to prevent
// concurrent write or delete to the same `key`.
PendingExpectedValue PreparePut(int cf, int64_t key, bool* prepared) {
return expected_state_manager_->PreparePut(cf, key, prepared);
}
// Does not requires external locking.
@ -275,24 +281,31 @@ class SharedState {
// This is useful for crash-recovery testing when the process may crash
// before updating the corresponding expected value
//
// It can fail and `*prepared` will be set to false if the previous write or
// delete is still in pending state (e.g, still in recovery for retryable IO
// errors). If succeeds,`*prepared` will be set to true
//
// Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`.
PendingExpectedValue PrepareDelete(int cf, int64_t key) {
return expected_state_manager_->PrepareDelete(cf, key);
PendingExpectedValue PrepareDelete(int cf, int64_t key, bool* prepared) {
return expected_state_manager_->PrepareDelete(cf, key, prepared);
}
// Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`.
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key) {
return expected_state_manager_->PrepareSingleDelete(cf, key);
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key,
bool* prepared) {
return expected_state_manager_->PrepareSingleDelete(cf, key, prepared);
}
// Requires external locking covering keys in `[begin_key, end_key)` in `cf`
// to prevent concurrent write or delete to the same `key`.
std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
int64_t begin_key,
int64_t end_key) {
return expected_state_manager_->PrepareDeleteRange(cf, begin_key, end_key);
int64_t end_key,
bool* prepared) {
return expected_state_manager_->PrepareDeleteRange(cf, begin_key, end_key,
prepared);
}
bool AllowsOverwrite(int64_t key) const {

View File

@ -632,9 +632,10 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys,
for (auto cfh : column_families_) {
for (int64_t k = 0; k != number_of_keys; ++k) {
const std::string key = Key(k);
bool prepare = false;
PendingExpectedValue pending_expected_value =
shared->PreparePut(cf_idx, k);
shared->PreparePut(cf_idx, k, &prepare);
assert(prepare);
const uint32_t value_base = pending_expected_value.GetFinalValueBase();
const size_t sz = GenerateValue(value_base, value, sizeof(value));
@ -1011,26 +1012,12 @@ void StressTest::OperateDb(ThreadState* thread) {
thread->rand.OneIn(FLAGS_verify_db_one_in)) {
// Temporarily disable error injection for verification
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
ContinuouslyVerifyDb(thread);
// Enable back error injection disabled for verification
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
if (thread->shared->ShouldStopTest()) {
break;
@ -1056,14 +1043,7 @@ void StressTest::OperateDb(ThreadState* thread) {
} else if (s.ok()) {
// Temporarily disable error injection for verification
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
// Verify no writes during LockWAL
@ -1118,14 +1098,7 @@ void StressTest::OperateDb(ThreadState* thread) {
// Enable back error injection disabled for verification
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
}
}
@ -1238,27 +1211,13 @@ void StressTest::OperateDb(ThreadState* thread) {
// failed due to injected error. So we disable fault injection to avoid
// false positive
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
TestGetProperty(thread);
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
}
@ -1289,25 +1248,11 @@ void StressTest::OperateDb(ThreadState* thread) {
// TODO(hx235): enable error injection with
// backup/restore after fixing the various issues it surfaces
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
Status s = TestBackupRestore(thread, rand_column_families, rand_keys);
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
ProcessStatus(shared, "Backup/restore", s);
}
@ -1344,7 +1289,13 @@ void StressTest::OperateDb(ThreadState* thread) {
if (thread->rand.OneInOpt(FLAGS_key_may_exist_one_in)) {
TestKeyMayExist(thread, read_opts, rand_column_families, rand_keys);
}
// Prefix-recoverability relies on tracing successful user writes.
// Currently we trace all user writes regardless of whether it later
// succeeds or not. To simplify, we disable any fault injection during
// user write.
// TODO(hx235): support tracing user writes with fault injection.
bool disable_fault_injection_during_user_write =
fault_fs_guard && MightHaveUnsyncedDataLoss();
int prob_op = thread->rand.Uniform(100);
// Reset this in case we pick something other than a read op. We don't
// want to use a stale value when deciding at the beginning of the loop
@ -1403,16 +1354,34 @@ void StressTest::OperateDb(ThreadState* thread) {
} else if (prob_op < write_bound) {
assert(prefix_bound <= prob_op);
// OPERATION write
if (disable_fault_injection_during_user_write) {
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
TestPut(thread, write_opts, read_opts, rand_column_families, rand_keys,
value);
if (disable_fault_injection_during_user_write) {
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
} else if (prob_op < del_bound) {
assert(write_bound <= prob_op);
// OPERATION delete
if (disable_fault_injection_during_user_write) {
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
TestDelete(thread, write_opts, rand_column_families, rand_keys);
if (disable_fault_injection_during_user_write) {
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
} else if (prob_op < delrange_bound) {
assert(del_bound <= prob_op);
// OPERATION delete range
if (disable_fault_injection_during_user_write) {
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
TestDeleteRange(thread, write_opts, rand_column_families, rand_keys);
if (disable_fault_injection_during_user_write) {
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
} else if (prob_op < iterate_bound) {
assert(delrange_bound <= prob_op);
// OPERATION iterate
@ -1456,14 +1425,7 @@ void StressTest::OperateDb(ThreadState* thread) {
#ifndef NDEBUG
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
#endif // NDEBUG
}
@ -2344,14 +2306,7 @@ Status StressTest::TestBackupRestore(
// Temporarily disable error injection for clean up
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
if (s.ok() || IsErrorInjectedAndRetryable(s)) {
@ -2373,14 +2328,7 @@ Status StressTest::TestBackupRestore(
// Enable back error injection disabled for clean up
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
if (!s.ok() && !IsErrorInjectedAndRetryable(s)) {
@ -2520,26 +2468,12 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
tmp_opts.sst_file_manager.reset();
// Temporarily disable error injection for clean-up
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
DestroyDB(checkpoint_dir, tmp_opts);
// Enable back error injection disabled for clean-up
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
Checkpoint* checkpoint = nullptr;
Status s = Checkpoint::Create(db_, &checkpoint);
@ -2647,14 +2581,7 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
// Temporarily disable error injection for clean-up
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
if (!s.ok() && !IsErrorInjectedAndRetryable(s)) {
@ -2666,14 +2593,7 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
// Enable back error injection disabled for clean-up
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
return s;
}
@ -3048,14 +2968,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
if (thread->rand.OneIn(2)) {
// Temporarily disable error injection to for validation
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
// Declare a snapshot and compare the data before and after the compaction
@ -3065,14 +2978,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
// Enable back error injection disabled for validation
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
}
std::ostringstream compact_range_opt_oss;
@ -3110,14 +3016,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
if (pre_snapshot != nullptr) {
// Temporarily disable error injection for validation
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
uint32_t post_hash =
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
@ -3136,14 +3035,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
db_->ReleaseSnapshot(pre_snapshot);
if (fault_fs_guard) {
// Enable back error injection disabled for validation
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
}
}
@ -3523,14 +3415,7 @@ void StressTest::Open(SharedState* shared, bool reopen) {
// If this is for DB reopen, error injection may have been enabled.
// Disable it here in case there is no open fault injection.
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
// TODO; test transaction DB Open with fault injection
if (!FLAGS_use_txn) {
@ -3611,14 +3496,7 @@ void StressTest::Open(SharedState* shared, bool reopen) {
if (inject_sync_fault || inject_open_meta_read_error ||
inject_open_meta_write_error || inject_open_read_error ||
inject_open_write_error) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableAllThreadLocalErrorInjection();
if (s.ok()) {
// Injected errors might happen in background compactions. We

View File

@ -63,6 +63,19 @@ class StressTest {
return 0;
}
}
void GetDeleteRangeKeyLocks(
ThreadState* thread, int rand_column_family, int64_t rand_key,
std::vector<std::unique_ptr<MutexLock>>* range_locks) {
for (int j = 0; j < FLAGS_range_deletion_width; ++j) {
if (j == 0 ||
((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
range_locks->emplace_back(new MutexLock(
thread->shared->GetMutexForKey(rand_column_family, rand_key + j)));
}
}
}
Status AssertSame(DB* db, ColumnFamilyHandle* cf,
ThreadState::SnapshotState& snap_state);
@ -283,7 +296,8 @@ class StressTest {
bool IsErrorInjectedAndRetryable(const Status& error_s) const {
assert(!error_s.ok());
return error_s.getState() && std::strstr(error_s.getState(), "inject") &&
return error_s.getState() &&
FaultInjectionTestFS::IsInjectedError(error_s) &&
!status_to_io_status(Status(error_s)).GetDataLoss();
}

View File

@ -32,14 +32,32 @@ void ExpectedState::Precommit(int cf, int64_t key, const ExpectedValue& value) {
std::atomic_thread_fence(std::memory_order_release);
}
// Prepares a Put on `key` in `cf`: transitions the expected value through a
// pending-write state and precommits it so a crash mid-write is detectable.
//
// Sets `*prepared` to false and returns a no-op PendingExpectedValue (whose
// original and final values are both the current value) when the key already
// has a pending write or delete; the caller must not issue the write in that
// case. Otherwise sets `*prepared` to true.
//
// NOTE(review): reconstructed from a diff view that interleaved the old and
// new function bodies (duplicate signatures and duplicate Put() calls).
PendingExpectedValue ExpectedState::PreparePut(int cf, int64_t key,
                                               bool* prepared) {
  assert(prepared);
  ExpectedValue expected_value = Load(cf, key);
  // Calculate the original expected value
  const ExpectedValue orig_expected_value = expected_value;
  // Calculate the pending expected value; Put(true) fails when another
  // operation is already pending on this key.
  bool res = expected_value.Put(true /* pending */);
  if (!res) {
    // Bail out without precommitting so the expected state stays untouched.
    PendingExpectedValue ret = PendingExpectedValue(
        &Value(cf, key), orig_expected_value, orig_expected_value);
    *prepared = false;
    return ret;
  }
  const ExpectedValue pending_expected_value = expected_value;
  // Calculate the final expected value; cannot fail since the pending
  // transition above succeeded.
  res = expected_value.Put(false /* pending */);
  assert(res);
  const ExpectedValue final_expected_value = expected_value;
  // Precommit
  Precommit(cf, key, pending_expected_value);
  *prepared = true;
  return PendingExpectedValue(&Value(cf, key), orig_expected_value,
                              final_expected_value);
}
@ -48,41 +66,58 @@ ExpectedValue ExpectedState::Get(int cf, int64_t key) { return Load(cf, key); }
// Prepares a Delete on `key` in `cf`: transitions the expected value through
// a pending-delete state and precommits it.
//
// Sets `*prepared` to false and returns a no-op PendingExpectedValue (whose
// original and final values are both the current value) when the delete
// cannot be started — i.e. when another operation is pending on the key or
// the key does not exist. Otherwise sets `*prepared` to true.
//
// NOTE(review): reconstructed from a diff view that interleaved the old
// optional-out-param body with the new mandatory-out-param body.
PendingExpectedValue ExpectedState::PrepareDelete(int cf, int64_t key,
                                                  bool* prepared) {
  assert(prepared);
  ExpectedValue expected_value = Load(cf, key);
  // Calculate the original expected value
  const ExpectedValue orig_expected_value = expected_value;
  // Calculate the pending expected value; Delete(true) fails when another
  // operation is pending or the key does not exist.
  bool res = expected_value.Delete(true /* pending */);
  if (!res) {
    // Bail out without precommitting so the expected state stays untouched.
    PendingExpectedValue ret = PendingExpectedValue(
        &Value(cf, key), orig_expected_value, orig_expected_value);
    *prepared = false;
    return ret;
  }
  const ExpectedValue pending_expected_value = expected_value;
  // Calculate the final expected value; cannot fail since the pending
  // transition above succeeded.
  res = expected_value.Delete(false /* pending */);
  assert(res);
  const ExpectedValue final_expected_value = expected_value;
  // Precommit
  Precommit(cf, key, pending_expected_value);
  *prepared = true;
  return PendingExpectedValue(&Value(cf, key), orig_expected_value,
                              final_expected_value);
}
// Prepares a SingleDelete on `key` in `cf`. Identical preparation semantics
// to PrepareDelete(): `*prepared` reports whether the operation may proceed.
//
// NOTE(review): the diff view showed both the old two-arg and new three-arg
// definitions; this is the post-change version.
PendingExpectedValue ExpectedState::PrepareSingleDelete(int cf, int64_t key,
                                                        bool* prepared) {
  return PrepareDelete(cf, key, prepared);
}
std::vector<PendingExpectedValue> ExpectedState::PrepareDeleteRange(
int cf, int64_t begin_key, int64_t end_key) {
int cf, int64_t begin_key, int64_t end_key, bool* prepared) {
std::vector<PendingExpectedValue> pending_expected_values;
bool has_prepared_failed = false;
for (int64_t key = begin_key; key < end_key; ++key) {
bool prepared = false;
bool each_prepared = false;
PendingExpectedValue pending_expected_value =
PrepareDelete(cf, key, &prepared);
if (prepared) {
PrepareDelete(cf, key, &each_prepared);
if (each_prepared) {
pending_expected_values.push_back(pending_expected_value);
} else {
has_prepared_failed = true;
pending_expected_value.PermitUnclosedPendingState();
break;
}
}
*prepared = !has_prepared_failed;
return pending_expected_values;
}

View File

@ -44,29 +44,29 @@ class ExpectedState {
//
// Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`.
PendingExpectedValue PreparePut(int cf, int64_t key);
PendingExpectedValue PreparePut(int cf, int64_t key, bool* prepared);
// Does not requires external locking.
ExpectedValue Get(int cf, int64_t key);
// Prepare a Delete that will be started but not finished yet
// Prepare a Delete that will be started but not finished yet.
// This is useful for crash-recovery testing when the process may crash
// before updating the corresponding expected value
//
// Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`.
PendingExpectedValue PrepareDelete(int cf, int64_t key,
bool* prepared = nullptr);
PendingExpectedValue PrepareDelete(int cf, int64_t key, bool* prepared);
// Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`.
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key);
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key, bool* prepared);
// Requires external locking covering keys in `[begin_key, end_key)` in `cf`
// to prevent concurrent write or delete to the same `key`.
std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
int64_t begin_key,
int64_t end_key);
int64_t end_key,
bool* prepared);
// Update the expected value for start of an incomplete write or delete
// operation on the key assoicated with this expected value
@ -197,28 +197,30 @@ class ExpectedStateManager {
void ClearColumnFamily(int cf) { return latest_->ClearColumnFamily(cf); }
// See ExpectedState::PreparePut()
PendingExpectedValue PreparePut(int cf, int64_t key, bool* prepared) {
  return latest_->PreparePut(cf, key, prepared);
}
// See ExpectedState::Get()
ExpectedValue Get(int cf, int64_t key) { return latest_->Get(cf, key); }
// See ExpectedState::PrepareDelete()
PendingExpectedValue PrepareDelete(int cf, int64_t key, bool* prepared) {
  return latest_->PrepareDelete(cf, key, prepared);
}
// See ExpectedState::PrepareSingleDelete()
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key,
                                         bool* prepared) {
  return latest_->PrepareSingleDelete(cf, key, prepared);
}
// See ExpectedState::PrepareDeleteRange()
std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
                                                     int64_t begin_key,
                                                     int64_t end_key,
                                                     bool* prepared) {
  return latest_->PrepareDeleteRange(cf, begin_key, end_key, prepared);
}
// See ExpectedState::Exists()

View File

@ -10,7 +10,11 @@
#include <atomic>
namespace ROCKSDB_NAMESPACE {
void ExpectedValue::Put(bool pending) {
bool ExpectedValue::Put(bool pending) {
if (pending && (PendingWrite() || PendingDelete())) {
return false;
}
if (pending) {
SetPendingWrite();
} else {
@ -18,9 +22,14 @@ void ExpectedValue::Put(bool pending) {
ClearDeleted();
ClearPendingWrite();
}
return true;
}
bool ExpectedValue::Delete(bool pending) {
if (pending && (PendingWrite() || PendingDelete())) {
return false;
}
if (!Exists()) {
return false;
}

View File

@ -41,7 +41,7 @@ class ExpectedValue {
uint32_t Read() const { return expected_value_; }
void Put(bool pending);
bool Put(bool pending);
bool Delete(bool pending);

View File

@ -1611,14 +1611,37 @@ class NonBatchedOpsStressTest : public StressTest {
}
}
// To track the final write status
Status s;
// To track the initial write status
Status initial_write_s;
// To track whether WAL write may have succeeded during the initial failed
// write
bool initial_wal_write_may_succeed = true;
bool prepared = false;
PendingExpectedValue pending_expected_value =
shared->PreparePut(rand_column_family, rand_key);
shared->PreparePut(rand_column_family, rand_key, &prepared);
if (!prepared) {
pending_expected_value.PermitUnclosedPendingState();
return s;
}
const uint32_t value_base = pending_expected_value.GetFinalValueBase();
const size_t sz = GenerateValue(value_base, value, sizeof(value));
const Slice v(value, sz);
Status s;
do {
// In order to commit the expected state for the initial write failed with
// injected retryable error and successful WAL write, retry the write
// until it succeeds after the recovery finishes
if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed) {
lock.reset();
std::this_thread::sleep_for(std::chrono::microseconds(1 * 1000 * 1000));
lock.reset(new MutexLock(
shared->GetMutexForKey(rand_column_family, rand_key)));
}
if (FLAGS_use_put_entity_one_in > 0 &&
(value_base % FLAGS_use_put_entity_one_in) == 0) {
if (!FLAGS_use_txn) {
@ -1668,10 +1691,20 @@ class NonBatchedOpsStressTest : public StressTest {
});
}
}
// Only update `initial_write_s`, `initial_wal_write_may_succeed` when the
// first write fails
if (!s.ok() && initial_write_s.ok()) {
initial_write_s = s;
initial_wal_write_may_succeed =
!FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) {
pending_expected_value.Rollback();
if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s;
} else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
@ -1685,11 +1718,12 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate();
}
}
} else {
pending_expected_value.Commit();
thread->stats.AddBytesForWrites(1, sz);
PrintKeyValue(rand_column_family, static_cast<uint32_t>(rand_key), value,
sz);
}
return s;
}
@ -1711,12 +1745,37 @@ class NonBatchedOpsStressTest : public StressTest {
Slice key = key_str;
auto cfh = column_families_[rand_column_family];
// To track the final write status
Status s;
// To track the initial write status
Status initial_write_s;
// To track whether WAL write may have succeeded during the initial failed
// write
bool initial_wal_write_may_succeed = true;
// Use delete if the key may be overwritten and a single deletion
// otherwise.
Status s;
if (shared->AllowsOverwrite(rand_key)) {
bool prepared = false;
PendingExpectedValue pending_expected_value =
shared->PrepareDelete(rand_column_family, rand_key);
shared->PrepareDelete(rand_column_family, rand_key, &prepared);
if (!prepared) {
pending_expected_value.PermitUnclosedPendingState();
return s;
}
do {
// In order to commit the expected state for the initial write failed
// with injected retryable error and successful WAL write, retry the
// write until it succeeds after the recovery finishes
if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed) {
lock.reset();
std::this_thread::sleep_for(
std::chrono::microseconds(1 * 1000 * 1000));
lock.reset(new MutexLock(
shared->GetMutexForKey(rand_column_family, rand_key)));
}
if (!FLAGS_use_txn) {
if (FLAGS_user_timestamp_size == 0) {
s = db_->Delete(write_opts, cfh, key);
@ -1728,10 +1787,20 @@ class NonBatchedOpsStressTest : public StressTest {
return txn.Delete(cfh, key);
});
}
// Only update `initial_write_s`, `initial_wal_write_may_succeed` when
// the first write fails
if (!s.ok() && initial_write_s.ok()) {
initial_write_s = s;
initial_wal_write_may_succeed =
!FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) {
pending_expected_value.Rollback();
if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s;
} else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ &&
@ -1746,12 +1815,31 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "delete error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate();
}
}
} else {
pending_expected_value.Commit();
thread->stats.AddDeletes(1);
}
} else {
bool prepared = false;
PendingExpectedValue pending_expected_value =
shared->PrepareSingleDelete(rand_column_family, rand_key);
shared->PrepareSingleDelete(rand_column_family, rand_key, &prepared);
if (!prepared) {
pending_expected_value.PermitUnclosedPendingState();
return s;
}
do {
// In order to commit the expected state for the initial write failed
// with injected retryable error and successful WAL write, retry the
// write until it succeeds after the recovery finishes
if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed) {
lock.reset();
std::this_thread::sleep_for(
std::chrono::microseconds(1 * 1000 * 1000));
lock.reset(new MutexLock(
shared->GetMutexForKey(rand_column_family, rand_key)));
}
if (!FLAGS_use_txn) {
if (FLAGS_user_timestamp_size == 0) {
s = db_->SingleDelete(write_opts, cfh, key);
@ -1763,10 +1851,20 @@ class NonBatchedOpsStressTest : public StressTest {
return txn.SingleDelete(cfh, key);
});
}
// Only update `initial_write_s`, `initial_wal_write_may_succeed` when
// the first write fails
if (!s.ok() && initial_write_s.ok()) {
initial_write_s = s;
initial_wal_write_may_succeed =
!FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) {
pending_expected_value.Rollback();
if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s;
} else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ &&
@ -1781,10 +1879,11 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "single delete error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate();
}
}
} else {
pending_expected_value.Commit();
thread->stats.AddSingleDeletes(1);
}
}
return s;
}
@ -1805,16 +1904,29 @@ class NonBatchedOpsStressTest : public StressTest {
rand_key =
thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1);
}
for (int j = 0; j < FLAGS_range_deletion_width; ++j) {
if (j == 0 ||
((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
range_locks.emplace_back(new MutexLock(
shared->GetMutexForKey(rand_column_family, rand_key + j)));
}
}
GetDeleteRangeKeyLocks(thread, rand_column_family, rand_key, &range_locks);
// To track the final write status
Status s;
// To track the initial write status
Status initial_write_s;
// To track whether WAL write may have succeeded during the initial failed
// write
bool initial_wal_write_may_succeed = true;
bool prepared = false;
std::vector<PendingExpectedValue> pending_expected_values =
shared->PrepareDeleteRange(rand_column_family, rand_key,
rand_key + FLAGS_range_deletion_width);
rand_key + FLAGS_range_deletion_width,
&prepared);
if (!prepared) {
for (PendingExpectedValue& pending_expected_value :
pending_expected_values) {
pending_expected_value.PermitUnclosedPendingState();
}
return s;
}
const int covered = static_cast<int>(pending_expected_values.size());
std::string keystr = Key(rand_key);
Slice key = keystr;
@ -1823,7 +1935,18 @@ class NonBatchedOpsStressTest : public StressTest {
Slice end_key = end_keystr;
std::string write_ts_str;
Slice write_ts;
Status s;
do {
// In order to commit the expected state for the initial write failed with
// injected retryable error and successful WAL write, retry the write
// until it succeeds after the recovery finishes
if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed) {
range_locks.clear();
std::this_thread::sleep_for(std::chrono::microseconds(1 * 1000 * 1000));
GetDeleteRangeKeyLocks(thread, rand_column_family, rand_key,
&range_locks);
}
if (FLAGS_user_timestamp_size) {
write_ts_str = GetNowNanos();
write_ts = write_ts_str;
@ -1831,12 +1954,23 @@ class NonBatchedOpsStressTest : public StressTest {
} else {
s = db_->DeleteRange(write_opts, cfh, key, end_key);
}
// Only update `initial_write_s`, `initial_wal_write_may_succeed` when the
// first write fails
if (!s.ok() && initial_write_s.ok()) {
initial_write_s = s;
initial_wal_write_may_succeed =
!FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) {
for (PendingExpectedValue& pending_expected_value :
pending_expected_values) {
pending_expected_value.Rollback();
}
if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s;
} else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
@ -1850,13 +1984,14 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "delete range error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate();
}
}
} else {
for (PendingExpectedValue& pending_expected_value :
pending_expected_values) {
pending_expected_value.Commit();
}
thread->stats.AddRangeDeletions(1);
thread->stats.AddCoveredByRangeDeletions(covered);
}
return s;
}
@ -1881,6 +2016,7 @@ class NonBatchedOpsStressTest : public StressTest {
// ingestion a clean slate
s = db_stress_env->DeleteFile(sst_filename);
}
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
@ -1921,8 +2057,17 @@ class NonBatchedOpsStressTest : public StressTest {
}
keys.push_back(key);
bool prepared = false;
PendingExpectedValue pending_expected_value =
shared->PreparePut(column_family, key);
shared->PreparePut(column_family, key, &prepared);
if (!prepared) {
pending_expected_value.PermitUnclosedPendingState();
for (PendingExpectedValue& pev : pending_expected_values) {
pev.PermitUnclosedPendingState();
}
return;
}
const uint32_t value_base = pending_expected_value.GetFinalValueBase();
values.push_back(value_base);
pending_expected_values.push_back(pending_expected_value);

View File

@ -22,6 +22,9 @@
#include "rocksdb/rate_limiter.h"
#include "test_util/sync_point.h"
#include "util/aligned_buffer.h"
#ifndef NDEBUG
#include "utilities/fault_injection_fs.h"
#endif // NDEBUG
namespace ROCKSDB_NAMESPACE {
class Statistics;
@ -327,10 +330,14 @@ class WritableFileWriter {
}
#endif // NDEBUG
// TODO(hx235): store the actual previous error status and return it here
IOStatus GetWriterHasPreviousErrorStatus() {
#ifndef NDEBUG
if (seen_injected_error_.load(std::memory_order_relaxed)) {
return IOStatus::IOError("Writer has previous injected error.");
std::stringstream msg;
msg << "Writer has previous " << FaultInjectionTestFS::kInjected
<< " error.";
return IOStatus::IOError(msg.str());
}
#endif // NDEBUG
return IOStatus::IOError("Writer has previous error.");

View File

@ -777,14 +777,6 @@ def finalize_and_sanitize(src_params):
# files, which would be problematic when unsynced data can be lost in
# crash recoveries.
dest_params["enable_compaction_filter"] = 0
# Prefix-recoverability relies on tracing successful user writes.
# Currently we trace all user writes regardless of whether it later succeeds or not.
# To simplify, we disable any user write failure injection.
# TODO(hx235): support tracing user writes with failure injection.
# TODO(hx235): support excluding WAL from metadata write fault injection so we don't
# have to disable metadata write fault injection to other file
dest_params["metadata_write_fault_one_in"] = 0
dest_params["exclude_wal_from_write_fault_injection"] = 1
# Only under WritePrepared txns, unordered_write would provide the same guarnatees as vanilla rocksdb
# unordered_write is only enabled with --txn, and txn_params disables inplace_update_support, so
# setting allow_concurrent_memtable_write=1 won't conflcit with inplace_update_support.

View File

@ -168,8 +168,9 @@ IOStatus TestFSWritableFile::Append(const Slice& data, const IOOptions& options,
return fs_->GetError();
}
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite,
options, state_.filename_);
IOStatus s = fs_->MaybeInjectThreadLocalError(
FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kAppend);
if (!s.ok()) {
return s;
}
@ -203,8 +204,9 @@ IOStatus TestFSWritableFile::Append(
return IOStatus::Corruption("Data is corrupted!");
}
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite,
options, state_.filename_);
IOStatus s = fs_->MaybeInjectThreadLocalError(
FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kAppend);
if (!s.ok()) {
return s;
}
@ -266,8 +268,9 @@ IOStatus TestFSWritableFile::PositionedAppend(const Slice& data,
if (fs_->ShouldDataCorruptionBeforeWrite()) {
return IOStatus::Corruption("Data is corrupted!");
}
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite,
options, state_.filename_);
IOStatus s = fs_->MaybeInjectThreadLocalError(
FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kPositionedAppend);
if (!s.ok()) {
return s;
}
@ -292,8 +295,9 @@ IOStatus TestFSWritableFile::PositionedAppend(
if (fs_->ShouldDataCorruptionBeforeWrite()) {
return IOStatus::Corruption("Data is corrupted!");
}
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite,
options, state_.filename_);
IOStatus s = fs_->MaybeInjectThreadLocalError(
FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kPositionedAppend);
if (!s.ok()) {
return s;
}
@ -843,8 +847,9 @@ IOStatus FaultInjectionTestFS::NewWritableFile(
return target()->NewWritableFile(fname, file_opts, result, dbg);
}
IOStatus io_s = MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite,
file_opts.io_options, fname);
IOStatus io_s = MaybeInjectThreadLocalError(
FaultInjectionIOType::kWrite, file_opts.io_options, fname,
FaultInjectionTestFS::ErrorOperation::kOpen);
if (!io_s.ok()) {
return io_s;
}
@ -1391,9 +1396,12 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
}
ctx->callstack = port::SaveStack(&ctx->frames);
std::stringstream msg;
msg << FaultInjectionTestFS::kInjected << " ";
if (op != ErrorOperation::kMultiReadSingleReq) {
// Likely non-per read status code for MultiRead
ctx->message += "injected read error; ";
msg << "read error";
ctx->message = msg.str();
ret_fault_injected = true;
ret = IOStatus::IOError(ctx->message);
} else if (Random::GetTLSInstance()->OneIn(8)) {
@ -1401,7 +1409,8 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
// For a small chance, set the failure to status but turn the
// result to be empty, which is supposed to be caught for a check.
*result = Slice();
ctx->message += "injected empty result; ";
msg << "empty result";
ctx->message = msg.str();
ret_fault_injected = true;
} else if (!direct_io && Random::GetTLSInstance()->OneIn(7) &&
scratch != nullptr && result->data() == scratch) {
@ -1418,10 +1427,12 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
// It would work for CRC. Not 100% sure for xxhash and will adjust
// if it is not the case.
const_cast<char*>(result->data())[result->size() - 1]++;
ctx->message += "injected corrupt last byte; ";
msg << "corrupt last byte";
ctx->message = msg.str();
ret_fault_injected = true;
} else {
ctx->message += "injected error result multiget single; ";
msg << "error result multiget single";
ctx->message = msg.str();
ret_fault_injected = true;
ret = IOStatus::IOError(ctx->message);
}
@ -1465,7 +1476,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalError(
free(ctx->callstack);
}
ctx->callstack = port::SaveStack(&ctx->frames);
ctx->message = GetErrorMessageFromFaultInjectionIOType(type);
ctx->message = GetErrorMessage(type, file_name, op);
ret = IOStatus::IOError(ctx->message);
ret.SetRetryable(ctx->retryable);
ret.SetDataLoss(ctx->has_data_loss);

View File

@ -224,6 +224,16 @@ class FaultInjectionTestFS : public FileSystemWrapper {
static const char* kClassName() { return "FaultInjectionTestFS"; }
const char* Name() const override { return kClassName(); }
// Returns true iff `s` carries the `kInjected` marker that this FS embeds
// in every injected error message. Precondition: `s` is not OK.
static bool IsInjectedError(const Status& s) {
  assert(!s.ok());
  // A non-OK Status may still have a null state (no message); calling
  // strstr on a null pointer is undefined behavior, so guard explicitly.
  return s.getState() != nullptr &&
         std::strstr(s.getState(), kInjected.c_str()) != nullptr;
}
// Returns true iff `s` is an injected error whose message marks a failed
// WAL write (see GetErrorMessage(), which appends `kFailedToWriteToWAL`
// for write faults on WAL files). Precondition: `s` is not OK.
static bool IsFailedToWriteToWALError(const Status& s) {
  assert(!s.ok());
  // Guard against a null state: strstr(nullptr, ...) is undefined behavior.
  return s.getState() != nullptr &&
         std::strstr(s.getState(), kFailedToWriteToWAL.c_str()) != nullptr;
}
IOStatus NewDirectory(const std::string& name, const IOOptions& options,
std::unique_ptr<FSDirectory>* result,
IODebugContext* dbg) override;
@ -472,6 +482,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
kMultiReadSingleReq = 1,
kMultiRead = 2,
kOpen,
kAppend,
kPositionedAppend,
kUnknown,
};
@ -520,17 +532,6 @@ class FaultInjectionTestFS : public FileSystemWrapper {
file_types_excluded_from_write_fault_injection_ = types;
}
bool ShouldExcludeFromWriteFaultInjection(const std::string& file_name) {
MutexLock l(&mutex_);
FileType file_type = kTempFile;
uint64_t file_number = 0;
if (!TryParseFileName(file_name, &file_number, &file_type)) {
return false;
}
return file_types_excluded_from_write_fault_injection_.find(file_type) !=
file_types_excluded_from_write_fault_injection_.end();
}
void EnableThreadLocalErrorInjection(FaultInjectionIOType type) {
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
if (ctx) {
@ -538,6 +539,13 @@ class FaultInjectionTestFS : public FileSystemWrapper {
}
}
void EnableAllThreadLocalErrorInjection() {
EnableThreadLocalErrorInjection(FaultInjectionIOType::kRead);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataRead);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
}
void DisableThreadLocalErrorInjection(FaultInjectionIOType type) {
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
if (ctx) {
@ -545,6 +553,13 @@ class FaultInjectionTestFS : public FileSystemWrapper {
}
}
void DisableAllThreadLocalErrorInjection() {
DisableThreadLocalErrorInjection(FaultInjectionIOType::kRead);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataRead);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
}
void PrintInjectedThreadLocalErrorBacktrace(FaultInjectionIOType type);
// If there is unsynced data in the specified file within the specified
@ -556,7 +571,11 @@ class FaultInjectionTestFS : public FileSystemWrapper {
void ReadUnsynced(const std::string& fname, uint64_t offset, size_t n,
Slice* result, char* scratch, int64_t* pos_at_last_sync);
inline static const std::string kInjected = "injected";
private:
inline static const std::string kFailedToWriteToWAL =
"failed to write to WAL";
port::Mutex mutex_;
std::map<std::string, FSFileState> db_file_state_;
std::set<std::string> open_managed_files_;
@ -628,6 +647,18 @@ class FaultInjectionTestFS : public FileSystemWrapper {
bool direct_io, char* scratch,
bool need_count_increase,
bool* fault_injected);
// Returns true iff write fault injection should be skipped for `file_name`,
// i.e. its parsed FileType is in
// `file_types_excluded_from_write_fault_injection_`.
// File names that cannot be parsed into a RocksDB file type are never
// excluded. Takes `mutex_` to read the exclusion set.
bool ShouldExcludeFromWriteFaultInjection(const std::string& file_name) {
  MutexLock l(&mutex_);
  FileType file_type = kTempFile;
  uint64_t file_number = 0;
  // Unparseable names fall through to "not excluded": injection applies.
  if (!TryParseFileName(file_name, &file_number, &file_type)) {
    return false;
  }
  return file_types_excluded_from_write_fault_injection_.find(file_type) !=
         file_types_excluded_from_write_fault_injection_.end();
}
// Extract number of type from file name. Return false if failing to fine
// them.
bool TryParseFileName(const std::string& file_name, uint64_t* number,
@ -690,27 +721,39 @@ class FaultInjectionTestFS : public FileSystemWrapper {
}
}
std::string GetErrorMessageFromFaultInjectionIOType(
FaultInjectionIOType type) {
std::string msg = "";
std::string GetErrorMessage(FaultInjectionIOType type,
const std::string& file_name, ErrorOperation op) {
std::ostringstream msg;
msg << kInjected << " ";
switch (type) {
case FaultInjectionIOType::kRead:
msg = "injected read error";
msg << "read error";
break;
case FaultInjectionIOType::kWrite:
msg = "injected write error";
msg << "write error";
break;
case FaultInjectionIOType::kMetadataRead:
msg = "injected metadata read error";
msg << "metadata read error";
break;
case FaultInjectionIOType::kMetadataWrite:
msg = "injected metadata write error";
msg << "metadata write error";
break;
default:
assert(false);
break;
}
return msg;
if (type == FaultInjectionIOType::kWrite &&
(op == ErrorOperation::kOpen || op == ErrorOperation::kAppend ||
op == ErrorOperation::kPositionedAppend)) {
FileType file_type = kTempFile;
uint64_t ignore = 0;
if (TryParseFileName(file_name, &ignore, &file_type) &&
file_type == FileType::kWalFile) {
msg << " " << kFailedToWriteToWAL;
}
}
return msg.str();
}
};