Handle injected write error after successful WAL write in crash test + misc (#12838)

Summary:
**Context/Summary:**
We discovered the following false positive in our crash test lately:
(1) PUT() writes k/v to WAL but fails in `ApplyWALToManifest()`. The k/v is in the WAL
(2) The current stress test logic will roll back the expected state of such k/v since PUT() fails
(3) If the DB crashes before recovery finishes and then reopens, the WAL will be replayed, so the k/v is in the DB while its expected state has already been rolled back.

We decided to leave those expected states pending until a subsequent write of the same key succeeds.

Bonus: Now that I have realized a write to the manifest can also fail the overall write — which faces a similar problem as https://github.com/facebook/rocksdb/pull/12797 — I decided to disable fault injection on user writes per thread (instead of globally) when tracing is needed for prefix recovery; plus some refactoring.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12838

Test Plan:
Rehearsal CI
Run below command (varies on sync_fault_injection=1,0 to verify ExpectedState behavior) for a while to ensure crash recovery validation works fine

```
python3 tools/db_crashtest.py --simple blackbox --interval=30 --WAL_size_limit_MB=0 --WAL_ttl_seconds=0 --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --adm_policy=1 --advise_random_on_open=0 --allow_concurrent_memtable_write=0 --allow_data_in_errors=True --allow_fallocate=0 --async_io=0 --auto_readahead_size=0 --avoid_flush_during_recovery=0 --avoid_flush_during_shutdown=0 --avoid_unnecessary_blocking_io=0 --backup_max_size=104857600 --backup_one_in=0 --batch_protection_bytes_per_key=0 --bgerror_resume_retry_interval=1000000 --block_align=1 --block_protection_bytes_per_key=4 --block_size=16384 --bloom_before_level=4 --bloom_bits=56.810257702625165 --bottommost_compression_type=none --bottommost_file_compaction_delay=0 --bytes_per_sync=262144 --cache_index_and_filter_blocks=1 --cache_index_and_filter_blocks_with_high_priority=1 --cache_size=8388608 --cache_type=auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=1 --charge_filter_construction=1 --charge_table_reader=0 --check_multiget_consistency=0 --check_multiget_entity_consistency=1 --checkpoint_one_in=10000 --checksum_type=kxxHash --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000 --compact_range_one_in=1000 --compaction_pri=4 --compaction_readahead_size=1048576 --compaction_ttl=10 --compress_format_version=1 --compressed_secondary_cache_ratio=0.0 --compressed_secondary_cache_size=0 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=none --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --daily_offpeak_time_utc=04:00-08:00 --data_block_index_type=1 --db_write_buffer_size=0 --default_temperature=kWarm --default_write_temperature=kCold --delete_obsolete_files_period_micros=30000000 --delpercent=20 --delrangepercent=20 --destroy_db_initially=0 --detect_filter_construct_corruption=0 
--disable_file_deletions_one_in=10000 --disable_manual_compaction_one_in=1000000 --disable_wal=0 --dump_malloc_stats=0 --enable_checksum_handoff=1 --enable_compaction_filter=0 --enable_custom_split_merge=0 --enable_do_not_compress_roles=0 --enable_index_compression=1 --enable_memtable_insert_with_hint_prefix_extractor=0 --enable_pipelined_write=0 --enable_sst_partitioner_factory=0 --enable_thread_tracking=0 --enable_write_thread_adaptive_yield=0 --error_recovery_with_no_fault_injection=1 --exclude_wal_from_write_fault_injection=0 --fail_if_options_file_error=1 --fifo_allow_compaction=0 --file_checksum_impl=crc32c --fill_cache=1 --flush_one_in=1000000 --format_version=3 --get_all_column_family_metadata_one_in=1000000 --get_current_wal_file_one_in=0 --get_live_files_apis_one_in=1000000 --get_properties_of_all_tables_one_in=1000000 --get_property_one_in=100000 --get_sorted_wal_files_one_in=0 --hard_pending_compaction_bytes_limit=274877906944 --high_pri_pool_ratio=0.5 --index_block_restart_interval=4 --index_shortening=2 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=16384 --inplace_update_support=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --key_may_exist_one_in=100 --last_level_temperature=kWarm --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=10000 --log_file_time_to_roll=60 --log_readahead_size=16777216 --long_running_snapshots=1 --low_pri_pool_ratio=0 --lowest_used_cache_tier=0 --manifest_preallocation_size=0 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=10 --max_auto_readahead_size=16384 --max_background_compactions=1 --max_bytes_for_level_base=67108864 --max_key=100000 --max_key_len=3 --max_log_file_size=1048576 --max_manifest_file_size=32768 --max_sequential_skip_in_iterations=1 --max_total_wal_size=0 --max_write_batch_group_size_bytes=16 --max_write_buffer_number=10 --max_write_buffer_size_to_maintain=8388608 --memtable_insert_hint_per_batch=1 --memtable_max_range_deletions=0 
--memtable_prefix_bloom_size_ratio=0.01 --memtable_protection_bytes_per_key=1 --memtable_whole_key_filtering=1 --memtablerep=skip_list --metadata_charge_policy=1 --metadata_read_fault_one_in=0 --metadata_write_fault_one_in=8 --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=-1 --open_metadata_read_fault_one_in=0 --open_metadata_write_fault_one_in=8 --open_read_fault_one_in=0 --open_write_fault_one_in=8 --ops_per_thread=100000000 --optimize_filters_for_hits=1 --optimize_filters_for_memory=1 --optimize_multiget_for_io=1 --paranoid_file_checks=0 --partition_filters=0 --partition_pinning=3 --pause_background_one_in=1000000 --periodic_compaction_seconds=2 --prefix_size=7 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=0 --progress_reports=0 --promote_l0_one_in=0 --read_amp_bytes_per_bit=0 --read_fault_one_in=1000 --readahead_size=524288 --readpercent=10 --recycle_log_file_num=1 --reopen=0 --report_bg_io_stats=0 --reset_stats_one_in=1000000 --sample_for_compression=0 --secondary_cache_fault_one_in=0 --set_options_one_in=0 --skip_stats_update_on_db_open=1 --snapshot_hold_ops=100000 --soft_pending_compaction_bytes_limit=68719476736 --sqfc_name=foo --sqfc_version=0 --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=10 --stats_history_buffer_size=0 --strict_bytes_per_sync=1 --subcompactions=4 --sync=1 --sync_fault_injection=0 --table_cache_numshardbits=6 --target_file_size_base=16777216 --target_file_size_multiplier=1 --test_batches_snapshots=0 --top_level_index_pinning=2 --uncache_aggressiveness=239 --universal_max_read_amp=-1 --unpartitioned_pinning=1 --use_adaptive_mutex=1 --use_adaptive_mutex_lru=1 --use_attribute_group=0 --use_delta_encoding=0 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=0 --use_multi_cf_iterator=0 
--use_multi_get_entity=0 --use_multiget=0 --use_put_entity_one_in=0 --use_sqfc_for_range_queries=1 --use_timed_put_one_in=0 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_compression=0 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=none --write_buffer_size=33554432 --write_dbid_to_manifest=0 --write_fault_one_in=8 --writepercent=40
```

Reviewed By: cbi42

Differential Revision: D59377075

Pulled By: hx235

fbshipit-source-id: 91f602fd67e2d339d378cd28b982095fd073dcb6
This commit is contained in:
Hui Xiao 2024-07-29 13:51:49 -07:00 committed by Facebook GitHub Bot
parent d94c2adc28
commit 408e8d4c85
16 changed files with 524 additions and 394 deletions

View File

@ -1411,6 +1411,7 @@ IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch,
total_log_size_ += log_entry.size(); total_log_size_ += log_entry.size();
log_file_number_size.AddSize(*log_size); log_file_number_size.AddSize(*log_size);
log_empty_ = false; log_empty_ = false;
return io_s; return io_s;
} }

View File

@ -55,7 +55,10 @@ IOStatus Writer::WriteBuffer(const WriteOptions& write_options) {
if (dest_->seen_error()) { if (dest_->seen_error()) {
#ifndef NDEBUG #ifndef NDEBUG
if (dest_->seen_injected_error()) { if (dest_->seen_injected_error()) {
return IOStatus::IOError("Seen injected error. Skip writing buffer."); std::stringstream msg;
msg << "Seen " << FaultInjectionTestFS::kInjected
<< " error. Skip writing buffer.";
return IOStatus::IOError(msg.str());
} }
#endif // NDEBUG #endif // NDEBUG
return IOStatus::IOError("Seen error. Skip writing buffer."); return IOStatus::IOError("Seen error. Skip writing buffer.");
@ -93,7 +96,10 @@ IOStatus Writer::AddRecord(const WriteOptions& write_options,
if (dest_->seen_error()) { if (dest_->seen_error()) {
#ifndef NDEBUG #ifndef NDEBUG
if (dest_->seen_injected_error()) { if (dest_->seen_injected_error()) {
return IOStatus::IOError("Seen injected error. Skip writing buffer."); std::stringstream msg;
msg << "Seen " << FaultInjectionTestFS::kInjected
<< " error. Skip writing buffer.";
return IOStatus::IOError(msg.str());
} }
#endif // NDEBUG #endif // NDEBUG
return IOStatus::IOError("Seen error. Skip writing buffer."); return IOStatus::IOError("Seen error. Skip writing buffer.");
@ -205,7 +211,10 @@ IOStatus Writer::AddCompressionTypeRecord(const WriteOptions& write_options) {
if (dest_->seen_error()) { if (dest_->seen_error()) {
#ifndef NDEBUG #ifndef NDEBUG
if (dest_->seen_injected_error()) { if (dest_->seen_injected_error()) {
return IOStatus::IOError("Seen injected error. Skip writing buffer."); std::stringstream msg;
msg << "Seen " << FaultInjectionTestFS::kInjected
<< " error. Skip writing buffer.";
return IOStatus::IOError(msg.str());
} }
#endif // NDEBUG #endif // NDEBUG
return IOStatus::IOError("Seen error. Skip writing buffer."); return IOStatus::IOError("Seen error. Skip writing buffer.");

View File

@ -197,7 +197,6 @@ DECLARE_int64(target_file_size_base);
DECLARE_int32(target_file_size_multiplier); DECLARE_int32(target_file_size_multiplier);
DECLARE_uint64(max_bytes_for_level_base); DECLARE_uint64(max_bytes_for_level_base);
DECLARE_double(max_bytes_for_level_multiplier); DECLARE_double(max_bytes_for_level_multiplier);
DECLARE_int32(range_deletion_width);
DECLARE_uint64(rate_limiter_bytes_per_sec); DECLARE_uint64(rate_limiter_bytes_per_sec);
DECLARE_bool(rate_limit_bg_reads); DECLARE_bool(rate_limit_bg_reads);
DECLARE_bool(rate_limit_user_ops); DECLARE_bool(rate_limit_user_ops);

View File

@ -72,14 +72,7 @@ class DbStressListener : public EventListener {
// pretending doing some work here // pretending doing some work here
RandomSleep(); RandomSleep();
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
} }
@ -180,14 +173,7 @@ class DbStressListener : public EventListener {
void OnSubcompactionCompleted(const SubcompactionJobInfo& /* si */) override { void OnSubcompactionCompleted(const SubcompactionJobInfo& /* si */) override {
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
} }
@ -274,14 +260,7 @@ class DbStressListener : public EventListener {
bool* /* auto_recovery */) override { bool* /* auto_recovery */) override {
RandomSleep(); RandomSleep();
if (FLAGS_error_recovery_with_no_fault_injection && fault_fs_guard) { if (FLAGS_error_recovery_with_no_fault_injection && fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
// TODO(hx235): only exempt the flush thread during error recovery instead // TODO(hx235): only exempt the flush thread during error recovery instead
// of all the flush threads from error injection // of all the flush threads from error injection
fault_fs_guard->SetIOActivtiesExcludedFromFaultInjection( fault_fs_guard->SetIOActivtiesExcludedFromFaultInjection(
@ -293,14 +272,7 @@ class DbStressListener : public EventListener {
const BackgroundErrorRecoveryInfo& /*info*/) override { const BackgroundErrorRecoveryInfo& /*info*/) override {
RandomSleep(); RandomSleep();
if (FLAGS_error_recovery_with_no_fault_injection && fault_fs_guard) { if (FLAGS_error_recovery_with_no_fault_injection && fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->SetIOActivtiesExcludedFromFaultInjection({}); fault_fs_guard->SetIOActivtiesExcludedFromFaultInjection({});
} }
} }

View File

@ -31,6 +31,7 @@ DECLARE_int32(compaction_thread_pool_adjust_interval);
DECLARE_int32(continuous_verification_interval); DECLARE_int32(continuous_verification_interval);
DECLARE_bool(error_recovery_with_no_fault_injection); DECLARE_bool(error_recovery_with_no_fault_injection);
DECLARE_bool(sync_fault_injection); DECLARE_bool(sync_fault_injection);
DECLARE_int32(range_deletion_width);
DECLARE_bool(disable_wal); DECLARE_bool(disable_wal);
DECLARE_int32(manual_wal_flush_one_in); DECLARE_int32(manual_wal_flush_one_in);
DECLARE_int32(metadata_read_fault_one_in); DECLARE_int32(metadata_read_fault_one_in);
@ -147,7 +148,8 @@ class SharedState {
~SharedState() { ~SharedState() {
#ifndef NDEBUG #ifndef NDEBUG
if (FLAGS_read_fault_one_in) { if (FLAGS_read_fault_one_in || FLAGS_write_fault_one_in ||
FLAGS_metadata_write_fault_one_in) {
SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->DisableProcessing();
} }
@ -260,10 +262,14 @@ class SharedState {
// This is useful for crash-recovery testing when the process may crash // This is useful for crash-recovery testing when the process may crash
// before updating the corresponding expected value // before updating the corresponding expected value
// //
// Requires external locking covering `key` in `cf` to prevent concurrent // It can fail and `*prepared` will be set to false if the previous write or
// write or delete to the same `key`. // delete is still in pending state (e.g, still in recovery for retryable IO
PendingExpectedValue PreparePut(int cf, int64_t key) { // errors). If succeeds,`*prepared` will be set to true
return expected_state_manager_->PreparePut(cf, key); //
// Requires external locking covering `key` in `cf` to prevent
// concurrent write or delete to the same `key`.
PendingExpectedValue PreparePut(int cf, int64_t key, bool* prepared) {
return expected_state_manager_->PreparePut(cf, key, prepared);
} }
// Does not requires external locking. // Does not requires external locking.
@ -275,24 +281,31 @@ class SharedState {
// This is useful for crash-recovery testing when the process may crash // This is useful for crash-recovery testing when the process may crash
// before updating the corresponding expected value // before updating the corresponding expected value
// //
// It can fail and `*prepared` will be set to false if the previous write or
// delete is still in pending state (e.g, still in recovery for retryable IO
// errors). If succeeds,`*prepared` will be set to true
//
// Requires external locking covering `key` in `cf` to prevent concurrent // Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`. // write or delete to the same `key`.
PendingExpectedValue PrepareDelete(int cf, int64_t key) { PendingExpectedValue PrepareDelete(int cf, int64_t key, bool* prepared) {
return expected_state_manager_->PrepareDelete(cf, key); return expected_state_manager_->PrepareDelete(cf, key, prepared);
} }
// Requires external locking covering `key` in `cf` to prevent concurrent // Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`. // write or delete to the same `key`.
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key) { PendingExpectedValue PrepareSingleDelete(int cf, int64_t key,
return expected_state_manager_->PrepareSingleDelete(cf, key); bool* prepared) {
return expected_state_manager_->PrepareSingleDelete(cf, key, prepared);
} }
// Requires external locking covering keys in `[begin_key, end_key)` in `cf` // Requires external locking covering keys in `[begin_key, end_key)` in `cf`
// to prevent concurrent write or delete to the same `key`. // to prevent concurrent write or delete to the same `key`.
std::vector<PendingExpectedValue> PrepareDeleteRange(int cf, std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
int64_t begin_key, int64_t begin_key,
int64_t end_key) { int64_t end_key,
return expected_state_manager_->PrepareDeleteRange(cf, begin_key, end_key); bool* prepared) {
return expected_state_manager_->PrepareDeleteRange(cf, begin_key, end_key,
prepared);
} }
bool AllowsOverwrite(int64_t key) const { bool AllowsOverwrite(int64_t key) const {

View File

@ -632,9 +632,10 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys,
for (auto cfh : column_families_) { for (auto cfh : column_families_) {
for (int64_t k = 0; k != number_of_keys; ++k) { for (int64_t k = 0; k != number_of_keys; ++k) {
const std::string key = Key(k); const std::string key = Key(k);
bool prepare = false;
PendingExpectedValue pending_expected_value = PendingExpectedValue pending_expected_value =
shared->PreparePut(cf_idx, k); shared->PreparePut(cf_idx, k, &prepare);
assert(prepare);
const uint32_t value_base = pending_expected_value.GetFinalValueBase(); const uint32_t value_base = pending_expected_value.GetFinalValueBase();
const size_t sz = GenerateValue(value_base, value, sizeof(value)); const size_t sz = GenerateValue(value_base, value, sizeof(value));
@ -1011,26 +1012,12 @@ void StressTest::OperateDb(ThreadState* thread) {
thread->rand.OneIn(FLAGS_verify_db_one_in)) { thread->rand.OneIn(FLAGS_verify_db_one_in)) {
// Temporarily disable error injection for verification // Temporarily disable error injection for verification
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
ContinuouslyVerifyDb(thread); ContinuouslyVerifyDb(thread);
// Enable back error injection disabled for verification // Enable back error injection disabled for verification
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
if (thread->shared->ShouldStopTest()) { if (thread->shared->ShouldStopTest()) {
break; break;
@ -1056,14 +1043,7 @@ void StressTest::OperateDb(ThreadState* thread) {
} else if (s.ok()) { } else if (s.ok()) {
// Temporarily disable error injection for verification // Temporarily disable error injection for verification
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
// Verify no writes during LockWAL // Verify no writes during LockWAL
@ -1118,14 +1098,7 @@ void StressTest::OperateDb(ThreadState* thread) {
// Enable back error injection disabled for verification // Enable back error injection disabled for verification
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
} }
} }
@ -1238,27 +1211,13 @@ void StressTest::OperateDb(ThreadState* thread) {
// failed due to injected error. So we disable fault injection to avoid // failed due to injected error. So we disable fault injection to avoid
// false positive // false positive
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
TestGetProperty(thread); TestGetProperty(thread);
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
} }
@ -1289,25 +1248,11 @@ void StressTest::OperateDb(ThreadState* thread) {
// TODO(hx235): enable error injection with // TODO(hx235): enable error injection with
// backup/restore after fixing the various issues it surfaces // backup/restore after fixing the various issues it surfaces
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
Status s = TestBackupRestore(thread, rand_column_families, rand_keys); Status s = TestBackupRestore(thread, rand_column_families, rand_keys);
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
ProcessStatus(shared, "Backup/restore", s); ProcessStatus(shared, "Backup/restore", s);
} }
@ -1344,7 +1289,13 @@ void StressTest::OperateDb(ThreadState* thread) {
if (thread->rand.OneInOpt(FLAGS_key_may_exist_one_in)) { if (thread->rand.OneInOpt(FLAGS_key_may_exist_one_in)) {
TestKeyMayExist(thread, read_opts, rand_column_families, rand_keys); TestKeyMayExist(thread, read_opts, rand_column_families, rand_keys);
} }
// Prefix-recoverability relies on tracing successful user writes.
// Currently we trace all user writes regardless of whether it later
// succeeds or not. To simplify, we disable any fault injection during
// user write.
// TODO(hx235): support tracing user writes with fault injection.
bool disable_fault_injection_during_user_write =
fault_fs_guard && MightHaveUnsyncedDataLoss();
int prob_op = thread->rand.Uniform(100); int prob_op = thread->rand.Uniform(100);
// Reset this in case we pick something other than a read op. We don't // Reset this in case we pick something other than a read op. We don't
// want to use a stale value when deciding at the beginning of the loop // want to use a stale value when deciding at the beginning of the loop
@ -1403,16 +1354,34 @@ void StressTest::OperateDb(ThreadState* thread) {
} else if (prob_op < write_bound) { } else if (prob_op < write_bound) {
assert(prefix_bound <= prob_op); assert(prefix_bound <= prob_op);
// OPERATION write // OPERATION write
if (disable_fault_injection_during_user_write) {
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
TestPut(thread, write_opts, read_opts, rand_column_families, rand_keys, TestPut(thread, write_opts, read_opts, rand_column_families, rand_keys,
value); value);
if (disable_fault_injection_during_user_write) {
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
} else if (prob_op < del_bound) { } else if (prob_op < del_bound) {
assert(write_bound <= prob_op); assert(write_bound <= prob_op);
// OPERATION delete // OPERATION delete
if (disable_fault_injection_during_user_write) {
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
TestDelete(thread, write_opts, rand_column_families, rand_keys); TestDelete(thread, write_opts, rand_column_families, rand_keys);
if (disable_fault_injection_during_user_write) {
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
} else if (prob_op < delrange_bound) { } else if (prob_op < delrange_bound) {
assert(del_bound <= prob_op); assert(del_bound <= prob_op);
// OPERATION delete range // OPERATION delete range
if (disable_fault_injection_during_user_write) {
fault_fs_guard->DisableAllThreadLocalErrorInjection();
}
TestDeleteRange(thread, write_opts, rand_column_families, rand_keys); TestDeleteRange(thread, write_opts, rand_column_families, rand_keys);
if (disable_fault_injection_during_user_write) {
fault_fs_guard->EnableAllThreadLocalErrorInjection();
}
} else if (prob_op < iterate_bound) { } else if (prob_op < iterate_bound) {
assert(delrange_bound <= prob_op); assert(delrange_bound <= prob_op);
// OPERATION iterate // OPERATION iterate
@ -1456,14 +1425,7 @@ void StressTest::OperateDb(ThreadState* thread) {
#ifndef NDEBUG #ifndef NDEBUG
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
#endif // NDEBUG #endif // NDEBUG
} }
@ -2344,14 +2306,7 @@ Status StressTest::TestBackupRestore(
// Temporarily disable error injection for clean up // Temporarily disable error injection for clean up
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
if (s.ok() || IsErrorInjectedAndRetryable(s)) { if (s.ok() || IsErrorInjectedAndRetryable(s)) {
@ -2373,14 +2328,7 @@ Status StressTest::TestBackupRestore(
// Enable back error injection disabled for clean up // Enable back error injection disabled for clean up
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
if (!s.ok() && !IsErrorInjectedAndRetryable(s)) { if (!s.ok() && !IsErrorInjectedAndRetryable(s)) {
@ -2520,26 +2468,12 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
tmp_opts.sst_file_manager.reset(); tmp_opts.sst_file_manager.reset();
// Temporarily disable error injection for clean-up // Temporarily disable error injection for clean-up
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
DestroyDB(checkpoint_dir, tmp_opts); DestroyDB(checkpoint_dir, tmp_opts);
// Enable back error injection disabled for clean-up // Enable back error injection disabled for clean-up
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
Checkpoint* checkpoint = nullptr; Checkpoint* checkpoint = nullptr;
Status s = Checkpoint::Create(db_, &checkpoint); Status s = Checkpoint::Create(db_, &checkpoint);
@ -2647,14 +2581,7 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
// Temporarily disable error injection for clean-up // Temporarily disable error injection for clean-up
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
if (!s.ok() && !IsErrorInjectedAndRetryable(s)) { if (!s.ok() && !IsErrorInjectedAndRetryable(s)) {
@ -2666,14 +2593,7 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
// Enable back error injection disabled for clean-up // Enable back error injection disabled for clean-up
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
return s; return s;
} }
@ -3048,14 +2968,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
if (thread->rand.OneIn(2)) { if (thread->rand.OneIn(2)) {
// Temporarily disable error injection to for validation // Temporarily disable error injection to for validation
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
// Declare a snapshot and compare the data before and after the compaction // Declare a snapshot and compare the data before and after the compaction
@ -3065,14 +2978,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
// Enable back error injection disabled for validation // Enable back error injection disabled for validation
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
} }
std::ostringstream compact_range_opt_oss; std::ostringstream compact_range_opt_oss;
@ -3110,14 +3016,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
if (pre_snapshot != nullptr) { if (pre_snapshot != nullptr) {
// Temporarily disable error injection for validation // Temporarily disable error injection for validation
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
uint32_t post_hash = uint32_t post_hash =
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key); GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
@ -3136,14 +3035,7 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
db_->ReleaseSnapshot(pre_snapshot); db_->ReleaseSnapshot(pre_snapshot);
if (fault_fs_guard) { if (fault_fs_guard) {
// Enable back error injection disabled for validation // Enable back error injection disabled for validation
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableAllThreadLocalErrorInjection();
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
} }
} }
} }
@ -3523,14 +3415,7 @@ void StressTest::Open(SharedState* shared, bool reopen) {
// If this is for DB reopen, error injection may have been enabled. // If this is for DB reopen, error injection may have been enabled.
// Disable it here in case there is no open fault injection. // Disable it here in case there is no open fault injection.
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
} }
// TODO; test transaction DB Open with fault injection // TODO; test transaction DB Open with fault injection
if (!FLAGS_use_txn) { if (!FLAGS_use_txn) {
@ -3611,14 +3496,7 @@ void StressTest::Open(SharedState* shared, bool reopen) {
if (inject_sync_fault || inject_open_meta_read_error || if (inject_sync_fault || inject_open_meta_read_error ||
inject_open_meta_write_error || inject_open_read_error || inject_open_meta_write_error || inject_open_read_error ||
inject_open_write_error) { inject_open_write_error) {
fault_fs_guard->DisableThreadLocalErrorInjection( fault_fs_guard->DisableAllThreadLocalErrorInjection();
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
if (s.ok()) { if (s.ok()) {
// Injected errors might happen in background compactions. We // Injected errors might happen in background compactions. We

View File

@ -63,6 +63,19 @@ class StressTest {
return 0; return 0;
} }
} }
void GetDeleteRangeKeyLocks(
ThreadState* thread, int rand_column_family, int64_t rand_key,
std::vector<std::unique_ptr<MutexLock>>* range_locks) {
for (int j = 0; j < FLAGS_range_deletion_width; ++j) {
if (j == 0 ||
((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) {
range_locks->emplace_back(new MutexLock(
thread->shared->GetMutexForKey(rand_column_family, rand_key + j)));
}
}
}
Status AssertSame(DB* db, ColumnFamilyHandle* cf, Status AssertSame(DB* db, ColumnFamilyHandle* cf,
ThreadState::SnapshotState& snap_state); ThreadState::SnapshotState& snap_state);
@ -283,7 +296,8 @@ class StressTest {
bool IsErrorInjectedAndRetryable(const Status& error_s) const { bool IsErrorInjectedAndRetryable(const Status& error_s) const {
assert(!error_s.ok()); assert(!error_s.ok());
return error_s.getState() && std::strstr(error_s.getState(), "inject") && return error_s.getState() &&
FaultInjectionTestFS::IsInjectedError(error_s) &&
!status_to_io_status(Status(error_s)).GetDataLoss(); !status_to_io_status(Status(error_s)).GetDataLoss();
} }

View File

@ -32,14 +32,32 @@ void ExpectedState::Precommit(int cf, int64_t key, const ExpectedValue& value) {
std::atomic_thread_fence(std::memory_order_release); std::atomic_thread_fence(std::memory_order_release);
} }
PendingExpectedValue ExpectedState::PreparePut(int cf, int64_t key) { PendingExpectedValue ExpectedState::PreparePut(int cf, int64_t key,
bool* prepared) {
assert(prepared);
ExpectedValue expected_value = Load(cf, key); ExpectedValue expected_value = Load(cf, key);
// Calculate the original expected value
const ExpectedValue orig_expected_value = expected_value; const ExpectedValue orig_expected_value = expected_value;
expected_value.Put(true /* pending */);
// Calculate the pending expected value
bool res = expected_value.Put(true /* pending */);
if (!res) {
PendingExpectedValue ret = PendingExpectedValue(
&Value(cf, key), orig_expected_value, orig_expected_value);
*prepared = false;
return ret;
}
const ExpectedValue pending_expected_value = expected_value; const ExpectedValue pending_expected_value = expected_value;
expected_value.Put(false /* pending */);
// Calculate the final expected value
res = expected_value.Put(false /* pending */);
assert(res);
const ExpectedValue final_expected_value = expected_value; const ExpectedValue final_expected_value = expected_value;
// Precommit
Precommit(cf, key, pending_expected_value); Precommit(cf, key, pending_expected_value);
*prepared = true;
return PendingExpectedValue(&Value(cf, key), orig_expected_value, return PendingExpectedValue(&Value(cf, key), orig_expected_value,
final_expected_value); final_expected_value);
} }
@ -48,41 +66,58 @@ ExpectedValue ExpectedState::Get(int cf, int64_t key) { return Load(cf, key); }
PendingExpectedValue ExpectedState::PrepareDelete(int cf, int64_t key, PendingExpectedValue ExpectedState::PrepareDelete(int cf, int64_t key,
bool* prepared) { bool* prepared) {
assert(prepared);
ExpectedValue expected_value = Load(cf, key); ExpectedValue expected_value = Load(cf, key);
// Calculate the original expected value
const ExpectedValue orig_expected_value = expected_value; const ExpectedValue orig_expected_value = expected_value;
// Calculate the pending expected value
bool res = expected_value.Delete(true /* pending */); bool res = expected_value.Delete(true /* pending */);
if (prepared) {
*prepared = res;
}
if (!res) { if (!res) {
return PendingExpectedValue(&Value(cf, key), orig_expected_value, PendingExpectedValue ret = PendingExpectedValue(
orig_expected_value); &Value(cf, key), orig_expected_value, orig_expected_value);
*prepared = false;
return ret;
} }
const ExpectedValue pending_expected_value = expected_value; const ExpectedValue pending_expected_value = expected_value;
expected_value.Delete(false /* pending */);
// Calculate the final expected value
res = expected_value.Delete(false /* pending */);
assert(res);
const ExpectedValue final_expected_value = expected_value; const ExpectedValue final_expected_value = expected_value;
// Precommit
Precommit(cf, key, pending_expected_value); Precommit(cf, key, pending_expected_value);
*prepared = true;
return PendingExpectedValue(&Value(cf, key), orig_expected_value, return PendingExpectedValue(&Value(cf, key), orig_expected_value,
final_expected_value); final_expected_value);
} }
PendingExpectedValue ExpectedState::PrepareSingleDelete(int cf, int64_t key) { PendingExpectedValue ExpectedState::PrepareSingleDelete(int cf, int64_t key,
return PrepareDelete(cf, key); bool* prepared) {
return PrepareDelete(cf, key, prepared);
} }
std::vector<PendingExpectedValue> ExpectedState::PrepareDeleteRange( std::vector<PendingExpectedValue> ExpectedState::PrepareDeleteRange(
int cf, int64_t begin_key, int64_t end_key) { int cf, int64_t begin_key, int64_t end_key, bool* prepared) {
std::vector<PendingExpectedValue> pending_expected_values; std::vector<PendingExpectedValue> pending_expected_values;
bool has_prepared_failed = false;
for (int64_t key = begin_key; key < end_key; ++key) { for (int64_t key = begin_key; key < end_key; ++key) {
bool prepared = false; bool each_prepared = false;
PendingExpectedValue pending_expected_value = PendingExpectedValue pending_expected_value =
PrepareDelete(cf, key, &prepared); PrepareDelete(cf, key, &each_prepared);
if (prepared) { if (each_prepared) {
pending_expected_values.push_back(pending_expected_value); pending_expected_values.push_back(pending_expected_value);
} else { } else {
has_prepared_failed = true;
pending_expected_value.PermitUnclosedPendingState(); pending_expected_value.PermitUnclosedPendingState();
break;
} }
} }
*prepared = !has_prepared_failed;
return pending_expected_values; return pending_expected_values;
} }

View File

@ -44,29 +44,29 @@ class ExpectedState {
// //
// Requires external locking covering `key` in `cf` to prevent concurrent // Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`. // write or delete to the same `key`.
PendingExpectedValue PreparePut(int cf, int64_t key); PendingExpectedValue PreparePut(int cf, int64_t key, bool* prepared);
// Does not requires external locking. // Does not requires external locking.
ExpectedValue Get(int cf, int64_t key); ExpectedValue Get(int cf, int64_t key);
// Prepare a Delete that will be started but not finished yet // Prepare a Delete that will be started but not finished yet.
// This is useful for crash-recovery testing when the process may crash // This is useful for crash-recovery testing when the process may crash
// before updating the corresponding expected value // before updating the corresponding expected value
// //
// Requires external locking covering `key` in `cf` to prevent concurrent // Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`. // write or delete to the same `key`.
PendingExpectedValue PrepareDelete(int cf, int64_t key, PendingExpectedValue PrepareDelete(int cf, int64_t key, bool* prepared);
bool* prepared = nullptr);
// Requires external locking covering `key` in `cf` to prevent concurrent // Requires external locking covering `key` in `cf` to prevent concurrent
// write or delete to the same `key`. // write or delete to the same `key`.
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key); PendingExpectedValue PrepareSingleDelete(int cf, int64_t key, bool* prepared);
// Requires external locking covering keys in `[begin_key, end_key)` in `cf` // Requires external locking covering keys in `[begin_key, end_key)` in `cf`
// to prevent concurrent write or delete to the same `key`. // to prevent concurrent write or delete to the same `key`.
std::vector<PendingExpectedValue> PrepareDeleteRange(int cf, std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
int64_t begin_key, int64_t begin_key,
int64_t end_key); int64_t end_key,
bool* prepared);
// Update the expected value for start of an incomplete write or delete // Update the expected value for start of an incomplete write or delete
// operation on the key assoicated with this expected value // operation on the key assoicated with this expected value
@ -197,28 +197,30 @@ class ExpectedStateManager {
void ClearColumnFamily(int cf) { return latest_->ClearColumnFamily(cf); } void ClearColumnFamily(int cf) { return latest_->ClearColumnFamily(cf); }
// See ExpectedState::PreparePut() // See ExpectedState::PreparePut()
PendingExpectedValue PreparePut(int cf, int64_t key) { PendingExpectedValue PreparePut(int cf, int64_t key, bool* prepared) {
return latest_->PreparePut(cf, key); return latest_->PreparePut(cf, key, prepared);
} }
// See ExpectedState::Get() // See ExpectedState::Get()
ExpectedValue Get(int cf, int64_t key) { return latest_->Get(cf, key); } ExpectedValue Get(int cf, int64_t key) { return latest_->Get(cf, key); }
// See ExpectedState::PrepareDelete() // See ExpectedState::PrepareDelete()
PendingExpectedValue PrepareDelete(int cf, int64_t key) { PendingExpectedValue PrepareDelete(int cf, int64_t key, bool* prepared) {
return latest_->PrepareDelete(cf, key); return latest_->PrepareDelete(cf, key, prepared);
} }
// See ExpectedState::PrepareSingleDelete() // See ExpectedState::PrepareSingleDelete()
PendingExpectedValue PrepareSingleDelete(int cf, int64_t key) { PendingExpectedValue PrepareSingleDelete(int cf, int64_t key,
return latest_->PrepareSingleDelete(cf, key); bool* prepared) {
return latest_->PrepareSingleDelete(cf, key, prepared);
} }
// See ExpectedState::PrepareDeleteRange() // See ExpectedState::PrepareDeleteRange()
std::vector<PendingExpectedValue> PrepareDeleteRange(int cf, std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
int64_t begin_key, int64_t begin_key,
int64_t end_key) { int64_t end_key,
return latest_->PrepareDeleteRange(cf, begin_key, end_key); bool* prepared) {
return latest_->PrepareDeleteRange(cf, begin_key, end_key, prepared);
} }
// See ExpectedState::Exists() // See ExpectedState::Exists()

View File

@ -10,7 +10,11 @@
#include <atomic> #include <atomic>
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
void ExpectedValue::Put(bool pending) { bool ExpectedValue::Put(bool pending) {
if (pending && (PendingWrite() || PendingDelete())) {
return false;
}
if (pending) { if (pending) {
SetPendingWrite(); SetPendingWrite();
} else { } else {
@ -18,9 +22,14 @@ void ExpectedValue::Put(bool pending) {
ClearDeleted(); ClearDeleted();
ClearPendingWrite(); ClearPendingWrite();
} }
return true;
} }
bool ExpectedValue::Delete(bool pending) { bool ExpectedValue::Delete(bool pending) {
if (pending && (PendingWrite() || PendingDelete())) {
return false;
}
if (!Exists()) { if (!Exists()) {
return false; return false;
} }

View File

@ -41,7 +41,7 @@ class ExpectedValue {
uint32_t Read() const { return expected_value_; } uint32_t Read() const { return expected_value_; }
void Put(bool pending); bool Put(bool pending);
bool Delete(bool pending); bool Delete(bool pending);

View File

@ -1611,67 +1611,100 @@ class NonBatchedOpsStressTest : public StressTest {
} }
} }
// To track the final write status
Status s;
// To track the initial write status
Status initial_write_s;
// To track whether WAL write may have succeeded during the initial failed
// write
bool initial_wal_write_may_succeed = true;
bool prepared = false;
PendingExpectedValue pending_expected_value = PendingExpectedValue pending_expected_value =
shared->PreparePut(rand_column_family, rand_key); shared->PreparePut(rand_column_family, rand_key, &prepared);
if (!prepared) {
pending_expected_value.PermitUnclosedPendingState();
return s;
}
const uint32_t value_base = pending_expected_value.GetFinalValueBase(); const uint32_t value_base = pending_expected_value.GetFinalValueBase();
const size_t sz = GenerateValue(value_base, value, sizeof(value)); const size_t sz = GenerateValue(value_base, value, sizeof(value));
const Slice v(value, sz); const Slice v(value, sz);
Status s; do {
// In order to commit the expected state for the initial write failed with
if (FLAGS_use_put_entity_one_in > 0 && // injected retryable error and successful WAL write, retry the write
(value_base % FLAGS_use_put_entity_one_in) == 0) { // until it succeeds after the recovery finishes
if (!FLAGS_use_txn) { if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
if (FLAGS_use_attribute_group) { initial_wal_write_may_succeed) {
s = db_->PutEntity(write_opts, k, lock.reset();
GenerateAttributeGroups({cfh}, value_base, v)); std::this_thread::sleep_for(std::chrono::microseconds(1 * 1000 * 1000));
lock.reset(new MutexLock(
shared->GetMutexForKey(rand_column_family, rand_key)));
}
if (FLAGS_use_put_entity_one_in > 0 &&
(value_base % FLAGS_use_put_entity_one_in) == 0) {
if (!FLAGS_use_txn) {
if (FLAGS_use_attribute_group) {
s = db_->PutEntity(write_opts, k,
GenerateAttributeGroups({cfh}, value_base, v));
} else {
s = db_->PutEntity(write_opts, cfh, k,
GenerateWideColumns(value_base, v));
}
} else { } else {
s = db_->PutEntity(write_opts, cfh, k, s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
GenerateWideColumns(value_base, v)); return txn.PutEntity(cfh, k, GenerateWideColumns(value_base, v));
});
}
} else if (FLAGS_use_timed_put_one_in > 0 &&
((value_base + kLargePrimeForCommonFactorSkew) %
FLAGS_use_timed_put_one_in) == 0) {
WriteBatch wb;
uint64_t write_unix_time = GetWriteUnixTime(thread);
s = wb.TimedPut(cfh, k, v, write_unix_time);
if (s.ok()) {
s = db_->Write(write_opts, &wb);
}
} else if (FLAGS_use_merge) {
if (!FLAGS_use_txn) {
if (FLAGS_user_timestamp_size == 0) {
s = db_->Merge(write_opts, cfh, k, v);
} else {
s = db_->Merge(write_opts, cfh, k, write_ts, v);
}
} else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
return txn.Merge(cfh, k, v);
});
} }
} else { } else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) { if (!FLAGS_use_txn) {
return txn.PutEntity(cfh, k, GenerateWideColumns(value_base, v)); if (FLAGS_user_timestamp_size == 0) {
}); s = db_->Put(write_opts, cfh, k, v);
} } else {
} else if (FLAGS_use_timed_put_one_in > 0 && s = db_->Put(write_opts, cfh, k, write_ts, v);
((value_base + kLargePrimeForCommonFactorSkew) % }
FLAGS_use_timed_put_one_in) == 0) {
WriteBatch wb;
uint64_t write_unix_time = GetWriteUnixTime(thread);
s = wb.TimedPut(cfh, k, v, write_unix_time);
if (s.ok()) {
s = db_->Write(write_opts, &wb);
}
} else if (FLAGS_use_merge) {
if (!FLAGS_use_txn) {
if (FLAGS_user_timestamp_size == 0) {
s = db_->Merge(write_opts, cfh, k, v);
} else { } else {
s = db_->Merge(write_opts, cfh, k, write_ts, v); s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
return txn.Put(cfh, k, v);
});
} }
} else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
return txn.Merge(cfh, k, v);
});
} }
} else { // Only update `initial_write_s`, `initial_wal_write_may_succeed` when the
if (!FLAGS_use_txn) { // first write fails
if (FLAGS_user_timestamp_size == 0) { if (!s.ok() && initial_write_s.ok()) {
s = db_->Put(write_opts, cfh, k, v); initial_write_s = s;
} else { initial_wal_write_may_succeed =
s = db_->Put(write_opts, cfh, k, write_ts, v); !FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
return txn.Put(cfh, k, v);
});
} }
} } while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) { if (!s.ok()) {
pending_expected_value.Rollback(); pending_expected_value.Rollback();
if (IsErrorInjectedAndRetryable(s)) { if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s; return s;
} else if (FLAGS_inject_error_severity == 2) { } else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
@ -1685,11 +1718,12 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str()); fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate(); thread->shared->SafeTerminate();
} }
} else {
pending_expected_value.Commit();
thread->stats.AddBytesForWrites(1, sz);
PrintKeyValue(rand_column_family, static_cast<uint32_t>(rand_key), value,
sz);
} }
pending_expected_value.Commit();
thread->stats.AddBytesForWrites(1, sz);
PrintKeyValue(rand_column_family, static_cast<uint32_t>(rand_key), value,
sz);
return s; return s;
} }
@ -1711,27 +1745,62 @@ class NonBatchedOpsStressTest : public StressTest {
Slice key = key_str; Slice key = key_str;
auto cfh = column_families_[rand_column_family]; auto cfh = column_families_[rand_column_family];
// To track the final write status
Status s;
// To track the initial write status
Status initial_write_s;
// To track whether WAL write may have succeeded during the initial failed
// write
bool initial_wal_write_may_succeed = true;
// Use delete if the key may be overwritten and a single deletion // Use delete if the key may be overwritten and a single deletion
// otherwise. // otherwise.
Status s;
if (shared->AllowsOverwrite(rand_key)) { if (shared->AllowsOverwrite(rand_key)) {
bool prepared = false;
PendingExpectedValue pending_expected_value = PendingExpectedValue pending_expected_value =
shared->PrepareDelete(rand_column_family, rand_key); shared->PrepareDelete(rand_column_family, rand_key, &prepared);
if (!FLAGS_use_txn) { if (!prepared) {
if (FLAGS_user_timestamp_size == 0) { pending_expected_value.PermitUnclosedPendingState();
s = db_->Delete(write_opts, cfh, key); return s;
} else {
s = db_->Delete(write_opts, cfh, key, write_ts);
}
} else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
return txn.Delete(cfh, key);
});
} }
do {
// In order to commit the expected state for the initial write failed
// with injected retryable error and successful WAL write, retry the
// write until it succeeds after the recovery finishes
if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed) {
lock.reset();
std::this_thread::sleep_for(
std::chrono::microseconds(1 * 1000 * 1000));
lock.reset(new MutexLock(
shared->GetMutexForKey(rand_column_family, rand_key)));
}
if (!FLAGS_use_txn) {
if (FLAGS_user_timestamp_size == 0) {
s = db_->Delete(write_opts, cfh, key);
} else {
s = db_->Delete(write_opts, cfh, key, write_ts);
}
} else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
return txn.Delete(cfh, key);
});
}
// Only update `initial_write_s`, `initial_wal_write_may_succeed` when
// the first write fails
if (!s.ok() && initial_write_s.ok()) {
initial_write_s = s;
initial_wal_write_may_succeed =
!FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) { if (!s.ok()) {
pending_expected_value.Rollback(); pending_expected_value.Rollback();
if (IsErrorInjectedAndRetryable(s)) { if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s; return s;
} else if (FLAGS_inject_error_severity == 2) { } else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ && if (!is_db_stopped_ &&
@ -1746,27 +1815,56 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "delete error: %s\n", s.ToString().c_str()); fprintf(stderr, "delete error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate(); thread->shared->SafeTerminate();
} }
}
pending_expected_value.Commit();
thread->stats.AddDeletes(1);
} else {
PendingExpectedValue pending_expected_value =
shared->PrepareSingleDelete(rand_column_family, rand_key);
if (!FLAGS_use_txn) {
if (FLAGS_user_timestamp_size == 0) {
s = db_->SingleDelete(write_opts, cfh, key);
} else {
s = db_->SingleDelete(write_opts, cfh, key, write_ts);
}
} else { } else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) { pending_expected_value.Commit();
return txn.SingleDelete(cfh, key); thread->stats.AddDeletes(1);
});
} }
} else {
bool prepared = false;
PendingExpectedValue pending_expected_value =
shared->PrepareSingleDelete(rand_column_family, rand_key, &prepared);
if (!prepared) {
pending_expected_value.PermitUnclosedPendingState();
return s;
}
do {
// In order to commit the expected state for the initial write failed
// with injected retryable error and successful WAL write, retry the
// write until it succeeds after the recovery finishes
if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed) {
lock.reset();
std::this_thread::sleep_for(
std::chrono::microseconds(1 * 1000 * 1000));
lock.reset(new MutexLock(
shared->GetMutexForKey(rand_column_family, rand_key)));
}
if (!FLAGS_use_txn) {
if (FLAGS_user_timestamp_size == 0) {
s = db_->SingleDelete(write_opts, cfh, key);
} else {
s = db_->SingleDelete(write_opts, cfh, key, write_ts);
}
} else {
s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) {
return txn.SingleDelete(cfh, key);
});
}
// Only update `initial_write_s`, `initial_wal_write_may_succeed` when
// the first write fails
if (!s.ok() && initial_write_s.ok()) {
initial_write_s = s;
initial_wal_write_may_succeed =
!FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) { if (!s.ok()) {
pending_expected_value.Rollback(); pending_expected_value.Rollback();
if (IsErrorInjectedAndRetryable(s)) { if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s; return s;
} else if (FLAGS_inject_error_severity == 2) { } else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ && if (!is_db_stopped_ &&
@ -1781,9 +1879,10 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "single delete error: %s\n", s.ToString().c_str()); fprintf(stderr, "single delete error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate(); thread->shared->SafeTerminate();
} }
} else {
pending_expected_value.Commit();
thread->stats.AddSingleDeletes(1);
} }
pending_expected_value.Commit();
thread->stats.AddSingleDeletes(1);
} }
return s; return s;
} }
@ -1805,16 +1904,29 @@ class NonBatchedOpsStressTest : public StressTest {
rand_key = rand_key =
thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1); thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1);
} }
for (int j = 0; j < FLAGS_range_deletion_width; ++j) { GetDeleteRangeKeyLocks(thread, rand_column_family, rand_key, &range_locks);
if (j == 0 ||
((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) { // To track the final write status
range_locks.emplace_back(new MutexLock( Status s;
shared->GetMutexForKey(rand_column_family, rand_key + j))); // To track the initial write status
} Status initial_write_s;
} // To track whether WAL write may have succeeded during the initial failed
// write
bool initial_wal_write_may_succeed = true;
bool prepared = false;
std::vector<PendingExpectedValue> pending_expected_values = std::vector<PendingExpectedValue> pending_expected_values =
shared->PrepareDeleteRange(rand_column_family, rand_key, shared->PrepareDeleteRange(rand_column_family, rand_key,
rand_key + FLAGS_range_deletion_width); rand_key + FLAGS_range_deletion_width,
&prepared);
if (!prepared) {
for (PendingExpectedValue& pending_expected_value :
pending_expected_values) {
pending_expected_value.PermitUnclosedPendingState();
}
return s;
}
const int covered = static_cast<int>(pending_expected_values.size()); const int covered = static_cast<int>(pending_expected_values.size());
std::string keystr = Key(rand_key); std::string keystr = Key(rand_key);
Slice key = keystr; Slice key = keystr;
@ -1823,20 +1935,42 @@ class NonBatchedOpsStressTest : public StressTest {
Slice end_key = end_keystr; Slice end_key = end_keystr;
std::string write_ts_str; std::string write_ts_str;
Slice write_ts; Slice write_ts;
Status s;
if (FLAGS_user_timestamp_size) { do {
write_ts_str = GetNowNanos(); // In order to commit the expected state for the initial write failed with
write_ts = write_ts_str; // injected retryable error and successful WAL write, retry the write
s = db_->DeleteRange(write_opts, cfh, key, end_key, write_ts); // until it succeeds after the recovery finishes
} else { if (!s.ok() && IsErrorInjectedAndRetryable(s) &&
s = db_->DeleteRange(write_opts, cfh, key, end_key); initial_wal_write_may_succeed) {
} range_locks.clear();
std::this_thread::sleep_for(std::chrono::microseconds(1 * 1000 * 1000));
GetDeleteRangeKeyLocks(thread, rand_column_family, rand_key,
&range_locks);
}
if (FLAGS_user_timestamp_size) {
write_ts_str = GetNowNanos();
write_ts = write_ts_str;
s = db_->DeleteRange(write_opts, cfh, key, end_key, write_ts);
} else {
s = db_->DeleteRange(write_opts, cfh, key, end_key);
}
// Only update `initial_write_s`, `initial_wal_write_may_succeed` when the
// first write fails
if (!s.ok() && initial_write_s.ok()) {
initial_write_s = s;
initial_wal_write_may_succeed =
!FaultInjectionTestFS::IsFailedToWriteToWALError(initial_write_s);
}
} while (!s.ok() && IsErrorInjectedAndRetryable(s) &&
initial_wal_write_may_succeed);
if (!s.ok()) { if (!s.ok()) {
for (PendingExpectedValue& pending_expected_value : for (PendingExpectedValue& pending_expected_value :
pending_expected_values) { pending_expected_values) {
pending_expected_value.Rollback(); pending_expected_value.Rollback();
} }
if (IsErrorInjectedAndRetryable(s)) { if (IsErrorInjectedAndRetryable(s)) {
assert(!initial_wal_write_may_succeed);
return s; return s;
} else if (FLAGS_inject_error_severity == 2) { } else if (FLAGS_inject_error_severity == 2) {
if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
@ -1850,13 +1984,14 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "delete range error: %s\n", s.ToString().c_str()); fprintf(stderr, "delete range error: %s\n", s.ToString().c_str());
thread->shared->SafeTerminate(); thread->shared->SafeTerminate();
} }
} else {
for (PendingExpectedValue& pending_expected_value :
pending_expected_values) {
pending_expected_value.Commit();
}
thread->stats.AddRangeDeletions(1);
thread->stats.AddCoveredByRangeDeletions(covered);
} }
for (PendingExpectedValue& pending_expected_value :
pending_expected_values) {
pending_expected_value.Commit();
}
thread->stats.AddRangeDeletions(1);
thread->stats.AddCoveredByRangeDeletions(covered);
return s; return s;
} }
@ -1881,6 +2016,7 @@ class NonBatchedOpsStressTest : public StressTest {
// ingestion a clean slate // ingestion a clean slate
s = db_stress_env->DeleteFile(sst_filename); s = db_stress_env->DeleteFile(sst_filename);
} }
if (fault_fs_guard) { if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection( fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead); FaultInjectionIOType::kMetadataRead);
@ -1921,8 +2057,17 @@ class NonBatchedOpsStressTest : public StressTest {
} }
keys.push_back(key); keys.push_back(key);
bool prepared = false;
PendingExpectedValue pending_expected_value = PendingExpectedValue pending_expected_value =
shared->PreparePut(column_family, key); shared->PreparePut(column_family, key, &prepared);
if (!prepared) {
pending_expected_value.PermitUnclosedPendingState();
for (PendingExpectedValue& pev : pending_expected_values) {
pev.PermitUnclosedPendingState();
}
return;
}
const uint32_t value_base = pending_expected_value.GetFinalValueBase(); const uint32_t value_base = pending_expected_value.GetFinalValueBase();
values.push_back(value_base); values.push_back(value_base);
pending_expected_values.push_back(pending_expected_value); pending_expected_values.push_back(pending_expected_value);

View File

@ -22,6 +22,9 @@
#include "rocksdb/rate_limiter.h" #include "rocksdb/rate_limiter.h"
#include "test_util/sync_point.h" #include "test_util/sync_point.h"
#include "util/aligned_buffer.h" #include "util/aligned_buffer.h"
#ifndef NDEBUG
#include "utilities/fault_injection_fs.h"
#endif // NDEBUG
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class Statistics; class Statistics;
@ -327,10 +330,14 @@ class WritableFileWriter {
} }
#endif // NDEBUG #endif // NDEBUG
// TODO(hx235): store the actual previous error status and return it here
IOStatus GetWriterHasPreviousErrorStatus() { IOStatus GetWriterHasPreviousErrorStatus() {
#ifndef NDEBUG #ifndef NDEBUG
if (seen_injected_error_.load(std::memory_order_relaxed)) { if (seen_injected_error_.load(std::memory_order_relaxed)) {
return IOStatus::IOError("Writer has previous injected error."); std::stringstream msg;
msg << "Writer has previous " << FaultInjectionTestFS::kInjected
<< " error.";
return IOStatus::IOError(msg.str());
} }
#endif // NDEBUG #endif // NDEBUG
return IOStatus::IOError("Writer has previous error."); return IOStatus::IOError("Writer has previous error.");

View File

@ -777,14 +777,6 @@ def finalize_and_sanitize(src_params):
# files, which would be problematic when unsynced data can be lost in # files, which would be problematic when unsynced data can be lost in
# crash recoveries. # crash recoveries.
dest_params["enable_compaction_filter"] = 0 dest_params["enable_compaction_filter"] = 0
# Prefix-recoverability relies on tracing successful user writes.
# Currently we trace all user writes regardless of whether it later succeeds or not.
# To simplify, we disable any user write failure injection.
# TODO(hx235): support tracing user writes with failure injection.
# TODO(hx235): support excluding WAL from metadata write fault injection so we don't
# have to disable metadata write fault injection to other file
dest_params["metadata_write_fault_one_in"] = 0
dest_params["exclude_wal_from_write_fault_injection"] = 1
# Only under WritePrepared txns, unordered_write would provide the same guarnatees as vanilla rocksdb # Only under WritePrepared txns, unordered_write would provide the same guarnatees as vanilla rocksdb
# unordered_write is only enabled with --txn, and txn_params disables inplace_update_support, so # unordered_write is only enabled with --txn, and txn_params disables inplace_update_support, so
# setting allow_concurrent_memtable_write=1 won't conflcit with inplace_update_support. # setting allow_concurrent_memtable_write=1 won't conflcit with inplace_update_support.

View File

@ -168,8 +168,9 @@ IOStatus TestFSWritableFile::Append(const Slice& data, const IOOptions& options,
return fs_->GetError(); return fs_->GetError();
} }
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite, IOStatus s = fs_->MaybeInjectThreadLocalError(
options, state_.filename_); FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kAppend);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -203,8 +204,9 @@ IOStatus TestFSWritableFile::Append(
return IOStatus::Corruption("Data is corrupted!"); return IOStatus::Corruption("Data is corrupted!");
} }
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite, IOStatus s = fs_->MaybeInjectThreadLocalError(
options, state_.filename_); FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kAppend);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -266,8 +268,9 @@ IOStatus TestFSWritableFile::PositionedAppend(const Slice& data,
if (fs_->ShouldDataCorruptionBeforeWrite()) { if (fs_->ShouldDataCorruptionBeforeWrite()) {
return IOStatus::Corruption("Data is corrupted!"); return IOStatus::Corruption("Data is corrupted!");
} }
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite, IOStatus s = fs_->MaybeInjectThreadLocalError(
options, state_.filename_); FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kPositionedAppend);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -292,8 +295,9 @@ IOStatus TestFSWritableFile::PositionedAppend(
if (fs_->ShouldDataCorruptionBeforeWrite()) { if (fs_->ShouldDataCorruptionBeforeWrite()) {
return IOStatus::Corruption("Data is corrupted!"); return IOStatus::Corruption("Data is corrupted!");
} }
IOStatus s = fs_->MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite, IOStatus s = fs_->MaybeInjectThreadLocalError(
options, state_.filename_); FaultInjectionIOType::kWrite, options, state_.filename_,
FaultInjectionTestFS::ErrorOperation::kPositionedAppend);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -843,8 +847,9 @@ IOStatus FaultInjectionTestFS::NewWritableFile(
return target()->NewWritableFile(fname, file_opts, result, dbg); return target()->NewWritableFile(fname, file_opts, result, dbg);
} }
IOStatus io_s = MaybeInjectThreadLocalError(FaultInjectionIOType::kWrite, IOStatus io_s = MaybeInjectThreadLocalError(
file_opts.io_options, fname); FaultInjectionIOType::kWrite, file_opts.io_options, fname,
FaultInjectionTestFS::ErrorOperation::kOpen);
if (!io_s.ok()) { if (!io_s.ok()) {
return io_s; return io_s;
} }
@ -1391,9 +1396,12 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
} }
ctx->callstack = port::SaveStack(&ctx->frames); ctx->callstack = port::SaveStack(&ctx->frames);
std::stringstream msg;
msg << FaultInjectionTestFS::kInjected << " ";
if (op != ErrorOperation::kMultiReadSingleReq) { if (op != ErrorOperation::kMultiReadSingleReq) {
// Likely non-per read status code for MultiRead // Likely non-per read status code for MultiRead
ctx->message += "injected read error; "; msg << "read error";
ctx->message = msg.str();
ret_fault_injected = true; ret_fault_injected = true;
ret = IOStatus::IOError(ctx->message); ret = IOStatus::IOError(ctx->message);
} else if (Random::GetTLSInstance()->OneIn(8)) { } else if (Random::GetTLSInstance()->OneIn(8)) {
@ -1401,7 +1409,8 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
// For a small chance, set the failure to status but turn the // For a small chance, set the failure to status but turn the
// result to be empty, which is supposed to be caught for a check. // result to be empty, which is supposed to be caught for a check.
*result = Slice(); *result = Slice();
ctx->message += "injected empty result; "; msg << "empty result";
ctx->message = msg.str();
ret_fault_injected = true; ret_fault_injected = true;
} else if (!direct_io && Random::GetTLSInstance()->OneIn(7) && } else if (!direct_io && Random::GetTLSInstance()->OneIn(7) &&
scratch != nullptr && result->data() == scratch) { scratch != nullptr && result->data() == scratch) {
@ -1418,10 +1427,12 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
// It would work for CRC. Not 100% sure for xxhash and will adjust // It would work for CRC. Not 100% sure for xxhash and will adjust
// if it is not the case. // if it is not the case.
const_cast<char*>(result->data())[result->size() - 1]++; const_cast<char*>(result->data())[result->size() - 1]++;
ctx->message += "injected corrupt last byte; "; msg << "corrupt last byte";
ctx->message = msg.str();
ret_fault_injected = true; ret_fault_injected = true;
} else { } else {
ctx->message += "injected error result multiget single; "; msg << "error result multiget single";
ctx->message = msg.str();
ret_fault_injected = true; ret_fault_injected = true;
ret = IOStatus::IOError(ctx->message); ret = IOStatus::IOError(ctx->message);
} }
@ -1465,7 +1476,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalError(
free(ctx->callstack); free(ctx->callstack);
} }
ctx->callstack = port::SaveStack(&ctx->frames); ctx->callstack = port::SaveStack(&ctx->frames);
ctx->message = GetErrorMessageFromFaultInjectionIOType(type); ctx->message = GetErrorMessage(type, file_name, op);
ret = IOStatus::IOError(ctx->message); ret = IOStatus::IOError(ctx->message);
ret.SetRetryable(ctx->retryable); ret.SetRetryable(ctx->retryable);
ret.SetDataLoss(ctx->has_data_loss); ret.SetDataLoss(ctx->has_data_loss);

View File

@ -224,6 +224,16 @@ class FaultInjectionTestFS : public FileSystemWrapper {
static const char* kClassName() { return "FaultInjectionTestFS"; } static const char* kClassName() { return "FaultInjectionTestFS"; }
const char* Name() const override { return kClassName(); } const char* Name() const override { return kClassName(); }
static bool IsInjectedError(const Status& s) {
assert(!s.ok());
return std::strstr(s.getState(), kInjected.c_str());
}
static bool IsFailedToWriteToWALError(const Status& s) {
assert(!s.ok());
return std::strstr(s.getState(), kFailedToWriteToWAL.c_str());
}
IOStatus NewDirectory(const std::string& name, const IOOptions& options, IOStatus NewDirectory(const std::string& name, const IOOptions& options,
std::unique_ptr<FSDirectory>* result, std::unique_ptr<FSDirectory>* result,
IODebugContext* dbg) override; IODebugContext* dbg) override;
@ -472,6 +482,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
kMultiReadSingleReq = 1, kMultiReadSingleReq = 1,
kMultiRead = 2, kMultiRead = 2,
kOpen, kOpen,
kAppend,
kPositionedAppend,
kUnknown, kUnknown,
}; };
@ -520,17 +532,6 @@ class FaultInjectionTestFS : public FileSystemWrapper {
file_types_excluded_from_write_fault_injection_ = types; file_types_excluded_from_write_fault_injection_ = types;
} }
bool ShouldExcludeFromWriteFaultInjection(const std::string& file_name) {
MutexLock l(&mutex_);
FileType file_type = kTempFile;
uint64_t file_number = 0;
if (!TryParseFileName(file_name, &file_number, &file_type)) {
return false;
}
return file_types_excluded_from_write_fault_injection_.find(file_type) !=
file_types_excluded_from_write_fault_injection_.end();
}
void EnableThreadLocalErrorInjection(FaultInjectionIOType type) { void EnableThreadLocalErrorInjection(FaultInjectionIOType type) {
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type); ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
if (ctx) { if (ctx) {
@ -538,6 +539,13 @@ class FaultInjectionTestFS : public FileSystemWrapper {
} }
} }
void EnableAllThreadLocalErrorInjection() {
EnableThreadLocalErrorInjection(FaultInjectionIOType::kRead);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataRead);
EnableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
}
void DisableThreadLocalErrorInjection(FaultInjectionIOType type) { void DisableThreadLocalErrorInjection(FaultInjectionIOType type) {
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type); ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
if (ctx) { if (ctx) {
@ -545,6 +553,13 @@ class FaultInjectionTestFS : public FileSystemWrapper {
} }
} }
void DisableAllThreadLocalErrorInjection() {
DisableThreadLocalErrorInjection(FaultInjectionIOType::kRead);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataRead);
DisableThreadLocalErrorInjection(FaultInjectionIOType::kMetadataWrite);
}
void PrintInjectedThreadLocalErrorBacktrace(FaultInjectionIOType type); void PrintInjectedThreadLocalErrorBacktrace(FaultInjectionIOType type);
// If there is unsynced data in the specified file within the specified // If there is unsynced data in the specified file within the specified
@ -556,7 +571,11 @@ class FaultInjectionTestFS : public FileSystemWrapper {
void ReadUnsynced(const std::string& fname, uint64_t offset, size_t n, void ReadUnsynced(const std::string& fname, uint64_t offset, size_t n,
Slice* result, char* scratch, int64_t* pos_at_last_sync); Slice* result, char* scratch, int64_t* pos_at_last_sync);
inline static const std::string kInjected = "injected";
private: private:
inline static const std::string kFailedToWriteToWAL =
"failed to write to WAL";
port::Mutex mutex_; port::Mutex mutex_;
std::map<std::string, FSFileState> db_file_state_; std::map<std::string, FSFileState> db_file_state_;
std::set<std::string> open_managed_files_; std::set<std::string> open_managed_files_;
@ -628,6 +647,18 @@ class FaultInjectionTestFS : public FileSystemWrapper {
bool direct_io, char* scratch, bool direct_io, char* scratch,
bool need_count_increase, bool need_count_increase,
bool* fault_injected); bool* fault_injected);
bool ShouldExcludeFromWriteFaultInjection(const std::string& file_name) {
MutexLock l(&mutex_);
FileType file_type = kTempFile;
uint64_t file_number = 0;
if (!TryParseFileName(file_name, &file_number, &file_type)) {
return false;
}
return file_types_excluded_from_write_fault_injection_.find(file_type) !=
file_types_excluded_from_write_fault_injection_.end();
}
// Extract number of type from file name. Return false if failing to fine // Extract number of type from file name. Return false if failing to fine
// them. // them.
bool TryParseFileName(const std::string& file_name, uint64_t* number, bool TryParseFileName(const std::string& file_name, uint64_t* number,
@ -690,27 +721,39 @@ class FaultInjectionTestFS : public FileSystemWrapper {
} }
} }
std::string GetErrorMessageFromFaultInjectionIOType( std::string GetErrorMessage(FaultInjectionIOType type,
FaultInjectionIOType type) { const std::string& file_name, ErrorOperation op) {
std::string msg = ""; std::ostringstream msg;
msg << kInjected << " ";
switch (type) { switch (type) {
case FaultInjectionIOType::kRead: case FaultInjectionIOType::kRead:
msg = "injected read error"; msg << "read error";
break; break;
case FaultInjectionIOType::kWrite: case FaultInjectionIOType::kWrite:
msg = "injected write error"; msg << "write error";
break; break;
case FaultInjectionIOType::kMetadataRead: case FaultInjectionIOType::kMetadataRead:
msg = "injected metadata read error"; msg << "metadata read error";
break; break;
case FaultInjectionIOType::kMetadataWrite: case FaultInjectionIOType::kMetadataWrite:
msg = "injected metadata write error"; msg << "metadata write error";
break; break;
default: default:
assert(false); assert(false);
break; break;
} }
return msg;
if (type == FaultInjectionIOType::kWrite &&
(op == ErrorOperation::kOpen || op == ErrorOperation::kAppend ||
op == ErrorOperation::kPositionedAppend)) {
FileType file_type = kTempFile;
uint64_t ignore = 0;
if (TryParseFileName(file_name, &ignore, &file_type) &&
file_type == FileType::kWalFile) {
msg << " " << kFailedToWriteToWAL;
}
}
return msg.str();
} }
}; };