Calculate `injected_error_count` even when `SharedState::ignore_read_error` (#12800)

Summary:
**Context/Summary:**

`injected_error_count` is needed to verify read error injection. For example, when injected_error_count == 0, the read call should not return error. 981fd432fa only calculated `injected_error_count` under `SharedState::ignore_read_error=false` so `injected_error_count==0` when `SharedState::ignore_read_error=true`. However  we can still inject read error in critical read path under `SharedState::ignore_read_error=true` so the read call is expected to return injected error. This contradicts to the  `injected_error_count == 0` as we skipped its calculation. As a consequence, we see

```
TestPrefixScan error: IO error: injected read error;
Verification failed
```
in code paths
```
if (s.ok()) {
    thread->stats.AddPrefixes(1, count);
  } else if (injected_error_count > 0 && IsRetryableInjectedError(s)) {
    fprintf(stdout, "TestPrefixScan error: %s\n", s.ToString().c_str());
  } else {
    fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
    thread->shared->SetVerificationFailure();
}
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12800

Test Plan: CI

Reviewed By: cbi42

Differential Revision: D58918014

Pulled By: hx235

fbshipit-source-id: d73139c114fb3f61003dedca116f7ec36309eca4
This commit is contained in:
Hui Xiao 2024-06-23 21:54:27 -07:00 committed by Facebook GitHub Bot
parent 9d64ca55b7
commit e90e9153d5
1 changed files with 25 additions and 16 deletions

View File

@ -538,13 +538,14 @@ class NonBatchedOpsStressTest : public StressTest {
thread->shared->Get(rand_column_families[0], rand_keys[0]);
int injected_error_count = 0;
if (fault_fs_guard && !SharedState::ignore_read_error) {
if (fault_fs_guard) {
injected_error_count = GetMinInjectedErrorCount(
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kRead),
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kMetadataRead));
if (injected_error_count > 0 && (s.ok() || s.IsNotFound())) {
if (!SharedState::ignore_read_error && injected_error_count > 0 &&
(s.ok() || s.IsNotFound())) {
// Grab mutex so multiple thread don't try to print the
// stack trace at the same time
MutexLock l(thread->shared->GetMutex());
@ -692,7 +693,7 @@ class NonBatchedOpsStressTest : public StressTest {
}
db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(),
statuses.data());
if (fault_fs_guard && !SharedState::ignore_read_error) {
if (fault_fs_guard) {
injected_error_count = GetMinInjectedErrorCount(
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kRead),
@ -706,7 +707,8 @@ class NonBatchedOpsStressTest : public StressTest {
stat_nok_nfound++;
}
}
if (stat_nok_nfound < injected_error_count) {
if (!SharedState::ignore_read_error &&
stat_nok_nfound < injected_error_count) {
// Grab mutex so multiple thread don't try to print the
// stack trace at the same time
MutexLock l(shared->GetMutex());
@ -1001,13 +1003,14 @@ class NonBatchedOpsStressTest : public StressTest {
thread->shared->Get(column_family, key);
int injected_error_count = 0;
if (fault_fs_guard && !SharedState::ignore_read_error) {
if (fault_fs_guard) {
injected_error_count = GetMinInjectedErrorCount(
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kRead),
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kMetadataRead));
if (injected_error_count > 0 && (s.ok() || s.IsNotFound())) {
if (!SharedState::ignore_read_error && injected_error_count > 0 &&
(s.ok() || s.IsNotFound())) {
// Grab mutex so multiple thread don't try to print the
// stack trace at the same time
MutexLock l(thread->shared->GetMutex());
@ -1168,7 +1171,8 @@ class NonBatchedOpsStressTest : public StressTest {
}
}
if (stat_nok_nfound < injected_error_count) {
if (!SharedState::ignore_read_error &&
stat_nok_nfound < injected_error_count) {
// Grab mutex so multiple threads don't try to print the
// stack trace at the same time
assert(thread->shared);
@ -1401,7 +1405,7 @@ class NonBatchedOpsStressTest : public StressTest {
db_->MultiGetEntity(read_opts_copy, num_keys, key_slices.data(),
results.data());
if (fault_fs_guard && !SharedState::ignore_read_error) {
if (fault_fs_guard) {
verify_expected_errors(
[&](size_t i) { return results[i][0].status(); });
}
@ -1431,7 +1435,7 @@ class NonBatchedOpsStressTest : public StressTest {
db_->MultiGetEntity(read_opts_copy, cfh, num_keys, key_slices.data(),
results.data(), statuses.data());
if (fault_fs_guard && !SharedState::ignore_read_error) {
if (fault_fs_guard) {
verify_expected_errors([&](size_t i) { return statuses[i]; });
}
@ -1478,11 +1482,6 @@ class NonBatchedOpsStressTest : public StressTest {
MaybeUseOlderTimestampForRangeScan(thread, read_ts_str, read_ts_slice,
ro_copy);
std::unique_ptr<Iterator> iter(db_->NewIterator(ro_copy, cfh));
uint64_t count = 0;
Status s;
if (fault_fs_guard) {
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kRead);
@ -1491,6 +1490,11 @@ class NonBatchedOpsStressTest : public StressTest {
SharedState::ignore_read_error = false;
}
std::unique_ptr<Iterator> iter(db_->NewIterator(ro_copy, cfh));
uint64_t count = 0;
Status s;
for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix);
iter->Next()) {
++count;
@ -1522,13 +1526,14 @@ class NonBatchedOpsStressTest : public StressTest {
}
int injected_error_count = 0;
if (fault_fs_guard && !SharedState::ignore_read_error) {
if (fault_fs_guard) {
injected_error_count = GetMinInjectedErrorCount(
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kRead),
fault_fs_guard->GetAndResetInjectedThreadLocalErrorCount(
FaultInjectionIOType::kMetadataRead));
if (injected_error_count > 0 && s.ok()) {
if (!SharedState::ignore_read_error && injected_error_count > 0 &&
s.ok()) {
// Grab mutex so multiple thread don't try to print the
// stack trace at the same time
MutexLock l(thread->shared->GetMutex());
@ -1547,6 +1552,10 @@ class NonBatchedOpsStressTest : public StressTest {
} else if (injected_error_count > 0 && IsRetryableInjectedError(s)) {
fprintf(stdout, "TestPrefixScan error: %s\n", s.ToString().c_str());
} else {
fprintf(stderr, "TestPrefixScan is retryable error? %s\n",
IsRetryableInjectedError(s) ? "true" : "false");
fprintf(stderr, "TestPrefixScan injected_error_count: %d\n",
injected_error_count);
fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
thread->shared->SetVerificationFailure();
}