2019-12-09 07:49:32 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#ifdef GFLAGS
|
|
|
|
#include "db_stress_tool/db_stress_common.h"
|
2020-07-09 21:33:42 +00:00
|
|
|
#include "file/file_util.h"
|
2019-12-09 07:49:32 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2019-12-09 07:49:32 +00:00
|
|
|
class CfConsistencyStressTest : public StressTest {
|
|
|
|
public:
|
|
|
|
// Initializes the batch id counter to zero. TestPut draws the value base for
// each write from this counter via fetch_add.
CfConsistencyStressTest() : batch_id_(0) {}
|
|
|
|
|
2023-12-04 19:17:32 +00:00
|
|
|
// Defaulted destructor; marked `override`, so it overrides the base class
// destructor without adding any cleanup of its own.
~CfConsistencyStressTest() override = default;
|
2019-12-09 07:49:32 +00:00
|
|
|
|
2021-12-07 21:40:46 +00:00
|
|
|
// Always reports false.
// NOTE(review): presumably this tells the stress-test framework that this
// test keeps no per-key expected state -- confirm against StressTest.
bool IsStateTracked() const override { return false; }
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
Status TestPut(ThreadState* thread, WriteOptions& write_opts,
|
|
|
|
const ReadOptions& /* read_opts */,
|
|
|
|
const std::vector<int>& rand_column_families,
|
2022-09-15 22:55:37 +00:00
|
|
|
const std::vector<int64_t>& rand_keys,
|
|
|
|
char (&value)[100]) override {
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2022-09-30 18:11:07 +00:00
|
|
|
assert(!rand_column_families.empty());
|
|
|
|
assert(!rand_keys.empty());
|
|
|
|
|
|
|
|
const std::string k = Key(rand_keys[0]);
|
|
|
|
|
|
|
|
const uint32_t value_base = batch_id_.fetch_add(1);
|
|
|
|
const size_t sz = GenerateValue(value_base, value, sizeof(value));
|
|
|
|
const Slice v(value, sz);
|
|
|
|
|
2019-12-09 07:49:32 +00:00
|
|
|
WriteBatch batch;
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2022-09-30 18:11:07 +00:00
|
|
|
|
2024-04-30 22:40:35 +00:00
|
|
|
Status status;
|
2024-05-09 23:40:22 +00:00
|
|
|
if (FLAGS_use_attribute_group && FLAGS_use_put_entity_one_in > 0 &&
|
|
|
|
(value_base % FLAGS_use_put_entity_one_in) == 0) {
|
|
|
|
std::vector<ColumnFamilyHandle*> cfhs;
|
|
|
|
cfhs.reserve(rand_column_families.size());
|
|
|
|
for (auto cf : rand_column_families) {
|
|
|
|
cfhs.push_back(column_families_[cf]);
|
2024-04-30 22:40:35 +00:00
|
|
|
}
|
2024-05-09 23:40:22 +00:00
|
|
|
status = batch.PutEntity(k, GenerateAttributeGroups(cfhs, value_base, v));
|
|
|
|
} else {
|
|
|
|
for (auto cf : rand_column_families) {
|
|
|
|
ColumnFamilyHandle* const cfh = column_families_[cf];
|
|
|
|
assert(cfh);
|
|
|
|
|
|
|
|
if (FLAGS_use_put_entity_one_in > 0 &&
|
|
|
|
(value_base % FLAGS_use_put_entity_one_in) == 0) {
|
|
|
|
status = batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v));
|
|
|
|
} else if (FLAGS_use_timed_put_one_in > 0 &&
|
|
|
|
((value_base + kLargePrimeForCommonFactorSkew) %
|
|
|
|
FLAGS_use_timed_put_one_in) == 0) {
|
|
|
|
uint64_t write_unix_time = GetWriteUnixTime(thread);
|
|
|
|
status = batch.TimedPut(cfh, k, v, write_unix_time);
|
|
|
|
} else if (FLAGS_use_merge) {
|
|
|
|
status = batch.Merge(cfh, k, v);
|
|
|
|
} else {
|
|
|
|
status = batch.Put(cfh, k, v);
|
|
|
|
}
|
|
|
|
if (!status.ok()) {
|
|
|
|
break;
|
|
|
|
}
|
2019-12-09 07:49:32 +00:00
|
|
|
}
|
|
|
|
}
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2022-09-30 18:11:07 +00:00
|
|
|
|
2024-04-30 22:40:35 +00:00
|
|
|
if (status.ok()) {
|
|
|
|
status = db_->Write(write_opts, &batch);
|
|
|
|
}
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2022-09-30 18:11:07 +00:00
|
|
|
|
2024-06-25 03:51:39 +00:00
|
|
|
if (status.ok()) {
|
|
|
|
auto num = static_cast<long>(rand_column_families.size());
|
|
|
|
thread->stats.AddBytesForWrites(num, (sz + 1) * num);
|
|
|
|
} else if (!IsErrorInjectedAndRetryable(status)) {
|
2024-04-30 22:40:35 +00:00
|
|
|
fprintf(stderr, "multi put or merge error: %s\n",
|
|
|
|
status.ToString().c_str());
|
2019-12-09 07:49:32 +00:00
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
}
|
|
|
|
|
2024-04-30 22:40:35 +00:00
|
|
|
return status;
|
2019-12-09 07:49:32 +00:00
|
|
|
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
// Deletes the key derived from rand_keys[0] from every column family listed
// in `rand_column_families`, using a single WriteBatch.
//
// On success the delete counter is advanced by the number of column families.
// On failure the error is logged and counted unless
// IsErrorInjectedAndRetryable() accepts it. The write status is returned.
Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
                  const std::vector<int>& rand_column_families,
                  const std::vector<int64_t>& rand_keys) override {
  const std::string key_str = Key(rand_keys[0]);
  const Slice key(key_str);

  // Queue one Delete per column family, then apply them in one write.
  WriteBatch batch;
  for (size_t i = 0; i < rand_column_families.size(); ++i) {
    ColumnFamilyHandle* const handle =
        column_families_[rand_column_families[i]];
    batch.Delete(handle, key);
  }

  Status write_status = db_->Write(write_opts, &batch);
  if (write_status.ok()) {
    const auto cf_count = static_cast<long>(rand_column_families.size());
    thread->stats.AddDeletes(cf_count);
    return write_status;
  }

  if (!IsErrorInjectedAndRetryable(write_status)) {
    fprintf(stderr, "multidel error: %s\n", write_status.ToString().c_str());
    thread->stats.AddErrors(1);
  }
  return write_status;
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
// Issues a range deletion from Key(rand_key) to
// Key(rand_key + FLAGS_range_deletion_width) against every column family
// listed in `rand_column_families`, using a single WriteBatch.
//
// rand_keys[0] is the requested start key. If the range would extend past
// the shared maximum key, a new start key is drawn so that the whole range
// fits.
//
// On success the range-deletion counter is advanced by the number of column
// families. On failure the error is logged and counted unless
// IsErrorInjectedAndRetryable() accepts it. The write status is returned.
Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts,
                       const std::vector<int>& rand_column_families,
                       const std::vector<int64_t>& rand_keys) override {
  int64_t rand_key = rand_keys[0];
  auto shared = thread->shared;
  int64_t max_key = shared->GetMaxKey();
  if (rand_key > max_key - FLAGS_range_deletion_width) {
    // Re-draw the start key so that start + width stays within max_key.
    rand_key =
        thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1);
  }
  std::string key_str = Key(rand_key);
  Slice key = key_str;
  std::string end_key_str = Key(rand_key + FLAGS_range_deletion_width);
  Slice end_key = end_key_str;

  WriteBatch batch;
  for (auto cf : rand_column_families) {
    // `cf` is already a column family index taken from rand_column_families;
    // it must index column_families_ directly. Indexing rand_column_families
    // with it a second time reads out of bounds whenever that vector is not
    // the identity sequence 0..n-1.
    ColumnFamilyHandle* cfh = column_families_[cf];
    batch.DeleteRange(cfh, key, end_key);
  }

  Status s = db_->Write(write_opts, &batch);
  if (s.ok()) {
    thread->stats.AddRangeDeletions(
        static_cast<long>(rand_column_families.size()));
  } else if (!IsErrorInjectedAndRetryable(s)) {
    fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str());
    thread->stats.AddErrors(1);
  }
  return s;
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
void TestIngestExternalFile(
|
2019-12-09 07:49:32 +00:00
|
|
|
ThreadState* /* thread */,
|
|
|
|
const std::vector<int>& /* rand_column_families */,
|
2022-09-15 22:55:37 +00:00
|
|
|
const std::vector<int64_t>& /* rand_keys */) override {
|
2019-12-09 07:49:32 +00:00
|
|
|
assert(false);
|
|
|
|
fprintf(stderr,
|
|
|
|
"CfConsistencyStressTest does not support TestIngestExternalFile "
|
|
|
|
"because it's not possible to verify the result\n");
|
|
|
|
std::terminate();
|
|
|
|
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
// Reads the key derived from rand_keys[0] in one of two modes, chosen with
// equal probability:
//   1. a plain Get() against one randomly picked column family, or
//   2. a cross-CF check: under one snapshot, the key is read from
//      rand_column_families[0] and then from every other listed column
//      family, and each result must agree with the first (both not found, or
//      both found with equal values).
//
// In mode 2, read and metadata-read error injection is switched off for the
// comparison reads and switched back on afterwards.
//
// A mismatch is reported on stderr, counted as an error, and flagged as a
// verification failure. Otherwise the get statistics are updated, or the
// error is logged and counted unless IsErrorInjectedAndRetryable() accepts
// it. The status of the last Get() issued is returned.
Status TestGet(ThreadState* thread, const ReadOptions& readoptions,
               const std::vector<int>& rand_column_families,
               const std::vector<int64_t>& rand_keys) override {
  const std::string key_str = Key(rand_keys[0]);
  const Slice key(key_str);
  Status s;
  bool is_consistent = true;

  if (thread->rand.OneIn(2)) {
    // Mode 1: single lookup in a random column family; the value is unused.
    const size_t pick = thread->rand.Next() % rand_column_families.size();
    ColumnFamilyHandle* const cfh =
        column_families_[rand_column_families[pick]];
    std::string unused_value;
    s = db_->Get(readoptions, cfh, key, &unused_value);
  } else {
    // Mode 2: all reads go through the same snapshot.
    const Snapshot* const snap = db_->GetSnapshot();
    ReadOptions snap_read_opts = readoptions;
    snap_read_opts.snapshot = snap;

    std::string base_value;
    s = db_->Get(snap_read_opts, column_families_[rand_column_families[0]],
                 key, &base_value);

    // Temporarily disable error injection for verification
    if (fault_fs_guard) {
      fault_fs_guard->DisableThreadLocalErrorInjection(
          FaultInjectionIOType::kRead);
      fault_fs_guard->DisableThreadLocalErrorInjection(
          FaultInjectionIOType::kMetadataRead);
    }

    if (s.ok() || s.IsNotFound()) {
      const bool base_found = s.ok();

      // Prints what one column family returned for the key.
      auto report_cf = [this](size_t name_idx, bool was_found,
                              const std::string& val) {
        if (was_found) {
          fprintf(stderr, "CF %s returns value %s\n",
                  column_family_names_[name_idx].c_str(),
                  Slice(val).ToString(true).c_str());
        } else {
          fprintf(stderr, "CF %s is not found\n",
                  column_family_names_[name_idx].c_str());
        }
      };

      for (size_t i = 1; i < rand_column_families.size(); ++i) {
        std::string other_value;
        s = db_->Get(snap_read_opts,
                     column_families_[rand_column_families[i]], key,
                     &other_value);
        const bool other_found = s.ok();
        if (!other_found && !s.IsNotFound()) {
          // A real read error: stop comparing and let the caller see it.
          break;
        }

        // From here on the status is either OK or NotFound.
        const bool agrees = (base_found == other_found) &&
                            (!base_found || base_value == other_value);
        if (agrees) {
          continue;
        }

        fprintf(stderr, "Get() return different results with key %s\n",
                key.ToString(true).c_str());
        report_cf(0, base_found, base_value);
        report_cf(i, other_found, other_value);
        is_consistent = false;
        break;
      }
    }

    // Enable back error injection disabled for verification
    if (fault_fs_guard) {
      fault_fs_guard->EnableThreadLocalErrorInjection(
          FaultInjectionIOType::kRead);
      fault_fs_guard->EnableThreadLocalErrorInjection(
          FaultInjectionIOType::kMetadataRead);
    }
    db_->ReleaseSnapshot(snap);
  }

  if (!is_consistent) {
    fprintf(stderr, "TestGet error: is_consistent is false\n");
    thread->stats.AddErrors(1);
    // Fail fast to preserve the DB state.
    thread->shared->SetVerificationFailure();
  } else if (s.ok()) {
    thread->stats.AddGets(1, 1);
  } else if (s.IsNotFound()) {
    thread->stats.AddGets(1, 0);
  } else if (!IsErrorInjectedAndRetryable(s)) {
    fprintf(stderr, "TestGet error: %s\n", s.ToString().c_str());
    thread->stats.AddErrors(1);
  }
  return s;
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
std::vector<Status> TestMultiGet(
|
2019-12-09 07:49:32 +00:00
|
|
|
ThreadState* thread, const ReadOptions& read_opts,
|
|
|
|
const std::vector<int>& rand_column_families,
|
2019-12-09 22:36:10 +00:00
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
2019-12-09 07:49:32 +00:00
|
|
|
size_t num_keys = rand_keys.size();
|
|
|
|
std::vector<std::string> key_str;
|
|
|
|
std::vector<Slice> keys;
|
|
|
|
keys.reserve(num_keys);
|
|
|
|
key_str.reserve(num_keys);
|
|
|
|
std::vector<PinnableSlice> values(num_keys);
|
|
|
|
std::vector<Status> statuses(num_keys);
|
|
|
|
ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]];
|
2022-06-17 23:40:47 +00:00
|
|
|
ReadOptions readoptionscopy = read_opts;
|
|
|
|
readoptionscopy.rate_limiter_priority =
|
|
|
|
FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
|
2019-12-09 07:49:32 +00:00
|
|
|
|
|
|
|
for (size_t i = 0; i < num_keys; ++i) {
|
|
|
|
key_str.emplace_back(Key(rand_keys[i]));
|
|
|
|
keys.emplace_back(key_str.back());
|
|
|
|
}
|
2022-06-17 23:40:47 +00:00
|
|
|
db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(),
|
2019-12-09 07:49:32 +00:00
|
|
|
statuses.data());
|
2023-12-04 19:17:32 +00:00
|
|
|
for (const auto& s : statuses) {
|
2019-12-09 07:49:32 +00:00
|
|
|
if (s.ok()) {
|
|
|
|
// found case
|
|
|
|
thread->stats.AddGets(1, 1);
|
|
|
|
} else if (s.IsNotFound()) {
|
|
|
|
// not found case
|
|
|
|
thread->stats.AddGets(1, 0);
|
2024-06-25 03:51:39 +00:00
|
|
|
} else if (!IsErrorInjectedAndRetryable(s)) {
|
2019-12-09 07:49:32 +00:00
|
|
|
// errors case
|
2020-01-03 00:43:55 +00:00
|
|
|
fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str());
|
2019-12-09 07:49:32 +00:00
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return statuses;
|
|
|
|
}
|
|
|
|
|
2023-03-17 21:47:29 +00:00
|
|
|
void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
|
|
|
|
const std::vector<int>& rand_column_families,
|
|
|
|
const std::vector<int64_t>& rand_keys) override {
|
|
|
|
assert(thread);
|
|
|
|
assert(!rand_column_families.empty());
|
|
|
|
assert(!rand_keys.empty());
|
|
|
|
|
|
|
|
const std::string key = Key(rand_keys[0]);
|
|
|
|
|
|
|
|
Status s;
|
|
|
|
bool is_consistent = true;
|
|
|
|
|
|
|
|
if (thread->rand.OneIn(2)) {
|
|
|
|
// With a 1/2 chance, do a random read from a random CF
|
|
|
|
const size_t cf_id = thread->rand.Next() % rand_column_families.size();
|
|
|
|
|
|
|
|
assert(rand_column_families[cf_id] >= 0);
|
|
|
|
assert(rand_column_families[cf_id] <
|
|
|
|
static_cast<int>(column_families_.size()));
|
|
|
|
|
|
|
|
ColumnFamilyHandle* const cfh =
|
|
|
|
column_families_[rand_column_families[cf_id]];
|
|
|
|
assert(cfh);
|
|
|
|
|
|
|
|
PinnableWideColumns result;
|
|
|
|
s = db_->GetEntity(read_opts, cfh, key, &result);
|
|
|
|
|
2024-06-25 03:51:39 +00:00
|
|
|
if (s.ok()) {
|
2023-03-17 21:47:29 +00:00
|
|
|
if (!VerifyWideColumns(result.columns())) {
|
|
|
|
fprintf(
|
|
|
|
stderr,
|
|
|
|
"GetEntity error: inconsistent columns for key %s, entity %s\n",
|
|
|
|
StringToHex(key).c_str(),
|
|
|
|
WideColumnsToHex(result.columns()).c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// With a 1/2 chance, compare one key across all CFs
|
|
|
|
ManagedSnapshot snapshot_guard(db_);
|
|
|
|
|
|
|
|
ReadOptions read_opts_copy = read_opts;
|
|
|
|
read_opts_copy.snapshot = snapshot_guard.snapshot();
|
|
|
|
|
|
|
|
assert(rand_column_families[0] >= 0);
|
|
|
|
assert(rand_column_families[0] <
|
|
|
|
static_cast<int>(column_families_.size()));
|
|
|
|
|
|
|
|
PinnableWideColumns cmp_result;
|
|
|
|
s = db_->GetEntity(read_opts_copy,
|
|
|
|
column_families_[rand_column_families[0]], key,
|
|
|
|
&cmp_result);
|
|
|
|
|
Inject more errors to more files in stress test (#12713)
Summary:
**Context:**
We currently have partial error injection:
- DB operation: all read, SST write
- DB open: all read, SST write, all metadata write.
This PR completes the error injection (with some limitations below):
- DB operation & open: all read, all write, all metadata write, all metadata read
**Summary:**
- Inject retryable metadata read and metadata write errors concerning directories (e.g., dir sync) or file metadata (e.g., name, size, file creation/deletion...)
- Inject retryable errors to all major file types: random access file, sequential file, writable file
- Allow db stress test operations to handle above injected errors gracefully without crashing
- Change all error injection to thread-local implementation for easier disabling and enabling in the same thread. For example, we can control error handling thread to have no error injection. It's also cleaner in code.
- Limitation: compared to before, we now don't have write fault injection for backup/restore CopyOrCreateFiles work threads since they use anonymous background threads as well as read injection for db open bg thread
- Add a new flag to test error recovery without error injection so we can test the path where error recovery actually succeeds
- Some refactoring of and fixes to the db stress test framework (see PR review comments)
- Fix some minor bugs surfaced (see PR review comments)
- Limitation: had to disable backup restore with metadata read/write injection since it surfaces too many testing issues. Will add it back later to focus on surfacing actual code/internal bugs first.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12713
Test Plan:
- Existing UT
- CI with no trivial error failure
Reviewed By: pdillinger
Differential Revision: D58326608
Pulled By: hx235
fbshipit-source-id: 011b5195aaeb6011641ae0a9194f7f2a0e325ad7
2024-06-19 15:42:00 +00:00
|
|
|
// Temporarily disable error injection for verification
|
|
|
|
if (fault_fs_guard) {
|
|
|
|
fault_fs_guard->DisableThreadLocalErrorInjection(
|
|
|
|
FaultInjectionIOType::kRead);
|
|
|
|
fault_fs_guard->DisableThreadLocalErrorInjection(
|
|
|
|
FaultInjectionIOType::kMetadataRead);
|
|
|
|
}
|
Fix nullptr access and race to fault_fs_guard (#12799)
Summary:
**Context/Summary:**
There are a couple places where we forgot to check fault_fs_guard before accessing it. So we can see something like this occasionally
```
=138831==Hint: address points to the zero page.
SCARINESS: 10 (null-deref)
AddressSanitizer:DEADLYSIGNAL
#0 0x18b9e0b in rocksdb::ThreadLocalPtr::Get() const fbcode/internal_repo_rocksdb/repo/util/thread_local.cc:503
https://github.com/facebook/rocksdb/issues/1 0x83d8b7 in rocksdb::StressTest::TestCompactRange(rocksdb::ThreadState*, long, rocksdb::Slice const&, rocksdb::ColumnFamilyHandle*) fbcode/internal_repo_rocksdb/repo/utilities/fault_injection_fs.h
```
Also, access to `io_activties_exempted_from_fault_injection.find` is not fully synchronized, so we see the following
```
WARNING: ThreadSanitizer: data race (pid=90939)
Write of size 8 at 0x7b4c000004d0 by thread T762 (mutexes: write M0):
#0 std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>>::operator=(std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>> const&) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:208 (db_stress+0x411c32) (BuildId: b803e5aca22c6b080defed8e85b7bfec)
https://github.com/facebook/rocksdb/issues/1 rocksdb::DbStressListener::OnErrorRecoveryCompleted(rocksdb::Status) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_set.h:298 (db_stress+0x4112e5) (BuildId: b803e5aca22c6b080defed8e85b7bfec)
https://github.com/facebook/rocksdb/issues/2 rocksdb::EventHelpers::NotifyOnErrorRecoveryEnd(std::vector<std::shared_ptr<rocksdb::EventListener>, std::allocator<std::shared_ptr<rocksdb::EventListener>>> const&, rocksdb::Status const&, rocksdb::Status const&, rocksdb::InstrumentedMutex*) fbcode/internal_repo_rocksdb/repo/db/event_helpers.cc:239 (db_stress+0xa09d60) (BuildId: b803e5aca22c6b080defed8e85b7bfec)
Previous read of size 8 at 0x7b4c000004d0 by thread T131 (mutexes: write M1):
#0 rocksdb::FaultInjectionTestFS::MaybeInjectThreadLocalError(rocksdb::FaultInjectionIOType, rocksdb::IOOptions const&, rocksdb::FaultInjectionTestFS::ErrorOperation, rocksdb::Slice*, bool, char*, bool, bool*) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:798 (db_stress+0xf7d0f3) (BuildId: b803e5aca22c6b080defed8e85b7bfec)
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12799
Test Plan: CI
Reviewed By: jowlyzhang
Differential Revision: D58917449
Pulled By: hx235
fbshipit-source-id: f24fc1acc2a7d91f9f285447a97ba41397f48dbd
2024-06-24 23:10:36 +00:00
|
|
|
|
2023-03-17 21:47:29 +00:00
|
|
|
if (s.ok() || s.IsNotFound()) {
|
|
|
|
const bool cmp_found = s.ok();
|
|
|
|
|
|
|
|
if (cmp_found) {
|
|
|
|
if (!VerifyWideColumns(cmp_result.columns())) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"GetEntity error: inconsistent columns for key %s, "
|
|
|
|
"entity %s\n",
|
|
|
|
StringToHex(key).c_str(),
|
|
|
|
WideColumnsToHex(cmp_result.columns()).c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_consistent) {
|
2024-05-09 23:40:22 +00:00
|
|
|
if (FLAGS_use_attribute_group) {
|
|
|
|
PinnableAttributeGroups result;
|
|
|
|
result.reserve(rand_column_families.size());
|
|
|
|
for (size_t i = 1; i < rand_column_families.size(); ++i) {
|
|
|
|
assert(rand_column_families[i] >= 0);
|
|
|
|
assert(rand_column_families[i] <
|
|
|
|
static_cast<int>(column_families_.size()));
|
|
|
|
|
|
|
|
result.emplace_back(column_families_[rand_column_families[i]]);
|
2023-03-17 21:47:29 +00:00
|
|
|
}
|
2024-05-09 23:40:22 +00:00
|
|
|
s = db_->GetEntity(read_opts_copy, key, &result);
|
|
|
|
if (s.ok()) {
|
|
|
|
for (auto& attribute_group : result) {
|
|
|
|
s = attribute_group.status();
|
|
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
const bool found = s.ok();
|
|
|
|
|
|
|
|
if (!cmp_found && found) {
|
|
|
|
fprintf(
|
|
|
|
stderr,
|
|
|
|
"Non-AttributeGroup GetEntity returns different results "
|
|
|
|
"than AttributeGroup GetEntity for key %s: CF %s "
|
|
|
|
"returns not found, CF %s returns entity %s \n",
|
2023-03-17 21:47:29 +00:00
|
|
|
StringToHex(key).c_str(), column_family_names_[0].c_str(),
|
2024-05-09 23:40:22 +00:00
|
|
|
attribute_group.column_family()->GetName().c_str(),
|
|
|
|
WideColumnsToHex(attribute_group.columns()).c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (cmp_found && !found) {
|
|
|
|
fprintf(
|
|
|
|
stderr,
|
|
|
|
"Non-AttributeGroup GetEntity returns different results "
|
|
|
|
"than AttributeGroup GetEntity for key %s: CF %s "
|
|
|
|
"returns entity %s, CF %s returns not found \n",
|
2023-03-17 21:47:29 +00:00
|
|
|
StringToHex(key).c_str(), column_family_names_[0].c_str(),
|
|
|
|
WideColumnsToHex(cmp_result.columns()).c_str(),
|
2024-05-09 23:40:22 +00:00
|
|
|
attribute_group.column_family()->GetName().c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (found &&
|
|
|
|
attribute_group.columns() != cmp_result.columns()) {
|
|
|
|
fprintf(
|
|
|
|
stderr,
|
|
|
|
"Non-AttributeGroup GetEntity returns different results "
|
|
|
|
"than AttributeGroup GetEntity for key %s: CF %s "
|
2023-03-17 21:47:29 +00:00
|
|
|
"returns entity %s, CF %s returns entity %s\n",
|
|
|
|
StringToHex(key).c_str(), column_family_names_[0].c_str(),
|
|
|
|
WideColumnsToHex(cmp_result.columns()).c_str(),
|
2024-05-09 23:40:22 +00:00
|
|
|
attribute_group.column_family()->GetName().c_str(),
|
|
|
|
WideColumnsToHex(attribute_group.columns()).c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (size_t i = 1; i < rand_column_families.size(); ++i) {
|
|
|
|
assert(rand_column_families[i] >= 0);
|
|
|
|
assert(rand_column_families[i] <
|
|
|
|
static_cast<int>(column_families_.size()));
|
|
|
|
|
|
|
|
PinnableWideColumns result;
|
|
|
|
s = db_->GetEntity(read_opts_copy,
|
|
|
|
column_families_[rand_column_families[i]], key,
|
|
|
|
&result);
|
|
|
|
|
|
|
|
if (!s.ok() && !s.IsNotFound()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
const bool found = s.ok();
|
|
|
|
|
|
|
|
assert(!column_family_names_.empty());
|
|
|
|
assert(i < column_family_names_.size());
|
|
|
|
|
|
|
|
if (!cmp_found && found) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"GetEntity returns different results for key %s: CF %s "
|
|
|
|
"returns not found, CF %s returns entity %s\n",
|
|
|
|
StringToHex(key).c_str(),
|
|
|
|
column_family_names_[0].c_str(),
|
|
|
|
column_family_names_[i].c_str(),
|
|
|
|
WideColumnsToHex(result.columns()).c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cmp_found && !found) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"GetEntity returns different results for key %s: CF %s "
|
|
|
|
"returns entity %s, CF %s returns not found\n",
|
|
|
|
StringToHex(key).c_str(),
|
|
|
|
column_family_names_[0].c_str(),
|
|
|
|
WideColumnsToHex(cmp_result.columns()).c_str(),
|
|
|
|
column_family_names_[i].c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (found && result != cmp_result) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"GetEntity returns different results for key %s: CF %s "
|
|
|
|
"returns entity %s, CF %s returns entity %s\n",
|
|
|
|
StringToHex(key).c_str(),
|
|
|
|
column_family_names_[0].c_str(),
|
|
|
|
WideColumnsToHex(cmp_result.columns()).c_str(),
|
|
|
|
column_family_names_[i].c_str(),
|
|
|
|
WideColumnsToHex(result.columns()).c_str());
|
|
|
|
is_consistent = false;
|
|
|
|
break;
|
|
|
|
}
|
2023-03-17 21:47:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
Inject more errors to more files in stress test (#12713)
Summary:
**Context:**
We currently have partial error injection:
- DB operation: all read, SST write
- DB open: all read, SST write, all metadata write.
This PR completes the error injection (with some limitations below):
- DB operation & open: all read, all write, all metadata write, all metadata read
**Summary:**
- Inject retryable metadata read and metadata write errors concerning directories (e.g., dir sync) or file metadata (e.g., name, size, file creation/deletion)
- Inject retryable errors to all major file types: random access file, sequential file, writable file
- Allow db stress test operations to handle above injected errors gracefully without crashing
- Change all error injection to thread-local implementation for easier disabling and enabling in the same thread. For example, we can control error handling thread to have no error injection. It's also cleaner in code.
- Limitation: compared to before, we now don't have write fault injection for backup/restore CopyOrCreateFiles work threads since they use anonymous background threads as well as read injection for db open bg thread
- Add a new flag to test error recovery without error injection so we can test the path where error recovery actually succeeds
- Some refactoring and fixes to the db stress test framework (see PR review comments)
- Fix some minor bugs surfaced (see PR review comments)
- Limitation: had to disable backup restore with metadata read/write injection since it surfaces too many testing issues. Will add it back later to focus on surfacing actual code/internal bugs first.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12713
Test Plan:
- Existing UT
- CI with no trivial error failure
Reviewed By: pdillinger
Differential Revision: D58326608
Pulled By: hx235
fbshipit-source-id: 011b5195aaeb6011641ae0a9194f7f2a0e325ad7
2024-06-19 15:42:00 +00:00
|
|
|
|
|
|
|
// Enable back error injection disabled for verification
|
|
|
|
if (fault_fs_guard) {
|
|
|
|
fault_fs_guard->EnableThreadLocalErrorInjection(
|
|
|
|
FaultInjectionIOType::kRead);
|
|
|
|
fault_fs_guard->EnableThreadLocalErrorInjection(
|
|
|
|
FaultInjectionIOType::kMetadataRead);
|
|
|
|
}
|
2023-03-17 21:47:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!is_consistent) {
|
|
|
|
fprintf(stderr, "TestGetEntity error: results are not consistent\n");
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
// Fail fast to preserve the DB state.
|
|
|
|
thread->shared->SetVerificationFailure();
|
|
|
|
} else if (s.ok()) {
|
|
|
|
thread->stats.AddGets(1, 1);
|
|
|
|
} else if (s.IsNotFound()) {
|
|
|
|
thread->stats.AddGets(1, 0);
|
2024-06-25 03:51:39 +00:00
|
|
|
} else if (!IsErrorInjectedAndRetryable(s)) {
|
2023-03-17 21:47:29 +00:00
|
|
|
fprintf(stderr, "TestGetEntity error: %s\n", s.ToString().c_str());
|
|
|
|
thread->stats.AddErrors(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-30 03:35:15 +00:00
|
|
|
// Reads every key in `rand_keys` from every column family named by
// `rand_column_families` via MultiGetEntity, all under one snapshot, and
// checks that the column families agree with each other. The first listed
// column family acts as the reference that the others are compared against.
//
// With FLAGS_use_attribute_group set, a single AttributeGroup MultiGetEntity
// call covers all keys; otherwise one per-column-family MultiGetEntity call is
// issued per key. A disagreement is printed to stderr, counted as an error and
// flagged via SetVerificationFailure(), after which no further keys are
// examined. An injected, retryable error silently ends the comparison for the
// affected key.
void TestMultiGetEntity(ThreadState* thread, const ReadOptions& read_opts,
                        const std::vector<int>& rand_column_families,
                        const std::vector<int64_t>& rand_keys) override {
  assert(thread);
  assert(thread->shared);
  assert(!rand_column_families.empty());
  assert(!rand_keys.empty());

  // Pin a snapshot so that all reads below observe the same DB state.
  ManagedSnapshot snapshot_guard(db_);

  ReadOptions snapshot_read_opts = read_opts;
  snapshot_read_opts.snapshot = snapshot_guard.snapshot();

  const size_t cf_count = rand_column_families.size();
  const size_t key_count = rand_keys.size();

  // Translate the column family indices into handles.
  std::vector<ColumnFamilyHandle*> cfhs;
  cfhs.reserve(cf_count);

  for (const int cf_index : rand_column_families) {
    assert(cf_index >= 0);
    assert(cf_index < static_cast<int>(column_families_.size()));

    ColumnFamilyHandle* const handle = column_families_[cf_index];
    assert(handle);

    cfhs.emplace_back(handle);
  }

  if (!FLAGS_use_attribute_group) {
    // Non-AttributeGroup MultiGetEntity verification

    for (size_t k = 0; k < key_count; ++k) {
      const std::string key = Key(rand_keys[k]);

      // The same key is looked up once per column family.
      std::vector<Slice> key_slices(cf_count, key);
      std::vector<PinnableWideColumns> results(cf_count);
      std::vector<Status> statuses(cf_count);

      db_->MultiGetEntity(snapshot_read_opts, cf_count, cfhs.data(),
                          key_slices.data(), results.data(), statuses.data());

      bool is_consistent = true;

      const Status& ref_status = statuses[0];
      const WideColumns& ref_columns = results[0].columns();

      for (size_t cf = 0; cf < cf_count; ++cf) {
        const Status& cf_status = statuses[cf];
        const WideColumns& cf_columns = results[cf].columns();

        if (!cf_status.ok() && IsErrorInjectedAndRetryable(cf_status)) {
          break;
        }

        if (!cf_status.ok() && !cf_status.IsNotFound()) {
          fprintf(stderr, "TestMultiGetEntity error: %s\n",
                  cf_status.ToString().c_str());
          thread->stats.AddErrors(1);
          break;
        }

        assert(ref_status.ok() || ref_status.IsNotFound());

        if (cf_status.IsNotFound()) {
          if (!ref_status.ok()) {
            // Both the reference and this column family miss the key.
            continue;
          }

          fprintf(
              stderr,
              "MultiGetEntity returns different results for key %s: CF %s "
              "returns entity %s, CF %s returns not found\n",
              StringToHex(key).c_str(), column_family_names_[0].c_str(),
              WideColumnsToHex(ref_columns).c_str(),
              column_family_names_[cf].c_str());
          is_consistent = false;
          break;
        }

        assert(cf_status.ok());

        if (ref_status.IsNotFound()) {
          fprintf(
              stderr,
              "MultiGetEntity returns different results for key %s: CF %s "
              "returns not found, CF %s returns entity %s\n",
              StringToHex(key).c_str(), column_family_names_[0].c_str(),
              column_family_names_[cf].c_str(),
              WideColumnsToHex(cf_columns).c_str());
          is_consistent = false;
          break;
        }

        if (cf_columns != ref_columns) {
          fprintf(
              stderr,
              "MultiGetEntity returns different results for key %s: CF %s "
              "returns entity %s, CF %s returns entity %s\n",
              StringToHex(key).c_str(), column_family_names_[0].c_str(),
              WideColumnsToHex(ref_columns).c_str(),
              column_family_names_[cf].c_str(),
              WideColumnsToHex(cf_columns).c_str());
          is_consistent = false;
          break;
        }

        if (!VerifyWideColumns(cf_columns)) {
          fprintf(stderr,
                  "MultiGetEntity error: inconsistent columns for key %s, "
                  "entity %s\n",
                  StringToHex(key).c_str(),
                  WideColumnsToHex(cf_columns).c_str());
          is_consistent = false;
          break;
        }
      }

      if (!is_consistent) {
        fprintf(stderr,
                "TestMultiGetEntity error: results are not consistent\n");
        thread->stats.AddErrors(1);
        // Fail fast to preserve the DB state.
        thread->shared->SetVerificationFailure();
        break;
      }

      if (statuses[0].ok()) {
        thread->stats.AddGets(1, 1);
      } else if (statuses[0].IsNotFound()) {
        thread->stats.AddGets(1, 0);
      }
    }

    return;
  }

  // AttributeGroup MultiGetEntity verification

  std::vector<PinnableAttributeGroups> results;
  std::vector<Slice> key_slices;
  std::vector<std::string> key_strs;
  results.reserve(key_count);
  key_slices.reserve(key_count);
  // `key_slices` refers to the strings stored in `key_strs`; reserving up
  // front keeps those strings in place while the vector is being filled.
  key_strs.reserve(key_count);

  for (size_t k = 0; k < key_count; ++k) {
    key_strs.emplace_back(Key(rand_keys[k]));
    key_slices.emplace_back(key_strs.back());

    // One attribute group per requested column family for this key.
    PinnableAttributeGroups groups_for_key;
    for (auto* handle : cfhs) {
      groups_for_key.emplace_back(handle);
    }
    results.emplace_back(std::move(groups_for_key));
  }

  db_->MultiGetEntity(snapshot_read_opts, key_count, key_slices.data(),
                      results.data());

  bool is_consistent = true;

  for (size_t k = 0; k < key_count; ++k) {
    const auto& groups = results[k];
    const Status& ref_status = groups[0].status();
    const WideColumns& ref_columns = groups[0].columns();

    bool has_error = false;

    for (size_t cf = 0; cf < cf_count; ++cf) {
      const Status& cf_status = groups[cf].status();
      const WideColumns& cf_columns = groups[cf].columns();

      if (!cf_status.ok() && IsErrorInjectedAndRetryable(cf_status)) {
        break;
      }

      if (!cf_status.ok() && !cf_status.IsNotFound()) {
        fprintf(stderr, "TestMultiGetEntity (AttributeGroup) error: %s\n",
                cf_status.ToString().c_str());
        thread->stats.AddErrors(1);
        has_error = true;
        break;
      }

      assert(ref_status.ok() || ref_status.IsNotFound());

      if (cf_status.IsNotFound()) {
        if (!ref_status.ok()) {
          // Both the reference and this column family miss the key.
          continue;
        }

        fprintf(stderr,
                "MultiGetEntity (AttributeGroup) returns different "
                "results for key %s: CF %s "
                "returns entity %s, CF %s returns not found\n",
                key_slices[k].ToString(true).c_str(),
                column_family_names_[0].c_str(),
                WideColumnsToHex(ref_columns).c_str(),
                column_family_names_[cf].c_str());
        is_consistent = false;
        break;
      }

      assert(cf_status.ok());

      if (ref_status.IsNotFound()) {
        fprintf(stderr,
                "MultiGetEntity (AttributeGroup) returns different results "
                "for key %s: CF %s "
                "returns not found, CF %s returns entity %s\n",
                key_slices[k].ToString(true).c_str(),
                column_family_names_[0].c_str(),
                column_family_names_[cf].c_str(),
                WideColumnsToHex(cf_columns).c_str());
        is_consistent = false;
        break;
      }

      if (cf_columns != ref_columns) {
        fprintf(stderr,
                "MultiGetEntity (AttributeGroup) returns different results "
                "for key %s: CF %s "
                "returns entity %s, CF %s returns entity %s\n",
                key_slices[k].ToString(true).c_str(),
                column_family_names_[0].c_str(),
                WideColumnsToHex(ref_columns).c_str(),
                column_family_names_[cf].c_str(),
                WideColumnsToHex(cf_columns).c_str());
        is_consistent = false;
        break;
      }

      if (!VerifyWideColumns(cf_columns)) {
        fprintf(stderr,
                "MultiGetEntity (AttributeGroup) error: inconsistent "
                "columns for key %s, "
                "entity %s\n",
                key_slices[k].ToString(true).c_str(),
                WideColumnsToHex(cf_columns).c_str());
        is_consistent = false;
        break;
      }
    }

    if (has_error) {
      break;
    }

    if (!is_consistent) {
      fprintf(stderr,
              "TestMultiGetEntity (AttributeGroup) error: results are not "
              "consistent\n");
      thread->stats.AddErrors(1);
      // Fail fast to preserve the DB state.
      thread->shared->SetVerificationFailure();
      break;
    }

    if (ref_status.ok()) {
      thread->stats.AddGets(1, 1);
    } else if (ref_status.IsNotFound()) {
      thread->stats.AddGets(1, 0);
    }
  }
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
// Scans all entries sharing the prefix of `rand_keys[0]` in one randomly
// chosen column family out of `rand_column_families`, checking for each entry
// that its value and wide columns are consistent.
//
// Returns the failing status when the scan hits an error that is not an
// injected, retryable one (also printing it and counting it as an error);
// otherwise records the number of entries seen and returns OK.
Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions,
                      const std::vector<int>& rand_column_families,
                      const std::vector<int64_t>& rand_keys) override {
  assert(!rand_column_families.empty());
  assert(!rand_keys.empty());

  const std::string key = Key(rand_keys[0]);

  // A negative FLAGS_prefix_size falls back to a 7-byte prefix.
  size_t prefix_to_use = 7;
  if (FLAGS_prefix_size >= 0) {
    prefix_to_use = static_cast<size_t>(FLAGS_prefix_size);
  }

  const Slice prefix(key.data(), prefix_to_use);

  ReadOptions ro_copy = readoptions;

  std::string upper_bound;
  Slice ub_slice;

  // Get the next prefix first and then see if we want to set upper bound.
  // We'll use the next prefix in an assertion later on
  if (GetNextPrefix(prefix, &upper_bound) && thread->rand.OneIn(2)) {
    ub_slice = Slice(upper_bound);
    ro_copy.iterate_upper_bound = &ub_slice;
    if (FLAGS_use_sqfc_for_range_queries) {
      ro_copy.table_filter =
          sqfc_factory_->GetTableFilterForRangeQuery(prefix, ub_slice);
    }
  }

  // Pick which of the candidate column families to scan.
  const auto picked = thread->rand.Uniform(
      static_cast<int>(rand_column_families.size()));
  ColumnFamilyHandle* const cfh =
      column_families_[rand_column_families[picked]];
  assert(cfh);

  std::unique_ptr<Iterator> iter(db_->NewIterator(ro_copy, cfh));

  Status s;
  uint64_t count = 0;

  iter->Seek(prefix);
  while (iter->Valid() && iter->key().starts_with(prefix)) {
    ++count;

    if (!VerifyWideColumns(iter->value(), iter->columns())) {
      s = Status::Corruption("Value and columns inconsistent",
                             DebugString(iter->value(), iter->columns()));
      break;
    }

    iter->Next();
  }

  assert(prefix_to_use == 0 ||
         count <= GetPrefixKeyCount(prefix.ToString(), upper_bound));

  // If the per-entry checks passed, surface any error the iterator itself hit.
  if (s.ok()) {
    s = iter->status();
  }

  if (s.ok() || IsErrorInjectedAndRetryable(s)) {
    thread->stats.AddPrefixes(1, count);

    return Status::OK();
  }

  fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
  thread->stats.AddErrors(1);

  return s;
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
// Returns the column family handle to use as the control. The requested
// column family id is ignored: all column families should contain the same
// data, so one of them is chosen at random.
ColumnFamilyHandle* GetControlCfh(ThreadState* thread,
                                  int /*column_family_id*/
                                  ) override {
  const auto picked = thread->rand.Next() % column_families_.size();
  return column_families_[picked];
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
// Verifies that all column families expose the same sequence of keys.
//
// One iterator per column family is opened on a common snapshot and the
// iterators are advanced in lockstep. At each step:
//  - every valid iterator's value and wide columns are checked for
//    consistency;
//  - if no iterator yields a good entry, any recorded errors are reported and
//    the scan ends;
//  - if only some iterators yield a good entry, verification fails and the
//    state of each column family is printed, including the recorded error of
//    any column family whose entry failed the wide-column check;
//  - otherwise the key of each column family is compared with that of the
//    first one, and on a mismatch the entries and the internal key versions
//    in the differing range are dumped and verification fails.
// The scan also stops as soon as the shared state reports a verification
// failure.
void VerifyDb(ThreadState* thread) const override {
  assert(thread);

  auto shared = thread->shared;
  assert(shared);

  // This `ReadOptions` is for validation purposes. Ignore
  // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
  ReadOptions options(FLAGS_verify_checksum, true);

  // We must set total_order_seek to true because we are doing a SeekToFirst
  // on a column family whose memtables may support (by default) prefix-based
  // iterator. In this case, NewIterator with options.total_order_seek being
  // false returns a prefix-based iterator. Calling SeekToFirst using this
  // iterator causes the iterator to become invalid. That means we cannot
  // iterate the memtable using this iterator any more, although the memtable
  // contains the most up-to-date key-values.
  options.total_order_seek = true;

  ManagedSnapshot snapshot_guard(db_);
  options.snapshot = snapshot_guard.snapshot();

  const size_t num = column_families_.size();

  std::vector<std::unique_ptr<Iterator>> iters;
  iters.reserve(num);

  for (size_t i = 0; i < num; ++i) {
    iters.emplace_back(db_->NewIterator(options, column_families_[i]));
    iters.back()->SeekToFirst();
  }

  std::vector<Status> statuses(num, Status::OK());

  while (true) {
    if (shared->HasVerificationFailedYet()) {
      break;
    }

    // Count the iterators positioned on a well-formed entry; record a status
    // for every other iterator.
    size_t valid_cnt = 0;

    for (size_t i = 0; i < num; ++i) {
      const auto& iter = iters[i];
      assert(iter);

      if (!iter->Valid()) {
        statuses[i] = iter->status();
      } else if (VerifyWideColumns(iter->value(), iter->columns())) {
        ++valid_cnt;
      } else {
        statuses[i] =
            Status::Corruption("Value and columns inconsistent",
                               DebugString(iter->value(), iter->columns()));
      }
    }

    if (valid_cnt == 0) {
      for (size_t i = 0; i < num; ++i) {
        const auto& s = statuses[i];
        if (!s.ok()) {
          fprintf(stderr, "Iterator on cf %s has error: %s\n",
                  column_families_[i]->GetName().c_str(),
                  s.ToString().c_str());
          shared->SetVerificationFailure();
        }
      }

      break;
    }

    if (valid_cnt < num) {
      shared->SetVerificationFailure();

      for (size_t i = 0; i < num; ++i) {
        assert(iters[i]);

        // Check the recorded status first: an iterator can still be valid
        // while its entry failed the wide-column check, and that error must
        // not be reported as "remaining data".
        if (!statuses[i].ok()) {
          fprintf(stderr, "Iterator on cf %s has error: %s\n",
                  column_families_[i]->GetName().c_str(),
                  statuses[i].ToString().c_str());
        } else if (!iters[i]->Valid()) {
          fprintf(stderr, "Finished scanning cf %s\n",
                  column_families_[i]->GetName().c_str());
        } else {
          fprintf(stderr, "cf %s has remaining data to scan\n",
                  column_families_[i]->GetName().c_str());
        }
      }

      break;
    }

    if (shared->HasVerificationFailedYet()) {
      break;
    }

    // If the program reaches here, then all column families' iterators are
    // still valid.
    assert(valid_cnt == num);

    // Retry this step (without advancing the iterators) for as long as
    // PrintingVerificationResults() returns true.
    if (shared->PrintingVerificationResults()) {
      continue;
    }

    assert(iters[0]);

    const Slice key = iters[0]->key();
    const Slice value = iters[0]->value();

    int num_mismatched_cfs = 0;

    for (size_t i = 1; i < num; ++i) {
      assert(iters[i]);

      const int cmp = key.compare(iters[i]->key());

      if (cmp == 0) {
        continue;
      }

      ++num_mismatched_cfs;

      // The header and the first column family's entry are printed only once,
      // on the first mismatch.
      if (1 == num_mismatched_cfs) {
        fprintf(stderr, "Verification failed\n");
        fprintf(stderr, "Latest Sequence Number: %" PRIu64 "\n",
                db_->GetLatestSequenceNumber());
        fprintf(stderr, "[%s] %s => %s\n",
                column_families_[0]->GetName().c_str(),
                key.ToString(true /* hex */).c_str(),
                value.ToString(true /* hex */).c_str());
      }

      fprintf(stderr, "[%s] %s => %s\n",
              column_families_[i]->GetName().c_str(),
              iters[i]->key().ToString(true /* hex */).c_str(),
              iters[i]->value().ToString(true /* hex */).c_str());

      // Order the two differing keys so that they bound the range to dump.
      Slice begin_key;
      Slice end_key;
      if (cmp < 0) {
        begin_key = key;
        end_key = iters[i]->key();
      } else {
        begin_key = iters[i]->key();
        end_key = key;
      }

      // Prints up to kMaxNumIKeys internal key versions of `cfh` within
      // [begin_key, end_key].
      const auto print_key_versions = [&](ColumnFamilyHandle* cfh) {
        constexpr size_t kMaxNumIKeys = 8;

        std::vector<KeyVersion> versions;
        const Status s = GetAllKeyVersions(db_, cfh, begin_key, end_key,
                                           kMaxNumIKeys, &versions);
        if (!s.ok()) {
          fprintf(stderr, "%s\n", s.ToString().c_str());
          return;
        }

        assert(cfh);

        fprintf(stderr,
                "Internal keys in CF '%s', [%s, %s] (max %" ROCKSDB_PRIszt
                ")\n",
                cfh->GetName().c_str(),
                begin_key.ToString(true /* hex */).c_str(),
                end_key.ToString(true /* hex */).c_str(), kMaxNumIKeys);

        for (const KeyVersion& kv : versions) {
          fprintf(stderr, " key %s seq %" PRIu64 " type %d\n",
                  Slice(kv.user_key).ToString(true).c_str(), kv.sequence,
                  kv.type);
        }
      };

      if (1 == num_mismatched_cfs) {
        print_key_versions(column_families_[0]);
      }

      print_key_versions(column_families_[i]);

      shared->SetVerificationFailure();
    }

    shared->FinishPrintingVerificationResults();

    for (auto& iter : iters) {
      assert(iter);
      iter->Next();
    }
  }
}
|
|
|
|
|
2019-12-20 16:46:52 +00:00
|
|
|
// Checks that every column family currently holds the same data by comparing
// a CRC32C checksum of each column family against the checksum of the default
// column family.
//
// When `cmp_db_` is set, it is first asked to catch up with the primary and
// all reads go through it (using `cmp_cfhs_`); otherwise reads go through
// `db_` (using `column_families_`) at a snapshot taken at the top of this
// function.
//
// On any failure the problem is printed to stderr, the shared state is asked
// to stop the test, and `assert(false)` fires in debug builds.
//
// @param thread  Calling stress-test thread; `thread` and `thread->shared`
//                must be non-null.
void ContinuouslyVerifyDb(ThreadState* thread) const override {
  assert(thread);
  Status status;

  DB* db_ptr = cmp_db_ ? cmp_db_ : db_;
  const auto& cfhs = cmp_db_ ? cmp_cfhs_ : column_families_;

  // Take a snapshot to preserve the state of primary db.
  ManagedSnapshot snapshot_guard(db_);

  SharedState* shared = thread->shared;
  assert(shared);

  if (cmp_db_) {
    status = cmp_db_->TryCatchUpWithPrimary();
    if (!status.ok()) {
      fprintf(stderr, "TryCatchUpWithPrimary: %s\n",
              status.ToString().c_str());
      shared->SetShouldStopTest();
      assert(false);
      return;
    }
  }

  // Walks `iter` from the first entry to the end and folds each entry's key,
  // value, and wide columns (name, then value) into a CRC32C. The result is
  // always stored in `*checksum`; the returned status is the iterator's
  // status after the scan, so callers must check it before trusting the
  // checksum.
  const auto checksum_column_family = [](Iterator* iter,
                                         uint32_t* checksum) -> Status {
    assert(nullptr != checksum);

    uint32_t ret = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ret = crc32c::Extend(ret, iter->key().data(), iter->key().size());
      ret = crc32c::Extend(ret, iter->value().data(), iter->value().size());

      for (const auto& column : iter->columns()) {
        ret = crc32c::Extend(ret, column.name().data(), column.name().size());
        ret =
            crc32c::Extend(ret, column.value().data(), column.value().size());
      }
    }

    *checksum = ret;
    return iter->status();
  };

  // This `ReadOptions` is for validation purposes. Ignore
  // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
  ReadOptions ropts(FLAGS_verify_checksum, true);
  ropts.total_order_seek = true;
  if (nullptr == cmp_db_) {
    // Only reads that go through `db_` are pinned to the snapshot.
    ropts.snapshot = snapshot_guard.snapshot();
  }

  uint32_t crc = 0;
  {
    // Compute crc for all key-values of default column family.
    std::unique_ptr<Iterator> it(db_ptr->NewIterator(ropts));
    status = checksum_column_family(it.get(), &crc);
    if (!status.ok()) {
      fprintf(stderr, "Computing checksum of default cf: %s\n",
              status.ToString().c_str());
      // Request a stop here as well, matching the other error paths in this
      // function; otherwise builds with assertions disabled would only log
      // the failure and carry on.
      shared->SetShouldStopTest();
      assert(false);
    }
  }

  // Since we currently intentionally disallow reading from the secondary
  // instance with snapshot, we cannot achieve cross-cf consistency if WAL is
  // enabled because there is no guarantee that secondary instance replays
  // the primary's WAL to a consistent point where all cfs have the same
  // data.
  if (status.ok() && FLAGS_disable_wal) {
    // Start from the reference checksum so that, if the loop below ends up
    // skipping every handle, the comparison after it does not report a
    // mismatch against a checksum that was never computed.
    uint32_t tmp_crc = crc;
    for (ColumnFamilyHandle* cfh : cfhs) {
      if (cfh == db_ptr->DefaultColumnFamily()) {
        continue;
      }
      std::unique_ptr<Iterator> it(db_ptr->NewIterator(ropts, cfh));
      status = checksum_column_family(it.get(), &tmp_crc);
      if (!status.ok() || tmp_crc != crc) {
        // Stop at the first failure; it is reported below.
        break;
      }
    }

    if (!status.ok()) {
      fprintf(stderr, "status: %s\n", status.ToString().c_str());
      shared->SetShouldStopTest();
      assert(false);
    } else if (tmp_crc != crc) {
      fprintf(stderr, "tmp_crc=%" PRIu32 " crc=%" PRIu32 "\n", tmp_crc, crc);
      shared->SetShouldStopTest();
      assert(false);
    }
  }
}
|
|
|
|
|
2019-12-09 22:36:10 +00:00
|
|
|
std::vector<int> GenerateColumnFamilies(
|
|
|
|
const int /* num_column_families */,
|
|
|
|
int /* rand_column_family */) const override {
|
2019-12-09 07:49:32 +00:00
|
|
|
std::vector<int> ret;
|
|
|
|
int num = static_cast<int>(column_families_.size());
|
|
|
|
int k = 0;
|
|
|
|
std::generate_n(back_inserter(ret), num, [&k]() -> int { return k++; });
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
Add the PutEntity API to the stress/crash tests (#10760)
Summary:
The patch adds the `PutEntity` API to the non-batched, batched, and
CF consistency stress tests. Namely, when the new `db_stress` command
line parameter `use_put_entity_one_in` is greater than zero, one in
N writes on average is performed using `PutEntity` rather than `Put`.
The wide-column entity written has the generated value in its default
column; in addition, it contains up to three additional columns where
the original generated value is divided up between the column name and the
column value (with the column name containing the first k characters of
the generated value, and the column value containing the rest). Whether
`PutEntity` is used (and if so, how many columns the entity has) is completely
determined by the "value base" used to generate the value (that is, there is
no randomness involved). Assuming the same `use_put_entity_one_in` setting
is used across `db_stress` invocations, this enables us to reconstruct and
validate the entity during subsequent `db_stress` runs.
Note that `PutEntity` is currently incompatible with `Merge`, transactions, and
user-defined timestamps; these combinations are currently disabled/disallowed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10760
Test Plan: Ran some batched, non-batched, and CF consistency stress tests using the script.
Reviewed By: riversand963
Differential Revision: D39939032
Pulled By: ltamasi
fbshipit-source-id: eafdf124e95993fb7d73158e3b006d11819f7fa9
2022-09-30 18:11:07 +00:00
|
|
|
// Atomic 32-bit counter; the constructor initializes it to 0.
// NOTE(review): its readers/writers are outside this excerpt -- presumably it
// numbers write batches; confirm against the write paths.
std::atomic<uint32_t> batch_id_;
|
2019-12-09 07:49:32 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Factory for the column-family consistency stress test. Allocates a new
// CfConsistencyStressTest with `new` and returns it through a pointer to its
// StressTest base class.
StressTest* CreateCfConsistencyStressTest() {
  StressTest* const stress_test = new CfConsistencyStressTest();
  return stress_test;
}
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2019-12-09 07:49:32 +00:00
|
|
|
#endif // GFLAGS
|