Stress/Crash Test for OptimisticTransactionDB (#11513)

Summary:
Context:
OptimisticTransactionDB has not been covered by db_stress (including crash test) like TransactionDB.
1. Adding the following gflag options to to test OptimisticTransactionDB
- `use_optimistic_txn`: When true, open OptimisticTransactionDB to test
- `occ_validation_policy`: `OccValidationPolicy::kValidateParallel = 1` by default.
- `share_occ_lock_buckets`: Use shared occ locks
- `occ_lock_bucket_count`: 500 by default. Number of buckets to use for shared occ lock.
2. Opening OptimisticTransactionDB and NewTxn/Commit added per `use_optimistic_txn` flag in `db_stress_test_base.cc`
3. OptimisticTransactionDB blackbox/whitebox test added in crash_test.mk

Please note that the existing flag `use_txn` is being used here. When `use_txn == true` and `use_optimistic_txn == false`, we use `TransactionDB` (a.k.a. pessimistic transaction db). When both `use_txn` and `use_optimistic_txn` are true, we use `OptimisticTransactionDB`. If `use_txn == false` but `use_optimistic_txn == true` throw error with message _"You cannot set use_optimistic_txn true while use_txn is false. Please set use_txn true if you want to use OptimisticTransactionDB"_.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11513

Test Plan:
**Crash Test**
Serial Validation
```
export CRASH_TEST_EXT_ARGS="--use_optimistic_txn=1 --use_txn=1 --use_put_entity_one_in=0 --occ_validation_policy=0"
make crash_test -j
```
Parallel Validation (no share bucket)
```
export CRASH_TEST_EXT_ARGS="--use_optimistic_txn=1 --use_txn=1 --use_put_entity_one_in=0 --occ_validation_policy=1 --share_occ_lock_buckets=0"
make crash_test -j
```
Parallel Validation (share bucket)
```
export CRASH_TEST_EXT_ARGS="--use_optimistic_txn=1 --use_txn=1 --use_put_entity_one_in=0 --occ_validation_policy=1 --share_occ_lock_buckets=1 --occ_lock_bucket_count=500"
make crash_test -j
```

**Stress Test**
```
./db_stress -use_optimistic_txn -threads=32
```

Reviewed By: pdillinger

Differential Revision: D46547387

Pulled By: jaykorean

fbshipit-source-id: ca19819ca6e0281694966998014b40d95d4e5960
This commit is contained in:
Jay Huh 2023-06-17 16:27:37 -07:00 committed by Facebook GitHub Bot
parent 1da9ac2363
commit 17d5200504
7 changed files with 247 additions and 110 deletions

View File

@ -21,6 +21,8 @@ CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD) --
blackbox_crash_test_with_multiops_wp_txn \
crash_test_with_tiered_storage blackbox_crash_test_with_tiered_storage \
whitebox_crash_test_with_tiered_storage \
whitebox_crash_test_with_optimistic_txn \
blackbox_crash_test_with_optimistic_txn \
crash_test: $(DB_STRESS_CMD)
# Do not parallelize
@ -37,6 +39,11 @@ crash_test_with_txn: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) whitebox_crash_test_with_txn
$(CRASHTEST_MAKE) blackbox_crash_test_with_txn
crash_test_with_optimistic_txn: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_optimistic_txn
$(CRASHTEST_MAKE) blackbox_crash_test_with_optimistic_txn
crash_test_with_best_efforts_recovery: blackbox_crash_test_with_best_efforts_recovery
crash_test_with_ts: $(DB_STRESS_CMD)
@ -80,6 +87,9 @@ blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
blackbox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_tiered_storage blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_optimistic_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --optimistic_txn blackbox $(CRASH_TEST_EXT_ARGS)
ifeq ($(CRASH_TEST_KILL_ODD),)
CRASH_TEST_KILL_ODD=888887
endif
@ -105,3 +115,7 @@ whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
whitebox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_tiered_storage whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
whitebox_crash_test_with_optimistic_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --optimistic_txn whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)

View File

@ -54,6 +54,7 @@
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/db_ttl.h"
#include "rocksdb/utilities/debug.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/options_util.h"
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"
@ -194,9 +195,6 @@ DECLARE_bool(rate_limit_user_ops);
DECLARE_bool(rate_limit_auto_wal_flush);
DECLARE_uint64(sst_file_manager_bytes_per_sec);
DECLARE_uint64(sst_file_manager_bytes_per_truncate);
DECLARE_bool(use_txn);
DECLARE_uint64(txn_write_policy);
DECLARE_bool(unordered_write);
DECLARE_int32(backup_one_in);
DECLARE_uint64(backup_max_size);
DECLARE_int32(checkpoint_one_in);
@ -255,6 +253,21 @@ DECLARE_int32(continuous_verification_interval);
DECLARE_int32(get_property_one_in);
DECLARE_string(file_checksum_impl);
// Options for transaction dbs.
// Use TransactionDB (a.k.a. Pessimistic Transaction DB)
// OR OptimisticTransactionDB
DECLARE_bool(use_txn);
// Options for TransactionDB (a.k.a. Pessimistic Transaction DB)
DECLARE_uint64(txn_write_policy);
DECLARE_bool(unordered_write);
// Options for OptimisticTransactionDB
DECLARE_bool(use_optimistic_txn);
DECLARE_uint64(occ_validation_policy);
DECLARE_bool(share_occ_lock_buckets);
DECLARE_uint32(occ_lock_bucket_count);
// Options for StackableDB-based BlobDB
DECLARE_bool(use_blob_db);
DECLARE_uint64(blob_db_min_blob_size);

View File

@ -669,13 +669,24 @@ DEFINE_uint64(sst_file_manager_bytes_per_truncate, 0,
"many bytes. By default whole files will be deleted.");
DEFINE_bool(use_txn, false,
"Use TransactionDB. Currently the default write policy is "
"TxnDBWritePolicy::WRITE_PREPARED");
"Use TransactionDB or OptimisticTransactionDB. When "
"use_optimistic_txn == false (by default), "
"it's (Pessimistic) TransactionDB");
DEFINE_uint64(txn_write_policy, 0,
"The transaction write policy. Default is "
"TxnDBWritePolicy::WRITE_COMMITTED. Note that this should not be "
"changed accross crashes.");
"changed across crashes.");
DEFINE_bool(use_optimistic_txn, false, "Use OptimisticTransactionDB.");
DEFINE_uint64(occ_validation_policy, 1,
"Optimistic Concurrency Control Validation Policy for "
"OptimisticTransactionDB");
DEFINE_bool(share_occ_lock_buckets, false,
"Share a pool of locks across DB instances for buckets");
DEFINE_uint32(
occ_lock_bucket_count, 500,
"Bucket Count for shared Optimistic Concurrency Control (OCC) locks");
DEFINE_bool(unordered_write, false,
"Turn on the unordered_write feature. This options is currently "

View File

@ -301,7 +301,7 @@ void StressTest::FinishInitDb(SharedState* shared) {
exit(1);
}
}
if (FLAGS_use_txn) {
if (FLAGS_use_txn && !FLAGS_use_optimistic_txn) {
// It's OK here without sync because unsynced data cannot be lost at this
// point
// - even with sync_fault_injection=1 as the
@ -556,6 +556,7 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys,
delete db_;
db_ = nullptr;
txn_db_ = nullptr;
optimistic_txn_db_ = nullptr;
db_preload_finished_.store(true);
auto now = clock_->NowMicros();
@ -634,6 +635,10 @@ Status StressTest::NewTxn(WriteOptions& write_opts, Transaction** txn) {
}
write_opts.disableWAL = FLAGS_disable_wal;
static std::atomic<uint64_t> txn_id = {0};
if (FLAGS_use_optimistic_txn) {
*txn = optimistic_txn_db_->BeginTransaction(write_opts);
return Status::OK();
} else {
TransactionOptions txn_options;
txn_options.use_only_the_last_commit_time_batch_for_recovery =
FLAGS_use_only_the_last_commit_time_batch_for_recovery;
@ -644,13 +649,19 @@ Status StressTest::NewTxn(WriteOptions& write_opts, Transaction** txn) {
Status s = (*txn)->SetName("xid" + istr);
return s;
}
}
Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) {
if (!FLAGS_use_txn) {
return Status::InvalidArgument("CommitTxn when FLAGS_use_txn is not set");
}
Status s = Status::OK();
if (FLAGS_use_optimistic_txn) {
assert(optimistic_txn_db_);
s = txn->Commit();
} else {
assert(txn_db_);
Status s = txn->Prepare();
s = txn->Prepare();
std::shared_ptr<const Snapshot> timestamped_snapshot;
if (s.ok()) {
if (thread && FLAGS_create_timestamped_snapshot_one_in &&
@ -684,6 +695,7 @@ Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) {
constexpr uint64_t time_diff = static_cast<uint64_t>(1000) * 1000 * 1000;
txn_db_->ReleaseTimestampedSnapshotsOlderThan(now - time_diff);
}
}
delete txn;
return s;
}
@ -2311,8 +2323,22 @@ void StressTest::PrintEnv() const {
fprintf(stdout, "Format version : %d\n", FLAGS_format_version);
fprintf(stdout, "TransactionDB : %s\n",
FLAGS_use_txn ? "true" : "false");
if (FLAGS_use_txn) {
fprintf(stdout, "TransactionDB Type : %s\n",
FLAGS_use_optimistic_txn ? "Optimistic" : "Pessimistic");
if (FLAGS_use_optimistic_txn) {
fprintf(stdout, "OCC Validation Type : %d\n",
static_cast<int>(FLAGS_occ_validation_policy));
if (static_cast<uint64_t>(OccValidationPolicy::kValidateParallel) ==
FLAGS_occ_validation_policy) {
fprintf(stdout, "Share Lock Buckets : %s\n",
FLAGS_share_occ_lock_buckets ? "true" : "false");
if (FLAGS_share_occ_lock_buckets) {
fprintf(stdout, "Lock Bucket Count : %d\n",
static_cast<int>(FLAGS_occ_lock_bucket_count));
}
}
} else {
fprintf(stdout, "Two write queues: : %s\n",
FLAGS_two_write_queues ? "true" : "false");
fprintf(stdout, "Write policy : %d\n",
@ -2330,6 +2356,7 @@ void StressTest::PrintEnv() const {
FLAGS_use_only_the_last_commit_time_batch_for_recovery ? "true"
: "false");
}
}
fprintf(stdout, "Stacked BlobDB : %s\n",
FLAGS_use_blob_db ? "true" : "false");
@ -2477,6 +2504,7 @@ void StressTest::PrintEnv() const {
void StressTest::Open(SharedState* shared) {
assert(db_ == nullptr);
assert(txn_db_ == nullptr);
assert(optimistic_txn_db_ == nullptr);
if (!InitializeOptionsFromFile(options_)) {
InitializeOptionsFromFlags(cache_, filter_policy_, options_);
}
@ -2703,13 +2731,41 @@ void StressTest::Open(SharedState* shared) {
}
break;
}
} else {
if (FLAGS_use_optimistic_txn) {
OptimisticTransactionDBOptions optimistic_txn_db_options;
optimistic_txn_db_options.validate_policy =
static_cast<OccValidationPolicy>(FLAGS_occ_validation_policy);
if (FLAGS_share_occ_lock_buckets) {
optimistic_txn_db_options.shared_lock_buckets =
MakeSharedOccLockBuckets(FLAGS_occ_lock_bucket_count);
} else {
optimistic_txn_db_options.occ_lock_buckets =
FLAGS_occ_lock_bucket_count;
optimistic_txn_db_options.shared_lock_buckets = nullptr;
}
s = OptimisticTransactionDB::Open(
options_, optimistic_txn_db_options, FLAGS_db, cf_descriptors,
&column_families_, &optimistic_txn_db_);
if (!s.ok()) {
fprintf(stderr, "Error in opening the OptimisticTransactionDB [%s]\n",
s.ToString().c_str());
fflush(stderr);
}
assert(s.ok());
{
db_ = optimistic_txn_db_;
db_aptr_.store(optimistic_txn_db_, std::memory_order_release);
}
} else {
TransactionDBOptions txn_db_options;
assert(FLAGS_txn_write_policy <= TxnDBWritePolicy::WRITE_UNPREPARED);
txn_db_options.write_policy =
static_cast<TxnDBWritePolicy>(FLAGS_txn_write_policy);
if (FLAGS_unordered_write) {
assert(txn_db_options.write_policy == TxnDBWritePolicy::WRITE_PREPARED);
assert(txn_db_options.write_policy ==
TxnDBWritePolicy::WRITE_PREPARED);
options_.unordered_write = true;
options_.two_write_queues = true;
txn_db_options.skip_concurrency_control = true;
@ -2736,6 +2792,7 @@ void StressTest::Open(SharedState* shared) {
db_aptr_.store(txn_db_, std::memory_order_release);
}
}
}
if (!s.ok()) {
fprintf(stderr, "Error in opening the DB [%s]\n", s.ToString().c_str());
fflush(stderr);
@ -2811,10 +2868,12 @@ void StressTest::Reopen(ThreadState* thread) {
}
assert(s.ok());
}
assert(txn_db_ == nullptr || db_ == txn_db_);
assert((txn_db_ == nullptr && optimistic_txn_db_ == nullptr) ||
(db_ == txn_db_ || db_ == optimistic_txn_db_));
delete db_;
db_ = nullptr;
txn_db_ = nullptr;
optimistic_txn_db_ = nullptr;
num_times_reopened_++;
auto now = clock_->NowMicros();

View File

@ -17,6 +17,7 @@ namespace ROCKSDB_NAMESPACE {
class SystemClock;
class Transaction;
class TransactionDB;
class OptimisticTransactionDB;
struct TransactionDBOptions;
class StressTest {
@ -261,6 +262,7 @@ class StressTest {
std::shared_ptr<const FilterPolicy> filter_policy_;
DB* db_;
TransactionDB* txn_db_;
OptimisticTransactionDB* optimistic_txn_db_;
// Currently only used in MultiOpsTxnsStressTest
std::atomic<DB*> db_aptr_;

View File

@ -274,6 +274,14 @@ int db_stress_tool(int argc, char** argv) {
CheckAndSetOptionsForMultiOpsTxnStressTest();
}
if (!FLAGS_use_txn && FLAGS_use_optimistic_txn) {
fprintf(
stderr,
"You cannot set use_optimistic_txn true while use_txn is false. Please "
"set use_txn true if you want to use OptimisticTransactionDB\n");
exit(1);
}
if (FLAGS_create_timestamped_snapshot_one_in > 0) {
if (!FLAGS_use_txn) {
fprintf(stderr, "timestamped snapshot supported only in TransactionDB\n");
@ -291,8 +299,8 @@ int db_stress_tool(int argc, char** argv) {
exit(1);
}
if (FLAGS_use_txn && FLAGS_sync_fault_injection &&
FLAGS_txn_write_policy != 0) {
if (FLAGS_use_txn && !FLAGS_use_optimistic_txn &&
FLAGS_sync_fault_injection && FLAGS_txn_write_policy != 0) {
fprintf(stderr,
"For TransactionDB, correctness testing with unsync data loss is "
"currently compatible with only write committed policy\n");

View File

@ -369,8 +369,10 @@ cf_consistency_params = {
"ingest_external_file_one_in": 0,
}
# For pessimistic transaction db
txn_params = {
"use_txn": 1,
"use_optimistic_txn": 0,
# Avoid lambda to set it once for the entire test
"txn_write_policy": random.randint(0, 2),
"unordered_write": random.randint(0, 1),
@ -386,6 +388,17 @@ txn_params = {
"use_put_entity_one_in": 0,
}
# For optimistic transaction db
optimistic_txn_params = {
"use_txn": 1,
"use_optimistic_txn": 1,
"occ_validation_policy": random.randint(0, 1),
"share_occ_lock_buckets": random.randint(0, 1),
"occ_lock_bucket_count": lambda: random.choice([10, 100, 500]),
# PutEntity in transactions is not yet implemented
"use_put_entity_one_in": 0,
}
best_efforts_recovery_params = {
"best_efforts_recovery": 1,
"atomic_flush": 0,
@ -549,12 +562,16 @@ def finalize_and_sanitize(src_params):
# Multi-key operations are not currently compatible with transactions or
# timestamp.
if (dest_params.get("test_batches_snapshots") == 1 or
dest_params.get("use_txn") == 1 or
dest_params.get("user_timestamp_size") > 0):
if (
dest_params.get("test_batches_snapshots") == 1
or dest_params.get("use_txn") == 1
or dest_params.get("user_timestamp_size") > 0
):
dest_params["ingest_external_file_one_in"] = 0
if (dest_params.get("test_batches_snapshots") == 1 or
dest_params.get("use_txn") == 1):
if (
dest_params.get("test_batches_snapshots") == 1
or dest_params.get("use_txn") == 1
):
dest_params["delpercent"] += dest_params["delrangepercent"]
dest_params["delrangepercent"] = 0
if (
@ -632,7 +649,7 @@ def finalize_and_sanitize(src_params):
dest_params["unordered_write"] = 0
# For TransactionDB, correctness testing with unsync data loss is currently
# compatible with only write committed policy
if (dest_params.get("use_txn") == 1 and dest_params.get("txn_write_policy") != 0):
if dest_params.get("use_txn") == 1 and dest_params.get("txn_write_policy") != 0:
dest_params["sync_fault_injection"] = 0
dest_params["manual_wal_flush_one_in"] = 0
# PutEntity is currently not supported by SstFileWriter or in conjunction with Merge
@ -662,6 +679,8 @@ def gen_cmd_params(args):
params.update(cf_consistency_params)
if args.txn:
params.update(txn_params)
if args.optimistic_txn:
params.update(optimistic_txn_params)
if args.test_best_efforts_recovery:
params.update(best_efforts_recovery_params)
if args.enable_ts:
@ -707,6 +726,7 @@ def gen_cmd(params, unknown_params):
"random_kill_odd",
"cf_consistency",
"txn",
"optimistic_txn",
"test_best_efforts_recovery",
"enable_ts",
"test_multiops_txn",
@ -879,9 +899,17 @@ def whitebox_crash_main(args, unknown_args):
"ops_per_thread": cmd_params["ops_per_thread"],
}
cur_compaction_style = additional_opts.get("compaction_style", cmd_params.get("compaction_style", 0))
if prev_compaction_style != -1 and prev_compaction_style != cur_compaction_style:
print("`compaction_style` is changed in current run so `destroy_db_initially` is set to 1 as a short-term solution to avoid cycling through previous db of different compaction style." + "\n")
cur_compaction_style = additional_opts.get(
"compaction_style", cmd_params.get("compaction_style", 0)
)
if (
prev_compaction_style != -1
and prev_compaction_style != cur_compaction_style
):
print(
"`compaction_style` is changed in current run so `destroy_db_initially` is set to 1 as a short-term solution to avoid cycling through previous db of different compaction style."
+ "\n"
)
additional_opts["destroy_db_initially"] = 1
prev_compaction_style = cur_compaction_style
@ -959,7 +987,7 @@ def whitebox_crash_main(args, unknown_args):
os.mkdir(dbname)
except OSError:
pass
if (expected_values_dir is not None):
if expected_values_dir is not None:
shutil.rmtree(expected_values_dir, True)
os.mkdir(expected_values_dir)
@ -980,6 +1008,7 @@ def main():
parser.add_argument("--simple", action="store_true")
parser.add_argument("--cf_consistency", action="store_true")
parser.add_argument("--txn", action="store_true")
parser.add_argument("--optimistic_txn", action="store_true")
parser.add_argument("--test_best_efforts_recovery", action="store_true")
parser.add_argument("--enable_ts", action="store_true")
parser.add_argument("--test_multiops_txn", action="store_true")
@ -1004,6 +1033,7 @@ def main():
+ list(cf_consistency_params.items())
+ list(tiered_params.items())
+ list(txn_params.items())
+ list(optimistic_txn_params.items())
)
for k, v in all_params.items():