// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifdef GFLAGS
#pragma once

#include "db_stress_tool/db_stress_common.h"
#include "db_stress_tool/db_stress_shared_state.h"

namespace ROCKSDB_NAMESPACE {

class SystemClock;
class Transaction;
class TransactionDB;
struct TransactionDBOptions;

class StressTest {
 public:
  StressTest();

  virtual ~StressTest();

  std::shared_ptr<Cache> NewCache(size_t capacity, int32_t num_shard_bits);

  static std::vector<std::string> GetBlobCompressionTags();

  bool BuildOptionsTable();

  void InitDb(SharedState*);
  // The initialization work is split into two parts to avoid a circular
  // dependency with `SharedState`.
  virtual void FinishInitDb(SharedState*);
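
  // A sketch of the assumed call order for the two-phase initialization
  // above (illustrative only; `test` and `shared` are hypothetical names and
  // the real driver wiring lives outside this header):
  //
  //   test->InitDb(&shared);        // open the DB and column families
  //   test->FinishInitDb(&shared);  // remaining work that needs SharedState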

  void TrackExpectedState(SharedState* shared);
  void OperateDb(ThreadState* thread);
  virtual void VerifyDb(ThreadState* thread) const = 0;
  virtual void ContinuouslyVerifyDb(ThreadState* /*thread*/) const = 0;
  void PrintStatistics();

 protected:
  Status AssertSame(DB* db, ColumnFamilyHandle* cf,
                    ThreadState::SnapshotState& snap_state);

  // Currently PreloadDb has to be single-threaded.
  void PreloadDbAndReopenAsReadOnly(int64_t number_of_keys,
                                    SharedState* shared);

  Status SetOptions(ThreadState* thread);

#ifndef ROCKSDB_LITE
  // For TransactionDB, there can be txns that were prepared but not yet
  // committed right before the previous stress run crashed.
  // They will be recovered and processed through
  // ProcessRecoveredPreparedTxnsHelper at the start of the current stress run.
  void ProcessRecoveredPreparedTxns(SharedState* shared);

  // The default implementation will first update the ExpectedState to
  // `SharedState::UNKNOWN` for each key in `txn` and then randomly
  // commit or rollback `txn`.
  virtual void ProcessRecoveredPreparedTxnsHelper(Transaction* txn,
                                                  SharedState* shared);
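
  // A rough sketch of what the default implementation described above is
  // expected to do (pseudocode; `KeysWrittenBy` and `MarkUnknown` are
  // hypothetical helpers, not part of this class):
  //
  //   for (const auto& key : KeysWrittenBy(txn)) {
  //     shared->MarkUnknown(key);  // expected value becomes unknown
  //   }
  //   if (rand.OneIn(2)) {
  //     txn->Commit();
  //   } else {
  //     txn->Rollback();
  //   }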

  Status NewTxn(WriteOptions& write_opts, Transaction** txn);

  Status CommitTxn(Transaction* txn, ThreadState* thread = nullptr);

  Status RollbackTxn(Transaction* txn);
#endif
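
  // A minimal, hedged sketch of how the three transaction helpers above are
  // assumed to fit together (illustrative only; the real call sites also
  // consult test flags and per-operation randomness, and `cfh`, `key`,
  // `value`, `thread` are placeholder names):
  //
  //   Transaction* txn = nullptr;
  //   Status s = NewTxn(write_opts, &txn);
  //   if (s.ok()) {
  //     s = txn->Put(cfh, key, value);
  //   }
  //   if (s.ok()) {
  //     s = CommitTxn(txn, thread);
  //   } else {
  //     RollbackTxn(txn).PermitUncheckedError();
  //   }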

  virtual void MaybeClearOneColumnFamily(ThreadState* /* thread */) {}

  virtual bool ShouldAcquireMutexOnKey() const { return false; }

  // Returns true if DB state is tracked by the stress test.
  virtual bool IsStateTracked() const = 0;

  virtual std::vector<int> GenerateColumnFamilies(
      const int /* num_column_families */, int rand_column_family) const {
    return {rand_column_family};
  }

  virtual std::vector<int64_t> GenerateKeys(int64_t rand_key) const {
    return {rand_key};
  }

  virtual Status TestGet(ThreadState* thread, const ReadOptions& read_opts,
                         const std::vector<int>& rand_column_families,
                         const std::vector<int64_t>& rand_keys) = 0;

  virtual std::vector<Status> TestMultiGet(
      ThreadState* thread, const ReadOptions& read_opts,
      const std::vector<int>& rand_column_families,
      const std::vector<int64_t>& rand_keys) = 0;

  virtual Status TestPrefixScan(ThreadState* thread,
                                const ReadOptions& read_opts,
                                const std::vector<int>& rand_column_families,
                                const std::vector<int64_t>& rand_keys) = 0;

  virtual Status TestPut(ThreadState* thread, WriteOptions& write_opts,
                         const ReadOptions& read_opts,
                         const std::vector<int>& cf_ids,
                         const std::vector<int64_t>& keys,
                         char (&value)[100]) = 0;

  virtual Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
                            const std::vector<int>& rand_column_families,
                            const std::vector<int64_t>& rand_keys) = 0;

  virtual Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts,
                                 const std::vector<int>& rand_column_families,
                                 const std::vector<int64_t>& rand_keys) = 0;

  virtual void TestIngestExternalFile(
      ThreadState* thread, const std::vector<int>& rand_column_families,
      const std::vector<int64_t>& rand_keys) = 0;

  // Issue a compact-range operation starting at start_key, whose integer
  // value is rand_key.
  virtual void TestCompactRange(ThreadState* thread, int64_t rand_key,
                                const Slice& start_key,
                                ColumnFamilyHandle* column_family);

  // Calculate a hash value for all keys in the range [start_key, end_key]
  // at a certain snapshot.
  uint32_t GetRangeHash(ThreadState* thread, const Snapshot* snapshot,
                        ColumnFamilyHandle* column_family,
                        const Slice& start_key, const Slice& end_key);

  // Return a column family handle that mirrors the one identified by
  // `column_family_id`; it is used to validate that the data read is
  // correct. By default, the column family itself is returned.
  virtual ColumnFamilyHandle* GetControlCfh(ThreadState* /* thread*/,
                                            int column_family_id) {
    return column_families_[column_family_id];
  }

#ifndef ROCKSDB_LITE
  // Generate a list of keys close to the boundaries of the SST files' keys.
  // If there isn't any SST file in the DB, return an empty list.
  std::vector<std::string> GetWhiteBoxKeys(ThreadState* thread, DB* db,
                                           ColumnFamilyHandle* cfh,
                                           size_t num_keys);
#else   // !ROCKSDB_LITE
  std::vector<std::string> GetWhiteBoxKeys(ThreadState*, DB*,
                                           ColumnFamilyHandle*, size_t) {
    // Not supported in LITE mode.
    return {};
  }
#endif  // !ROCKSDB_LITE

  // Given a key K, this creates an iterator which scans to K and then
  // does a random sequence of Next/Prev operations.
  virtual Status TestIterate(ThreadState* thread, const ReadOptions& read_opts,
                             const std::vector<int>& rand_column_families,
                             const std::vector<int64_t>& rand_keys);
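
  // For context: TestIterate() is mainly about comparing two iterators over
  // the same data, one opened with `total_order_seek` as a control and one
  // that may use auto_prefix or upper/lower bounds, calling VerifyIterator()
  // (declared below) after each step. A rough sketch with hypothetical local
  // names, not the exact implementation:
  //
  //   ReadOptions cmp_ro = ro;
  //   cmp_ro.total_order_seek = true;  // control iterator
  //   std::unique_ptr<Iterator> iter(db_->NewIterator(ro, cfh));
  //   std::unique_ptr<Iterator> cmp_iter(db_->NewIterator(cmp_ro, cmp_cfh));
  //   iter->Seek(key);
  //   cmp_iter->Seek(key);
  //   VerifyIterator(thread, cmp_cfh, ro, iter.get(), cmp_iter.get(),
  //                  kLastOpSeek, key, op_logs, &diverged);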
  virtual Status TestIterateAgainstExpected(
      ThreadState* /* thread */, const ReadOptions& /* read_opts */,
      const std::vector<int>& /* rand_column_families */,
      const std::vector<int64_t>& /* rand_keys */) {
    return Status::NotSupported();
  }

  // Enum used by VerifyIterator() to identify the mode to validate.
  enum LastIterateOp {
    kLastOpSeek,
    kLastOpSeekForPrev,
    kLastOpNextOrPrev,
    kLastOpSeekToFirst,
    kLastOpSeekToLast
  };

  // Compare the two iterators, `iter` and `cmp_iter`, which are expected to
  // be at the same position, unless `iter` may have become invalid or
  // undefined because of upper/lower bounds or the prefix extractor.
  // Flags a test failure if the verification fails.
  // `*diverged` is set to true once the two iterators have diverged.
  // `op_logs` is the information to print when validation fails.
  void VerifyIterator(ThreadState* thread, ColumnFamilyHandle* cmp_cfh,
                      const ReadOptions& ro, Iterator* iter, Iterator* cmp_iter,
                      LastIterateOp op, const Slice& seek_key,
                      const std::string& op_logs, bool* diverged);

  virtual Status TestBackupRestore(ThreadState* thread,
                                   const std::vector<int>& rand_column_families,
                                   const std::vector<int64_t>& rand_keys);

  virtual Status TestCheckpoint(ThreadState* thread,
                                const std::vector<int>& rand_column_families,
                                const std::vector<int64_t>& rand_keys);

  void TestCompactFiles(ThreadState* thread, ColumnFamilyHandle* column_family);

  Status TestFlush(const std::vector<int>& rand_column_families);

  Status TestPauseBackground(ThreadState* thread);

  void TestAcquireSnapshot(ThreadState* thread, int rand_column_family,
                           const std::string& keystr, uint64_t i);

  Status MaybeReleaseSnapshots(ThreadState* thread, uint64_t i);

#ifndef ROCKSDB_LITE
  Status VerifyGetLiveFiles() const;
  Status VerifyGetSortedWalFiles() const;
  Status VerifyGetCurrentWalFile() const;
  void TestGetProperty(ThreadState* thread) const;

  virtual Status TestApproximateSize(
      ThreadState* thread, uint64_t iteration,
      const std::vector<int>& rand_column_families,
      const std::vector<int64_t>& rand_keys);
#endif  // !ROCKSDB_LITE

  virtual Status TestCustomOperations(
      ThreadState* /*thread*/,
      const std::vector<int>& /*rand_column_families*/) {
    return Status::NotSupported("TestCustomOperations() must be overridden");
  }

  void VerificationAbort(SharedState* shared, std::string msg, Status s) const;

  void VerificationAbort(SharedState* shared, std::string msg, int cf,
                         int64_t key) const;

  void VerificationAbort(SharedState* shared, std::string msg, int cf,
                         int64_t key, Slice value_from_db,
                         Slice value_from_expected) const;

  void PrintEnv() const;

  void Open(SharedState* shared);

  void Reopen(ThreadState* thread);

  virtual void RegisterAdditionalListeners() {}

#ifndef ROCKSDB_LITE
  virtual void PrepareTxnDbOptions(SharedState* /*shared*/,
                                   TransactionDBOptions& /*txn_db_opts*/) {}
#endif

  void MaybeUseOlderTimestampForPointLookup(ThreadState* thread,
                                            std::string& ts_str,
                                            Slice& ts_slice,
                                            ReadOptions& read_opts);

  void MaybeUseOlderTimestampForRangeScan(ThreadState* thread,
                                          std::string& ts_str, Slice& ts_slice,
                                          ReadOptions& read_opts);
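
  // A hedged sketch of the caller-side contract assumed by the two helpers
  // above: `ts_str` owns the encoded timestamp, `ts_slice` wraps it, and
  // `read_opts.timestamp` may be set to point at `ts_slice`, so both must
  // outlive the read (`cfh`, `key`, `value` are placeholder names):
  //
  //   std::string ts_str;
  //   Slice ts_slice;
  //   ReadOptions read_opts;
  //   MaybeUseOlderTimestampForPointLookup(thread, ts_str, ts_slice,
  //                                        read_opts);
  //   // If an older timestamp was chosen, read_opts.timestamp == &ts_slice.
  //   Status s = db_->Get(read_opts, cfh, key, &value);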

  std::shared_ptr<Cache> cache_;
  std::shared_ptr<Cache> compressed_cache_;
  std::shared_ptr<const FilterPolicy> filter_policy_;
  DB* db_;
#ifndef ROCKSDB_LITE
  TransactionDB* txn_db_;
#endif

  // Currently only used in MultiOpsTxnsStressTest
  std::atomic<DB*> db_aptr_;

  Options options_;
  SystemClock* clock_;
  std::vector<ColumnFamilyHandle*> column_families_;
  std::vector<std::string> column_family_names_;
  std::atomic<int> new_column_family_name_;
  int num_times_reopened_;
  std::unordered_map<std::string, std::vector<std::string>> options_table_;
  std::vector<std::string> options_index_;
  std::atomic<bool> db_preload_finished_;

  // Fields used for continuous verification from another thread
  DB* cmp_db_;
  std::vector<ColumnFamilyHandle*> cmp_cfhs_;
  bool is_db_stopped_;
};
// Load options from the OPTIONS file and populate `options`.
extern bool InitializeOptionsFromFile(Options& options);

// Initialize `options` using command line arguments.
// When this function is called, `cache`, `block_cache_compressed`, and
// `filter_policy` have all been initialized. Therefore, we just pass them as
// input arguments.
extern void InitializeOptionsFromFlags(
    const std::shared_ptr<Cache>& cache,
    const std::shared_ptr<Cache>& block_cache_compressed,
    const std::shared_ptr<const FilterPolicy>& filter_policy, Options& options);

// Initialize `options` on which `InitializeOptionsFromFile()` and
// `InitializeOptionsFromFlags()` have both been called already.
// There are two cases.
// Case 1: OPTIONS file is not specified. Command line arguments have been used
//         to initialize `options`. InitializeOptionsGeneral() will use
//         `cache`, `block_cache_compressed` and `filter_policy` to initialize
//         the corresponding fields of `options`. InitializeOptionsGeneral()
//         will also set up other fields of `options` so that the stress test
//         can run. Examples include `create_if_missing`,
//         `create_missing_column_families`, etc.
// Case 2: OPTIONS file is specified. It is possible that, after loading from
//         the given OPTIONS file, some shared object fields are still not
//         initialized because they are not set in the OPTIONS file. In this
//         case, if command line arguments indicate that the user wants to set
//         up such shared objects, e.g. block cache, compressed block cache,
//         row cache, filter policy, then InitializeOptionsGeneral() will honor
//         the user's choice by using `cache`, `block_cache_compressed` and
//         `filter_policy` passed as input arguments.
//
// InitializeOptionsGeneral() must not overwrite fields of `options` loaded
// from the OPTIONS file.
extern void InitializeOptionsGeneral(
    const std::shared_ptr<Cache>& cache,
    const std::shared_ptr<Cache>& block_cache_compressed,
    const std::shared_ptr<const FilterPolicy>& filter_policy, Options& options);
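
// The intended call order of the three functions above is roughly the
// following (a sketch of the high-level flow, not the exact driver code):
//
//   if (!InitializeOptionsFromFile(options)) {
//     InitializeOptionsFromFlags(cache, block_cache_compressed, filter_policy,
//                                options);
//   }
//   InitializeOptionsGeneral(cache, block_cache_compressed, filter_policy,
//                            options);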

// If no OPTIONS file is specified, set up `options` so that we can test
// user-defined timestamps, which require `-user_timestamp_size=8`.
// This function also checks for features that are known to be (currently)
// incompatible with user-defined timestamps.
extern void CheckAndSetOptionsForUserTimestamp(Options& options);

}  // namespace ROCKSDB_NAMESPACE

#endif  // GFLAGS