mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 16:30:56 +00:00
b443d24f4d
Summary: Stress test background threads do not coordinate with test worker threads for db reopen in the middle of a test run, thus accessing db obj in a stress test bg thread can race with test workers. Remove the TimestampedSnapshotThread. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10373 Test Plan: ``` ./db_stress --acquire_snapshot_one_in=0 --adaptive_readahead=0 --allow_concurrent_memtable_write=1 \ --allow_data_in_errors=True --async_io=0 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 \ --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=8 \ --block_size=16384 --bloom_bits=7.580319535285394 --bottommost_compression_type=disable \ --bytes_per_sync=262144 --cache_index_and_filter_blocks=0 --cache_size=8388608 --cache_type=lru_cache \ --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=0 --charge_filter_construction=1 \ --charge_table_reader=0 --checkpoint_one_in=0 --checksum_type=kxxHash64 --clear_column_family_one_in=0 \ --compact_files_one_in=1000000 --compact_range_one_in=0 --compaction_pri=1 --compaction_ttl=0 \ --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 \ --compression_type=xpress --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=0 \ --continuous_verification_interval=0 --create_timestamped_snapshot_one_in=20 --data_block_index_type=0 \ --db=/dev/shm/rocksdb/ --db_write_buffer_size=0 --delpercent=5 --delrangepercent=0 --destroy_db_initially=1 \ --detect_filter_construct_corruption=0 --disable_wal=0 --enable_compaction_filter=1 --enable_pipelined_write=0 \ --fail_if_options_file_error=1 --file_checksum_impl=xxh64 --flush_one_in=1000000 --format_version=2 \ --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 \ --get_sorted_wal_files_one_in=0 --index_block_restart_interval=11 --index_type=0 --ingest_external_file_one_in=0 \ --iterpercent=0 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=True \ --log2_keys_per_lock=10 --long_running_snapshots=0 --mark_for_compaction_one_file_in=10 \ --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=25000000 \ --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=64 \ --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=0 --memtable_prefix_bloom_size_ratio=0.5 \ --memtable_whole_key_filtering=1 --memtablerep=skip_list --mmap_read=0 --mock_direct_io=True \ --nooverwritepercent=1 --open_files=500000 --open_metadata_write_fault_one_in=0 \ --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=20000 \ --optimize_filters_for_memory=1 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=2 \ --pause_background_one_in=1000000 --periodic_compaction_seconds=0 --prefix_size=1 \ --prefixpercent=5 --prepopulate_block_cache=0 --progress_reports=0 --read_fault_one_in=1000 \ --readpercent=55 --recycle_log_file_num=0 --reopen=100 --ribbon_starting_level=8 \ --secondary_cache_fault_one_in=0 --secondary_cache_uri= --snapshot_hold_ops=100000 \ --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 \ --subcompactions=3 --sync=0 --sync_fault_injection=0 --target_file_size_base=2097152 \ --target_file_size_multiplier=2 --test_batches_snapshots=0 --top_level_index_pinning=1 \ --txn_write_policy=0 --unordered_write=0 --unpartitioned_pinning=0 \ --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=1 --use_full_merge_v1=1 \ --use_merge=1 --use_multiget=0 --use_txn=1 --user_timestamp_size=0 --value_size_mult=32 \ --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 \ --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=none \ --write_buffer_size=4194304 --write_dbid_to_manifest=0 --writepercent=35 ``` make crash_test_with_txn make crash_test_with_multiops_wc_txn Reviewed By: jay-zhuang Differential Revision: D37903189 Pulled By: riversand963 fbshipit-source-id: cd1728ad7ba4ce4cf47af23c4f65dda0956744f9
188 lines
5.5 KiB
C++
188 lines
5.5 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
//
|
|
|
|
#ifdef GFLAGS
|
|
#include "db_stress_tool/db_stress_common.h"
|
|
#include "utilities/fault_injection_fs.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
void ThreadBody(void* v) {
|
|
ThreadState* thread = reinterpret_cast<ThreadState*>(v);
|
|
SharedState* shared = thread->shared;
|
|
|
|
if (!FLAGS_skip_verifydb && shared->ShouldVerifyAtBeginning()) {
|
|
thread->shared->GetStressTest()->VerifyDb(thread);
|
|
}
|
|
{
|
|
MutexLock l(shared->GetMutex());
|
|
shared->IncInitialized();
|
|
if (shared->AllInitialized()) {
|
|
shared->GetCondVar()->SignalAll();
|
|
}
|
|
while (!shared->Started()) {
|
|
shared->GetCondVar()->Wait();
|
|
}
|
|
}
|
|
thread->shared->GetStressTest()->OperateDb(thread);
|
|
|
|
{
|
|
MutexLock l(shared->GetMutex());
|
|
shared->IncOperated();
|
|
if (shared->AllOperated()) {
|
|
shared->GetCondVar()->SignalAll();
|
|
}
|
|
while (!shared->VerifyStarted()) {
|
|
shared->GetCondVar()->Wait();
|
|
}
|
|
}
|
|
|
|
if (!FLAGS_skip_verifydb) {
|
|
thread->shared->GetStressTest()->VerifyDb(thread);
|
|
}
|
|
|
|
{
|
|
MutexLock l(shared->GetMutex());
|
|
shared->IncDone();
|
|
if (shared->AllDone()) {
|
|
shared->GetCondVar()->SignalAll();
|
|
}
|
|
}
|
|
}
|
|
|
|
bool RunStressTest(StressTest* stress) {
|
|
SystemClock* clock = db_stress_env->GetSystemClock().get();
|
|
SharedState shared(db_stress_env, stress);
|
|
stress->InitDb(&shared);
|
|
stress->FinishInitDb(&shared);
|
|
|
|
if (FLAGS_sync_fault_injection) {
|
|
fault_fs_guard->SetFilesystemDirectWritable(false);
|
|
}
|
|
if (FLAGS_write_fault_one_in) {
|
|
fault_fs_guard->EnableWriteErrorInjection();
|
|
}
|
|
|
|
uint32_t n = FLAGS_threads;
|
|
uint64_t now = clock->NowMicros();
|
|
fprintf(stdout, "%s Initializing worker threads\n",
|
|
clock->TimeToString(now / 1000000).c_str());
|
|
|
|
shared.SetThreads(n);
|
|
|
|
if (FLAGS_compaction_thread_pool_adjust_interval > 0) {
|
|
shared.IncBgThreads();
|
|
}
|
|
|
|
if (FLAGS_continuous_verification_interval > 0) {
|
|
shared.IncBgThreads();
|
|
}
|
|
|
|
std::vector<ThreadState*> threads(n);
|
|
for (uint32_t i = 0; i < n; i++) {
|
|
threads[i] = new ThreadState(i, &shared);
|
|
db_stress_env->StartThread(ThreadBody, threads[i]);
|
|
}
|
|
|
|
ThreadState bg_thread(0, &shared);
|
|
if (FLAGS_compaction_thread_pool_adjust_interval > 0) {
|
|
db_stress_env->StartThread(PoolSizeChangeThread, &bg_thread);
|
|
}
|
|
|
|
ThreadState continuous_verification_thread(0, &shared);
|
|
if (FLAGS_continuous_verification_interval > 0) {
|
|
db_stress_env->StartThread(DbVerificationThread,
|
|
&continuous_verification_thread);
|
|
}
|
|
|
|
// Each thread goes through the following states:
|
|
// initializing -> wait for others to init -> read/populate/depopulate
|
|
// wait for others to operate -> verify -> done
|
|
|
|
{
|
|
MutexLock l(shared.GetMutex());
|
|
while (!shared.AllInitialized()) {
|
|
shared.GetCondVar()->Wait();
|
|
}
|
|
if (shared.ShouldVerifyAtBeginning()) {
|
|
if (shared.HasVerificationFailedYet()) {
|
|
fprintf(stderr, "Crash-recovery verification failed :(\n");
|
|
} else {
|
|
fprintf(stdout, "Crash-recovery verification passed :)\n");
|
|
}
|
|
}
|
|
|
|
// This is after the verification step to avoid making all those `Get()`s
|
|
// and `MultiGet()`s contend on the DB-wide trace mutex.
|
|
stress->TrackExpectedState(&shared);
|
|
|
|
now = clock->NowMicros();
|
|
fprintf(stdout, "%s Starting database operations\n",
|
|
clock->TimeToString(now / 1000000).c_str());
|
|
|
|
shared.SetStart();
|
|
shared.GetCondVar()->SignalAll();
|
|
while (!shared.AllOperated()) {
|
|
shared.GetCondVar()->Wait();
|
|
}
|
|
|
|
now = clock->NowMicros();
|
|
if (FLAGS_test_batches_snapshots) {
|
|
fprintf(stdout, "%s Limited verification already done during gets\n",
|
|
clock->TimeToString((uint64_t)now / 1000000).c_str());
|
|
} else if (FLAGS_skip_verifydb) {
|
|
fprintf(stdout, "%s Verification skipped\n",
|
|
clock->TimeToString((uint64_t)now / 1000000).c_str());
|
|
} else {
|
|
fprintf(stdout, "%s Starting verification\n",
|
|
clock->TimeToString((uint64_t)now / 1000000).c_str());
|
|
}
|
|
|
|
shared.SetStartVerify();
|
|
shared.GetCondVar()->SignalAll();
|
|
while (!shared.AllDone()) {
|
|
shared.GetCondVar()->Wait();
|
|
}
|
|
}
|
|
|
|
for (unsigned int i = 1; i < n; i++) {
|
|
threads[0]->stats.Merge(threads[i]->stats);
|
|
}
|
|
threads[0]->stats.Report("Stress Test");
|
|
|
|
for (unsigned int i = 0; i < n; i++) {
|
|
delete threads[i];
|
|
threads[i] = nullptr;
|
|
}
|
|
now = clock->NowMicros();
|
|
if (!FLAGS_skip_verifydb && !FLAGS_test_batches_snapshots &&
|
|
!shared.HasVerificationFailedYet()) {
|
|
fprintf(stdout, "%s Verification successful\n",
|
|
clock->TimeToString(now / 1000000).c_str());
|
|
}
|
|
stress->PrintStatistics();
|
|
|
|
if (FLAGS_compaction_thread_pool_adjust_interval > 0 ||
|
|
FLAGS_continuous_verification_interval > 0) {
|
|
MutexLock l(shared.GetMutex());
|
|
shared.SetShouldStopBgThread();
|
|
while (!shared.BgThreadsFinished()) {
|
|
shared.GetCondVar()->Wait();
|
|
}
|
|
}
|
|
|
|
if (shared.HasVerificationFailedYet()) {
|
|
fprintf(stderr, "Verification failed :(\n");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
#endif // GFLAGS
|