rocksdb/tools/db_crashtest.py

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals

import argparse

import os
import random
import shutil
import subprocess
import sys
import tempfile
import time

# params overwrite priority:
#   for default:
#       default_params < {blackbox,whitebox}_default_params < args
#   for simple:
#       default_params < {blackbox,whitebox}_default_params <
#       simple_default_params <
#       {blackbox,whitebox}_simple_default_params < args
#   for cf_consistency:
#       default_params < {blackbox,whitebox}_default_params <
#       cf_consistency_params < args
#   for txn:
#       default_params < {blackbox,whitebox}_default_params < txn_params < args
#   for ts:
#       default_params < {blackbox,whitebox}_default_params < ts_params < args
#   for multiops_txn:
#       default_params < {blackbox,whitebox}_default_params < multiops_txn_params < args


default_params = {
    "acquire_snapshot_one_in": lambda: random.choice([100, 10000]),
    "backup_max_size": 100 * 1024 * 1024,
    # Consider larger number when backups considered more stable
    "backup_one_in": lambda: random.choice([1000, 100000]),
    "batch_protection_bytes_per_key": lambda: random.choice([0, 8]),
    "memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
    "block_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
    "block_size": 16384,
    "bloom_bits": lambda: random.choice(
        [random.randint(0, 19), random.lognormvariate(2.3, 1.3)]
    ),
    "cache_index_and_filter_blocks": lambda: random.randint(0, 1),
    "cache_size": lambda: random.choice([8388608, 33554432]),
    "charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]),
    "charge_filter_construction": lambda: random.choice([0, 1]),
    "charge_table_reader": lambda: random.choice([0, 1]),
    "charge_file_metadata": lambda: random.choice([0, 1]),
    "checkpoint_one_in":  lambda: random.choice([10000, 1000000]),
    "compression_type": lambda: random.choice(
        ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]
    ),
    "bottommost_compression_type": lambda: "disable"
    if random.randint(0, 1) == 0
    else random.choice(["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
    "checksum_type": lambda: random.choice(
        ["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]
    ),
    "compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1),
    "compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1),
    # Disabled compression_parallel_threads as the feature is not stable
    # lambda: random.choice([1] * 9 + [4])
    "compression_parallel_threads": 1,
    "compression_max_dict_buffer_bytes": lambda: (1 << random.randint(0, 40)) - 1,
    "compression_use_zstd_dict_trainer": lambda: random.randint(0, 1),
    "compression_checksum": lambda: random.randint(0, 1),
    "clear_column_family_one_in": 0,
    "compact_files_one_in":  lambda: random.choice([1000, 1000000]),
    "compact_range_one_in":  lambda: random.choice([1000, 1000000]),
    "compaction_pri": random.randint(0, 4),
    "data_block_index_type": lambda: random.choice([0, 1]),
    "delpercent": 4,
    "delrangepercent": 1,
    "destroy_db_initially": 0,
    "enable_pipelined_write": lambda: random.randint(0, 1),
    "enable_compaction_filter": lambda: random.choice([0, 0, 0, 1]),
    "expected_values_dir": lambda: setup_expected_values_dir(),
    "fail_if_options_file_error": lambda: random.randint(0, 1),
    "flush_one_in": lambda: random.choice([1000, 1000000]),
    "manual_wal_flush_one_in": lambda: random.choice([0, 1000]),
    "file_checksum_impl": lambda: random.choice(["none", "crc32c", "xxh64", "big"]),
    "get_live_files_one_in": lambda: random.choice([10000, 1000000]),
    # Note: the following two are intentionally disabled as the corresponding
    # APIs are not guaranteed to succeed.
    "get_sorted_wal_files_one_in": 0,
    "get_current_wal_file_one_in": 0,
    # Temporarily disable hash index
    "index_type": lambda: random.choice([0, 0, 0, 2, 2, 3]),
    "ingest_external_file_one_in": lambda: random.choice([1000, 1000000]),
    "iterpercent": 10,
    "lock_wal_one_in": lambda: random.choice([10000, 1000000]),
    "mark_for_compaction_one_file_in": lambda: 10 * random.randint(0, 1),
    "max_background_compactions": 20,
    "max_bytes_for_level_base": 10485760,
    # max_key has to be the same across invocations for verification to work, hence no lambda
    "max_key": random.choice([100000, 25000000]),
    "max_write_buffer_number": 3,
    "mmap_read": lambda: random.randint(0, 1),
    # Setting `nooverwritepercent > 0` is only possible because we do not vary
    # the random seed, so the same keys are chosen by every run for disallowing
    # overwrites.
    "nooverwritepercent": 1,
    "open_files": lambda: random.choice([-1, -1, 100, 500000]),
    "optimize_filters_for_memory": lambda: random.randint(0, 1),
    "partition_filters": lambda: random.randint(0, 1),
    "partition_pinning": lambda: random.randint(0, 3),
    "pause_background_one_in": lambda: random.choice([10000, 1000000]),
    "prefix_size": lambda: random.choice([-1, 1, 5, 7, 8]),
    "prefixpercent": 5,
    "progress_reports": 0,
    "readpercent": 45,
    "recycle_log_file_num": lambda: random.randint(0, 1),
    "snapshot_hold_ops": 100000,
    "sst_file_manager_bytes_per_sec": lambda: random.choice([0, 104857600]),
    "sst_file_manager_bytes_per_truncate": lambda: random.choice([0, 1048576]),
    "long_running_snapshots": lambda: random.randint(0, 1),
    "subcompactions": lambda: random.randint(1, 4),
    "target_file_size_base": lambda: random.choice([512 * 1024, 2048 * 1024]),
    "target_file_size_multiplier": 2,
    "test_batches_snapshots": random.randint(0, 1),
    "top_level_index_pinning": lambda: random.randint(0, 3),
    "unpartitioned_pinning": lambda: random.randint(0, 3),
    "use_direct_reads": lambda: random.randint(0, 1),
    "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
    "mock_direct_io": False,
    "cache_type": lambda: random.choice(
        ["lru_cache", "fixed_hyper_clock_cache", "auto_hyper_clock_cache",
         "auto_hyper_clock_cache", "tiered_lru_cache",
         "tiered_fixed_hyper_clock_cache", "tiered_auto_hyper_clock_cache",
         "tiered_auto_hyper_clock_cache"]
    ),
    "use_full_merge_v1": lambda: random.randint(0, 1),
    "use_merge": lambda: random.randint(0, 1),
    # use_put_entity_one_in has to be the same across invocations for verification to work, hence no lambda
    "use_put_entity_one_in": random.choice([0] * 7 + [1, 5, 10]),
    # 999 -> use Bloom API
    "bloom_before_level": lambda: random.choice([random.randint(-1, 2), random.randint(-1, 10), 0x7fffffff - 1, 0x7fffffff]),
    "value_size_mult": 32,
    "verification_only": 0,
    "verify_checksum": 1,
    "write_buffer_size": lambda: random.choice([1024 * 1024, 4 * 1024 * 1024]),
    "writepercent": 35,
    "format_version": lambda: random.choice([2, 3, 4, 5, 6, 6]),
    "index_block_restart_interval": lambda: random.choice(range(1, 16)),
    "use_multiget": lambda: random.randint(0, 1),
    "use_get_entity": lambda: random.choice([0] * 7 + [1]),
    "use_multi_get_entity": lambda: random.choice([0] * 7 + [1]),
    "periodic_compaction_seconds": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
    # 0 = never (used by some), 10 = often (for threading bugs), 600 = default
    "stats_dump_period_sec": lambda: random.choice([0, 10, 600]),
    "compaction_ttl": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
    "fifo_allow_compaction": lambda: random.randint(0, 1),
    # Test small max_manifest_file_size in a smaller chance, as most of the
    # time we wnat manifest history to be preserved to help debug
    "max_manifest_file_size": lambda: random.choice(
        [t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]
    ),
    # Sync mode might make test runs slower so running it in a smaller chance
    "sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]),
    "bytes_per_sync": lambda: random.choice([0, 262144]),
    "wal_bytes_per_sync": lambda: random.choice([0, 524288]),
    "compaction_readahead_size": lambda: random.choice(
        [0, 0, 1024 * 1024]),
    "db_write_buffer_size": lambda: random.choice(
        [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]
    ),
    "use_write_buffer_manager": lambda: random.randint(0, 1),
    "avoid_unnecessary_blocking_io": random.randint(0, 1),
    "write_dbid_to_manifest": random.randint(0, 1),
    "avoid_flush_during_recovery": lambda: random.choice(
        [1 if t == 0 else 0 for t in range(0, 8)]
    ),
    "max_write_batch_group_size_bytes": lambda: random.choice(
        [16, 64, 1024 * 1024, 16 * 1024 * 1024]
    ),
    "level_compaction_dynamic_level_bytes": lambda: random.randint(0, 1),
    "verify_checksum_one_in": lambda: random.choice([100000, 1000000]),
    "verify_file_checksums_one_in": lambda: random.choice([100000, 1000000]),
    "verify_db_one_in": lambda: random.choice([10000, 100000]),
    "continuous_verification_interval": 0,
    "max_key_len": 3,
    "key_len_percent_dist": "1,30,69",
    "read_fault_one_in": lambda: random.choice([0, 32, 1000]),
    "write_fault_one_in": lambda: random.choice([0, 128, 1000]),
    "open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]),
    "open_write_fault_one_in": lambda: random.choice([0, 0, 16]),
    "open_read_fault_one_in": lambda: random.choice([0, 0, 32]),
    "sync_fault_injection": lambda: random.randint(0, 1),
    "get_property_one_in":  lambda: random.choice([100000, 1000000]),
    "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
    "max_write_buffer_size_to_maintain": lambda: random.choice(
        [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]
    ),
    "user_timestamp_size": 0,
    "secondary_cache_fault_one_in": lambda: random.choice([0, 0, 32]),
    "compressed_secondary_cache_size": lambda: random.choice([8388608, 16777216]),
    "prepopulate_block_cache": lambda: random.choice([0, 1]),
    "memtable_prefix_bloom_size_ratio": lambda: random.choice([0.001, 0.01, 0.1, 0.5]),
    "memtable_whole_key_filtering": lambda: random.randint(0, 1),
    "detect_filter_construct_corruption": lambda: random.choice([0, 1]),
    "adaptive_readahead": lambda: random.choice([0, 1]),
    "async_io": lambda: random.choice([0, 1]),
    "wal_compression": lambda: random.choice(["none", "zstd"]),
    "verify_sst_unique_id_in_manifest": 1,  # always do unique_id verification
    "secondary_cache_uri": lambda: random.choice(
        [
            "",
            "",
            "",
            "compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true",
        ]
    ),
    "allow_data_in_errors": True,
    "enable_thread_tracking": lambda: random.choice([0, 1]),
    "readahead_size": lambda: random.choice([0, 16384, 524288]),
    "initial_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
    "max_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
    "num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]),
    "min_write_buffer_number_to_merge": lambda: random.choice([1, 2]),
    "preserve_internal_time_seconds": lambda: random.choice([0, 60, 3600, 36000]),
    "memtable_max_range_deletions": lambda: random.choice([0] * 6 + [100, 1000]),
    # 0 (disable) is the default and more commonly used value.
    "bottommost_file_compaction_delay": lambda: random.choice(
        [0, 0, 0, 600, 3600, 86400]
    ),
    "auto_readahead_size" : lambda: random.choice([0, 1]),
    "verify_iterator_with_expected_state_one_in": 5,
}

_TEST_DIR_ENV_VAR = "TEST_TMPDIR"
# If TEST_TMPDIR_EXPECTED is not specified, default value will be TEST_TMPDIR
_TEST_EXPECTED_DIR_ENV_VAR = "TEST_TMPDIR_EXPECTED"
_DEBUG_LEVEL_ENV_VAR = "DEBUG_LEVEL"

stress_cmd = "./db_stress"
cleanup_cmd = None


def is_release_mode():
    return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0"


def get_dbname(test_name):
    test_dir_name = "rocksdb_crashtest_" + test_name
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        dbname = tempfile.mkdtemp(prefix=test_dir_name)
    else:
        dbname = test_tmpdir + "/" + test_dir_name
        shutil.rmtree(dbname, True)
        if cleanup_cmd is not None:
            print("Running DB cleanup command - %s\n" % cleanup_cmd)
            # Ignore failure
            os.system(cleanup_cmd)
        try:
            os.mkdir(dbname)
        except OSError:
            pass
    return dbname


expected_values_dir = None


def setup_expected_values_dir():
    global expected_values_dir
    if expected_values_dir is not None:
        return expected_values_dir
    expected_dir_prefix = "rocksdb_crashtest_expected_"
    test_exp_tmpdir = os.environ.get(_TEST_EXPECTED_DIR_ENV_VAR)

    # set the value to _TEST_DIR_ENV_VAR if _TEST_EXPECTED_DIR_ENV_VAR is not
    # specified.
    if test_exp_tmpdir is None or test_exp_tmpdir == "":
        test_exp_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)

    if test_exp_tmpdir is None or test_exp_tmpdir == "":
        expected_values_dir = tempfile.mkdtemp(prefix=expected_dir_prefix)
    else:
        # if tmpdir is specified, store the expected_values_dir under that dir
        expected_values_dir = test_exp_tmpdir + "/rocksdb_crashtest_expected"
        if os.path.exists(expected_values_dir):
            shutil.rmtree(expected_values_dir)
        os.mkdir(expected_values_dir)
    return expected_values_dir


multiops_txn_key_spaces_file = None


def setup_multiops_txn_key_spaces_file():
    global multiops_txn_key_spaces_file
    if multiops_txn_key_spaces_file is not None:
        return multiops_txn_key_spaces_file
    key_spaces_file_prefix = "rocksdb_crashtest_multiops_txn_key_spaces"
    test_exp_tmpdir = os.environ.get(_TEST_EXPECTED_DIR_ENV_VAR)

    # set the value to _TEST_DIR_ENV_VAR if _TEST_EXPECTED_DIR_ENV_VAR is not
    # specified.
    if test_exp_tmpdir is None or test_exp_tmpdir == "":
        test_exp_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)

    if test_exp_tmpdir is None or test_exp_tmpdir == "":
        multiops_txn_key_spaces_file = tempfile.mkstemp(prefix=key_spaces_file_prefix)[
            1
        ]
    else:
        if not os.path.exists(test_exp_tmpdir):
            os.mkdir(test_exp_tmpdir)
        multiops_txn_key_spaces_file = tempfile.mkstemp(
            prefix=key_spaces_file_prefix, dir=test_exp_tmpdir
        )[1]
    return multiops_txn_key_spaces_file


def is_direct_io_supported(dbname):
    with tempfile.NamedTemporaryFile(dir=dbname) as f:
        try:
            os.open(f.name, os.O_DIRECT)
        except BaseException:
            return False
        return True


blackbox_default_params = {
    "disable_wal": lambda: random.choice([0, 0, 0, 1]),
    # total time for this script to test db_stress
    "duration": 6000,
    # time for one db_stress instance to run
    "interval": 120,
    # since we will be killing anyway, use large value for ops_per_thread
    "ops_per_thread": 100000000,
    "reopen": 0,
    "set_options_one_in": 10000,
}

whitebox_default_params = {
    # TODO: enable this at random once we figure out two things. First, we need
    # to ensure the kill odds in WAL-disabled runs result in regular crashing
    # before the fifteen minute timeout. When WAL is disabled there are very few
    # calls to write functions since writes to SST files are buffered and other
    # writes (e.g., MANIFEST) are infrequent. Crashing in reasonable time might
    # currently assume killpoints in write functions are reached frequently.
    #
    # Second, we need to make sure disabling WAL works with `-reopen > 0`.
    "disable_wal": 0,
    "duration": 10000,
    "log2_keys_per_lock": 10,
    "ops_per_thread": 200000,
    "random_kill_odd": 888887,
    "reopen": 20,
}

simple_default_params = {
    "allow_concurrent_memtable_write": lambda: random.randint(0, 1),
    "column_families": 1,
    # TODO: re-enable once internal task T124324915 is fixed.
    # "experimental_mempurge_threshold": lambda: 10.0*random.random(),
    "max_background_compactions": 1,
    "max_bytes_for_level_base": 67108864,
    "memtablerep": "skip_list",
    "target_file_size_base": 16777216,
    "target_file_size_multiplier": 1,
    "test_batches_snapshots": 0,
    "write_buffer_size": 32 * 1024 * 1024,
    "level_compaction_dynamic_level_bytes": lambda: random.randint(0, 1),
    "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
}

blackbox_simple_default_params = {
    "open_files": -1,
    "set_options_one_in": 0,
}

whitebox_simple_default_params = {}

cf_consistency_params = {
    "disable_wal": lambda: random.randint(0, 1),
    "reopen": 0,
    "test_cf_consistency": 1,
    # use small value for write_buffer_size so that RocksDB triggers flush
    # more frequently
    "write_buffer_size": 1024 * 1024,
    "enable_pipelined_write": lambda: random.randint(0, 1),
    # Snapshots are used heavily in this test mode, while they are incompatible
    # with compaction filter.
    "enable_compaction_filter": 0,
    # `CfConsistencyStressTest::TestIngestExternalFile()` is not implemented.
    "ingest_external_file_one_in": 0,
    # `CfConsistencyStressTest::TestIterateAgainstExpected()` is not implemented.
    "verify_iterator_with_expected_state_one_in": 0,
}

# For pessimistic transaction db
txn_params = {
    "use_txn": 1,
    "use_optimistic_txn": 0,
    # Avoid lambda to set it once for the entire test
    "txn_write_policy": random.randint(0, 2),
    "unordered_write": random.randint(0, 1),
    # TODO: there is such a thing as transactions with WAL disabled. We should
    # cover that case.
    "disable_wal": 0,
    # OpenReadOnly after checkpoint is not currnetly compatible with WritePrepared txns
    "checkpoint_one_in": 0,
    # pipeline write is not currnetly compatible with WritePrepared txns
    "enable_pipelined_write": 0,
    "create_timestamped_snapshot_one_in": random.choice([0, 20]),
    # PutEntity in transactions is not yet implemented
    "use_put_entity_one_in": 0,
}

# For optimistic transaction db
optimistic_txn_params = {
    "use_txn": 1,
    "use_optimistic_txn": 1,
    "occ_validation_policy": random.randint(0, 1),
    "share_occ_lock_buckets": random.randint(0, 1),
    "occ_lock_bucket_count": lambda: random.choice([10, 100, 500]),
    # PutEntity in transactions is not yet implemented
    "use_put_entity_one_in": 0,
}

best_efforts_recovery_params = {
    "best_efforts_recovery": 1,
    "disable_wal": 1,
    "column_families": 1,
    "skip_verifydb": 1,
    "verify_db_one_in": 0
}

blob_params = {
    "allow_setting_blob_options_dynamically": 1,
    # Enable blob files and GC with a 75% chance initially; note that they might still be
    # enabled/disabled during the test via SetOptions
    "enable_blob_files": lambda: random.choice([0] + [1] * 3),
    "min_blob_size": lambda: random.choice([0, 8, 16]),
    "blob_file_size": lambda: random.choice([1048576, 16777216, 268435456, 1073741824]),
    "blob_compression_type": lambda: random.choice(["none", "snappy", "lz4", "zstd"]),
    "enable_blob_garbage_collection": lambda: random.choice([0] + [1] * 3),
    "blob_garbage_collection_age_cutoff": lambda: random.choice(
        [0.0, 0.25, 0.5, 0.75, 1.0]
    ),
    "blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
    "blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
    "blob_file_starting_level": lambda: random.choice(
        [0] * 4 + [1] * 3 + [2] * 2 + [3]
    ),
    "use_blob_cache": lambda: random.randint(0, 1),
    "use_shared_block_and_blob_cache": lambda: random.randint(0, 1),
    "blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]),
    "prepopulate_blob_cache": lambda: random.randint(0, 1),
}

ts_params = {
    "test_cf_consistency": 0,
    "test_batches_snapshots": 0,
    "user_timestamp_size": 8,
    # Below flag is randomly picked once and kept consistent in following runs.
    "persist_user_defined_timestamps": random.choice([0, 1, 1]),
    "use_merge": 0,
    "use_full_merge_v1": 0,
    "use_txn": 0,
    "ingest_external_file_one_in": 0,
    # PutEntity with timestamps is not yet implemented
    "use_put_entity_one_in": 0,
}

tiered_params = {
    "enable_tiered_storage": 1,
    # Set tiered compaction hot data time as: 1 minute, 1 hour, 10 hour
    "preclude_last_level_data_seconds": lambda: random.choice([60, 3600, 36000]),
    # only test universal compaction for now, level has known issue of
    # endless compaction
    "compaction_style": 1,
    # tiered storage doesn't support blob db yet
    "enable_blob_files": 0,
    "use_blob_db": 0,
}

multiops_txn_default_params = {
    "test_cf_consistency": 0,
    "test_batches_snapshots": 0,
    "test_multi_ops_txns": 1,
    "use_txn": 1,
    "two_write_queues": lambda: random.choice([0, 1]),
    # TODO: enable write-prepared
    "disable_wal": 0,
    "use_only_the_last_commit_time_batch_for_recovery": lambda: random.choice([0, 1]),
    "clear_column_family_one_in": 0,
    "column_families": 1,
    "enable_pipelined_write": lambda: random.choice([0, 1]),
    # This test already acquires snapshots in reads
    "acquire_snapshot_one_in": 0,
    "backup_one_in": 0,
    "writepercent": 0,
    "delpercent": 0,
    "delrangepercent": 0,
    "customopspercent": 80,
    "readpercent": 5,
    "iterpercent": 15,
    "prefixpercent": 0,
    "verify_db_one_in": 1000,
    "continuous_verification_interval": 1000,
    "delay_snapshot_read_one_in": 3,
    # 65536 is the smallest possible value for write_buffer_size. Smaller
    # values will be sanitized to 65536 during db open. SetOptions currently
    # does not sanitize options, but very small write_buffer_size may cause
    # assertion failure in
    # https://github.com/facebook/rocksdb/blob/7.0.fb/db/memtable.cc#L117.
    "write_buffer_size": 65536,
    # flush more frequently to generate more files, thus trigger more
    # compactions.
    "flush_one_in": 1000,
    "key_spaces_path": setup_multiops_txn_key_spaces_file(),
    "rollback_one_in": 4,
    # Re-enable once we have a compaction for MultiOpsTxnStressTest
    "enable_compaction_filter": 0,
    "create_timestamped_snapshot_one_in": 50,
    "sync_fault_injection": 0,
    # This test has aggressive flush frequency and small write buffer size.
    # Disabling write fault to avoid writes being stopped.
    "write_fault_one_in": 0,
    # PutEntity in transactions is not yet implemented
    "use_put_entity_one_in": 0,
    "use_get_entity": 0,
    "use_multi_get_entity": 0,
    # `MultiOpsTxnsStressTest::TestIterateAgainstExpected()` is not implemented.
    "verify_iterator_with_expected_state_one_in": 0,
}

multiops_wc_txn_params = {
    "txn_write_policy": 0,
    # TODO re-enable pipelined write. Not well tested atm
    "enable_pipelined_write": 0,
}

multiops_wp_txn_params = {
    "txn_write_policy": 1,
    "wp_snapshot_cache_bits": 1,
    # try small wp_commit_cache_bits, e.g. 0 once we explore storing full
    # commit sequence numbers in commit cache
    "wp_commit_cache_bits": 10,
    # pipeline write is not currnetly compatible with WritePrepared txns
    "enable_pipelined_write": 0,
    # OpenReadOnly after checkpoint is not currnetly compatible with WritePrepared txns
    "checkpoint_one_in": 0,
    # Required to be 1 in order to use commit-time-batch
    "use_only_the_last_commit_time_batch_for_recovery": 1,
    "clear_wp_commit_cache_one_in": 10,
    "create_timestamped_snapshot_one_in": 0,
    # sequence number can be advanced in SwitchMemtable::WriteRecoverableState() for WP.
    # disable it for now until we find another way to test LockWAL().
    "lock_wal_one_in": 0,
}

def finalize_and_sanitize(src_params):
    dest_params = {k: v() if callable(v) else v for (k, v) in src_params.items()}
    if is_release_mode():
        dest_params["read_fault_one_in"] = 0
    if dest_params.get("compression_max_dict_bytes") == 0:
        dest_params["compression_zstd_max_train_bytes"] = 0
        dest_params["compression_max_dict_buffer_bytes"] = 0
    if dest_params.get("compression_type") != "zstd":
        dest_params["compression_zstd_max_train_bytes"] = 0
    if dest_params.get("allow_concurrent_memtable_write", 1) == 1:
        dest_params["memtablerep"] = "skip_list"
    if dest_params["mmap_read"] == 1:
        dest_params["use_direct_io_for_flush_and_compaction"] = 0
        dest_params["use_direct_reads"] = 0
    if (
        dest_params["use_direct_io_for_flush_and_compaction"] == 1
        or dest_params["use_direct_reads"] == 1
    ) and not is_direct_io_supported(dest_params["db"]):
        if is_release_mode():
            print(
                "{} does not support direct IO. Disabling use_direct_reads and "
                "use_direct_io_for_flush_and_compaction.\n".format(dest_params["db"])
            )
            dest_params["use_direct_reads"] = 0
            dest_params["use_direct_io_for_flush_and_compaction"] = 0
        else:
            dest_params["mock_direct_io"] = True

    if dest_params["test_batches_snapshots"] == 1:
        dest_params["enable_compaction_filter"] = 0
        if dest_params["prefix_size"] < 0:
            dest_params["prefix_size"] = 1

    # Multi-key operations are not currently compatible with transactions or
    # timestamp.
    if (
        dest_params.get("test_batches_snapshots") == 1
        or dest_params.get("use_txn") == 1
        or dest_params.get("user_timestamp_size") > 0
    ):
        dest_params["ingest_external_file_one_in"] = 0
    if (
        dest_params.get("test_batches_snapshots") == 1
        or dest_params.get("use_txn") == 1
    ):
        dest_params["delpercent"] += dest_params["delrangepercent"]
        dest_params["delrangepercent"] = 0
    if (
        dest_params.get("disable_wal") == 1
        or dest_params.get("sync_fault_injection") == 1
        or dest_params.get("manual_wal_flush_one_in") > 0
    ):
        # File ingestion does not guarantee prefix-recoverability when unsynced
        # data can be lost. Ingesting a file syncs data immediately that is
        # newer than unsynced memtable data that can be lost on restart.
        #
        # Even if the above issue is fixed or worked around, our
        # trace-and-replay does not trace file ingestion, so in its current form
        # it would not recover the expected state to the correct point in time.
        dest_params["ingest_external_file_one_in"] = 0
        # The `DbStressCompactionFilter` can apply memtable updates to SST
        # files, which would be problematic when unsynced data can be lost in
        # crash recoveries.
        dest_params["enable_compaction_filter"] = 0
    # Only under WritePrepared txns, unordered_write would provide the same guarnatees as vanilla rocksdb
    if dest_params.get("unordered_write", 0) == 1:
        dest_params["txn_write_policy"] = 1
        dest_params["allow_concurrent_memtable_write"] = 1
    if dest_params.get("disable_wal", 0) == 1:
        dest_params["atomic_flush"] = 1
        dest_params["sync"] = 0
        dest_params["write_fault_one_in"] = 0
    if dest_params.get("open_files", 1) != -1:
        # Compaction TTL and periodic compactions are only compatible
        # with open_files = -1
        dest_params["compaction_ttl"] = 0
        dest_params["periodic_compaction_seconds"] = 0
    if dest_params.get("compaction_style", 0) == 2:
        # Disable compaction TTL in FIFO compaction, because right
        # now assertion failures are triggered.
        dest_params["compaction_ttl"] = 0
        dest_params["periodic_compaction_seconds"] = 0
    if dest_params["partition_filters"] == 1:
        if dest_params["index_type"] != 2:
            dest_params["partition_filters"] = 0
    if dest_params.get("atomic_flush", 0) == 1:
        # disable pipelined write when atomic flush is used.
        dest_params["enable_pipelined_write"] = 0
    if dest_params.get("sst_file_manager_bytes_per_sec", 0) == 0:
        dest_params["sst_file_manager_bytes_per_truncate"] = 0
    if dest_params.get("enable_compaction_filter", 0) == 1:
        # Compaction filter is incompatible with snapshots. Need to avoid taking
        # snapshots, as well as avoid operations that use snapshots for
        # verification.
        dest_params["acquire_snapshot_one_in"] = 0
        dest_params["compact_range_one_in"] = 0
        # Give the iterator ops away to reads.
        dest_params["readpercent"] += dest_params.get("iterpercent", 10)
        dest_params["iterpercent"] = 0
    if dest_params.get("prefix_size") == -1:
        dest_params["readpercent"] += dest_params.get("prefixpercent", 20)
        dest_params["prefixpercent"] = 0
    if (
        dest_params.get("prefix_size") == -1
        and dest_params.get("memtable_whole_key_filtering") == 0
    ):
        dest_params["memtable_prefix_bloom_size_ratio"] = 0
    if dest_params.get("two_write_queues") == 1:
        dest_params["enable_pipelined_write"] = 0
    if dest_params.get("best_efforts_recovery") == 1:
        dest_params["disable_wal"] = 1
        dest_params["enable_compaction_filter"] = 0
        dest_params["sync"] = 0
        dest_params["write_fault_one_in"] = 0
        dest_params["skip_verifydb"] = 1
        dest_params["verify_db_one_in"] = 0
    # Remove the following once write-prepared/write-unprepared with/without
    # unordered write supports timestamped snapshots
    if dest_params.get("create_timestamped_snapshot_one_in", 0) > 0:
        dest_params["txn_write_policy"] = 0
        dest_params["unordered_write"] = 0
    # For TransactionDB, correctness testing with unsync data loss is currently
    # compatible with only write committed policy
    if dest_params.get("use_txn") == 1 and dest_params.get("txn_write_policy") != 0:
        dest_params["sync_fault_injection"] = 0
        dest_params["manual_wal_flush_one_in"] = 0
    # Wide column stress tests require FullMergeV3
    if dest_params["use_put_entity_one_in"] != 0:
        dest_params["use_full_merge_v1"] = 0
    if dest_params["file_checksum_impl"] == "none":
        dest_params["verify_file_checksums_one_in"] = 0
    if dest_params["write_fault_one_in"] > 0:
        # background work may be disabled while DB is resuming after some error
        dest_params["max_write_buffer_number"] = max(dest_params["max_write_buffer_number"], 10)
    if dest_params["secondary_cache_uri"].find("compressed_secondary_cache") >= 0:
        dest_params["compressed_secondary_cache_size"] = 0
        dest_params["compressed_secondary_cache_ratio"] = 0.0
    if dest_params["cache_type"].find("tiered_") >= 0:
        if dest_params["compressed_secondary_cache_size"] > 0:
            dest_params["compressed_secondary_cache_ratio"] = \
                    float(dest_params["compressed_secondary_cache_size"]/ \
                    (dest_params["cache_size"] + dest_params["compressed_secondary_cache_size"]))
            dest_params["compressed_secondary_cache_size"] = 0
        else:
            dest_params["compressed_secondary_cache_ratio"] = 0.0
            dest_params["cache_type"] = dest_params["cache_type"].replace("tiered_", "")
    else:
        if dest_params["secondary_cache_uri"]:
            dest_params["compressed_secondary_cache_size"] = 0
            dest_params["compressed_secondary_cache_ratio"] = 0.0
    if dest_params["use_write_buffer_manager"]:
        if (dest_params["cache_size"] <= 0
            or dest_params["db_write_buffer_size"] <= 0):
            dest_params["use_write_buffer_manager"] = 0
    if dest_params["user_timestamp_size"] > 0 and dest_params["persist_user_defined_timestamps"] == 0:
        # Features that are not compatible with UDT in memtable only feature.
        dest_params["enable_blob_files"] = 0
        dest_params["allow_setting_blob_options_dynamically"] = 0
        dest_params["atomic_flush"] = 0
        dest_params["allow_concurrent_memtable_write"] = 0
        dest_params["block_protection_bytes_per_key"] = 0
        # TODO(yuzhangyu): make stress test logic handle this and enable testing
        # these APIs.
        # These operations need to compare side to side one operation with another.
        # It's hard to guarantee their consistency because when timestamps can be
        # collapsed, only operations using the same SuperVersion can be consistent
        # with each other. There is no external APIs to ensure that.
        dest_params["use_multiget"] = 0
        dest_params["use_multi_get_entity"] = 0
        dest_params["readpercent"] += dest_params.get("iterpercent", 10);
        dest_params["iterpercent"] = 0
        # Only best efforts recovery test support disabling wal and
        # disable atomic flush.
        if dest_params["test_best_efforts_recovery"] == 0:
          dest_params["disable_wal"] = 0
    return dest_params


def gen_cmd_params(args):
    params = {}

    params.update(default_params)
    if args.test_type == "blackbox":
        params.update(blackbox_default_params)
    if args.test_type == "whitebox":
        params.update(whitebox_default_params)
    if args.simple:
        params.update(simple_default_params)
        if args.test_type == "blackbox":
            params.update(blackbox_simple_default_params)
        if args.test_type == "whitebox":
            params.update(whitebox_simple_default_params)
    if args.cf_consistency:
        params.update(cf_consistency_params)
    if args.txn:
        params.update(txn_params)
    if args.optimistic_txn:
        params.update(optimistic_txn_params)
    if args.test_best_efforts_recovery:
        params.update(best_efforts_recovery_params)
    if args.enable_ts:
        params.update(ts_params)
    if args.test_multiops_txn:
        params.update(multiops_txn_default_params)
        if args.write_policy == "write_committed":
            params.update(multiops_wc_txn_params)
        elif args.write_policy == "write_prepared":
            params.update(multiops_wp_txn_params)
    if args.test_tiered_storage:
        params.update(tiered_params)

    # Best-effort recovery, tiered storage are currently incompatible with BlobDB.
    # Test BE recovery if specified on the command line; otherwise, apply BlobDB
    # related overrides with a 10% chance.
    if (
        not args.test_best_efforts_recovery
        and not args.test_tiered_storage
        and random.choice([0] * 9 + [1]) == 1
    ):
        params.update(blob_params)

    for k, v in vars(args).items():
        if v is not None:
            params[k] = v
    return params


def gen_cmd(params, unknown_params):
    finalzied_params = finalize_and_sanitize(params)
    cmd = (
        [stress_cmd]
        + [
            "--{0}={1}".format(k, v)
            for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)]
            if k
            not in {
                "test_type",
                "simple",
                "duration",
                "interval",
                "random_kill_odd",
                "cf_consistency",
                "txn",
                "optimistic_txn",
                "test_best_efforts_recovery",
                "enable_ts",
                "test_multiops_txn",
                "write_policy",
                "stress_cmd",
                "test_tiered_storage",
                "cleanup_cmd",
                "skip_tmpdir_check",
                "print_stderr_separately"
            }
            and v is not None
        ]
        + unknown_params
    )
    return cmd


def execute_cmd(cmd, timeout=None):
    child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd)))

    try:
        outs, errs = child.communicate(timeout=timeout)
        hit_timeout = False
        print("WARNING: db_stress ended before kill: exitcode=%d\n" % child.returncode)
    except subprocess.TimeoutExpired:
        hit_timeout = True
        child.kill()
        print("KILLED %d\n" % child.pid)
        outs, errs = child.communicate()

    return hit_timeout, child.returncode, outs.decode("utf-8"), errs.decode("utf-8")


def print_output_and_exit_on_error(stdout, stderr, print_stderr_separately=False):
    print("stdout:\n", stdout)
    if len(stderr) == 0:
        return

    if print_stderr_separately:
        print("stderr:\n", stderr, file=sys.stderr)
    else:
        print("stderr:\n", stderr)

    sys.exit(2)

def cleanup_after_success(dbname):
    shutil.rmtree(dbname, True)
    if cleanup_cmd is not None:
        print("Running DB cleanup command - %s\n" % cleanup_cmd)
        ret = os.system(cleanup_cmd)
        if ret != 0:
            print("TEST FAILED. DB cleanup returned error %d\n" % ret)
            sys.exit(1)

# This script runs and kills db_stress multiple times. It checks consistency
# in case of unsafe crashes in RocksDB.
def blackbox_crash_main(args, unknown_args):
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname("blackbox")
    exit_time = time.time() + cmd_params["duration"]

    print(
        "Running blackbox-crash-test with \n"
        + "interval_between_crash="
        + str(cmd_params["interval"])
        + "\n"
        + "total-duration="
        + str(cmd_params["duration"])
        + "\n"
    )

    while time.time() < exit_time:
        cmd = gen_cmd(
            dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
        )

        hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params["interval"])

        if not hit_timeout:
            print("Exit Before Killing")
            print_output_and_exit_on_error(outs, errs, args.print_stderr_separately)
            sys.exit(2)

        print_output_and_exit_on_error(outs, errs, args.print_stderr_separately)

        time.sleep(1)  # time to stabilize before the next run

        time.sleep(1)  # time to stabilize before the next run

    # We should run the test one more time with VerifyOnly setup and no-timeout
    # Only do this if the tests are not failed for total-duration
    print("Running final time for verification")
    cmd_params.update({"verification_only": 1})
    cmd_params.update({"skip_verifydb": 0})

    cmd = gen_cmd(
        dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
    )
    hit_timeout, retcode, outs, errs = execute_cmd(cmd)

    # For the final run
    print_output_and_exit_on_error(outs, errs, args.print_stderr_separately)

    # we need to clean up after ourselves -- only do this on test success
    cleanup_after_success(dbname)


# This python script runs db_stress multiple times. Some runs with
# kill_random_test that causes rocksdb to crash at various points in code.
def whitebox_crash_main(args, unknown_args):
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname("whitebox")

    cur_time = time.time()
    exit_time = cur_time + cmd_params["duration"]
    half_time = cur_time + cmd_params["duration"] // 2

    print(
        "Running whitebox-crash-test with \n"
        + "total-duration="
        + str(cmd_params["duration"])
        + "\n"
    )

    total_check_mode = 4
    check_mode = 0
    kill_random_test = cmd_params["random_kill_odd"]
    kill_mode = 0
    prev_compaction_style = -1
    succeeded = True
    hit_timeout = False
    while time.time() < exit_time:
        if check_mode == 0:
            additional_opts = {
                # use large ops per thread since we will kill it anyway
                "ops_per_thread": 100
                * cmd_params["ops_per_thread"],
            }
            # run with kill_random_test, with three modes.
            # Mode 0 covers all kill points. Mode 1 covers less kill points but
            # increases change of triggering them. Mode 2 covers even less
            # frequent kill points and further increases triggering change.
            if kill_mode == 0:
                additional_opts.update(
                    {
                        "kill_random_test": kill_random_test,
                    }
                )
            elif kill_mode == 1:
                if cmd_params.get("disable_wal", 0) == 1:
                    my_kill_odd = kill_random_test // 50 + 1
                else:
                    my_kill_odd = kill_random_test // 10 + 1
                additional_opts.update(
                    {
                        "kill_random_test": my_kill_odd,
                        "kill_exclude_prefixes": "WritableFileWriter::Append,"
                        + "WritableFileWriter::WriteBuffered",
                    }
                )
            elif kill_mode == 2:
                # TODO: May need to adjust random odds if kill_random_test
                # is too small.
                additional_opts.update(
                    {
                        "kill_random_test": (kill_random_test // 5000 + 1),
                        "kill_exclude_prefixes": "WritableFileWriter::Append,"
                        "WritableFileWriter::WriteBuffered,"
                        "PosixMmapFile::Allocate,WritableFileWriter::Flush",
                    }
                )
            # Run kill mode 0, 1 and 2 by turn.
            kill_mode = (kill_mode + 1) % 3
        elif check_mode == 1:
            # normal run with universal compaction mode
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"],
                "compaction_style": 1,
            }
            # Single level universal has a lot of special logic. Ensure we cover
            # it sometimes.
            if random.randint(0, 1) == 1:
                additional_opts.update(
                    {
                        "num_levels": 1,
                    }
                )
        elif check_mode == 2:
            # normal run with FIFO compaction mode
            # ops_per_thread is divided by 5 because FIFO compaction
            # style is quite a bit slower on reads with lot of files
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"] // 5,
                "compaction_style": 2,
            }
        else:
            # normal run
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"],
            }

        cur_compaction_style = additional_opts.get(
            "compaction_style", cmd_params.get("compaction_style", 0)
        )
        if (
            prev_compaction_style != -1
            and prev_compaction_style != cur_compaction_style
        ):
            print(
                "`compaction_style` is changed in current run so `destroy_db_initially` is set to 1 as a short-term solution to avoid cycling through previous db of different compaction style."
                + "\n"
            )
            additional_opts["destroy_db_initially"] = 1
        prev_compaction_style = cur_compaction_style

        cmd = gen_cmd(
            dict(
                list(cmd_params.items())
                + list(additional_opts.items())
                + list({"db": dbname}.items())
            ),
            unknown_args,
        )

        print(
            "Running:" + " ".join(cmd) + "\n"
        )  # noqa: E999 T25377293 Grandfathered in

        # If the running time is 15 minutes over the run time, explicit kill and
        # exit even if white box kill didn't hit. This is to guarantee run time
        # limit, as if it runs as a job, running too long will create problems
        # for job scheduling or execution.
        # TODO detect a hanging condition. The job might run too long as RocksDB
        # hits a hanging bug.
        hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd(
            cmd, exit_time - time.time() + 900
        )
        msg = "check_mode={0}, kill option={1}, exitcode={2}\n".format(
            check_mode, additional_opts["kill_random_test"], retncode
        )

        print(msg)
        print_output_and_exit_on_error(stdoutdata, stderrdata, args.print_stderr_separately)

        if hit_timeout:
            print("Killing the run for running too long")
            break

        succeeded = False
        if additional_opts["kill_random_test"] is None and (retncode == 0):
            # we expect zero retncode if no kill option
            succeeded = True
        elif additional_opts["kill_random_test"] is not None and retncode <= 0:
            # When kill option is given, the test MIGHT kill itself.
            # If it does, negative retncode is expected. Otherwise 0.
            succeeded = True

        if not succeeded:
            print("TEST FAILED. See kill option and exit code above!!!\n")
            sys.exit(1)

        # First half of the duration, keep doing kill test. For the next half,
        # try different modes.
        if time.time() > half_time:
            cleanup_after_success(dbname)
            try:
                os.mkdir(dbname)
            except OSError:
                pass
            if expected_values_dir is not None:
                shutil.rmtree(expected_values_dir, True)
                os.mkdir(expected_values_dir)

            check_mode = (check_mode + 1) % total_check_mode

        time.sleep(1)  # time to stabilize after a kill


    # If successfully finished or timed out (we currently treat timed out test as passing)
    # Clean up after ourselves
    if succeeded or hit_timeout:
        cleanup_after_success(dbname)


def main():
    global stress_cmd
    global cleanup_cmd

    parser = argparse.ArgumentParser(
        description="This script runs and kills \
        db_stress multiple times"
    )
    parser.add_argument("test_type", choices=["blackbox", "whitebox"])
    parser.add_argument("--simple", action="store_true")
    parser.add_argument("--cf_consistency", action="store_true")
    parser.add_argument("--txn", action="store_true")
    parser.add_argument("--optimistic_txn", action="store_true")
    parser.add_argument("--test_best_efforts_recovery", action="store_true")
    parser.add_argument("--enable_ts", action="store_true")
    parser.add_argument("--test_multiops_txn", action="store_true")
    parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"])
    parser.add_argument("--stress_cmd")
    parser.add_argument("--test_tiered_storage", action="store_true")
    parser.add_argument("--cleanup_cmd")
    parser.add_argument("--skip_tmpdir_check", action="store_true")
    parser.add_argument("--print_stderr_separately", action="store_true", default=False)

    all_params = dict(
        list(default_params.items())
        + list(blackbox_default_params.items())
        + list(whitebox_default_params.items())
        + list(simple_default_params.items())
        + list(blackbox_simple_default_params.items())
        + list(whitebox_simple_default_params.items())
        + list(blob_params.items())
        + list(ts_params.items())
        + list(multiops_txn_default_params.items())
        + list(multiops_wc_txn_params.items())
        + list(multiops_wp_txn_params.items())
        + list(best_efforts_recovery_params.items())
        + list(cf_consistency_params.items())
        + list(tiered_params.items())
        + list(txn_params.items())
        + list(optimistic_txn_params.items())
    )

    for k, v in all_params.items():
        parser.add_argument("--" + k, type=type(v() if callable(v) else v))
    # unknown_args are passed directly to db_stress
    args, unknown_args = parser.parse_known_args()

    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is not None and not args.skip_tmpdir_check:
        isdir = False
        try:
            isdir = os.path.isdir(test_tmpdir)
            if not isdir:
                print(
                    "ERROR: %s env var is set to a non-existent directory: %s. Update it to correct directory path."
                    % (_TEST_DIR_ENV_VAR, test_tmpdir)
                )
                sys.exit(1)
        except OSError:
            pass

    if args.stress_cmd:
        stress_cmd = args.stress_cmd
    if args.cleanup_cmd:
        cleanup_cmd = args.cleanup_cmd
    if args.test_type == "blackbox":
        blackbox_crash_main(args, unknown_args)
    if args.test_type == "whitebox":
        whitebox_crash_main(args, unknown_args)
    # Only delete the `expected_values_dir` if test passes
    if expected_values_dir is not None:
        shutil.rmtree(expected_values_dir)
    if multiops_txn_key_spaces_file is not None:
        os.remove(multiops_txn_key_spaces_file)


if __name__ == "__main__":
    main()