Tiered storage stress test (#10493)

Summary:
Add a tiered storage stress test and a `preclude_last_level_data_seconds` option for db_bench.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10493

Test Plan:
new crashtest:
https://app.circleci.com/pipelines/github/facebook/rocksdb/16905/workflows/68c2967c-9274-434f-8506-1403cf441ead

Reviewed By: ajkr

Differential Revision: D38481892

Pulled By: jay-zhuang

fbshipit-source-id: 217a0be4acb93d420222e6ede2a1290d9f464776
This commit is contained in:
Jay Zhuang 2022-08-08 13:08:35 -07:00 committed by Facebook GitHub Bot
parent 0d885e80d4
commit 1e86d424e4
7 changed files with 76 additions and 2 deletions

View File

@ -575,6 +575,34 @@ jobs:
- run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush
- post-steps
# Blackbox crash test for tiered storage.
# Runs the `blackbox_crash_test_with_tiered_storage` make target with
# CRASH_TEST_EXT_ARGS=--duration=10800, after raising the soft open-file limit
# to the hard limit (`ulimit -S -n \`ulimit -H -n\``).
# no_output_timeout is set to 100m for the run step.
build-linux-crashtest-tiered-storage-bb:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-compression-libs
- run:
name: "run crashtest"
command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 blackbox_crash_test_with_tiered_storage
no_output_timeout: 100m
- post-steps
# Whitebox crash test for tiered storage.
# Runs the `whitebox_crash_test_with_tiered_storage` make target with
# CRASH_TEST_EXT_ARGS=--duration=10800, after raising the soft open-file limit
# to the hard limit (`ulimit -S -n \`ulimit -H -n\``).
# no_output_timeout is set to 100m for the run step.
build-linux-crashtest-tiered-storage-wb:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-compression-libs
- run:
name: "run crashtest"
command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 whitebox_crash_test_with_tiered_storage
no_output_timeout: 100m
- post-steps
build-windows:
executor: windows-2xlarge
parameters:

View File

@ -18,7 +18,9 @@ CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD)
whitebox_crash_test whitebox_crash_test_with_atomic_flush \
whitebox_crash_test_with_txn whitebox_crash_test_with_ts \
blackbox_crash_test_with_multiops_wc_txn \
blackbox_crash_test_with_multiops_wp_txn
blackbox_crash_test_with_multiops_wp_txn \
crash_test_with_tiered_storage blackbox_crash_test_with_tiered_storage \
whitebox_crash_test_with_tiered_storage \
crash_test: $(DB_STRESS_CMD)
# Do not parallelize
@ -42,6 +44,11 @@ crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) whitebox_crash_test_with_ts
$(CRASHTEST_MAKE) blackbox_crash_test_with_ts
# Umbrella target for tiered storage crash testing: invokes the whitebox
# variant and then the blackbox variant, one after the other, through
# $(CRASHTEST_MAKE). Depends on the db_stress binary named by $(DB_STRESS_CMD).
crash_test_with_tiered_storage: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_tiered_storage
$(CRASHTEST_MAKE) blackbox_crash_test_with_tiered_storage
crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wc_txn
@ -70,6 +77,9 @@ blackbox_crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
# Blackbox crash test with tiered storage: runs the crash-test script in
# `blackbox` mode with --enable_tiered_storage. Additional script arguments
# can be supplied through $(CRASH_TEST_EXT_ARGS).
blackbox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_tiered_storage blackbox $(CRASH_TEST_EXT_ARGS)
ifeq ($(CRASH_TEST_KILL_ODD),)
CRASH_TEST_KILL_ODD=888887
endif
@ -91,3 +101,7 @@ whitebox_crash_test_with_txn: $(DB_STRESS_CMD)
whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_ts whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
# Whitebox crash test with tiered storage: runs the crash-test script in
# `whitebox` mode with --enable_tiered_storage, passing $(CRASH_TEST_KILL_ODD)
# as the --random_kill_odd value. Additional script arguments can be supplied
# through $(CRASH_TEST_EXT_ARGS).
whitebox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_tiered_storage whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)

View File

@ -307,6 +307,10 @@ DECLARE_int32(create_timestamped_snapshot_one_in);
DECLARE_bool(allow_data_in_errors);
// Tiered storage
// When enable_tiered_storage is true, InitializeOptionsFromFlags() sets
// options.bottommost_temperature to Temperature::kCold.
DECLARE_bool(enable_tiered_storage); // marks the bottommost level as cold
// Copied verbatim into options.preclude_last_level_data_seconds.
DECLARE_int64(preclude_last_level_data_seconds);
constexpr long KB = 1024;
constexpr int kRandomValueMaxFactor = 3;
constexpr int kValueMaxLen = 100;

View File

@ -483,6 +483,12 @@ DEFINE_int32(prepopulate_blob_cache, 0,
"[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 "
"to disable and 1 to insert during flush.");
// Tiered storage flags.
// enable_tiered_storage: when true, InitializeOptionsFromFlags() sets
// options.bottommost_temperature to Temperature::kCold. Defaults to false.
DEFINE_bool(enable_tiered_storage, false, "Set last_level_temperature");
// preclude_last_level_data_seconds: copied into the option of the same name
// by InitializeOptionsFromFlags(). Defaults to 0.
DEFINE_int64(preclude_last_level_data_seconds, 0,
             "Preclude data from the last level. Used with tiered storage "
             "feature to preclude new data from compacting to the last level.");
static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);

View File

@ -3063,6 +3063,12 @@ void InitializeOptionsFromFlags(
options.wal_compression =
StringToCompressionType(FLAGS_wal_compression.c_str());
if (FLAGS_enable_tiered_storage) {
options.bottommost_temperature = Temperature::kCold;
}
options.preclude_last_level_data_seconds =
FLAGS_preclude_last_level_data_seconds;
switch (FLAGS_rep_factory) {
case kSkipList:
// no need to do anything

View File

@ -1315,6 +1315,10 @@ DEFINE_int32(simulate_hybrid_hdd_multipliers, 1,
"are simulated.");
DEFINE_bool(simulate_hdd, false, "Simulate read/write latency on HDD.");
// Tiered storage knob for the benchmark tool. The Benchmark class copies this
// value into options.preclude_last_level_data_seconds. Defaults to 0.
DEFINE_int64(preclude_last_level_data_seconds, 0,
             "Preclude the latest data from the last level. "
             "(Used for tiered storage)");
static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
static ROCKSDB_NAMESPACE::Env* FLAGS_env = ROCKSDB_NAMESPACE::Env::Default();
@ -4449,6 +4453,8 @@ class Benchmark {
if (FLAGS_simulate_hybrid_fs_file != "") {
options.bottommost_temperature = Temperature::kWarm;
}
options.preclude_last_level_data_seconds =
FLAGS_preclude_last_level_data_seconds;
options.sample_for_compression = FLAGS_sample_for_compression;
options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds;
options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB;

View File

@ -6,7 +6,6 @@ import os
import sys
import time
import random
import re
import tempfile
import subprocess
import shutil
@ -367,6 +366,14 @@ ts_params = {
"ingest_external_file_one_in": 0,
}
# Parameter overrides merged into the generated db_stress parameters when the
# crash test is started with --enable_tiered_storage.
tiered_params = dict(
    enable_tiered_storage=1,
    # Kept as a callable drawing from a candidate list (currently a single
    # value); presumably invoked when the final command line is generated.
    preclude_last_level_data_seconds=lambda: random.choice([3600]),
    # Restricted to universal compaction for now: level compaction has a
    # known endless-compaction issue with this feature.
    compaction_style=1,
)
multiops_txn_default_params = {
"test_cf_consistency": 0,
"test_batches_snapshots": 0,
@ -573,6 +580,8 @@ def gen_cmd_params(args):
params.update(multiops_wc_txn_params)
elif args.write_policy == 'write_prepared':
params.update(multiops_wp_txn_params)
if args.enable_tiered_storage:
params.update(tiered_params)
# Best-effort recovery and BlobDB are currently incompatible. Test BE recovery
# if specified on the command line; otherwise, apply BlobDB related overrides
@ -820,6 +829,7 @@ def main():
parser.add_argument("--test_multiops_txn", action='store_true')
parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"])
parser.add_argument("--stress_cmd")
parser.add_argument("--enable_tiered_storage", action='store_true')
all_params = dict(list(default_params.items())
+ list(blackbox_default_params.items())