From 2352e2dfdaa293019b83219a7bc9ea8f31f6524a Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Wed, 22 Jun 2022 16:04:03 -0700 Subject: [PATCH] Add the blob cache to the stress tests and the benchmarking tool (#10202) Summary: In order to facilitate correctness and performance testing, we would like to add the new blob cache to our stress test tool `db_stress` and our continuously running crash test script `db_crashtest.py`, as well as our synthetic benchmarking tool `db_bench` and the BlobDB performance testing script `run_blob_bench.sh`. As part of this task, we would also like to utilize these benchmarking tools to get some initial performance numbers about the effectiveness of caching blobs. This PR is a part of https://github.com/facebook/rocksdb/issues/10156 Pull Request resolved: https://github.com/facebook/rocksdb/pull/10202 Reviewed By: ltamasi Differential Revision: D37325739 Pulled By: gangliao fbshipit-source-id: deb65d0d414502270dd4c324d987fd5469869fa8 --- db_stress_tool/db_stress_common.h | 4 +++ db_stress_tool/db_stress_gflags.cc | 22 ++++++++++++- db_stress_tool/db_stress_test_base.cc | 29 +++++++++++++++++ tools/benchmark.sh | 23 ++++++++++++-- tools/db_bench_tool.cc | 46 +++++++++++++++++++++++++++ tools/db_crashtest.py | 3 ++ tools/run_blob_bench.sh | 16 ++++++++++ 7 files changed, 139 insertions(+), 4 deletions(-) diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index c00a69d881..893cff7520 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -267,6 +267,10 @@ DECLARE_double(blob_garbage_collection_age_cutoff); DECLARE_double(blob_garbage_collection_force_threshold); DECLARE_uint64(blob_compaction_readahead_size); DECLARE_int32(blob_file_starting_level); +DECLARE_bool(use_blob_cache); +DECLARE_bool(use_shared_block_and_blob_cache); +DECLARE_uint64(blob_cache_size); +DECLARE_int32(blob_cache_numshardbits); DECLARE_int32(approximate_size_one_in); DECLARE_bool(sync_fault_injection); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 26e8c1ebb5..48e8d7c960 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -305,7 +305,7 @@ DEFINE_int64(cache_size, 2LL * KB * KB * KB, DEFINE_int32(cache_numshardbits, 6, "Number of shards for the block cache" " is 2 ** cache_numshardbits. Negative means use default settings." - " This is applied only if FLAGS_cache_size is non-negative."); + " This is applied only if FLAGS_cache_size is greater than 0."); DEFINE_bool(cache_index_and_filter_blocks, false, "True if indexes/filters should be cached in block cache."); @@ -450,6 +450,26 @@ DEFINE_int32( "[Integrated BlobDB] Enable writing blob files during flushes and " "compactions starting from the specified level."); +DEFINE_bool(use_blob_cache, false, "[Integrated BlobDB] Enable blob cache."); + +DEFINE_bool( + use_shared_block_and_blob_cache, true, + "[Integrated BlobDB] Use a shared backing cache for both block " + "cache and blob cache. It only takes effect if use_blob_cache is enabled."); + +DEFINE_uint64( + blob_cache_size, 2LL * KB * KB * KB, + "[Integrated BlobDB] Number of bytes to use as a cache of blobs. It only " + "takes effect if the block and blob caches are different " + "(use_shared_block_and_blob_cache = false)."); + +DEFINE_int32(blob_cache_numshardbits, 6, + "[Integrated BlobDB] Number of shards for the blob cache is 2 ** " + "blob_cache_numshardbits. Negative means use default settings. " + "It only takes effect if blob_cache_size is greater than 0, and " + "the block and blob caches are different " + "(use_shared_block_and_blob_cache = false)."); + static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 97e4b4cb0e..4879c229c1 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2334,6 +2334,17 @@ void StressTest::Open(SharedState* shared) { options_.blob_compaction_readahead_size, options_.blob_file_starting_level); + if (FLAGS_use_blob_cache) { + fprintf(stdout, + "Integrated BlobDB: blob cache enabled, block and blob caches " + "shared: %d, blob cache size %" PRIu64 + ", blob cache num shard bits: %d\n", + FLAGS_use_shared_block_and_blob_cache, FLAGS_blob_cache_size, + FLAGS_blob_cache_numshardbits); + } else { + fprintf(stdout, "Integrated BlobDB: blob cache disabled\n"); + } + fprintf(stdout, "DB path: [%s]\n", FLAGS_db.c_str()); Status s; @@ -2886,6 +2897,24 @@ void InitializeOptionsFromFlags( options.blob_compaction_readahead_size = FLAGS_blob_compaction_readahead_size; options.blob_file_starting_level = FLAGS_blob_file_starting_level; + if (FLAGS_use_blob_cache) { + if (FLAGS_use_shared_block_and_blob_cache) { + options.blob_cache = cache; + } else { + if (FLAGS_blob_cache_size > 0) { + LRUCacheOptions co; + co.capacity = FLAGS_blob_cache_size; + co.num_shard_bits = FLAGS_blob_cache_numshardbits; + options.blob_cache = NewLRUCache(co); + } else { + fprintf(stderr, + "Unable to create a standalone blob cache if blob_cache_size " + "<= 0.\n"); + exit(1); + } + } + } + options.wal_compression = StringToCompressionType(FLAGS_wal_compression.c_str()); diff --git a/tools/benchmark.sh b/tools/benchmark.sh index 07d3144d85..4e5dae4376 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -52,7 +52,8 @@ function display_usage() { echo -e "\tNUM_THREADS\t\t\tThe number of threads to use (default: 64)" echo -e "\tMB_WRITE_PER_SEC\t\t\tRate limit for background writer" echo -e "\tNUM_NEXTS_PER_SEEK\t\t(default: 10)" - echo -e "\tCACHE_SIZE\t\t\tSize of the block cache(default: 16GB)" + echo -e "\tCACHE_SIZE\t\t\tSize of the block cache (default: 16GB)" + echo -e "\tCACHE_NUMSHARDBITS\t\t\tNumber of shards for the block cache is 2 ** cache_numshardbits (default: 6)" echo -e "\tCOMPRESSION_MAX_DICT_BYTES" echo -e "\tCOMPRESSION_TYPE\t\tDefault compression(default: zstd)" echo -e "\tBOTTOMMOST_COMPRESSION\t\t(default: none)" @@ -89,6 +90,11 @@ function display_usage() { echo -e "\tBLOB_COMPRESSION_TYPE\tValue for blob_compression_type" echo -e "\tBLOB_GC_AGE_CUTOFF\tValue for blob_garbage_collection_age_cutoff" echo -e "\tBLOB_GC_FORCE_THRESHOLD\tValue for blob_garbage_collection_force_threshold" + echo -e "\tBLOB_FILE_STARTING_LEVEL\t\tBlob file starting level (default: 0)" + echo -e "\tUSE_BLOB_CACHE\t\t\tEnable blob cache (default: 1)" + echo -e "\tUSE_SHARED_BLOCK_AND_BLOB_CACHE\t\t\tUse the same backing cache for block cache and blob cache (default: 1)" + echo -e "\tBLOB_CACHE_SIZE\t\t\tSize of the blob cache (default: 16GB)" + echo -e "\tBLOB_CACHE_NUMSHARDBITS\t\t\tNumber of shards for the blob cache is 2 ** blob_cache_numshardbits (default: 6)" } if [ $# -lt 1 ]; then @@ -156,7 +162,8 @@ num_threads=${NUM_THREADS:-64} mb_written_per_sec=${MB_WRITE_PER_SEC:-0} # Only for tests that do range scans num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10} -cache_size=${CACHE_SIZE:-$((17179869184))} +cache_size=${CACHE_SIZE:-$(( 16 * $G ))} +cache_numshardbits=${CACHE_NUMSHARDBITS:-6} compression_max_dict_bytes=${COMPRESSION_MAX_DICT_BYTES:-0} compression_type=${COMPRESSION_TYPE:-zstd} min_level_to_compress=${MIN_LEVEL_TO_COMPRESS:-"-1"} @@ -227,6 +234,11 @@ blob_file_size=${BLOB_FILE_SIZE:-$(( 256 * $M ))} blob_compression_type=${BLOB_COMPRESSION_TYPE:-${compression_type}} blob_gc_age_cutoff=${BLOB_GC_AGE_CUTOFF:-"0.25"} blob_gc_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1} +blob_file_starting_level=${BLOB_FILE_STARTING_LEVEL:-0} +use_blob_cache=${USE_BLOB_CACHE:-1} +use_shared_block_and_blob_cache=${USE_SHARED_BLOCK_AND_BLOB_CACHE:-1} +blob_cache_size=${BLOB_CACHE_SIZE:-$(( 16 * $G ))} +blob_cache_numshardbits=${BLOB_CACHE_NUMSHARDBITS:-6} const_params_base=" --db=$DB_DIR \ @@ -237,7 +249,7 @@ const_params_base=" --value_size=$value_size \ --block_size=$block_size \ --cache_size=$cache_size \ - --cache_numshardbits=6 \ + --cache_numshardbits=$cache_numshardbits \ --compression_max_dict_bytes=$compression_max_dict_bytes \ --compression_ratio=0.5 \ --compression_type=$compression_type \ @@ -288,6 +300,11 @@ blob_const_params=" --enable_blob_garbage_collection=true \ --blob_garbage_collection_age_cutoff=$blob_gc_age_cutoff \ --blob_garbage_collection_force_threshold=$blob_gc_force_threshold \ + --blob_file_starting_level=$blob_file_starting_level \ + --use_blob_cache=$use_blob_cache \ + --use_shared_block_and_blob_cache=$use_shared_block_and_blob_cache \ + --blob_cache_size=$blob_cache_size \ + --blob_cache_numshardbits=$blob_cache_numshardbits \ " # TODO: diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 5b20143fda..1a41138da2 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1074,6 +1074,26 @@ DEFINE_int32( ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_starting_level, "[Integrated BlobDB] The starting level for blob files."); +DEFINE_bool(use_blob_cache, false, "[Integrated BlobDB] Enable blob cache."); + +DEFINE_bool( + use_shared_block_and_blob_cache, true, + "[Integrated BlobDB] Use a shared backing cache for both block " + "cache and blob cache. It only takes effect if use_blob_cache is enabled."); + +DEFINE_uint64( + blob_cache_size, 8 << 20, + "[Integrated BlobDB] Number of bytes to use as a cache of blobs. It only " + "takes effect if the block and blob caches are different " + "(use_shared_block_and_blob_cache = false)."); + +DEFINE_int32(blob_cache_numshardbits, 6, + "[Integrated BlobDB] Number of shards for the blob cache is 2 ** " + "blob_cache_numshardbits. Negative means use default settings. " + "It only takes effect if blob_cache_size is greater than 0, and " + "the block and blob caches are different " + "(use_shared_block_and_blob_cache = false)."); + #ifndef ROCKSDB_LITE // Secondary DB instance Options @@ -4476,6 +4496,32 @@ class Benchmark { FLAGS_blob_compaction_readahead_size; options.blob_file_starting_level = FLAGS_blob_file_starting_level; + if (FLAGS_use_blob_cache) { + if (FLAGS_use_shared_block_and_blob_cache) { + options.blob_cache = cache_; + } else { + if (FLAGS_blob_cache_size > 0) { + LRUCacheOptions co; + co.capacity = FLAGS_blob_cache_size; + co.num_shard_bits = FLAGS_blob_cache_numshardbits; + options.blob_cache = NewLRUCache(co); + } else { + fprintf(stderr, + "Unable to create a standalone blob cache if blob_cache_size " + "<= 0.\n"); + exit(1); + } + } + fprintf(stdout, + "Integrated BlobDB: blob cache enabled, block and blob caches " + "shared: %d, blob cache size %" PRIu64 + ", blob cache num shard bits: %d\n", + FLAGS_use_shared_block_and_blob_cache, FLAGS_blob_cache_size, + FLAGS_blob_cache_numshardbits); + } else { + fprintf(stdout, "Integrated BlobDB: blob cache disabled\n"); + } + #ifndef ROCKSDB_LITE if (FLAGS_readonly && FLAGS_transaction_db) { fprintf(stderr, "Cannot use readonly flag with transaction_db\n"); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index cd311d5ad7..8dbf13dac5 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -344,6 +344,9 @@ blob_params = { "blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]), "blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]), "blob_file_starting_level": lambda: random.choice([0] * 4 + [1] * 3 + [2] * 2 + [3]), + "use_blob_cache": lambda: random.randint(0, 1), + "use_shared_block_and_blob_cache": lambda: random.randint(0, 1), + "blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]), } ts_params = { diff --git a/tools/run_blob_bench.sh b/tools/run_blob_bench.sh index 28ea5bf1d9..32b45717af 100755 --- a/tools/run_blob_bench.sh +++ b/tools/run_blob_bench.sh @@ -55,6 +55,10 @@ function display_usage() { echo -e "\tBLOB_GC_FORCE_THRESHOLD\t\tThreshold for forcing garbage collection of the oldest blob files (default: 1.0)" echo -e "\tBLOB_COMPACTION_READAHEAD_SIZE\tBlob compaction readahead size (default: 0)" echo -e "\tBLOB_FILE_STARTING_LEVEL\t\tBlob file starting level (default: 0)" + echo -e "\tUSE_BLOB_CACHE\t\t\tEnable blob cache. (default: 1)" + echo -e "\tUSE_SHARED_BLOCK_AND_BLOB_CACHE\t\t\tUse the same backing cache for block cache and blob cache. (default: 1)" + echo -e "\tBLOB_CACHE_SIZE\t\t\tSize of the blob cache (default: 16GB)" + echo -e "\tBLOB_CACHE_NUMSHARDBITS\t\t\tNumber of shards for the blob cache is 2 ** blob_cache_numshardbits (default: 6)" echo -e "\tTARGET_FILE_SIZE_BASE\t\tTarget SST file size for compactions (default: write buffer size, scaled down if blob files are enabled)" echo -e "\tMAX_BYTES_FOR_LEVEL_BASE\tMaximum size for the base level (default: 8 * target SST file size)" } @@ -115,6 +119,10 @@ blob_garbage_collection_age_cutoff=${BLOB_GC_AGE_CUTOFF:-0.25} blob_garbage_collection_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1.0} blob_compaction_readahead_size=${BLOB_COMPACTION_READAHEAD_SIZE:-0} blob_file_starting_level=${BLOB_FILE_STARTING_LEVEL:-0} +use_blob_cache=${USE_BLOB_CACHE:-1} +use_shared_block_and_blob_cache=${USE_SHARED_BLOCK_AND_BLOB_CACHE:-1} +blob_cache_size=${BLOB_CACHE_SIZE:-$((16 * G))} +blob_cache_numshardbits=${BLOB_CACHE_NUMSHARDBITS:-6} if [ "$enable_blob_files" == "1" ]; then target_file_size_base=${TARGET_FILE_SIZE_BASE:-$((32 * write_buffer_size / value_size))} @@ -145,6 +153,10 @@ echo -e "Blob GC age cutoff:\t\t\t$blob_garbage_collection_age_cutoff" echo -e "Blob GC force threshold:\t\t$blob_garbage_collection_force_threshold" echo -e "Blob compaction readahead size:\t\t$blob_compaction_readahead_size" echo -e "Blob file starting level:\t\t$blob_file_starting_level" +echo -e "Blob cache enabled:\t\t\t$use_blob_cache" +echo -e "Blob cache and block cache shared:\t\t\t$use_shared_block_and_blob_cache" +echo -e "Blob cache size:\t\t$blob_cache_size" +echo -e "Blob cache number of shard bits:\t\t$blob_cache_numshardbits" echo -e "Target SST file size:\t\t\t$target_file_size_base" echo -e "Maximum size of base level:\t\t$max_bytes_for_level_base" echo "=================================================================" @@ -171,6 +183,10 @@ PARAMS="\ --blob_file_size=$blob_file_size \ --blob_compression_type=$blob_compression_type \ --blob_file_starting_level=$blob_file_starting_level \ + --use_blob_cache=$use_blob_cache \ + --use_shared_block_and_blob_cache=$use_shared_block_and_blob_cache \ + --blob_cache_size=$blob_cache_size \ + --blob_cache_numshardbits=$blob_cache_numshardbits \ --write_buffer_size=$write_buffer_size \ --target_file_size_base=$target_file_size_base \ --max_bytes_for_level_base=$max_bytes_for_level_base"