Add the blob cache to the stress tests and the benchmarking tool (#10202)

Summary:
To facilitate correctness and performance testing, this change adds the new blob cache to our stress test tool `db_stress` and our continuously running crash test script `db_crashtest.py`, as well as to our synthetic benchmarking tool `db_bench` and the BlobDB performance testing script `run_blob_bench.sh`.
As part of this task, we would also like to use these benchmarking tools to gather some initial performance numbers on the effectiveness of caching blobs.

This PR is a part of https://github.com/facebook/rocksdb/issues/10156

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10202

Reviewed By: ltamasi

Differential Revision: D37325739

Pulled By: gangliao

fbshipit-source-id: deb65d0d414502270dd4c324d987fd5469869fa8
This commit is contained in:
Gang Liao 2022-06-22 16:04:03 -07:00 committed by Facebook GitHub Bot
parent c073ed7601
commit 2352e2dfda
7 changed files with 139 additions and 4 deletions

View File

@ -267,6 +267,10 @@ DECLARE_double(blob_garbage_collection_age_cutoff);
DECLARE_double(blob_garbage_collection_force_threshold); DECLARE_double(blob_garbage_collection_force_threshold);
DECLARE_uint64(blob_compaction_readahead_size); DECLARE_uint64(blob_compaction_readahead_size);
DECLARE_int32(blob_file_starting_level); DECLARE_int32(blob_file_starting_level);
DECLARE_bool(use_blob_cache);
DECLARE_bool(use_shared_block_and_blob_cache);
DECLARE_uint64(blob_cache_size);
DECLARE_int32(blob_cache_numshardbits);
DECLARE_int32(approximate_size_one_in); DECLARE_int32(approximate_size_one_in);
DECLARE_bool(sync_fault_injection); DECLARE_bool(sync_fault_injection);

View File

@ -305,7 +305,7 @@ DEFINE_int64(cache_size, 2LL * KB * KB * KB,
DEFINE_int32(cache_numshardbits, 6, DEFINE_int32(cache_numshardbits, 6,
"Number of shards for the block cache" "Number of shards for the block cache"
" is 2 ** cache_numshardbits. Negative means use default settings." " is 2 ** cache_numshardbits. Negative means use default settings."
" This is applied only if FLAGS_cache_size is non-negative."); " This is applied only if FLAGS_cache_size is greater than 0.");
DEFINE_bool(cache_index_and_filter_blocks, false, DEFINE_bool(cache_index_and_filter_blocks, false,
"True if indexes/filters should be cached in block cache."); "True if indexes/filters should be cached in block cache.");
@ -450,6 +450,26 @@ DEFINE_int32(
"[Integrated BlobDB] Enable writing blob files during flushes and " "[Integrated BlobDB] Enable writing blob files during flushes and "
"compactions starting from the specified level."); "compactions starting from the specified level.");
DEFINE_bool(use_blob_cache, false, "[Integrated BlobDB] Enable blob cache.");
DEFINE_bool(
use_shared_block_and_blob_cache, true,
"[Integrated BlobDB] Use a shared backing cache for both block "
"cache and blob cache. It only takes effect if use_blob_cache is enabled.");
DEFINE_uint64(
blob_cache_size, 2LL * KB * KB * KB,
"[Integrated BlobDB] Number of bytes to use as a cache of blobs. It only "
"takes effect if the block and blob caches are different "
"(use_shared_block_and_blob_cache = false).");
DEFINE_int32(blob_cache_numshardbits, 6,
"[Integrated BlobDB] Number of shards for the blob cache is 2 ** "
"blob_cache_numshardbits. Negative means use default settings. "
"It only takes effect if blob_cache_size is greater than 0, and "
"the block and blob caches are different "
"(use_shared_block_and_blob_cache = false).");
static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) = static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range); RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);

View File

@ -2334,6 +2334,17 @@ void StressTest::Open(SharedState* shared) {
options_.blob_compaction_readahead_size, options_.blob_compaction_readahead_size,
options_.blob_file_starting_level); options_.blob_file_starting_level);
if (FLAGS_use_blob_cache) {
fprintf(stdout,
"Integrated BlobDB: blob cache enabled, block and blob caches "
"shared: %d, blob cache size %" PRIu64
", blob cache num shard bits: %d\n",
FLAGS_use_shared_block_and_blob_cache, FLAGS_blob_cache_size,
FLAGS_blob_cache_numshardbits);
} else {
fprintf(stdout, "Integrated BlobDB: blob cache disabled\n");
}
fprintf(stdout, "DB path: [%s]\n", FLAGS_db.c_str()); fprintf(stdout, "DB path: [%s]\n", FLAGS_db.c_str());
Status s; Status s;
@ -2886,6 +2897,24 @@ void InitializeOptionsFromFlags(
options.blob_compaction_readahead_size = FLAGS_blob_compaction_readahead_size; options.blob_compaction_readahead_size = FLAGS_blob_compaction_readahead_size;
options.blob_file_starting_level = FLAGS_blob_file_starting_level; options.blob_file_starting_level = FLAGS_blob_file_starting_level;
if (FLAGS_use_blob_cache) {
if (FLAGS_use_shared_block_and_blob_cache) {
options.blob_cache = cache;
} else {
if (FLAGS_blob_cache_size > 0) {
LRUCacheOptions co;
co.capacity = FLAGS_blob_cache_size;
co.num_shard_bits = FLAGS_blob_cache_numshardbits;
options.blob_cache = NewLRUCache(co);
} else {
fprintf(stderr,
"Unable to create a standalone blob cache if blob_cache_size "
"<= 0.\n");
exit(1);
}
}
}
options.wal_compression = options.wal_compression =
StringToCompressionType(FLAGS_wal_compression.c_str()); StringToCompressionType(FLAGS_wal_compression.c_str());

View File

@ -52,7 +52,8 @@ function display_usage() {
echo -e "\tNUM_THREADS\t\t\tThe number of threads to use (default: 64)" echo -e "\tNUM_THREADS\t\t\tThe number of threads to use (default: 64)"
echo -e "\tMB_WRITE_PER_SEC\t\t\tRate limit for background writer" echo -e "\tMB_WRITE_PER_SEC\t\t\tRate limit for background writer"
echo -e "\tNUM_NEXTS_PER_SEEK\t\t(default: 10)" echo -e "\tNUM_NEXTS_PER_SEEK\t\t(default: 10)"
echo -e "\tCACHE_SIZE\t\t\tSize of the block cache(default: 16GB)" echo -e "\tCACHE_SIZE\t\t\tSize of the block cache (default: 16GB)"
echo -e "\tCACHE_NUMSHARDBITS\t\t\tNumber of shards for the block cache is 2 ** cache_numshardbits (default: 6)"
echo -e "\tCOMPRESSION_MAX_DICT_BYTES" echo -e "\tCOMPRESSION_MAX_DICT_BYTES"
echo -e "\tCOMPRESSION_TYPE\t\tDefault compression(default: zstd)" echo -e "\tCOMPRESSION_TYPE\t\tDefault compression(default: zstd)"
echo -e "\tBOTTOMMOST_COMPRESSION\t\t(default: none)" echo -e "\tBOTTOMMOST_COMPRESSION\t\t(default: none)"
@ -89,6 +90,11 @@ function display_usage() {
echo -e "\tBLOB_COMPRESSION_TYPE\tValue for blob_compression_type" echo -e "\tBLOB_COMPRESSION_TYPE\tValue for blob_compression_type"
echo -e "\tBLOB_GC_AGE_CUTOFF\tValue for blob_garbage_collection_age_cutoff" echo -e "\tBLOB_GC_AGE_CUTOFF\tValue for blob_garbage_collection_age_cutoff"
echo -e "\tBLOB_GC_FORCE_THRESHOLD\tValue for blob_garbage_collection_force_threshold" echo -e "\tBLOB_GC_FORCE_THRESHOLD\tValue for blob_garbage_collection_force_threshold"
echo -e "\tBLOB_FILE_STARTING_LEVEL\t\tBlob file starting level (default: 0)"
echo -e "\tUSE_BLOB_CACHE\t\t\tEnable blob cache (default: 1)"
echo -e "\tUSE_SHARED_BLOCK_AND_BLOB_CACHE\t\t\tUse the same backing cache for block cache and blob cache (default: 1)"
echo -e "\tBLOB_CACHE_SIZE\t\t\tSize of the blob cache (default: 16GB)"
echo -e "\tBLOB_CACHE_NUMSHARDBITS\t\t\tNumber of shards for the blob cache is 2 ** blob_cache_numshardbits (default: 6)"
} }
if [ $# -lt 1 ]; then if [ $# -lt 1 ]; then
@ -156,7 +162,8 @@ num_threads=${NUM_THREADS:-64}
mb_written_per_sec=${MB_WRITE_PER_SEC:-0} mb_written_per_sec=${MB_WRITE_PER_SEC:-0}
# Only for tests that do range scans # Only for tests that do range scans
num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10} num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
cache_size=${CACHE_SIZE:-$((17179869184))} cache_size=${CACHE_SIZE:-$(( 16 * $G ))}
cache_numshardbits=${CACHE_NUMSHARDBITS:-6}
compression_max_dict_bytes=${COMPRESSION_MAX_DICT_BYTES:-0} compression_max_dict_bytes=${COMPRESSION_MAX_DICT_BYTES:-0}
compression_type=${COMPRESSION_TYPE:-zstd} compression_type=${COMPRESSION_TYPE:-zstd}
min_level_to_compress=${MIN_LEVEL_TO_COMPRESS:-"-1"} min_level_to_compress=${MIN_LEVEL_TO_COMPRESS:-"-1"}
@ -227,6 +234,11 @@ blob_file_size=${BLOB_FILE_SIZE:-$(( 256 * $M ))}
blob_compression_type=${BLOB_COMPRESSION_TYPE:-${compression_type}} blob_compression_type=${BLOB_COMPRESSION_TYPE:-${compression_type}}
blob_gc_age_cutoff=${BLOB_GC_AGE_CUTOFF:-"0.25"} blob_gc_age_cutoff=${BLOB_GC_AGE_CUTOFF:-"0.25"}
blob_gc_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1} blob_gc_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1}
blob_file_starting_level=${BLOB_FILE_STARTING_LEVEL:-0}
use_blob_cache=${USE_BLOB_CACHE:-1}
use_shared_block_and_blob_cache=${USE_SHARED_BLOCK_AND_BLOB_CACHE:-1}
blob_cache_size=${BLOB_CACHE_SIZE:-$(( 16 * $G ))}
blob_cache_numshardbits=${BLOB_CACHE_NUMSHARDBITS:-6}
const_params_base=" const_params_base="
--db=$DB_DIR \ --db=$DB_DIR \
@ -237,7 +249,7 @@ const_params_base="
--value_size=$value_size \ --value_size=$value_size \
--block_size=$block_size \ --block_size=$block_size \
--cache_size=$cache_size \ --cache_size=$cache_size \
--cache_numshardbits=6 \ --cache_numshardbits=$cache_numshardbits \
--compression_max_dict_bytes=$compression_max_dict_bytes \ --compression_max_dict_bytes=$compression_max_dict_bytes \
--compression_ratio=0.5 \ --compression_ratio=0.5 \
--compression_type=$compression_type \ --compression_type=$compression_type \
@ -288,6 +300,11 @@ blob_const_params="
--enable_blob_garbage_collection=true \ --enable_blob_garbage_collection=true \
--blob_garbage_collection_age_cutoff=$blob_gc_age_cutoff \ --blob_garbage_collection_age_cutoff=$blob_gc_age_cutoff \
--blob_garbage_collection_force_threshold=$blob_gc_force_threshold \ --blob_garbage_collection_force_threshold=$blob_gc_force_threshold \
--blob_file_starting_level=$blob_file_starting_level \
--use_blob_cache=$use_blob_cache \
--use_shared_block_and_blob_cache=$use_shared_block_and_blob_cache \
--blob_cache_size=$blob_cache_size \
--blob_cache_numshardbits=$blob_cache_numshardbits \
" "
# TODO: # TODO:

View File

@ -1074,6 +1074,26 @@ DEFINE_int32(
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_starting_level, ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_starting_level,
"[Integrated BlobDB] The starting level for blob files."); "[Integrated BlobDB] The starting level for blob files.");
DEFINE_bool(use_blob_cache, false, "[Integrated BlobDB] Enable blob cache.");
DEFINE_bool(
use_shared_block_and_blob_cache, true,
"[Integrated BlobDB] Use a shared backing cache for both block "
"cache and blob cache. It only takes effect if use_blob_cache is enabled.");
DEFINE_uint64(
blob_cache_size, 8 << 20,
"[Integrated BlobDB] Number of bytes to use as a cache of blobs. It only "
"takes effect if the block and blob caches are different "
"(use_shared_block_and_blob_cache = false).");
DEFINE_int32(blob_cache_numshardbits, 6,
"[Integrated BlobDB] Number of shards for the blob cache is 2 ** "
"blob_cache_numshardbits. Negative means use default settings. "
"It only takes effect if blob_cache_size is greater than 0, and "
"the block and blob caches are different "
"(use_shared_block_and_blob_cache = false).");
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
// Secondary DB instance Options // Secondary DB instance Options
@ -4476,6 +4496,32 @@ class Benchmark {
FLAGS_blob_compaction_readahead_size; FLAGS_blob_compaction_readahead_size;
options.blob_file_starting_level = FLAGS_blob_file_starting_level; options.blob_file_starting_level = FLAGS_blob_file_starting_level;
if (FLAGS_use_blob_cache) {
if (FLAGS_use_shared_block_and_blob_cache) {
options.blob_cache = cache_;
} else {
if (FLAGS_blob_cache_size > 0) {
LRUCacheOptions co;
co.capacity = FLAGS_blob_cache_size;
co.num_shard_bits = FLAGS_blob_cache_numshardbits;
options.blob_cache = NewLRUCache(co);
} else {
fprintf(stderr,
"Unable to create a standalone blob cache if blob_cache_size "
"<= 0.\n");
exit(1);
}
}
fprintf(stdout,
"Integrated BlobDB: blob cache enabled, block and blob caches "
"shared: %d, blob cache size %" PRIu64
", blob cache num shard bits: %d\n",
FLAGS_use_shared_block_and_blob_cache, FLAGS_blob_cache_size,
FLAGS_blob_cache_numshardbits);
} else {
fprintf(stdout, "Integrated BlobDB: blob cache disabled\n");
}
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (FLAGS_readonly && FLAGS_transaction_db) { if (FLAGS_readonly && FLAGS_transaction_db) {
fprintf(stderr, "Cannot use readonly flag with transaction_db\n"); fprintf(stderr, "Cannot use readonly flag with transaction_db\n");

View File

@ -344,6 +344,9 @@ blob_params = {
"blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]), "blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
"blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]), "blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
"blob_file_starting_level": lambda: random.choice([0] * 4 + [1] * 3 + [2] * 2 + [3]), "blob_file_starting_level": lambda: random.choice([0] * 4 + [1] * 3 + [2] * 2 + [3]),
"use_blob_cache": lambda: random.randint(0, 1),
"use_shared_block_and_blob_cache": lambda: random.randint(0, 1),
"blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]),
} }
ts_params = { ts_params = {

View File

@ -55,6 +55,10 @@ function display_usage() {
echo -e "\tBLOB_GC_FORCE_THRESHOLD\t\tThreshold for forcing garbage collection of the oldest blob files (default: 1.0)" echo -e "\tBLOB_GC_FORCE_THRESHOLD\t\tThreshold for forcing garbage collection of the oldest blob files (default: 1.0)"
echo -e "\tBLOB_COMPACTION_READAHEAD_SIZE\tBlob compaction readahead size (default: 0)" echo -e "\tBLOB_COMPACTION_READAHEAD_SIZE\tBlob compaction readahead size (default: 0)"
echo -e "\tBLOB_FILE_STARTING_LEVEL\t\tBlob file starting level (default: 0)" echo -e "\tBLOB_FILE_STARTING_LEVEL\t\tBlob file starting level (default: 0)"
echo -e "\tUSE_BLOB_CACHE\t\t\tEnable blob cache. (default: 1)"
echo -e "\tUSE_SHARED_BLOCK_AND_BLOB_CACHE\t\t\tUse the same backing cache for block cache and blob cache. (default: 1)"
echo -e "\tBLOB_CACHE_SIZE\t\t\tSize of the blob cache (default: 16GB)"
echo -e "\tBLOB_CACHE_NUMSHARDBITS\t\t\tNumber of shards for the blob cache is 2 ** blob_cache_numshardbits (default: 6)"
echo -e "\tTARGET_FILE_SIZE_BASE\t\tTarget SST file size for compactions (default: write buffer size, scaled down if blob files are enabled)" echo -e "\tTARGET_FILE_SIZE_BASE\t\tTarget SST file size for compactions (default: write buffer size, scaled down if blob files are enabled)"
echo -e "\tMAX_BYTES_FOR_LEVEL_BASE\tMaximum size for the base level (default: 8 * target SST file size)" echo -e "\tMAX_BYTES_FOR_LEVEL_BASE\tMaximum size for the base level (default: 8 * target SST file size)"
} }
@ -115,6 +119,10 @@ blob_garbage_collection_age_cutoff=${BLOB_GC_AGE_CUTOFF:-0.25}
blob_garbage_collection_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1.0} blob_garbage_collection_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1.0}
blob_compaction_readahead_size=${BLOB_COMPACTION_READAHEAD_SIZE:-0} blob_compaction_readahead_size=${BLOB_COMPACTION_READAHEAD_SIZE:-0}
blob_file_starting_level=${BLOB_FILE_STARTING_LEVEL:-0} blob_file_starting_level=${BLOB_FILE_STARTING_LEVEL:-0}
use_blob_cache=${USE_BLOB_CACHE:-1}
use_shared_block_and_blob_cache=${USE_SHARED_BLOCK_AND_BLOB_CACHE:-1}
blob_cache_size=${BLOB_CACHE_SIZE:-$((16 * G))}
blob_cache_numshardbits=${BLOB_CACHE_NUMSHARDBITS:-6}
if [ "$enable_blob_files" == "1" ]; then if [ "$enable_blob_files" == "1" ]; then
target_file_size_base=${TARGET_FILE_SIZE_BASE:-$((32 * write_buffer_size / value_size))} target_file_size_base=${TARGET_FILE_SIZE_BASE:-$((32 * write_buffer_size / value_size))}
@ -145,6 +153,10 @@ echo -e "Blob GC age cutoff:\t\t\t$blob_garbage_collection_age_cutoff"
echo -e "Blob GC force threshold:\t\t$blob_garbage_collection_force_threshold" echo -e "Blob GC force threshold:\t\t$blob_garbage_collection_force_threshold"
echo -e "Blob compaction readahead size:\t\t$blob_compaction_readahead_size" echo -e "Blob compaction readahead size:\t\t$blob_compaction_readahead_size"
echo -e "Blob file starting level:\t\t$blob_file_starting_level" echo -e "Blob file starting level:\t\t$blob_file_starting_level"
echo -e "Blob cache enabled:\t\t\t$use_blob_cache"
echo -e "Blob cache and block cache shared:\t\t\t$use_shared_block_and_blob_cache"
echo -e "Blob cache size:\t\t$blob_cache_size"
echo -e "Blob cache number of shard bits:\t\t$blob_cache_numshardbits"
echo -e "Target SST file size:\t\t\t$target_file_size_base" echo -e "Target SST file size:\t\t\t$target_file_size_base"
echo -e "Maximum size of base level:\t\t$max_bytes_for_level_base" echo -e "Maximum size of base level:\t\t$max_bytes_for_level_base"
echo "=================================================================" echo "================================================================="
@ -171,6 +183,10 @@ PARAMS="\
--blob_file_size=$blob_file_size \ --blob_file_size=$blob_file_size \
--blob_compression_type=$blob_compression_type \ --blob_compression_type=$blob_compression_type \
--blob_file_starting_level=$blob_file_starting_level \ --blob_file_starting_level=$blob_file_starting_level \
--use_blob_cache=$use_blob_cache \
--use_shared_block_and_blob_cache=$use_shared_block_and_blob_cache \
--blob_cache_size=$blob_cache_size \
--blob_cache_numshardbits=$blob_cache_numshardbits \
--write_buffer_size=$write_buffer_size \ --write_buffer_size=$write_buffer_size \
--target_file_size_base=$target_file_size_base \ --target_file_size_base=$target_file_size_base \
--max_bytes_for_level_base=$max_bytes_for_level_base" --max_bytes_for_level_base=$max_bytes_for_level_base"