diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index 6c19e7921f..96de7f32ab 100644 --- a/cache/compressed_secondary_cache.cc +++ b/cache/compressed_secondary_cache.cc @@ -192,13 +192,13 @@ Status CompressedSecondaryCache::InsertInternal( type == kNoCompression && !cache_options_.do_not_compress_roles.Contains(helper->role)) { PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes, data_size); - CompressionOptions compression_opts; CompressionContext compression_context(cache_options_.compression_type, - compression_opts); + cache_options_.compression_opts); uint64_t sample_for_compression{0}; CompressionInfo compression_info( - compression_opts, compression_context, CompressionDict::GetEmptyDict(), - cache_options_.compression_type, sample_for_compression); + cache_options_.compression_opts, compression_context, + CompressionDict::GetEmptyDict(), cache_options_.compression_type, + sample_for_compression); bool success = CompressData(val, compression_info, @@ -291,6 +291,11 @@ std::string CompressedSecondaryCache::GetPrintableOptions() const { snprintf(buffer, kBufferSize, " compression_type : %s\n", CompressionTypeToString(cache_options_.compression_type).c_str()); ret.append(buffer); + snprintf(buffer, kBufferSize, " compression_opts : %s\n", + CompressionOptionsToString( + const_cast<CompressionOptions&>(cache_options_.compression_opts)) + .c_str()); + ret.append(buffer); snprintf(buffer, kBufferSize, " compress_format_version : %d\n", cache_options_.compress_format_version); ret.append(buffer); diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 5b76707741..76301f8889 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -61,151 +61,6 @@ enum CompactionPri : char { kRoundRobin = 0x4, }; -// Compression options for different compression algorithms like Zlib -struct CompressionOptions { - // ==> BEGIN options that can be set by deprecated configuration syntax, <== - 
// ==> e.g. compression_opts=5:6:7:8:9:10:true:11:false <== - // ==> Please use compression_opts={level=6;strategy=7;} form instead. <== - - // RocksDB's generic default compression level. Internally it'll be translated - // to the default compression level specific to the library being used (see - // comment above `ColumnFamilyOptions::compression`). - // - // The default value is the max 16-bit int as it'll be written out in OPTIONS - // file, which should be portable. - static constexpr int kDefaultCompressionLevel = 32767; - - // zlib only: windowBits parameter. See https://www.zlib.net/manual.html - int window_bits = -14; - - // Compression "level" applicable to zstd, zlib, LZ4, and LZ4HC. Except for - // kDefaultCompressionLevel (see above), the meaning of each value depends - // on the compression algorithm. Decreasing across non- - // `kDefaultCompressionLevel` values will either favor speed over - // compression ratio or have no effect. - // - // In LZ4 specifically, the absolute value of a negative `level` internally - // configures the `acceleration` parameter. For example, set `level=-10` for - // `acceleration=10`. This negation is necessary to ensure decreasing `level` - // values favor speed over compression ratio. - int level = kDefaultCompressionLevel; - - // zlib only: strategy parameter. See https://www.zlib.net/manual.html - int strategy = 0; - - // Maximum size of dictionaries used to prime the compression library. - // Enabling dictionary can improve compression ratios when there are - // repetitions across data blocks. - // - // The dictionary is created by sampling the SST file data. If - // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's - // dictionary generator (see comments for option `use_zstd_dict_trainer` for - // detail on dictionary generator). If `zstd_max_train_bytes` is zero, the - // random samples are used directly as the dictionary. 
- // - // When compression dictionary is disabled, we compress and write each block - // before buffering data for the next one. When compression dictionary is - // enabled, we buffer SST file data in-memory so we can sample it, as data - // can only be compressed and written after the dictionary has been finalized. - // - // The amount of data buffered can be limited by `max_dict_buffer_bytes`. This - // buffered memory is charged to the block cache when there is a block cache. - // If block cache insertion fails with `Status::MemoryLimit` (i.e., it is - // full), we finalize the dictionary with whatever data we have and then stop - // buffering. - uint32_t max_dict_bytes = 0; - - // Maximum size of training data passed to zstd's dictionary trainer. Using - // zstd's dictionary trainer can achieve even better compression ratio - // improvements than using `max_dict_bytes` alone. - // - // The training data will be used to generate a dictionary of max_dict_bytes. - uint32_t zstd_max_train_bytes = 0; - - // Number of threads for parallel compression. - // Parallel compression is enabled only if threads > 1. - // THE FEATURE IS STILL EXPERIMENTAL - // - // This option is valid only when BlockBasedTable is used. - // - // When parallel compression is enabled, SST size file sizes might be - // more inflated compared to the target size, because more data of unknown - // compressed size is in flight when compression is parallelized. To be - // reasonably accurate, this inflation is also estimated by using historical - // compression ratio and current bytes inflight. - uint32_t parallel_threads = 1; - - // When the compression options are set by the user, it will be set to "true". - // For bottommost_compression_opts, to enable it, user must set enabled=true. - // Otherwise, bottommost compression will use compression_opts as default - // compression options. 
- // - // For compression_opts, if compression_opts.enabled=false, it is still - // used as compression options for compression process. - bool enabled = false; - - // Limit on data buffering when gathering samples to build a dictionary. Zero - // means no limit. When dictionary is disabled (`max_dict_bytes == 0`), - // enabling this limit (`max_dict_buffer_bytes != 0`) has no effect. - // - // In compaction, the buffering is limited to the target file size (see - // `target_file_size_base` and `target_file_size_multiplier`) even if this - // setting permits more buffering. Since we cannot determine where the file - // should be cut until data blocks are compressed with dictionary, buffering - // more than the target file size could lead to selecting samples that belong - // to a later output SST. - // - // Limiting too strictly may harm dictionary effectiveness since it forces - // RocksDB to pick samples from the initial portion of the output SST, which - // may not be representative of the whole file. Configuring this limit below - // `zstd_max_train_bytes` (when enabled) can restrict how many samples we can - // pass to the dictionary trainer. Configuring it below `max_dict_bytes` can - // restrict the size of the final dictionary. - uint64_t max_dict_buffer_bytes = 0; - - // Use zstd trainer to generate dictionaries. When this option is set to true, - // zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes - // buffered data will be passed to zstd dictionary trainer to generate a - // dictionary of size max_dict_bytes. - // - // When this option is false, zstd's API ZDICT_finalizeDictionary() will be - // called to generate dictionaries. zstd_max_train_bytes of training sampled - // data will be passed to this API. Using this API should save CPU time on - // dictionary training, but the compression ratio may not be as good as using - // a dictionary trainer. 
- bool use_zstd_dict_trainer = true; - - // ===> END options that can be set by deprecated configuration syntax <=== - // ===> Use compression_opts={level=6;strategy=7;} form for below opts <=== - - // Essentially specifies a minimum acceptable compression ratio. A block is - // stored uncompressed if the compressed block does not achieve this ratio, - // because the downstream cost of decompression is not considered worth such - // a small savings (if any). - // However, the ratio is specified in a way that is efficient for checking. - // An integer from 1 to 1024 indicates the maximum allowable compressed bytes - // per 1KB of input, so the minimum acceptable ratio is 1024.0 / this value. - // For example, for a minimum ratio of 1.5:1, set to 683. See SetMinRatio(). - // Default: abandon use of compression for a specific block or entry if - // compressed by less than 12.5% (minimum ratio of 1.143:1). - int max_compressed_bytes_per_kb = 1024 * 7 / 8; - - // ZSTD only. - // Enable compression algorithm's checksum feature. - // (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428) - // Each compressed frame will have a 32-bit checksum attached. The checksum - // computed from the uncompressed data and can be verified during - // decompression. - bool checksum = false; - - // A convenience function for setting max_compressed_bytes_per_kb based on a - // minimum acceptable compression ratio (uncompressed size over compressed - // size). - void SetMinRatio(double min_ratio) { - max_compressed_bytes_per_kb = static_cast<int>(1024.0 / min_ratio + 0.5); - } -}; - // Temperature of a file. Used to pass to FileSystem for a different // placement and/or coding. 
// Reserve some numbers in the middle, in case we need to insert new tier diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 2a358504e8..d249c77227 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -295,6 +295,9 @@ struct CompressedSecondaryCacheOptions : LRUCacheOptions { // The compression method (if any) that is used to compress data. CompressionType compression_type = CompressionType::kLZ4Compression; + // Options specific to the compression algorithm + CompressionOptions compression_opts; + // compress_format_version can have two values: // compress_format_version == 1 -- decompressed size is not included in the // block header. diff --git a/include/rocksdb/compression_type.h b/include/rocksdb/compression_type.h index bfeb00bdef..1fe2fd3fc7 100644 --- a/include/rocksdb/compression_type.h +++ b/include/rocksdb/compression_type.h @@ -6,6 +6,7 @@ #pragma once #include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { @@ -37,4 +38,149 @@ enum CompressionType : unsigned char { kDisableCompressionOption = 0xff, }; +// Compression options for different compression algorithms like Zlib +struct CompressionOptions { + // ==> BEGIN options that can be set by deprecated configuration syntax, <== + // ==> e.g. compression_opts=5:6:7:8:9:10:true:11:false <== + // ==> Please use compression_opts={level=6;strategy=7;} form instead. <== + + // RocksDB's generic default compression level. Internally it'll be translated + // to the default compression level specific to the library being used (see + // comment above `ColumnFamilyOptions::compression`). + // + // The default value is the max 16-bit int as it'll be written out in OPTIONS + // file, which should be portable. + static constexpr int kDefaultCompressionLevel = 32767; + + // zlib only: windowBits parameter. See https://www.zlib.net/manual.html + int window_bits = -14; + + // Compression "level" applicable to zstd, zlib, LZ4, and LZ4HC. 
Except for + // kDefaultCompressionLevel (see above), the meaning of each value depends + // on the compression algorithm. Decreasing across non- + // `kDefaultCompressionLevel` values will either favor speed over + // compression ratio or have no effect. + // + // In LZ4 specifically, the absolute value of a negative `level` internally + // configures the `acceleration` parameter. For example, set `level=-10` for + // `acceleration=10`. This negation is necessary to ensure decreasing `level` + // values favor speed over compression ratio. + int level = kDefaultCompressionLevel; + + // zlib only: strategy parameter. See https://www.zlib.net/manual.html + int strategy = 0; + + // Maximum size of dictionaries used to prime the compression library. + // Enabling dictionary can improve compression ratios when there are + // repetitions across data blocks. + // + // The dictionary is created by sampling the SST file data. If + // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's + // dictionary generator (see comments for option `use_zstd_dict_trainer` for + // detail on dictionary generator). If `zstd_max_train_bytes` is zero, the + // random samples are used directly as the dictionary. + // + // When compression dictionary is disabled, we compress and write each block + // before buffering data for the next one. When compression dictionary is + // enabled, we buffer SST file data in-memory so we can sample it, as data + // can only be compressed and written after the dictionary has been finalized. + // + // The amount of data buffered can be limited by `max_dict_buffer_bytes`. This + // buffered memory is charged to the block cache when there is a block cache. + // If block cache insertion fails with `Status::MemoryLimit` (i.e., it is + // full), we finalize the dictionary with whatever data we have and then stop + // buffering. + uint32_t max_dict_bytes = 0; + + // Maximum size of training data passed to zstd's dictionary trainer. 
Using + // zstd's dictionary trainer can achieve even better compression ratio + // improvements than using `max_dict_bytes` alone. + // + // The training data will be used to generate a dictionary of max_dict_bytes. + uint32_t zstd_max_train_bytes = 0; + + // Number of threads for parallel compression. + // Parallel compression is enabled only if threads > 1. + // THE FEATURE IS STILL EXPERIMENTAL + // + // This option is valid only when BlockBasedTable is used. + // + // When parallel compression is enabled, SST size file sizes might be + // more inflated compared to the target size, because more data of unknown + // compressed size is in flight when compression is parallelized. To be + // reasonably accurate, this inflation is also estimated by using historical + // compression ratio and current bytes inflight. + uint32_t parallel_threads = 1; + + // When the compression options are set by the user, it will be set to "true". + // For bottommost_compression_opts, to enable it, user must set enabled=true. + // Otherwise, bottommost compression will use compression_opts as default + // compression options. + // + // For compression_opts, if compression_opts.enabled=false, it is still + // used as compression options for compression process. + bool enabled = false; + + // Limit on data buffering when gathering samples to build a dictionary. Zero + // means no limit. When dictionary is disabled (`max_dict_bytes == 0`), + // enabling this limit (`max_dict_buffer_bytes != 0`) has no effect. + // + // In compaction, the buffering is limited to the target file size (see + // `target_file_size_base` and `target_file_size_multiplier`) even if this + // setting permits more buffering. Since we cannot determine where the file + // should be cut until data blocks are compressed with dictionary, buffering + // more than the target file size could lead to selecting samples that belong + // to a later output SST. 
+ // + // Limiting too strictly may harm dictionary effectiveness since it forces + // RocksDB to pick samples from the initial portion of the output SST, which + // may not be representative of the whole file. Configuring this limit below + // `zstd_max_train_bytes` (when enabled) can restrict how many samples we can + // pass to the dictionary trainer. Configuring it below `max_dict_bytes` can + // restrict the size of the final dictionary. + uint64_t max_dict_buffer_bytes = 0; + + // Use zstd trainer to generate dictionaries. When this option is set to true, + // zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes + // buffered data will be passed to zstd dictionary trainer to generate a + // dictionary of size max_dict_bytes. + // + // When this option is false, zstd's API ZDICT_finalizeDictionary() will be + // called to generate dictionaries. zstd_max_train_bytes of training sampled + // data will be passed to this API. Using this API should save CPU time on + // dictionary training, but the compression ratio may not be as good as using + // a dictionary trainer. + bool use_zstd_dict_trainer = true; + + // ===> END options that can be set by deprecated configuration syntax <=== + // ===> Use compression_opts={level=6;strategy=7;} form for below opts <=== + + // Essentially specifies a minimum acceptable compression ratio. A block is + // stored uncompressed if the compressed block does not achieve this ratio, + // because the downstream cost of decompression is not considered worth such + // a small savings (if any). + // However, the ratio is specified in a way that is efficient for checking. + // An integer from 1 to 1024 indicates the maximum allowable compressed bytes + // per 1KB of input, so the minimum acceptable ratio is 1024.0 / this value. + // For example, for a minimum ratio of 1.5:1, set to 683. See SetMinRatio(). 
+ // Default: abandon use of compression for a specific block or entry if + // compressed by less than 12.5% (minimum ratio of 1.143:1). + int max_compressed_bytes_per_kb = 1024 * 7 / 8; + + // ZSTD only. + // Enable compression algorithm's checksum feature. + // (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428) + // Each compressed frame will have a 32-bit checksum attached. The checksum + // computed from the uncompressed data and can be verified during + // decompression. + bool checksum = false; + + // A convenience function for setting max_compressed_bytes_per_kb based on a + // minimum acceptable compression ratio (uncompressed size over compressed + // size). + void SetMinRatio(double min_ratio) { + max_compressed_bytes_per_kb = static_cast<int>(1024.0 / min_ratio + 0.5); + } +}; + } // namespace ROCKSDB_NAMESPACE diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index e177934b09..2a550cfbf1 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -595,6 +595,12 @@ static enum ROCKSDB_NAMESPACE::CompressionType FLAGS_compressed_secondary_cache_compression_type_e = ROCKSDB_NAMESPACE::kLZ4Compression; +DEFINE_int32(compressed_secondary_cache_compression_level, + ROCKSDB_NAMESPACE::CompressionOptions().level, + "Compression level. The meaning of this value is library-" + "dependent. 
If unset, we try to use the default for the library " + "specified in `--compressed_secondary_cache_compression_type`"); + DEFINE_uint32( compressed_secondary_cache_compress_format_version, 2, "compress_format_version can have two values: " @@ -3078,6 +3084,8 @@ class Benchmark { FLAGS_compressed_secondary_cache_low_pri_pool_ratio; secondary_cache_opts.compression_type = FLAGS_compressed_secondary_cache_compression_type_e; + secondary_cache_opts.compression_opts.level = + FLAGS_compressed_secondary_cache_compression_level; secondary_cache_opts.compress_format_version = FLAGS_compressed_secondary_cache_compress_format_version; if (FLAGS_use_tiered_cache) { diff --git a/unreleased_history/public_api_changes/comp_cache_compression_level.md b/unreleased_history/public_api_changes/comp_cache_compression_level.md new file mode 100644 index 0000000000..29843c9524 --- /dev/null +++ b/unreleased_history/public_api_changes/comp_cache_compression_level.md @@ -0,0 +1 @@ +Add CompressionOptions to the CompressedSecondaryCacheOptions structure to allow users to specify library specific options when creating the compressed secondary cache.