mirror of https://github.com/facebook/rocksdb.git
Add CompressionOptions to the compressed secondary cache (#12234)
Summary: Add ```CompressionOptions``` to ```CompressedSecondaryCacheOptions``` to allow users to set options such as compression level. It allows performance to be fine-tuned. Tests: ran db_bench and verified the compression options in the LOG file. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12234 Reviewed By: ajkr Differential Revision: D52758133 Pulled By: anand1976 fbshipit-source-id: af849fbffce6f84704387c195d8edba40d9548f6
This commit is contained in:
parent
cad76a2e1e
commit
b49f9cdd3c
|
@ -192,13 +192,13 @@ Status CompressedSecondaryCache::InsertInternal(
|
|||
type == kNoCompression &&
|
||||
!cache_options_.do_not_compress_roles.Contains(helper->role)) {
|
||||
PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes, data_size);
|
||||
CompressionOptions compression_opts;
|
||||
CompressionContext compression_context(cache_options_.compression_type,
|
||||
compression_opts);
|
||||
cache_options_.compression_opts);
|
||||
uint64_t sample_for_compression{0};
|
||||
CompressionInfo compression_info(
|
||||
compression_opts, compression_context, CompressionDict::GetEmptyDict(),
|
||||
cache_options_.compression_type, sample_for_compression);
|
||||
cache_options_.compression_opts, compression_context,
|
||||
CompressionDict::GetEmptyDict(), cache_options_.compression_type,
|
||||
sample_for_compression);
|
||||
|
||||
bool success =
|
||||
CompressData(val, compression_info,
|
||||
|
@ -291,6 +291,11 @@ std::string CompressedSecondaryCache::GetPrintableOptions() const {
|
|||
snprintf(buffer, kBufferSize, " compression_type : %s\n",
|
||||
CompressionTypeToString(cache_options_.compression_type).c_str());
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " compression_opts : %s\n",
|
||||
CompressionOptionsToString(
|
||||
const_cast<CompressionOptions&>(cache_options_.compression_opts))
|
||||
.c_str());
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " compress_format_version : %d\n",
|
||||
cache_options_.compress_format_version);
|
||||
ret.append(buffer);
|
||||
|
|
|
@ -61,151 +61,6 @@ enum CompactionPri : char {
|
|||
kRoundRobin = 0x4,
|
||||
};
|
||||
|
||||
// Compression options for different compression algorithms like Zlib
|
||||
struct CompressionOptions {
|
||||
// ==> BEGIN options that can be set by deprecated configuration syntax, <==
|
||||
// ==> e.g. compression_opts=5:6:7:8:9:10:true:11:false <==
|
||||
// ==> Please use compression_opts={level=6;strategy=7;} form instead. <==
|
||||
|
||||
// RocksDB's generic default compression level. Internally it'll be translated
|
||||
// to the default compression level specific to the library being used (see
|
||||
// comment above `ColumnFamilyOptions::compression`).
|
||||
//
|
||||
// The default value is the max 16-bit int as it'll be written out in OPTIONS
|
||||
// file, which should be portable.
|
||||
static constexpr int kDefaultCompressionLevel = 32767;
|
||||
|
||||
// zlib only: windowBits parameter. See https://www.zlib.net/manual.html
|
||||
int window_bits = -14;
|
||||
|
||||
// Compression "level" applicable to zstd, zlib, LZ4, and LZ4HC. Except for
|
||||
// kDefaultCompressionLevel (see above), the meaning of each value depends
|
||||
// on the compression algorithm. Decreasing across non-
|
||||
// `kDefaultCompressionLevel` values will either favor speed over
|
||||
// compression ratio or have no effect.
|
||||
//
|
||||
// In LZ4 specifically, the absolute value of a negative `level` internally
|
||||
// configures the `acceleration` parameter. For example, set `level=-10` for
|
||||
// `acceleration=10`. This negation is necessary to ensure decreasing `level`
|
||||
// values favor speed over compression ratio.
|
||||
int level = kDefaultCompressionLevel;
|
||||
|
||||
// zlib only: strategy parameter. See https://www.zlib.net/manual.html
|
||||
int strategy = 0;
|
||||
|
||||
// Maximum size of dictionaries used to prime the compression library.
|
||||
// Enabling dictionary can improve compression ratios when there are
|
||||
// repetitions across data blocks.
|
||||
//
|
||||
// The dictionary is created by sampling the SST file data. If
|
||||
// `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
|
||||
// dictionary generator (see comments for option `use_zstd_dict_trainer` for
|
||||
// detail on dictionary generator). If `zstd_max_train_bytes` is zero, the
|
||||
// random samples are used directly as the dictionary.
|
||||
//
|
||||
// When compression dictionary is disabled, we compress and write each block
|
||||
// before buffering data for the next one. When compression dictionary is
|
||||
// enabled, we buffer SST file data in-memory so we can sample it, as data
|
||||
// can only be compressed and written after the dictionary has been finalized.
|
||||
//
|
||||
// The amount of data buffered can be limited by `max_dict_buffer_bytes`. This
|
||||
// buffered memory is charged to the block cache when there is a block cache.
|
||||
// If block cache insertion fails with `Status::MemoryLimit` (i.e., it is
|
||||
// full), we finalize the dictionary with whatever data we have and then stop
|
||||
// buffering.
|
||||
uint32_t max_dict_bytes = 0;
|
||||
|
||||
// Maximum size of training data passed to zstd's dictionary trainer. Using
|
||||
// zstd's dictionary trainer can achieve even better compression ratio
|
||||
// improvements than using `max_dict_bytes` alone.
|
||||
//
|
||||
// The training data will be used to generate a dictionary of max_dict_bytes.
|
||||
uint32_t zstd_max_train_bytes = 0;
|
||||
|
||||
// Number of threads for parallel compression.
|
||||
// Parallel compression is enabled only if threads > 1.
|
||||
// THE FEATURE IS STILL EXPERIMENTAL
|
||||
//
|
||||
// This option is valid only when BlockBasedTable is used.
|
||||
//
|
||||
// When parallel compression is enabled, SST size file sizes might be
|
||||
// more inflated compared to the target size, because more data of unknown
|
||||
// compressed size is in flight when compression is parallelized. To be
|
||||
// reasonably accurate, this inflation is also estimated by using historical
|
||||
// compression ratio and current bytes inflight.
|
||||
uint32_t parallel_threads = 1;
|
||||
|
||||
// When the compression options are set by the user, it will be set to "true".
|
||||
// For bottommost_compression_opts, to enable it, user must set enabled=true.
|
||||
// Otherwise, bottommost compression will use compression_opts as default
|
||||
// compression options.
|
||||
//
|
||||
// For compression_opts, if compression_opts.enabled=false, it is still
|
||||
// used as compression options for compression process.
|
||||
bool enabled = false;
|
||||
|
||||
// Limit on data buffering when gathering samples to build a dictionary. Zero
|
||||
// means no limit. When dictionary is disabled (`max_dict_bytes == 0`),
|
||||
// enabling this limit (`max_dict_buffer_bytes != 0`) has no effect.
|
||||
//
|
||||
// In compaction, the buffering is limited to the target file size (see
|
||||
// `target_file_size_base` and `target_file_size_multiplier`) even if this
|
||||
// setting permits more buffering. Since we cannot determine where the file
|
||||
// should be cut until data blocks are compressed with dictionary, buffering
|
||||
// more than the target file size could lead to selecting samples that belong
|
||||
// to a later output SST.
|
||||
//
|
||||
// Limiting too strictly may harm dictionary effectiveness since it forces
|
||||
// RocksDB to pick samples from the initial portion of the output SST, which
|
||||
// may not be representative of the whole file. Configuring this limit below
|
||||
// `zstd_max_train_bytes` (when enabled) can restrict how many samples we can
|
||||
// pass to the dictionary trainer. Configuring it below `max_dict_bytes` can
|
||||
// restrict the size of the final dictionary.
|
||||
uint64_t max_dict_buffer_bytes = 0;
|
||||
|
||||
// Use zstd trainer to generate dictionaries. When this option is set to true,
|
||||
// zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
|
||||
// buffered data will be passed to zstd dictionary trainer to generate a
|
||||
// dictionary of size max_dict_bytes.
|
||||
//
|
||||
// When this option is false, zstd's API ZDICT_finalizeDictionary() will be
|
||||
// called to generate dictionaries. zstd_max_train_bytes of training sampled
|
||||
// data will be passed to this API. Using this API should save CPU time on
|
||||
// dictionary training, but the compression ratio may not be as good as using
|
||||
// a dictionary trainer.
|
||||
bool use_zstd_dict_trainer = true;
|
||||
|
||||
// ===> END options that can be set by deprecated configuration syntax <===
|
||||
// ===> Use compression_opts={level=6;strategy=7;} form for below opts <===
|
||||
|
||||
// Essentially specifies a minimum acceptable compression ratio. A block is
|
||||
// stored uncompressed if the compressed block does not achieve this ratio,
|
||||
// because the downstream cost of decompression is not considered worth such
|
||||
// a small savings (if any).
|
||||
// However, the ratio is specified in a way that is efficient for checking.
|
||||
// An integer from 1 to 1024 indicates the maximum allowable compressed bytes
|
||||
// per 1KB of input, so the minimum acceptable ratio is 1024.0 / this value.
|
||||
// For example, for a minimum ratio of 1.5:1, set to 683. See SetMinRatio().
|
||||
// Default: abandon use of compression for a specific block or entry if
|
||||
// compressed by less than 12.5% (minimum ratio of 1.143:1).
|
||||
int max_compressed_bytes_per_kb = 1024 * 7 / 8;
|
||||
|
||||
// ZSTD only.
|
||||
// Enable compression algorithm's checksum feature.
|
||||
// (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428)
|
||||
// Each compressed frame will have a 32-bit checksum attached. The checksum
|
||||
// computed from the uncompressed data and can be verified during
|
||||
// decompression.
|
||||
bool checksum = false;
|
||||
|
||||
// A convenience function for setting max_compressed_bytes_per_kb based on a
|
||||
// minimum acceptable compression ratio (uncompressed size over compressed
|
||||
// size).
|
||||
void SetMinRatio(double min_ratio) {
|
||||
max_compressed_bytes_per_kb = static_cast<int>(1024.0 / min_ratio + 0.5);
|
||||
}
|
||||
};
|
||||
|
||||
// Temperature of a file. Used to pass to FileSystem for a different
|
||||
// placement and/or coding.
|
||||
// Reserve some numbers in the middle, in case we need to insert new tier
|
||||
|
|
|
@ -295,6 +295,9 @@ struct CompressedSecondaryCacheOptions : LRUCacheOptions {
|
|||
// The compression method (if any) that is used to compress data.
|
||||
CompressionType compression_type = CompressionType::kLZ4Compression;
|
||||
|
||||
// Options specific to the compression algorithm
|
||||
CompressionOptions compression_opts;
|
||||
|
||||
// compress_format_version can have two values:
|
||||
// compress_format_version == 1 -- decompressed size is not included in the
|
||||
// block header.
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "rocksdb/rocksdb_namespace.h"
|
||||
#include "rocksdb/types.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
|
@ -37,4 +38,149 @@ enum CompressionType : unsigned char {
|
|||
kDisableCompressionOption = 0xff,
|
||||
};
|
||||
|
||||
// Compression options for different compression algorithms like Zlib
|
||||
struct CompressionOptions {
|
||||
// ==> BEGIN options that can be set by deprecated configuration syntax, <==
|
||||
// ==> e.g. compression_opts=5:6:7:8:9:10:true:11:false <==
|
||||
// ==> Please use compression_opts={level=6;strategy=7;} form instead. <==
|
||||
|
||||
// RocksDB's generic default compression level. Internally it'll be translated
|
||||
// to the default compression level specific to the library being used (see
|
||||
// comment above `ColumnFamilyOptions::compression`).
|
||||
//
|
||||
// The default value is the max 16-bit int as it'll be written out in OPTIONS
|
||||
// file, which should be portable.
|
||||
static constexpr int kDefaultCompressionLevel = 32767;
|
||||
|
||||
// zlib only: windowBits parameter. See https://www.zlib.net/manual.html
|
||||
int window_bits = -14;
|
||||
|
||||
// Compression "level" applicable to zstd, zlib, LZ4, and LZ4HC. Except for
|
||||
// kDefaultCompressionLevel (see above), the meaning of each value depends
|
||||
// on the compression algorithm. Decreasing across non-
|
||||
// `kDefaultCompressionLevel` values will either favor speed over
|
||||
// compression ratio or have no effect.
|
||||
//
|
||||
// In LZ4 specifically, the absolute value of a negative `level` internally
|
||||
// configures the `acceleration` parameter. For example, set `level=-10` for
|
||||
// `acceleration=10`. This negation is necessary to ensure decreasing `level`
|
||||
// values favor speed over compression ratio.
|
||||
int level = kDefaultCompressionLevel;
|
||||
|
||||
// zlib only: strategy parameter. See https://www.zlib.net/manual.html
|
||||
int strategy = 0;
|
||||
|
||||
// Maximum size of dictionaries used to prime the compression library.
|
||||
// Enabling dictionary can improve compression ratios when there are
|
||||
// repetitions across data blocks.
|
||||
//
|
||||
// The dictionary is created by sampling the SST file data. If
|
||||
// `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
|
||||
// dictionary generator (see comments for option `use_zstd_dict_trainer` for
|
||||
// detail on dictionary generator). If `zstd_max_train_bytes` is zero, the
|
||||
// random samples are used directly as the dictionary.
|
||||
//
|
||||
// When compression dictionary is disabled, we compress and write each block
|
||||
// before buffering data for the next one. When compression dictionary is
|
||||
// enabled, we buffer SST file data in-memory so we can sample it, as data
|
||||
// can only be compressed and written after the dictionary has been finalized.
|
||||
//
|
||||
// The amount of data buffered can be limited by `max_dict_buffer_bytes`. This
|
||||
// buffered memory is charged to the block cache when there is a block cache.
|
||||
// If block cache insertion fails with `Status::MemoryLimit` (i.e., it is
|
||||
// full), we finalize the dictionary with whatever data we have and then stop
|
||||
// buffering.
|
||||
uint32_t max_dict_bytes = 0;
|
||||
|
||||
// Maximum size of training data passed to zstd's dictionary trainer. Using
|
||||
// zstd's dictionary trainer can achieve even better compression ratio
|
||||
// improvements than using `max_dict_bytes` alone.
|
||||
//
|
||||
// The training data will be used to generate a dictionary of max_dict_bytes.
|
||||
uint32_t zstd_max_train_bytes = 0;
|
||||
|
||||
// Number of threads for parallel compression.
|
||||
// Parallel compression is enabled only if threads > 1.
|
||||
// THE FEATURE IS STILL EXPERIMENTAL
|
||||
//
|
||||
// This option is valid only when BlockBasedTable is used.
|
||||
//
|
||||
// When parallel compression is enabled, SST size file sizes might be
|
||||
// more inflated compared to the target size, because more data of unknown
|
||||
// compressed size is in flight when compression is parallelized. To be
|
||||
// reasonably accurate, this inflation is also estimated by using historical
|
||||
// compression ratio and current bytes inflight.
|
||||
uint32_t parallel_threads = 1;
|
||||
|
||||
// When the compression options are set by the user, it will be set to "true".
|
||||
// For bottommost_compression_opts, to enable it, user must set enabled=true.
|
||||
// Otherwise, bottommost compression will use compression_opts as default
|
||||
// compression options.
|
||||
//
|
||||
// For compression_opts, if compression_opts.enabled=false, it is still
|
||||
// used as compression options for compression process.
|
||||
bool enabled = false;
|
||||
|
||||
// Limit on data buffering when gathering samples to build a dictionary. Zero
|
||||
// means no limit. When dictionary is disabled (`max_dict_bytes == 0`),
|
||||
// enabling this limit (`max_dict_buffer_bytes != 0`) has no effect.
|
||||
//
|
||||
// In compaction, the buffering is limited to the target file size (see
|
||||
// `target_file_size_base` and `target_file_size_multiplier`) even if this
|
||||
// setting permits more buffering. Since we cannot determine where the file
|
||||
// should be cut until data blocks are compressed with dictionary, buffering
|
||||
// more than the target file size could lead to selecting samples that belong
|
||||
// to a later output SST.
|
||||
//
|
||||
// Limiting too strictly may harm dictionary effectiveness since it forces
|
||||
// RocksDB to pick samples from the initial portion of the output SST, which
|
||||
// may not be representative of the whole file. Configuring this limit below
|
||||
// `zstd_max_train_bytes` (when enabled) can restrict how many samples we can
|
||||
// pass to the dictionary trainer. Configuring it below `max_dict_bytes` can
|
||||
// restrict the size of the final dictionary.
|
||||
uint64_t max_dict_buffer_bytes = 0;
|
||||
|
||||
// Use zstd trainer to generate dictionaries. When this option is set to true,
|
||||
// zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
|
||||
// buffered data will be passed to zstd dictionary trainer to generate a
|
||||
// dictionary of size max_dict_bytes.
|
||||
//
|
||||
// When this option is false, zstd's API ZDICT_finalizeDictionary() will be
|
||||
// called to generate dictionaries. zstd_max_train_bytes of training sampled
|
||||
// data will be passed to this API. Using this API should save CPU time on
|
||||
// dictionary training, but the compression ratio may not be as good as using
|
||||
// a dictionary trainer.
|
||||
bool use_zstd_dict_trainer = true;
|
||||
|
||||
// ===> END options that can be set by deprecated configuration syntax <===
|
||||
// ===> Use compression_opts={level=6;strategy=7;} form for below opts <===
|
||||
|
||||
// Essentially specifies a minimum acceptable compression ratio. A block is
|
||||
// stored uncompressed if the compressed block does not achieve this ratio,
|
||||
// because the downstream cost of decompression is not considered worth such
|
||||
// a small savings (if any).
|
||||
// However, the ratio is specified in a way that is efficient for checking.
|
||||
// An integer from 1 to 1024 indicates the maximum allowable compressed bytes
|
||||
// per 1KB of input, so the minimum acceptable ratio is 1024.0 / this value.
|
||||
// For example, for a minimum ratio of 1.5:1, set to 683. See SetMinRatio().
|
||||
// Default: abandon use of compression for a specific block or entry if
|
||||
// compressed by less than 12.5% (minimum ratio of 1.143:1).
|
||||
int max_compressed_bytes_per_kb = 1024 * 7 / 8;
|
||||
|
||||
// ZSTD only.
|
||||
// Enable compression algorithm's checksum feature.
|
||||
// (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428)
|
||||
// Each compressed frame will have a 32-bit checksum attached. The checksum
|
||||
// computed from the uncompressed data and can be verified during
|
||||
// decompression.
|
||||
bool checksum = false;
|
||||
|
||||
// A convenience function for setting max_compressed_bytes_per_kb based on a
|
||||
// minimum acceptable compression ratio (uncompressed size over compressed
|
||||
// size).
|
||||
void SetMinRatio(double min_ratio) {
|
||||
max_compressed_bytes_per_kb = static_cast<int>(1024.0 / min_ratio + 0.5);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
|
|
@ -595,6 +595,12 @@ static enum ROCKSDB_NAMESPACE::CompressionType
|
|||
FLAGS_compressed_secondary_cache_compression_type_e =
|
||||
ROCKSDB_NAMESPACE::kLZ4Compression;
|
||||
|
||||
DEFINE_int32(compressed_secondary_cache_compression_level,
|
||||
ROCKSDB_NAMESPACE::CompressionOptions().level,
|
||||
"Compression level. The meaning of this value is library-"
|
||||
"dependent. If unset, we try to use the default for the library "
|
||||
"specified in `--compressed_secondary_cache_compression_type`");
|
||||
|
||||
DEFINE_uint32(
|
||||
compressed_secondary_cache_compress_format_version, 2,
|
||||
"compress_format_version can have two values: "
|
||||
|
@ -3078,6 +3084,8 @@ class Benchmark {
|
|||
FLAGS_compressed_secondary_cache_low_pri_pool_ratio;
|
||||
secondary_cache_opts.compression_type =
|
||||
FLAGS_compressed_secondary_cache_compression_type_e;
|
||||
secondary_cache_opts.compression_opts.level =
|
||||
FLAGS_compressed_secondary_cache_compression_level;
|
||||
secondary_cache_opts.compress_format_version =
|
||||
FLAGS_compressed_secondary_cache_compress_format_version;
|
||||
if (FLAGS_use_tiered_cache) {
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Add CompressionOptions to the CompressedSecondaryCacheOptions structure to allow users to specify library-specific options when creating the compressed secondary cache.
|
Loading…
Reference in New Issue