mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 07:30:54 +00:00
Move advanced column family options to advanced_options.h
Summary: For the sake of making our options simpler, we should keep options.h as simple as possible and move more advanced/less common options to advanced_options.h. I started with ColumnFamilyOptions and also did some re-ordering. I have moved all ColumnFamilyOptions to advanced_options.h and only left these options in options.h: ``` const Comparator* comparator = BytewiseComparator(); std::shared_ptr<MergeOperator> merge_operator = nullptr; const CompactionFilter* compaction_filter = nullptr; std::shared_ptr<CompactionFilterFactory> compaction_filter_factory = nullptr; size_t write_buffer_size = 64 << 20; CompressionType compression; int level0_file_num_compaction_trigger = 4; bool disable_auto_compactions = false; ``` Please feel free to comment on specific options if you think they should be advanced or should not be. Closes https://github.com/facebook/rocksdb/pull/1847 Differential Revision: D4519996 Pulled By: IslamAbdelRahman fbshipit-source-id: abebd9a
This commit is contained in:
parent
2ca2059f66
commit
08864df212
558
include/rocksdb/advanced_options.h
Normal file
558
include/rocksdb/advanced_options.h
Normal file
|
@ -0,0 +1,558 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "rocksdb/memtablerep.h"
|
||||
#include "rocksdb/universal_compaction.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class Slice;
|
||||
class SliceTransform;
|
||||
enum CompressionType : unsigned char;
|
||||
class TablePropertiesCollectorFactory;
|
||||
class TableFactory;
|
||||
struct Options;
|
||||
|
||||
// Selects the compaction algorithm used for a column family.
enum CompactionStyle : char {
  // Level-based compaction.
  kCompactionStyleLevel = 0,

  // Universal compaction.
  // Not supported in ROCKSDB_LITE.
  kCompactionStyleUniversal = 1,

  // FIFO compaction.
  // Not supported in ROCKSDB_LITE.
  kCompactionStyleFIFO = 2,

  // No background compaction; compaction jobs are submitted explicitly
  // via CompactFiles().
  // Not supported in ROCKSDB_LITE.
  kCompactionStyleNone = 3,
};
|
||||
|
||||
// In level-based compaction, it determines which file from a level to be
|
||||
// picked to merge to the next level. We suggest people try
|
||||
// kMinOverlappingRatio first when you tune your database.
|
||||
enum CompactionPri : char {
  // Slightly prioritize larger files, where size is compensated by the
  // number of deletes.
  kByCompensatedSize = 0,

  // Compact first the files whose most recent update is the oldest.
  // Try this if you only update some hot keys in small ranges.
  kOldestLargestSeqFirst = 1,

  // Compact first the files whose key range has gone the longest without
  // being compacted to the next level. If your updates are random across the
  // key space, write amplification is slightly better with this option.
  kOldestSmallestSeqFirst = 2,

  // Compact first the files with the smallest ratio of overlapping size in
  // the next level to their own size. In many cases this can optimize write
  // amplification.
  kMinOverlappingRatio = 3,
};
|
||||
|
||||
// Options for the FIFO compaction style.
struct CompactionOptionsFIFO {
  // Size threshold for the total of all table files: once it is reached,
  // the oldest table file is deleted.
  // Default: 1GB
  uint64_t max_table_files_size;

  // Starts with the 1GB default (2^30 bytes).
  CompactionOptionsFIFO() : max_table_files_size(uint64_t{1} << 30) {}
};
|
||||
|
||||
// Compression options for different compression algorithms like Zlib
|
||||
struct CompressionOptions {
  // Algorithm-specific tuning knobs handed to the compression library.
  // NOTE(review): presumably these follow zlib's windowBits / level /
  // strategy parameters -- confirm against the compression wrappers.
  int window_bits;
  int level;
  int strategy;

  // Maximum size of dictionary used to prime the compression library. Currently
  // this dictionary will be constructed by sampling the first output file in a
  // subcompaction when the target level is bottommost. This dictionary will be
  // loaded into the compression library before compressing/uncompressing each
  // data block of subsequent files in the subcompaction. Effectively, this
  // improves compression ratios when there are repetitions across data blocks.
  // A value of 0 indicates the feature is disabled.
  // Default: 0.
  uint32_t max_dict_bytes;

  // Default settings: window_bits = -14, level = -1, strategy = 0 and the
  // dictionary feature disabled.
  CompressionOptions()
      : window_bits(-14), level(-1), strategy(0), max_dict_bytes(0) {}

  // Explicit settings. `_max_dict_bytes` is accepted as an int for source
  // compatibility but stored as uint32_t; a negative value would otherwise
  // wrap around to a ~4GB dictionary size, so it is clamped to 0 (disabled).
  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes)
      : window_bits(wbits),
        level(_lev),
        strategy(_strategy),
        max_dict_bytes(_max_dict_bytes > 0
                           ? static_cast<uint32_t>(_max_dict_bytes)
                           : 0u) {}
};
|
||||
|
||||
// Return status for the inplace update callback.
enum UpdateStatus {
  // Nothing to update.
  UPDATE_FAILED = 0,
  // Value updated inplace.
  UPDATED_INPLACE = 1,
  // No inplace update; the merged value was set instead.
  UPDATED = 2,
};
|
||||
|
||||
|
||||
struct AdvancedColumnFamilyOptions {
|
||||
// The maximum number of write buffers that are built up in memory.
|
||||
// The default and the minimum number is 2, so that when 1 write buffer
|
||||
// is being flushed to storage, new writes can continue to the other
|
||||
// write buffer.
|
||||
// If max_write_buffer_number > 3, writing will be slowed down to
|
||||
// options.delayed_write_rate if we are writing to the last write buffer
|
||||
// allowed.
|
||||
//
|
||||
// Default: 2
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int max_write_buffer_number = 2;
|
||||
|
||||
// The minimum number of write buffers that will be merged together
|
||||
// before writing to storage. If set to 1, then
|
||||
// all write buffers are flushed to L0 as individual files and this increases
|
||||
// read amplification because a get request has to check in all of these
|
||||
// files. Also, an in-memory merge may result in writing lesser
|
||||
// data to storage if there are duplicate records in each of these
|
||||
// individual write buffers. Default: 1
|
||||
int min_write_buffer_number_to_merge = 1;
|
||||
|
||||
// The total maximum number of write buffers to maintain in memory including
|
||||
// copies of buffers that have already been flushed. Unlike
|
||||
// max_write_buffer_number, this parameter does not affect flushing.
|
||||
// This controls the minimum amount of write history that will be available
|
||||
// in memory for conflict checking when Transactions are used.
|
||||
//
|
||||
// When using an OptimisticTransactionDB:
|
||||
// If this value is too low, some transactions may fail at commit time due
|
||||
// to not being able to determine whether there were any write conflicts.
|
||||
//
|
||||
// When using a TransactionDB:
|
||||
// If Transaction::SetSnapshot is used, TransactionDB will read either
|
||||
// in-memory write buffers or SST files to do write-conflict checking.
|
||||
// Increasing this value can reduce the number of reads to SST files
|
||||
// done for conflict detection.
|
||||
//
|
||||
// Setting this value to 0 will cause write buffers to be freed immediately
|
||||
// after they are flushed.
|
||||
// If this value is set to -1, 'max_write_buffer_number' will be used.
|
||||
//
|
||||
// Default:
|
||||
// If using a TransactionDB/OptimisticTransactionDB, the default value will
|
||||
// be set to the value of 'max_write_buffer_number' if it is not explicitly
|
||||
// set by the user. Otherwise, the default is 0.
|
||||
int max_write_buffer_number_to_maintain = 0;
|
||||
|
||||
// Allows thread-safe inplace updates. If this is true, there is no way to
|
||||
// achieve point-in-time consistency using snapshot or iterator (assuming
|
||||
// concurrent updates). Hence iterator and multi-get will return results
|
||||
// which are not consistent as of any point-in-time.
|
||||
// If inplace_callback function is not set,
|
||||
// Put(key, new_value) will update inplace the existing_value iff
|
||||
// * key exists in current memtable
|
||||
// * new sizeof(new_value) <= sizeof(existing_value)
|
||||
// * existing_value for that key is a put i.e. kTypeValue
|
||||
// If inplace_callback function is set, check doc for inplace_callback.
|
||||
// Default: false.
|
||||
bool inplace_update_support = false;
|
||||
|
||||
// Number of locks used for inplace update
|
||||
// Default: 10000, if inplace_update_support = true, else 0.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t inplace_update_num_locks = 10000;
|
||||
|
||||
// existing_value - pointer to previous value (from both memtable and sst).
|
||||
// nullptr if key doesn't exist
|
||||
// existing_value_size - pointer to size of existing_value).
|
||||
// nullptr if key doesn't exist
|
||||
// delta_value - Delta value to be merged with the existing_value.
|
||||
// Stored in transaction logs.
|
||||
// merged_value - Set when delta is applied on the previous value.
|
||||
|
||||
// Applicable only when inplace_update_support is true,
|
||||
// this callback function is called at the time of updating the memtable
|
||||
// as part of a Put operation, lets say Put(key, delta_value). It allows the
|
||||
// 'delta_value' specified as part of the Put operation to be merged with
|
||||
// an 'existing_value' of the key in the database.
|
||||
|
||||
// If the merged value is smaller in size that the 'existing_value',
|
||||
// then this function can update the 'existing_value' buffer inplace and
|
||||
// the corresponding 'existing_value'_size pointer, if it wishes to.
|
||||
// The callback should return UpdateStatus::UPDATED_INPLACE.
|
||||
// In this case. (In this case, the snapshot-semantics of the rocksdb
|
||||
// Iterator is not atomic anymore).
|
||||
|
||||
// If the merged value is larger in size than the 'existing_value' or the
|
||||
// application does not wish to modify the 'existing_value' buffer inplace,
|
||||
// then the merged value should be returned via *merge_value. It is set by
|
||||
// merging the 'existing_value' and the Put 'delta_value'. The callback should
|
||||
// return UpdateStatus::UPDATED in this case. This merged value will be added
|
||||
// to the memtable.
|
||||
|
||||
// If merging fails or the application does not wish to take any action,
|
||||
// then the callback should return UpdateStatus::UPDATE_FAILED.
|
||||
|
||||
// Please remember that the original call from the application is Put(key,
|
||||
// delta_value). So the transaction log (if enabled) will still contain (key,
|
||||
// delta_value). The 'merged_value' is not stored in the transaction log.
|
||||
// Hence the inplace_callback function should be consistent across db reopens.
|
||||
|
||||
// Default: nullptr
|
||||
UpdateStatus (*inplace_callback)(char* existing_value,
|
||||
uint32_t* existing_value_size,
|
||||
Slice delta_value,
|
||||
std::string* merged_value) = nullptr;
|
||||
|
||||
// if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0,
|
||||
// create prefix bloom for memtable with the size of
|
||||
// write_buffer_size * memtable_prefix_bloom_size_ratio.
|
||||
// If it is larger than 0.25, it is santinized to 0.25.
|
||||
//
|
||||
// Default: 0 (disable)
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
double memtable_prefix_bloom_size_ratio = 0.0;
|
||||
|
||||
// Page size for huge page for the arena used by the memtable. If <=0, it
|
||||
// won't allocate from huge page but from malloc.
|
||||
// Users are responsible to reserve huge pages for it to be allocated. For
|
||||
// example:
|
||||
// sysctl -w vm.nr_hugepages=20
|
||||
// See linux doc Documentation/vm/hugetlbpage.txt
|
||||
// If there isn't enough free huge page available, it will fall back to
|
||||
// malloc.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t memtable_huge_page_size = 0;
|
||||
|
||||
// If non-nullptr, memtable will use the specified function to extract
|
||||
// prefixes for keys, and for each prefix maintain a hint of insert location
|
||||
// to reduce CPU usage for inserting keys with the prefix. Keys out of
|
||||
// domain of the prefix extractor will be insert without using hints.
|
||||
//
|
||||
// Currently only the default skiplist based memtable implements the feature.
|
||||
// All other memtable implementation will ignore the option. It incurs ~250
|
||||
// additional bytes of memory overhead to store a hint for each prefix.
|
||||
// Also concurrent writes (when allow_concurrent_memtable_write is true) will
|
||||
// ignore the option.
|
||||
//
|
||||
// The option is best suited for workloads where keys will likely to insert
|
||||
// to a location close the the last inserted key with the same prefix.
|
||||
// One example could be inserting keys of the form (prefix + timestamp),
|
||||
// and keys of the same prefix always comes in with time order. Another
|
||||
// example would be updating the same key over and over again, in which case
|
||||
// the prefix can be the key itself.
|
||||
//
|
||||
// Default: nullptr (disable)
|
||||
std::shared_ptr<const SliceTransform>
|
||||
memtable_insert_with_hint_prefix_extractor = nullptr;
|
||||
|
||||
// Control locality of bloom filter probes to improve cache miss rate.
|
||||
// This option only applies to memtable prefix bloom and plaintable
|
||||
// prefix bloom. It essentially limits every bloom checking to one cache line.
|
||||
// This optimization is turned off when set to 0, and positive number to turn
|
||||
// it on.
|
||||
// Default: 0
|
||||
uint32_t bloom_locality = 0;
|
||||
|
||||
// size of one block in arena memory allocation.
|
||||
// If <= 0, a proper value is automatically calculated (usually 1/8 of
|
||||
// writer_buffer_size, rounded up to a multiple of 4KB).
|
||||
//
|
||||
// There are two additional restriction of the The specified size:
|
||||
// (1) size should be in the range of [4096, 2 << 30] and
|
||||
// (2) be the multiple of the CPU word (which helps with the memory
|
||||
// alignment).
|
||||
//
|
||||
// We'll automatically check and adjust the size number to make sure it
|
||||
// conforms to the restrictions.
|
||||
//
|
||||
// Default: 0
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t arena_block_size = 0;
|
||||
|
||||
// Different levels can have different compression policies. There
|
||||
// are cases where most lower levels would like to use quick compression
|
||||
// algorithms while the higher levels (which have more data) use
|
||||
// compression algorithms that have better compression but could
|
||||
// be slower. This array, if non-empty, should have an entry for
|
||||
// each level of the database; these override the value specified in
|
||||
// the previous field 'compression'.
|
||||
//
|
||||
// NOTICE if level_compaction_dynamic_level_bytes=true,
|
||||
// compression_per_level[0] still determines L0, but other elements
|
||||
// of the array are based on base level (the level L0 files are merged
|
||||
// to), and may not match the level users see from info log for metadata.
|
||||
// If L0 files are merged to level-n, then, for i>0, compression_per_level[i]
|
||||
// determines compaction type for level n+i-1.
|
||||
// For example, if we have three 5 levels, and we determine to merge L0
|
||||
// data to L4 (which means L1..L3 will be empty), then the new files go to
|
||||
// L4 uses compression type compression_per_level[1].
|
||||
// If now L0 is merged to L2. Data goes to L2 will be compressed
|
||||
// according to compression_per_level[1], L3 using compression_per_level[2]
|
||||
// and L4 using compression_per_level[3]. Compaction for each level can
|
||||
// change when data grows.
|
||||
std::vector<CompressionType> compression_per_level;
|
||||
|
||||
// Number of levels for this database
|
||||
int num_levels = 7;
|
||||
|
||||
// Soft limit on number of level-0 files. We start slowing down writes at this
|
||||
// point. A value <0 means that no writing slow down will be triggered by
|
||||
// number of files in level-0.
|
||||
//
|
||||
// Default: 20
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int level0_slowdown_writes_trigger = 20;
|
||||
|
||||
// Maximum number of level-0 files. We stop writes at this point.
|
||||
//
|
||||
// Default: 36
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int level0_stop_writes_trigger = 36;
|
||||
|
||||
// Target file size for compaction.
|
||||
// target_file_size_base is per-file size for level-1.
|
||||
// Target file size for level L can be calculated by
|
||||
// target_file_size_base * (target_file_size_multiplier ^ (L-1))
|
||||
// For example, if target_file_size_base is 2MB and
|
||||
// target_file_size_multiplier is 10, then each file on level-1 will
|
||||
// be 2MB, and each file on level 2 will be 20MB,
|
||||
// and each file on level-3 will be 200MB.
|
||||
//
|
||||
// Default: 64MB.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
uint64_t target_file_size_base = 64 * 1048576;
|
||||
|
||||
// By default target_file_size_multiplier is 1, which means
|
||||
// by default files in different levels will have similar size.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int target_file_size_multiplier = 1;
|
||||
|
||||
// If true, RocksDB will pick target size of each level dynamically.
|
||||
// We will pick a base level b >= 1. L0 will be directly merged into level b,
|
||||
// instead of always into level 1. Level 1 to b-1 need to be empty.
|
||||
// We try to pick b and its target size so that
|
||||
// 1. target size is in the range of
|
||||
// (max_bytes_for_level_base / max_bytes_for_level_multiplier,
|
||||
// max_bytes_for_level_base]
|
||||
// 2. target size of the last level (level num_levels-1) equals to extra size
|
||||
// of the level.
|
||||
// At the same time max_bytes_for_level_multiplier and
|
||||
// max_bytes_for_level_multiplier_additional are still satisfied.
|
||||
//
|
||||
// With this option on, from an empty DB, we make last level the base level,
|
||||
// which means merging L0 data into the last level, until it exceeds
|
||||
// max_bytes_for_level_base. And then we make the second last level to be
|
||||
// base level, to start to merge L0 data to second last level, with its
|
||||
// target size to be 1/max_bytes_for_level_multiplier of the last level's
|
||||
// extra size. After the data accumulates more so that we need to move the
|
||||
// base level to the third last one, and so on.
|
||||
//
|
||||
// For example, assume max_bytes_for_level_multiplier=10, num_levels=6,
|
||||
// and max_bytes_for_level_base=10MB.
|
||||
// Target sizes of level 1 to 5 starts with:
|
||||
// [- - - - 10MB]
|
||||
// with base level is level. Target sizes of level 1 to 4 are not applicable
|
||||
// because they will not be used.
|
||||
// Until the size of Level 5 grows to more than 10MB, say 11MB, we make
|
||||
// base target to level 4 and now the targets looks like:
|
||||
// [- - - 1.1MB 11MB]
|
||||
// While data are accumulated, size targets are tuned based on actual data
|
||||
// of level 5. When level 5 has 50MB of data, the target is like:
|
||||
// [- - - 5MB 50MB]
|
||||
// Until level 5's actual size is more than 100MB, say 101MB. Now if we keep
|
||||
// level 4 to be the base level, its target size needs to be 10.1MB, which
|
||||
// doesn't satisfy the target size range. So now we make level 3 the target
|
||||
// size and the target sizes of the levels look like:
|
||||
// [- - 1.01MB 10.1MB 101MB]
|
||||
// In the same way, while level 5 further grows, all levels' targets grow,
|
||||
// like
|
||||
// [- - 5MB 50MB 500MB]
|
||||
// Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
|
||||
// base level and make levels' target sizes like this:
|
||||
// [- 1.001MB 10.01MB 100.1MB 1001MB]
|
||||
// and go on...
|
||||
//
|
||||
// By doing it, we give max_bytes_for_level_multiplier a priority against
|
||||
// max_bytes_for_level_base, for a more predictable LSM tree shape. It is
|
||||
// useful to limit worse case space amplification.
|
||||
//
|
||||
// max_bytes_for_level_multiplier_additional is ignored with this flag on.
|
||||
//
|
||||
// Turning this feature on or off for an existing DB can cause unexpected
|
||||
// LSM tree structure so it's not recommended.
|
||||
//
|
||||
// NOTE: this option is experimental
|
||||
//
|
||||
// Default: false
|
||||
bool level_compaction_dynamic_level_bytes = false;
|
||||
|
||||
// Default: 10.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
double max_bytes_for_level_multiplier = 10;
|
||||
|
||||
// Different max-size multipliers for different levels.
|
||||
// These are multiplied by max_bytes_for_level_multiplier to arrive
|
||||
// at the max-size of each level.
|
||||
//
|
||||
// Default: 1
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
std::vector<int> max_bytes_for_level_multiplier_additional =
|
||||
std::vector<int>(num_levels, 1);
|
||||
|
||||
// We try to limit number of bytes in one compaction to be lower than this
|
||||
// threshold. But it's not guaranteed.
|
||||
// Value 0 will be sanitized.
|
||||
//
|
||||
// Default: result.target_file_size_base * 25
|
||||
uint64_t max_compaction_bytes = 0;
|
||||
|
||||
// All writes will be slowed down to at least delayed_write_rate if estimated
|
||||
// bytes needed to be compaction exceed this threshold.
|
||||
//
|
||||
// Default: 64GB
|
||||
uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull;
|
||||
|
||||
// All writes are stopped if estimated bytes needed to be compaction exceed
|
||||
// this threshold.
|
||||
//
|
||||
// Default: 256GB
|
||||
uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull;
|
||||
|
||||
// The compaction style. Default: kCompactionStyleLevel
|
||||
CompactionStyle compaction_style = kCompactionStyleLevel;
|
||||
|
||||
// If level compaction_style = kCompactionStyleLevel, for each level,
|
||||
// which files are prioritized to be picked to compact.
|
||||
// Default: kByCompensatedSize
|
||||
CompactionPri compaction_pri = kByCompensatedSize;
|
||||
|
||||
// The options needed to support Universal Style compactions
|
||||
CompactionOptionsUniversal compaction_options_universal;
|
||||
|
||||
// The options for FIFO compaction style
|
||||
CompactionOptionsFIFO compaction_options_fifo;
|
||||
|
||||
// An iteration->Next() sequentially skips over keys with the same
|
||||
// user-key unless this option is set. This number specifies the number
|
||||
// of keys (with the same userkey) that will be sequentially
|
||||
// skipped before a reseek is issued.
|
||||
//
|
||||
// Default: 8
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
uint64_t max_sequential_skip_in_iterations = 8;
|
||||
|
||||
// This is a factory that provides MemTableRep objects.
|
||||
// Default: a factory that provides a skip-list-based implementation of
|
||||
// MemTableRep.
|
||||
std::shared_ptr<MemTableRepFactory> memtable_factory =
|
||||
std::shared_ptr<SkipListFactory>(new SkipListFactory);
|
||||
|
||||
// Block-based table related options are moved to BlockBasedTableOptions.
|
||||
// Related options that were originally here but now moved include:
|
||||
// no_block_cache
|
||||
// block_cache
|
||||
// block_cache_compressed
|
||||
// block_size
|
||||
// block_size_deviation
|
||||
// block_restart_interval
|
||||
// filter_policy
|
||||
// whole_key_filtering
|
||||
// If you'd like to customize some of these options, you will need to
|
||||
// use NewBlockBasedTableFactory() to construct a new table factory.
|
||||
|
||||
// This option allows user to collect their own interested statistics of
|
||||
// the tables.
|
||||
// Default: empty vector -- no user-defined statistics collection will be
|
||||
// performed.
|
||||
typedef std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
|
||||
TablePropertiesCollectorFactories;
|
||||
TablePropertiesCollectorFactories table_properties_collector_factories;
|
||||
|
||||
// Maximum number of successive merge operations on a key in the memtable.
|
||||
//
|
||||
// When a merge operation is added to the memtable and the maximum number of
|
||||
// successive merges is reached, the value of the key will be calculated and
|
||||
// inserted into the memtable instead of the merge operation. This will
|
||||
// ensure that there are never more than max_successive_merges merge
|
||||
// operations in the memtable.
|
||||
//
|
||||
// Default: 0 (disabled)
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t max_successive_merges = 0;
|
||||
|
||||
// This flag specifies that the implementation should optimize the filters
|
||||
// mainly for cases where keys are found rather than also optimize for keys
|
||||
// missed. This would be used in cases where the application knows that
|
||||
// there are very few misses or the performance in the case of misses is not
|
||||
// important.
|
||||
//
|
||||
// For now, this flag allows us to not store filters for the last level i.e
|
||||
// the largest level which contains data of the LSM store. For keys which
|
||||
// are hits, the filters in this level are not useful because we will search
|
||||
// for the data anyway. NOTE: the filters in other levels are still useful
|
||||
// even for key hit because they tell us whether to look in that level or go
|
||||
// to the higher level.
|
||||
//
|
||||
// Default: false
|
||||
bool optimize_filters_for_hits = false;
|
||||
|
||||
// After writing every SST file, reopen it and read all the keys.
|
||||
// Default: false
|
||||
bool paranoid_file_checks = false;
|
||||
|
||||
// In debug mode, RocksDB run consistency checks on the LSM everytime the LSM
|
||||
// change (Flush, Compaction, AddFile). These checks are disabled in release
|
||||
// mode, use this option to enable them in release mode as well.
|
||||
// Default: false
|
||||
bool force_consistency_checks = false;
|
||||
|
||||
// Measure IO stats in compactions and flushes, if true.
|
||||
// Default: false
|
||||
bool report_bg_io_stats = false;
|
||||
|
||||
// Create ColumnFamilyOptions with default values for all fields
|
||||
AdvancedColumnFamilyOptions();
|
||||
// Create ColumnFamilyOptions from Options
|
||||
explicit AdvancedColumnFamilyOptions(const Options& options);
|
||||
|
||||
// ---------------- DEPRECATED OPTIONS ----------------
|
||||
|
||||
// DEPRECATED
|
||||
// This does not do anything anymore.
|
||||
int max_mem_compaction_level;
|
||||
|
||||
// DEPRECATED -- this options is no longer used
|
||||
// Puts are delayed to options.delayed_write_rate when any level has a
|
||||
// compaction score that exceeds soft_rate_limit. This is ignored when == 0.0.
|
||||
//
|
||||
// Default: 0 (disabled)
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
double soft_rate_limit = 0.0;
|
||||
|
||||
// DEPRECATED -- this options is no longer used
|
||||
double hard_rate_limit = 0.0;
|
||||
|
||||
// DEPRECATED -- this options is no longer used
|
||||
unsigned int rate_limit_delay_max_milliseconds = 100;
|
||||
|
||||
// DEPREACTED
|
||||
// Does not have any effect.
|
||||
bool purge_redundant_kvs_while_flush = true;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
|
@ -17,10 +17,10 @@
|
|||
#include <limits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "rocksdb/advanced_options.h"
|
||||
#include "rocksdb/comparator.h"
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/listener.h"
|
||||
#include "rocksdb/memtablerep.h"
|
||||
#include "rocksdb/universal_compaction.h"
|
||||
#include "rocksdb/version.h"
|
||||
#include "rocksdb/write_buffer_manager.h"
|
||||
|
@ -42,12 +42,9 @@ class FilterPolicy;
|
|||
class Logger;
|
||||
class MergeOperator;
|
||||
class Snapshot;
|
||||
class TableFactory;
|
||||
class MemTableRepFactory;
|
||||
class TablePropertiesCollectorFactory;
|
||||
class RateLimiter;
|
||||
class Slice;
|
||||
class SliceTransform;
|
||||
class Statistics;
|
||||
class InternalKeyComparator;
|
||||
class WalFilter;
|
||||
|
@ -79,113 +76,9 @@ enum CompressionType : unsigned char {
|
|||
kDisableCompressionOption = 0xff,
|
||||
};
|
||||
|
||||
enum CompactionStyle : char {
|
||||
// level based compaction style
|
||||
kCompactionStyleLevel = 0x0,
|
||||
// Universal compaction style
|
||||
// Not supported in ROCKSDB_LITE.
|
||||
kCompactionStyleUniversal = 0x1,
|
||||
// FIFO compaction style
|
||||
// Not supported in ROCKSDB_LITE
|
||||
kCompactionStyleFIFO = 0x2,
|
||||
// Disable background compaction. Compaction jobs are submitted
|
||||
// via CompactFiles().
|
||||
// Not supported in ROCKSDB_LITE
|
||||
kCompactionStyleNone = 0x3,
|
||||
};
|
||||
|
||||
// In Level-based comapction, it Determines which file from a level to be
|
||||
// picked to merge to the next level. We suggest people try
|
||||
// kMinOverlappingRatio first when you tune your database.
|
||||
enum CompactionPri : char {
|
||||
// Slightly Priotize larger files by size compensated by #deletes
|
||||
kByCompensatedSize = 0x0,
|
||||
// First compact files whose data's latest update time is oldest.
|
||||
// Try this if you only update some hot keys in small ranges.
|
||||
kOldestLargestSeqFirst = 0x1,
|
||||
// First compact files whose range hasn't been compacted to the next level
|
||||
// for the longest. If your updates are random across the key space,
|
||||
// write amplification is slightly better with this option.
|
||||
kOldestSmallestSeqFirst = 0x2,
|
||||
// First compact files whose ratio between overlapping size in next level
|
||||
// and its size is the smallest. It in many cases can optimize write
|
||||
// amplification.
|
||||
kMinOverlappingRatio = 0x3,
|
||||
};
|
||||
|
||||
enum class WALRecoveryMode : char {
|
||||
// Original levelDB recovery
|
||||
// We tolerate incomplete record in trailing data on all logs
|
||||
// Use case : This is legacy behavior (default)
|
||||
kTolerateCorruptedTailRecords = 0x00,
|
||||
// Recover from clean shutdown
|
||||
// We don't expect to find any corruption in the WAL
|
||||
// Use case : This is ideal for unit tests and rare applications that
|
||||
// can require high consistency guarantee
|
||||
kAbsoluteConsistency = 0x01,
|
||||
// Recover to point-in-time consistency
|
||||
// We stop the WAL playback on discovering WAL inconsistency
|
||||
// Use case : Ideal for systems that have disk controller cache like
|
||||
// hard disk, SSD without super capacitor that store related data
|
||||
kPointInTimeRecovery = 0x02,
|
||||
// Recovery after a disaster
|
||||
// We ignore any corruption in the WAL and try to salvage as much data as
|
||||
// possible
|
||||
// Use case : Ideal for last ditch effort to recover data or systems that
|
||||
// operate with low grade unrelated data
|
||||
kSkipAnyCorruptedRecords = 0x03,
|
||||
};
|
||||
|
||||
struct CompactionOptionsFIFO {
|
||||
// once the total sum of table files reaches this, we will delete the oldest
|
||||
// table file
|
||||
// Default: 1GB
|
||||
uint64_t max_table_files_size;
|
||||
|
||||
CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {}
|
||||
};
|
||||
|
||||
// Compression options for different compression algorithms like Zlib
struct CompressionOptions {
  // Algorithm-specific tuning parameters.
  // NOTE(review): these look like zlib's windowBits / level / strategy
  // arguments -- confirm against the code that consumes them.
  int window_bits;
  int level;
  int strategy;

  // Maximum size of dictionary used to prime the compression library.
  // Currently this dictionary will be constructed by sampling the first output
  // file in a subcompaction when the target level is bottommost. This
  // dictionary will be loaded into the compression library before
  // compressing/uncompressing each data block of subsequent files in the
  // subcompaction. Effectively, this improves compression ratios when there
  // are repetitions across data blocks.
  // A value of 0 indicates the feature is disabled.
  // Default: 0.
  uint32_t max_dict_bytes;

  // Defaults: window_bits = -14, level = -1, strategy = 0 and dictionary
  // compression disabled (max_dict_bytes = 0).
  CompressionOptions()
      : window_bits(-14), level(-1), strategy(0), max_dict_bytes(0) {}

  // Builds the options from explicit values.
  //
  // _max_dict_bytes stays an int so existing callers keep compiling. A
  // negative value cannot describe a size; it is treated as 0 (feature
  // disabled) instead of silently wrapping around to a multi-gigabyte
  // uint32_t limit.
  CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes)
      : window_bits(wbits),
        level(_lev),
        strategy(_strategy),
        max_dict_bytes(_max_dict_bytes > 0
                           ? static_cast<uint32_t>(_max_dict_bytes)
                           : 0u) {}
};
|
||||
|
||||
// Return status for the inplace update callback.
enum UpdateStatus {
  // Nothing to update
  UPDATE_FAILED = 0,
  // Value updated inplace
  UPDATED_INPLACE = 1,
  // No inplace update. Merged value set
  UPDATED = 2,
};
|
||||
|
||||
// A path paired with a target size for the files kept under it.
struct DbPath {
  std::string path;
  // Target size of total files under the path, in bytes.
  uint64_t target_size;

  // Empty path with a target size of 0.
  DbPath() : path(), target_size(0) {}

  // Path `p` with a target size of `t` bytes.
  DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {}
};
|
||||
|
||||
struct Options;
|
||||
|
||||
struct ColumnFamilyOptions {
|
||||
struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
|
||||
// The function recovers options to a previous version. Only 4.6 or later
|
||||
// versions are supported.
|
||||
ColumnFamilyOptions* OldDefaults(int rocksdb_major_version = 4,
|
||||
|
@ -295,54 +188,6 @@ struct ColumnFamilyOptions {
|
|||
// Dynamically changeable through SetOptions() API
|
||||
size_t write_buffer_size = 64 << 20;
|
||||
|
||||
// The maximum number of write buffers that are built up in memory.
|
||||
// The default and the minimum number is 2, so that when 1 write buffer
|
||||
// is being flushed to storage, new writes can continue to the other
|
||||
// write buffer.
|
||||
// If max_write_buffer_number > 3, writing will be slowed down to
|
||||
// options.delayed_write_rate if we are writing to the last write buffer
|
||||
// allowed.
|
||||
//
|
||||
// Default: 2
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int max_write_buffer_number = 2;
|
||||
|
||||
// The minimum number of write buffers that will be merged together
|
||||
// before writing to storage. If set to 1, then
|
||||
// all write buffers are flushed to L0 as individual files and this increases
|
||||
// read amplification because a get request has to check in all of these
|
||||
// files. Also, an in-memory merge may result in writing lesser
|
||||
// data to storage if there are duplicate records in each of these
|
||||
// individual write buffers. Default: 1
|
||||
int min_write_buffer_number_to_merge = 1;
|
||||
|
||||
// The total maximum number of write buffers to maintain in memory including
|
||||
// copies of buffers that have already been flushed. Unlike
|
||||
// max_write_buffer_number, this parameter does not affect flushing.
|
||||
// This controls the minimum amount of write history that will be available
|
||||
// in memory for conflict checking when Transactions are used.
|
||||
//
|
||||
// When using an OptimisticTransactionDB:
|
||||
// If this value is too low, some transactions may fail at commit time due
|
||||
// to not being able to determine whether there were any write conflicts.
|
||||
//
|
||||
// When using a TransactionDB:
|
||||
// If Transaction::SetSnapshot is used, TransactionDB will read either
|
||||
// in-memory write buffers or SST files to do write-conflict checking.
|
||||
// Increasing this value can reduce the number of reads to SST files
|
||||
// done for conflict detection.
|
||||
//
|
||||
// Setting this value to 0 will cause write buffers to be freed immediately
|
||||
// after they are flushed.
|
||||
// If this value is set to -1, 'max_write_buffer_number' will be used.
|
||||
//
|
||||
// Default:
|
||||
// If using a TransactionDB/OptimisticTransactionDB, the default value will
|
||||
// be set to the value of 'max_write_buffer_number' if it is not explicitly
|
||||
// set by the user. Otherwise, the default is 0.
|
||||
int max_write_buffer_number_to_maintain = 0;
|
||||
|
||||
// Compress blocks using the specified compression algorithm. This
|
||||
// parameter can be changed dynamically.
|
||||
//
|
||||
|
@ -359,29 +204,6 @@ struct ColumnFamilyOptions {
|
|||
// efficiently detect that and will switch to uncompressed mode.
|
||||
CompressionType compression;
|
||||
|
||||
// Different levels can have different compression policies. There
|
||||
// are cases where most lower levels would like to use quick compression
|
||||
// algorithms while the higher levels (which have more data) use
|
||||
// compression algorithms that have better compression but could
|
||||
// be slower. This array, if non-empty, should have an entry for
|
||||
// each level of the database; these override the value specified in
|
||||
// the previous field 'compression'.
|
||||
//
|
||||
// NOTICE if level_compaction_dynamic_level_bytes=true,
|
||||
// compression_per_level[0] still determines L0, but other elements
|
||||
// of the array are based on base level (the level L0 files are merged
|
||||
// to), and may not match the level users see from info log for metadata.
|
||||
// If L0 files are merged to level-n, then, for i>0, compression_per_level[i]
|
||||
// determines compression type for level n+i-1.
|
||||
// For example, if we have 5 levels, and we determine to merge L0
|
||||
// data to L4 (which means L1..L3 will be empty), then the new files go to
|
||||
// L4 uses compression type compression_per_level[1].
|
||||
// If now L0 is merged to L2. Data goes to L2 will be compressed
|
||||
// according to compression_per_level[1], L3 using compression_per_level[2]
|
||||
// and L4 using compression_per_level[3]. Compaction for each level can
|
||||
// change when data grows.
|
||||
std::vector<CompressionType> compression_per_level;
|
||||
|
||||
// Compression algorithm that will be used for the bottommost level that
|
||||
// contain files. If level-compaction is used, this option will only affect
|
||||
// levels after base level.
|
||||
|
@ -392,6 +214,14 @@ struct ColumnFamilyOptions {
|
|||
// different options for compression algorithms
|
||||
CompressionOptions compression_opts;
|
||||
|
||||
// Number of files to trigger level-0 compaction. A value <0 means that
|
||||
// level-0 compaction will not be triggered by number of files at all.
|
||||
//
|
||||
// Default: 4
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int level0_file_num_compaction_trigger = 4;
|
||||
|
||||
// If non-nullptr, use the specified function to determine the
|
||||
// prefixes for keys. These prefixes will be placed in the filter.
|
||||
// Depending on the workload, this can reduce the number of read-IOP
|
||||
|
@ -408,56 +238,6 @@ struct ColumnFamilyOptions {
|
|||
// Default: nullptr
|
||||
std::shared_ptr<const SliceTransform> prefix_extractor = nullptr;
|
||||
|
||||
// Number of levels for this database
|
||||
int num_levels = 7;
|
||||
|
||||
// Number of files to trigger level-0 compaction. A value <0 means that
|
||||
// level-0 compaction will not be triggered by number of files at all.
|
||||
//
|
||||
// Default: 4
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int level0_file_num_compaction_trigger = 4;
|
||||
|
||||
// Soft limit on number of level-0 files. We start slowing down writes at this
|
||||
// point. A value <0 means that no writing slow down will be triggered by
|
||||
// number of files in level-0.
|
||||
//
|
||||
// Default: 20
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int level0_slowdown_writes_trigger = 20;
|
||||
|
||||
// Maximum number of level-0 files. We stop writes at this point.
|
||||
//
|
||||
// Default: 36
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int level0_stop_writes_trigger = 36;
|
||||
|
||||
// This does not do anything anymore. Deprecated.
|
||||
int max_mem_compaction_level;
|
||||
|
||||
// Target file size for compaction.
|
||||
// target_file_size_base is per-file size for level-1.
|
||||
// Target file size for level L can be calculated by
|
||||
// target_file_size_base * (target_file_size_multiplier ^ (L-1))
|
||||
// For example, if target_file_size_base is 2MB and
|
||||
// target_file_size_multiplier is 10, then each file on level-1 will
|
||||
// be 2MB, and each file on level 2 will be 20MB,
|
||||
// and each file on level-3 will be 200MB.
|
||||
//
|
||||
// Default: 64MB.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
uint64_t target_file_size_base = 64 * 1048576;
|
||||
|
||||
// By default target_file_size_multiplier is 1, which means
|
||||
// by default files in different levels will have similar size.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
int target_file_size_multiplier = 1;
|
||||
|
||||
// Control maximum total data size for a level.
|
||||
// max_bytes_for_level_base is the max total for level-1.
|
||||
// Maximum number of bytes for level L can be calculated as
|
||||
|
@ -472,355 +252,18 @@ struct ColumnFamilyOptions {
|
|||
// Dynamically changeable through SetOptions() API
|
||||
uint64_t max_bytes_for_level_base = 256 * 1048576;
|
||||
|
||||
// If true, RocksDB will pick target size of each level dynamically.
|
||||
// We will pick a base level b >= 1. L0 will be directly merged into level b,
|
||||
// instead of always into level 1. Level 1 to b-1 need to be empty.
|
||||
// We try to pick b and its target size so that
|
||||
// 1. target size is in the range of
|
||||
// (max_bytes_for_level_base / max_bytes_for_level_multiplier,
|
||||
// max_bytes_for_level_base]
|
||||
// 2. target size of the last level (level num_levels-1) equals to extra size
|
||||
// of the level.
|
||||
// At the same time max_bytes_for_level_multiplier and
|
||||
// max_bytes_for_level_multiplier_additional are still satisfied.
|
||||
//
|
||||
// With this option on, from an empty DB, we make last level the base level,
|
||||
// which means merging L0 data into the last level, until it exceeds
|
||||
// max_bytes_for_level_base. And then we make the second last level to be
|
||||
// base level, to start to merge L0 data to second last level, with its
|
||||
// target size to be 1/max_bytes_for_level_multiplier of the last level's
|
||||
// extra size. After the data accumulates more so that we need to move the
|
||||
// base level to the third last one, and so on.
|
||||
//
|
||||
// For example, assume max_bytes_for_level_multiplier=10, num_levels=6,
|
||||
// and max_bytes_for_level_base=10MB.
|
||||
// Target sizes of level 1 to 5 starts with:
|
||||
// [- - - - 10MB]
|
||||
// with level 5 as the base level. Target sizes of level 1 to 4 are not applicable
|
||||
// because they will not be used.
|
||||
// Until the size of Level 5 grows to more than 10MB, say 11MB, we make
|
||||
// base target to level 4 and now the targets looks like:
|
||||
// [- - - 1.1MB 11MB]
|
||||
// While data are accumulated, size targets are tuned based on actual data
|
||||
// of level 5. When level 5 has 50MB of data, the target is like:
|
||||
// [- - - 5MB 50MB]
|
||||
// Until level 5's actual size is more than 100MB, say 101MB. Now if we keep
|
||||
// level 4 to be the base level, its target size needs to be 10.1MB, which
|
||||
// doesn't satisfy the target size range. So now we make level 3 the base
|
||||
// level and the target sizes of the levels look like:
|
||||
// [- - 1.01MB 10.1MB 101MB]
|
||||
// In the same way, while level 5 further grows, all levels' targets grow,
|
||||
// like
|
||||
// [- - 5MB 50MB 500MB]
|
||||
// Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the
|
||||
// base level and make levels' target sizes like this:
|
||||
// [- 1.001MB 10.01MB 100.1MB 1001MB]
|
||||
// and go on...
|
||||
//
|
||||
// By doing it, we give max_bytes_for_level_multiplier a priority against
|
||||
// max_bytes_for_level_base, for a more predictable LSM tree shape. It is
|
||||
// useful to limit worst case space amplification.
|
||||
//
|
||||
// max_bytes_for_level_multiplier_additional is ignored with this flag on.
|
||||
//
|
||||
// Turning this feature on or off for an existing DB can cause unexpected
|
||||
// LSM tree structure so it's not recommended.
|
||||
//
|
||||
// NOTE: this option is experimental
|
||||
//
|
||||
// Default: false
|
||||
bool level_compaction_dynamic_level_bytes = false;
|
||||
|
||||
// Default: 10.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
double max_bytes_for_level_multiplier = 10;
|
||||
|
||||
// Different max-size multipliers for different levels.
|
||||
// These are multiplied by max_bytes_for_level_multiplier to arrive
|
||||
// at the max-size of each level.
|
||||
//
|
||||
// Default: 1
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
std::vector<int> max_bytes_for_level_multiplier_additional =
|
||||
std::vector<int>(num_levels, 1);
|
||||
|
||||
// We try to limit number of bytes in one compaction to be lower than this
|
||||
// threshold. But it's not guaranteed.
|
||||
// Value 0 will be sanitized.
|
||||
//
|
||||
// Default: result.target_file_size_base * 25
|
||||
uint64_t max_compaction_bytes = 0;
|
||||
|
||||
// DEPRECATED -- this option is no longer used
|
||||
// Puts are delayed to options.delayed_write_rate when any level has a
|
||||
// compaction score that exceeds soft_rate_limit. This is ignored when == 0.0.
|
||||
//
|
||||
// Default: 0 (disabled)
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
double soft_rate_limit = 0.0;
|
||||
|
||||
// DEPRECATED -- this option is no longer used
|
||||
double hard_rate_limit = 0.0;
|
||||
|
||||
// All writes will be slowed down to at least delayed_write_rate if estimated
|
||||
// bytes needing compaction exceed this threshold.
|
||||
//
|
||||
// Default: 64GB
|
||||
uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull;
|
||||
|
||||
// All writes are stopped if estimated bytes needing compaction exceed
|
||||
// this threshold.
|
||||
//
|
||||
// Default: 256GB
|
||||
uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull;
|
||||
|
||||
// DEPRECATED -- this option is no longer used
|
||||
unsigned int rate_limit_delay_max_milliseconds = 100;
|
||||
|
||||
// size of one block in arena memory allocation.
|
||||
// If <= 0, a proper value is automatically calculated (usually 1/8 of
|
||||
// write_buffer_size, rounded up to a multiple of 4KB).
|
||||
//
|
||||
// There are two additional restrictions on the specified size:
|
||||
// (1) size should be in the range of [4096, 2 << 30] and
|
||||
// (2) be the multiple of the CPU word (which helps with the memory
|
||||
// alignment).
|
||||
//
|
||||
// We'll automatically check and adjust the size number to make sure it
|
||||
// conforms to the restrictions.
|
||||
//
|
||||
// Default: 0
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t arena_block_size = 0;
|
||||
|
||||
// Disable automatic compactions. Manual compactions can still
|
||||
// be issued on this column family
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
bool disable_auto_compactions = false;
|
||||
|
||||
// DEPRECATED
|
||||
// Does not have any effect.
|
||||
bool purge_redundant_kvs_while_flush = true;
|
||||
|
||||
// The compaction style. Default: kCompactionStyleLevel
|
||||
CompactionStyle compaction_style = kCompactionStyleLevel;
|
||||
|
||||
// If level compaction_style = kCompactionStyleLevel, for each level,
|
||||
// which files are prioritized to be picked to compact.
|
||||
// Default: kByCompensatedSize
|
||||
CompactionPri compaction_pri = kByCompensatedSize;
|
||||
|
||||
|
||||
// The options needed to support Universal Style compactions
|
||||
CompactionOptionsUniversal compaction_options_universal;
|
||||
|
||||
// The options for FIFO compaction style
|
||||
CompactionOptionsFIFO compaction_options_fifo;
|
||||
|
||||
// An iteration->Next() sequentially skips over keys with the same
|
||||
// user-key unless this option is set. This number specifies the number
|
||||
// of keys (with the same userkey) that will be sequentially
|
||||
// skipped before a reseek is issued.
|
||||
//
|
||||
// Default: 8
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
uint64_t max_sequential_skip_in_iterations = 8;
|
||||
|
||||
// This is a factory that provides MemTableRep objects.
|
||||
// Default: a factory that provides a skip-list-based implementation of
|
||||
// MemTableRep.
|
||||
std::shared_ptr<MemTableRepFactory> memtable_factory =
|
||||
std::shared_ptr<SkipListFactory>(new SkipListFactory);
|
||||
|
||||
// This is a factory that provides TableFactory objects.
|
||||
// Default: a block-based table factory that provides a default
|
||||
// implementation of TableBuilder and TableReader with default
|
||||
// BlockBasedTableOptions.
|
||||
std::shared_ptr<TableFactory> table_factory;
|
||||
|
||||
// Block-based table related options are moved to BlockBasedTableOptions.
|
||||
// Related options that were originally here but now moved include:
|
||||
// no_block_cache
|
||||
// block_cache
|
||||
// block_cache_compressed
|
||||
// block_size
|
||||
// block_size_deviation
|
||||
// block_restart_interval
|
||||
// filter_policy
|
||||
// whole_key_filtering
|
||||
// If you'd like to customize some of these options, you will need to
|
||||
// use NewBlockBasedTableFactory() to construct a new table factory.
|
||||
|
||||
// This option allows user to collect their own interested statistics of
|
||||
// the tables.
|
||||
// Default: empty vector -- no user-defined statistics collection will be
|
||||
// performed.
|
||||
typedef std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
|
||||
TablePropertiesCollectorFactories;
|
||||
TablePropertiesCollectorFactories table_properties_collector_factories;
|
||||
|
||||
// Allows thread-safe inplace updates. If this is true, there is no way to
|
||||
// achieve point-in-time consistency using snapshot or iterator (assuming
|
||||
// concurrent updates). Hence iterator and multi-get will return results
|
||||
// which are not consistent as of any point-in-time.
|
||||
// If inplace_callback function is not set,
|
||||
// Put(key, new_value) will update inplace the existing_value iff
|
||||
// * key exists in current memtable
|
||||
// * new sizeof(new_value) <= sizeof(existing_value)
|
||||
// * existing_value for that key is a put i.e. kTypeValue
|
||||
// If inplace_callback function is set, check doc for inplace_callback.
|
||||
// Default: false.
|
||||
bool inplace_update_support = false;
|
||||
|
||||
// Number of locks used for inplace update
|
||||
// Default: 10000, if inplace_update_support = true, else 0.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t inplace_update_num_locks = 10000;
|
||||
|
||||
// existing_value - pointer to previous value (from both memtable and sst).
|
||||
// nullptr if key doesn't exist
|
||||
// existing_value_size - pointer to size of existing_value.
|
||||
// nullptr if key doesn't exist
|
||||
// delta_value - Delta value to be merged with the existing_value.
|
||||
// Stored in transaction logs.
|
||||
// merged_value - Set when delta is applied on the previous value.
|
||||
|
||||
// Applicable only when inplace_update_support is true,
|
||||
// this callback function is called at the time of updating the memtable
|
||||
// as part of a Put operation, lets say Put(key, delta_value). It allows the
|
||||
// 'delta_value' specified as part of the Put operation to be merged with
|
||||
// an 'existing_value' of the key in the database.
|
||||
|
||||
// If the merged value is smaller in size than the 'existing_value',
|
||||
// then this function can update the 'existing_value' buffer inplace and
|
||||
// the corresponding 'existing_value'_size pointer, if it wishes to.
|
||||
// The callback should return UpdateStatus::UPDATED_INPLACE
|
||||
// in this case. (In this case, the snapshot-semantics of the rocksdb
|
||||
// Iterator is not atomic anymore).
|
||||
|
||||
// If the merged value is larger in size than the 'existing_value' or the
|
||||
// application does not wish to modify the 'existing_value' buffer inplace,
|
||||
// then the merged value should be returned via *merged_value. It is set by
|
||||
// merging the 'existing_value' and the Put 'delta_value'. The callback should
|
||||
// return UpdateStatus::UPDATED in this case. This merged value will be added
|
||||
// to the memtable.
|
||||
|
||||
// If merging fails or the application does not wish to take any action,
|
||||
// then the callback should return UpdateStatus::UPDATE_FAILED.
|
||||
|
||||
// Please remember that the original call from the application is Put(key,
|
||||
// delta_value). So the transaction log (if enabled) will still contain (key,
|
||||
// delta_value). The 'merged_value' is not stored in the transaction log.
|
||||
// Hence the inplace_callback function should be consistent across db reopens.
|
||||
|
||||
// Default: nullptr
|
||||
UpdateStatus (*inplace_callback)(char* existing_value,
|
||||
uint32_t* existing_value_size,
|
||||
Slice delta_value,
|
||||
std::string* merged_value) = nullptr;
|
||||
|
||||
// if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0,
|
||||
// create prefix bloom for memtable with the size of
|
||||
// write_buffer_size * memtable_prefix_bloom_size_ratio.
|
||||
// If it is larger than 0.25, it is sanitized to 0.25.
|
||||
//
|
||||
// Default: 0 (disable)
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
double memtable_prefix_bloom_size_ratio = 0.0;
|
||||
|
||||
// Page size for huge page for the arena used by the memtable. If <=0, it
|
||||
// won't allocate from huge page but from malloc.
|
||||
// Users are responsible to reserve huge pages for it to be allocated. For
|
||||
// example:
|
||||
// sysctl -w vm.nr_hugepages=20
|
||||
// See linux doc Documentation/vm/hugetlbpage.txt
|
||||
// If there isn't enough free huge page available, it will fall back to
|
||||
// malloc.
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t memtable_huge_page_size = 0;
|
||||
|
||||
// If non-nullptr, memtable will use the specified function to extract
|
||||
// prefixes for keys, and for each prefix maintain a hint of insert location
|
||||
// to reduce CPU usage for inserting keys with the prefix. Keys out of
|
||||
// domain of the prefix extractor will be inserted without using hints.
|
||||
//
|
||||
// Currently only the default skiplist based memtable implements the feature.
|
||||
// All other memtable implementation will ignore the option. It incurs ~250
|
||||
// additional bytes of memory overhead to store a hint for each prefix.
|
||||
// Also concurrent writes (when allow_concurrent_memtable_write is true) will
|
||||
// ignore the option.
|
||||
//
|
||||
// The option is best suited for workloads where keys will likely be inserted
|
||||
// to a location close to the last inserted key with the same prefix.
|
||||
// One example could be inserting keys of the form (prefix + timestamp),
|
||||
// and keys of the same prefix always comes in with time order. Another
|
||||
// example would be updating the same key over and over again, in which case
|
||||
// the prefix can be the key itself.
|
||||
//
|
||||
// Default: nullptr (disable)
|
||||
std::shared_ptr<const SliceTransform>
|
||||
memtable_insert_with_hint_prefix_extractor = nullptr;
|
||||
|
||||
// Control locality of bloom filter probes to improve cache miss rate.
|
||||
// This option only applies to memtable prefix bloom and plaintable
|
||||
// prefix bloom. It essentially limits every bloom checking to one cache line.
|
||||
// This optimization is turned off when set to 0, and positive number to turn
|
||||
// it on.
|
||||
// Default: 0
|
||||
uint32_t bloom_locality = 0;
|
||||
|
||||
// Maximum number of successive merge operations on a key in the memtable.
|
||||
//
|
||||
// When a merge operation is added to the memtable and the maximum number of
|
||||
// successive merges is reached, the value of the key will be calculated and
|
||||
// inserted into the memtable instead of the merge operation. This will
|
||||
// ensure that there are never more than max_successive_merges merge
|
||||
// operations in the memtable.
|
||||
//
|
||||
// Default: 0 (disabled)
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
size_t max_successive_merges = 0;
|
||||
|
||||
// This flag specifies that the implementation should optimize the filters
|
||||
// mainly for cases where keys are found rather than also optimize for keys
|
||||
// missed. This would be used in cases where the application knows that
|
||||
// there are very few misses or the performance in the case of misses is not
|
||||
// important.
|
||||
//
|
||||
// For now, this flag allows us to not store filters for the last level i.e
|
||||
// the largest level which contains data of the LSM store. For keys which
|
||||
// are hits, the filters in this level are not useful because we will search
|
||||
// for the data anyway. NOTE: the filters in other levels are still useful
|
||||
// even for key hit because they tell us whether to look in that level or go
|
||||
// to the higher level.
|
||||
//
|
||||
// Default: false
|
||||
bool optimize_filters_for_hits = false;
|
||||
|
||||
// After writing every SST file, reopen it and read all the keys.
|
||||
// Default: false
|
||||
bool paranoid_file_checks = false;
|
||||
|
||||
// In debug mode, RocksDB runs consistency checks on the LSM every time the LSM
|
||||
// changes (Flush, Compaction, AddFile). These checks are disabled in release
|
||||
// mode, use this option to enable them in release mode as well.
|
||||
// Default: false
|
||||
bool force_consistency_checks = false;
|
||||
|
||||
// Measure IO stats in compactions and flushes, if true.
|
||||
// Default: false
|
||||
bool report_bg_io_stats = false;
|
||||
|
||||
// Create ColumnFamilyOptions with default values for all fields
|
||||
ColumnFamilyOptions();
|
||||
// Create ColumnFamilyOptions from Options
|
||||
|
@ -829,6 +272,38 @@ struct ColumnFamilyOptions {
|
|||
void Dump(Logger* log) const;
|
||||
};
|
||||
|
||||
// Policy for how corruption found in the write-ahead log (WAL) is treated
// during recovery.
enum class WALRecoveryMode : char {
  // Original LevelDB recovery.
  // Incomplete records in the trailing data of any log are tolerated.
  // Use case: this is the legacy behavior (default).
  kTolerateCorruptedTailRecords = 0,

  // Recovery from a clean shutdown.
  // No corruption is expected anywhere in the WAL.
  // Use case: ideal for unit tests and the rare applications that require
  // a high consistency guarantee.
  kAbsoluteConsistency = 1,

  // Recovery to point-in-time consistency.
  // WAL playback stops as soon as a WAL inconsistency is discovered.
  // Use case: ideal for systems that have a disk controller cache, such as
  // hard disks or SSDs without a super capacitor, that store related data.
  kPointInTimeRecovery = 2,

  // Recovery after a disaster.
  // Any corruption in the WAL is ignored and as much data as possible is
  // salvaged.
  // Use case: ideal as a last-ditch effort to recover data, or for systems
  // that operate with low-grade unrelated data.
  kSkipAnyCorruptedRecords = 3,
};
|
||||
|
||||
// A path paired with a target size for the files kept under it.
struct DbPath {
  std::string path;
  // Target size of total files under the path, in bytes.
  uint64_t target_size;

  // Empty path with a target size of 0.
  DbPath() : path(), target_size(0) {}

  // Path `p` with a target size of `t` bytes.
  DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {}
};
|
||||
|
||||
|
||||
struct DBOptions {
|
||||
// The function recovers options to the option as in version 4.6.
|
||||
DBOptions* OldDefaults(int rocksdb_major_version = 4,
|
||||
|
|
101
util/options.cc
101
util/options.cc
|
@ -36,63 +36,16 @@
|
|||
|
||||
namespace rocksdb {
|
||||
|
||||
ColumnFamilyOptions::ColumnFamilyOptions()
|
||||
: compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
|
||||
table_factory(
|
||||
std::shared_ptr<TableFactory>(new BlockBasedTableFactory())) {
|
||||
AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() {
|
||||
assert(memtable_factory.get() != nullptr);
|
||||
}
|
||||
|
||||
ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
|
||||
: comparator(options.comparator),
|
||||
merge_operator(options.merge_operator),
|
||||
compaction_filter(options.compaction_filter),
|
||||
compaction_filter_factory(options.compaction_filter_factory),
|
||||
write_buffer_size(options.write_buffer_size),
|
||||
max_write_buffer_number(options.max_write_buffer_number),
|
||||
AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
|
||||
: max_write_buffer_number(options.max_write_buffer_number),
|
||||
min_write_buffer_number_to_merge(
|
||||
options.min_write_buffer_number_to_merge),
|
||||
max_write_buffer_number_to_maintain(
|
||||
options.max_write_buffer_number_to_maintain),
|
||||
compression(options.compression),
|
||||
compression_per_level(options.compression_per_level),
|
||||
bottommost_compression(options.bottommost_compression),
|
||||
compression_opts(options.compression_opts),
|
||||
prefix_extractor(options.prefix_extractor),
|
||||
num_levels(options.num_levels),
|
||||
level0_file_num_compaction_trigger(
|
||||
options.level0_file_num_compaction_trigger),
|
||||
level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
|
||||
level0_stop_writes_trigger(options.level0_stop_writes_trigger),
|
||||
target_file_size_base(options.target_file_size_base),
|
||||
target_file_size_multiplier(options.target_file_size_multiplier),
|
||||
max_bytes_for_level_base(options.max_bytes_for_level_base),
|
||||
level_compaction_dynamic_level_bytes(
|
||||
options.level_compaction_dynamic_level_bytes),
|
||||
max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
|
||||
max_bytes_for_level_multiplier_additional(
|
||||
options.max_bytes_for_level_multiplier_additional),
|
||||
max_compaction_bytes(options.max_compaction_bytes),
|
||||
soft_rate_limit(options.soft_rate_limit),
|
||||
soft_pending_compaction_bytes_limit(
|
||||
options.soft_pending_compaction_bytes_limit),
|
||||
hard_pending_compaction_bytes_limit(
|
||||
options.hard_pending_compaction_bytes_limit),
|
||||
rate_limit_delay_max_milliseconds(
|
||||
options.rate_limit_delay_max_milliseconds),
|
||||
arena_block_size(options.arena_block_size),
|
||||
disable_auto_compactions(options.disable_auto_compactions),
|
||||
purge_redundant_kvs_while_flush(options.purge_redundant_kvs_while_flush),
|
||||
compaction_style(options.compaction_style),
|
||||
compaction_pri(options.compaction_pri),
|
||||
compaction_options_universal(options.compaction_options_universal),
|
||||
compaction_options_fifo(options.compaction_options_fifo),
|
||||
max_sequential_skip_in_iterations(
|
||||
options.max_sequential_skip_in_iterations),
|
||||
memtable_factory(options.memtable_factory),
|
||||
table_factory(options.table_factory),
|
||||
table_properties_collector_factories(
|
||||
options.table_properties_collector_factories),
|
||||
inplace_update_support(options.inplace_update_support),
|
||||
inplace_update_num_locks(options.inplace_update_num_locks),
|
||||
inplace_callback(options.inplace_callback),
|
||||
|
@ -102,6 +55,32 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
|
|||
memtable_insert_with_hint_prefix_extractor(
|
||||
options.memtable_insert_with_hint_prefix_extractor),
|
||||
bloom_locality(options.bloom_locality),
|
||||
arena_block_size(options.arena_block_size),
|
||||
compression_per_level(options.compression_per_level),
|
||||
num_levels(options.num_levels),
|
||||
level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
|
||||
level0_stop_writes_trigger(options.level0_stop_writes_trigger),
|
||||
target_file_size_base(options.target_file_size_base),
|
||||
target_file_size_multiplier(options.target_file_size_multiplier),
|
||||
level_compaction_dynamic_level_bytes(
|
||||
options.level_compaction_dynamic_level_bytes),
|
||||
max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier),
|
||||
max_bytes_for_level_multiplier_additional(
|
||||
options.max_bytes_for_level_multiplier_additional),
|
||||
max_compaction_bytes(options.max_compaction_bytes),
|
||||
soft_pending_compaction_bytes_limit(
|
||||
options.soft_pending_compaction_bytes_limit),
|
||||
hard_pending_compaction_bytes_limit(
|
||||
options.hard_pending_compaction_bytes_limit),
|
||||
compaction_style(options.compaction_style),
|
||||
compaction_pri(options.compaction_pri),
|
||||
compaction_options_universal(options.compaction_options_universal),
|
||||
compaction_options_fifo(options.compaction_options_fifo),
|
||||
max_sequential_skip_in_iterations(
|
||||
options.max_sequential_skip_in_iterations),
|
||||
memtable_factory(options.memtable_factory),
|
||||
table_properties_collector_factories(
|
||||
options.table_properties_collector_factories),
|
||||
max_successive_merges(options.max_successive_merges),
|
||||
optimize_filters_for_hits(options.optimize_filters_for_hits),
|
||||
paranoid_file_checks(options.paranoid_file_checks),
|
||||
|
@ -114,6 +93,28 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
|
|||
}
|
||||
}
|
||||
|
||||
// Default constructor.
// Selects kSnappyCompression when Snappy_Supported() returns true and
// kNoCompression otherwise, and installs a newly created
// BlockBasedTableFactory as the table factory.
ColumnFamilyOptions::ColumnFamilyOptions()
    : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression),
      // std::make_shared replaces the naked `new`: the factory and its
      // control block are created together and no raw owning pointer exists
      // at any point. The result converts to std::shared_ptr<TableFactory>.
      table_factory(std::make_shared<BlockBasedTableFactory>()) {}
|
||||
|
||||
// Builds a ColumnFamilyOptions from a combined Options object.
// The AdvancedColumnFamilyOptions base is constructed from the same
// `options` argument, after which every field listed below is copied from
// the identically named field of `options`.
ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
    : AdvancedColumnFamilyOptions(options),
      // User-supplied hooks.
      comparator(options.comparator),
      merge_operator(options.merge_operator),
      compaction_filter(options.compaction_filter),
      compaction_filter_factory(options.compaction_filter_factory),
      // Write buffer and compression settings.
      write_buffer_size(options.write_buffer_size),
      compression(options.compression),
      bottommost_compression(options.bottommost_compression),
      compression_opts(options.compression_opts),
      // Compaction-related settings.
      level0_file_num_compaction_trigger(
          options.level0_file_num_compaction_trigger),
      prefix_extractor(options.prefix_extractor),
      max_bytes_for_level_base(options.max_bytes_for_level_base),
      disable_auto_compactions(options.disable_auto_compactions),
      // Table factory.
      table_factory(options.table_factory) {}
|
||||
|
||||
// Default constructor: the body is empty, so no member is assigned here.
DBOptions::DBOptions() {}
|
||||
|
||||
DBOptions::DBOptions(const Options& options)
|
||||
|
|
|
@ -365,6 +365,21 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
|
|||
OptionType::kBoolean, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableDBOptions, avoid_flush_during_shutdown)}}};
|
||||
|
||||
// offset_of returns the byte offset of a data member within its class.
// ex: offset_of(&ColumnFamilyOptions::num_levels)
// yields the offset of num_levels inside the ColumnFamilyOptions class.
//
// It serves the same purpose as offsetof(), but it also allows us to work
// with classes and structures that are not standard-layout: the offset is
// measured on an actual object as the distance between the address of the
// member and the address of the object itself.
//
// The object used for the measurement is a function-local static instance of
// T2, so T2 has to be default-constructible.
// refs:
// http://en.cppreference.com/w/cpp/concept/StandardLayoutType
// https://gist.github.com/graphitemaster/494f21190bb2c63c5516
template <typename T1, typename T2>
inline int offset_of(T1 T2::*member) {
  static T2 instance;
  const size_t member_address = reinterpret_cast<size_t>(&(instance.*member));
  const size_t object_address = reinterpret_cast<size_t>(&instance);
  return static_cast<int>(member_address - object_address);
}
|
||||
|
||||
static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
|
||||
/* not yet supported
|
||||
CompactionOptionsFIFO compaction_options_fifo;
|
||||
|
@ -379,45 +394,44 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
|
|||
std::string* merged_value);
|
||||
*/
|
||||
{"report_bg_io_stats",
|
||||
{offsetof(struct ColumnFamilyOptions, report_bg_io_stats),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, true,
|
||||
{offset_of(&ColumnFamilyOptions::report_bg_io_stats), OptionType::kBoolean,
|
||||
OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, report_bg_io_stats)}},
|
||||
{"compaction_measure_io_stats",
|
||||
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false, 0}},
|
||||
{"disable_auto_compactions",
|
||||
{offsetof(struct ColumnFamilyOptions, disable_auto_compactions),
|
||||
{offset_of(&ColumnFamilyOptions::disable_auto_compactions),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, disable_auto_compactions)}},
|
||||
{"filter_deletes",
|
||||
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"inplace_update_support",
|
||||
{offsetof(struct ColumnFamilyOptions, inplace_update_support),
|
||||
{offset_of(&ColumnFamilyOptions::inplace_update_support),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
|
||||
{"level_compaction_dynamic_level_bytes",
|
||||
{offsetof(struct ColumnFamilyOptions,
|
||||
level_compaction_dynamic_level_bytes),
|
||||
{offset_of(&ColumnFamilyOptions::level_compaction_dynamic_level_bytes),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
|
||||
{"optimize_filters_for_hits",
|
||||
{offsetof(struct ColumnFamilyOptions, optimize_filters_for_hits),
|
||||
{offset_of(&ColumnFamilyOptions::optimize_filters_for_hits),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
|
||||
{"paranoid_file_checks",
|
||||
{offsetof(struct ColumnFamilyOptions, paranoid_file_checks),
|
||||
{offset_of(&ColumnFamilyOptions::paranoid_file_checks),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, paranoid_file_checks)}},
|
||||
{"force_consistency_checks",
|
||||
{offsetof(struct ColumnFamilyOptions, force_consistency_checks),
|
||||
{offset_of(&ColumnFamilyOptions::force_consistency_checks),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
|
||||
{"purge_redundant_kvs_while_flush",
|
||||
{offsetof(struct ColumnFamilyOptions, purge_redundant_kvs_while_flush),
|
||||
{offset_of(&ColumnFamilyOptions::purge_redundant_kvs_while_flush),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
|
||||
{"verify_checksums_in_compaction",
|
||||
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"soft_pending_compaction_bytes_limit",
|
||||
{offsetof(struct ColumnFamilyOptions, soft_pending_compaction_bytes_limit),
|
||||
{offset_of(&ColumnFamilyOptions::soft_pending_compaction_bytes_limit),
|
||||
OptionType::kUInt64T, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, soft_pending_compaction_bytes_limit)}},
|
||||
{"hard_pending_compaction_bytes_limit",
|
||||
{offsetof(struct ColumnFamilyOptions, hard_pending_compaction_bytes_limit),
|
||||
{offset_of(&ColumnFamilyOptions::hard_pending_compaction_bytes_limit),
|
||||
OptionType::kUInt64T, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, hard_pending_compaction_bytes_limit)}},
|
||||
{"hard_rate_limit",
|
||||
|
@ -425,21 +439,21 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
|
|||
{"soft_rate_limit",
|
||||
{0, OptionType::kDouble, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"max_compaction_bytes",
|
||||
{offsetof(struct ColumnFamilyOptions, max_compaction_bytes),
|
||||
{offset_of(&ColumnFamilyOptions::max_compaction_bytes),
|
||||
OptionType::kUInt64T, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, max_compaction_bytes)}},
|
||||
{"expanded_compaction_factor",
|
||||
{0, OptionType::kInt, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"level0_file_num_compaction_trigger",
|
||||
{offsetof(struct ColumnFamilyOptions, level0_file_num_compaction_trigger),
|
||||
{offset_of(&ColumnFamilyOptions::level0_file_num_compaction_trigger),
|
||||
OptionType::kInt, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, level0_file_num_compaction_trigger)}},
|
||||
{"level0_slowdown_writes_trigger",
|
||||
{offsetof(struct ColumnFamilyOptions, level0_slowdown_writes_trigger),
|
||||
{offset_of(&ColumnFamilyOptions::level0_slowdown_writes_trigger),
|
||||
OptionType::kInt, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, level0_slowdown_writes_trigger)}},
|
||||
{"level0_stop_writes_trigger",
|
||||
{offsetof(struct ColumnFamilyOptions, level0_stop_writes_trigger),
|
||||
{offset_of(&ColumnFamilyOptions::level0_stop_writes_trigger),
|
||||
OptionType::kInt, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, level0_stop_writes_trigger)}},
|
||||
{"max_grandparent_overlap_factor",
|
||||
|
@ -447,53 +461,53 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
|
|||
{"max_mem_compaction_level",
|
||||
{0, OptionType::kInt, OptionVerificationType::kDeprecated, false, 0}},
|
||||
{"max_write_buffer_number",
|
||||
{offsetof(struct ColumnFamilyOptions, max_write_buffer_number),
|
||||
{offset_of(&ColumnFamilyOptions::max_write_buffer_number),
|
||||
OptionType::kInt, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, max_write_buffer_number)}},
|
||||
{"max_write_buffer_number_to_maintain",
|
||||
{offsetof(struct ColumnFamilyOptions, max_write_buffer_number_to_maintain),
|
||||
{offset_of(&ColumnFamilyOptions::max_write_buffer_number_to_maintain),
|
||||
OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
|
||||
{"min_write_buffer_number_to_merge",
|
||||
{offsetof(struct ColumnFamilyOptions, min_write_buffer_number_to_merge),
|
||||
{offset_of(&ColumnFamilyOptions::min_write_buffer_number_to_merge),
|
||||
OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
|
||||
{"num_levels",
|
||||
{offsetof(struct ColumnFamilyOptions, num_levels), OptionType::kInt,
|
||||
{offset_of(&ColumnFamilyOptions::num_levels), OptionType::kInt,
|
||||
OptionVerificationType::kNormal, false, 0}},
|
||||
{"source_compaction_factor",
|
||||
{0, OptionType::kInt, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"target_file_size_multiplier",
|
||||
{offsetof(struct ColumnFamilyOptions, target_file_size_multiplier),
|
||||
{offset_of(&ColumnFamilyOptions::target_file_size_multiplier),
|
||||
OptionType::kInt, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, target_file_size_multiplier)}},
|
||||
{"arena_block_size",
|
||||
{offsetof(struct ColumnFamilyOptions, arena_block_size),
|
||||
OptionType::kSizeT, OptionVerificationType::kNormal, true,
|
||||
{offset_of(&ColumnFamilyOptions::arena_block_size), OptionType::kSizeT,
|
||||
OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, arena_block_size)}},
|
||||
{"inplace_update_num_locks",
|
||||
{offsetof(struct ColumnFamilyOptions, inplace_update_num_locks),
|
||||
{offset_of(&ColumnFamilyOptions::inplace_update_num_locks),
|
||||
OptionType::kSizeT, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, inplace_update_num_locks)}},
|
||||
{"max_successive_merges",
|
||||
{offsetof(struct ColumnFamilyOptions, max_successive_merges),
|
||||
{offset_of(&ColumnFamilyOptions::max_successive_merges),
|
||||
OptionType::kSizeT, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, max_successive_merges)}},
|
||||
{"memtable_huge_page_size",
|
||||
{offsetof(struct ColumnFamilyOptions, memtable_huge_page_size),
|
||||
{offset_of(&ColumnFamilyOptions::memtable_huge_page_size),
|
||||
OptionType::kSizeT, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, memtable_huge_page_size)}},
|
||||
{"memtable_prefix_bloom_huge_page_tlb_size",
|
||||
{0, OptionType::kSizeT, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"write_buffer_size",
|
||||
{offsetof(struct ColumnFamilyOptions, write_buffer_size),
|
||||
OptionType::kSizeT, OptionVerificationType::kNormal, true,
|
||||
{offset_of(&ColumnFamilyOptions::write_buffer_size), OptionType::kSizeT,
|
||||
OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, write_buffer_size)}},
|
||||
{"bloom_locality",
|
||||
{offsetof(struct ColumnFamilyOptions, bloom_locality),
|
||||
OptionType::kUInt32T, OptionVerificationType::kNormal, false, 0}},
|
||||
{offset_of(&ColumnFamilyOptions::bloom_locality), OptionType::kUInt32T,
|
||||
OptionVerificationType::kNormal, false, 0}},
|
||||
{"memtable_prefix_bloom_bits",
|
||||
{0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"memtable_prefix_bloom_size_ratio",
|
||||
{offsetof(struct ColumnFamilyOptions, memtable_prefix_bloom_size_ratio),
|
||||
{offset_of(&ColumnFamilyOptions::memtable_prefix_bloom_size_ratio),
|
||||
OptionType::kDouble, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, memtable_prefix_bloom_size_ratio)}},
|
||||
{"memtable_prefix_bloom_probes",
|
||||
|
@ -501,72 +515,72 @@ static std::unordered_map<std::string, OptionTypeInfo> cf_options_type_info = {
|
|||
{"min_partial_merge_operands",
|
||||
{0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, true, 0}},
|
||||
{"max_bytes_for_level_base",
|
||||
{offsetof(struct ColumnFamilyOptions, max_bytes_for_level_base),
|
||||
{offset_of(&ColumnFamilyOptions::max_bytes_for_level_base),
|
||||
OptionType::kUInt64T, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, max_bytes_for_level_base)}},
|
||||
{"max_bytes_for_level_multiplier",
|
||||
{offsetof(struct ColumnFamilyOptions, max_bytes_for_level_multiplier),
|
||||
{offset_of(&ColumnFamilyOptions::max_bytes_for_level_multiplier),
|
||||
OptionType::kDouble, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier)}},
|
||||
{"max_bytes_for_level_multiplier_additional",
|
||||
{offsetof(struct ColumnFamilyOptions,
|
||||
max_bytes_for_level_multiplier_additional),
|
||||
{offset_of(
|
||||
&ColumnFamilyOptions::max_bytes_for_level_multiplier_additional),
|
||||
OptionType::kVectorInt, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions,
|
||||
max_bytes_for_level_multiplier_additional)}},
|
||||
{"max_sequential_skip_in_iterations",
|
||||
{offsetof(struct ColumnFamilyOptions, max_sequential_skip_in_iterations),
|
||||
{offset_of(&ColumnFamilyOptions::max_sequential_skip_in_iterations),
|
||||
OptionType::kUInt64T, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, max_sequential_skip_in_iterations)}},
|
||||
{"target_file_size_base",
|
||||
{offsetof(struct ColumnFamilyOptions, target_file_size_base),
|
||||
{offset_of(&ColumnFamilyOptions::target_file_size_base),
|
||||
OptionType::kUInt64T, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, target_file_size_base)}},
|
||||
{"rate_limit_delay_max_milliseconds",
|
||||
{0, OptionType::kUInt, OptionVerificationType::kDeprecated, false, 0}},
|
||||
{"compression",
|
||||
{offsetof(struct ColumnFamilyOptions, compression),
|
||||
{offset_of(&ColumnFamilyOptions::compression),
|
||||
OptionType::kCompressionType, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableCFOptions, compression)}},
|
||||
{"compression_per_level",
|
||||
{offsetof(struct ColumnFamilyOptions, compression_per_level),
|
||||
{offset_of(&ColumnFamilyOptions::compression_per_level),
|
||||
OptionType::kVectorCompressionType, OptionVerificationType::kNormal,
|
||||
false, 0}},
|
||||
{"bottommost_compression",
|
||||
{offsetof(struct ColumnFamilyOptions, bottommost_compression),
|
||||
{offset_of(&ColumnFamilyOptions::bottommost_compression),
|
||||
OptionType::kCompressionType, OptionVerificationType::kNormal, false, 0}},
|
||||
{"comparator",
|
||||
{offsetof(struct ColumnFamilyOptions, comparator), OptionType::kComparator,
|
||||
{offset_of(&ColumnFamilyOptions::comparator), OptionType::kComparator,
|
||||
OptionVerificationType::kByName, false, 0}},
|
||||
{"prefix_extractor",
|
||||
{offsetof(struct ColumnFamilyOptions, prefix_extractor),
|
||||
{offset_of(&ColumnFamilyOptions::prefix_extractor),
|
||||
OptionType::kSliceTransform, OptionVerificationType::kByNameAllowNull,
|
||||
false, 0}},
|
||||
{"memtable_insert_with_hint_prefix_extractor",
|
||||
{offsetof(struct ColumnFamilyOptions,
|
||||
memtable_insert_with_hint_prefix_extractor),
|
||||
{offset_of(
|
||||
&ColumnFamilyOptions::memtable_insert_with_hint_prefix_extractor),
|
||||
OptionType::kSliceTransform, OptionVerificationType::kByNameAllowNull,
|
||||
false, 0}},
|
||||
{"memtable_factory",
|
||||
{offsetof(struct ColumnFamilyOptions, memtable_factory),
|
||||
{offset_of(&ColumnFamilyOptions::memtable_factory),
|
||||
OptionType::kMemTableRepFactory, OptionVerificationType::kByName, false,
|
||||
0}},
|
||||
{"table_factory",
|
||||
{offsetof(struct ColumnFamilyOptions, table_factory),
|
||||
OptionType::kTableFactory, OptionVerificationType::kByName, false, 0}},
|
||||
{offset_of(&ColumnFamilyOptions::table_factory), OptionType::kTableFactory,
|
||||
OptionVerificationType::kByName, false, 0}},
|
||||
{"compaction_filter",
|
||||
{offsetof(struct ColumnFamilyOptions, compaction_filter),
|
||||
{offset_of(&ColumnFamilyOptions::compaction_filter),
|
||||
OptionType::kCompactionFilter, OptionVerificationType::kByName, false,
|
||||
0}},
|
||||
{"compaction_filter_factory",
|
||||
{offsetof(struct ColumnFamilyOptions, compaction_filter_factory),
|
||||
{offset_of(&ColumnFamilyOptions::compaction_filter_factory),
|
||||
OptionType::kCompactionFilterFactory, OptionVerificationType::kByName,
|
||||
false, 0}},
|
||||
{"merge_operator",
|
||||
{offsetof(struct ColumnFamilyOptions, merge_operator),
|
||||
{offset_of(&ColumnFamilyOptions::merge_operator),
|
||||
OptionType::kMergeOperator, OptionVerificationType::kByName, false, 0}},
|
||||
{"compaction_style",
|
||||
{offsetof(struct ColumnFamilyOptions, compaction_style),
|
||||
{offset_of(&ColumnFamilyOptions::compaction_style),
|
||||
OptionType::kCompactionStyle, OptionVerificationType::kNormal, false,
|
||||
0}}};
|
||||
|
||||
|
|
|
@ -314,32 +314,31 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
|||
// options in the blacklist need to appear in the same order as in
|
||||
// ColumnFamilyOptions.
|
||||
const OffsetGap kColumnFamilyOptionsBlacklist = {
|
||||
{offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)},
|
||||
{offsetof(struct ColumnFamilyOptions, merge_operator),
|
||||
sizeof(std::shared_ptr<MergeOperator>)},
|
||||
{offsetof(struct ColumnFamilyOptions, compaction_filter),
|
||||
sizeof(const CompactionFilter*)},
|
||||
{offsetof(struct ColumnFamilyOptions, compaction_filter_factory),
|
||||
sizeof(std::shared_ptr<CompactionFilterFactory>)},
|
||||
{offsetof(struct ColumnFamilyOptions, compression_per_level),
|
||||
sizeof(std::vector<CompressionType>)},
|
||||
{offsetof(struct ColumnFamilyOptions, prefix_extractor),
|
||||
sizeof(std::shared_ptr<const SliceTransform>)},
|
||||
{offsetof(struct ColumnFamilyOptions,
|
||||
max_bytes_for_level_multiplier_additional),
|
||||
sizeof(std::vector<int>)},
|
||||
{offsetof(struct ColumnFamilyOptions, memtable_factory),
|
||||
sizeof(std::shared_ptr<MemTableRepFactory>)},
|
||||
{offsetof(struct ColumnFamilyOptions, table_factory),
|
||||
sizeof(std::shared_ptr<TableFactory>)},
|
||||
{offsetof(struct ColumnFamilyOptions,
|
||||
table_properties_collector_factories),
|
||||
sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
|
||||
{offsetof(struct ColumnFamilyOptions, inplace_callback),
|
||||
{offset_of(&ColumnFamilyOptions::inplace_callback),
|
||||
sizeof(UpdateStatus(*)(char*, uint32_t*, Slice, std::string*))},
|
||||
{offsetof(struct ColumnFamilyOptions,
|
||||
memtable_insert_with_hint_prefix_extractor),
|
||||
{offset_of(
|
||||
&ColumnFamilyOptions::memtable_insert_with_hint_prefix_extractor),
|
||||
sizeof(std::shared_ptr<const SliceTransform>)},
|
||||
{offset_of(&ColumnFamilyOptions::compression_per_level),
|
||||
sizeof(std::vector<CompressionType>)},
|
||||
{offset_of(
|
||||
&ColumnFamilyOptions::max_bytes_for_level_multiplier_additional),
|
||||
sizeof(std::vector<int>)},
|
||||
{offset_of(&ColumnFamilyOptions::memtable_factory),
|
||||
sizeof(std::shared_ptr<MemTableRepFactory>)},
|
||||
{offset_of(&ColumnFamilyOptions::table_properties_collector_factories),
|
||||
sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
|
||||
{offset_of(&ColumnFamilyOptions::comparator), sizeof(Comparator*)},
|
||||
{offset_of(&ColumnFamilyOptions::merge_operator),
|
||||
sizeof(std::shared_ptr<MergeOperator>)},
|
||||
{offset_of(&ColumnFamilyOptions::compaction_filter),
|
||||
sizeof(const CompactionFilter*)},
|
||||
{offset_of(&ColumnFamilyOptions::compaction_filter_factory),
|
||||
sizeof(std::shared_ptr<CompactionFilterFactory>)},
|
||||
{offset_of(&ColumnFamilyOptions::prefix_extractor),
|
||||
sizeof(std::shared_ptr<const SliceTransform>)},
|
||||
{offset_of(&ColumnFamilyOptions::table_factory),
|
||||
sizeof(std::shared_ptr<TableFactory>)},
|
||||
};
|
||||
|
||||
char* options_ptr = new char[sizeof(ColumnFamilyOptions)];
|
||||
|
|
Loading…
Reference in a new issue