Mirror of https://github.com/facebook/rocksdb.git (synced 2024-11-30 04:41:49 +00:00)
62fc15f009
Summary:
Add option `block_protection_bytes_per_key` and an implementation of block per key-value checksums. The main changes are:
1. checksum construction and verification in block.cc/h
2. passing the option `block_protection_bytes_per_key` around (mainly for methods defined in table_cache.h)
3. unit test / crash test updates

Tests:
* Added unit tests
* Crash test: `python3 tools/db_crashtest.py blackbox --simple --block_protection_bytes_per_key=1 --write_buffer_size=1048576`

Follow-up (maybe as a separate PR): make sure corruption statuses returned from BlockIters are correctly handled.

Performance: turning on block per key-value protection has a non-trivial negative impact on read performance and costs additional memory. For memory, each block carries an additional 24 bytes of checksum-related state besides the checksum itself. For CPU, I set up a DB of size ~1.2GB with 5M keys (32-byte keys and 200-byte values), which compacts to ~5 SST files (target file size 256 MB) in L6 without compression, and tested readrandom performance with various block cache sizes (to mimic various cache hit rates):

```
SETUP
make OPTIMIZE_LEVEL="-O3" USE_LTO=1 DEBUG_LEVEL=0 -j32 db_bench
./db_bench -benchmarks=fillseq,compact0,waitforcompaction,compact,waitforcompaction -write_buffer_size=33554432 -level_compaction_dynamic_level_bytes=true -max_background_jobs=8 -target_file_size_base=268435456 --num=5000000 --key_size=32 --value_size=200 --compression_type=none

BENCHMARK
./db_bench --use_existing_db -benchmarks=readtocache,readrandom[-X10] --num=5000000 --key_size=32 --disable_auto_compactions --reads=1000000 --block_protection_bytes_per_key=[0|1] --cache_size=$CACHESIZE

The readrandom ops/sec looks like the following:

Block cache size:              2GB      1.2GB * 0.9   1.2GB * 0.8   1.2GB * 0.5   8MB
Main                           240805   223604        198176        161653        139040
PR prot_bytes=0                238691   226693        200127        161082        141153
PR prot_bytes=1                214983   193199        178532        137013        108211
prot_bytes=1 vs prot_bytes=0   -10%     -15%          -10.8%        -15%          -23%
```

The benchmark has a lot of variance, but there was a 5% to 25% regression in this benchmark with different cache hit rates.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11287

Reviewed By: ajkr

Differential Revision: D43970708

Pulled By: cbi42

fbshipit-source-id: ef98d898b71779846fa74212b9ec9e08b7183940
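For context, a minimal sketch of how a user might turn this feature on when opening a DB. It assumes `block_protection_bytes_per_key` is exposed as a column family option (as the db_bench/db_crashtest flags above suggest); the DB path is illustrative.

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Assumption: block_protection_bytes_per_key is a column family option.
  // 1 byte of protection per key-value in SST blocks; 0 (the default)
  // disables it. Larger values lower the chance of undetected corruption at
  // additional memory/CPU cost.
  options.block_protection_bytes_per_key = 1;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/block_protection_demo", &db);
  assert(s.ok());
  // Reads now verify the per key-value checksums carried by each block.
  delete db;
  return 0;
}
```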
107 lines · 4.7 KiB · C++
// Copyright (c) Meta Platforms, Inc. and affiliates.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "table/block_based/block_cache.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
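// Each Create() overload below parses raw BlockContents into the typed block
// object for its block kind; the data, index, filter-partition-index, and
// metaindex overloads additionally initialize per key-value checksum
// protection from the context's protection_bytes_per_key (0 disables it).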
void BlockCreateContext::Create(std::unique_ptr<Block_kData>* parsed_out,
                                BlockContents&& block) {
  parsed_out->reset(new Block_kData(
      std::move(block), table_options->read_amp_bytes_per_bit, statistics));
  parsed_out->get()->InitializeDataBlockProtectionInfo(protection_bytes_per_key,
                                                       raw_ucmp);
}
void BlockCreateContext::Create(std::unique_ptr<Block_kIndex>* parsed_out,
                                BlockContents&& block) {
  parsed_out->reset(new Block_kIndex(std::move(block),
                                     /*read_amp_bytes_per_bit*/ 0, statistics));
  parsed_out->get()->InitializeIndexBlockProtectionInfo(
      protection_bytes_per_key, raw_ucmp, index_value_is_full,
      index_has_first_key);
}
void BlockCreateContext::Create(
    std::unique_ptr<Block_kFilterPartitionIndex>* parsed_out,
    BlockContents&& block) {
  parsed_out->reset(new Block_kFilterPartitionIndex(
      std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
  parsed_out->get()->InitializeIndexBlockProtectionInfo(
      protection_bytes_per_key, raw_ucmp, index_value_is_full,
      index_has_first_key);
}
void BlockCreateContext::Create(
    std::unique_ptr<Block_kRangeDeletion>* parsed_out, BlockContents&& block) {
  parsed_out->reset(new Block_kRangeDeletion(
      std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
}
void BlockCreateContext::Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
                                BlockContents&& block) {
  parsed_out->reset(new Block_kMetaIndex(
      std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
  parsed_out->get()->InitializeMetaIndexBlockProtectionInfo(
      protection_bytes_per_key);
}

void BlockCreateContext::Create(
    std::unique_ptr<ParsedFullFilterBlock>* parsed_out, BlockContents&& block) {
  parsed_out->reset(new ParsedFullFilterBlock(
      table_options->filter_policy.get(), std::move(block)));
}

void BlockCreateContext::Create(std::unique_ptr<UncompressionDict>* parsed_out,
                                BlockContents&& block) {
  parsed_out->reset(new UncompressionDict(
      block.data, std::move(block.allocation), using_zstd));
}

namespace {
// For getting SecondaryCache-compatible helpers from a BlockType. This is
// useful for accessing block cache in untyped contexts, such as for generic
// cache warming in table builder.
const std::array<const Cache::CacheItemHelper*,
                 static_cast<unsigned>(BlockType::kInvalid) + 1>
    kCacheItemFullHelperForBlockType{{
        BlockCacheInterface<Block_kData>::GetFullHelper(),
        BlockCacheInterface<ParsedFullFilterBlock>::GetFullHelper(),
        BlockCacheInterface<Block_kFilterPartitionIndex>::GetFullHelper(),
        nullptr, // kProperties
        BlockCacheInterface<UncompressionDict>::GetFullHelper(),
        BlockCacheInterface<Block_kRangeDeletion>::GetFullHelper(),
        nullptr, // kHashIndexPrefixes
        nullptr, // kHashIndexMetadata
        nullptr, // kMetaIndex (not yet stored in block cache)
        BlockCacheInterface<Block_kIndex>::GetFullHelper(),
        nullptr, // kInvalid
    }};

// For getting basic helpers from a BlockType (no SecondaryCache support)
const std::array<const Cache::CacheItemHelper*,
                 static_cast<unsigned>(BlockType::kInvalid) + 1>
    kCacheItemBasicHelperForBlockType{{
        BlockCacheInterface<Block_kData>::GetBasicHelper(),
        BlockCacheInterface<ParsedFullFilterBlock>::GetBasicHelper(),
        BlockCacheInterface<Block_kFilterPartitionIndex>::GetBasicHelper(),
        nullptr, // kProperties
        BlockCacheInterface<UncompressionDict>::GetBasicHelper(),
        BlockCacheInterface<Block_kRangeDeletion>::GetBasicHelper(),
        nullptr, // kHashIndexPrefixes
        nullptr, // kHashIndexMetadata
        nullptr, // kMetaIndex (not yet stored in block cache)
        BlockCacheInterface<Block_kIndex>::GetBasicHelper(),
        nullptr, // kInvalid
    }};
} // namespace

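// Picks between the two tables above: when the lowest cache tier in use is
// kNonVolatileBlockTier (i.e. a SecondaryCache may be involved), return the
// SecondaryCache-compatible "full" helper; otherwise return the basic helper.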
const Cache::CacheItemHelper* GetCacheItemHelper(
    BlockType block_type, CacheTier lowest_used_cache_tier) {
  if (lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier) {
    return kCacheItemFullHelperForBlockType[static_cast<unsigned>(block_type)];
  } else {
    return kCacheItemBasicHelperForBlockType[static_cast<unsigned>(block_type)];
  }
}

} // namespace ROCKSDB_NAMESPACE