mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-29 18:33:58 +00:00
62fc15f009
Summary: add option `block_protection_bytes_per_key` and implementation for block per key-value checksum. The main changes are 1. checksum construction and verification in block.cc/h 2. pass the option `block_protection_bytes_per_key` around (mainly for methods defined in table_cache.h) 3. unit tests/crash test updates Tests: * Added unit tests * Crash test: `python3 tools/db_crashtest.py blackbox --simple --block_protection_bytes_per_key=1 --write_buffer_size=1048576` Follow up (maybe as a separate PR): make sure corruption status returned from BlockIters are correctly handled. Performance: Turning on block per KV protection has a non-trivial negative impact on read performance and costs additional memory. For memory, each block includes additional 24 bytes for checksum-related states beside checksum itself. For CPU, I set up a DB of size ~1.2GB with 5M keys (32 bytes key and 200 bytes value) which compacts to ~5 SST files (target file size 256 MB) in L6 without compression. I tested readrandom performance with various block cache size (to mimic various cache hit rates): ``` SETUP make OPTIMIZE_LEVEL="-O3" USE_LTO=1 DEBUG_LEVEL=0 -j32 db_bench ./db_bench -benchmarks=fillseq,compact0,waitforcompaction,compact,waitforcompaction -write_buffer_size=33554432 -level_compaction_dynamic_level_bytes=true -max_background_jobs=8 -target_file_size_base=268435456 --num=5000000 --key_size=32 --value_size=200 --compression_type=none BENCHMARK ./db_bench --use_existing_db -benchmarks=readtocache,readrandom[-X10] --num=5000000 --key_size=32 --disable_auto_compactions --reads=1000000 --block_protection_bytes_per_key=[0|1] --cache_size=$CACHESIZE The readrandom ops/sec looks like the following: Block cache size: 2GB 1.2GB * 0.9 1.2GB * 0.8 1.2GB * 0.5 8MB Main 240805 223604 198176 161653 139040 PR prot_bytes=0 238691 226693 200127 161082 141153 PR prot_bytes=1 214983 193199 178532 137013 108211 prot_bytes=1 vs -10% -15% -10.8% -15% -23% prot_bytes=0 ``` The benchmark has a lot of 
variance, but there was a 5% to 25% regression in this benchmark with different cache hit rates. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11287 Reviewed By: ajkr Differential Revision: D43970708 Pulled By: cbi42 fbshipit-source-id: ef98d898b71779846fa74212b9ec9e08b7183940
141 lines
5 KiB
C++
141 lines
5 KiB
C++
// Copyright (c) Meta Platforms, Inc. and affiliates.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

// Code supporting block cache (Cache) access for block-based table, based on
// the convenient APIs in typed_cache.h
#pragma once

#include <type_traits>

#include "cache/typed_cache.h"
#include "port/lang.h"
#include "table/block_based/block.h"
#include "table/block_based/block_type.h"
#include "table/block_based/parsed_full_filter_block.h"
#include "table/format.h"
namespace ROCKSDB_NAMESPACE {

// Metaprogramming wrappers for Block, to give each type a single role when
// used with FullTypedCacheInterface.
// (NOTE: previous attempts to create actual derived classes of Block with
// virtual calls resulted in performance regression)

class Block_kData : public Block {
|
|
public:
|
|
using Block::Block;
|
|
|
|
static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kDataBlock;
|
|
static constexpr BlockType kBlockType = BlockType::kData;
|
|
};
|
|
|
|
class Block_kIndex : public Block {
|
|
public:
|
|
using Block::Block;
|
|
|
|
static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kIndexBlock;
|
|
static constexpr BlockType kBlockType = BlockType::kIndex;
|
|
};
|
|
|
|
class Block_kFilterPartitionIndex : public Block {
|
|
public:
|
|
using Block::Block;
|
|
|
|
static constexpr CacheEntryRole kCacheEntryRole =
|
|
CacheEntryRole::kFilterMetaBlock;
|
|
static constexpr BlockType kBlockType = BlockType::kFilterPartitionIndex;
|
|
};
|
|
|
|
class Block_kRangeDeletion : public Block {
|
|
public:
|
|
using Block::Block;
|
|
|
|
static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kOtherBlock;
|
|
static constexpr BlockType kBlockType = BlockType::kRangeDeletion;
|
|
};
|
|
|
|
// Useful for creating the Block even though meta index blocks are not
|
|
// yet stored in block cache
|
|
class Block_kMetaIndex : public Block {
|
|
public:
|
|
using Block::Block;
|
|
|
|
static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kOtherBlock;
|
|
static constexpr BlockType kBlockType = BlockType::kMetaIndex;
|
|
};
|
|
|
|
struct BlockCreateContext : public Cache::CreateContext {
|
|
BlockCreateContext() {}
|
|
BlockCreateContext(const BlockBasedTableOptions* _table_options,
|
|
Statistics* _statistics, bool _using_zstd,
|
|
uint8_t _protection_bytes_per_key,
|
|
const Comparator* _raw_ucmp,
|
|
bool _index_value_is_full = false,
|
|
bool _index_has_first_key = false)
|
|
: table_options(_table_options),
|
|
statistics(_statistics),
|
|
using_zstd(_using_zstd),
|
|
protection_bytes_per_key(_protection_bytes_per_key),
|
|
raw_ucmp(_raw_ucmp),
|
|
index_value_is_full(_index_value_is_full),
|
|
index_has_first_key(_index_has_first_key) {}
|
|
|
|
const BlockBasedTableOptions* table_options = nullptr;
|
|
Statistics* statistics = nullptr;
|
|
bool using_zstd = false;
|
|
uint8_t protection_bytes_per_key = 0;
|
|
const Comparator* raw_ucmp = nullptr;
|
|
bool index_value_is_full;
|
|
bool index_has_first_key;
|
|
|
|
// For TypedCacheInterface
|
|
template <typename TBlocklike>
|
|
inline void Create(std::unique_ptr<TBlocklike>* parsed_out,
|
|
size_t* charge_out, const Slice& data,
|
|
MemoryAllocator* alloc) {
|
|
Create(parsed_out,
|
|
BlockContents(AllocateAndCopyBlock(data, alloc), data.size()));
|
|
*charge_out = parsed_out->get()->ApproximateMemoryUsage();
|
|
}
|
|
|
|
void Create(std::unique_ptr<Block_kData>* parsed_out, BlockContents&& block);
|
|
void Create(std::unique_ptr<Block_kIndex>* parsed_out, BlockContents&& block);
|
|
void Create(std::unique_ptr<Block_kFilterPartitionIndex>* parsed_out,
|
|
BlockContents&& block);
|
|
void Create(std::unique_ptr<Block_kRangeDeletion>* parsed_out,
|
|
BlockContents&& block);
|
|
void Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
|
|
BlockContents&& block);
|
|
void Create(std::unique_ptr<ParsedFullFilterBlock>* parsed_out,
|
|
BlockContents&& block);
|
|
void Create(std::unique_ptr<UncompressionDict>* parsed_out,
|
|
BlockContents&& block);
|
|
};
|
|
|
|
// Convenient cache interface to use for block_cache, with support for
|
|
// SecondaryCache.
|
|
template <typename TBlocklike>
|
|
using BlockCacheInterface =
|
|
FullTypedCacheInterface<TBlocklike, BlockCreateContext>;
|
|
|
|
// Shortcut name for cache handles under BlockCacheInterface
|
|
template <typename TBlocklike>
|
|
using BlockCacheTypedHandle =
|
|
typename BlockCacheInterface<TBlocklike>::TypedHandle;
|
|
|
|
// Selects the right helper based on BlockType and CacheTier
|
|
const Cache::CacheItemHelper* GetCacheItemHelper(
|
|
BlockType block_type,
|
|
CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier);
|
|
|
|
// For SFINAE check that a type is "blocklike" with a kCacheEntryRole member.
|
|
// Can get difficult compiler/linker errors without a good check like this.
|
|
template <typename TUse, typename TBlocklike>
|
|
using WithBlocklikeCheck = std::enable_if_t<
|
|
TBlocklike::kCacheEntryRole == CacheEntryRole::kMisc || true, TUse>;
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|