rocksdb/trace_replay/block_cache_tracer.h

240 lines
9 KiB
C
Raw Normal View History

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <atomic>
#include <fstream>
#include "monitoring/instrumented_mutex.h"
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
#include "rocksdb/block_cache_trace_writer.h"
#include "rocksdb/options.h"
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
#include "rocksdb/table_reader_caller.h"
#include "rocksdb/trace_reader_writer.h"
#include "trace_replay/trace_replay.h"
namespace ROCKSDB_NAMESPACE {
class Env;
class SystemClock;
extern const uint64_t kMicrosInSecond;
extern const uint64_t kSecondInMinute;
extern const uint64_t kSecondInHour;
struct BlockCacheTraceRecord;
class BlockCacheTraceHelper {
public:
static bool IsGetOrMultiGetOnDataBlock(TraceType block_type,
TableReaderCaller caller);
static bool IsGetOrMultiGet(TableReaderCaller caller);
static bool IsUserAccess(TableReaderCaller caller);
// Row key is a concatenation of the access's fd_number and the referenced
// user key.
static std::string ComputeRowKey(const BlockCacheTraceRecord& access);
// The first four bytes of the referenced key in a Get request is the table
// id.
static uint64_t GetTableId(const BlockCacheTraceRecord& access);
// The sequence number of a get request is the last part of the referenced
// key.
static uint64_t GetSequenceNumber(const BlockCacheTraceRecord& access);
// Block offset in a file is the last varint64 in the block key.
static uint64_t GetBlockOffsetInFile(const BlockCacheTraceRecord& access);
static const std::string kUnknownColumnFamilyName;
static const uint64_t kReservedGetId;
};
Create a BlockCacheLookupContext to enable fine-grained block cache tracing. (#5421) Summary: BlockCacheLookupContext only contains the caller for now. We will trace block accesses at five places: 1. BlockBasedTable::GetFilter. 2. BlockBasedTable::GetUncompressedDict. 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index, and range deletion block.) 4. BlockBasedTable::Get. (To trace the referenced key and whether the referenced key exists in a fetched data block.) 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the referenced key exists in a fetched data block.) We create the context at: 1. BlockBasedTable::Get. (kUserGet) 2. BlockBasedTable::MultiGet. (kUserMGet) 3. BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or external SST ingestion calls this function.) 4. BlockBasedTable::Open. (kPrefetch) 5. Index/Filter::CacheDependencies. (kPrefetch) 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or kUserApproximateSize). I loaded 1 million key-value pairs into the database and ran the readrandom benchmark with a single thread. I gave the block cache 10 GB to make sure all reads hit the block cache after warmup. The throughput is comparable. Throughput of this PR: 231334 ops/s. Throughput of the master branch: 238428 ops/s. Experiment setup: RocksDB: version 6.2 Date: Mon Jun 10 10:42:51 2019 CPU: 24 * Intel Core Processor (Skylake) CPUCache: 16384 KB Keys: 20 bytes each Values: 100 bytes each (100 bytes after compression) Entries: 1000000 Prefix: 20 bytes Keys per prefix: 0 RawSize: 114.4 MB (estimated) FileSize: 114.4 MB (estimated) Write rate: 0 bytes/second Read rate: 0 ops/second Compression: NoCompression Compression sampling rate: 0 Memtablerep: skip_list Perf Level: 1 Load command: ./db_bench --benchmarks="fillseq" --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000 Run command: ./db_bench --benchmarks="readrandom,stats" --use_existing_db --threads=1 --duration=120 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000 --duration=120 TODOs: 1. Create a caller for external SST file ingestion and differentiate the callers for iterator. 2. Integrate tracer to trace block cache accesses. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5421 Differential Revision: D15704258 Pulled By: HaoyuHuang fbshipit-source-id: 4aa8a55f8cb1576ffb367bfa3186a91d8f06d93a
2019-06-10 22:30:05 +00:00
// Lookup context for tracing block cache accesses.
// We trace block accesses at five places:
// 1. BlockBasedTable::GetFilter
// 2. BlockBasedTable::GetUncompressedDict.
// 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index,
// and range deletion block.)
// 4. BlockBasedTable::Get. (To trace the referenced key and whether the
// referenced key exists in a fetched data block.)
// 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the
// referenced key exists in a fetched data block.)
// The context is created at:
// 1. BlockBasedTable::Get. (kUserGet)
// 2. BlockBasedTable::MultiGet. (kUserMGet)
// 3. BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or
// external SST ingestion calls this function.)
// 4. BlockBasedTable::Open. (kPrefetch)
// 5. Index/Filter::CacheDependencies. (kPrefetch)
// 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or
// kUserApproximateSize).
struct BlockCacheLookupContext {
BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {}
BlockCacheLookupContext(const TableReaderCaller& _caller, uint64_t _get_id,
bool _get_from_user_specified_snapshot)
: caller(_caller),
get_id(_get_id),
get_from_user_specified_snapshot(_get_from_user_specified_snapshot) {}
const TableReaderCaller caller;
// These are populated when we perform lookup/insert on block cache. The block
// cache tracer uses these inforation when logging the block access at
// BlockBasedTable::GET and BlockBasedTable::MultiGet.
bool is_cache_hit = false;
bool no_insert = false;
TraceType block_type = TraceType::kTraceMax;
uint64_t block_size = 0;
std::string block_key;
uint64_t num_keys_in_block = 0;
// The unique id associated with Get and MultiGet. This enables us to track
// how many blocks a Get/MultiGet request accesses. We can also measure the
// impact of row cache vs block cache.
uint64_t get_id = 0;
std::string referenced_key;
bool get_from_user_specified_snapshot = false;
void FillLookupContext(bool _is_cache_hit, bool _no_insert,
TraceType _block_type, uint64_t _block_size,
const std::string& _block_key,
uint64_t _num_keys_in_block) {
is_cache_hit = _is_cache_hit;
no_insert = _no_insert;
block_type = _block_type;
block_size = _block_size;
block_key = _block_key;
num_keys_in_block = _num_keys_in_block;
}
Create a BlockCacheLookupContext to enable fine-grained block cache tracing. (#5421) Summary: BlockCacheLookupContext only contains the caller for now. We will trace block accesses at five places: 1. BlockBasedTable::GetFilter. 2. BlockBasedTable::GetUncompressedDict. 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index, and range deletion block.) 4. BlockBasedTable::Get. (To trace the referenced key and whether the referenced key exists in a fetched data block.) 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the referenced key exists in a fetched data block.) We create the context at: 1. BlockBasedTable::Get. (kUserGet) 2. BlockBasedTable::MultiGet. (kUserMGet) 3. BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or external SST ingestion calls this function.) 4. BlockBasedTable::Open. (kPrefetch) 5. Index/Filter::CacheDependencies. (kPrefetch) 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or kUserApproximateSize). I loaded 1 million key-value pairs into the database and ran the readrandom benchmark with a single thread. I gave the block cache 10 GB to make sure all reads hit the block cache after warmup. The throughput is comparable. Throughput of this PR: 231334 ops/s. Throughput of the master branch: 238428 ops/s. Experiment setup: RocksDB: version 6.2 Date: Mon Jun 10 10:42:51 2019 CPU: 24 * Intel Core Processor (Skylake) CPUCache: 16384 KB Keys: 20 bytes each Values: 100 bytes each (100 bytes after compression) Entries: 1000000 Prefix: 20 bytes Keys per prefix: 0 RawSize: 114.4 MB (estimated) FileSize: 114.4 MB (estimated) Write rate: 0 bytes/second Read rate: 0 ops/second Compression: NoCompression Compression sampling rate: 0 Memtablerep: skip_list Perf Level: 1 Load command: ./db_bench --benchmarks="fillseq" --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000 Run command: ./db_bench --benchmarks="readrandom,stats" --use_existing_db --threads=1 --duration=120 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --statistics --cache_index_and_filter_blocks --cache_size=10737418240 --disable_auto_compactions=1 --disable_wal=1 --compression_type=none --min_level_to_compress=-1 --compression_ratio=1 --num=1000000 --duration=120 TODOs: 1. Create a caller for external SST file ingestion and differentiate the callers for iterator. 2. Integrate tracer to trace block cache accesses. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5421 Differential Revision: D15704258 Pulled By: HaoyuHuang fbshipit-source-id: 4aa8a55f8cb1576ffb367bfa3186a91d8f06d93a
2019-06-10 22:30:05 +00:00
};
struct BlockCacheTraceHeader {
uint64_t start_time;
uint32_t rocksdb_major_version;
uint32_t rocksdb_minor_version;
};
// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
// user-provided TraceWriter. Every RocksDB operation is written as a single
// trace. Each trace will have a timestamp and type, followed by the trace
// payload.
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
class BlockCacheTraceWriterImpl : public BlockCacheTraceWriter {
public:
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
BlockCacheTraceWriterImpl(SystemClock* clock,
const BlockCacheTraceWriterOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer);
~BlockCacheTraceWriterImpl() = default;
// No copy and move.
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
BlockCacheTraceWriterImpl(const BlockCacheTraceWriterImpl&) = delete;
BlockCacheTraceWriterImpl& operator=(const BlockCacheTraceWriterImpl&) =
delete;
BlockCacheTraceWriterImpl(BlockCacheTraceWriterImpl&&) = delete;
BlockCacheTraceWriterImpl& operator=(BlockCacheTraceWriterImpl&&) = delete;
// Pass Slice references to avoid copy.
Status WriteBlockAccess(const BlockCacheTraceRecord& record,
const Slice& block_key, const Slice& cf_name,
const Slice& referenced_key) override;
// Write a trace header at the beginning, typically on initiating a trace,
// with some metadata like a magic number and RocksDB version.
Status WriteHeader() override;
private:
SystemClock* clock_;
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
BlockCacheTraceWriterOptions trace_options_;
std::unique_ptr<TraceWriter> trace_writer_;
};
// Write a trace record in human readable format, see
// https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format
// for details.
class BlockCacheHumanReadableTraceWriter {
public:
~BlockCacheHumanReadableTraceWriter();
Status NewWritableFile(const std::string& human_readable_trace_file_path,
ROCKSDB_NAMESPACE::Env* env);
Status WriteHumanReadableTraceRecord(const BlockCacheTraceRecord& access,
uint64_t block_id, uint64_t get_key_id);
private:
char trace_record_buffer_[1024 * 1024];
std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>
human_readable_trace_file_writer_;
};
// BlockCacheTraceReader helps read the trace file generated by
// BlockCacheTraceWriter using a user provided TraceReader.
class BlockCacheTraceReader {
public:
BlockCacheTraceReader(std::unique_ptr<TraceReader>&& reader);
virtual ~BlockCacheTraceReader() = default;
// No copy and move.
BlockCacheTraceReader(const BlockCacheTraceReader&) = delete;
BlockCacheTraceReader& operator=(const BlockCacheTraceReader&) = delete;
BlockCacheTraceReader(BlockCacheTraceReader&&) = delete;
BlockCacheTraceReader& operator=(BlockCacheTraceReader&&) = delete;
Status ReadHeader(BlockCacheTraceHeader* header);
Status ReadAccess(BlockCacheTraceRecord* record);
private:
std::unique_ptr<TraceReader> trace_reader_;
};
// Read a trace record in human readable format, see
// https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format
// for detailed.
class BlockCacheHumanReadableTraceReader : public BlockCacheTraceReader {
public:
BlockCacheHumanReadableTraceReader(const std::string& trace_file_path);
~BlockCacheHumanReadableTraceReader();
Status ReadHeader(BlockCacheTraceHeader* header);
Status ReadAccess(BlockCacheTraceRecord* record);
private:
std::ifstream human_readable_trace_reader_;
};
// A block cache tracer. It downsamples the accesses according to
// trace_options and uses BlockCacheTraceWriter to write the access record to
// the trace file.
class BlockCacheTracer {
public:
BlockCacheTracer();
~BlockCacheTracer();
// No copy and move.
BlockCacheTracer(const BlockCacheTracer&) = delete;
BlockCacheTracer& operator=(const BlockCacheTracer&) = delete;
BlockCacheTracer(BlockCacheTracer&&) = delete;
BlockCacheTracer& operator=(BlockCacheTracer&&) = delete;
// Start writing block cache accesses to the trace_writer.
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
Status StartTrace(const BlockCacheTraceOptions& trace_options,
std::unique_ptr<BlockCacheTraceWriter>&& trace_writer);
// Stop writing block cache accesses to the trace_writer.
void EndTrace();
bool is_tracing_enabled() const {
return writer_.load(std::memory_order_relaxed);
}
Status WriteBlockAccess(const BlockCacheTraceRecord& record,
const Slice& block_key, const Slice& cf_name,
const Slice& referenced_key);
// GetId cycles from 1 to std::numeric_limits<uint64_t>::max().
uint64_t NextGetId();
private:
Refactor block cache tracing APIs (#10811) Summary: Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Currently, only a TraceWriter is supported, with a default built-in implementation of FileTraceWriter. The TraceWriter, however, takes a flat trace record and is thus only suitable for file tracing. This PR introduces an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. `DB::StartBlockTrace` will internally redirect to changed `BlockCacheTrace::StartBlockCacheTrace`. New API `DB::StartBlockTrace` is also added that directly takes `BlockCacheTraceWriter` pointer. This same philosophy can be applied to KV and IO tracing as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10811 Test Plan: existing unit tests Old API DB::StartBlockTrace checked with db_bench tool create database ``` ./db_bench --benchmarks="fillseq" \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 ``` To trace block cache accesses when running readrandom benchmark: ``` ./db_bench --benchmarks="readrandom" --use_existing_db --duration=60 \ --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 \ --cache_index_and_filter_blocks --cache_size=1048576 \ --disable_auto_compactions=1 --disable_wal=1 --compression_type=none \ --min_level_to_compress=-1 --compression_ratio=1 --num=10000000 \ --threads=16 \ -block_cache_trace_file="/tmp/binary_trace_test_example" \ -block_cache_trace_max_trace_file_size_in_bytes=1073741824 \ -block_cache_trace_sampling_frequency=1 ``` Reviewed By: anand1976 Differential Revision: D40435289 Pulled By: akankshamahajan15 fbshipit-source-id: fa2755f4788185e19f4605e731641cfd21ab3282
2022-10-21 19:15:35 +00:00
BlockCacheTraceOptions trace_options_;
// A mutex protects the writer_.
InstrumentedMutex trace_writer_mutex_;
std::atomic<BlockCacheTraceWriter*> writer_;
std::atomic<uint64_t> get_id_counter_;
};
} // namespace ROCKSDB_NAMESPACE