mirror of
https://github.com/facebook/rocksdb.git
synced 2024-12-04 20:02:50 +00:00
f799c73d28
Summary: Currently, some numbers in the `tracer_analyzer_tool` may be a little confusing and unfriendly for people who want to add new query types. It may be better to replace them with the existing enumeration type to improve readability. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10827 Reviewed By: ajkr Differential Revision: D40576023 Pulled By: hx235 fbshipit-source-id: 0eb16820a15f365d53e848a3a8efd92928420429
185 lines
6 KiB
C++
185 lines
6 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <unordered_map>
|
|
#include <utility>
|
|
|
|
#include "rocksdb/options.h"
|
|
#include "rocksdb/rocksdb_namespace.h"
|
|
#include "rocksdb/status.h"
|
|
#include "rocksdb/trace_record.h"
|
|
#include "rocksdb/utilities/replayer.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
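// This file contains the Tracer class and the helpers that enable capturing
// RocksDB traces. No Replayer class is defined in this file; for replaying
// traces see the included rocksdb/utilities/replayer.h.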
|
|
|
|
// Forward declarations of types that this header only refers to by pointer
// or reference.
class ColumnFamilyHandle;
class ColumnFamilyData;
class DB;
class DBImpl;
class Env;
class Slice;
class SystemClock;
class TraceReader;
class TraceWriter;
class WriteBatch;

struct ReadOptions;
struct TraceOptions;
struct WriteOptions;
|
|
|
|
// Trace magic string; declared here and defined in another translation unit.
extern const std::string kTraceMagic;

// Sizes of the fixed-width fields that make up the metadata of a trace
// (presumably in bytes: the timestamp size matches sizeof(uint64_t) --
// confirm against the encoder).
constexpr unsigned int kTraceTimestampSize = 8;
constexpr unsigned int kTraceTypeSize = 1;
constexpr unsigned int kTracePayloadLengthSize = 4;
// Total metadata size: the sum of the three fields above.
constexpr unsigned int kTraceMetadataSize =
    kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;

// Version of the trace file format, as major.minor.
constexpr int kTraceFileMajorVersion = 0;
constexpr int kTraceFileMinorVersion = 2;
|
|
|
|
// The data structure that defines a single trace.
|
|
struct Trace {
|
|
uint64_t ts; // timestamp
|
|
TraceType type;
|
|
// Each bit in payload_map stores which corresponding struct member added in
|
|
// the payload. Each TraceType has its corresponding payload struct. For
|
|
// example, if bit at position 0 is set in write payload, then the write batch
|
|
// will be addedd.
|
|
uint64_t payload_map = 0;
|
|
// Each trace type has its own payload_struct, which will be serialized in the
|
|
// payload.
|
|
std::string payload;
|
|
|
|
void reset() {
|
|
ts = 0;
|
|
type = kTraceMax;
|
|
payload_map = 0;
|
|
payload.clear();
|
|
}
|
|
};
|
|
|
|
// Flags naming the individual members of the query payload structs.
enum TracePayloadType : char {
  // Each member of all query payload structs should have a corresponding flag
  // here. Make sure to add new flags sequentially, in the order in which the
  // members are added.
  kEmptyPayload = 0,
  kWriteBatchData = 1,
  kGetCFID = 2,
  kGetKey = 3,
  kIterCFID = 4,
  kIterKey = 5,
  kIterLowerBound = 6,
  kIterUpperBound = 7,
  kMultiGetSize = 8,
  kMultiGetCFIDs = 9,
  kMultiGetKeys = 10,
};
|
|
|
|
class TracerHelper {
|
|
public:
|
|
// Parse the string with major and minor version only
|
|
static Status ParseVersionStr(std::string& v_string, int* v_num);
|
|
|
|
// Parse the trace file version and db version in trace header
|
|
static Status ParseTraceHeader(const Trace& header, int* trace_version,
|
|
int* db_version);
|
|
|
|
// Encode a version 0.1 trace object into the given string.
|
|
static void EncodeTrace(const Trace& trace, std::string* encoded_trace);
|
|
|
|
// Decode a string into the given trace object.
|
|
static Status DecodeTrace(const std::string& encoded_trace, Trace* trace);
|
|
|
|
// Decode a string into the given trace header.
|
|
static Status DecodeHeader(const std::string& encoded_trace, Trace* header);
|
|
|
|
// Set the payload map based on the payload type
|
|
static bool SetPayloadMap(uint64_t& payload_map,
|
|
const TracePayloadType payload_type);
|
|
|
|
// Decode a Trace object into the corresponding TraceRecord.
|
|
// Return Status::OK() if nothing is wrong, record will be set accordingly.
|
|
// Return Status::NotSupported() if the trace type is not support, or the
|
|
// corresponding error status, record will be set to nullptr.
|
|
static Status DecodeTraceRecord(Trace* trace, int trace_file_version,
|
|
std::unique_ptr<TraceRecord>* record);
|
|
};
|
|
|
|
// Tracer captures all RocksDB operations using a user-provided TraceWriter.
|
|
// Every RocksDB operation is written as a single trace. Each trace will have a
|
|
// timestamp and type, followed by the trace payload.
|
|
class Tracer {
|
|
public:
|
|
Tracer(SystemClock* clock, const TraceOptions& trace_options,
|
|
std::unique_ptr<TraceWriter>&& trace_writer);
|
|
~Tracer();
|
|
|
|
// Trace all write operations -- Put, Merge, Delete, SingleDelete, Write
|
|
Status Write(WriteBatch* write_batch);
|
|
|
|
// Trace Get operations.
|
|
Status Get(ColumnFamilyHandle* cfname, const Slice& key);
|
|
|
|
// Trace Iterators.
|
|
Status IteratorSeek(const uint32_t& cf_id, const Slice& key,
|
|
const Slice& lower_bound, const Slice upper_bound);
|
|
Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key,
|
|
const Slice& lower_bound, const Slice upper_bound);
|
|
|
|
// Trace MultiGet
|
|
|
|
Status MultiGet(const size_t num_keys, ColumnFamilyHandle** column_families,
|
|
const Slice* keys);
|
|
|
|
Status MultiGet(const size_t num_keys, ColumnFamilyHandle* column_family,
|
|
const Slice* keys);
|
|
|
|
Status MultiGet(const std::vector<ColumnFamilyHandle*>& column_family,
|
|
const std::vector<Slice>& keys);
|
|
|
|
// Returns true if the trace is over the configured max trace file limit.
|
|
// False otherwise.
|
|
bool IsTraceFileOverMax();
|
|
|
|
// Returns true if the order of write trace records must match the order of
|
|
// the corresponding records logged to WAL and applied to the DB.
|
|
bool IsWriteOrderPreserved() { return trace_options_.preserve_write_order; }
|
|
|
|
// Writes a trace footer at the end of the tracing
|
|
Status Close();
|
|
|
|
private:
|
|
// Write a trace header at the beginning, typically on initiating a trace,
|
|
// with some metadata like a magic number, trace version, RocksDB version, and
|
|
// trace format.
|
|
Status WriteHeader();
|
|
|
|
// Write a trace footer, typically on ending a trace, with some metadata.
|
|
Status WriteFooter();
|
|
|
|
// Write a single trace using the provided TraceWriter to the underlying
|
|
// system, say, a filesystem or a streaming service.
|
|
Status WriteTrace(const Trace& trace);
|
|
|
|
// Helps in filtering and sampling of traces.
|
|
// Returns true if a trace should be skipped, false otherwise.
|
|
bool ShouldSkipTrace(const TraceType& type);
|
|
|
|
SystemClock* clock_;
|
|
TraceOptions trace_options_;
|
|
std::unique_ptr<TraceWriter> trace_writer_;
|
|
uint64_t trace_request_count_;
|
|
Status trace_write_status_;
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|