// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "utilities/trace/replayer_impl.h"

#include <cmath>
#include <thread>

#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/system_clock.h"
#include "util/threadpool_imp.h"

namespace ROCKSDB_NAMESPACE {

ReplayerImpl::ReplayerImpl(DB* db,
                           const std::vector<ColumnFamilyHandle*>& handles,
                           std::unique_ptr<TraceReader>&& reader)
    : Replayer(),
      trace_reader_(std::move(reader)),
      prepared_(false),
      trace_end_(false),
      header_ts_(0),
      exec_handler_(TraceRecord::NewExecutionHandler(db, handles)),
      env_(db->GetEnv()),
      trace_file_version_(-1) {}

ReplayerImpl::~ReplayerImpl() {
  exec_handler_.reset();
  trace_reader_.reset();
}
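
// A minimal usage sketch (illustrative only; `db`, `handles`, and `reader`
// are hypothetical and assumed to exist elsewhere). Prepare() must succeed
// before Next(), Execute(), or Replay() can be used:
//
//   ReplayerImpl replayer(db, handles, std::move(reader));
//   Status s = replayer.Prepare();
//   std::unique_ptr<TraceRecord> record;
//   while (s.ok() && replayer.Next(&record).ok()) {
//     s = replayer.Execute(record, /*result=*/nullptr);
//   }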

Status ReplayerImpl::Prepare() {
  Trace header;
  int db_version;
  Status s = ReadHeader(&header);
  if (!s.ok()) {
    return s;
  }
  s = TracerHelper::ParseTraceHeader(header, &trace_file_version_, &db_version);
  if (!s.ok()) {
    return s;
  }
  header_ts_ = header.ts;
  prepared_ = true;
  trace_end_ = false;
  return Status::OK();
}
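
// Note: Prepare() resets the underlying TraceReader (see ReadHeader()) and
// clears trace_end_, so calling it again after a replay finishes restarts
// reading from the beginning of the trace file.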

Status ReplayerImpl::Next(std::unique_ptr<TraceRecord>* record) {
  if (!prepared_) {
    return Status::Incomplete("Not prepared!");
  }
  if (trace_end_) {
    return Status::Incomplete("Trace end.");
  }

  Trace trace;
  Status s = ReadTrace(&trace);  // ReadTrace is atomic
  // Reached the trace end.
  if (s.ok() && trace.type == kTraceEnd) {
    trace_end_ = true;
    return Status::Incomplete("Trace end.");
  }
  if (!s.ok() || record == nullptr) {
    return s;
  }

  return TracerHelper::DecodeTraceRecord(&trace, trace_file_version_, record);
}

Status ReplayerImpl::Execute(const std::unique_ptr<TraceRecord>& record,
                             std::unique_ptr<TraceRecordResult>* result) {
  return record->Accept(exec_handler_.get(), result);
}

Status ReplayerImpl::Replay(
    const ReplayOptions& options,
    const std::function<void(Status, std::unique_ptr<TraceRecordResult>&&)>&
        result_callback) {
  if (options.fast_forward <= 0.0) {
    return Status::InvalidArgument("Wrong fast forward speed!");
  }

  if (!prepared_) {
    return Status::Incomplete("Not prepared!");
  }
  if (trace_end_) {
    return Status::Incomplete("Trace end.");
  }
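
  // Replay pacing: each record is scheduled at
  //   replay_epoch + (trace.ts - header_ts_) / options.fast_forward
  // microseconds, so fast_forward > 1.0 replays faster than the traced
  // workload and values in (0, 1.0) replay slower.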

  Status s = Status::OK();

  if (options.num_threads <= 1) {
    // num_threads == 0 or num_threads == 1 uses a single thread.
    std::chrono::system_clock::time_point replay_epoch =
        std::chrono::system_clock::now();

    while (s.ok()) {
      Trace trace;
      s = ReadTrace(&trace);
      // If already at trace end, ReadTrace should return Status::Incomplete().
      if (!s.ok()) {
        break;
      }

      // No need to sleep before breaking the loop if at the trace end.
      if (trace.type == kTraceEnd) {
        trace_end_ = true;
        s = Status::Incomplete("Trace end.");
        break;
      }

      // In single-threaded replay, decode first, then sleep.
      std::unique_ptr<TraceRecord> record;
      s = TracerHelper::DecodeTraceRecord(&trace, trace_file_version_, &record);
      if (!s.ok() && !s.IsNotSupported()) {
        break;
      }

      std::chrono::system_clock::time_point sleep_to =
          replay_epoch +
          std::chrono::microseconds(static_cast<uint64_t>(std::llround(
              1.0 * (trace.ts - header_ts_) / options.fast_forward)));
      if (sleep_to > std::chrono::system_clock::now()) {
        std::this_thread::sleep_until(sleep_to);
      }

      // Skip unsupported traces; stop for other errors.
      if (s.IsNotSupported()) {
        if (result_callback != nullptr) {
          result_callback(s, nullptr);
        }
        s = Status::OK();
        continue;
      }

      if (result_callback == nullptr) {
        s = Execute(record, nullptr);
      } else {
        std::unique_ptr<TraceRecordResult> res;
        s = Execute(record, &res);
        result_callback(s, std::move(res));
      }
    }
  } else {
    // Multi-threaded replay.
    ThreadPoolImpl thread_pool;
    thread_pool.SetHostEnv(env_);
    thread_pool.SetBackgroundThreads(static_cast<int>(options.num_threads));

    std::mutex mtx;
    // Background decoding and execution status.
    Status bg_s = Status::OK();
    uint64_t last_err_ts = static_cast<uint64_t>(-1);
    // Callback used in background work to update bg_s for the earliest
    // TraceRecord that fails to execute. "Earliest" is determined by the
    // TraceRecord's own timestamp, not by when its execution error occurs.
    //
    // Suppose TraceRecords R1 and R2 have timestamps T1 < T2, and their
    // executions span [T1_start, T1_end] and [T2_start, T2_end].
    // Single-thread: T1_start < T1_end < T2_start < T2_end always holds.
    // Multi-thread: even T1_start < T2_start is not guaranteed; the
    // execution order is nondeterministic.
    // To report the same "first" error in both single-threaded and
    // multi-threaded replay, we can only rely on the TraceRecords'
    // timestamps rather than their execution timestamps. In single-threaded
    // replay, the first error by record timestamp is also the first (and
    // only) error in execution order; in multi-threaded replay, it may be
    // neither the first nor the last error to occur during execution.
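    // For example, if R1 (ts = 100) and R2 (ts = 200) both fail, bg_s ends
    // up holding R1's error even if R2's execution happened to fail first.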
    auto error_cb = [&mtx, &bg_s, &last_err_ts](Status err, uint64_t err_ts) {
      std::lock_guard<std::mutex> gd(mtx);
      // Only record the first error.
      if (!err.ok() && !err.IsNotSupported() && err_ts < last_err_ts) {
        bg_s = err;
        last_err_ts = err_ts;
      }
    };

    std::chrono::system_clock::time_point replay_epoch =
        std::chrono::system_clock::now();

    while (bg_s.ok() && s.ok()) {
      Trace trace;
      s = ReadTrace(&trace);
      // If already at trace end, ReadTrace should return Status::Incomplete().
      if (!s.ok()) {
        break;
      }

      TraceType trace_type = trace.type;

      // No need to sleep before breaking the loop if at the trace end.
      if (trace_type == kTraceEnd) {
        trace_end_ = true;
        s = Status::Incomplete("Trace end.");
        break;
      }

      // In multi-threaded replay, sleep first, then start decoding and
      // execution in a background thread.
      std::chrono::system_clock::time_point sleep_to =
          replay_epoch +
          std::chrono::microseconds(static_cast<uint64_t>(std::llround(
              1.0 * (trace.ts - header_ts_) / options.fast_forward)));
      if (sleep_to > std::chrono::system_clock::now()) {
        std::this_thread::sleep_until(sleep_to);
      }

      if (trace_type == kTraceWrite || trace_type == kTraceGet ||
          trace_type == kTraceIteratorSeek ||
          trace_type == kTraceIteratorSeekForPrev ||
          trace_type == kTraceMultiGet) {
        std::unique_ptr<ReplayerWorkerArg> ra(new ReplayerWorkerArg);
        ra->trace_entry = std::move(trace);
        ra->handler = exec_handler_.get();
        ra->trace_file_version = trace_file_version_;
        ra->error_cb = error_cb;
        ra->result_cb = result_callback;
        thread_pool.Schedule(&ReplayerImpl::BackgroundWork, ra.release(),
                             nullptr, nullptr);
      } else {
        // Skip unsupported traces.
        if (result_callback != nullptr) {
          result_callback(Status::NotSupported("Unsupported trace type."),
                          nullptr);
        }
      }
    }

    thread_pool.WaitForJobsAndJoinAllThreads();
    if (!bg_s.ok()) {
      s = bg_s;
    }
  }

  if (s.IsIncomplete()) {
    // Reaching EOF currently returns an Incomplete status. This can happen
    // when a process was killed without calling the EndTrace() API.
    // TODO: Add better error handling.
    trace_end_ = true;
    return Status::OK();
  }
  return s;
}

uint64_t ReplayerImpl::GetHeaderTimestamp() const { return header_ts_; }

Status ReplayerImpl::ReadHeader(Trace* header) {
  assert(header != nullptr);
  Status s = trace_reader_->Reset();
  if (!s.ok()) {
    return s;
  }
  std::string encoded_trace;
  // Read the trace header.
  s = trace_reader_->Read(&encoded_trace);
  if (!s.ok()) {
    return s;
  }

  return TracerHelper::DecodeHeader(encoded_trace, header);
}

Status ReplayerImpl::ReadTrace(Trace* trace) {
  assert(trace != nullptr);
  std::string encoded_trace;
  // We don't know whether the TraceReader implementation is thread-safe, so
  // the read is protected by a mutex. The decoding part does not need to be
  // protected since it only touches local state.
  {
    std::lock_guard<std::mutex> guard(mutex_);
    Status s = trace_reader_->Read(&encoded_trace);
    if (!s.ok()) {
      return s;
    }
  }
  return TracerHelper::DecodeTrace(encoded_trace, trace);
}

void ReplayerImpl::BackgroundWork(void* arg) {
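  // Takes ownership of the heap-allocated ReplayerWorkerArg that Replay()
  // released into the thread pool.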
  std::unique_ptr<ReplayerWorkerArg> ra(static_cast<ReplayerWorkerArg*>(arg));
  assert(ra != nullptr);

  std::unique_ptr<TraceRecord> record;
  Status s = TracerHelper::DecodeTraceRecord(&(ra->trace_entry),
                                             ra->trace_file_version, &record);
  if (!s.ok()) {
    // Stop the replay.
    if (ra->error_cb != nullptr) {
      ra->error_cb(s, ra->trace_entry.ts);
    }
    // Report the result.
    if (ra->result_cb != nullptr) {
      ra->result_cb(s, nullptr);
    }
    return;
  }

  if (ra->result_cb == nullptr) {
    s = record->Accept(ra->handler, nullptr);
  } else {
    std::unique_ptr<TraceRecordResult> res;
    s = record->Accept(ra->handler, &res);
    ra->result_cb(s, std::move(res));
  }
  record.reset();
}

}  // namespace ROCKSDB_NAMESPACE