Skip subsequent trace writes after encountering trace write failure (#11996)

Summary:
**Context/Summary:**
We ignore trace writing status e.g, 543191f2ea/db/db_impl/db_impl_write.cc (L221-L222)

If a write into the trace file fails, subsequent trace write will continue onto the same file.

This will trigger the assertion `assert(sync_without_flush_called_)` intended to catch write to a file that has previously seen error, added in https://github.com/facebook/rocksdb/pull/10489, https://github.com/facebook/rocksdb/pull/10555

Alternative (rejected) is to handle trace writing status at a higher level at e.g, 543191f2ea/db/db_impl/db_impl_write.cc (L221-L222). However, it makes sense to ignore such status considering tracing is not a critical but assistant component to db operation. And this alternative requires more code change. So it's better to handle the failure at a lower level as this PR

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11996

Test Plan: Add new UT failed before this PR and pass after

Reviewed By: akankshamahajan15

Differential Revision: D50532467

Pulled By: hx235

fbshipit-source-id: f2032abafd94917adbf89a20841d15b448782a33
This commit is contained in:
Hui Xiao 2023-10-24 09:58:02 -07:00 committed by Facebook GitHub Bot
parent 519f2a41fb
commit 99b371b417
4 changed files with 58 additions and 2 deletions

View File

@ -31,6 +31,8 @@ int main() {
#include "test_util/testutil.h"
#include "tools/trace_analyzer_tool.h"
#include "trace_replay/trace_replay.h"
#include "utilities/fault_injection_env.h"
#include "utilities/trace/file_trace_reader_writer.h"
namespace ROCKSDB_NAMESPACE {
@ -785,6 +787,48 @@ TEST_F(TraceAnalyzerTest, Iterator) {
*/
}
TEST_F(TraceAnalyzerTest, ExistsPreviousTraceWriteError) {
DB* db_ = nullptr;
Options options;
options.create_if_missing = true;
std::unique_ptr<FaultInjectionTestEnv> fault_env(
new FaultInjectionTestEnv(env_));
const std::string trace_path =
test_path_ + "/previous_trace_write_error_trace";
std::unique_ptr<TraceWriter> trace_writer;
ASSERT_OK(NewFileTraceWriter(fault_env.get(), env_options_, trace_path,
&trace_writer));
ASSERT_OK(DB::Open(options, dbname_, &db_));
ASSERT_OK(db_->StartTrace(TraceOptions(), std::move(trace_writer)));
// Inject write error on the first trace write.
// This trace write is made big enough to actually write to FS for error
// injection.
const std::string kBigKey(1000000, 'k');
const std::string kBigValue(1000000, 'v');
fault_env->SetFilesystemActive(false, Status::IOError("Injected"));
ASSERT_OK(db_->Put(WriteOptions(), kBigKey, kBigValue));
fault_env->SetFilesystemActive(true);
// Without proper handling of the previous trace write error,
// this trace write will continue and crash the db (in DEBUG_LEVEL > 0)
// due to writing to the trace file that has seen error.
ASSERT_OK(db_->Put(WriteOptions(), kBigKey, kBigValue));
// Verify `EndTrace()` returns the previous write trace error if any
Status s = db_->EndTrace();
ASSERT_TRUE(s.IsIncomplete());
ASSERT_TRUE(s.ToString().find("Tracing has seen error") != std::string::npos);
ASSERT_TRUE(s.ToString().find("Injected") != std::string::npos);
delete db_;
ASSERT_OK(DestroyDB(dbname_, options));
}
// Test analyzing of multiget
TEST_F(TraceAnalyzerTest, MultiGet) {
std::string trace_path = test_path_ + "/trace";

View File

@ -345,7 +345,8 @@ Tracer::Tracer(SystemClock* clock, const TraceOptions& trace_options,
: clock_(clock),
trace_options_(trace_options),
trace_writer_(std::move(trace_writer)),
trace_request_count_(0) {
trace_request_count_(0),
trace_write_status_(Status::OK()) {
// TODO: What if this fails?
WriteHeader().PermitUncheckedError();
}
@ -612,9 +613,18 @@ Status Tracer::WriteFooter() {
}
Status Tracer::WriteTrace(const Trace& trace) {
if (!trace_write_status_.ok()) {
return Status::Incomplete("Tracing has seen error: %s",
trace_write_status_.ToString());
}
assert(trace_write_status_.ok());
std::string encoded_trace;
TracerHelper::EncodeTrace(trace, &encoded_trace);
return trace_writer_->Write(Slice(encoded_trace));
Status s = trace_writer_->Write(Slice(encoded_trace));
if (!s.ok()) {
trace_write_status_ = s;
}
return s;
}
Status Tracer::Close() { return WriteFooter(); }

View File

@ -178,6 +178,7 @@ class Tracer {
TraceOptions trace_options_;
std::unique_ptr<TraceWriter> trace_writer_;
uint64_t trace_request_count_;
Status trace_write_status_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -0,0 +1 @@
If an error occurs when writing to a trace file after `DB::StartTrace()`, the subsequent trace writes are skipped to avoid writing to a file that has previously seen error. In this case, `DB::EndTrace()` will also return a non-ok status with info about the error occured previously in its status message.