mirror of https://github.com/facebook/rocksdb.git
Skip subsequent trace writes after encountering trace write failure (#11996)
Summary: **Context/Summary:** We ignore trace writing status e.g,543191f2ea/db/db_impl/db_impl_write.cc (L221-L222)
If a write into the trace file fails, subsequent trace write will continue onto the same file. This will trigger the assertion `assert(sync_without_flush_called_)` intended to catch write to a file that has previously seen error, added in https://github.com/facebook/rocksdb/pull/10489, https://github.com/facebook/rocksdb/pull/10555 Alternative (rejected) is to handle trace writing status at a higher level at e.g,543191f2ea/db/db_impl/db_impl_write.cc (L221-L222)
. However, it makes sense to ignore such status considering tracing is not a critical but assistant component to db operation. And this alternative requires more code change. So it's better to handle the failure at a lower level as this PR Pull Request resolved: https://github.com/facebook/rocksdb/pull/11996 Test Plan: Add new UT failed before this PR and pass after Reviewed By: akankshamahajan15 Differential Revision: D50532467 Pulled By: hx235 fbshipit-source-id: f2032abafd94917adbf89a20841d15b448782a33
This commit is contained in:
parent
519f2a41fb
commit
99b371b417
|
@ -31,6 +31,8 @@ int main() {
|
|||
#include "test_util/testutil.h"
|
||||
#include "tools/trace_analyzer_tool.h"
|
||||
#include "trace_replay/trace_replay.h"
|
||||
#include "utilities/fault_injection_env.h"
|
||||
#include "utilities/trace/file_trace_reader_writer.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
|
@ -785,6 +787,48 @@ TEST_F(TraceAnalyzerTest, Iterator) {
|
|||
*/
|
||||
}
|
||||
|
||||
TEST_F(TraceAnalyzerTest, ExistsPreviousTraceWriteError) {
|
||||
DB* db_ = nullptr;
|
||||
Options options;
|
||||
options.create_if_missing = true;
|
||||
|
||||
std::unique_ptr<FaultInjectionTestEnv> fault_env(
|
||||
new FaultInjectionTestEnv(env_));
|
||||
const std::string trace_path =
|
||||
test_path_ + "/previous_trace_write_error_trace";
|
||||
std::unique_ptr<TraceWriter> trace_writer;
|
||||
ASSERT_OK(NewFileTraceWriter(fault_env.get(), env_options_, trace_path,
|
||||
&trace_writer));
|
||||
|
||||
ASSERT_OK(DB::Open(options, dbname_, &db_));
|
||||
ASSERT_OK(db_->StartTrace(TraceOptions(), std::move(trace_writer)));
|
||||
|
||||
// Inject write error on the first trace write.
|
||||
// This trace write is made big enough to actually write to FS for error
|
||||
// injection.
|
||||
const std::string kBigKey(1000000, 'k');
|
||||
const std::string kBigValue(1000000, 'v');
|
||||
fault_env->SetFilesystemActive(false, Status::IOError("Injected"));
|
||||
|
||||
ASSERT_OK(db_->Put(WriteOptions(), kBigKey, kBigValue));
|
||||
|
||||
fault_env->SetFilesystemActive(true);
|
||||
|
||||
// Without proper handling of the previous trace write error,
|
||||
// this trace write will continue and crash the db (in DEBUG_LEVEL > 0)
|
||||
// due to writing to the trace file that has seen error.
|
||||
ASSERT_OK(db_->Put(WriteOptions(), kBigKey, kBigValue));
|
||||
|
||||
// Verify `EndTrace()` returns the previous write trace error if any
|
||||
Status s = db_->EndTrace();
|
||||
ASSERT_TRUE(s.IsIncomplete());
|
||||
ASSERT_TRUE(s.ToString().find("Tracing has seen error") != std::string::npos);
|
||||
ASSERT_TRUE(s.ToString().find("Injected") != std::string::npos);
|
||||
|
||||
delete db_;
|
||||
ASSERT_OK(DestroyDB(dbname_, options));
|
||||
}
|
||||
|
||||
// Test analyzing of multiget
|
||||
TEST_F(TraceAnalyzerTest, MultiGet) {
|
||||
std::string trace_path = test_path_ + "/trace";
|
||||
|
|
|
@ -345,7 +345,8 @@ Tracer::Tracer(SystemClock* clock, const TraceOptions& trace_options,
|
|||
: clock_(clock),
|
||||
trace_options_(trace_options),
|
||||
trace_writer_(std::move(trace_writer)),
|
||||
trace_request_count_(0) {
|
||||
trace_request_count_(0),
|
||||
trace_write_status_(Status::OK()) {
|
||||
// TODO: What if this fails?
|
||||
WriteHeader().PermitUncheckedError();
|
||||
}
|
||||
|
@ -612,9 +613,18 @@ Status Tracer::WriteFooter() {
|
|||
}
|
||||
|
||||
Status Tracer::WriteTrace(const Trace& trace) {
|
||||
if (!trace_write_status_.ok()) {
|
||||
return Status::Incomplete("Tracing has seen error: %s",
|
||||
trace_write_status_.ToString());
|
||||
}
|
||||
assert(trace_write_status_.ok());
|
||||
std::string encoded_trace;
|
||||
TracerHelper::EncodeTrace(trace, &encoded_trace);
|
||||
return trace_writer_->Write(Slice(encoded_trace));
|
||||
Status s = trace_writer_->Write(Slice(encoded_trace));
|
||||
if (!s.ok()) {
|
||||
trace_write_status_ = s;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status Tracer::Close() { return WriteFooter(); }
|
||||
|
|
|
@ -178,6 +178,7 @@ class Tracer {
|
|||
TraceOptions trace_options_;
|
||||
std::unique_ptr<TraceWriter> trace_writer_;
|
||||
uint64_t trace_request_count_;
|
||||
Status trace_write_status_;
|
||||
};
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
If an error occurs when writing to a trace file after `DB::StartTrace()`, the subsequent trace writes are skipped to avoid writing to a file that has previously seen error. In this case, `DB::EndTrace()` will also return a non-ok status with info about the error occured previously in its status message.
|
Loading…
Reference in New Issue