From 8b38d4b4006ca9fd49432ccc16d9911919870dd2 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 21 Oct 2024 21:02:03 -0700 Subject: [PATCH] Fix write tracing to check callback status (#13088) Summary: we currently record write operations to tracer before checking callback in PipelinedWriteImpl and WriteImplWALOnly. For optimistic transaction DB, this means that an operation can be recorded to tracer even when it's not written to DB or WAL. I suspect this is the reason some of our optimistic txn crash test is failing. The evidence is that the trace contains some duplicated entry and has more entries compared to the corresponding entry in WAL. This PR moves the tracer logic to be after checking callback status. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13088 Test Plan: monitor crash test. Reviewed By: hx235 Differential Revision: D64711753 Pulled By: cbi42 fbshipit-source-id: 55fd1223538ec6294ce84a957c306d3d9d91df5f --- db/db_impl/db_impl_write.cc | 57 +++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index e28d3fa913..248ddb88de 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -735,17 +735,6 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options, size_t total_byte_size = 0; if (w.status.ok()) { - // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock - // grabs but does not seem thread-safe. - if (tracer_) { - InstrumentedMutexLock lock(&trace_mutex_); - if (tracer_ != nullptr && tracer_->IsWriteOrderPreserved()) { - for (auto* writer : wal_write_group) { - // TODO: maybe handle the tracing status? - tracer_->Write(writer->batch).PermitUncheckedError(); - } - } - } SequenceNumber next_sequence = current_sequence; for (auto* writer : wal_write_group) { assert(writer); @@ -760,6 +749,22 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options, } } } + // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock + // grabs but does not seem thread-safe. + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_ != nullptr && tracer_->IsWriteOrderPreserved()) { + for (auto* writer : wal_write_group) { + if (writer->CallbackFailed()) { + // When optimisitc txn conflict checking fails, we should + // not record to trace. + continue; + } + // TODO: maybe handle the tracing status? + tracer_->Write(writer->batch).PermitUncheckedError(); + } + } + } if (w.disable_wal) { has_unpersisted_data_.store(true, std::memory_order_relaxed); } @@ -1005,19 +1010,6 @@ Status DBImpl::WriteImplWALOnly( WriteThread::WriteGroup write_group; uint64_t last_sequence; write_thread->EnterAsBatchGroupLeader(&w, &write_group); - // Note: no need to update last_batch_group_size_ here since the batch writes - // to WAL only - // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock - // grabs but does not seem thread-safe. - if (tracer_) { - InstrumentedMutexLock lock(&trace_mutex_); - if (tracer_ != nullptr && tracer_->IsWriteOrderPreserved()) { - for (auto* writer : write_group) { - // TODO: maybe handle the tracing status? - tracer_->Write(writer->batch).PermitUncheckedError(); - } - } - } size_t pre_release_callback_cnt = 0; size_t total_byte_size = 0; @@ -1032,6 +1024,23 @@ Status DBImpl::WriteImplWALOnly( } } + // Note: no need to update last_batch_group_size_ here since the batch writes + // to WAL only + // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock + // grabs but does not seem thread-safe. + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_ != nullptr && tracer_->IsWriteOrderPreserved()) { + for (auto* writer : write_group) { + if (writer->CallbackFailed()) { + continue; + } + // TODO: maybe handle the tracing status? + tracer_->Write(writer->batch).PermitUncheckedError(); + } + } + } + const bool concurrent_update = true; // Update stats while we are an exclusive group leader, so we know // that nobody else can be writing to these particular stats.