Fix skip WAL for whole write_group when leader's callback fail (#4838)
Summary: The original implementation has two problems:

1. f0dda35d7d/db/db_impl_write.cc (L478), f0dda35d7d/db/write_thread.h (L231): if the callback status of the leader of the write_group fails, the whole write_group will not write to the WAL, which may cause data loss.

2. f0dda35d7d/db/write_thread.h (L130): the annotation says that Writer.status is the status of the memtable inserter, but the original implementation also uses it for another case, which is not consistent with the original design. It looks like we can still reuse Writer.status, but we should update the annotation so that Writer.status is not only the status of the memtable inserter.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/4838
Differential Revision: D13574070
Pulled By: yiwu-arbug
fbshipit-source-id: a2a2aefcfd329c4c6a91652bf090aaf1ce119c4b
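For context, both problems center on WriteThread::Writer. The sketch below reconstructs the relevant fields and predicates as they stood before this fix, in a compilable form. It is a simplification, not verbatim RocksDB source: the Status and WriteCallback stand-ins and the exact predicate bodies are assumptions based on the lines this PR touches.

```cpp
// Minimal stand-ins so the sketch compiles; the real types are RocksDB's.
struct Status {
  bool ok_ = true;
  bool ok() const { return ok_; }
};
struct WriteCallback {};

// Sketch of WriteThread::Writer (db/write_thread.h), reduced to the fields
// this PR is about.
struct Writer {
  Status status;           // pre-fix annotation: "status of memtable inserter"
  Status callback_status;  // status returned by callback->Callback()
  WriteCallback* callback = nullptr;
  bool disable_wal = false;
  bool disable_memtable = false;

  bool CallbackFailed() const {
    return callback != nullptr && !callback_status.ok();
  }

  // Problem 1: the leader performs the whole group's batched WAL write
  // behind this predicate, so a failed *leader* callback used to skip the
  // WAL for every follower in the group.
  bool ShouldWriteToWAL() const {
    return status.ok() && !CallbackFailed() && !disable_wal;
  }

  // Problem 2: status also feeds this check, so it was never solely the
  // memtable inserter's status, contradicting the old annotation.
  bool ShouldWriteToMemtable() const {
    return status.ok() && !CallbackFailed() && !disable_memtable;
  }
};
```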
parent 74f7d7551e
commit 8c79f79208
HISTORY.md

@@ -14,6 +14,7 @@
 * Fix a memory leak when files with range tombstones are read in mmap mode and block cache is enabled
 * Fix handling of corrupt range tombstone blocks such that corruptions cannot cause deleted keys to reappear
 * Lock free MultiGet
+* Fix a bug with pipelined write: a write leader's callback failure could fail the whole write group.
 
 ## 5.18.0 (11/30/2018)
 ### New Features
db/db_impl_write.cc

@@ -475,7 +475,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
 
     PERF_TIMER_STOP(write_pre_and_post_process_time);
 
-    if (w.ShouldWriteToWAL()) {
+    if (w.status.ok() && !write_options.disableWAL) {
       PERF_TIMER_GUARD(write_wal_time);
       stats->AddDBStats(InternalStats::WRITE_DONE_BY_SELF, 1);
       RecordTick(stats_, WRITE_DONE_BY_SELF, 1);
@@ -504,7 +504,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
   WriteThread::WriteGroup memtable_write_group;
   if (w.state == WriteThread::STATE_MEMTABLE_WRITER_LEADER) {
     PERF_TIMER_GUARD(write_memtable_time);
-    assert(w.status.ok());
+    assert(w.ShouldWriteToMemtable());
     write_thread_.EnterAsMemTableWriter(&w, &memtable_write_group);
     if (memtable_write_group.size > 1 &&
         immutable_db_options_.allow_concurrent_memtable_write) {
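The two hunks above work together: the WAL gate stops consulting the leader's callback result, and the memtable-leader assertion is tightened from w.status.ok() to the full ShouldWriteToMemtable() predicate. The toy program below demonstrates just the gate change; ToyWriter, OldGate, and NewGate are illustrative names invented here, not RocksDB API.

```cpp
#include <cassert>

struct ToyWriter {
  bool write_ok = true;     // stands in for w.status.ok()
  bool callback_ok = true;  // stands in for w.callback_status.ok()
};

// Old gate: mirrors w.ShouldWriteToWAL(), which also checked the callback.
bool OldGate(const ToyWriter& leader, bool disableWAL) {
  return leader.write_ok && leader.callback_ok && !disableWAL;
}

// New gate: mirrors w.status.ok() && !write_options.disableWAL.
bool NewGate(const ToyWriter& leader, bool disableWAL) {
  return leader.write_ok && !disableWAL;
}

int main() {
  // A WAL leader whose callback failed but whose own write is fine.
  ToyWriter leader{/*write_ok=*/true, /*callback_ok=*/false};

  // Before the fix, the leader skipped the batched WAL write for the
  // entire group, losing the followers' WAL records.
  assert(!OldGate(leader, /*disableWAL=*/false));

  // After the fix, the group still reaches the WAL; the leader's callback
  // failure is reported through its own callback_status instead.
  assert(NewGate(leader, /*disableWAL=*/false));
  return 0;
}
```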
db/write_thread.h

@@ -127,7 +127,7 @@ class WriteThread {
     std::atomic<uint8_t> state;  // write under StateMutex() or pre-link
     WriteGroup* write_group;
     SequenceNumber sequence;  // the sequence number to use for the first key
-    Status status;  // status of memtable inserter
+    Status status;
     Status callback_status;  // status returned by callback->Callback()
 
     std::aligned_storage<sizeof(std::mutex)>::type state_mutex_bytes;
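With the annotation removed, Writer.status legitimately carries the outcome of the WAL write as well as the memtable insert, while callback failures stay isolated in callback_status. The sketch below shows one plausible way the two fields combine into the status a DB::Write() caller sees, modeled on Writer::FinalStatus in write_thread.h; it reuses the stand-in types from the earlier sketch and is not the verbatim body.

```cpp
// Sketch only: derive the caller-visible result from the two fields.
Status FinalStatus(const Writer& w) {
  // A real write error (WAL or memtable) wins; otherwise a failed callback
  // surfaces for this writer alone, without vetoing the group's WAL write.
  if (!w.status.ok()) return w.status;
  if (!w.callback_status.ok()) return w.callback_status;
  return Status{};
}
```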