WritePrepared Txn: AddPrepared for all sub-batches

Summary:
Currently AddPrepared is performed only on the first sub-batch if there are duplicate keys in the write batch. This could cause a problem if the transaction takes too long to commit and the seq number of the first sub-patch moved to old_prepared_ but not the seq of the later ones. The patch fixes this by calling AddPrepared for all sub-patches.
Closes https://github.com/facebook/rocksdb/pull/3651

Differential Revision: D7388635

Pulled By: maysamyabandeh

fbshipit-source-id: 0ccd80c150d9bc42fe955e49ddb9d7ca353067b4
This commit is contained in:
Maysam Yabandeh 2018-03-23 17:21:47 -07:00 committed by Facebook Github Bot
parent d382ae7de6
commit 3e417a6607
7 changed files with 63 additions and 19 deletions

View file

@ -2145,8 +2145,7 @@ Status DBImpl::DeleteFile(std::string name) {
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
&edit, &mutex_, directories_.GetDbDir());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(
cfd, &job_context.superversion_context,
InstallSuperVersionAndScheduleWork(cfd, &job_context.superversion_context,
*cfd->GetLatestMutableCFOptions(),
FlushReason::kDeleteFiles);
}
@ -2230,8 +2229,7 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
&edit, &mutex_, directories_.GetDbDir());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(
cfd, &job_context.superversion_context,
InstallSuperVersionAndScheduleWork(cfd, &job_context.superversion_context,
*cfd->GetLatestMutableCFOptions(),
FlushReason::kDeleteFiles);
}
@ -2861,8 +2859,7 @@ Status DBImpl::IngestExternalFile(
&mutex_, directories_.GetDbDir());
}
if (status.ok()) {
InstallSuperVersionAndScheduleWork(cfd, &sv_context,
*mutable_cf_options,
InstallSuperVersionAndScheduleWork(cfd, &sv_context, *mutable_cf_options,
FlushReason::kExternalFileIngestion);
}

View file

@ -2044,8 +2044,7 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
void DBImpl::InstallSuperVersionAndScheduleWork(
ColumnFamilyData* cfd, SuperVersionContext* sv_context,
const MutableCFOptions& mutable_cf_options,
FlushReason flush_reason) {
const MutableCFOptions& mutable_cf_options, FlushReason flush_reason) {
mutex_.AssertHeld();
// Update max_total_in_memory_state_

View file

@ -1366,8 +1366,7 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context,
new_mem->Ref();
cfd->SetMemtable(new_mem);
InstallSuperVersionAndScheduleWork(cfd, &context->superversion_context,
mutable_cf_options,
flush_reason);
mutable_cf_options, flush_reason);
if (two_write_queues_) {
nonmem_write_thread_.ExitUnbatched(&nonmem_w);
}

View file

@ -930,6 +930,44 @@ TEST_P(WritePreparedTransactionTest, AdvanceMaxEvictedSeqBasicTest) {
}
}
// This tests that transactions with duplicate keys perform correctly after max
// is advancing their prepared sequence numbers. This will not be the case if
// for example the txn does not add the prepared seq for the second sub-batch to
// the PrepareHeap structure.
TEST_P(WritePreparedTransactionTest, AdvanceMaxEvictedSeqWithDuplicatesTest) {
WriteOptions write_options;
TransactionOptions txn_options;
Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
ASSERT_OK(txn0->SetName("xid"));
ASSERT_OK(txn0->Put(Slice("key"), Slice("value1")));
ASSERT_OK(txn0->Put(Slice("key"), Slice("value2")));
ASSERT_OK(txn0->Prepare());
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
// Ensure that all the prepared sequence numbers will be removed from the
// PrepareHeap.
SequenceNumber new_max = wp_db->COMMIT_CACHE_SIZE;
wp_db->AdvanceMaxEvictedSeq(0, new_max);
ReadOptions ropt;
PinnableSlice pinnable_val;
auto s = db->Get(ropt, db->DefaultColumnFamily(), "key", &pinnable_val);
ASSERT_TRUE(s.IsNotFound());
delete txn0;
wp_db->db_impl_->FlushWAL(true);
wp_db->TEST_Crash();
ReOpenNoDelete();
wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
wp_db->AdvanceMaxEvictedSeq(0, new_max);
s = db->Get(ropt, db->DefaultColumnFamily(), "key", &pinnable_val);
ASSERT_TRUE(s.IsNotFound());
txn0 = db->GetTransactionByName("xid");
ASSERT_OK(txn0->Rollback());
delete txn0;
}
TEST_P(WritePreparedTransactionTest, SeqAdvanceConcurrentTest) {
// Given the sequential run of txns, with this timeout we should never see a
// deadlock nor a timeout unless we have a key conflict, which should be

View file

@ -83,7 +83,9 @@ Status WritePreparedTxn::PrepareInternal() {
// callback otherwise there is a non-zero chance of max dvancing prepare_seq
// and readers assume the data as committed.
if (s.ok()) {
wpt_db_->AddPrepared(prepare_seq);
for (size_t i = 0; i < prepare_batch_cnt_; i++) {
wpt_db_->AddPrepared(prepare_seq + i);
}
}
return s;
}
@ -293,7 +295,9 @@ Status WritePreparedTxn::RollbackInternal() {
if (do_one_write) {
// Mark the txn as rolled back
uint64_t& rollback_seq = seq_used;
wpt_db_->RollbackPrepared(GetId(), rollback_seq);
for (size_t i = 0; i < prepare_batch_cnt_; i++) {
wpt_db_->RollbackPrepared(GetId() + i, rollback_seq);
}
return s;
} // else do the 2nd write for commit
uint64_t& prepare_seq = seq_used;
@ -318,7 +322,9 @@ Status WritePreparedTxn::RollbackInternal() {
// Mark the txn as rolled back
uint64_t& rollback_seq = seq_used;
if (s.ok()) {
wpt_db_->RollbackPrepared(GetId(), rollback_seq);
for (size_t i = 0; i < prepare_batch_cnt_; i++) {
wpt_db_->RollbackPrepared(GetId() + i, rollback_seq);
}
}
return s;

View file

@ -36,7 +36,10 @@ Status WritePreparedTxnDB::Initialize(
assert(dbimpl != nullptr);
auto rtxns = dbimpl->recovered_transactions();
for (auto rtxn : rtxns) {
AddPrepared(rtxn.second->seq_);
auto cnt = rtxn.second->batch_cnt_ ? rtxn.second->batch_cnt_ : 1;
for (size_t i = 0; i < cnt; i++) {
AddPrepared(rtxn.second->seq_ + i);
}
}
SequenceNumber prev_max = max_evicted_seq_;
SequenceNumber last_seq = db_impl_->GetLatestSequenceNumber();

View file

@ -241,6 +241,8 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
friend class PreparedHeap_Concurrent_Test;
friend class WritePreparedTxnDBMock;
friend class WritePreparedTransactionTest_AdvanceMaxEvictedSeqBasicTest_Test;
friend class
WritePreparedTransactionTest_AdvanceMaxEvictedSeqWithDuplicatesTest_Test;
friend class WritePreparedTransactionTest_BasicRecoveryTest_Test;
friend class WritePreparedTransactionTest_IsInSnapshotEmptyMapTest_Test;
friend class WritePreparedTransactionTest_OldCommitMapGC_Test;