WritePrepared Txn: ValidateSnapshot

Summary:
Implements ValidateSnapshot for WritePrepared txns and also adds a unit test to clarify the contract of this function.
Closes https://github.com/facebook/rocksdb/pull/3101

Differential Revision: D6199405

Pulled By: maysamyabandeh

fbshipit-source-id: ace509934c307ea5d26f4bbac5f836d7c80fd240
This commit is contained in:
Maysam Yabandeh 2017-11-01 18:56:25 -07:00 committed by Facebook Github Bot
parent 7fe3b32896
commit 02693f64fc
7 changed files with 104 additions and 31 deletions

View File

@ -1644,8 +1644,8 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
uint64_t oldest_snapshot;
if (snapshots_.empty()) {
oldest_snapshot = concurrent_prepare_ && seq_per_batch_
? versions_->LastToBeWrittenSequence()
: versions_->LastSequence();
? versions_->LastToBeWrittenSequence()
: versions_->LastSequence();
} else {
oldest_snapshot = snapshots_.oldest()->number_;
}

View File

@ -145,6 +145,7 @@ class PessimisticTransaction : public TransactionBaseImpl {
uint64_t expiration_time_;
private:
friend class TransactionTest_ValidateSnapshotTest_Test;
// Used to create unique ids for transactions.
static std::atomic<TransactionID> txn_id_counter_;
@ -179,8 +180,9 @@ class PessimisticTransaction : public TransactionBaseImpl {
// Whether to perform deadlock detection or not.
int64_t deadlock_detect_depth_;
Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice& key,
SequenceNumber prev_seqno, SequenceNumber* new_seqno);
virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
const Slice& key, SequenceNumber prev_seqno,
SequenceNumber* new_seqno);
void UnlockGetForUpdate(ColumnFamilyHandle* column_family,
const Slice& key) override;
@ -208,9 +210,6 @@ class WriteCommittedTxn : public PessimisticTransaction {
Status RollbackInternal() override;
Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice& key,
SequenceNumber prev_seqno, SequenceNumber* new_seqno);
// No copying allowed
WriteCommittedTxn(const WriteCommittedTxn&);
void operator=(const WriteCommittedTxn&);

View File

@ -110,6 +110,47 @@ TEST_P(TransactionTest, SuccessTest) {
delete txn;
}
// This test clarifies the contract of ValidateSnapshot
TEST_P(TransactionTest, ValidateSnapshotTest) {
for (bool with_2pc : {true, false}) {
ReOpen();
WriteOptions write_options;
ReadOptions read_options;
string value;
Status s;
Transaction* txn1 =
db->BeginTransaction(write_options, TransactionOptions());
ASSERT_TRUE(txn1);
s = txn1->Put(Slice("foo"), Slice("bar1"));
ASSERT_OK(s);
if (with_2pc) {
s = txn1->SetName("xid1");
ASSERT_OK(s);
s = txn1->Prepare();
ASSERT_OK(s);
}
Transaction* txn2 =
db->BeginTransaction(write_options, TransactionOptions());
ASSERT_TRUE(txn2);
txn2->SetSnapshot();
s = txn1->Commit();
ASSERT_OK(s);
delete txn1;
SequenceNumber dont_care;
auto pes_txn2 = dynamic_cast<PessimisticTransaction*>(txn2);
// Test the simple case where the key is not tracked yet
auto trakced_seq = kMaxSequenceNumber;
s = pes_txn2->ValidateSnapshot(db->DefaultColumnFamily(), "foo",
trakced_seq, &dont_care);
ASSERT_TRUE(s.IsBusy());
delete txn2;
}
}
TEST_P(TransactionTest, WaitingTxn) {
WriteOptions write_options;
ReadOptions read_options;

View File

@ -22,11 +22,9 @@
namespace rocksdb {
Status TransactionUtil::CheckKeyForConflicts(DBImpl* db_impl,
ColumnFamilyHandle* column_family,
const std::string& key,
SequenceNumber key_seq,
bool cache_only) {
Status TransactionUtil::CheckKeyForConflicts(
DBImpl* db_impl, ColumnFamilyHandle* column_family, const std::string& key,
SequenceNumber snap_seq, bool cache_only, ReadCallback* snap_checker) {
Status result;
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
@ -42,7 +40,8 @@ Status TransactionUtil::CheckKeyForConflicts(DBImpl* db_impl,
SequenceNumber earliest_seq =
db_impl->GetEarliestMemTableSequenceNumber(sv, true);
result = CheckKey(db_impl, sv, earliest_seq, key_seq, key, cache_only);
result = CheckKey(db_impl, sv, earliest_seq, snap_seq, key, cache_only,
snap_checker);
db_impl->ReturnAndCleanupSuperVersion(cfd, sv);
}
@ -52,8 +51,9 @@ Status TransactionUtil::CheckKeyForConflicts(DBImpl* db_impl,
Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
SequenceNumber earliest_seq,
SequenceNumber key_seq, const std::string& key,
bool cache_only) {
SequenceNumber snap_seq,
const std::string& key, bool cache_only,
ReadCallback* snap_checker) {
Status result;
bool need_to_read_sst = false;
@ -73,9 +73,9 @@ Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
result = Status::TryAgain(
"Transaction ould not check for conflicts as the MemTable does not "
"countain a long enough history to check write at SequenceNumber: ",
ToString(key_seq));
ToString(snap_seq));
}
} else if (key_seq < earliest_seq) {
} else if (snap_seq < earliest_seq) {
need_to_read_sst = true;
if (cache_only) {
@ -91,7 +91,7 @@ Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
"max_write_buffer_number_to_maintain option could reduce the "
"frequency "
"of this error.",
key_seq, earliest_seq);
snap_seq, earliest_seq);
result = Status::TryAgain(msg);
}
}
@ -105,9 +105,13 @@ Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) {
result = s;
} else if (found_record_for_key && (seq > key_seq)) {
// Write Conflict
result = Status::Busy();
} else if (found_record_for_key) {
bool write_conflict = snap_checker == nullptr
? snap_seq < seq
: !snap_checker->IsCommitted(seq);
if (write_conflict) {
result = Status::Busy();
}
}
}

View File

@ -10,6 +10,8 @@
#include <string>
#include <unordered_map>
#include "db/read_callback.h"
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
@ -40,7 +42,7 @@ class WriteBatchWithIndex;
class TransactionUtil {
public:
// Verifies there have been no writes to this key in the db since this
// Verifies there have been no commits to this key in the db since this
// sequence number.
//
// If cache_only is true, then this function will not attempt to read any
@ -52,7 +54,8 @@ class TransactionUtil {
static Status CheckKeyForConflicts(DBImpl* db_impl,
ColumnFamilyHandle* column_family,
const std::string& key,
SequenceNumber key_seq, bool cache_only);
SequenceNumber snap_seq, bool cache_only,
ReadCallback* snap_checker = nullptr);
// For each key,SequenceNumber pair in the TransactionKeyMap, this function
// will verify there have been no writes to the key in the db since that
@ -69,8 +72,9 @@ class TransactionUtil {
private:
static Status CheckKey(DBImpl* db_impl, SuperVersion* sv,
SequenceNumber earliest_seq, SequenceNumber key_seq,
const std::string& key, bool cache_only);
SequenceNumber earliest_seq, SequenceNumber snap_seq,
const std::string& key, bool cache_only,
ReadCallback* snap_checker = nullptr);
};
} // namespace rocksdb

View File

@ -240,6 +240,34 @@ Status WritePreparedTxn::RollbackInternal() {
return s;
}
Status WritePreparedTxn::ValidateSnapshot(ColumnFamilyHandle* column_family,
const Slice& key,
SequenceNumber prev_seqno,
SequenceNumber* new_seqno) {
assert(snapshot_);
SequenceNumber snap_seq = snapshot_->GetSequenceNumber();
// prev_seqno is either max or the last snapshot with which this key was
// trackeed so there is no need to apply the IsInSnapshot to this comparison
// here as prev_seqno is not a prepare seq.
if (prev_seqno <= snap_seq) {
// If the key has been previous validated at a sequence number earlier
// than the curent snapshot's sequence number, we already know it has not
// been modified.
return Status::OK();
}
*new_seqno = snap_seq;
ColumnFamilyHandle* cfh =
column_family ? column_family : db_impl_->DefaultColumnFamily();
WritePreparedTxnReadCallback snap_checker(wpt_db_, snap_seq);
return TransactionUtil::CheckKeyForConflicts(
db_impl_, cfh, key.ToString(), snapshot_->GetSequenceNumber(),
false /* cache_only */, &snap_checker);
}
} // namespace rocksdb
#endif // ROCKSDB_LITE

View File

@ -82,12 +82,9 @@ class WritePreparedTxn : public PessimisticTransaction {
Status RollbackInternal() override;
// TODO(myabandeh): verify that the current impl work with values being
// written with prepare sequence number too.
// Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice&
// key,
// SequenceNumber prev_seqno, SequenceNumber*
// new_seqno);
virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
const Slice& key, SequenceNumber prev_seqno,
SequenceNumber* new_seqno) override;
// No copying allowed
WritePreparedTxn(const WritePreparedTxn&);