mirror of https://github.com/facebook/rocksdb.git
WritePrepared Txn: ValidateSnapshot
Summary: Implements ValidateSnapshot for WritePrepared txns and also adds a unit test to clarify the contract of this function. Closes https://github.com/facebook/rocksdb/pull/3101 Differential Revision: D6199405 Pulled By: maysamyabandeh fbshipit-source-id: ace509934c307ea5d26f4bbac5f836d7c80fd240
This commit is contained in:
parent
7fe3b32896
commit
02693f64fc
|
@ -1644,8 +1644,8 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
|
||||||
uint64_t oldest_snapshot;
|
uint64_t oldest_snapshot;
|
||||||
if (snapshots_.empty()) {
|
if (snapshots_.empty()) {
|
||||||
oldest_snapshot = concurrent_prepare_ && seq_per_batch_
|
oldest_snapshot = concurrent_prepare_ && seq_per_batch_
|
||||||
? versions_->LastToBeWrittenSequence()
|
? versions_->LastToBeWrittenSequence()
|
||||||
: versions_->LastSequence();
|
: versions_->LastSequence();
|
||||||
} else {
|
} else {
|
||||||
oldest_snapshot = snapshots_.oldest()->number_;
|
oldest_snapshot = snapshots_.oldest()->number_;
|
||||||
}
|
}
|
||||||
|
|
|
@ -145,6 +145,7 @@ class PessimisticTransaction : public TransactionBaseImpl {
|
||||||
uint64_t expiration_time_;
|
uint64_t expiration_time_;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
friend class TransactionTest_ValidateSnapshotTest_Test;
|
||||||
// Used to create unique ids for transactions.
|
// Used to create unique ids for transactions.
|
||||||
static std::atomic<TransactionID> txn_id_counter_;
|
static std::atomic<TransactionID> txn_id_counter_;
|
||||||
|
|
||||||
|
@ -179,8 +180,9 @@ class PessimisticTransaction : public TransactionBaseImpl {
|
||||||
// Whether to perform deadlock detection or not.
|
// Whether to perform deadlock detection or not.
|
||||||
int64_t deadlock_detect_depth_;
|
int64_t deadlock_detect_depth_;
|
||||||
|
|
||||||
Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice& key,
|
virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
|
||||||
SequenceNumber prev_seqno, SequenceNumber* new_seqno);
|
const Slice& key, SequenceNumber prev_seqno,
|
||||||
|
SequenceNumber* new_seqno);
|
||||||
|
|
||||||
void UnlockGetForUpdate(ColumnFamilyHandle* column_family,
|
void UnlockGetForUpdate(ColumnFamilyHandle* column_family,
|
||||||
const Slice& key) override;
|
const Slice& key) override;
|
||||||
|
@ -208,9 +210,6 @@ class WriteCommittedTxn : public PessimisticTransaction {
|
||||||
|
|
||||||
Status RollbackInternal() override;
|
Status RollbackInternal() override;
|
||||||
|
|
||||||
Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice& key,
|
|
||||||
SequenceNumber prev_seqno, SequenceNumber* new_seqno);
|
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
WriteCommittedTxn(const WriteCommittedTxn&);
|
WriteCommittedTxn(const WriteCommittedTxn&);
|
||||||
void operator=(const WriteCommittedTxn&);
|
void operator=(const WriteCommittedTxn&);
|
||||||
|
|
|
@ -110,6 +110,47 @@ TEST_P(TransactionTest, SuccessTest) {
|
||||||
delete txn;
|
delete txn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This test clarifies the contract of ValidateSnapshot
|
||||||
|
TEST_P(TransactionTest, ValidateSnapshotTest) {
|
||||||
|
for (bool with_2pc : {true, false}) {
|
||||||
|
ReOpen();
|
||||||
|
WriteOptions write_options;
|
||||||
|
ReadOptions read_options;
|
||||||
|
string value;
|
||||||
|
Status s;
|
||||||
|
|
||||||
|
Transaction* txn1 =
|
||||||
|
db->BeginTransaction(write_options, TransactionOptions());
|
||||||
|
ASSERT_TRUE(txn1);
|
||||||
|
s = txn1->Put(Slice("foo"), Slice("bar1"));
|
||||||
|
ASSERT_OK(s);
|
||||||
|
if (with_2pc) {
|
||||||
|
s = txn1->SetName("xid1");
|
||||||
|
ASSERT_OK(s);
|
||||||
|
s = txn1->Prepare();
|
||||||
|
ASSERT_OK(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
Transaction* txn2 =
|
||||||
|
db->BeginTransaction(write_options, TransactionOptions());
|
||||||
|
ASSERT_TRUE(txn2);
|
||||||
|
txn2->SetSnapshot();
|
||||||
|
|
||||||
|
s = txn1->Commit();
|
||||||
|
ASSERT_OK(s);
|
||||||
|
delete txn1;
|
||||||
|
|
||||||
|
SequenceNumber dont_care;
|
||||||
|
auto pes_txn2 = dynamic_cast<PessimisticTransaction*>(txn2);
|
||||||
|
// Test the simple case where the key is not tracked yet
|
||||||
|
auto trakced_seq = kMaxSequenceNumber;
|
||||||
|
s = pes_txn2->ValidateSnapshot(db->DefaultColumnFamily(), "foo",
|
||||||
|
trakced_seq, &dont_care);
|
||||||
|
ASSERT_TRUE(s.IsBusy());
|
||||||
|
delete txn2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(TransactionTest, WaitingTxn) {
|
TEST_P(TransactionTest, WaitingTxn) {
|
||||||
WriteOptions write_options;
|
WriteOptions write_options;
|
||||||
ReadOptions read_options;
|
ReadOptions read_options;
|
||||||
|
|
|
@ -22,11 +22,9 @@
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
Status TransactionUtil::CheckKeyForConflicts(DBImpl* db_impl,
|
Status TransactionUtil::CheckKeyForConflicts(
|
||||||
ColumnFamilyHandle* column_family,
|
DBImpl* db_impl, ColumnFamilyHandle* column_family, const std::string& key,
|
||||||
const std::string& key,
|
SequenceNumber snap_seq, bool cache_only, ReadCallback* snap_checker) {
|
||||||
SequenceNumber key_seq,
|
|
||||||
bool cache_only) {
|
|
||||||
Status result;
|
Status result;
|
||||||
|
|
||||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
||||||
|
@ -42,7 +40,8 @@ Status TransactionUtil::CheckKeyForConflicts(DBImpl* db_impl,
|
||||||
SequenceNumber earliest_seq =
|
SequenceNumber earliest_seq =
|
||||||
db_impl->GetEarliestMemTableSequenceNumber(sv, true);
|
db_impl->GetEarliestMemTableSequenceNumber(sv, true);
|
||||||
|
|
||||||
result = CheckKey(db_impl, sv, earliest_seq, key_seq, key, cache_only);
|
result = CheckKey(db_impl, sv, earliest_seq, snap_seq, key, cache_only,
|
||||||
|
snap_checker);
|
||||||
|
|
||||||
db_impl->ReturnAndCleanupSuperVersion(cfd, sv);
|
db_impl->ReturnAndCleanupSuperVersion(cfd, sv);
|
||||||
}
|
}
|
||||||
|
@ -52,8 +51,9 @@ Status TransactionUtil::CheckKeyForConflicts(DBImpl* db_impl,
|
||||||
|
|
||||||
Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
|
Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
|
||||||
SequenceNumber earliest_seq,
|
SequenceNumber earliest_seq,
|
||||||
SequenceNumber key_seq, const std::string& key,
|
SequenceNumber snap_seq,
|
||||||
bool cache_only) {
|
const std::string& key, bool cache_only,
|
||||||
|
ReadCallback* snap_checker) {
|
||||||
Status result;
|
Status result;
|
||||||
bool need_to_read_sst = false;
|
bool need_to_read_sst = false;
|
||||||
|
|
||||||
|
@ -73,9 +73,9 @@ Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
|
||||||
result = Status::TryAgain(
|
result = Status::TryAgain(
|
||||||
"Transaction ould not check for conflicts as the MemTable does not "
|
"Transaction ould not check for conflicts as the MemTable does not "
|
||||||
"countain a long enough history to check write at SequenceNumber: ",
|
"countain a long enough history to check write at SequenceNumber: ",
|
||||||
ToString(key_seq));
|
ToString(snap_seq));
|
||||||
}
|
}
|
||||||
} else if (key_seq < earliest_seq) {
|
} else if (snap_seq < earliest_seq) {
|
||||||
need_to_read_sst = true;
|
need_to_read_sst = true;
|
||||||
|
|
||||||
if (cache_only) {
|
if (cache_only) {
|
||||||
|
@ -91,7 +91,7 @@ Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
|
||||||
"max_write_buffer_number_to_maintain option could reduce the "
|
"max_write_buffer_number_to_maintain option could reduce the "
|
||||||
"frequency "
|
"frequency "
|
||||||
"of this error.",
|
"of this error.",
|
||||||
key_seq, earliest_seq);
|
snap_seq, earliest_seq);
|
||||||
result = Status::TryAgain(msg);
|
result = Status::TryAgain(msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -105,9 +105,13 @@ Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
|
||||||
|
|
||||||
if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) {
|
if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) {
|
||||||
result = s;
|
result = s;
|
||||||
} else if (found_record_for_key && (seq > key_seq)) {
|
} else if (found_record_for_key) {
|
||||||
// Write Conflict
|
bool write_conflict = snap_checker == nullptr
|
||||||
result = Status::Busy();
|
? snap_seq < seq
|
||||||
|
: !snap_checker->IsCommitted(seq);
|
||||||
|
if (write_conflict) {
|
||||||
|
result = Status::Busy();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,8 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "db/read_callback.h"
|
||||||
|
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
#include "rocksdb/status.h"
|
#include "rocksdb/status.h"
|
||||||
|
@ -40,7 +42,7 @@ class WriteBatchWithIndex;
|
||||||
|
|
||||||
class TransactionUtil {
|
class TransactionUtil {
|
||||||
public:
|
public:
|
||||||
// Verifies there have been no writes to this key in the db since this
|
// Verifies there have been no commits to this key in the db since this
|
||||||
// sequence number.
|
// sequence number.
|
||||||
//
|
//
|
||||||
// If cache_only is true, then this function will not attempt to read any
|
// If cache_only is true, then this function will not attempt to read any
|
||||||
|
@ -52,7 +54,8 @@ class TransactionUtil {
|
||||||
static Status CheckKeyForConflicts(DBImpl* db_impl,
|
static Status CheckKeyForConflicts(DBImpl* db_impl,
|
||||||
ColumnFamilyHandle* column_family,
|
ColumnFamilyHandle* column_family,
|
||||||
const std::string& key,
|
const std::string& key,
|
||||||
SequenceNumber key_seq, bool cache_only);
|
SequenceNumber snap_seq, bool cache_only,
|
||||||
|
ReadCallback* snap_checker = nullptr);
|
||||||
|
|
||||||
// For each key,SequenceNumber pair in the TransactionKeyMap, this function
|
// For each key,SequenceNumber pair in the TransactionKeyMap, this function
|
||||||
// will verify there have been no writes to the key in the db since that
|
// will verify there have been no writes to the key in the db since that
|
||||||
|
@ -69,8 +72,9 @@ class TransactionUtil {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static Status CheckKey(DBImpl* db_impl, SuperVersion* sv,
|
static Status CheckKey(DBImpl* db_impl, SuperVersion* sv,
|
||||||
SequenceNumber earliest_seq, SequenceNumber key_seq,
|
SequenceNumber earliest_seq, SequenceNumber snap_seq,
|
||||||
const std::string& key, bool cache_only);
|
const std::string& key, bool cache_only,
|
||||||
|
ReadCallback* snap_checker = nullptr);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
|
@ -240,6 +240,34 @@ Status WritePreparedTxn::RollbackInternal() {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validates that `key` can be written under this transaction's snapshot.
//
// column_family: column family of the key; when null, the default column
//                family of db_impl_ is used.
// key:           the key to validate.
// prev_seqno:    the sequence number at which the key was previously tracked,
//                or the max sequence number if it was not tracked yet.
// new_seqno:     set to the snapshot's sequence number when a conflict check
//                is actually performed; left untouched on the early return.
//
// Returns OK if the key was already validated at or before the snapshot;
// otherwise returns the result of TransactionUtil::CheckKeyForConflicts.
// Requires a snapshot to be set (asserted).
Status WritePreparedTxn::ValidateSnapshot(ColumnFamilyHandle* column_family,
                                          const Slice& key,
                                          SequenceNumber prev_seqno,
                                          SequenceNumber* new_seqno) {
  assert(snapshot_);

  const SequenceNumber snap_seq = snapshot_->GetSequenceNumber();
  // prev_seqno is either max or the last snapshot with which this key was
  // tracked, so there is no need to apply IsInSnapshot to this comparison
  // here, as prev_seqno is not a prepare seq.
  if (prev_seqno <= snap_seq) {
    // If the key has been previously validated at a sequence number earlier
    // than the current snapshot's sequence number, we already know it has not
    // been modified.
    return Status::OK();
  }

  *new_seqno = snap_seq;

  ColumnFamilyHandle* cfh =
      column_family ? column_family : db_impl_->DefaultColumnFamily();

  // Use the same cached snap_seq for both the callback and the conflict check
  // so the two are guaranteed to agree.
  WritePreparedTxnReadCallback snap_checker(wpt_db_, snap_seq);
  return TransactionUtil::CheckKeyForConflicts(db_impl_, cfh, key.ToString(),
                                               snap_seq, false /* cache_only */,
                                               &snap_checker);
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
#endif // ROCKSDB_LITE
|
#endif // ROCKSDB_LITE
|
||||||
|
|
|
@ -82,12 +82,9 @@ class WritePreparedTxn : public PessimisticTransaction {
|
||||||
|
|
||||||
Status RollbackInternal() override;
|
Status RollbackInternal() override;
|
||||||
|
|
||||||
// TODO(myabandeh): verify that the current impl work with values being
|
virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
|
||||||
// written with prepare sequence number too.
|
const Slice& key, SequenceNumber prev_seqno,
|
||||||
// Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice&
|
SequenceNumber* new_seqno) override;
|
||||||
// key,
|
|
||||||
// SequenceNumber prev_seqno, SequenceNumber*
|
|
||||||
// new_seqno);
|
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
WritePreparedTxn(const WritePreparedTxn&);
|
WritePreparedTxn(const WritePreparedTxn&);
|
||||||
|
|
Loading…
Reference in New Issue