WriteUnprepared: commit only from the 2nd queue (#5439)

Summary:
This is a port of this PR into WriteUnprepared:
https://github.com/facebook/rocksdb/pull/5014

This also reverts this test change to restore some flaky write unprepared
tests: https://github.com/facebook/rocksdb/pull/5315

Tested with:
$ gtest-parallel ./transaction_test --gtest_filter=MySQLStyleTransactionTest/MySQLStyleTransactionTest.TransactionStressTest/9 --repeat=128
[128/128] MySQLStyleTransactionTest/MySQLStyleTransactionTest.TransactionStressTest/9 (18250 ms)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5439

Differential Revision: D15761405

Pulled By: lth

fbshipit-source-id: ae2581fd942d8a5b3f9278fd6bc3c1ac0b2c964c
This commit is contained in:
Manuel Ung 2019-06-11 17:58:31 -07:00 committed by Facebook Github Bot
parent ba64a4cf52
commit ca1aee2a19
2 changed files with 32 additions and 26 deletions

View file

@ -74,6 +74,10 @@ INSTANTIATE_TEST_CASE_P(
std::make_tuple(false, false, WRITE_PREPARED, kOrderedWrite, true),
std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, false),
std::make_tuple(false, true, WRITE_PREPARED, kOrderedWrite, true),
std::make_tuple(false, false, WRITE_UNPREPARED, kOrderedWrite, false),
std::make_tuple(false, false, WRITE_UNPREPARED, kOrderedWrite, true),
std::make_tuple(false, true, WRITE_UNPREPARED, kOrderedWrite, false),
std::make_tuple(false, true, WRITE_UNPREPARED, kOrderedWrite, true),
std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, false),
std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, true)));
#endif // ROCKSDB_VALGRIND_RUN

View file

@ -281,23 +281,30 @@ Status WriteUnpreparedTxn::CommitInternal() {
const bool disable_memtable = !includes_data;
const bool do_one_write =
!db_impl_->immutable_db_options().two_write_queues || disable_memtable;
const bool publish_seq = do_one_write;
// Note: CommitTimeWriteBatch does not need AddPrepared since it is written to
// DB in one shot. min_uncommitted still works since it requires capturing
// data that is written to DB but not yet committed, while
// CommitTimeWriteBatch commits with PreReleaseCallback.
WriteUnpreparedCommitEntryPreReleaseCallback update_commit_map(
wpt_db_, db_impl_, unprep_seqs_, commit_batch_cnt, publish_seq);
wpt_db_, db_impl_, unprep_seqs_, commit_batch_cnt);
const bool kFirstPrepareBatch = true;
AddPreparedCallback add_prepared_callback(
wpt_db_, db_impl_, commit_batch_cnt,
db_impl_->immutable_db_options().two_write_queues, !kFirstPrepareBatch);
PreReleaseCallback* pre_release_callback;
if (do_one_write) {
pre_release_callback = &update_commit_map;
} else {
pre_release_callback = &add_prepared_callback;
}
uint64_t seq_used = kMaxSequenceNumber;
// Since the prepared batch is directly written to memtable, there is already
// a connection between the memtable and its WAL, so there is no need to
// redundantly reference the log that contains the prepared data.
// Since the prepared batch is directly written to memtable, there is
// already a connection between the memtable and its WAL, so there is no
// need to redundantly reference the log that contains the prepared data.
const uint64_t zero_log_number = 0ull;
size_t batch_cnt = UNLIKELY(commit_batch_cnt) ? commit_batch_cnt : 1;
auto s = db_impl_->WriteImpl(write_options_, working_batch, nullptr, nullptr,
zero_log_number, disable_memtable, &seq_used,
batch_cnt, &update_commit_map);
batch_cnt, pre_release_callback);
assert(!s.ok() || seq_used != kMaxSequenceNumber);
const SequenceNumber commit_batch_seq = seq_used;
if (LIKELY(do_one_write || !s.ok())) {
if (LIKELY(s.ok())) {
// Note RemovePrepared should be called after WriteImpl that published
@ -306,30 +313,25 @@ Status WriteUnpreparedTxn::CommitInternal() {
wpt_db_->RemovePrepared(seq.first, seq.second);
}
}
if (UNLIKELY(!do_one_write)) {
wpt_db_->RemovePrepared(commit_batch_seq, commit_batch_cnt);
}
unprep_seqs_.clear();
write_set_keys_.clear();
return s;
} // else do the 2nd write to publish seq
// Populate unprep_seqs_ with commit_batch_seq, since we treat data in the
// commit write batch as just another "unprepared" batch. This will also
// update the unprep_seqs_ in the update_commit_map callback.
unprep_seqs_[commit_batch_seq] = commit_batch_cnt;
// Note: the 2nd write comes with a performance penalty. So if we have too
// many commits accompanied with CommitTimeWriteBatch and yet we cannot
// enable use_only_the_last_commit_time_batch_for_recovery_ optimization,
// two_write_queues should be disabled to avoid many additional writes here.
// Function-local PreReleaseCallback whose only job is to publish the sequence
// number it is handed, by calling DBImpl::SetLastPublishedSequence. A single
// instance, publish_seq_callback, is declared together with the class.
class PublishSeqPreReleaseCallback : public PreReleaseCallback {
public:
// Remembers the DBImpl pointer that Callback uses later; does nothing else.
explicit PublishSeqPreReleaseCallback(DBImpl* db_impl)
: db_impl_(db_impl) {}
// Marks `seq` as the last published sequence and returns Status::OK().
// The unnamed uint64_t argument and the index/total arguments are ignored.
// is_mem_disabled is only read by the assert below, hence the unused
// attribute to keep builds without asserts warning-free.
Status Callback(SequenceNumber seq,
bool is_mem_disabled __attribute__((__unused__)), uint64_t,
size_t /*index*/, size_t /*total*/) override {
// Sanity checks: this callback expects a write with the memtable disabled
// on a DB configured with two_write_queues.
assert(is_mem_disabled);
assert(db_impl_->immutable_db_options().two_write_queues);
db_impl_->SetLastPublishedSequence(seq);
return Status::OK();
}
private:
// DB on which the sequence number is published; held as a raw pointer.
DBImpl* db_impl_;
} publish_seq_callback(db_impl_);
// Update commit map only from the 2nd queue
WriteBatch empty_batch;
empty_batch.PutLogData(Slice());
// In the absence of Prepare markers, use Noop as a batch separator
@ -339,7 +341,7 @@ Status WriteUnpreparedTxn::CommitInternal() {
const uint64_t NO_REF_LOG = 0;
s = db_impl_->WriteImpl(write_options_, &empty_batch, nullptr, nullptr,
NO_REF_LOG, DISABLE_MEMTABLE, &seq_used, ONE_BATCH,
&publish_seq_callback);
&update_commit_map);
assert(!s.ok() || seq_used != kMaxSequenceNumber);
// Note RemovePrepared should be called after WriteImpl that published the
// seq. Otherwise SmallestUnCommittedSeq optimization breaks.