mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 16:30:56 +00:00
3bfd3d39a3
Summary: Currently, transactions can fail even if there is no actual write conflict. This is due to relying on only the memtables to check for write-conflicts. Users have to tune memtable settings to try to avoid this, but it's hard to figure out exactly how to tune these settings. With this diff, TransactionDB will use both memtables and SST files to determine if there are any write conflicts. This relies on the fact that BlockBasedTable stores sequence numbers for all writes that happen after any open snapshot. Also, D50295 is needed to prevent SingleDelete from disappearing writes (the TODOs in this test code will be fixed once the other diff is approved and merged). Note that Optimistic transactions will still rely on tuning memtable settings as we do not want to read from SST while on the write thread. Also, memtable settings can still be used to reduce how often TransactionDB needs to read SST files. Test Plan: unit tests, db bench Reviewers: rven, yhchiang, kradhakrishnan, IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: dhruba, leveldb, yoshinorim Differential Revision: https://reviews.facebook.net/D50475
131 lines
3.5 KiB
C++
131 lines
3.5 KiB
C++
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#pragma once
|
|
#include <vector>
|
|
|
|
#include "rocksdb/db.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
class SnapshotList;
|
|
|
|
// Snapshots are kept in a doubly-linked list in the DB.
|
|
// Each SnapshotImpl corresponds to a particular sequence number.
|
|
class SnapshotImpl : public Snapshot {
|
|
public:
|
|
SequenceNumber number_; // const after creation
|
|
|
|
virtual SequenceNumber GetSequenceNumber() const override { return number_; }
|
|
|
|
private:
|
|
friend class SnapshotList;
|
|
|
|
// SnapshotImpl is kept in a doubly-linked circular list
|
|
SnapshotImpl* prev_;
|
|
SnapshotImpl* next_;
|
|
|
|
SnapshotList* list_; // just for sanity checks
|
|
|
|
int64_t unix_time_;
|
|
|
|
// Will this snapshot be used by a Transaction to do write-conflict checking?
|
|
bool is_write_conflict_boundary_;
|
|
};
|
|
|
|
class SnapshotList {
|
|
public:
|
|
SnapshotList() {
|
|
list_.prev_ = &list_;
|
|
list_.next_ = &list_;
|
|
list_.number_ = 0xFFFFFFFFL; // placeholder marker, for debugging
|
|
count_ = 0;
|
|
}
|
|
|
|
bool empty() const { return list_.next_ == &list_; }
|
|
SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; }
|
|
SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; }
|
|
|
|
const SnapshotImpl* New(SnapshotImpl* s, SequenceNumber seq,
|
|
uint64_t unix_time, bool is_write_conflict_boundary) {
|
|
s->number_ = seq;
|
|
s->unix_time_ = unix_time;
|
|
s->is_write_conflict_boundary_ = is_write_conflict_boundary;
|
|
s->list_ = this;
|
|
s->next_ = &list_;
|
|
s->prev_ = list_.prev_;
|
|
s->prev_->next_ = s;
|
|
s->next_->prev_ = s;
|
|
count_++;
|
|
return s;
|
|
}
|
|
|
|
// Do not responsible to free the object.
|
|
void Delete(const SnapshotImpl* s) {
|
|
assert(s->list_ == this);
|
|
s->prev_->next_ = s->next_;
|
|
s->next_->prev_ = s->prev_;
|
|
count_--;
|
|
}
|
|
|
|
// retrieve all snapshot numbers. They are sorted in ascending order.
|
|
std::vector<SequenceNumber> GetAll(
|
|
SequenceNumber* oldest_write_conflict_snapshot = nullptr) {
|
|
std::vector<SequenceNumber> ret;
|
|
|
|
if (oldest_write_conflict_snapshot != nullptr) {
|
|
*oldest_write_conflict_snapshot = kMaxSequenceNumber;
|
|
}
|
|
|
|
if (empty()) {
|
|
return ret;
|
|
}
|
|
SnapshotImpl* s = &list_;
|
|
while (s->next_ != &list_) {
|
|
ret.push_back(s->next_->number_);
|
|
|
|
if (oldest_write_conflict_snapshot != nullptr &&
|
|
*oldest_write_conflict_snapshot == kMaxSequenceNumber &&
|
|
s->next_->is_write_conflict_boundary_) {
|
|
// If this is the first write-conflict boundary snapshot in the list,
|
|
// it is the oldest
|
|
*oldest_write_conflict_snapshot = s->next_->number_;
|
|
}
|
|
|
|
s = s->next_;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// get the sequence number of the most recent snapshot
|
|
SequenceNumber GetNewest() {
|
|
if (empty()) {
|
|
return 0;
|
|
}
|
|
return newest()->number_;
|
|
}
|
|
|
|
int64_t GetOldestSnapshotTime() const {
|
|
if (empty()) {
|
|
return 0;
|
|
} else {
|
|
return oldest()->unix_time_;
|
|
}
|
|
}
|
|
|
|
uint64_t count() const { return count_; }
|
|
|
|
private:
|
|
// Dummy head of doubly-linked list of snapshots
|
|
SnapshotImpl list_;
|
|
uint64_t count_;
|
|
};
|
|
|
|
} // namespace rocksdb
|