mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-30 13:41:46 +00:00
151242ce46
Summary:
**Context:**
The existing stat rocksdb.sst.read.micros does not distinguish the compaction and flush cases but aggregates them, which is not very helpful for understanding the IO read behavior of each.

**Summary**
- Update `StopWatch` and `RandomAccessFileReader` to record `rocksdb.sst.read.micros` and `rocksdb.file.{flush/compaction}.read.micros`
- Fixed the default histogram in `RandomAccessFileReader`
- New field `ReadOptions/IOOptions::io_activity`; pass `ReadOptions` through paths under db open, flush and compaction to where we can prepare `IOOptions` and pass it to `RandomAccessFileReader`
- Use `thread_status_util` for assertions in `DbStressFSWrapper` for continuous testing that we are passing the correct `io_activity` under db open, flush and compaction

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11288

Test Plan:
- **Stress test**
- **Db bench 1: rocksdb.sst.read.micros COUNT ≈ sum of rocksdb.file.read.flush.micros COUNT and rocksdb.file.read.compaction.micros COUNT** (without blob)
  - May not be exactly the same because `HistogramStat::Add` only guarantees atomicity, not accuracy across threads.

```
./db_bench -db=/dev/shm/testdb/ -statistics=true -benchmarks="fillseq" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=false -compression_type=none -bloom_bits=3 (-use_plain_table=1 -prefix_size=10)
```

```
// BlockBasedTable
rocksdb.sst.read.micros P50 : 2.009374 P95 : 4.968548 P99 : 8.110362 P100 : 43.000000 COUNT : 40456 SUM : 114805
rocksdb.file.read.flush.micros P50 : 1.871841 P95 : 3.872407 P99 : 5.540541 P100 : 43.000000 COUNT : 2250 SUM : 6116
rocksdb.file.read.compaction.micros P50 : 2.023109 P95 : 5.029149 P99 : 8.196910 P100 : 26.000000 COUNT : 38206 SUM : 108689

// PlainTable
Does not apply
```

- **Db bench 2: performance**

**Read**

SETUP: db with 900 files

```
./db_bench -db=/dev/shm/testdb/ -benchmarks="fillseq" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -disable_auto_compactions=true -target_file_size_base=655 -compression_type=none
```

Run till convergence:

```
./db_bench -seed=1678564177044286 -use_existing_db=true -db=/dev/shm/testdb -benchmarks=readrandom[-X60] -statistics=true -num=1000000 -disable_auto_compactions=true -compression_type=none -bloom_bits=3
```

Pre-change: `readrandom [AVG 60 runs] : 21568 (± 248) ops/sec`
Post-change (no regression, -0.3%): `readrandom [AVG 60 runs] : 21486 (± 236) ops/sec`

**Compaction/Flush**

Run till convergence:

```
./db_bench -db=/dev/shm/testdb2/ -seed=1678564177044286 -benchmarks="fillseq[-X60]" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -disable_auto_compactions=false -target_file_size_base=655 -compression_type=none

rocksdb.sst.read.micros COUNT : 33820
rocksdb.sst.read.flush.micros COUNT : 1800
rocksdb.sst.read.compaction.micros COUNT : 32020
```

Pre-change: `fillseq [AVG 46 runs] : 1391 (± 214) ops/sec; 0.7 (± 0.1) MB/sec`
Post-change (no regression, ~-0.4%): `fillseq [AVG 46 runs] : 1385 (± 216) ops/sec; 0.7 (± 0.1) MB/sec`

Reviewed By: ajkr

Differential Revision: D44007011

Pulled By: hx235

fbshipit-source-id: a54c89e4846dfc9a135389edf3f3eedfea257132
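To illustrate the recording scheme the summary describes, here is a minimal self-contained sketch. It is not the actual `StopWatch`/`RandomAccessFileReader` code; the types below are simplified stand-ins named after the summary. Each SST read is timed once and the elapsed microseconds are added both to the aggregate `rocksdb.sst.read.micros` histogram and, when `io_activity` indicates flush or compaction, to the matching per-activity histogram. This is also why the aggregate COUNT is approximately the sum of the two per-activity COUNTs in db bench 1.

```
#include <atomic>
#include <chrono>
#include <cstdint>

// Hypothetical stand-ins for the real RocksDB types named in the summary.
enum class IOActivity { kFlush, kCompaction, kUnknown };

struct Histogram {
  std::atomic<uint64_t> count{0};
  std::atomic<uint64_t> sum{0};
  // Each fetch_add is atomic, but count and sum are updated separately, so
  // concurrent readers may see slightly inconsistent totals -- the same
  // caveat the test plan notes for HistogramStat::Add.
  void Add(uint64_t micros) {
    count.fetch_add(1, std::memory_order_relaxed);
    sum.fetch_add(micros, std::memory_order_relaxed);
  }
};

Histogram sst_read_micros;         // rocksdb.sst.read.micros (aggregate)
Histogram flush_read_micros;       // rocksdb.file.read.flush.micros
Histogram compaction_read_micros;  // rocksdb.file.read.compaction.micros

// Time one SST read, then record it in the aggregate histogram plus, when
// the read runs under flush or compaction, the per-activity histogram.
void RecordSstRead(IOActivity activity, void (*do_read)()) {
  auto start = std::chrono::steady_clock::now();
  do_read();  // the actual file read
  auto micros = static_cast<uint64_t>(
      std::chrono::duration_cast<std::chrono::microseconds>(
          std::chrono::steady_clock::now() - start)
          .count());
  sst_read_micros.Add(micros);
  if (activity == IOActivity::kFlush) {
    flush_read_micros.Add(micros);
  } else if (activity == IOActivity::kCompaction) {
    compaction_read_micros.Add(micros);
  }
}
```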
758 lines
24 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "utilities/transactions/transaction_base.h"

#include <cinttypes>

#include "db/column_family.h"
#include "db/db_impl/db_impl.h"
#include "logging/logging.h"
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/status.h"
#include "util/cast_util.h"
#include "util/string_util.h"
#include "utilities/transactions/lock/lock_tracker.h"

namespace ROCKSDB_NAMESPACE {
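// Commits the transaction and, on success, returns the resulting timestamped
// snapshot through `snapshot`. If no commit timestamp was set beforehand,
// `ts` is used; if one was already set, a conflicting `ts` is rejected. On
// failure, `snapshot` is left reset.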
Status Transaction::CommitAndTryCreateSnapshot(
    std::shared_ptr<TransactionNotifier> notifier, TxnTimestamp ts,
    std::shared_ptr<const Snapshot>* snapshot) {
  if (snapshot) {
    snapshot->reset();
  }
  TxnTimestamp commit_ts = GetCommitTimestamp();
  if (commit_ts == kMaxTxnTimestamp) {
    if (ts == kMaxTxnTimestamp) {
      return Status::InvalidArgument("Commit timestamp unset");
    } else {
      const Status s = SetCommitTimestamp(ts);
      if (!s.ok()) {
        return s;
      }
    }
  } else if (ts != kMaxTxnTimestamp) {
    if (ts != commit_ts) {
      // For now we treat this as an error.
      return Status::InvalidArgument("Different commit ts specified");
    }
  }
  SetSnapshotOnNextOperation(notifier);
  Status s = Commit();
  if (!s.ok()) {
    return s;
  }
  assert(s.ok());
  // If we reach here, we must return ok status for this function.
  std::shared_ptr<const Snapshot> new_snapshot = GetTimestampedSnapshot();

  if (snapshot) {
    *snapshot = new_snapshot;
  }
  return Status::OK();
}
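// Note: when the DB is opened with two-phase commit enabled
// (dbimpl_->allow_2pc()), the indexed write batch is initialized eagerly,
// both here and in Clear(); see InitWriteBatch().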
TransactionBaseImpl::TransactionBaseImpl(
    DB* db, const WriteOptions& write_options,
    const LockTrackerFactory& lock_tracker_factory)
    : db_(db),
      dbimpl_(static_cast_with_check<DBImpl>(db)),
      write_options_(write_options),
      cmp_(GetColumnFamilyUserComparator(db->DefaultColumnFamily())),
      lock_tracker_factory_(lock_tracker_factory),
      start_time_(dbimpl_->GetSystemClock()->NowMicros()),
      write_batch_(cmp_, 0, true, 0, write_options.protection_bytes_per_key),
      tracked_locks_(lock_tracker_factory_.Create()),
      commit_time_batch_(0 /* reserved_bytes */, 0 /* max_bytes */,
                         write_options.protection_bytes_per_key,
                         0 /* default_cf_ts_sz */),
      indexing_enabled_(true) {
  assert(dynamic_cast<DBImpl*>(db_) != nullptr);
  log_number_ = 0;
  if (dbimpl_->allow_2pc()) {
    InitWriteBatch();
  }
}

TransactionBaseImpl::~TransactionBaseImpl() {
  // Release snapshot if snapshot is set
  SetSnapshotInternal(nullptr);
}

void TransactionBaseImpl::Clear() {
  save_points_.reset(nullptr);
  write_batch_.Clear();
  commit_time_batch_.Clear();
  tracked_locks_->Clear();
  num_puts_ = 0;
  num_deletes_ = 0;
  num_merges_ = 0;

  if (dbimpl_->allow_2pc()) {
    InitWriteBatch();
  }
}
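// Resets this object so it can be reused for a new transaction, re-deriving
// the comparator and key protection info from the given DB and options.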
void TransactionBaseImpl::Reinitialize(DB* db,
                                       const WriteOptions& write_options) {
  Clear();
  ClearSnapshot();
  id_ = 0;
  db_ = db;
  name_.clear();
  log_number_ = 0;
  write_options_ = write_options;
  start_time_ = dbimpl_->GetSystemClock()->NowMicros();
  indexing_enabled_ = true;
  cmp_ = GetColumnFamilyUserComparator(db_->DefaultColumnFamily());
  WriteBatchInternal::UpdateProtectionInfo(
      write_batch_.GetWriteBatch(), write_options_.protection_bytes_per_key)
      .PermitUncheckedError();
  WriteBatchInternal::UpdateProtectionInfo(
      &commit_time_batch_, write_options_.protection_bytes_per_key)
      .PermitUncheckedError();
}

void TransactionBaseImpl::SetSnapshot() {
  const Snapshot* snapshot = dbimpl_->GetSnapshotForWriteConflictBoundary();
  SetSnapshotInternal(snapshot);
}

void TransactionBaseImpl::SetSnapshotInternal(const Snapshot* snapshot) {
  // Set a custom deleter for the snapshot_ SharedPtr as the snapshot needs to
  // be released, not deleted when it is no longer referenced.
  snapshot_.reset(snapshot, std::bind(&TransactionBaseImpl::ReleaseSnapshot,
                                      this, std::placeholders::_1, db_));
  snapshot_needed_ = false;
  snapshot_notifier_ = nullptr;
}

void TransactionBaseImpl::SetSnapshotOnNextOperation(
    std::shared_ptr<TransactionNotifier> notifier) {
  snapshot_needed_ = true;
  snapshot_notifier_ = notifier;
}

void TransactionBaseImpl::SetSnapshotIfNeeded() {
  if (snapshot_needed_) {
    std::shared_ptr<TransactionNotifier> notifier = snapshot_notifier_;
    SetSnapshot();
    if (notifier != nullptr) {
      notifier->SnapshotCreated(GetSnapshot());
    }
  }
}
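// SliceParts overload: flattens the key parts into one contiguous string and
// defers to the string/Slice-based TryLock().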
Status TransactionBaseImpl::TryLock(ColumnFamilyHandle* column_family,
                                    const SliceParts& key, bool read_only,
                                    bool exclusive, const bool do_validate,
                                    const bool assume_tracked) {
  size_t key_size = 0;
  for (int i = 0; i < key.num_parts; ++i) {
    key_size += key.parts[i].size();
  }

  std::string str;
  str.reserve(key_size);

  for (int i = 0; i < key.num_parts; ++i) {
    str.append(key.parts[i].data(), key.parts[i].size());
  }

  return TryLock(column_family, str, read_only, exclusive, do_validate,
                 assume_tracked);
}

void TransactionBaseImpl::SetSavePoint() {
  if (save_points_ == nullptr) {
    save_points_.reset(
        new std::stack<TransactionBaseImpl::SavePoint,
                       autovector<TransactionBaseImpl::SavePoint>>());
  }
  save_points_->emplace(snapshot_, snapshot_needed_, snapshot_notifier_,
                        num_puts_, num_deletes_, num_merges_,
                        lock_tracker_factory_);
  write_batch_.SetSavePoint();
}

Status TransactionBaseImpl::RollbackToSavePoint() {
  if (save_points_ != nullptr && save_points_->size() > 0) {
    // Restore saved SavePoint
    TransactionBaseImpl::SavePoint& save_point = save_points_->top();
    snapshot_ = save_point.snapshot_;
    snapshot_needed_ = save_point.snapshot_needed_;
    snapshot_notifier_ = save_point.snapshot_notifier_;
    num_puts_ = save_point.num_puts_;
    num_deletes_ = save_point.num_deletes_;
    num_merges_ = save_point.num_merges_;

    // Rollback batch
    Status s = write_batch_.RollbackToSavePoint();
    assert(s.ok());

    // Rollback any keys that were tracked since the last savepoint
    tracked_locks_->Subtract(*save_point.new_locks_);

    save_points_->pop();

    return s;
  } else {
    assert(write_batch_.RollbackToSavePoint().IsNotFound());
    return Status::NotFound();
  }
}

Status TransactionBaseImpl::PopSavePoint() {
  if (save_points_ == nullptr || save_points_->empty()) {
    // No SavePoint yet.
    assert(write_batch_.PopSavePoint().IsNotFound());
    return Status::NotFound();
  }

  assert(!save_points_->empty());
  // If there is another savepoint A below the current savepoint B, then A
  // needs to inherit tracked_keys in B so that if we roll back to savepoint
  // A, we remember to unlock keys in B. If there is no other savepoint below,
  // then we can safely discard savepoint info.
  if (save_points_->size() == 1) {
    save_points_->pop();
  } else {
    TransactionBaseImpl::SavePoint top(lock_tracker_factory_);
    std::swap(top, save_points_->top());
    save_points_->pop();

    save_points_->top().new_locks_->Merge(*top.new_locks_);
  }

  return write_batch_.PopSavePoint();
}
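// Reads go through WriteBatchWithIndex::GetFromBatchAndDB(), so the
// transaction sees its own pending writes layered on top of the DB.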
Status TransactionBaseImpl::Get(const ReadOptions& read_options,
                                ColumnFamilyHandle* column_family,
                                const Slice& key, std::string* value) {
  if (read_options.io_activity != Env::IOActivity::kUnknown) {
    return Status::InvalidArgument(
        "Cannot call Get with `ReadOptions::io_activity` != "
        "`Env::IOActivity::kUnknown`");
  }
  assert(value != nullptr);
  PinnableSlice pinnable_val(value);
  assert(!pinnable_val.IsPinned());
  auto s = Get(read_options, column_family, key, &pinnable_val);
  if (s.ok() && pinnable_val.IsPinned()) {
    value->assign(pinnable_val.data(), pinnable_val.size());
  }  // else value is already assigned
  return s;
}

Status TransactionBaseImpl::Get(const ReadOptions& read_options,
                                ColumnFamilyHandle* column_family,
                                const Slice& key, PinnableSlice* pinnable_val) {
  return write_batch_.GetFromBatchAndDB(db_, read_options, column_family, key,
                                        pinnable_val);
}

Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options,
                                         ColumnFamilyHandle* column_family,
                                         const Slice& key, std::string* value,
                                         bool exclusive,
                                         const bool do_validate) {
  if (!do_validate && read_options.snapshot != nullptr) {
    return Status::InvalidArgument(
        "If do_validate is false then GetForUpdate with snapshot is not "
        "defined.");
  }
  if (read_options.io_activity != Env::IOActivity::kUnknown) {
    return Status::InvalidArgument(
        "Cannot call GetForUpdate with `ReadOptions::io_activity` != "
        "`Env::IOActivity::kUnknown`");
  }
  Status s = TryLock(column_family, key, true /* read_only */, exclusive,
                     do_validate);

  if (s.ok() && value != nullptr) {
    PinnableSlice pinnable_val(value);
    assert(!pinnable_val.IsPinned());
    s = Get(read_options, column_family, key, &pinnable_val);
    if (s.ok() && pinnable_val.IsPinned()) {
      value->assign(pinnable_val.data(), pinnable_val.size());
    }  // else value is already assigned
  }
  return s;
}

Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options,
                                         ColumnFamilyHandle* column_family,
                                         const Slice& key,
                                         PinnableSlice* pinnable_val,
                                         bool exclusive,
                                         const bool do_validate) {
  if (!do_validate && read_options.snapshot != nullptr) {
    return Status::InvalidArgument(
        "If do_validate is false then GetForUpdate with snapshot is not "
        "defined.");
  }
  if (read_options.io_activity != Env::IOActivity::kUnknown) {
    return Status::InvalidArgument(
        "Cannot call GetForUpdate with `ReadOptions::io_activity` != "
        "`Env::IOActivity::kUnknown`");
  }
  Status s = TryLock(column_family, key, true /* read_only */, exclusive,
                     do_validate);

  if (s.ok() && pinnable_val != nullptr) {
    s = Get(read_options, column_family, key, pinnable_val);
  }
  return s;
}

std::vector<Status> TransactionBaseImpl::MultiGet(
    const ReadOptions& read_options,
    const std::vector<ColumnFamilyHandle*>& column_family,
    const std::vector<Slice>& keys, std::vector<std::string>* values) {
  size_t num_keys = keys.size();
  if (read_options.io_activity != Env::IOActivity::kUnknown) {
    Status s = Status::InvalidArgument(
        "Cannot call MultiGet with `ReadOptions::io_activity` != "
        "`Env::IOActivity::kUnknown`");
    return std::vector<Status>(num_keys, s);
  }

  values->resize(num_keys);

  std::vector<Status> stat_list(num_keys);
  for (size_t i = 0; i < num_keys; ++i) {
    stat_list[i] = Get(read_options, column_family[i], keys[i], &(*values)[i]);
  }

  return stat_list;
}

void TransactionBaseImpl::MultiGet(const ReadOptions& read_options,
                                   ColumnFamilyHandle* column_family,
                                   const size_t num_keys, const Slice* keys,
                                   PinnableSlice* values, Status* statuses,
                                   const bool sorted_input) {
  assert(read_options.io_activity == Env::IOActivity::kUnknown);
  write_batch_.MultiGetFromBatchAndDB(db_, read_options, column_family,
                                      num_keys, keys, values, statuses,
                                      sorted_input);
}

std::vector<Status> TransactionBaseImpl::MultiGetForUpdate(
    const ReadOptions& read_options,
    const std::vector<ColumnFamilyHandle*>& column_family,
    const std::vector<Slice>& keys, std::vector<std::string>* values) {
  // Regardless of whether the MultiGet succeeded, track these keys.
  size_t num_keys = keys.size();
  if (read_options.io_activity != Env::IOActivity::kUnknown) {
    Status s = Status::InvalidArgument(
        "Cannot call MultiGetForUpdate with `ReadOptions::io_activity` != "
        "`Env::IOActivity::kUnknown`");
    return std::vector<Status>(num_keys, s);
  }
  values->resize(num_keys);

  // Lock all keys
  for (size_t i = 0; i < num_keys; ++i) {
    Status s = TryLock(column_family[i], keys[i], true /* read_only */,
                       true /* exclusive */);
    if (!s.ok()) {
      // Fail the entire MultiGet if we cannot lock all keys
      return std::vector<Status>(num_keys, s);
    }
  }

  // TODO(agiardullo): optimize multiget?
  std::vector<Status> stat_list(num_keys);
  for (size_t i = 0; i < num_keys; ++i) {
    stat_list[i] = Get(read_options, column_family[i], keys[i], &(*values)[i]);
  }

  return stat_list;
}

Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options) {
  Iterator* db_iter = db_->NewIterator(read_options);
  assert(db_iter);

  return write_batch_.NewIteratorWithBase(db_->DefaultColumnFamily(), db_iter,
                                          &read_options);
}

Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options,
                                           ColumnFamilyHandle* column_family) {
  Iterator* db_iter = db_->NewIterator(read_options, column_family);
  assert(db_iter);

  return write_batch_.NewIteratorWithBase(column_family, db_iter,
                                          &read_options);
}
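// The write operations below share one pattern: acquire or confirm an
// exclusive lock on the key via TryLock() (skipping validation when the
// caller passes assume_tracked), then append the operation to the batch
// returned by GetBatchForWrite().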
Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family,
                                const Slice& key, const Slice& value,
                                const bool assume_tracked) {
  const bool do_validate = !assume_tracked;
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, do_validate, assume_tracked);

  if (s.ok()) {
    s = GetBatchForWrite()->Put(column_family, key, value);
    if (s.ok()) {
      num_puts_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family,
                                const SliceParts& key, const SliceParts& value,
                                const bool assume_tracked) {
  const bool do_validate = !assume_tracked;
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, do_validate, assume_tracked);

  if (s.ok()) {
    s = GetBatchForWrite()->Put(column_family, key, value);
    if (s.ok()) {
      num_puts_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::Merge(ColumnFamilyHandle* column_family,
                                  const Slice& key, const Slice& value,
                                  const bool assume_tracked) {
  const bool do_validate = !assume_tracked;
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, do_validate, assume_tracked);

  if (s.ok()) {
    s = GetBatchForWrite()->Merge(column_family, key, value);
    if (s.ok()) {
      num_merges_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family,
                                   const Slice& key,
                                   const bool assume_tracked) {
  const bool do_validate = !assume_tracked;
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, do_validate, assume_tracked);

  if (s.ok()) {
    s = GetBatchForWrite()->Delete(column_family, key);
    if (s.ok()) {
      num_deletes_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family,
                                   const SliceParts& key,
                                   const bool assume_tracked) {
  const bool do_validate = !assume_tracked;
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, do_validate, assume_tracked);

  if (s.ok()) {
    s = GetBatchForWrite()->Delete(column_family, key);
    if (s.ok()) {
      num_deletes_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family,
                                         const Slice& key,
                                         const bool assume_tracked) {
  const bool do_validate = !assume_tracked;
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, do_validate, assume_tracked);

  if (s.ok()) {
    s = GetBatchForWrite()->SingleDelete(column_family, key);
    if (s.ok()) {
      num_deletes_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family,
                                         const SliceParts& key,
                                         const bool assume_tracked) {
  const bool do_validate = !assume_tracked;
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, do_validate, assume_tracked);

  if (s.ok()) {
    s = GetBatchForWrite()->SingleDelete(column_family, key);
    if (s.ok()) {
      num_deletes_++;
    }
  }

  return s;
}
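// The *Untracked variants below still take the lock but pass
// do_validate=false, so no conflict validation is performed for the key.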
Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family,
                                         const Slice& key, const Slice& value) {
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, false /* do_validate */);

  if (s.ok()) {
    s = GetBatchForWrite()->Put(column_family, key, value);
    if (s.ok()) {
      num_puts_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family,
                                         const SliceParts& key,
                                         const SliceParts& value) {
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, false /* do_validate */);

  if (s.ok()) {
    s = GetBatchForWrite()->Put(column_family, key, value);
    if (s.ok()) {
      num_puts_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::MergeUntracked(ColumnFamilyHandle* column_family,
                                           const Slice& key,
                                           const Slice& value) {
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, false /* do_validate */);

  if (s.ok()) {
    s = GetBatchForWrite()->Merge(column_family, key, value);
    if (s.ok()) {
      num_merges_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family,
                                            const Slice& key) {
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, false /* do_validate */);

  if (s.ok()) {
    s = GetBatchForWrite()->Delete(column_family, key);
    if (s.ok()) {
      num_deletes_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family,
                                            const SliceParts& key) {
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, false /* do_validate */);

  if (s.ok()) {
    s = GetBatchForWrite()->Delete(column_family, key);
    if (s.ok()) {
      num_deletes_++;
    }
  }

  return s;
}

Status TransactionBaseImpl::SingleDeleteUntracked(
    ColumnFamilyHandle* column_family, const Slice& key) {
  Status s = TryLock(column_family, key, false /* read_only */,
                     true /* exclusive */, false /* do_validate */);

  if (s.ok()) {
    s = GetBatchForWrite()->SingleDelete(column_family, key);
    if (s.ok()) {
      num_deletes_++;
    }
  }

  return s;
}

void TransactionBaseImpl::PutLogData(const Slice& blob) {
  auto s = write_batch_.PutLogData(blob);
  (void)s;
  assert(s.ok());
}

WriteBatchWithIndex* TransactionBaseImpl::GetWriteBatch() {
  return &write_batch_;
}

uint64_t TransactionBaseImpl::GetElapsedTime() const {
  return (dbimpl_->GetSystemClock()->NowMicros() - start_time_) / 1000;
}

uint64_t TransactionBaseImpl::GetNumPuts() const { return num_puts_; }

uint64_t TransactionBaseImpl::GetNumDeletes() const { return num_deletes_; }

uint64_t TransactionBaseImpl::GetNumMerges() const { return num_merges_; }

uint64_t TransactionBaseImpl::GetNumKeys() const {
  return tracked_locks_->GetNumPointLocks();
}

void TransactionBaseImpl::TrackKey(uint32_t cfh_id, const std::string& key,
                                   SequenceNumber seq, bool read_only,
                                   bool exclusive) {
  PointLockRequest r;
  r.column_family_id = cfh_id;
  r.key = key;
  r.seq = seq;
  r.read_only = read_only;
  r.exclusive = exclusive;

  // Update map of all tracked keys for this transaction
  tracked_locks_->Track(r);

  if (save_points_ != nullptr && !save_points_->empty()) {
    // Update map of tracked keys in this SavePoint
    save_points_->top().new_locks_->Track(r);
  }
}

// Gets the write batch that should be used for Put/Merge/Deletes.
//
// Returns either a WriteBatch or WriteBatchWithIndex depending on whether
// DisableIndexing() has been called.
WriteBatchBase* TransactionBaseImpl::GetBatchForWrite() {
  if (indexing_enabled_) {
    // Use WriteBatchWithIndex
    return &write_batch_;
  } else {
    // Don't use WriteBatchWithIndex. Return base WriteBatch.
    return write_batch_.GetWriteBatch();
  }
}

void TransactionBaseImpl::ReleaseSnapshot(const Snapshot* snapshot, DB* db) {
  if (snapshot != nullptr) {
    ROCKS_LOG_DETAILS(dbimpl_->immutable_db_options().info_log,
                      "ReleaseSnapshot %" PRIu64 " Set",
                      snapshot->GetSequenceNumber());
    db->ReleaseSnapshot(snapshot);
  }
}
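// Undoes the tracking performed by a previous GetForUpdate() on `key`. The
// key is unlocked only if untracking removes its last reference from the
// lock trackers (checking the current savepoint first, if any).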
void TransactionBaseImpl::UndoGetForUpdate(ColumnFamilyHandle* column_family,
                                           const Slice& key) {
  PointLockRequest r;
  r.column_family_id = GetColumnFamilyID(column_family);
  r.key = key.ToString();
  r.read_only = true;

  bool can_untrack = false;
  if (save_points_ != nullptr && !save_points_->empty()) {
    // If there was no GetForUpdate of the key in this save point, then we
    // cannot untrack it from the global lock tracker.
    UntrackStatus s = save_points_->top().new_locks_->Untrack(r);
    can_untrack = (s != UntrackStatus::NOT_TRACKED);
  } else {
    // No save point, so we can untrack from the global lock tracker.
    can_untrack = true;
  }

  if (can_untrack) {
    // If erased from the global tracker, then we can unlock the key.
    UntrackStatus s = tracked_locks_->Untrack(r);
    bool can_unlock = (s == UntrackStatus::REMOVED);
    if (can_unlock) {
      UnlockGetForUpdate(column_family, key);
    }
  }
}
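// Replays `src_batch` through this transaction's regular Put/Delete/Merge
// paths so that each operation is indexed and its key tracked. Meta markers
// are rejected; as noted below, this is used to reconstruct prepared
// transactions during recovery, whose batches contain no such markers.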
Status TransactionBaseImpl::RebuildFromWriteBatch(WriteBatch* src_batch) {
  struct IndexedWriteBatchBuilder : public WriteBatch::Handler {
    Transaction* txn_;
    DBImpl* db_;
    IndexedWriteBatchBuilder(Transaction* txn, DBImpl* db)
        : txn_(txn), db_(db) {
      assert(dynamic_cast<TransactionBaseImpl*>(txn_) != nullptr);
    }

    Status PutCF(uint32_t cf, const Slice& key, const Slice& val) override {
      return txn_->Put(db_->GetColumnFamilyHandle(cf), key, val);
    }

    Status DeleteCF(uint32_t cf, const Slice& key) override {
      return txn_->Delete(db_->GetColumnFamilyHandle(cf), key);
    }

    Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
      return txn_->SingleDelete(db_->GetColumnFamilyHandle(cf), key);
    }

    Status MergeCF(uint32_t cf, const Slice& key, const Slice& val) override {
      return txn_->Merge(db_->GetColumnFamilyHandle(cf), key, val);
    }

    // This is used for reconstructing prepared transactions upon recovery.
    // There should not be any meta markers in the batches we are processing.
    Status MarkBeginPrepare(bool) override { return Status::InvalidArgument(); }

    Status MarkEndPrepare(const Slice&) override {
      return Status::InvalidArgument();
    }

    Status MarkCommit(const Slice&) override {
      return Status::InvalidArgument();
    }

    Status MarkCommitWithTimestamp(const Slice&, const Slice&) override {
      return Status::InvalidArgument();
    }

    Status MarkRollback(const Slice&) override {
      return Status::InvalidArgument();
    }
  };

  IndexedWriteBatchBuilder copycat(this, dbimpl_);
  return src_batch->Iterate(&copycat);
}

WriteBatch* TransactionBaseImpl::GetCommitTimeWriteBatch() {
  return &commit_time_batch_;
}

}  // namespace ROCKSDB_NAMESPACE