2016-02-09 23:12:00 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2015-05-29 21:36:35 +00:00
|
|
|
|
|
|
|
#pragma once
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
|
2022-10-25 21:15:22 +00:00
|
|
|
#include <algorithm>
|
2020-01-07 22:19:06 +00:00
|
|
|
#include <mutex>
|
|
|
|
#include <vector>
|
|
|
|
|
2015-05-29 21:36:35 +00:00
|
|
|
#include "rocksdb/db.h"
|
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "rocksdb/utilities/optimistic_transaction_db.h"
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2015-05-29 21:36:35 +00:00
|
|
|
|
|
|
|
class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
|
|
|
|
public:
|
2020-01-07 22:19:06 +00:00
|
|
|
// Wraps `db` as an optimistic transaction DB.
//
// `take_ownership` is recorded in db_owner_ and consulted by the destructor.
// `occ_options.validate_policy` is stored in validate_policy_. Only when that
// policy is OccValidationPolicy::kValidateParallel is the lock-bucket table
// populated; it then holds max(16, occ_options.occ_lock_buckets) mutexes.
explicit OptimisticTransactionDBImpl(
    DB* db, const OptimisticTransactionDBOptions& occ_options,
    bool take_ownership = true)
    : OptimisticTransactionDB(db),
      db_owner_(take_ownership),
      validate_policy_(occ_options.validate_policy) {
  // Any other policy leaves bucketed_locks_ empty.
  if (validate_policy_ != OccValidationPolicy::kValidateParallel) {
    return;
  }

  // Never create fewer than 16 buckets, whatever the caller asked for.
  const uint32_t num_buckets = std::max(16u, occ_options.occ_lock_buckets);
  bucketed_locks_.reserve(num_buckets);
  while (bucketed_locks_.size() < num_buckets) {
    bucketed_locks_.emplace_back(std::unique_ptr<std::mutex>(new std::mutex));
  }
}
|
2015-05-29 21:36:35 +00:00
|
|
|
|
2017-05-10 21:54:35 +00:00
|
|
|
// If this object was constructed without ownership of the base DB, clear
// db_ so that this stackable does not destroy the base DB.
~OptimisticTransactionDBImpl() {
  if (db_owner_) {
    // We own the base DB: leave db_ untouched.
    return;
  }
  db_ = nullptr;
}
|
2015-05-29 21:36:35 +00:00
|
|
|
|
2016-03-04 00:33:26 +00:00
|
|
|
// Declaration only; overrides the base-class BeginTransaction(). The
// definition is not part of this header.
// NOTE(review): presumably a non-null `old_txn` is reused instead of
// allocating a new transaction -- confirm against the definition.
Transaction* BeginTransaction(
    const WriteOptions& write_options,
    const OptimisticTransactionOptions& txn_options,
    Transaction* old_txn) override;
|
2015-05-29 21:36:35 +00:00
|
|
|
|
2021-02-05 23:55:34 +00:00
|
|
|
// Transactional `DeleteRange()` is not yet supported.
|
Revise APIs related to user-defined timestamp (#8946)
Summary:
ajkr reminded me that we have a rule of not including per-kv related data in `WriteOptions`.
Namely, `WriteOptions` should not include information about "what-to-write", but should just
include information about "how-to-write".
According to this rule, `WriteOptions::timestamp` (experimental) is clearly a violation. Therefore,
this PR removes `WriteOptions::timestamp` for compliance.
After the removal, we need to pass timestamp info via another set of APIs. This PR proposes a set
of overloaded functions `Put(write_opts, key, value, ts)`, `Delete(write_opts, key, ts)`, and
`SingleDelete(write_opts, key, ts)`. Planned to add `Write(write_opts, batch, ts)`, but its complexity
made me reconsider doing it in another PR (maybe).
For better checking and returning error early, we also add a new set of APIs to `WriteBatch` that take
extra `timestamp` information when writing to `WriteBatch`es.
This set of APIs in `WriteBatchWithIndex` is currently not supported, and is on our TODO list.
Removed `WriteBatch::AssignTimestamps()` and renamed `WriteBatch::AssignTimestamp()` to
`WriteBatch::UpdateTimestamps()` since this method requires that all keys have space for timestamps
allocated already and multiple timestamps can be updated.
The constructor of `WriteBatch` now takes a fourth argument `default_cf_ts_sz` which is the timestamp
size of the default column family. This will be used to allocate space when calling APIs that do not
specify a column family handle.
Also, updated `DB::Get()`, `DB::MultiGet()`, `DB::NewIterator()`, `DB::NewIterators()` methods, replacing
some assertions about timestamps with returned Status codes.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8946
Test Plan:
make check
./db_bench -benchmarks=fillseq,fillrandom,readrandom,readseq,deleterandom -user_timestamp_size=8
./db_stress --user_timestamp_size=8 -nooverwritepercent=0 -test_secondary=0 -secondary_catch_up_one_in=0 -continuous_verification_interval=0
Make sure there is no perf regression by running the following
```
./db_bench_opt -db=/dev/shm/rocksdb -use_existing_db=0 -level0_stop_writes_trigger=256 -level0_slowdown_writes_trigger=256 -level0_file_num_compaction_trigger=256 -disable_wal=1 -duration=10 -benchmarks=fillrandom
```
Before this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.831 micros/op 546235 ops/sec; 60.4 MB/s
```
After this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.820 micros/op 549404 ops/sec; 60.8 MB/s
```
Reviewed By: ltamasi
Differential Revision: D33721359
Pulled By: riversand963
fbshipit-source-id: c131561534272c120ffb80711d42748d21badf09
2022-02-02 06:17:46 +00:00
|
|
|
using StackableDB::DeleteRange;
|
2021-02-05 23:55:34 +00:00
|
|
|
virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*,
|
|
|
|
const Slice&, const Slice&) override {
|
|
|
|
return Status::NotSupported();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Range deletions also must not be snuck into `WriteBatch`es as they are
|
|
|
|
// incompatible with `OptimisticTransactionDB`.
|
|
|
|
virtual Status Write(const WriteOptions& write_opts,
|
|
|
|
WriteBatch* batch) override {
|
|
|
|
if (batch->HasDeleteRange()) {
|
|
|
|
return Status::NotSupported();
|
|
|
|
}
|
|
|
|
return OptimisticTransactionDB::Write(write_opts, batch);
|
|
|
|
}
|
|
|
|
|
2020-01-07 22:19:06 +00:00
|
|
|
// Number of entries currently held in bucketed_locks_.
size_t GetLockBucketsSize() const {
  return bucketed_locks_.size();
}
|
|
|
|
|
|
|
|
// Validation policy captured from the options at construction time.
OccValidationPolicy GetValidatePolicy() const {
  return validate_policy_;
}
|
|
|
|
|
|
|
|
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably locks bucketed_locks_[idx] and returns the owning
// std::unique_lock; whether `idx` is range-checked or wrapped is not visible
// here -- confirm against the definition.
std::unique_lock<std::mutex> LockBucket(size_t idx);
|
|
|
|
|
2015-05-29 21:36:35 +00:00
|
|
|
private:
|
2020-01-07 22:19:06 +00:00
|
|
|
// NOTE: used in validation phase. Each key is hashed into some
|
|
|
|
// bucket. We then take the lock in the hash value order to avoid deadlock.
|
|
|
|
// Filled by the constructor only when validate_policy_ is
// OccValidationPolicy::kValidateParallel; otherwise it stays empty.
std::vector<std::unique_ptr<std::mutex>> bucketed_locks_;
|
|
|
|
|
|
|
|
// Set from the constructor's `take_ownership` argument. When false, the
// destructor nulls db_ so that the base DB is not destroyed by this object.
bool db_owner_;
|
2018-04-03 22:24:23 +00:00
|
|
|
|
2020-01-07 22:19:06 +00:00
|
|
|
// Copied from OptimisticTransactionDBOptions::validate_policy by the
// constructor; immutable afterwards.
const OccValidationPolicy validate_policy_;
|
2016-03-04 00:33:26 +00:00
|
|
|
|
|
|
|
// Declaration only; defined out of line. When `txn_options` is omitted a
// default-constructed OptimisticTransactionOptions is used.
// NOTE(review): presumably resets `txn` so it can be reused with the given
// options -- confirm against the definition.
void ReinitializeTransaction(
    Transaction* txn, const WriteOptions& write_options,
    const OptimisticTransactionOptions& txn_options =
        OptimisticTransactionOptions());
|
2015-05-29 21:36:35 +00:00
|
|
|
};
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2015-05-29 21:36:35 +00:00
|
|
|
#endif // ROCKSDB_LITE
|