mirror of https://github.com/facebook/rocksdb.git
WriteUnPrepared: Add new variable write_batch_flush_threshold (#5633)
Summary: Instead of reusing `TransactionOptions::max_write_batch_size` to determine when to flush a write batch for write unprepared, add a new variable called `write_batch_flush_threshold` for this use case. Also add `TransactionDBOptions::default_write_batch_flush_threshold`, which sets the default value when `TransactionOptions::write_batch_flush_threshold` is unspecified.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5633
Differential Revision: D16520364
Pulled By: lth
fbshipit-source-id: d75ae5a2141ce7708982d5069dc3f0b58d250e8c
This commit is contained in:
parent 3617287e0e
commit 41df734830
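For context, here is a minimal usage sketch (not part of this commit) showing how the two new options are meant to be combined: the DB-wide default lives in TransactionDBOptions, and an individual transaction can override it through TransactionOptions. The path and threshold values are illustrative assumptions; the surrounding calls (TransactionDB::Open, BeginTransaction) are the existing RocksDB transaction API.

// Hypothetical usage sketch; the path and threshold values are illustrative.
#include "rocksdb/utilities/transaction_db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  rocksdb::TransactionDBOptions txn_db_options;
  txn_db_options.write_policy = rocksdb::TxnDBWritePolicy::WRITE_UNPREPARED;
  // New in this commit: DB-wide default; 0 or less means "never flush".
  txn_db_options.default_write_batch_flush_threshold = 16 * 1024;

  rocksdb::TransactionDB* db = nullptr;
  rocksdb::Status s =
      rocksdb::TransactionDB::Open(options, txn_db_options, "/tmp/wu_db", &db);
  if (!s.ok()) {
    return 1;
  }

  // New in this commit: per-transaction override; leaving it at -1 falls back
  // to default_write_batch_flush_threshold above.
  rocksdb::TransactionOptions txn_options;
  txn_options.write_batch_flush_threshold = 4 * 1024;

  rocksdb::Transaction* txn =
      db->BeginTransaction(rocksdb::WriteOptions(), txn_options);
  s = txn->Put("key", "value");
  s = txn->Commit();

  delete txn;
  delete db;
  return 0;
}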
@@ -101,6 +101,11 @@ struct TransactionDBOptions {
   // ordering rather than concurrency control.
   bool skip_concurrency_control = false;
 
+  // This option is only valid for write unprepared. If a write batch exceeds
+  // this threshold, then the transaction will implicitly flush the currently
+  // pending writes into the database. A value of 0 or less means no limit.
+  ssize_t default_write_batch_flush_threshold = 0;
+
  private:
   // 128 entries
   size_t wp_snapshot_cache_bits = static_cast<size_t>(7);

@@ -162,6 +167,11 @@ struct TransactionOptions {
   // back/commit before new transactions start.
   // Default: false
   bool skip_concurrency_control = false;
+
+  // See TransactionDBOptions::default_write_batch_flush_threshold for
+  // description. If a negative value is specified, then the default value from
+  // TransactionDBOptions is used.
+  ssize_t write_batch_flush_threshold = -1;
 };
 
 // The per-write optimizations that do not involve transactions. TransactionDB
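The two declarations above interact as follows: the per-transaction write_batch_flush_threshold wins unless it is negative, in which case the DB-wide default applies, and a resolved value of 0 or less disables the implicit flush entirely. A minimal sketch of that resolution rule (resolve_flush_threshold is an illustrative helper, not a RocksDB API; the real logic lives in WriteUnpreparedTxn further down in this diff):

// Illustrative helper mirroring the precedence described in the comments above.
#include <sys/types.h>  // ssize_t

ssize_t resolve_flush_threshold(ssize_t txn_value, ssize_t db_default) {
  // A negative per-transaction value means "unspecified": use the DB default.
  // A resolved value of 0 or less means the batch is never implicitly flushed.
  return txn_value < 0 ? db_default : txn_value;
}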
@@ -5303,16 +5303,8 @@ TEST_P(TransactionTest, MemoryLimitTest) {
   ASSERT_EQ(2, txn->GetNumPuts());
 
   s = txn->Put(Slice("b"), Slice("...."));
-  auto pdb = reinterpret_cast<PessimisticTransactionDB*>(db);
-  // For write unprepared, write batches exceeding max_write_batch_size will
-  // just flush to DB instead of returning a memory limit error.
-  if (pdb->GetTxnDBOptions().write_policy != WRITE_UNPREPARED) {
-    ASSERT_TRUE(s.IsMemoryLimit());
-    ASSERT_EQ(2, txn->GetNumPuts());
-  } else {
-    ASSERT_OK(s);
-    ASSERT_EQ(3, txn->GetNumPuts());
-  }
+  ASSERT_TRUE(s.IsMemoryLimit());
+  ASSERT_EQ(2, txn->GetNumPuts());
 
   txn->Rollback();
   delete txn;
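Because write unprepared no longer zeroes the batch's max-bytes setting (see the constructor change further down), TransactionOptions::max_write_batch_size is enforced the same way under every write policy, which is why the WRITE_UNPREPARED branch above could be dropped. A hedged sketch of the behavior the simplified assertions rely on (the function name and limit value are illustrative; db is assumed to be an open TransactionDB):

// Illustrative sketch: a small max_write_batch_size now yields a MemoryLimit
// status under write unprepared too, instead of silently flushing to the DB.
#include <cassert>
#include "rocksdb/utilities/transaction_db.h"

void MemoryLimitExample(rocksdb::TransactionDB* db) {
  rocksdb::TransactionOptions txn_options;
  txn_options.max_write_batch_size = 32;  // deliberately tiny (illustrative)

  rocksdb::Transaction* txn =
      db->BeginTransaction(rocksdb::WriteOptions(), txn_options);
  rocksdb::Status s;
  // Keep writing until the in-memory batch limit is exceeded; the failing Put
  // leaves the batch unchanged and returns a memory-limit status.
  while ((s = txn->Put("key", "value")).ok()) {
  }
  assert(s.IsMemoryLimit());
  txn->Rollback();
  delete txn;
}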
@@ -157,7 +157,7 @@ TEST_P(WriteUnpreparedTransactionTest, ReadYourOwnWriteStress) {
   Transaction* txn;
   TransactionOptions txn_options;
   // batch_size of 1 causes writes to DB for every marker.
-  txn_options.max_write_batch_size = 1;
+  txn_options.write_batch_flush_threshold = 1;
   ReadOptions read_options;
 
   for (uint32_t i = 0; i < kNumIter; i++) {

@@ -311,7 +311,7 @@ TEST_P(WriteUnpreparedTransactionTest, RecoveryTest) {
 
   // batch_size of 1 causes writes to DB for every marker.
   for (size_t batch_size : {1, 1000000}) {
-    txn_options.max_write_batch_size = batch_size;
+    txn_options.write_batch_flush_threshold = batch_size;
     for (bool empty : {true, false}) {
       for (Action a : {UNPREPARED, ROLLBACK, COMMIT}) {
         for (int num_batches = 1; num_batches < 10; num_batches++) {

@@ -332,7 +332,7 @@ TEST_P(WriteUnpreparedTransactionTest, RecoveryTest) {
           txn->SetName("xid");
           for (int i = 0; i < num_batches; i++) {
             ASSERT_OK(txn->Put("k" + ToString(i), "value" + ToString(i)));
-            if (txn_options.max_write_batch_size == 1) {
+            if (txn_options.write_batch_flush_threshold == 1) {
               ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), i + 1);
             } else {
               ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), 0);

@@ -398,7 +398,7 @@ TEST_P(WriteUnpreparedTransactionTest, UnpreparedBatch) {
 
   // batch_size of 1 causes writes to DB for every marker.
   for (size_t batch_size : {1, 1000000}) {
-    txn_options.max_write_batch_size = batch_size;
+    txn_options.write_batch_flush_threshold = batch_size;
     for (bool prepare : {false, true}) {
       for (bool commit : {false, true}) {
         ReOpen();

@@ -408,7 +408,7 @@ TEST_P(WriteUnpreparedTransactionTest, UnpreparedBatch) {
 
       for (int i = 0; i < kNumKeys; i++) {
         txn->Put("k" + ToString(i), "v" + ToString(i));
-        if (txn_options.max_write_batch_size == 1) {
+        if (txn_options.write_batch_flush_threshold == 1) {
           ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), i + 1);
         } else {
           ASSERT_EQ(wup_txn->GetUnpreparedSequenceNumbers().size(), 0);

@@ -457,7 +457,7 @@ TEST_P(WriteUnpreparedTransactionTest, MarkLogWithPrepSection) {
   WriteOptions write_options;
   TransactionOptions txn_options;
   // batch_size of 1 causes writes to DB for every marker.
-  txn_options.max_write_batch_size = 1;
+  txn_options.write_batch_flush_threshold = 1;
   const int kNumKeys = 10;
 
   WriteOptions wopts;
@@ -35,13 +35,12 @@ WriteUnpreparedTxn::WriteUnpreparedTxn(WriteUnpreparedTxnDB* txn_db,
       wupt_db_(txn_db),
       recovered_txn_(false),
       largest_validated_seq_(0) {
-  max_write_batch_size_ = txn_options.max_write_batch_size;
-  // We set max bytes to zero so that we don't get a memory limit error.
-  // Instead of trying to keep write batch strictly under the size limit, we
-  // just flush to DB when the limit is exceeded in write unprepared, to avoid
-  // having retry logic. This also allows very big key-value pairs that exceed
-  // max bytes to succeed.
-  write_batch_.SetMaxBytes(0);
+  if (txn_options.write_batch_flush_threshold < 0) {
+    write_batch_flush_threshold_ =
+        txn_db_impl_->GetTxnDBOptions().default_write_batch_flush_threshold;
+  } else {
+    write_batch_flush_threshold_ = txn_options.write_batch_flush_threshold;
+  }
 }
 
 WriteUnpreparedTxn::~WriteUnpreparedTxn() {

@@ -71,8 +70,13 @@ WriteUnpreparedTxn::~WriteUnpreparedTxn() {
 
 void WriteUnpreparedTxn::Initialize(const TransactionOptions& txn_options) {
   PessimisticTransaction::Initialize(txn_options);
-  max_write_batch_size_ = txn_options.max_write_batch_size;
-  write_batch_.SetMaxBytes(0);
+  if (txn_options.write_batch_flush_threshold < 0) {
+    write_batch_flush_threshold_ =
+        txn_db_impl_->GetTxnDBOptions().default_write_batch_flush_threshold;
+  } else {
+    write_batch_flush_threshold_ = txn_options.write_batch_flush_threshold;
+  }
+
   unprep_seqs_.clear();
   recovered_txn_ = false;
   largest_validated_seq_ = 0;

@@ -222,8 +226,9 @@ Status WriteUnpreparedTxn::RebuildFromWriteBatch(WriteBatch* wb) {
 Status WriteUnpreparedTxn::MaybeFlushWriteBatchToDB() {
   const bool kPrepared = true;
   Status s;
-  if (max_write_batch_size_ != 0 &&
-      write_batch_.GetDataSize() > max_write_batch_size_) {
+  if (write_batch_flush_threshold_ > 0 &&
+      write_batch_.GetDataSize() >
+          static_cast<size_t>(write_batch_flush_threshold_)) {
     assert(GetState() != PREPARED);
     s = FlushWriteBatchToDB(!kPrepared);
   }
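At the user level, the check above means that once the pending batch grows past the threshold, the next write spills it into the DB as an unprepared batch. A hedged sketch with a deliberately tiny threshold, following the same pattern as the tests above (the function name is illustrative; db is assumed to be a TransactionDB opened with WRITE_UNPREPARED):

// Illustrative sketch: with a 1-byte threshold, every Put pushes the batch
// past the limit, so each write is expected to be flushed to the DB as an
// unprepared batch instead of accumulating in memory.
#include <string>
#include "rocksdb/utilities/transaction_db.h"

void TinyThresholdExample(rocksdb::TransactionDB* db) {
  rocksdb::TransactionOptions txn_options;
  txn_options.write_batch_flush_threshold = 1;

  rocksdb::Transaction* txn =
      db->BeginTransaction(rocksdb::WriteOptions(), txn_options);
  txn->SetName("tiny_threshold_example");
  for (int i = 0; i < 10; i++) {
    // Each Put exceeds the threshold, triggering MaybeFlushWriteBatchToDB().
    txn->Put("k" + std::to_string(i), "v" + std::to_string(i));
  }
  // Commit covers the previously flushed unprepared batches as well.
  txn->Commit();
  delete txn;
}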
@@ -164,10 +164,10 @@ class WriteUnpreparedTxn : public WritePreparedTxn {
   Status HandleWrite(std::function<Status()> do_write);
 
   // For write unprepared, we check on every writebatch append to see if
-  // max_write_batch_size_ has been exceeded, and then call
+  // write_batch_flush_threshold_ has been exceeded, and then call
   // FlushWriteBatchToDB if so. This logic is encapsulated in
   // MaybeFlushWriteBatchToDB.
-  size_t max_write_batch_size_;
+  ssize_t write_batch_flush_threshold_;
   WriteUnpreparedTxnDB* wupt_db_;
 
   // Ordered list of unprep_seq sequence numbers that we have already written