mirror of https://github.com/facebook/rocksdb.git
Fix double counting of BYTES_WRITTEN ticker (#12111)
Summary: Fixes https://github.com/facebook/rocksdb/issues/12061. We were double counting the `BYTES_WRITTEN` ticker when doing writes with transactions. During transactions, after writing, a client can call `Prepare()`, which writes the values to WAL but not to the Memtable. After that, they can call `Commit()`, which writes a commit marker to the WAL and the values to Memtable. The cause of this bug is previously during writes, we didn't take into account `writer->ShouldWriteToMemtable()` before adding to `total_byte_size`, so it is still added to during the `Prepare()` phase even though we're not writing to the Memtable, which was why we saw the value to be double of what's written to WAL. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12111 Test Plan: Added a test in `db/db_statistics_test.cc` that tests writes with and without transactions, by comparing the values of `BYTES_WRITTEN` and `WAL_FILE_BYTES` after doing writes. Reviewed By: jaykorean Differential Revision: D51954327 Pulled By: jowlyzhang fbshipit-source-id: 57a0986a14e5b94eb5188715d819212529110d2c
This commit is contained in:
parent
a143f93236
commit
44fd914128
|
@ -437,10 +437,10 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
|
||||||
valid_batches += writer->batch_cnt;
|
valid_batches += writer->batch_cnt;
|
||||||
if (writer->ShouldWriteToMemtable()) {
|
if (writer->ShouldWriteToMemtable()) {
|
||||||
total_count += WriteBatchInternal::Count(writer->batch);
|
total_count += WriteBatchInternal::Count(writer->batch);
|
||||||
|
total_byte_size = WriteBatchInternal::AppendedByteSize(
|
||||||
|
total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
|
||||||
parallel = parallel && !writer->batch->HasMerge();
|
parallel = parallel && !writer->batch->HasMerge();
|
||||||
}
|
}
|
||||||
total_byte_size = WriteBatchInternal::AppendedByteSize(
|
|
||||||
total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
|
|
||||||
if (writer->pre_release_callback) {
|
if (writer->pre_release_callback) {
|
||||||
pre_release_callback_cnt++;
|
pre_release_callback_cnt++;
|
||||||
}
|
}
|
||||||
|
@ -720,11 +720,11 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
|
||||||
if (writer->ShouldWriteToMemtable()) {
|
if (writer->ShouldWriteToMemtable()) {
|
||||||
writer->sequence = next_sequence;
|
writer->sequence = next_sequence;
|
||||||
size_t count = WriteBatchInternal::Count(writer->batch);
|
size_t count = WriteBatchInternal::Count(writer->batch);
|
||||||
|
total_byte_size = WriteBatchInternal::AppendedByteSize(
|
||||||
|
total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
|
||||||
next_sequence += count;
|
next_sequence += count;
|
||||||
total_count += count;
|
total_count += count;
|
||||||
}
|
}
|
||||||
total_byte_size = WriteBatchInternal::AppendedByteSize(
|
|
||||||
total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (w.disable_wal) {
|
if (w.disable_wal) {
|
||||||
|
|
|
@ -6,9 +6,11 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "db/db_test_util.h"
|
#include "db/db_test_util.h"
|
||||||
|
#include "db/write_batch_internal.h"
|
||||||
#include "monitoring/thread_status_util.h"
|
#include "monitoring/thread_status_util.h"
|
||||||
#include "port/stack_trace.h"
|
#include "port/stack_trace.h"
|
||||||
#include "rocksdb/statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
|
#include "rocksdb/utilities/transaction_db.h"
|
||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
@ -283,6 +285,78 @@ TEST_F(DBStatisticsTest, BlockChecksumStats) {
|
||||||
options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
|
options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DBStatisticsTest, BytesWrittenStats) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
|
||||||
|
options.statistics->set_stats_level(StatsLevel::kExceptHistogramOrTimers);
|
||||||
|
Reopen(options);
|
||||||
|
|
||||||
|
EXPECT_EQ(0, options.statistics->getAndResetTickerCount(WAL_FILE_BYTES));
|
||||||
|
EXPECT_EQ(0, options.statistics->getAndResetTickerCount(BYTES_WRITTEN));
|
||||||
|
|
||||||
|
const int kNumKeysWritten = 100;
|
||||||
|
|
||||||
|
// Scenario 0: Not using transactions.
|
||||||
|
// This will write to WAL and memtable directly.
|
||||||
|
ASSERT_OK(options.statistics->Reset());
|
||||||
|
|
||||||
|
for (int i = 0; i < kNumKeysWritten; ++i) {
|
||||||
|
ASSERT_OK(Put(Key(i), "val"));
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(options.statistics->getAndResetTickerCount(WAL_FILE_BYTES),
|
||||||
|
options.statistics->getAndResetTickerCount(BYTES_WRITTEN));
|
||||||
|
|
||||||
|
// Scenario 1: Using transactions.
|
||||||
|
// This should not double count BYTES_WRITTEN (issue #12061).
|
||||||
|
for (bool enable_pipelined_write : {false, true}) {
|
||||||
|
ASSERT_OK(options.statistics->Reset());
|
||||||
|
|
||||||
|
// Destroy the DB to recreate as a TransactionDB.
|
||||||
|
Destroy(options, true);
|
||||||
|
|
||||||
|
// Create a TransactionDB.
|
||||||
|
TransactionDB* txn_db = nullptr;
|
||||||
|
TransactionDBOptions txn_db_opts;
|
||||||
|
txn_db_opts.write_policy = TxnDBWritePolicy::WRITE_COMMITTED;
|
||||||
|
options.enable_pipelined_write = enable_pipelined_write;
|
||||||
|
ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db));
|
||||||
|
ASSERT_NE(txn_db, nullptr);
|
||||||
|
db_ = txn_db->GetBaseDB();
|
||||||
|
|
||||||
|
WriteOptions wopts;
|
||||||
|
TransactionOptions txn_opts;
|
||||||
|
Transaction* txn = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
|
||||||
|
ASSERT_NE(txn, nullptr);
|
||||||
|
ASSERT_OK(txn->SetName("txn1"));
|
||||||
|
|
||||||
|
for (int i = 0; i < kNumKeysWritten; ++i) {
|
||||||
|
ASSERT_OK(txn->Put(Key(i), "val"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prepare() writes to WAL, but not to memtable. (WriteCommitted)
|
||||||
|
ASSERT_OK(txn->Prepare());
|
||||||
|
EXPECT_NE(0, options.statistics->getTickerCount(WAL_FILE_BYTES));
|
||||||
|
// BYTES_WRITTEN would have been non-zero previously (issue #12061).
|
||||||
|
EXPECT_EQ(0, options.statistics->getTickerCount(BYTES_WRITTEN));
|
||||||
|
|
||||||
|
// Commit() writes to memtable and also a commit marker to WAL.
|
||||||
|
ASSERT_OK(txn->Commit());
|
||||||
|
delete txn;
|
||||||
|
|
||||||
|
// The WAL has an extra header of size `kHeader` written to it,
|
||||||
|
// as we are writing twice to it (first during Prepare, second during
|
||||||
|
// Commit).
|
||||||
|
EXPECT_EQ(options.statistics->getAndResetTickerCount(WAL_FILE_BYTES),
|
||||||
|
options.statistics->getAndResetTickerCount(BYTES_WRITTEN) +
|
||||||
|
WriteBatchInternal::kHeader);
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
|
db_ = nullptr;
|
||||||
|
delete txn_db;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
|
@ -74,7 +74,7 @@ extern thread_local PerfContext perf_context;
|
||||||
#define PERF_COUNTER_ADD(metric, value) \
|
#define PERF_COUNTER_ADD(metric, value) \
|
||||||
if (perf_level >= PerfLevel::kEnableCount) { \
|
if (perf_level >= PerfLevel::kEnableCount) { \
|
||||||
perf_context.metric += value; \
|
perf_context.metric += value; \
|
||||||
} \
|
} \
|
||||||
static_assert(true, "semicolon required")
|
static_assert(true, "semicolon required")
|
||||||
|
|
||||||
// Increase metric value
|
// Increase metric value
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Fix double counting of BYTES_WRITTEN ticker when doing writes with transactions.
|
Loading…
Reference in New Issue