mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-27 20:43:57 +00:00
151242ce46
Summary: **Context:** The existing stat `rocksdb.sst.read.micros` aggregates reads from compaction and flush instead of reporting each of them separately, which makes it hard to understand the IO read behavior of each. **Summary** - Update `StopWatch` and `RandomAccessFileReader` to record `rocksdb.sst.read.micros` and `rocksdb.file.read.{flush/compaction}.micros` - Fixed the default histogram in `RandomAccessFileReader` - New field `ReadOptions/IOOptions::io_activity`; Pass `ReadOptions` through paths under db open, flush and compaction to where we can prepare `IOOptions` and pass it to `RandomAccessFileReader` - Use `thread_status_util` for assertion in `DbStressFSWrapper` to continuously test that we are passing the correct `io_activity` under db open, flush and compaction Pull Request resolved: https://github.com/facebook/rocksdb/pull/11288 Test Plan: - **Stress test** - **Db bench 1: rocksdb.sst.read.micros COUNT ≈ sum of the COUNTs of rocksdb.file.read.flush.micros and rocksdb.file.read.compaction.micros.** (without blob) - May not be exactly the same because `HistogramStat::Add` only guarantees atomicity, not accuracy, across threads. 
``` ./db_bench -db=/dev/shm/testdb/ -statistics=true -benchmarks="fillseq" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=false -compression_type=none -bloom_bits=3 (-use_plain_table=1 -prefix_size=10) ``` ``` // BlockBasedTable rocksdb.sst.read.micros P50 : 2.009374 P95 : 4.968548 P99 : 8.110362 P100 : 43.000000 COUNT : 40456 SUM : 114805 rocksdb.file.read.flush.micros P50 : 1.871841 P95 : 3.872407 P99 : 5.540541 P100 : 43.000000 COUNT : 2250 SUM : 6116 rocksdb.file.read.compaction.micros P50 : 2.023109 P95 : 5.029149 P99 : 8.196910 P100 : 26.000000 COUNT : 38206 SUM : 108689 // PlainTable Does not apply ``` - **Db bench 2: performance** **Read** SETUP: db with 900 files ``` ./db_bench -db=/dev/shm/testdb/ -benchmarks="fillseq" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -disable_auto_compactions=true -target_file_size_base=655 -compression_type=none ```run till convergence ``` ./db_bench -seed=1678564177044286 -use_existing_db=true -db=/dev/shm/testdb -benchmarks=readrandom[-X60] -statistics=true -num=1000000 -disable_auto_compactions=true -compression_type=none -bloom_bits=3 ``` Pre-change `readrandom [AVG 60 runs] : 21568 (± 248) ops/sec` Post-change (no regression, -0.3%) `readrandom [AVG 60 runs] : 21486 (± 236) ops/sec` **Compaction/Flush**run till convergence ``` ./db_bench -db=/dev/shm/testdb2/ -seed=1678564177044286 -benchmarks="fillseq[-X60]" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -disable_auto_compactions=false -target_file_size_base=655 -compression_type=none rocksdb.sst.read.micros COUNT : 33820 rocksdb.sst.read.flush.micros COUNT : 1800 rocksdb.sst.read.compaction.micros COUNT : 32020 ``` Pre-change `fillseq [AVG 46 runs] : 1391 (± 214) ops/sec; 0.7 (± 0.1) MB/sec` Post-change (no regression, ~-0.4%) `fillseq [AVG 46 runs] : 1385 (± 216) ops/sec; 0.7 (± 0.1) MB/sec` Reviewed By: ajkr Differential Revision: D44007011 Pulled By: hx235 
fbshipit-source-id: a54c89e4846dfc9a135389edf3f3eedfea257132
114 lines
4 KiB
C++
114 lines
4 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// This file defines the structures for thread operation and state.
// Thread operations are used to describe the high-level action of a
// thread, such as doing compaction or flush, while thread states
// are used to describe lower-level actions such as reading /
// writing a file or waiting for a mutex. Operations and states
// are designed to be independent. Typically, a thread is involved
// in one operation and one state at any specific point in time.
|
|
|
|
#pragma once
|
|
|
|
#include <string>
|
|
|
|
#include "rocksdb/thread_status.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
#ifdef ROCKSDB_USING_THREAD_STATUS
|
|
|
|
// The structure that describes a major thread operation.
|
|
struct OperationInfo {
|
|
const ThreadStatus::OperationType type;
|
|
const std::string name;
|
|
};
|
|
|
|
// The global operation table.
|
|
//
|
|
// When updating a status of a thread, the pointer of the OperationInfo
|
|
// of the current ThreadStatusData will be pointing to one of the
|
|
// rows in this global table.
|
|
//
|
|
// Note that it's not designed to be constant as in the future we
|
|
// might consider adding global count to the OperationInfo.
|
|
static OperationInfo global_operation_table[] = {
|
|
{ThreadStatus::OP_UNKNOWN, ""},
|
|
{ThreadStatus::OP_COMPACTION, "Compaction"},
|
|
{ThreadStatus::OP_FLUSH, "Flush"},
|
|
{ThreadStatus::OP_DBOPEN, "DBOpen"}};
|
|
|
|
struct OperationStageInfo {
|
|
const ThreadStatus::OperationStage stage;
|
|
const std::string name;
|
|
};
|
|
|
|
// A table maintains the mapping from stage type to stage string.
|
|
// Note that the string must be changed accordingly when the
|
|
// associated function name changed.
|
|
static OperationStageInfo global_op_stage_table[] = {
|
|
{ThreadStatus::STAGE_UNKNOWN, ""},
|
|
{ThreadStatus::STAGE_FLUSH_RUN, "FlushJob::Run"},
|
|
{ThreadStatus::STAGE_FLUSH_WRITE_L0, "FlushJob::WriteLevel0Table"},
|
|
{ThreadStatus::STAGE_COMPACTION_PREPARE, "CompactionJob::Prepare"},
|
|
{ThreadStatus::STAGE_COMPACTION_RUN, "CompactionJob::Run"},
|
|
{ThreadStatus::STAGE_COMPACTION_PROCESS_KV,
|
|
"CompactionJob::ProcessKeyValueCompaction"},
|
|
{ThreadStatus::STAGE_COMPACTION_INSTALL, "CompactionJob::Install"},
|
|
{ThreadStatus::STAGE_COMPACTION_SYNC_FILE,
|
|
"CompactionJob::FinishCompactionOutputFile"},
|
|
{ThreadStatus::STAGE_PICK_MEMTABLES_TO_FLUSH,
|
|
"MemTableList::PickMemtablesToFlush"},
|
|
{ThreadStatus::STAGE_MEMTABLE_ROLLBACK,
|
|
"MemTableList::RollbackMemtableFlush"},
|
|
{ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS,
|
|
"MemTableList::TryInstallMemtableFlushResults"},
|
|
};
|
|
|
|
// The structure that describes a state.
|
|
struct StateInfo {
|
|
const ThreadStatus::StateType type;
|
|
const std::string name;
|
|
};
|
|
|
|
// The global state table.
|
|
//
|
|
// When updating a status of a thread, the pointer of the StateInfo
|
|
// of the current ThreadStatusData will be pointing to one of the
|
|
// rows in this global table.
|
|
static StateInfo global_state_table[] = {
|
|
{ThreadStatus::STATE_UNKNOWN, ""},
|
|
{ThreadStatus::STATE_MUTEX_WAIT, "Mutex Wait"},
|
|
};
|
|
|
|
// The structure that describes one property of an operation: it pairs an
// integer property code with the name string associated with that code.
struct OperationProperty {
  // Integer code of the property. The tables in this file initialize it
  // from ThreadStatus property enumerators.
  int code;
  // The name string paired with `code`.
  std::string name;
};
|
|
|
|
// The table of compaction operation properties: each row pairs a
// ThreadStatus compaction property code with its name string.
//
// NOTE(review): the rows look like they follow the declaration order of the
// ThreadStatus compaction property enumerators -- confirm before adding or
// reordering rows.
static OperationProperty compaction_operation_properties[] = {
    {ThreadStatus::COMPACTION_JOB_ID, "JobID"},
    {ThreadStatus::COMPACTION_INPUT_OUTPUT_LEVEL, "InputOutputLevel"},
    {ThreadStatus::COMPACTION_PROP_FLAGS, "Manual/Deletion/Trivial"},
    {ThreadStatus::COMPACTION_TOTAL_INPUT_BYTES, "TotalInputBytes"},
    {ThreadStatus::COMPACTION_BYTES_READ, "BytesRead"},
    {ThreadStatus::COMPACTION_BYTES_WRITTEN, "BytesWritten"},
};
|
|
|
|
// The table of flush operation properties: each row pairs a ThreadStatus
// flush property code with its name string.
//
// NOTE(review): the rows look like they follow the declaration order of the
// ThreadStatus flush property enumerators -- confirm before adding or
// reordering rows.
static OperationProperty flush_operation_properties[] = {
    {ThreadStatus::FLUSH_JOB_ID, "JobID"},
    {ThreadStatus::FLUSH_BYTES_MEMTABLES, "BytesMemtables"},
    {ThreadStatus::FLUSH_BYTES_WRITTEN, "BytesWritten"},
};
|
|
|
|
#else
|
|
|
|
// Empty stand-in for OperationInfo, compiled when
// ROCKSDB_USING_THREAD_STATUS is not defined, so the type name still exists.
struct OperationInfo {};
|
|
|
|
// Empty stand-in for StateInfo, compiled when ROCKSDB_USING_THREAD_STATUS
// is not defined, so the type name still exists.
struct StateInfo {};
|
|
|
|
#endif // ROCKSDB_USING_THREAD_STATUS
|
|
} // namespace ROCKSDB_NAMESPACE
|