rocksdb/include/rocksdb/statistics.h
anand76 c21fe1a47f Add ticker stats for read corruption retries (#12923)
Summary:
Add a couple of ticker stats for corruption retry count and successful retries. This PR also eliminates an extra read attempt when there's a checksum mismatch in a block read from the prefetch buffer.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12923

Test Plan: Update existing tests

Reviewed By: jowlyzhang

Differential Revision: D61024687

Pulled By: anand1976

fbshipit-source-id: 3a08403580ab244000e0d480b7ee0f5a03d76b06
2024-08-12 15:32:07 -07:00

800 lines
30 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "rocksdb/customizable.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
/**
* Identifiers for the ticker statistics that are passed (as uint32_t) to
* Statistics::recordTick(), Statistics::getTickerCount() and friends.
*
* Keep adding tickers here. Note that the C++ enum values, unlike the values in
* the Java bindings, are not guaranteed to be stable; also, the C++ and Java
* values for any given ticker are not guaranteed to match.
* 1. Add the new ticker before TICKER_ENUM_MAX.
* 2. Add a readable string in TickersNameMap below for the newly added ticker.
* 3. Add a corresponding enum value to TickerType.java in the Java API.
* 4. Add the enum conversions from/to Java/C++ to portal.h's toJavaTickerType
* and toCppTickers.
*/
enum Tickers : uint32_t {
// total block cache misses
// REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS +
// BLOCK_CACHE_FILTER_MISS +
// BLOCK_CACHE_DATA_MISS;
BLOCK_CACHE_MISS = 0,
// total block cache hits
// REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT +
// BLOCK_CACHE_FILTER_HIT +
// BLOCK_CACHE_DATA_HIT;
BLOCK_CACHE_HIT,
// # of blocks added to block cache.
BLOCK_CACHE_ADD,
// # of failures when adding blocks to block cache.
BLOCK_CACHE_ADD_FAILURES,
// # of times cache miss when accessing index block from block cache.
BLOCK_CACHE_INDEX_MISS,
// # of times cache hit when accessing index block from block cache.
BLOCK_CACHE_INDEX_HIT,
// # of index blocks added to block cache.
BLOCK_CACHE_INDEX_ADD,
// # of bytes of index blocks inserted into cache
BLOCK_CACHE_INDEX_BYTES_INSERT,
// # of times cache miss when accessing filter block from block cache.
BLOCK_CACHE_FILTER_MISS,
// # of times cache hit when accessing filter block from block cache.
BLOCK_CACHE_FILTER_HIT,
// # of filter blocks added to block cache.
BLOCK_CACHE_FILTER_ADD,
// # of bytes of bloom filter blocks inserted into cache
BLOCK_CACHE_FILTER_BYTES_INSERT,
// # of times cache miss when accessing data block from block cache.
BLOCK_CACHE_DATA_MISS,
// # of times cache hit when accessing data block from block cache.
BLOCK_CACHE_DATA_HIT,
// # of data blocks added to block cache.
BLOCK_CACHE_DATA_ADD,
// # of bytes of data blocks inserted into cache
BLOCK_CACHE_DATA_BYTES_INSERT,
// # of bytes read from cache.
BLOCK_CACHE_BYTES_READ,
// # of bytes written into cache.
BLOCK_CACHE_BYTES_WRITE,
// NOTE(review): judging by the names, these are the compression dictionary
// block counterparts of the per-block-type tickers above (miss / hit / add /
// bytes inserted) -- confirm against the recording sites.
BLOCK_CACHE_COMPRESSION_DICT_MISS,
BLOCK_CACHE_COMPRESSION_DICT_HIT,
BLOCK_CACHE_COMPRESSION_DICT_ADD,
BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT,
// # of blocks redundantly inserted into block cache.
// REQUIRES: BLOCK_CACHE_ADD_REDUNDANT <= BLOCK_CACHE_ADD
BLOCK_CACHE_ADD_REDUNDANT,
// # of index blocks redundantly inserted into block cache.
// REQUIRES: BLOCK_CACHE_INDEX_ADD_REDUNDANT <= BLOCK_CACHE_INDEX_ADD
BLOCK_CACHE_INDEX_ADD_REDUNDANT,
// # of filter blocks redundantly inserted into block cache.
// REQUIRES: BLOCK_CACHE_FILTER_ADD_REDUNDANT <= BLOCK_CACHE_FILTER_ADD
BLOCK_CACHE_FILTER_ADD_REDUNDANT,
// # of data blocks redundantly inserted into block cache.
// REQUIRES: BLOCK_CACHE_DATA_ADD_REDUNDANT <= BLOCK_CACHE_DATA_ADD
BLOCK_CACHE_DATA_ADD_REDUNDANT,
// # of dict blocks redundantly inserted into block cache.
// REQUIRES: BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT
// <= BLOCK_CACHE_COMPRESSION_DICT_ADD
BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT,
// Secondary cache statistics
SECONDARY_CACHE_HITS,
// Fine grained secondary cache stats
SECONDARY_CACHE_FILTER_HITS,
SECONDARY_CACHE_INDEX_HITS,
SECONDARY_CACHE_DATA_HITS,
// Compressed secondary cache related stats
COMPRESSED_SECONDARY_CACHE_DUMMY_HITS,
COMPRESSED_SECONDARY_CACHE_HITS,
COMPRESSED_SECONDARY_CACHE_PROMOTIONS,
COMPRESSED_SECONDARY_CACHE_PROMOTION_SKIPS,
// # of times bloom filter has avoided file reads, i.e., negatives.
BLOOM_FILTER_USEFUL,
// # of times bloom FullFilter has not avoided the reads.
BLOOM_FILTER_FULL_POSITIVE,
// # of times bloom FullFilter has not avoided the reads and data actually
// exist.
BLOOM_FILTER_FULL_TRUE_POSITIVE,
// Prefix filter stats when used for point lookups (Get / MultiGet).
// (For prefix filter stats on iterators, see *_LEVEL_SEEK_*.)
// Checked: filter was queried
BLOOM_FILTER_PREFIX_CHECKED,
// Useful: filter returned false so prevented accessing data+index blocks
BLOOM_FILTER_PREFIX_USEFUL,
// True positive: found a key matching the point query. When another key
// with the same prefix matches, it is considered a false positive by
// these statistics even though the filter returned a true positive.
BLOOM_FILTER_PREFIX_TRUE_POSITIVE,
// # persistent cache hit
PERSISTENT_CACHE_HIT,
// # persistent cache miss
PERSISTENT_CACHE_MISS,
// # total simulation block cache hits
SIM_BLOCK_CACHE_HIT,
// # total simulation block cache misses
SIM_BLOCK_CACHE_MISS,
// # of memtable hits.
MEMTABLE_HIT,
// # of memtable misses.
MEMTABLE_MISS,
// # of Get() queries served by L0
GET_HIT_L0,
// # of Get() queries served by L1
GET_HIT_L1,
// # of Get() queries served by L2 and up
GET_HIT_L2_AND_UP,
/**
* COMPACTION_KEY_DROP_* count the reasons for key drop during compaction
* There are 4 reasons currently.
*/
COMPACTION_KEY_DROP_NEWER_ENTRY, // key was written with a newer value.
// Also includes keys dropped for range del.
COMPACTION_KEY_DROP_OBSOLETE, // The key is obsolete.
COMPACTION_KEY_DROP_RANGE_DEL, // key was covered by a range tombstone.
COMPACTION_KEY_DROP_USER, // user compaction function has dropped the key.
COMPACTION_RANGE_DEL_DROP_OBSOLETE, // all keys in range were deleted.
// Deletions obsoleted before bottom level due to file gap optimization.
COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
// If a compaction was canceled in sfm to prevent ENOSPC
COMPACTION_CANCELLED,
// Number of keys written to the database via the Put and Write calls
NUMBER_KEYS_WRITTEN,
// Number of keys read
NUMBER_KEYS_READ,
// Number of keys updated, if inplace update is enabled
NUMBER_KEYS_UPDATED,
// The number of uncompressed bytes issued by DB::Put(), DB::Delete(),
// DB::Merge(), and DB::Write().
BYTES_WRITTEN,
// The number of uncompressed bytes read from DB::Get(). It could be
// either from memtables, cache, or table files.
// For the number of logical bytes read from DB::MultiGet(),
// please use NUMBER_MULTIGET_BYTES_READ.
BYTES_READ,
// The number of calls to seek/next/prev
NUMBER_DB_SEEK,
NUMBER_DB_NEXT,
NUMBER_DB_PREV,
// The number of calls to seek/next/prev that returned data
NUMBER_DB_SEEK_FOUND,
NUMBER_DB_NEXT_FOUND,
NUMBER_DB_PREV_FOUND,
// The number of uncompressed bytes read from an iterator.
// Includes size of key and value.
ITER_BYTES_READ,
// Number of internal keys skipped by Iterator
NUMBER_ITER_SKIP,
// Number of times we had to reseek inside an iteration to skip
// over large number of keys with same userkey.
NUMBER_OF_RESEEKS_IN_ITERATION,
NO_ITERATOR_CREATED, // number of iterators created
NO_ITERATOR_DELETED, // number of iterators deleted
// NOTE(review): presumably the number of file opens / file errors, reading
// the NO_ prefix as "number of" like the iterator tickers above -- confirm.
NO_FILE_OPENS,
NO_FILE_ERRORS,
// Writer has to wait for compaction or flush to finish.
STALL_MICROS,
// The wait time for db mutex.
// Disabled by default. To enable it set stats level to kAll
DB_MUTEX_WAIT_MICROS,
// Number of MultiGet calls, keys read, and bytes read
NUMBER_MULTIGET_CALLS,
NUMBER_MULTIGET_KEYS_READ,
NUMBER_MULTIGET_BYTES_READ,
// Number of keys actually found in MultiGet calls (vs number requested by
// caller)
// NUMBER_MULTIGET_KEYS_READ gives the number requested by caller
NUMBER_MULTIGET_KEYS_FOUND,
NUMBER_MERGE_FAILURES,
// Record the number of calls to GetUpdatesSince. Useful to keep track of
// transaction log iterator refreshes
GET_UPDATES_SINCE_CALLS,
WAL_FILE_SYNCED, // Number of times WAL sync is done
WAL_FILE_BYTES, // Number of bytes written to WAL
// Writes can be processed by requesting thread or by the thread at the
// head of the writers queue.
WRITE_DONE_BY_SELF,
WRITE_DONE_BY_OTHER, // Equivalent to writes done for others
WRITE_WITH_WAL, // Number of Write calls that request WAL
COMPACT_READ_BYTES, // Bytes read during compaction
COMPACT_WRITE_BYTES, // Bytes written during compaction
FLUSH_WRITE_BYTES, // Bytes written during flush
// Compaction read and write statistics broken down by CompactionReason
COMPACT_READ_BYTES_MARKED,
COMPACT_READ_BYTES_PERIODIC,
COMPACT_READ_BYTES_TTL,
COMPACT_WRITE_BYTES_MARKED,
COMPACT_WRITE_BYTES_PERIODIC,
COMPACT_WRITE_BYTES_TTL,
// Number of table's properties loaded directly from file, without creating
// table reader object.
NUMBER_DIRECT_LOAD_TABLE_PROPERTIES,
NUMBER_SUPERVERSION_ACQUIRES,
NUMBER_SUPERVERSION_RELEASES,
NUMBER_SUPERVERSION_CLEANUPS,
// # of compressions/decompressions executed
NUMBER_BLOCK_COMPRESSED,
NUMBER_BLOCK_DECOMPRESSED,
// Number of input bytes (uncompressed) to compression for SST blocks that
// are stored compressed.
BYTES_COMPRESSED_FROM,
// Number of output bytes (compressed) from compression for SST blocks that
// are stored compressed.
BYTES_COMPRESSED_TO,
// Number of uncompressed bytes for SST blocks that are stored uncompressed
// because compression type is kNoCompression, or some error case caused
// compression not to run or produce an output. Index blocks are only counted
// if enable_index_compression is true.
BYTES_COMPRESSION_BYPASSED,
// Number of input bytes (uncompressed) to compression for SST blocks that
// are stored uncompressed because the compression result was rejected,
// either because the ratio was not acceptable (see
// CompressionOptions::max_compressed_bytes_per_kb) or found invalid by the
// `verify_compression` option.
BYTES_COMPRESSION_REJECTED,
// Like BYTES_COMPRESSION_BYPASSED but counting number of blocks
NUMBER_BLOCK_COMPRESSION_BYPASSED,
// Like BYTES_COMPRESSION_REJECTED but counting number of blocks
NUMBER_BLOCK_COMPRESSION_REJECTED,
// Number of input bytes (compressed) to decompression in reading compressed
// SST blocks from storage.
BYTES_DECOMPRESSED_FROM,
// Number of output bytes (uncompressed) from decompression in reading
// compressed SST blocks from storage.
BYTES_DECOMPRESSED_TO,
// Tickers that record cumulative time.
MERGE_OPERATION_TOTAL_TIME,
FILTER_OPERATION_TOTAL_TIME,
COMPACTION_CPU_TOTAL_TIME,
// Row cache.
ROW_CACHE_HIT,
ROW_CACHE_MISS,
// Read amplification statistics.
// Read amplification can be calculated using this formula
// (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES)
//
// REQUIRES: ReadOptions::read_amp_bytes_per_bit to be enabled
READ_AMP_ESTIMATE_USEFUL_BYTES, // Estimate of total bytes actually used.
READ_AMP_TOTAL_READ_BYTES, // Total size of loaded data blocks.
// Number of refill intervals where rate limiter's bytes are fully consumed.
NUMBER_RATE_LIMITER_DRAINS,
// BlobDB specific stats
// # of Put/PutTTL/PutUntil to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_PUT,
// # of Write to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_WRITE,
// # of Get to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_GET,
// # of MultiGet to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_MULTIGET,
// # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. Only
// applicable to legacy BlobDB.
BLOB_DB_NUM_SEEK,
// # of Next to BlobDB iterator. Only applicable to legacy BlobDB.
BLOB_DB_NUM_NEXT,
// # of Prev to BlobDB iterator. Only applicable to legacy BlobDB.
BLOB_DB_NUM_PREV,
// # of keys written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_KEYS_WRITTEN,
// # of keys read from BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_KEYS_READ,
// # of bytes (key + value) written to BlobDB. Only applicable to legacy
// BlobDB.
BLOB_DB_BYTES_WRITTEN,
// # of bytes (keys + value) read from BlobDB. Only applicable to legacy
// BlobDB.
BLOB_DB_BYTES_READ,
// # of keys written by BlobDB as non-TTL inlined value. Only applicable to
// legacy BlobDB.
BLOB_DB_WRITE_INLINED,
// # of keys written by BlobDB as TTL inlined value. Only applicable to legacy
// BlobDB.
BLOB_DB_WRITE_INLINED_TTL,
// # of keys written by BlobDB as non-TTL blob value. Only applicable to
// legacy BlobDB.
BLOB_DB_WRITE_BLOB,
// # of keys written by BlobDB as TTL blob value. Only applicable to legacy
// BlobDB.
BLOB_DB_WRITE_BLOB_TTL,
// # of bytes written to blob file.
BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
// # of bytes read from blob file.
BLOB_DB_BLOB_FILE_BYTES_READ,
// # of times a blob file is synced.
BLOB_DB_BLOB_FILE_SYNCED,
// # of blob index evicted from base DB by BlobDB compaction filter because
// of expiration. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EXPIRED_COUNT,
// size of blob index evicted from base DB by BlobDB compaction filter
// because of expiration. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EXPIRED_SIZE,
// # of blob index evicted from base DB by BlobDB compaction filter because
// of corresponding file deleted. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EVICTED_COUNT,
// size of blob index evicted from base DB by BlobDB compaction filter
// because of corresponding file deleted. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EVICTED_SIZE,
// # of blob files that were obsoleted by garbage collection. Only applicable
// to legacy BlobDB.
BLOB_DB_GC_NUM_FILES,
// # of blob files generated by garbage collection. Only applicable to legacy
// BlobDB.
BLOB_DB_GC_NUM_NEW_FILES,
// # of BlobDB garbage collection failures. Only applicable to legacy BlobDB.
BLOB_DB_GC_FAILURES,
// # of keys relocated to new blob file by garbage collection.
BLOB_DB_GC_NUM_KEYS_RELOCATED,
// # of bytes relocated to new blob file by garbage collection.
BLOB_DB_GC_BYTES_RELOCATED,
// # of blob files evicted because BlobDB is full. Only applicable to
// legacy BlobDB.
BLOB_DB_FIFO_NUM_FILES_EVICTED,
// # of keys in the blob files evicted because BlobDB is full. Only
// applicable to legacy BlobDB.
BLOB_DB_FIFO_NUM_KEYS_EVICTED,
// # of bytes in the blob files evicted because BlobDB is full. Only
// applicable to legacy BlobDB.
BLOB_DB_FIFO_BYTES_EVICTED,
// Integrated BlobDB specific stats
// # of times cache miss when accessing blob from blob cache.
BLOB_DB_CACHE_MISS,
// # of times cache hit when accessing blob from blob cache.
BLOB_DB_CACHE_HIT,
// # of data blocks added to blob cache.
BLOB_DB_CACHE_ADD,
// # of failures when adding blobs to blob cache.
BLOB_DB_CACHE_ADD_FAILURES,
// # of bytes read from blob cache.
BLOB_DB_CACHE_BYTES_READ,
// # of bytes written into blob cache.
BLOB_DB_CACHE_BYTES_WRITE,
// These counters indicate a performance issue in WritePrepared transactions.
// We should not see them ticking much.
// # of times prepare_mutex_ is acquired in the fast path.
TXN_PREPARE_MUTEX_OVERHEAD,
// # of times old_commit_map_mutex_ is acquired in the fast path.
TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD,
// # of times we checked a batch for duplicate keys.
TXN_DUPLICATE_KEY_OVERHEAD,
// # of times snapshot_mutex_ is acquired in the fast path.
TXN_SNAPSHOT_MUTEX_OVERHEAD,
// # of times ::Get returned TryAgain due to expired snapshot seq
TXN_GET_TRY_AGAIN,
// # of files marked as trash by sst file manager and will be deleted
// later by background thread.
FILES_MARKED_TRASH,
// # of trash files deleted by the background thread from the trash queue.
FILES_DELETED_FROM_TRASH_QUEUE,
// # of files deleted immediately by sst file manager through delete
// scheduler.
FILES_DELETED_IMMEDIATELY,
// The counters for error handler. Note that bg_io_error is a subset of
// bg_error and bg_retryable_io_error is a subset of bg_io_error.
ERROR_HANDLER_BG_ERROR_COUNT,
ERROR_HANDLER_BG_IO_ERROR_COUNT,
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT,
ERROR_HANDLER_AUTORESUME_COUNT,
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT,
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT,
// Statistics for memtable garbage collection:
// Raw bytes of data (payload) present on memtable at flush time.
MEMTABLE_PAYLOAD_BYTES_AT_FLUSH,
// Outdated bytes of data present on memtable at flush time.
MEMTABLE_GARBAGE_BYTES_AT_FLUSH,
// Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs.
VERIFY_CHECKSUM_READ_BYTES,
// Bytes read/written while creating backups
BACKUP_READ_BYTES,
BACKUP_WRITE_BYTES,
// Remote compaction read/write statistics
REMOTE_COMPACT_READ_BYTES,
REMOTE_COMPACT_WRITE_BYTES,
// Tiered storage related statistics
HOT_FILE_READ_BYTES,
WARM_FILE_READ_BYTES,
COLD_FILE_READ_BYTES,
HOT_FILE_READ_COUNT,
WARM_FILE_READ_COUNT,
COLD_FILE_READ_COUNT,
// Last level and non-last level read statistics
LAST_LEVEL_READ_BYTES,
LAST_LEVEL_READ_COUNT,
NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT,
// Statistics on iterator Seek() (and variants) for each sorted run. I.e. a
// single user Seek() can result in many sorted run Seek()s.
// The stats are split between last level and non-last level.
// Filtered: a filter such as prefix Bloom filter indicates the Seek() would
// not find anything relevant, so avoided a likely access to data+index
// blocks.
LAST_LEVEL_SEEK_FILTERED,
// Filter match: a filter such as prefix Bloom filter was queried but did
// not filter out the seek.
LAST_LEVEL_SEEK_FILTER_MATCH,
// At least one data block was accessed for a Seek() (or variant) on a
// sorted run.
LAST_LEVEL_SEEK_DATA,
// At least one value() was accessed for the seek (suggesting it was useful),
// and no filter such as prefix Bloom was queried.
LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER,
// At least one value() was accessed for the seek (suggesting it was useful),
// after querying a filter such as prefix Bloom.
LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH,
// The same set of stats, but for non-last level seeks.
NON_LAST_LEVEL_SEEK_FILTERED,
NON_LAST_LEVEL_SEEK_FILTER_MATCH,
NON_LAST_LEVEL_SEEK_DATA,
NON_LAST_LEVEL_SEEK_DATA_USEFUL_NO_FILTER,
NON_LAST_LEVEL_SEEK_DATA_USEFUL_FILTER_MATCH,
// Number of block checksum verifications
BLOCK_CHECKSUM_COMPUTE_COUNT,
// Number of times RocksDB detected a corruption while verifying a block
// checksum. RocksDB does not remember corruptions that happened during user
// reads so the same block corruption may be detected multiple times.
BLOCK_CHECKSUM_MISMATCH_COUNT,
MULTIGET_COROUTINE_COUNT,
// Time spent in the ReadAsync file system call
READ_ASYNC_MICROS,
// Number of errors returned to the async read callback
ASYNC_READ_ERROR_COUNT,
// Number of lookups into the prefetched tail (see
// `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
// that can't find their data for table open
TABLE_OPEN_PREFETCH_TAIL_MISS,
// Number of lookups into the prefetched tail (see
// `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
// that find their data for table open
TABLE_OPEN_PREFETCH_TAIL_HIT,
// Statistics on the filtering by user-defined timestamps
// # of times timestamps are checked on accessing the table
TIMESTAMP_FILTER_TABLE_CHECKED,
// # of times timestamps can successfully help skip the table access
TIMESTAMP_FILTER_TABLE_FILTERED,
// Number of times readahead is trimmed during scans when
// ReadOptions.auto_readahead_size is set.
READAHEAD_TRIMMED,
// Number of FIFO compactions that drop files based on different reasons
FIFO_MAX_SIZE_COMPACTIONS,
FIFO_TTL_COMPACTIONS,
// Number of bytes prefetched during user initiated scan
PREFETCH_BYTES,
// Number of prefetched bytes that were actually useful
PREFETCH_BYTES_USEFUL,
// Number of FS reads avoided due to scan prefetching
PREFETCH_HITS,
// Footer corruption detected when opening an SST file for reading
SST_FOOTER_CORRUPTION_COUNT,
// Counters for file read retries with the verify_and_reconstruct_read
// file system option after detecting a checksum mismatch
FILE_READ_CORRUPTION_RETRY_COUNT,
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,
// Sentinel, not a real ticker: must stay last (new tickers go before it).
TICKER_ENUM_MAX
};
// Pairs each ticker with its readable name. Only declared here (extern); the
// definition lives outside this header.
// The order of items listed in Tickers should be the same as
// the order listed in TickersNameMap
extern const std::vector<std::pair<Tickers, std::string>> TickersNameMap;
/**
* Identifiers for the histogram statistics that are passed (as uint32_t) to
* Statistics::recordInHistogram(), Statistics::histogramData() and friends.
*
* Keep adding histograms here. Note that the C++ enum values, unlike the values
* in the Java bindings, are not guaranteed to be stable; also, the C++ and Java
* values for any given histogram are not guaranteed to match.
* 1. Add the new histogram before HISTOGRAM_ENUM_MAX.
* 2. Add a readable string in HistogramsNameMap below for the newly added
* histogram.
* 3. Add a corresponding enum value to HistogramType.java in the Java API.
* 4. Add the enum conversions from/to Java/C++ to portal.h's
* toJavaHistogramsType and toCppHistograms.
*/
enum Histograms : uint32_t {
DB_GET = 0,
DB_WRITE,
COMPACTION_TIME,
COMPACTION_CPU_TIME,
SUBCOMPACTION_SETUP_TIME,
TABLE_SYNC_MICROS,
COMPACTION_OUTFILE_SYNC_MICROS,
WAL_FILE_SYNC_MICROS,
MANIFEST_FILE_SYNC_MICROS,
// Time spent in IO during table open
TABLE_OPEN_IO_MICROS,
DB_MULTIGET,
READ_BLOCK_COMPACTION_MICROS,
READ_BLOCK_GET_MICROS,
WRITE_RAW_BLOCK_MICROS,
NUM_FILES_IN_SINGLE_COMPACTION,
DB_SEEK,
WRITE_STALL,
// Time spent in reading block-based or plain SST table
SST_READ_MICROS,
// Time spent in reading SST table (currently only block-based table) or blob
// file corresponding to `Env::IOActivity`
FILE_READ_FLUSH_MICROS,
FILE_READ_COMPACTION_MICROS,
FILE_READ_DB_OPEN_MICROS,
// The following `FILE_READ_*` require stats level greater than
// `StatsLevel::kExceptDetailedTimers`
FILE_READ_GET_MICROS,
FILE_READ_MULTIGET_MICROS,
FILE_READ_DB_ITERATOR_MICROS,
FILE_READ_VERIFY_DB_CHECKSUM_MICROS,
FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS,
// Time spent in writing SST files
SST_WRITE_MICROS,
// Time spent in writing SST table (currently only block-based table) or blob
// file for flush, compaction or db open
FILE_WRITE_FLUSH_MICROS,
FILE_WRITE_COMPACTION_MICROS,
FILE_WRITE_DB_OPEN_MICROS,
// The number of subcompactions actually scheduled during a compaction
NUM_SUBCOMPACTIONS_SCHEDULED,
// Value size distribution in each operation
BYTES_PER_READ,
BYTES_PER_WRITE,
BYTES_PER_MULTIGET,
COMPRESSION_TIMES_NANOS,
DECOMPRESSION_TIMES_NANOS,
// Number of merge operands passed to the merge operator in user read
// requests.
READ_NUM_MERGE_OPERANDS,
// BlobDB specific stats
// Size of keys written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_KEY_SIZE,
// Size of values written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_VALUE_SIZE,
// BlobDB Put/PutWithTTL/PutUntil/Write latency. Only applicable to legacy
// BlobDB.
BLOB_DB_WRITE_MICROS,
// BlobDB Get latency. Only applicable to legacy BlobDB.
BLOB_DB_GET_MICROS,
// BlobDB MultiGet latency. Only applicable to legacy BlobDB.
BLOB_DB_MULTIGET_MICROS,
// BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. Only applicable to
// legacy BlobDB.
BLOB_DB_SEEK_MICROS,
// BlobDB Next latency. Only applicable to legacy BlobDB.
BLOB_DB_NEXT_MICROS,
// BlobDB Prev latency. Only applicable to legacy BlobDB.
BLOB_DB_PREV_MICROS,
// Blob file write latency.
BLOB_DB_BLOB_FILE_WRITE_MICROS,
// Blob file read latency.
BLOB_DB_BLOB_FILE_READ_MICROS,
// Blob file sync latency.
BLOB_DB_BLOB_FILE_SYNC_MICROS,
// BlobDB compression time.
BLOB_DB_COMPRESSION_MICROS,
// BlobDB decompression time.
BLOB_DB_DECOMPRESSION_MICROS,
// Time spent flushing memtable to disk
FLUSH_TIME,
SST_BATCH_SIZE,
// Number of IOs issued in parallel in a MultiGet batch
MULTIGET_IO_BATCH_SIZE,
// MultiGet stats logged per level
// Num of index and filter blocks read from file system per level.
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
// Num of sst files read from file system per level.
NUM_SST_READ_PER_LEVEL,
// Number of levels requiring IO for MultiGet
NUM_LEVEL_READ_PER_MULTIGET,
// Error handler statistics
ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
// Stats related to asynchronous read requests.
ASYNC_READ_BYTES,
POLL_WAIT_MICROS,
// Number of prefetched bytes discarded by RocksDB.
PREFETCHED_BYTES_DISCARDED,
// Wait time for aborting async read in FilePrefetchBuffer destructor
ASYNC_PREFETCH_ABORT_MICROS,
// Number of bytes read for RocksDB's prefetching contents (as opposed to file
// system's prefetch) from the end of SST table during block based table open
TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
// Sentinel, not a real histogram: must stay last (new histograms go before
// it). Statistics::HistEnabledForType() compares against this value.
HISTOGRAM_ENUM_MAX
};
// Pairs each histogram with its readable name. Only declared here (extern);
// the definition lives outside this header.
extern const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap;
// Summary values of a single histogram; this is the output type of
// Statistics::histogramData().
//
// Every member carries a default initializer, so a default-constructed
// HistogramData holds zeros instead of indeterminate values even when a
// Statistics implementation leaves some (or all) fields unwritten. The struct
// remains an aggregate, so positional brace-initialization keeps working.
struct HistogramData {
  double median = 0.0;
  double percentile95 = 0.0;
  double percentile99 = 0.0;
  double average = 0.0;
  double standard_deviation = 0.0;
  // zero-initialize new members since old Statistics::histogramData()
  // implementations won't write them.
  double max = 0.0;
  uint64_t count = 0;
  uint64_t sum = 0;
  double min = 0.0;
};
// StatsLevel can be used to reduce statistics overhead by skipping certain
// types of stats in the stats collection process.
// The enumerators are ordered from least to most collection, and callers
// compare levels numerically (e.g. Statistics::reportTimeToHistogram() does
// nothing at kExceptTimers or below).
// Usage:
// options.statistics->set_stats_level(StatsLevel::kExceptTimeForMutex);
enum StatsLevel : uint8_t {
// Disable all metrics
kDisableAll,
// Disable tickers. Alias of kDisableAll: both enumerators share one value.
kExceptTickers = kDisableAll,
// Disable timer stats, and skip histogram stats
kExceptHistogramOrTimers,
// Skip timer stats
kExceptTimers,
// Collect all stats except time inside mutex lock AND time spent on
// compression.
// This is the level a Statistics object starts with.
kExceptDetailedTimers,
// Collect all stats except the counters requiring to get time inside the
// mutex lock.
kExceptTimeForMutex,
// Collect all stats, including measuring duration of mutex operations.
// If getting time is expensive on the platform to run, it can
// reduce scalability to more threads, especially for writes.
kAll,
};
// Interface for collecting cumulative ticker and histogram statistics of a DB
// over time.
// Typical use:
//  Options options;
//  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
//  Status s = DB::Open(options, kDBPath, &db);
//  ...
//  options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
//  HistogramData hist;
//  options.statistics->histogramData(FLUSH_TIME, &hist);
//
// Overridden functions MUST NOT let exceptions escape into RocksDB: RocksDB
// is not exception-safe, so that could cause undefined behavior including
// data loss, unreported corruption, deadlocks, and more.
class Statistics : public Customizable {
 public:
  ~Statistics() override = default;

  static const char* Type() { return "Statistics"; }

  static Status CreateFromString(const ConfigOptions& opts,
                                 const std::string& value,
                                 std::shared_ptr<Statistics>* result);

  // Returns an empty name by default, for backwards compatibility. Derived
  // classes should override this method.
  // This default implementation will likely be removed in a future release
  const char* Name() const override { return ""; }

  // Ticker access. `tickerType` is a value of the Tickers enum.
  virtual uint64_t getTickerCount(uint32_t tickerType) const = 0;

  // Histogram access. `type` is a value of the Histograms enum; the result is
  // delivered through `data`.
  virtual void histogramData(uint32_t type,
                             HistogramData* const data) const = 0;

  // Default: an empty string.
  virtual std::string getHistogramString(uint32_t /*type*/) const {
    return std::string();
  }

  virtual void recordTick(uint32_t tickerType, uint64_t count = 1) = 0;
  virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
  virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0;

  // Forwards a time measurement to recordInHistogram(), but only when the
  // current stats level is above StatsLevel::kExceptTimers.
  virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) {
    if (get_stats_level() > StatsLevel::kExceptTimers) {
      recordInHistogram(histogramType, time);
    }
  }

  // Kept only for backward compatibility.
  // Custom Statistics implementations should override recordInHistogram()
  // and leave measureTime() untouched.
  virtual void measureTime(uint32_t /*histogramType*/, uint64_t /*time*/) {
    // Reaching this default body is unexpected.
    assert(false);
  }

  // Default: forwards to measureTime(), the old and inaccurate name of this
  // function, so that user-implemented statistics which override only
  // measureTime() stay backward compatible.
  virtual void recordInHistogram(uint32_t histogramType, uint64_t time) {
    measureTime(histogramType, time);
  }

  // Resets all ticker and histogram stats.
  // Default: reports NotSupported.
  virtual Status Reset() { return Status::NotSupported("Not implemented"); }

  using Customizable::ToString;
  // String representation of the statistic object. Must be thread-safe.
  // Default: a fixed placeholder message.
  virtual std::string ToString() const {
    return "ToString(): not implemented";
  }

  // Default: leaves the map untouched and returns false.
  virtual bool getTickerMap(std::map<std::string, uint64_t>* /*stats_map*/)
      const {
    return false;
  }

  // Override this function to disable particular histogram collection.
  // Default: enabled for every value below HISTOGRAM_ENUM_MAX.
  virtual bool HistEnabledForType(uint32_t type) const {
    return type < HISTOGRAM_ENUM_MAX;
  }

  // The stats level is kept in an atomic and accessed with relaxed ordering.
  void set_stats_level(StatsLevel sl) {
    stats_level_.store(sl, std::memory_order_relaxed);
  }
  StatsLevel get_stats_level() const {
    return stats_level_.load(std::memory_order_relaxed);
  }

 private:
  // Starts out at kExceptDetailedTimers.
  std::atomic<StatsLevel> stats_level_{StatsLevel::kExceptDetailedTimers};
};
// Create a concrete DBStatistics object.
// Returns it as a shared_ptr to the Statistics interface, e.g. for assignment
// to options.statistics.
std::shared_ptr<Statistics> CreateDBStatistics();
} // namespace ROCKSDB_NAMESPACE