Add the statistics and info log for Error handler (#8050)

Summary:
Add statistics and info log for error handler: counters for bg error, bg io error, bg retryable io error, auto resume, auto resume total retry, and auto resume sucess; Histogram for auto resume retry count in each recovery call.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8050

Test Plan: make check and add test to error_handler_fs_test

Reviewed By: anand1976

Differential Revision: D26990565

Pulled By: zhichao-cao

fbshipit-source-id: 49f71e8ea4e9db8b189943976404205b56ab883f
This commit is contained in:
Zhichao Cao 2021-03-17 22:36:42 -07:00 committed by Facebook GitHub Bot
parent 27d57a035e
commit 08ec5e7321
9 changed files with 232 additions and 8 deletions

View File

@ -21,6 +21,7 @@
* Add new options for db_bench --benchmarks: flush, waitforcompaction, compact0, compact1.
* Add an option to BackupEngine::GetBackupInfo to include the name and size of each backed-up file. Especially in the presence of file sharing among backups, this offers detailed insight into backup space usage.
* Enable backward iteration on keys with user-defined timestamps.
* Add statistics and info log for error handler: counters for bg error, bg io error, bg retryable io error, auto resume count, auto resume total retry number, and auto resume sucess; Histogram for auto resume retry count in each recovery call. Note that, each auto resume attempt will have one or multiple retries.
## 6.18.0 (02/19/2021)
### Behavior Changes

View File

@ -4,9 +4,11 @@
// (found in the LICENSE.Apache file in the root directory).
//
#include "db/error_handler.h"
#include "db/db_impl/db_impl.h"
#include "db/event_helpers.h"
#include "file/sst_file_manager_impl.h"
#include "logging/logging.h"
namespace ROCKSDB_NAMESPACE {
@ -274,6 +276,12 @@ const Status& ErrorHandler::SetBGError(const Status& bg_err,
return bg_err;
}
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT);
}
ROCKS_LOG_INFO(db_options_.info_log,
"ErrorHandler: Set regular background error\n");
bool paranoid = db_options_.paranoid_checks;
Status::Severity sev = Status::Severity::kFatalError;
Status new_bg_err;
@ -399,6 +407,13 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
if (recovery_in_prog_ && recovery_error_.ok()) {
recovery_error_ = bg_err;
}
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT);
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT);
}
ROCKS_LOG_INFO(
db_options_.info_log,
"ErrorHandler: Set background IO error as unrecoverable error\n");
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
&bg_err, db_mutex_, &auto_recovery);
recover_context_ = context;
@ -416,12 +431,26 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
&new_bg_io_err, db_mutex_,
&auto_recovery);
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT);
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT);
RecordTick(bg_error_stats_.get(),
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT);
}
ROCKS_LOG_INFO(db_options_.info_log,
"ErrorHandler: Set background retryable IO error\n");
if (BackgroundErrorReason::kCompaction == reason) {
// We map the retryable IO error during compaction to soft error. Since
// compaction can reschedule by itself. We will not set the BG error in
// this case
// TODO: a better way to set or clean the retryable IO error which
// happens during compaction SST file write.
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT);
}
ROCKS_LOG_INFO(
db_options_.info_log,
"ErrorHandler: Compaction will schedule by itself to resume\n");
return bg_error_;
} else if (BackgroundErrorReason::kFlushNoWAL == reason ||
BackgroundErrorReason::kManifestWriteNoWAL == reason) {
@ -455,6 +484,9 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
return StartRecoverFromRetryableBGIOError(bg_io_err);
}
} else {
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT);
}
return SetBGError(new_bg_io_err, reason);
}
}
@ -603,7 +635,12 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
// Auto resume BG error is not enabled, directly return bg_error_.
return bg_error_;
}
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT);
}
ROCKS_LOG_INFO(
db_options_.info_log,
"ErrorHandler: Call StartRecoverFromRetryableBGIOError to resume\n");
if (recovery_thread_) {
// In this case, if recovery_in_prog_ is false, current thread should
// wait the previous recover thread to finish and create a new thread
@ -642,6 +679,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
DBRecoverContext context = recover_context_;
int resume_count = db_options_.max_bgerror_resume_count;
uint64_t wait_interval = db_options_.bgerror_resume_retry_interval;
uint64_t retry_count = 0;
// Recover from the retryable error. Create a separate thread to do it.
while (resume_count > 0) {
if (end_recovery_) {
@ -651,15 +689,24 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume1");
recovery_io_error_ = IOStatus::OK();
recovery_error_ = Status::OK();
retry_count++;
Status s = db_->ResumeImpl(context);
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume0");
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume1");
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(),
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT);
}
if (s.IsShutdownInProgress() ||
bg_error_.severity() >= Status::Severity::kFatalError) {
// If DB shutdown in progress or the error severity is higher than
// Hard Error, stop auto resume and returns.
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverFail0");
recovery_in_prog_ = false;
if (bg_error_stats_ != nullptr) {
RecordInHistogram(bg_error_stats_.get(),
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
}
return;
}
if (!recovery_io_error_.ok() &&
@ -686,6 +733,12 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
bg_error_.PermitUncheckedError();
EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners,
old_bg_error, db_mutex_);
if (bg_error_stats_ != nullptr) {
RecordTick(bg_error_stats_.get(),
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT);
RecordInHistogram(bg_error_stats_.get(),
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
}
recovery_in_prog_ = false;
if (soft_error_no_bg_work_) {
soft_error_no_bg_work_ = false;
@ -696,6 +749,10 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
// In this case: 1) recovery_io_error is more serious or not retryable
// 2) other Non IO recovery_error happens. The auto recovery stops.
recovery_in_prog_ = false;
if (bg_error_stats_ != nullptr) {
RecordInHistogram(bg_error_stats_.get(),
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
}
return;
}
}
@ -703,6 +760,10 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
}
recovery_in_prog_ = false;
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:LoopOut");
if (bg_error_stats_ != nullptr) {
RecordInHistogram(bg_error_stats_.get(),
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
}
return;
#else
return;

View File

@ -37,7 +37,8 @@ class ErrorHandler {
db_mutex_(db_mutex),
auto_recovery_(false),
recovery_in_prog_(false),
soft_error_no_bg_work_(false) {
soft_error_no_bg_work_(false),
bg_error_stats_(db_options.statistics) {
// Clear the checked flag for uninitialized errors
bg_error_.PermitUncheckedError();
recovery_error_.PermitUncheckedError();
@ -108,6 +109,9 @@ class ErrorHandler {
// Used to store the context for recover, such as flush reason.
DBRecoverContext recover_context_;
// The pointer of DB statistics.
std::shared_ptr<Statistics> bg_error_stats_;
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
void RecoverFromNoSpace();
const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);

View File

@ -158,6 +158,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteError) {
options.env = fault_env_.get();
options.create_if_missing = true;
options.listeners.emplace_back(listener);
options.statistics = CreateDBStatistics();
Status s;
listener->EnableAutoRecovery(false);
@ -174,13 +175,25 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteError) {
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
ASSERT_OK(s);
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_IO_ERROR_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
Reopen(options);
ASSERT_EQ("val", Get(Key(0)));
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Options options = GetDefaultOptions();
@ -188,6 +201,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
options.create_if_missing = true;
options.listeners.emplace_back(listener);
options.max_bgerror_resume_count = 0;
options.statistics = CreateDBStatistics();
Status s;
listener->EnableAutoRecovery(false);
@ -207,6 +221,18 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
ASSERT_OK(s);
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_IO_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
Reopen(options);
ASSERT_EQ("val1", Get(Key(1)));
@ -241,7 +267,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritFileScopeError) {
TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Options options = GetDefaultOptions();
@ -325,7 +351,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritFileScopeError) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError1) {
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Options options = GetDefaultOptions();
@ -333,6 +359,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
options.create_if_missing = true;
options.listeners.emplace_back(listener);
options.max_bgerror_resume_count = 0;
options.statistics = CreateDBStatistics();
Status s;
listener->EnableAutoRecovery(false);
@ -363,11 +390,23 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
s = Flush();
ASSERT_OK(s);
ASSERT_EQ("val3", Get(Key(3)));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_IO_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError2) {
TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError2) {
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Options options = GetDefaultOptions();
@ -410,7 +449,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError2) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError3) {
TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError3) {
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Options options = GetDefaultOptions();
@ -1010,6 +1049,7 @@ TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) {
options.env = fault_env_.get();
options.create_if_missing = true;
options.listeners.emplace_back(listener);
options.statistics = CreateDBStatistics();
Status s;
listener->EnableAutoRecovery();
@ -1028,6 +1068,18 @@ TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) {
s = Put(Key(1), "val");
ASSERT_OK(s);
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_IO_ERROR_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
Reopen(options);
ASSERT_EQ("val", Get(Key(0)));
@ -1567,6 +1619,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
options.listeners.emplace_back(listener);
options.max_bgerror_resume_count = 2;
options.bgerror_resume_retry_interval = 100000; // 0.1 second
options.statistics = CreateDBStatistics();
Status s;
listener->EnableAutoRecovery(false);
@ -1594,6 +1647,22 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
ASSERT_EQ("val1", Get(Key(1)));
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
ASSERT_EQ(3, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_ERROR_COUNT));
ASSERT_EQ(3, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_IO_ERROR_COUNT));
ASSERT_EQ(3, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(2, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
HistogramData autoresume_retry;
options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
&autoresume_retry);
ASSERT_EQ(autoresume_retry.max, 2);
ASSERT_OK(Put(Key(2), "val2", wo));
s = Flush();
// Since auto resume fails, the bg error is not cleand, flush will
@ -1620,6 +1689,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
options.listeners.emplace_back(listener);
options.max_bgerror_resume_count = 2;
options.bgerror_resume_retry_interval = 100000; // 0.1 second
options.statistics = CreateDBStatistics();
Status s;
listener->EnableAutoRecovery(false);
@ -1643,6 +1713,22 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
fault_fs_->SetFilesystemActive(true);
ASSERT_EQ(listener->WaitForRecovery(5000000), true);
ASSERT_EQ("val1", Get(Key(1)));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_IO_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
HistogramData autoresume_retry;
options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
&autoresume_retry);
ASSERT_EQ(autoresume_retry.max, 1);
ASSERT_OK(Put(Key(2), "val2", wo));
s = Flush();
// Since auto resume is successful, the bg error is cleaned, flush will

View File

@ -374,6 +374,15 @@ enum Tickers : uint32_t {
// # of files deleted immediately by sst file manger through delete scheduler.
FILES_DELETED_IMMEDIATELY,
// The counters for error handler, not that, bg_io_error is the subset of
// bg_error and bg_retryable_io_error is the subset of bg_io_error
ERROR_HANDLER_BG_ERROR_COUNT,
ERROR_HANDLER_BG_IO_ERROR_COUNT,
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT,
ERROR_HANDLER_AUTORESUME_COUNT,
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT,
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT,
TICKER_ENUM_MAX
};
@ -472,6 +481,9 @@ enum Histograms : uint32_t {
// Num of sst files read from file system per level.
NUM_SST_READ_PER_LEVEL,
// Error handler statistics
ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
HISTOGRAM_ENUM_MAX,
};

View File

@ -4982,7 +4982,20 @@ class TickerTypeJni {
return -0x14;
case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL:
return -0x15;
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT:
return -0x16;
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT:
return -0x17;
case ROCKSDB_NAMESPACE::Tickers::
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT:
return -0x18;
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT:
return -0x19;
case ROCKSDB_NAMESPACE::Tickers::
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT:
return -0x1A;
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT:
return -0x1B;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F for backwards compatibility on current minor version.
return 0x5F;
@ -5294,6 +5307,21 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_PERIODIC;
case -0x15:
return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL;
case -0x16:
return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT;
case -0x17:
return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT;
case -0x18:
return ROCKSDB_NAMESPACE::Tickers::
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT;
case -0x19:
return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT;
case -0x1A:
return ROCKSDB_NAMESPACE::Tickers::
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT;
case -0x1B:
return ROCKSDB_NAMESPACE::Tickers::
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT;
case 0x5F:
// 0x5F for backwards compatibility on current minor version.
return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX;
@ -5413,6 +5441,8 @@ class HistogramTypeJni {
return 0x30;
case ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL:
return 0x31;
case ROCKSDB_NAMESPACE::Histograms::ERROR_HANDLER_AUTORESUME_RETRY_COUNT:
return 0x31;
case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
// 0x1F for backwards compatibility on current minor version.
return 0x1F;
@ -5527,6 +5557,9 @@ class HistogramTypeJni {
return ROCKSDB_NAMESPACE::Histograms::NUM_DATA_BLOCKS_READ_PER_LEVEL;
case 0x31:
return ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL;
case 0x32:
return ROCKSDB_NAMESPACE::Histograms::
ERROR_HANDLER_AUTORESUME_RETRY_COUNT;
case 0x1F:
// 0x1F for backwards compatibility on current minor version.
return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;

View File

@ -175,6 +175,11 @@ public enum HistogramType {
*/
NUM_SST_READ_PER_LEVEL((byte) 0x31),
/**
* The number of retry in auto resume
*/
ERROR_HANDLER_AUTORESUME_RETRY_COUNT((byte) 0x32),
// 0x1F for backwards compatibility on current minor version.
HISTOGRAM_ENUM_MAX((byte) 0x1F);

View File

@ -742,6 +742,16 @@ public enum TickerType {
COMPACT_WRITE_BYTES_PERIODIC((byte) -0x14),
COMPACT_WRITE_BYTES_TTL((byte) -0x15),
/**
* DB error handler statistics
*/
ERROR_HANDLER_BG_ERROR_COUNT((byte) -0x16),
ERROR_HANDLER_BG_IO_ERROR_COUNT((byte) -0x17),
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT((byte) -0x18),
ERROR_HANDLER_AUTORESUME_COUNT((byte) -0x19),
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT((byte) -0x1A),
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT((byte) -0x1B),
TICKER_ENUM_MAX((byte) 0x5F);
private final byte value;

View File

@ -191,6 +191,16 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
"rocksdb.block.cache.compression.dict.add.redundant"},
{FILES_MARKED_TRASH, "rocksdb.files.marked.trash"},
{FILES_DELETED_IMMEDIATELY, "rocksdb.files.deleted.immediately"},
{ERROR_HANDLER_BG_ERROR_COUNT, "rocksdb.error.handler.bg.errro.count"},
{ERROR_HANDLER_BG_IO_ERROR_COUNT,
"rocksdb.error.handler.bg.io.errro.count"},
{ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT,
"rocksdb.error.handler.bg.retryable.io.errro.count"},
{ERROR_HANDLER_AUTORESUME_COUNT, "rocksdb.error.handler.autoresume.count"},
{ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT,
"rocksdb.error.handler.autoresume.retry.total.count"},
{ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT,
"rocksdb.error.handler.autoresume.success.count"},
};
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
@ -246,6 +256,8 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
"rocksdb.num.index.and.filter.blocks.read.per.level"},
{NUM_DATA_BLOCKS_READ_PER_LEVEL, "rocksdb.num.data.blocks.read.per.level"},
{NUM_SST_READ_PER_LEVEL, "rocksdb.num.sst.read.per.level"},
{ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
"rocksdb.error.handler.autoresume.retry.count"},
};
std::shared_ptr<Statistics> CreateDBStatistics() {