Add ticker stats for read corruption retries (#12923)

Summary:
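Add two ticker stats, `FILE_READ_CORRUPTION_RETRY_COUNT` and `FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT`, which count file read retries attempted after a checksum mismatch and the retries that succeed. This PR also eliminates an extra read attempt when there's a checksum mismatch in a block read from the prefetch buffer.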

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12923

Test Plan: Update existing tests

Reviewed By: jowlyzhang

Differential Revision: D61024687

Pulled By: anand1976

fbshipit-source-id: 3a08403580ab244000e0d480b7ee0f5a03d76b06
This commit is contained in:
anand76 2024-08-12 15:32:07 -07:00 committed by Facebook GitHub Bot
parent b65e29a4a9
commit c21fe1a47f
10 changed files with 81 additions and 2 deletions

View File

@ -530,6 +530,12 @@ Status DBImpl::Recover(
/*no_error_if_files_missing=*/false, is_retry, /*no_error_if_files_missing=*/false, is_retry,
&desc_status); &desc_status);
desc_status.PermitUncheckedError(); desc_status.PermitUncheckedError();
if (is_retry) {
RecordTick(stats_, FILE_READ_CORRUPTION_RETRY_COUNT);
if (desc_status.ok()) {
RecordTick(stats_, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
}
if (can_retry) { if (can_retry) {
// If we're opening for the first time and the failure is likely due to // If we're opening for the first time and the failure is likely due to
// a corrupt MANIFEST file (could result in either the log::Reader // a corrupt MANIFEST file (could result in either the log::Reader

View File

@ -705,6 +705,7 @@ class DBIOCorruptionTest
DBIOCorruptionTest() : DBIOFailureTest() { DBIOCorruptionTest() : DBIOFailureTest() {
BlockBasedTableOptions bbto; BlockBasedTableOptions bbto;
options_ = CurrentOptions(); options_ = CurrentOptions();
options_.statistics = CreateDBStatistics();
base_env_ = env_; base_env_ = env_;
EXPECT_NE(base_env_, nullptr); EXPECT_NE(base_env_, nullptr);
@ -727,6 +728,8 @@ class DBIOCorruptionTest
Status ReopenDB() { return TryReopen(options_); } Status ReopenDB() { return TryReopen(options_); }
// Test helper: returns the raw pointer held by options_.statistics so the
// tests can read ticker counts via getTickerCount().
Statistics* stats() { return options_.statistics.get(); }
protected: protected:
std::unique_ptr<Env> env_guard_; std::unique_ptr<Env> env_guard_;
std::shared_ptr<CorruptionFS> fs_; std::shared_ptr<CorruptionFS> fs_;
@ -749,8 +752,12 @@ TEST_P(DBIOCorruptionTest, GetReadCorruptionRetry) {
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_EQ(val, "val1"); ASSERT_EQ(val, "val1");
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else { } else {
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
} }
} }
@ -773,8 +780,12 @@ TEST_P(DBIOCorruptionTest, IterReadCorruptionRetry) {
} }
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_OK(iter->status()); ASSERT_OK(iter->status());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else { } else {
ASSERT_TRUE(iter->status().IsCorruption()); ASSERT_TRUE(iter->status().IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
} }
delete iter; delete iter;
} }
@ -799,9 +810,13 @@ TEST_P(DBIOCorruptionTest, MultiGetReadCorruptionRetry) {
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_EQ(values[0].ToString(), "val1"); ASSERT_EQ(values[0].ToString(), "val1");
ASSERT_EQ(values[1].ToString(), "val2"); ASSERT_EQ(values[1].ToString(), "val2");
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else { } else {
ASSERT_TRUE(statuses[0].IsCorruption()); ASSERT_TRUE(statuses[0].IsCorruption());
ASSERT_TRUE(statuses[1].IsCorruption()); ASSERT_TRUE(statuses[1].IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
} }
} }
@ -818,6 +833,9 @@ TEST_P(DBIOCorruptionTest, CompactionReadCorruptionRetry) {
Status s = dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); Status s = dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
std::string val; std::string val;
ReadOptions ro; ReadOptions ro;
@ -826,6 +844,7 @@ TEST_P(DBIOCorruptionTest, CompactionReadCorruptionRetry) {
ASSERT_EQ(val, "val1"); ASSERT_EQ(val, "val1");
} else { } else {
ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(s.IsCorruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
} }
} }
@ -838,6 +857,9 @@ TEST_P(DBIOCorruptionTest, FlushReadCorruptionRetry) {
Status s = Flush(); Status s = Flush();
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_OK(s); ASSERT_OK(s);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
std::string val; std::string val;
ReadOptions ro; ReadOptions ro;
@ -846,6 +868,7 @@ TEST_P(DBIOCorruptionTest, FlushReadCorruptionRetry) {
ASSERT_EQ(val, "val1"); ASSERT_EQ(val, "val1");
} else { } else {
ASSERT_NOK(s); ASSERT_NOK(s);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
} }
} }
@ -862,8 +885,12 @@ TEST_P(DBIOCorruptionTest, ManifestCorruptionRetry) {
if (std::get<2>(GetParam())) { if (std::get<2>(GetParam())) {
ASSERT_OK(ReopenDB()); ASSERT_OK(ReopenDB());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 1);
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT),
1);
} else { } else {
ASSERT_EQ(ReopenDB(), Status::Corruption()); ASSERT_EQ(ReopenDB(), Status::Corruption());
ASSERT_EQ(stats()->getTickerCount(FILE_READ_CORRUPTION_RETRY_COUNT), 0);
} }
SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->DisableProcessing();
} }

View File

@ -529,6 +529,11 @@ enum Tickers : uint32_t {
// Footer corruption detected when opening an SST file for reading // Footer corruption detected when opening an SST file for reading
SST_FOOTER_CORRUPTION_COUNT, SST_FOOTER_CORRUPTION_COUNT,
// Counters for file read retries with the verify_and_reconstruct_read
// file system option after detecting a checksum mismatch
FILE_READ_CORRUPTION_RETRY_COUNT,
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,
TICKER_ENUM_MAX TICKER_ENUM_MAX
}; };

View File

@ -5269,6 +5269,10 @@ class TickerTypeJni {
return -0x53; return -0x53;
case ROCKSDB_NAMESPACE::Tickers::SST_FOOTER_CORRUPTION_COUNT: case ROCKSDB_NAMESPACE::Tickers::SST_FOOTER_CORRUPTION_COUNT:
return -0x55; return -0x55;
case ROCKSDB_NAMESPACE::Tickers::FILE_READ_CORRUPTION_RETRY_COUNT:
return -0x56;
case ROCKSDB_NAMESPACE::Tickers::FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT:
return -0x57;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// -0x54 is the max value at this time. Since these values are exposed // -0x54 is the max value at this time. Since these values are exposed
// directly to Java clients, we'll keep the value the same till the next // directly to Java clients, we'll keep the value the same till the next
@ -5726,6 +5730,11 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::PREFETCH_HITS; return ROCKSDB_NAMESPACE::Tickers::PREFETCH_HITS;
case -0x55: case -0x55:
return ROCKSDB_NAMESPACE::Tickers::SST_FOOTER_CORRUPTION_COUNT; return ROCKSDB_NAMESPACE::Tickers::SST_FOOTER_CORRUPTION_COUNT;
case -0x56:
return ROCKSDB_NAMESPACE::Tickers::FILE_READ_CORRUPTION_RETRY_COUNT;
case -0x57:
return ROCKSDB_NAMESPACE::Tickers::
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT;
case -0x54: case -0x54:
// -0x54 is the max value at this time. Since these values are exposed // -0x54 is the max value at this time. Since these values are exposed
// directly to Java clients, we'll keep the value the same till the next // directly to Java clients, we'll keep the value the same till the next

View File

@ -878,6 +878,10 @@ public enum TickerType {
SST_FOOTER_CORRUPTION_COUNT((byte) -0x55), SST_FOOTER_CORRUPTION_COUNT((byte) -0x55),
FILE_READ_CORRUPTION_RETRY_COUNT((byte) -0x56),
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT((byte) -0x57),
TICKER_ENUM_MAX((byte) -0x54); TICKER_ENUM_MAX((byte) -0x54);
private final byte value; private final byte value;

View File

@ -266,6 +266,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{PREFETCH_BYTES_USEFUL, "rocksdb.prefetch.bytes.useful"}, {PREFETCH_BYTES_USEFUL, "rocksdb.prefetch.bytes.useful"},
{PREFETCH_HITS, "rocksdb.prefetch.hits"}, {PREFETCH_HITS, "rocksdb.prefetch.hits"},
{SST_FOOTER_CORRUPTION_COUNT, "rocksdb.footer.corruption.count"}, {SST_FOOTER_CORRUPTION_COUNT, "rocksdb.footer.corruption.count"},
{FILE_READ_CORRUPTION_RETRY_COUNT,
"rocksdb.file.read.corruption.retry.count"},
{FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT,
"rocksdb.file.read.corruption.retry.success.count"},
}; };
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = { const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {

View File

@ -693,6 +693,10 @@ Status BlockBasedTable::Open(
s = ReadFooterFromFile(retry_opts, file.get(), *ioptions.fs, s = ReadFooterFromFile(retry_opts, file.get(), *ioptions.fs,
prefetch_buffer.get(), file_size, &footer, prefetch_buffer.get(), file_size, &footer,
kBlockBasedTableMagicNumber); kBlockBasedTableMagicNumber);
RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_COUNT);
if (s.ok()) {
RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
} }
} }
if (!s.ok()) { if (!s.ok()) {

View File

@ -223,13 +223,16 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
s = VerifyBlockChecksum(footer, data, handle.size(), s = VerifyBlockChecksum(footer, data, handle.size(),
rep_->file->file_name(), handle.offset()); rep_->file->file_name(), handle.offset());
RecordTick(ioptions.stats, BLOCK_CHECKSUM_COMPUTE_COUNT); RecordTick(ioptions.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
if (!s.ok()) {
RecordTick(ioptions.stats, BLOCK_CHECKSUM_MISMATCH_COUNT);
}
TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s); TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
if (!s.ok() && if (!s.ok() &&
CheckFSFeatureSupport(ioptions.fs.get(), CheckFSFeatureSupport(ioptions.fs.get(),
FSSupportedOps::kVerifyAndReconstructRead)) { FSSupportedOps::kVerifyAndReconstructRead)) {
assert(s.IsCorruption()); assert(s.IsCorruption());
assert(!ioptions.allow_mmap_reads); assert(!ioptions.allow_mmap_reads);
RecordTick(ioptions.stats, BLOCK_CHECKSUM_MISMATCH_COUNT); RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_COUNT);
// Repeat the read for this particular block using the regular // Repeat the read for this particular block using the regular
// synchronous Read API. We can use the same chunk of memory // synchronous Read API. We can use the same chunk of memory
@ -246,6 +249,10 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
assert(result.size() == BlockSizeWithTrailer(handle)); assert(result.size() == BlockSizeWithTrailer(handle));
s = VerifyBlockChecksum(footer, data, handle.size(), s = VerifyBlockChecksum(footer, data, handle.size(),
rep_->file->file_name(), handle.offset()); rep_->file->file_name(), handle.offset());
if (s.ok()) {
RecordTick(ioptions.stats,
FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
} else { } else {
s = io_s; s = io_s;
} }

View File

@ -84,7 +84,9 @@ inline bool BlockFetcher::TryGetFromPrefetchBuffer() {
if (io_status_.ok()) { if (io_status_.ok()) {
got_from_prefetch_buffer_ = true; got_from_prefetch_buffer_ = true;
used_buf_ = const_cast<char*>(slice_.data()); used_buf_ = const_cast<char*>(slice_.data());
} else if (!(io_status_.IsCorruption() && retry_corrupt_read_)) { } else if (io_status_.IsCorruption()) {
// Returning true apparently indicates we either got some data from
// the prefetch buffer, or we tried and encountered an error.
return true; return true;
} }
} }
@ -334,9 +336,15 @@ void BlockFetcher::ReadBlock(bool retry) {
ProcessTrailerIfPresent(); ProcessTrailerIfPresent();
} }
if (retry) {
RecordTick(ioptions_.stats, FILE_READ_CORRUPTION_RETRY_COUNT);
}
if (io_status_.ok()) { if (io_status_.ok()) {
InsertCompressedBlockToPersistentCacheIfNeeded(); InsertCompressedBlockToPersistentCacheIfNeeded();
fs_buf_ = std::move(read_req.fs_scratch); fs_buf_ = std::move(read_req.fs_scratch);
if (retry) {
RecordTick(ioptions_.stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
}
} else { } else {
ReleaseFileSystemProvidedBuffer(&read_req); ReleaseFileSystemProvidedBuffer(&read_req);
direct_io_buf_.reset(); direct_io_buf_.reset();
@ -355,7 +363,11 @@ IOStatus BlockFetcher::ReadBlockContents() {
return IOStatus::OK(); return IOStatus::OK();
} }
if (TryGetFromPrefetchBuffer()) { if (TryGetFromPrefetchBuffer()) {
if (io_status_.IsCorruption() && retry_corrupt_read_) {
ReadBlock(/*retry=*/true);
}
if (!io_status_.ok()) { if (!io_status_.ok()) {
assert(!fs_buf_);
return io_status_; return io_status_;
} }
} else if (!TryGetSerializedBlockFromPersistentCache()) { } else if (!TryGetSerializedBlockFromPersistentCache()) {

View File

@ -0,0 +1 @@
Add ticker stats to count file read retries due to checksum mismatch, and how many of those retries succeed.