mirror of https://github.com/facebook/rocksdb.git
Add file temperature related counter and bytes stats to and io_stats (#8710)
Summary: For tiered storage project, we need to know the block read count and read bytes of files with different temperature. Add FileIOByTemperature to IOStatsContext and collect the bytes read and read count from different temperature files through the RandomAccessFileReader. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8710 Test Plan: make check, add the testing cases Reviewed By: siying Differential Revision: D30582400 Pulled By: zhichao-cao fbshipit-source-id: d83173de594374fc8404af5ce93a6a9be72c7141
This commit is contained in:
parent
699f45049d
commit
b632ed0c67
|
@ -17,6 +17,7 @@
|
|||
#include "options/options_helper.h"
|
||||
#include "port/port.h"
|
||||
#include "port/stack_trace.h"
|
||||
#include "rocksdb/iostats_context.h"
|
||||
#include "rocksdb/persistent_cache.h"
|
||||
#include "rocksdb/trace_record.h"
|
||||
#include "rocksdb/trace_record_result.h"
|
||||
|
@ -6522,6 +6523,9 @@ TEST_F(DBTest2, BottommostTemperature) {
|
|||
ASSERT_OK(Flush());
|
||||
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
||||
|
||||
get_iostats_context()->Reset();
|
||||
IOStatsContext* iostats = get_iostats_context();
|
||||
|
||||
ColumnFamilyMetaData metadata;
|
||||
db_->GetColumnFamilyMetaData(&metadata);
|
||||
ASSERT_EQ(1, metadata.file_count);
|
||||
|
@ -6530,11 +6534,31 @@ TEST_F(DBTest2, BottommostTemperature) {
|
|||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_GT(size, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
|
||||
ASSERT_EQ("bar", Get("foo"));
|
||||
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
|
||||
ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
|
||||
|
||||
// non-bottommost file still has unknown temperature
|
||||
ASSERT_OK(Put("foo", "bar"));
|
||||
ASSERT_OK(Put("bar", "bar"));
|
||||
ASSERT_OK(Flush());
|
||||
ASSERT_EQ("bar", Get("bar"));
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
|
||||
ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
|
||||
|
||||
db_->GetColumnFamilyMetaData(&metadata);
|
||||
ASSERT_EQ(2, metadata.file_count);
|
||||
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
|
||||
|
@ -6583,6 +6607,8 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
|
|||
ASSERT_EQ(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kHot);
|
||||
ASSERT_EQ(size, 0);
|
||||
get_iostats_context()->Reset();
|
||||
IOStatsContext* iostats = get_iostats_context();
|
||||
|
||||
for (int i = 0; i < kTriggerNum; i++) {
|
||||
ASSERT_OK(Put("foo", "bar"));
|
||||
|
@ -6600,6 +6626,17 @@ TEST_F(DBTest2, BottommostTemperatureUniversal) {
|
|||
ASSERT_GT(size, 0);
|
||||
size = GetSstSizeHelper(Temperature::kWarm);
|
||||
ASSERT_EQ(size, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ("bar", Get("foo"));
|
||||
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
|
||||
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
|
||||
|
||||
ASSERT_OK(Put("foo", "bar"));
|
||||
ASSERT_OK(Put("bar", "bar"));
|
||||
|
|
|
@ -103,7 +103,7 @@ Status TableCache::GetTableReader(
|
|||
std::unique_ptr<TableReader>* table_reader,
|
||||
const SliceTransform* prefix_extractor, bool skip_filters, int level,
|
||||
bool prefetch_index_and_filter_in_cache,
|
||||
size_t max_file_size_for_l0_meta_pin) {
|
||||
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
|
||||
std::string fname =
|
||||
TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
|
||||
std::unique_ptr<FSRandomAccessFile> file;
|
||||
|
@ -132,7 +132,8 @@ Status TableCache::GetTableReader(
|
|||
new RandomAccessFileReader(
|
||||
std::move(file), fname, ioptions_.clock, io_tracer_,
|
||||
record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
|
||||
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners));
|
||||
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
|
||||
file_temperature));
|
||||
s = ioptions_.table_factory->NewTableReader(
|
||||
ro,
|
||||
TableReaderOptions(
|
||||
|
@ -154,15 +155,13 @@ void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) {
|
|||
cache_->Erase(key);
|
||||
}
|
||||
|
||||
Status TableCache::FindTable(const ReadOptions& ro,
|
||||
const FileOptions& file_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileDescriptor& fd, Cache::Handle** handle,
|
||||
const SliceTransform* prefix_extractor,
|
||||
const bool no_io, bool record_read_stats,
|
||||
HistogramImpl* file_read_hist, bool skip_filters,
|
||||
int level, bool prefetch_index_and_filter_in_cache,
|
||||
size_t max_file_size_for_l0_meta_pin) {
|
||||
Status TableCache::FindTable(
|
||||
const ReadOptions& ro, const FileOptions& file_options,
|
||||
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
|
||||
Cache::Handle** handle, const SliceTransform* prefix_extractor,
|
||||
const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
|
||||
bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
|
||||
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
|
||||
PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
|
||||
uint64_t number = fd.GetNumber();
|
||||
Slice key = GetSliceForFileNumber(&number);
|
||||
|
@ -186,7 +185,7 @@ Status TableCache::FindTable(const ReadOptions& ro,
|
|||
ro, file_options, internal_comparator, fd, false /* sequential mode */,
|
||||
record_read_stats, file_read_hist, &table_reader, prefix_extractor,
|
||||
skip_filters, level, prefetch_index_and_filter_in_cache,
|
||||
max_file_size_for_l0_meta_pin);
|
||||
max_file_size_for_l0_meta_pin, file_temperature);
|
||||
if (!s.ok()) {
|
||||
assert(table_reader == nullptr);
|
||||
RecordTick(ioptions_.stats, NO_FILE_ERRORS);
|
||||
|
@ -231,7 +230,7 @@ InternalIterator* TableCache::NewIterator(
|
|||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
!for_compaction /* record_read_stats */, file_read_hist, skip_filters,
|
||||
level, true /* prefetch_index_and_filter_in_cache */,
|
||||
max_file_size_for_l0_meta_pin);
|
||||
max_file_size_for_l0_meta_pin, file_meta.temperature);
|
||||
if (s.ok()) {
|
||||
table_reader = GetTableReaderFromHandle(handle);
|
||||
}
|
||||
|
@ -431,7 +430,7 @@ Status TableCache::Get(const ReadOptions& options,
|
|||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record_read_stats */, file_read_hist, skip_filters,
|
||||
level, true /* prefetch_index_and_filter_in_cache */,
|
||||
max_file_size_for_l0_meta_pin);
|
||||
max_file_size_for_l0_meta_pin, file_meta.temperature);
|
||||
if (s.ok()) {
|
||||
t = GetTableReaderFromHandle(handle);
|
||||
}
|
||||
|
@ -531,10 +530,12 @@ Status TableCache::MultiGet(const ReadOptions& options,
|
|||
// found in the row cache and thus the range may now be empty
|
||||
if (s.ok() && !table_range.empty()) {
|
||||
if (t == nullptr) {
|
||||
s = FindTable(
|
||||
options, file_options_, internal_comparator, fd, &handle,
|
||||
prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record_read_stats */, file_read_hist, skip_filters, level);
|
||||
s = FindTable(options, file_options_, internal_comparator, fd, &handle,
|
||||
prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record_read_stats */, file_read_hist, skip_filters,
|
||||
level, true /* prefetch_index_and_filter_in_cache */,
|
||||
0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
|
||||
TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
|
||||
if (s.ok()) {
|
||||
t = GetTableReaderFromHandle(handle);
|
||||
|
|
|
@ -140,7 +140,8 @@ class TableCache {
|
|||
HistogramImpl* file_read_hist = nullptr,
|
||||
bool skip_filters = false, int level = -1,
|
||||
bool prefetch_index_and_filter_in_cache = true,
|
||||
size_t max_file_size_for_l0_meta_pin = 0);
|
||||
size_t max_file_size_for_l0_meta_pin = 0,
|
||||
Temperature file_temperature = Temperature::kUnknown);
|
||||
|
||||
// Get TableReader from a cache handle.
|
||||
TableReader* GetTableReaderFromHandle(Cache::Handle* handle);
|
||||
|
@ -206,7 +207,8 @@ class TableCache {
|
|||
const SliceTransform* prefix_extractor = nullptr,
|
||||
bool skip_filters = false, int level = -1,
|
||||
bool prefetch_index_and_filter_in_cache = true,
|
||||
size_t max_file_size_for_l0_meta_pin = 0);
|
||||
size_t max_file_size_for_l0_meta_pin = 0,
|
||||
Temperature file_temperature = Temperature::kUnknown);
|
||||
|
||||
// Create a key prefix for looking up the row cache. The prefix is of the
|
||||
// format row_cache_id + fd_number + seq_no. Later, the user key can be
|
||||
|
|
|
@ -1017,7 +1017,8 @@ class VersionBuilder::Rep {
|
|||
&file_meta->table_reader_handle, prefix_extractor, false /*no_io */,
|
||||
true /* record_read_stats */,
|
||||
internal_stats->GetFileReadHist(level), false, level,
|
||||
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin);
|
||||
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin,
|
||||
file_meta->temperature);
|
||||
if (file_meta->table_reader_handle != nullptr) {
|
||||
// Load table_reader
|
||||
file_meta->fd.table_reader = table_cache_->GetTableReaderFromHandle(
|
||||
|
|
|
@ -22,6 +22,46 @@
|
|||
#include "util/rate_limiter.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
inline void IOStatsAddBytesByTemperature(Temperature file_temperature,
|
||||
size_t value) {
|
||||
if (file_temperature == Temperature::kUnknown) {
|
||||
return;
|
||||
}
|
||||
switch (file_temperature) {
|
||||
case Temperature::kHot:
|
||||
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_bytes_read, value);
|
||||
break;
|
||||
case Temperature::kWarm:
|
||||
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_bytes_read, value);
|
||||
break;
|
||||
case Temperature::kCold:
|
||||
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_bytes_read, value);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
inline void IOStatsAddCountByTemperature(Temperature file_temperature,
|
||||
size_t value) {
|
||||
if (file_temperature == Temperature::kUnknown) {
|
||||
return;
|
||||
}
|
||||
switch (file_temperature) {
|
||||
case Temperature::kHot:
|
||||
IOSTATS_ADD(file_io_stats_by_temperature.hot_file_read_count, value);
|
||||
break;
|
||||
case Temperature::kWarm:
|
||||
IOSTATS_ADD(file_io_stats_by_temperature.warm_file_read_count, value);
|
||||
break;
|
||||
case Temperature::kCold:
|
||||
IOSTATS_ADD(file_io_stats_by_temperature.cold_file_read_count, value);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
IOStatus RandomAccessFileReader::Create(
|
||||
const std::shared_ptr<FileSystem>& fs, const std::string& fname,
|
||||
const FileOptions& file_opts,
|
||||
|
@ -182,6 +222,8 @@ IOStatus RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
|
|||
*result = Slice(res_scratch, io_s.ok() ? pos : 0);
|
||||
}
|
||||
IOSTATS_ADD(bytes_read, result->size());
|
||||
IOStatsAddBytesByTemperature(file_temperature_, result->size());
|
||||
IOStatsAddCountByTemperature(file_temperature_, 1);
|
||||
SetPerfLevel(prev_perf_level);
|
||||
}
|
||||
if (stats_ != nullptr && file_read_hist_ != nullptr) {
|
||||
|
@ -347,6 +389,9 @@ IOStatus RandomAccessFileReader::MultiRead(const IOOptions& opts,
|
|||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
IOSTATS_ADD(bytes_read, read_reqs[i].result.size());
|
||||
IOStatsAddBytesByTemperature(file_temperature_,
|
||||
read_reqs[i].result.size());
|
||||
IOStatsAddCountByTemperature(file_temperature_, 1);
|
||||
}
|
||||
SetPerfLevel(prev_perf_level);
|
||||
}
|
||||
|
|
|
@ -73,6 +73,7 @@ class RandomAccessFileReader {
|
|||
HistogramImpl* file_read_hist_;
|
||||
RateLimiter* rate_limiter_;
|
||||
std::vector<std::shared_ptr<EventListener>> listeners_;
|
||||
Temperature file_temperature_;
|
||||
|
||||
public:
|
||||
explicit RandomAccessFileReader(
|
||||
|
@ -82,7 +83,8 @@ class RandomAccessFileReader {
|
|||
Statistics* stats = nullptr, uint32_t hist_type = 0,
|
||||
HistogramImpl* file_read_hist = nullptr,
|
||||
RateLimiter* rate_limiter = nullptr,
|
||||
const std::vector<std::shared_ptr<EventListener>>& listeners = {})
|
||||
const std::vector<std::shared_ptr<EventListener>>& listeners = {},
|
||||
Temperature file_temperature = Temperature::kUnknown)
|
||||
: file_(std::move(raf), io_tracer, _file_name),
|
||||
file_name_(std::move(_file_name)),
|
||||
clock_(clock),
|
||||
|
@ -90,7 +92,8 @@ class RandomAccessFileReader {
|
|||
hist_type_(hist_type),
|
||||
file_read_hist_(file_read_hist),
|
||||
rate_limiter_(rate_limiter),
|
||||
listeners_() {
|
||||
listeners_(),
|
||||
file_temperature_(file_temperature) {
|
||||
#ifndef ROCKSDB_LITE
|
||||
std::for_each(listeners.begin(), listeners.end(),
|
||||
[this](const std::shared_ptr<EventListener>& e) {
|
||||
|
|
|
@ -14,6 +14,32 @@
|
|||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
// EXPERIMENTAL: the IO statistics for tiered storage. It matches with each
|
||||
// item in Temperature class.
|
||||
struct FileIOByTemperature {
|
||||
// the number of bytes read to Temperature::kHot file
|
||||
uint64_t hot_file_bytes_read;
|
||||
// the number of bytes read to Temperature::kWarm file
|
||||
uint64_t warm_file_bytes_read;
|
||||
// the number of bytes read to Temperature::kCold file
|
||||
uint64_t cold_file_bytes_read;
|
||||
// total number of reads to Temperature::kHot file
|
||||
uint64_t hot_file_read_count;
|
||||
// total number of reads to Temperature::kWarm file
|
||||
uint64_t warm_file_read_count;
|
||||
// total number of reads to Temperature::kCold file
|
||||
uint64_t cold_file_read_count;
|
||||
// reset all the statistics to 0.
|
||||
void Reset() {
|
||||
hot_file_bytes_read = 0;
|
||||
warm_file_bytes_read = 0;
|
||||
cold_file_bytes_read = 0;
|
||||
hot_file_read_count = 0;
|
||||
warm_file_read_count = 0;
|
||||
cold_file_read_count = 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct IOStatsContext {
|
||||
// reset all io-stats counter to zero
|
||||
void Reset();
|
||||
|
@ -48,6 +74,8 @@ struct IOStatsContext {
|
|||
uint64_t cpu_write_nanos;
|
||||
// CPU time spent in read() and pread()
|
||||
uint64_t cpu_read_nanos;
|
||||
|
||||
FileIOByTemperature file_io_stats_by_temperature;
|
||||
};
|
||||
|
||||
// If RocksDB is compiled with -DNIOSTATS_CONTEXT, then a pointer to a global,
|
||||
|
|
|
@ -39,6 +39,7 @@ void IOStatsContext::Reset() {
|
|||
logger_nanos = 0;
|
||||
cpu_write_nanos = 0;
|
||||
cpu_read_nanos = 0;
|
||||
file_io_stats_by_temperature.Reset();
|
||||
#endif //! NIOSTATS_CONTEXT
|
||||
}
|
||||
|
||||
|
@ -66,7 +67,12 @@ std::string IOStatsContext::ToString(bool exclude_zero_counters) const {
|
|||
IOSTATS_CONTEXT_OUTPUT(logger_nanos);
|
||||
IOSTATS_CONTEXT_OUTPUT(cpu_write_nanos);
|
||||
IOSTATS_CONTEXT_OUTPUT(cpu_read_nanos);
|
||||
|
||||
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.hot_file_bytes_read);
|
||||
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.warm_file_bytes_read);
|
||||
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.cold_file_bytes_read);
|
||||
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.hot_file_read_count);
|
||||
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.warm_file_read_count);
|
||||
IOSTATS_CONTEXT_OUTPUT(file_io_stats_by_temperature.cold_file_read_count);
|
||||
std::string str = ss.str();
|
||||
str.erase(str.find_last_not_of(", ") + 1);
|
||||
return str;
|
||||
|
|
|
@ -77,20 +77,19 @@ extern thread_local PerfContext perf_context;
|
|||
}
|
||||
|
||||
// Increase metric value
|
||||
#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \
|
||||
if (perf_level >= PerfLevel::kEnableCount && \
|
||||
perf_context.per_level_perf_context_enabled && \
|
||||
perf_context.level_to_perf_context) { \
|
||||
if ((*(perf_context.level_to_perf_context)).find(level) != \
|
||||
(*(perf_context.level_to_perf_context)).end()) { \
|
||||
(*(perf_context.level_to_perf_context))[level].metric += value; \
|
||||
} \
|
||||
else { \
|
||||
PerfContextByLevel empty_context; \
|
||||
(*(perf_context.level_to_perf_context))[level] = empty_context; \
|
||||
(*(perf_context.level_to_perf_context))[level].metric += value; \
|
||||
} \
|
||||
} \
|
||||
#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \
|
||||
if (perf_level >= PerfLevel::kEnableCount && \
|
||||
perf_context.per_level_perf_context_enabled && \
|
||||
perf_context.level_to_perf_context) { \
|
||||
if ((*(perf_context.level_to_perf_context)).find(level) != \
|
||||
(*(perf_context.level_to_perf_context)).end()) { \
|
||||
(*(perf_context.level_to_perf_context))[level].metric += value; \
|
||||
} else { \
|
||||
PerfContextByLevel empty_context; \
|
||||
(*(perf_context.level_to_perf_context))[level] = empty_context; \
|
||||
(*(perf_context.level_to_perf_context))[level].metric += value; \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue