From b96432aadd2635f3a9643cb7f4497e109fa9d122 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 2 Dec 2024 16:32:02 -0800 Subject: [PATCH] Add public API definitions for surfacing data age (#13138) Summary: This PR adds the definitions of the public APIs for surfacing data write time info. It contains only a minimal implementation; the full implementations will come in follow-ups. I need to sync with customers on whether these public APIs meet their requirements and are easy to use, and make modifications accordingly before proceeding with the implementations. - `struct DataCollectionUnixWriteTimeInfo` is a struct for the unix write time info for a collection of data. - `DB::GetPropertiesOfTablesForLevels` returns the table properties collection per level. - `GetDataCollectionUnixWriteTimeInfoForFile` returns the data write time info for a file. - `GetDataCollectionUnixWriteTimeInfoForLevels` returns the data write time info for levels. - The user property names for recording write time stats in the user collected properties are defined. Follow ups: - Implement collecting the write time related user table properties. - Use the data write time info recorded in the table properties to implement these APIs. Test Plan: No functional change; follow-ups should have tests covering the minimal implementation added in this PR. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13138 
Reviewed By: pdillinger Differential Revision: D65952586 Pulled By: jowlyzhang fbshipit-source-id: b1ebf61a35005e9ca6b4ecc28c864beb6fb4bc59 --- include/rocksdb/db.h | 10 +++ .../utilities/table_properties_collectors.h | 87 +++++++++++++++++++ .../compact_for_tiering_collector.cc | 37 +++++++- .../compact_for_tiering_collector.h | 9 +- 4 files changed, 138 insertions(+), 5 deletions(-) diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 2f73813874..9beb4e74f6 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -29,6 +29,7 @@ #include "rocksdb/transaction_log.h" #include "rocksdb/types.h" #include "rocksdb/user_write_callback.h" +#include "rocksdb/utilities/table_properties_collectors.h" #include "rocksdb/version.h" #include "rocksdb/wide_columns.h" @@ -2012,6 +2013,15 @@ class DB { ColumnFamilyHandle* column_family, const Range* range, std::size_t n, TablePropertiesCollection* props) = 0; + // Get the table properties of files, grouped per level. This base-class default is unimplemented and returns NotSupported. + virtual Status GetPropertiesOfTablesForLevels( + ColumnFamilyHandle* /* column_family */, + std::vector< + std::unique_ptr>* /* levels_props */) { + return Status::NotSupported( + "GetPropertiesOfTablesForLevels() is not implemented."); + } + virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/, const Slice* /*begin*/, const Slice* /*end*/) { diff --git a/include/rocksdb/utilities/table_properties_collectors.h b/include/rocksdb/utilities/table_properties_collectors.h index 327e80af72..d0ce766ca9 100644 --- a/include/rocksdb/utilities/table_properties_collectors.h +++ b/include/rocksdb/utilities/table_properties_collectors.h @@ -7,6 +7,8 @@ #include #include +#include "rocksdb/status.h" +#include "rocksdb/system_clock.h" #include "rocksdb/table_properties.h" namespace ROCKSDB_NAMESPACE { @@ -130,4 +132,89 @@ class CompactForTieringCollectorFactory std::shared_ptr NewCompactForTieringCollectorFactory(double compaction_trigger_ratio); + +// Information for the unix write time of a collection 
of data. Combined with +// the current unix time, these stats give an overview of how long ago the data +// was written to the DB. +// Check `DataCollectionIsEmpty` and `TrackedDataRatio` before interpreting +// the stats. +struct DataCollectionUnixWriteTimeInfo { + // The minimum write time, i.e. the write time of the oldest key. + uint64_t min_write_time = 0; + // The maximum write time, i.e. the write time of the newest key. + uint64_t max_write_time = 0; + // The average write time. + uint64_t average_write_time = 0; + + // The number of entries that can be considered infinitely old because their + // sequence numbers are zeroed out. We know they are old entries but do not + // know how old exactly. These entries are separately counted and not + // aggregated in the above stats. + uint64_t num_entries_infinitely_old = 0; + + // The number of entries used to create the above min, max, and average stats. + uint64_t num_entries_write_time_aggregated = 0; + + // The number of entries for which write time is untracked. + uint64_t num_entries_write_time_untracked = 0; + + DataCollectionUnixWriteTimeInfo() {} + + DataCollectionUnixWriteTimeInfo(uint64_t _min_write_time, + uint64_t _max_write_time, + uint64_t _average_write_time, + uint64_t _num_entries_infinitely_old, + uint64_t _num_entries_write_time_aggregated, + uint64_t _num_entries_write_time_untracked) + : min_write_time(_min_write_time), + max_write_time(_max_write_time), + average_write_time(_average_write_time), + num_entries_infinitely_old(_num_entries_infinitely_old), + num_entries_write_time_aggregated(_num_entries_write_time_aggregated), + num_entries_write_time_untracked(_num_entries_write_time_untracked) {} + + // Returns true if the data collection that this + // `DataCollectionUnixWriteTimeInfo` describes is empty. 
+ bool DataCollectionIsEmpty() const { + return num_entries_infinitely_old == 0 && + num_entries_write_time_aggregated == 0 && + num_entries_write_time_untracked == 0; + } + + // The closer the ratio is to 1, the more accurately the stats reflect the + // actual write time of this collection of data. If this ratio is 0, there is + // no write time information available. Either the data collection + // is empty, or none of its data has write time info tracked. + // + // For a single file, its data either has write time info tracked or not + // tracked, so this ratio would be either 0 or 1. For a level, this ratio + // reflects what portion of the data has its write time info tracked in this + // struct. 0 is returned if the level is empty. + double TrackedDataRatio() const { + if (DataCollectionIsEmpty()) { + return 0; + } + uint64_t num_entries_write_time_tracked = + num_entries_infinitely_old + num_entries_write_time_aggregated; + return num_entries_write_time_tracked / + static_cast(num_entries_write_time_tracked + + num_entries_write_time_untracked); + } + + // Whether the file or the level has infinitely old data. + bool HasInfinitelyOldData() const { return num_entries_infinitely_old > 0; } +}; + +// Given the table properties of a file, return the data's unix write time stats +// if available. +Status GetDataCollectionUnixWriteTimeInfoForFile( + const std::shared_ptr& table_properties, + std::unique_ptr* file_info); + +// Given the collection of table properties per level, return the data's unix write +// time stats if available. 
+Status GetDataCollectionUnixWriteTimeInfoForLevels( + const std::vector>& + levels_table_properties, + std::vector>* levels_info); } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/table_properties_collectors/compact_for_tiering_collector.cc b/utilities/table_properties_collectors/compact_for_tiering_collector.cc index 8904be2ae7..b700e04af1 100644 --- a/utilities/table_properties_collectors/compact_for_tiering_collector.cc +++ b/utilities/table_properties_collectors/compact_for_tiering_collector.cc @@ -21,14 +21,29 @@ namespace ROCKSDB_NAMESPACE { const std::string CompactForTieringCollector::kNumEligibleLastLevelEntriesPropertyName = "rocksdb.eligible.last.level.entries"; +const std::string + CompactForTieringCollector::kAverageDataUnixWriteTimePropertyName = + "rocksdb.data.unix.write.time.average"; +const std::string + CompactForTieringCollector::kMaxDataUnixWriteTimePropertyName = + "rocksdb.data.unix.write.time.max"; +const std::string + CompactForTieringCollector::kMinDataUnixWriteTimePropertyName = + "rocksdb.data.unix.write.time.min"; +const std::string + CompactForTieringCollector::kNumInfinitelyOldEntriesPropertyName = + "rocksdb.num.infinitely.old.entries"; CompactForTieringCollector::CompactForTieringCollector( SequenceNumber last_level_inclusive_max_seqno_threshold, - double compaction_trigger_ratio) + double compaction_trigger_ratio, bool collect_data_age_stats) : last_level_inclusive_max_seqno_threshold_( last_level_inclusive_max_seqno_threshold), - compaction_trigger_ratio_(compaction_trigger_ratio) { + compaction_trigger_ratio_(compaction_trigger_ratio), + collect_data_age_stats_(collect_data_age_stats) { assert(last_level_inclusive_max_seqno_threshold_ != kMaxSequenceNumber); + // TODO(yuzhangyu): implement collecting the data age stats. 
+ (void)collect_data_age_stats_; } Status CompactForTieringCollector::AddUserKey(const Slice& /*key*/, @@ -93,9 +108,11 @@ CompactForTieringCollectorFactory::CreateTablePropertiesCollector( context.last_level_inclusive_max_seqno_threshold == kMaxSequenceNumber) { return nullptr; } + // TODO(yuzhangyu): pass the actual value instead of the hard-coded false. return new CompactForTieringCollector( context.last_level_inclusive_max_seqno_threshold, - compaction_trigger_ratio); + compaction_trigger_ratio, + /*collect_data_age_stats*/ false); } static std::unordered_map @@ -141,4 +158,18 @@ NewCompactForTieringCollectorFactory(double compaction_trigger_ratio) { compaction_trigger_ratio); } +Status GetDataCollectionUnixWriteTimeInfoForFile( + const std::shared_ptr& /* table_properties */, + std::unique_ptr* /* file_info */) { + return Status::NotSupported(); +} + +Status GetDataCollectionUnixWriteTimeInfoForLevels( + const std::vector>& /* levels_table_properties */, + std::vector< + std::unique_ptr>* /* levels_info */) { + return Status::NotSupported(); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/table_properties_collectors/compact_for_tiering_collector.h b/utilities/table_properties_collectors/compact_for_tiering_collector.h index 55497b671f..8b4413ada2 100644 --- a/utilities/table_properties_collectors/compact_for_tiering_collector.h +++ b/utilities/table_properties_collectors/compact_for_tiering_collector.h @@ -16,10 +16,14 @@ namespace ROCKSDB_NAMESPACE { class CompactForTieringCollector : public TablePropertiesCollector { public: static const std::string kNumEligibleLastLevelEntriesPropertyName; + static const std::string kAverageDataUnixWriteTimePropertyName; + static const std::string kMaxDataUnixWriteTimePropertyName; + static const std::string kMinDataUnixWriteTimePropertyName; + static const std::string kNumInfinitelyOldEntriesPropertyName; CompactForTieringCollector( - SequenceNumber last_level_inclusive_max_seqno_threshold_, - double compaction_trigger_ratio); + SequenceNumber 
last_level_inclusive_max_seqno_threshold, + double compaction_trigger_ratio, bool collect_data_age_stats); Status AddUserKey(const Slice& key, const Slice& value, EntryType type, SequenceNumber seq, uint64_t file_size) override; @@ -41,5 +45,6 @@ class CompactForTieringCollector : public TablePropertiesCollector { size_t total_entries_counter_ = 0; bool finish_called_ = false; bool need_compaction_ = false; + bool collect_data_age_stats_ = false; }; } // namespace ROCKSDB_NAMESPACE