Add public API definitions for surfacing data age (#13138)

Summary:
This PR adds the definitions of the public APIs for surfacing data write time info. It contains only a minimal (placeholder) implementation; the real implementations will come in follow-ups. I need to sync with customers to confirm that these public APIs meet their requirements and are easy to use, and make modifications accordingly before proceeding with the implementations.

- `struct DataCollectionUnixWriteTimeInfo` is a struct for the unix write time info for a collection of data
- `DB::GetPropertiesOfTablesForLevels` returns table properties collection per level
- `GetDataCollectionUnixWriteTimeInfoForFile` returns the data write time info for a file.
- `GetDataCollectionUnixWriteTimeInfoForLevels` returns the data write time info for levels.
- The user property names for recording write time stats in the user collected properties are defined.

Follow-ups:

- Implement collecting the write-time-related user table properties.
- Use the data write time info recorded in the table properties to implement these APIs.

Test Plan:
No functional change. Follow-ups should add tests covering the minimal implementation added in this PR.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/13138

No functional change, also follow ups should have tests covering the minimum implementation added in this PR.

Reviewed By: pdillinger

Differential Revision: D65952586

Pulled By: jowlyzhang

fbshipit-source-id: b1ebf61a35005e9ca6b4ecc28c864beb6fb4bc59
This commit is contained in:
Yu Zhang 2024-12-02 16:32:02 -08:00 committed by Facebook GitHub Bot
parent 4ed79f5bd1
commit b96432aadd
4 changed files with 138 additions and 5 deletions

View file

@ -29,6 +29,7 @@
#include "rocksdb/transaction_log.h"
#include "rocksdb/types.h"
#include "rocksdb/user_write_callback.h"
#include "rocksdb/utilities/table_properties_collectors.h"
#include "rocksdb/version.h"
#include "rocksdb/wide_columns.h"
@ -2012,6 +2013,15 @@ class DB {
ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
TablePropertiesCollection* props) = 0;
// Retrieves the table properties of the files, grouped per level.
//
// `column_family`: the column family to query.
// `levels_props`: output parameter meant to receive one
//                 TablePropertiesCollection per level.
//
// This default implementation touches neither argument and always reports
// Status::NotSupported; subclasses are expected to override it.
virtual Status GetPropertiesOfTablesForLevels(
    ColumnFamilyHandle* /*column_family*/,
    std::vector<std::unique_ptr<TablePropertiesCollection>>*
    /*levels_props*/) {
  return Status::NotSupported(
      "GetPropertiesOfTablesForLevels() is not implemented.");
}
virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
const Slice* /*begin*/,
const Slice* /*end*/) {

View file

@ -7,6 +7,8 @@
#include <atomic>
#include <memory>
#include "rocksdb/status.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table_properties.h"
namespace ROCKSDB_NAMESPACE {
@ -130,4 +132,89 @@ class CompactForTieringCollectorFactory
std::shared_ptr<CompactForTieringCollectorFactory>
NewCompactForTieringCollectorFactory(double compaction_trigger_ratio);
// Summary of the unix write times for a collection of data (for example a
// single file or a whole level). Compared against the current unix time,
// these stats show how long ago the data was written to the DB.
//
// Always consult `DataCollectionIsEmpty` and `TrackedDataRatio` first: the
// min/max/average values are only meaningful when some entries were actually
// aggregated.
struct DataCollectionUnixWriteTimeInfo {
  // Smallest write time, i.e. the write time of the oldest key.
  uint64_t min_write_time = 0;

  // Largest write time, i.e. the write time of the newest key.
  uint64_t max_write_time = 0;

  // Average write time over the aggregated entries.
  uint64_t average_write_time = 0;

  // Count of entries treated as infinitely old because their sequence numbers
  // have been zeroed out. They are known to be old, but not how old, so they
  // are counted here and excluded from the min/max/average stats above.
  uint64_t num_entries_infinitely_old = 0;

  // Count of entries that contributed to the min/max/average stats above.
  uint64_t num_entries_write_time_aggregated = 0;

  // Count of entries whose write time is not tracked.
  uint64_t num_entries_write_time_untracked = 0;

  // Creates an info object describing an empty data collection (all zero).
  DataCollectionUnixWriteTimeInfo() {}

  // Creates an info object with every stat and counter given explicitly.
  DataCollectionUnixWriteTimeInfo(uint64_t _min_write_time,
                                  uint64_t _max_write_time,
                                  uint64_t _average_write_time,
                                  uint64_t _num_entries_infinitely_old,
                                  uint64_t _num_entries_write_time_aggregated,
                                  uint64_t _num_entries_write_time_untracked)
      : min_write_time(_min_write_time),
        max_write_time(_max_write_time),
        average_write_time(_average_write_time),
        num_entries_infinitely_old(_num_entries_infinitely_old),
        num_entries_write_time_aggregated(_num_entries_write_time_aggregated),
        num_entries_write_time_untracked(_num_entries_write_time_untracked) {}

  // Returns true when the data collection described by this object holds no
  // entries at all, i.e. all three entry counters are zero.
  bool DataCollectionIsEmpty() const {
    const bool has_any_entry = num_entries_infinitely_old != 0 ||
                               num_entries_write_time_aggregated != 0 ||
                               num_entries_write_time_untracked != 0;
    return !has_any_entry;
  }

  // Fraction of entries whose write time is tracked (infinitely old entries
  // count as tracked). The closer to 1, the more faithfully the stats reflect
  // the real write times of the data. A value of 0 means no write time
  // information is available: either the collection is empty or none of its
  // data has write time info tracked.
  //
  // For a single file the data is either entirely tracked or entirely
  // untracked, so the ratio is 0 or 1. For a level it is the portion of the
  // data with tracked write time info; an empty level yields 0.
  double TrackedDataRatio() const {
    if (DataCollectionIsEmpty()) {
      return 0;
    }
    const uint64_t tracked_entries =
        num_entries_infinitely_old + num_entries_write_time_aggregated;
    const uint64_t all_entries =
        tracked_entries + num_entries_write_time_untracked;
    return tracked_entries / static_cast<double>(all_entries);
  }

  // Returns true when the file or level contains infinitely old data.
  bool HasInfinitelyOldData() const { return num_entries_infinitely_old != 0; }
};
// Given the table properties of a file, return the unix write time stats of
// that file's data, if available.
//
// `table_properties`: the table properties of the file to inspect.
// `file_info`: output parameter intended to receive the resulting
//              `DataCollectionUnixWriteTimeInfo`.
//
// NOTE(review): the definition added alongside this declaration is a
// placeholder that ignores both arguments and returns
// Status::NotSupported(); the success contract (e.g. whether `file_info` is
// always populated on OK) is still to be defined.
Status GetDataCollectionUnixWriteTimeInfoForFile(
const std::shared_ptr<const TableProperties>& table_properties,
std::unique_ptr<DataCollectionUnixWriteTimeInfo>* file_info);
// Given the collection of table properties per level, return the unix write
// time stats of the data per level, if available.
//
// `levels_table_properties`: one TablePropertiesCollection per level.
// `levels_info`: output parameter intended to receive the per-level
//                `DataCollectionUnixWriteTimeInfo` objects.
//
// NOTE(review): the definition added alongside this declaration is a
// placeholder that ignores both arguments and returns
// Status::NotSupported(); presumably `levels_info` will be index-aligned with
// `levels_table_properties` once implemented -- confirm.
Status GetDataCollectionUnixWriteTimeInfoForLevels(
const std::vector<std::unique_ptr<TablePropertiesCollection>>&
levels_table_properties,
std::vector<std::unique_ptr<DataCollectionUnixWriteTimeInfo>>* levels_info);
} // namespace ROCKSDB_NAMESPACE

View file

@ -21,14 +21,29 @@ namespace ROCKSDB_NAMESPACE {
// Property name constants of CompactForTieringCollector.

// NOTE(review): presumably the property holding the number of entries
// eligible for the last level -- confirm against where it is written.
const std::string
CompactForTieringCollector::kNumEligibleLastLevelEntriesPropertyName =
"rocksdb.eligible.last.level.entries";
// The following names are reserved for recording data write time stats in the
// user collected properties. Nothing visible here writes them yet.
// Average unix write time of the data.
const std::string
CompactForTieringCollector::kAverageDataUnixWriteTimePropertyName =
"rocksdb.data.unix.write.time.average";
// Maximum unix write time of the data.
const std::string
CompactForTieringCollector::kMaxDataUnixWriteTimePropertyName =
"rocksdb.data.unix.write.time.max";
// Minimum unix write time of the data.
const std::string
CompactForTieringCollector::kMinDataUnixWriteTimePropertyName =
"rocksdb.data.unix.write.time.min";
// Number of entries considered infinitely old.
const std::string
CompactForTieringCollector::kNumInfinitelyOldEntriesPropertyName =
"rocksdb.num.infinitely.old.entries";
// Constructs a collector.
//
// `last_level_inclusive_max_seqno_threshold`: stored as-is; must not be
//     kMaxSequenceNumber (asserted below).
// `compaction_trigger_ratio`: stored as-is for later use by the collector.
// `collect_data_age_stats`: stored as-is; collecting data age stats is not
//     implemented yet, so the flag currently has no effect.
CompactForTieringCollector::CompactForTieringCollector(
    SequenceNumber last_level_inclusive_max_seqno_threshold,
    double compaction_trigger_ratio, bool collect_data_age_stats)
    : last_level_inclusive_max_seqno_threshold_(
          last_level_inclusive_max_seqno_threshold),
      compaction_trigger_ratio_(compaction_trigger_ratio),
      collect_data_age_stats_(collect_data_age_stats) {
  assert(last_level_inclusive_max_seqno_threshold_ != kMaxSequenceNumber);
  // TODO(yuzhangyu): implement collect the data age stats.
  // Reference the member so that it does not look unused until the TODO above
  // is addressed.
  (void)collect_data_age_stats_;
}
Status CompactForTieringCollector::AddUserKey(const Slice& /*key*/,
@ -93,9 +108,11 @@ CompactForTieringCollectorFactory::CreateTablePropertiesCollector(
context.last_level_inclusive_max_seqno_threshold == kMaxSequenceNumber) {
return nullptr;
}
// TODO(yuzhangyu): pass actual value.
return new CompactForTieringCollector(
context.last_level_inclusive_max_seqno_threshold,
compaction_trigger_ratio);
compaction_trigger_ratio,
/*collect_data_age_stats*/ false);
}
static std::unordered_map<std::string, OptionTypeInfo>
@ -141,4 +158,18 @@ NewCompactForTieringCollectorFactory(double compaction_trigger_ratio) {
compaction_trigger_ratio);
}
// Placeholder definition: computing a file's unix write time stats from its
// table properties is not implemented yet. Both arguments are ignored and
// Status::NotSupported() is always returned.
Status GetDataCollectionUnixWriteTimeInfoForFile(
    const std::shared_ptr<const TableProperties>& /*table_properties*/,
    std::unique_ptr<DataCollectionUnixWriteTimeInfo>* /*file_info*/) {
  return Status::NotSupported();
}
// Placeholder definition: computing per-level unix write time stats from the
// per-level table properties is not implemented yet. Both arguments are
// ignored and Status::NotSupported() is always returned.
Status GetDataCollectionUnixWriteTimeInfoForLevels(
    const std::vector<std::unique_ptr<TablePropertiesCollection>>&
    /*levels_table_properties*/,
    std::vector<std::unique_ptr<DataCollectionUnixWriteTimeInfo>>*
    /*levels_info*/) {
  return Status::NotSupported();
}
} // namespace ROCKSDB_NAMESPACE

View file

@ -16,10 +16,14 @@ namespace ROCKSDB_NAMESPACE {
class CompactForTieringCollector : public TablePropertiesCollector {
public:
static const std::string kNumEligibleLastLevelEntriesPropertyName;
static const std::string kAverageDataUnixWriteTimePropertyName;
static const std::string kMaxDataUnixWriteTimePropertyName;
static const std::string kMinDataUnixWriteTimePropertyName;
static const std::string kNumInfinitelyOldEntriesPropertyName;
CompactForTieringCollector(
SequenceNumber last_level_inclusive_max_seqno_threshold_,
double compaction_trigger_ratio);
SequenceNumber last_level_inclusive_max_seqno_threshold,
double compaction_trigger_ratio, bool collect_data_age_stats);
Status AddUserKey(const Slice& key, const Slice& value, EntryType type,
SequenceNumber seq, uint64_t file_size) override;
@ -41,5 +45,6 @@ class CompactForTieringCollector : public TablePropertiesCollector {
size_t total_entries_counter_ = 0;
bool finish_called_ = false;
bool need_compaction_ = false;
bool collect_data_age_stats_ = false;
};
} // namespace ROCKSDB_NAMESPACE