From af2a36d2c7d9bf229b89f0fe559dc2d8a4e6f79e Mon Sep 17 00:00:00 2001 From: Andrew Ryan Chang Date: Fri, 1 Nov 2024 10:08:35 -0700 Subject: [PATCH] Record newest_key_time as a table property (#13083) Summary: This PR does two things: 1. Adds a new table property `newest_key_time` 2. Uses this property to improve TTL and temperature change compaction. ### Context The current `creation_time` table property should really be named `oldest_ancestor_time`. For flush output files, this is the oldest key time in the file. For compaction output files, this is the minimum among all oldest key times in the input files. The problem with using the oldest ancestor time for TTL compaction is that we may end up dropping files earlier than we should. What we really want is the newest (i.e. "youngest") key time. Right now we take a roundabout way to estimate this value -- we take the value of the _oldest_ key time for the _next_ (newer) SST file. This is also why the current code has checks for `index >= 1`. Our new property `newest_key_time` is set to the file creation time during flushes, and the max over all input files for compactions. There were some additional smaller changes that I had to make for testing purposes: - Refactoring the mock table reader to support specifying my own table properties - Refactoring out a test utility method `GetLevelFileMetadatas` that would otherwise be copy/pasted in 3 places Credit to cbi42 for the problem explanation and proposed solution ### Testing - Added a dedicated unit test to my `newest_key_time` logic in isolation (i.e. are we populating the property on flush and compaction) - Updated the existing unit tests (for TTL/temperate change compaction), which were comprehensive enough to break when I first made my code changes. I removed the test setup code which set the file metadata `oldest_ancestor_time`, so we know we are actually only using the new table property instead. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13083 Reviewed By: cbi42 Differential Revision: D65298604 Pulled By: archang19 fbshipit-source-id: 898ef91b692ab33f5129a2a16b64ecadd4c32432 --- db/compaction/compaction.cc | 19 + db/compaction/compaction.h | 6 + db/compaction/compaction_job.cc | 6 +- db/compaction/compaction_job_test.cc | 14 +- db/compaction/compaction_picker_fifo.cc | 48 +- db/compaction/compaction_picker_test.cc | 537 +++++++++++------- db/db_compaction_test.cc | 91 +++ db/db_impl/db_impl_open.cc | 9 +- db/db_test2.cc | 12 - db/db_test_util.cc | 14 + db/db_test_util.h | 2 + db/event_helpers.cc | 1 + db/flush_job.cc | 7 +- db/repair.cc | 8 +- db/repair_test.cc | 13 - db/table_properties_collector_test.cc | 2 +- db/version_edit.h | 22 + db/version_set.cc | 19 +- db/version_set_test.cc | 2 +- include/rocksdb/table_properties.h | 3 + options/options_settable_test.cc | 3 +- .../block_based/block_based_table_builder.cc | 1 + .../block_based_table_reader_test.cc | 3 +- .../block_based/data_block_hash_index_test.cc | 2 +- table/block_fetcher_test.cc | 3 +- table/meta_blocks.cc | 3 + table/mock_table.cc | 50 -- table/mock_table.h | 50 ++ table/sst_file_dumper.cc | 2 +- table/sst_file_writer.cc | 8 +- table/table_builder.h | 4 +- table/table_properties.cc | 8 + table/table_reader_bench.cc | 10 +- table/table_test.cc | 45 +- tools/sst_dump_test.cc | 2 +- .../new_features/tp_newest_key_time.md | 1 + 36 files changed, 654 insertions(+), 376 deletions(-) create mode 100644 unreleased_history/new_features/tp_newest_key_time.md diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index 035195f9d3..2362cc4170 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -935,6 +935,25 @@ bool Compaction::DoesInputReferenceBlobFiles() const { return false; } +uint64_t Compaction::MaxInputFileNewestKeyTime(const InternalKey* start, + const InternalKey* end) const { + uint64_t newest_key_time = kUnknownNewestKeyTime; + const InternalKeyComparator& icmp = + column_family_data()->internal_comparator(); + for (const auto& level_files : inputs_) { + for (const auto& file : level_files.files) { + if (start != nullptr && icmp.Compare(file->largest, *start) < 0) { + continue; + } + if (end != nullptr && icmp.Compare(file->smallest, *end) > 0) { + continue; + } + newest_key_time = std::max(newest_key_time, file->TryGetNewestKeyTime()); + } + } + return newest_key_time; +} + uint64_t Compaction::MinInputFileOldestAncesterTime( const InternalKey* start, const InternalKey* end) const { uint64_t min_oldest_ancester_time = std::numeric_limits::max(); diff --git a/db/compaction/compaction.h b/db/compaction/compaction.h index d5e8f06c66..f43dc620d8 100644 --- a/db/compaction/compaction.h +++ b/db/compaction/compaction.h @@ -405,6 +405,12 @@ class Compaction { return blob_garbage_collection_age_cutoff_; } + // start and end are sub compact range. Null if no boundary. + // This is used to calculate the newest_key_time table property after + // compaction. + uint64_t MaxInputFileNewestKeyTime(const InternalKey* start, + const InternalKey* end) const; + // start and end are sub compact range. Null if no boundary. // This is used to filter out some input files' ancester's time range. uint64_t MinInputFileOldestAncesterTime(const InternalKey* start, diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 7aa2bb217f..c1e2035d26 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1914,6 +1914,10 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, oldest_ancester_time = current_time; } + uint64_t newest_key_time = sub_compact->compaction->MaxInputFileNewestKeyTime( + sub_compact->start.has_value() ? &tmp_start : nullptr, + sub_compact->end.has_value() ? &tmp_end : nullptr); + // Initialize a SubcompactionState::Output and add it to sub_compact->outputs uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber(); { @@ -1963,7 +1967,7 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, cfd->internal_tbl_prop_coll_factories(), sub_compact->compaction->output_compression(), sub_compact->compaction->output_compression_opts(), cfd->GetID(), - cfd->GetName(), sub_compact->compaction->output_level(), + cfd->GetName(), sub_compact->compaction->output_level(), newest_key_time, bottommost_level_, TableFileCreationReason::kCompaction, 0 /* oldest_key_time */, current_time, db_id_, db_session_id_, sub_compact->compaction->max_output_file_size(), file_number, diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 94ad1e2905..0bd20a5806 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -301,13 +301,13 @@ class CompactionJobTestBase : public testing::Test { const WriteOptions write_options; std::unique_ptr table_builder( cf_options_.table_factory->NewTableBuilder( - TableBuilderOptions(*cfd_->ioptions(), mutable_cf_options_, - read_options, write_options, - cfd_->internal_comparator(), - cfd_->internal_tbl_prop_coll_factories(), - CompressionType::kNoCompression, - CompressionOptions(), 0 /* column_family_id */, - kDefaultColumnFamilyName, -1 /* level */), + TableBuilderOptions( + *cfd_->ioptions(), mutable_cf_options_, read_options, + write_options, cfd_->internal_comparator(), + cfd_->internal_tbl_prop_coll_factories(), + CompressionType::kNoCompression, CompressionOptions(), + 0 /* column_family_id */, kDefaultColumnFamilyName, + -1 /* level */, kUnknownNewestKeyTime), file_writer.get())); // Build table. for (const auto& kv : contents) { diff --git a/db/compaction/compaction_picker_fifo.cc b/db/compaction/compaction_picker_fifo.cc index 12cf60e0e9..5a3a958e97 100644 --- a/db/compaction/compaction_picker_fifo.cc +++ b/db/compaction/compaction_picker_fifo.cc @@ -79,10 +79,14 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( FileMetaData* f = *ritr; assert(f); if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + uint64_t newest_key_time = f->TryGetNewestKeyTime(); uint64_t creation_time = f->fd.table_reader->GetTableProperties()->creation_time; - if (creation_time == 0 || - creation_time >= (current_time - mutable_cf_options.ttl)) { + uint64_t est_newest_key_time = newest_key_time == kUnknownNewestKeyTime + ? creation_time + : newest_key_time; + if (est_newest_key_time == kUnknownNewestKeyTime || + est_newest_key_time >= (current_time - mutable_cf_options.ttl)) { break; } } @@ -102,15 +106,19 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( } for (const auto& f : inputs[0].files) { - uint64_t creation_time = 0; assert(f); + uint64_t newest_key_time = f->TryGetNewestKeyTime(); + uint64_t creation_time = 0; if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { creation_time = f->fd.table_reader->GetTableProperties()->creation_time; } + uint64_t est_newest_key_time = newest_key_time == kUnknownNewestKeyTime + ? creation_time + : newest_key_time; ROCKS_LOG_BUFFER(log_buffer, "[%s] FIFO compaction: picking file %" PRIu64 - " with creation time %" PRIu64 " for deletion", - cf_name.c_str(), f->fd.GetNumber(), creation_time); + " with estimated newest key time %" PRIu64 " for deletion", + cf_name.c_str(), f->fd.GetNumber(), est_newest_key_time); } Compaction* c = new Compaction( @@ -350,38 +358,26 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction( Temperature compaction_target_temp = Temperature::kLastTemperature; if (current_time > min_age) { uint64_t create_time_threshold = current_time - min_age; - // We will ideally identify a file qualifying for temperature change by - // knowing the timestamp for the youngest entry in the file. However, right - // now we don't have the information. We infer it by looking at timestamp of - // the previous file's (which is just younger) oldest entry's timestamp. - // avoid index underflow assert(level_files.size() >= 1); - for (size_t index = level_files.size() - 1; index >= 1; --index) { + for (size_t index = level_files.size(); index >= 1; --index) { // Try to add cur_file to compaction inputs. - FileMetaData* cur_file = level_files[index]; - // prev_file is just younger than cur_file - FileMetaData* prev_file = level_files[index - 1]; + FileMetaData* cur_file = level_files[index - 1]; + FileMetaData* prev_file = index < 2 ? nullptr : level_files[index - 2]; if (cur_file->being_compacted) { // Should not happen since we check for // `level0_compactions_in_progress_` above. Here we simply just don't // schedule anything. return nullptr; } - uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime(); - if (oldest_ancestor_time == kUnknownOldestAncesterTime) { - // Older files might not have enough information. It is possible to - // handle these files by looking at newer files, but maintaining the - // logic isn't worth it. - break; - } - if (oldest_ancestor_time > create_time_threshold) { - // cur_file is too fresh + uint64_t est_newest_key_time = cur_file->TryGetNewestKeyTime(prev_file); + if (est_newest_key_time == kUnknownNewestKeyTime || + est_newest_key_time > create_time_threshold) { break; } Temperature cur_target_temp = ages[0].temperature; for (size_t i = 1; i < ages.size(); ++i) { if (current_time >= ages[i].age && - oldest_ancestor_time <= current_time - ages[i].age) { + est_newest_key_time <= current_time - ages[i].age) { cur_target_temp = ages[i].temperature; } } @@ -396,8 +392,8 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction( ROCKS_LOG_BUFFER( log_buffer, "[%s] FIFO compaction: picking file %" PRIu64 - " with next file's oldest time %" PRIu64 " for temperature %s.", - cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time, + " with estimated newest key time %" PRIu64 " for temperature %s.", + cf_name.c_str(), cur_file->fd.GetNumber(), est_newest_key_time, temperature_to_string[cur_target_temp].c_str()); break; } diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 3cf0c7377d..b75df61c3b 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -13,6 +13,7 @@ #include "db/compaction/compaction_picker_universal.h" #include "db/compaction/file_pri.h" #include "rocksdb/advanced_options.h" +#include "table/mock_table.h" #include "table/unique_id_impl.h" #include "test_util/testharness.h" #include "test_util/testutil.h" @@ -77,7 +78,7 @@ class CompactionPickerTestBase : public testing::Test { ioptions_.level_compaction_dynamic_level_bytes = false; } - ~CompactionPickerTestBase() override = default; + ~CompactionPickerTestBase() override { ClearFiles(); } void NewVersionStorage(int num_levels, CompactionStyle style) { DeleteVersionStorage(); @@ -103,7 +104,7 @@ class CompactionPickerTestBase : public testing::Test { void DeleteVersionStorage() { vstorage_.reset(); temp_vstorage_.reset(); - files_.clear(); + ClearFiles(); file_map_.clear(); input_files_.clear(); } @@ -115,6 +116,7 @@ class CompactionPickerTestBase : public testing::Test { size_t compensated_file_size = 0, bool marked_for_compact = false, Temperature temperature = Temperature::kUnknown, uint64_t oldest_ancestor_time = kUnknownOldestAncesterTime, + uint64_t newest_key_time = kUnknownNewestKeyTime, Slice ts_of_smallest = Slice(), Slice ts_of_largest = Slice(), uint64_t epoch_number = kUnknownEpochNumber) { assert(ts_of_smallest.size() == ucmp_->timestamp_size()); @@ -161,7 +163,12 @@ class CompactionPickerTestBase : public testing::Test { true /* user_defined_timestamps_persisted */); f->compensated_file_size = (compensated_file_size != 0) ? compensated_file_size : file_size; + // oldest_ancester_time is only used if newest_key_time is not available f->oldest_ancester_time = oldest_ancestor_time; + TableProperties tp; + tp.newest_key_time = newest_key_time; + f->fd.table_reader = new mock::MockTableReader(mock::KVVector{}, tp); + vstorage->AddFile(level, f); files_.emplace_back(f); file_map_.insert({file_number, {f, level}}); @@ -206,6 +213,15 @@ class CompactionPickerTestBase : public testing::Test { return opts; } + void ClearFiles() { + for (auto& file : files_) { + if (file->fd.table_reader != nullptr) { + delete file->fd.table_reader; + } + } + files_.clear(); + } + std::unique_ptr temp_vstorage_; }; @@ -1108,235 +1124,326 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { } TEST_F(CompactionPickerTest, FIFOToCold1) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0 /* level */, 4U /* file_number */, "260", "300", 1 * kFileSize, 0, 2500, - 2600, 0, true, Temperature::kUnknown, - threshold_time - 2000 /* oldest_ancestor_time */); - // Qualifies for compaction to kCold. - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, threshold_time - 3000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0 /* level */, 4U /* file_number */, "260", "300", 1 * kFileSize, 0, + 2500, 2600, 0, true, Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : threshold_time - 2000 /* oldest_ancestor_time */, + newestKeyTimeKnown ? threshold_time - 2000 + : kUnknownNewestKeyTime /* newest_key_time */); + // Qualifies for compaction to kCold. + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 + : kUnknownNewestKeyTime /* newest_key_time */); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToColdMaxCompactionSize) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 9; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 9; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); - // The following two files qualify for compaction to kCold. - // But only the last two should be included to respect `max_compaction_bytes`. - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, threshold_time - 3000); - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kUnknown, threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time + 100, + newestKeyTimeKnown ? threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 2000, + newestKeyTimeKnown ? threshold_time - 2000 : kUnknownNewestKeyTime); + // The following two files qualify for compaction to kCold. + // But only the last two should be included to respect + // `max_compaction_bytes`. + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 : kUnknownNewestKeyTime); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 4000, + newestKeyTimeKnown ? threshold_time - 4000 : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 5000, + newestKeyTimeKnown ? threshold_time - 5000 : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - // Compaction picker picks older files first and picks one file at a time. - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + // Compaction picker picks older files first and picks one file at a time. + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToColdWithExistingCold) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); - // The following two files qualify for compaction to kCold. - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, threshold_time - 3000); - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kCold, threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time + 100, + newestKeyTimeKnown ? threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 2000, + newestKeyTimeKnown ? threshold_time - 2000 : kUnknownNewestKeyTime); + // The following two files qualify for compaction to kCold. + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 : kUnknownNewestKeyTime); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 4000, + newestKeyTimeKnown ? threshold_time - 4000 : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kCold, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 5000, + newestKeyTimeKnown ? threshold_time - 5000 : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - // Compaction picker picks older files first and picks one file at a time. - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + // Compaction picker picks older files first and picks one file at a time. + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToColdWithHotBetweenCold) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kCold, threshold_time - 3000); - // Qualifies for compaction to kCold. - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kCold, threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time + 100, + newestKeyTimeKnown ? threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 2000, + newestKeyTimeKnown ? threshold_time - 2000 : kUnknownNewestKeyTime); + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kCold, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 : kUnknownNewestKeyTime); + // Qualifies for compaction to kCold. + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 4000, + newestKeyTimeKnown ? threshold_time - 4000 : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kCold, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 5000, + newestKeyTimeKnown ? threshold_time - 5000 : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToHotAndWarm) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 10000; - uint64_t kHotThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 10000; + uint64_t kHotThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - // Test that multiple threshold works. - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kHot, kHotThreshold}, {Temperature::kWarm, kWarmThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + // Test that multiple threshold works. + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kHot, kHotThreshold}, + {Temperature::kWarm, kWarmThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t hot_threshold_time = - static_cast(current_time) - kHotThreshold; - uint64_t warm_threshold_time = - static_cast(current_time) - kWarmThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, hot_threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, hot_threshold_time - 200); - // Qualifies for Hot - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, warm_threshold_time - 100); - // Qualifies for Warm - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, warm_threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kUnknown, warm_threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t hot_threshold_time = + static_cast(current_time) - kHotThreshold; + uint64_t warm_threshold_time = + static_cast(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : hot_threshold_time + 100, + newestKeyTimeKnown ? hot_threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : hot_threshold_time - 200, + newestKeyTimeKnown ? hot_threshold_time - 200 : kUnknownNewestKeyTime); + // Qualifies for Hot + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : warm_threshold_time - 100, + newestKeyTimeKnown ? warm_threshold_time - 100 : kUnknownNewestKeyTime); + // Qualifies for Warm + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : warm_threshold_time - 4000, + newestKeyTimeKnown ? warm_threshold_time - 4000 + : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : warm_threshold_time - 5000, + newestKeyTimeKnown ? warm_threshold_time - 5000 + : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - // Compaction picker picks older files first and picks one file at a time. - ASSERT_EQ(compaction->output_temperature(), Temperature::kWarm); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + // Compaction picker picks older files first and picks one file at a time. + ASSERT_EQ(compaction->output_temperature(), Temperature::kWarm); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) { @@ -3097,18 +3204,21 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) { Add(0, 1U, "150", "200", kFileSize, 0, 500, 550, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 3); Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 2); Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 1); Add(3, 5U, "010", "080", 8 * kFileSize, 0, 200, 251); @@ -3136,6 +3246,7 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) { Add(0, 7U, "150", "200", kFileSize, 0, 551, 600, 0, true, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 4); UpdateVersionStorageInfo(); @@ -3163,6 +3274,7 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap2) { Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300, 0, true, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 1); Add(3, 5U, "240", "290", 8 * kFileSize, 0, 201, 250); @@ -3189,12 +3301,14 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap2) { Add(0, 1U, "150", "200", kFileSize, 0, 500, 550, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 3); Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 2); UpdateVersionStorageInfo(); @@ -3371,23 +3485,27 @@ TEST_F(CompactionPickerTest, UniversalMarkedL0Overlap2) { /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 4); Add(0, 5U, "240", "290", 2 * kFileSize, 0, 201, 250, 0, true, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 3); Add(0, 3U, "301", "350", 4 * kFileSize, 0, 101, 150, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 2); Add(0, 6U, "501", "750", 8 * kFileSize, 0, 50, 100, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 1); UpdateVersionStorageInfo(); @@ -3414,12 +3532,14 @@ TEST_F(CompactionPickerTest, UniversalMarkedL0Overlap2) { /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 6); Add(0, 2U, "201", "250", kFileSize, 0, 401, 450, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 5); UpdateVersionStorageInfo(); @@ -3653,7 +3773,8 @@ TEST_F(CompactionPickerU64TsTest, Overlap) { /*file_size=*/1U, /*path_id=*/0, /*smallest_seq=*/100, /*largest_seq=*/100, /*compensated_file_size=*/0, /*marked_for_compact=*/false, /*temperature=*/Temperature::kUnknown, - /*oldest_ancestor_time=*/kUnknownOldestAncesterTime, ts1, ts2); + /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, ts1, ts2); UpdateVersionStorageInfo(); } @@ -3719,11 +3840,13 @@ TEST_F(CompactionPickerU64TsTest, CannotTrivialMoveUniversal) { Add(1, 1U, "150", "150", kFileSize, /*path_id=*/0, /*smallest_seq=*/100, /*largest_seq=*/100, /*compensated_file_size=*/kFileSize, /*marked_for_compact=*/false, Temperature::kUnknown, - kUnknownOldestAncesterTime, ts1, ts2); + /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, ts1, ts2); Add(2, 2U, "150", "150", kFileSize, /*path_id=*/0, /*smallest_seq=*/100, /*largest_seq=*/100, /*compensated_file_size=*/kFileSize, /*marked_for_compact=*/false, Temperature::kUnknown, - kUnknownOldestAncesterTime, ts3, ts4); + /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, ts3, ts4); UpdateVersionStorageInfo(); std::unique_ptr compaction( diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 2a8e19e7bc..c71cf2e0c1 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -10623,6 +10623,97 @@ TEST_F(DBCompactionTest, ReleaseCompactionDuringManifestWrite) { SyncPoint::GetInstance()->ClearAllCallBacks(); } +TEST_F(DBCompactionTest, RecordNewestKeyTimeForTtlCompaction) { + Options options; + SetTimeElapseOnlySleepOnReopen(&options); + options.env = CurrentOptions().env; + options.compaction_style = kCompactionStyleFIFO; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + options.write_buffer_size = 10 << 10; // 10KB + options.arena_block_size = 4096; + options.compression = kNoCompression; + options.create_if_missing = true; + options.compaction_options_fifo.allow_compaction = false; + options.num_levels = 1; + env_->SetMockSleep(); + options.env = env_; + options.ttl = 1 * 60 * 60; // 1 hour + ASSERT_OK(TryReopen(options)); + + // Generate and flush 4 files, each about 10KB + // Compaction is manually disabled at this point so we can check + // each file's newest_key_time + Random rnd(301); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 10; j++) { + ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); + } + ASSERT_OK(Flush()); + env_->MockSleepForSeconds(5); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 4); + + // Check that we are populating newest_key_time on flush + std::vector file_metadatas = GetLevelFileMetadatas(0); + ASSERT_EQ(file_metadatas.size(), 4); + uint64_t first_newest_key_time = + file_metadatas[0]->fd.table_reader->GetTableProperties()->newest_key_time; + ASSERT_NE(first_newest_key_time, kUnknownNewestKeyTime); + // Check that the newest_key_times are in expected ordering + uint64_t prev_newest_key_time = first_newest_key_time; + for (size_t idx = 1; idx < file_metadatas.size(); idx++) { + uint64_t newest_key_time = file_metadatas[idx] + ->fd.table_reader->GetTableProperties() + ->newest_key_time; + + ASSERT_LT(newest_key_time, prev_newest_key_time); + prev_newest_key_time = newest_key_time; + ASSERT_EQ(newest_key_time, file_metadatas[idx] + ->fd.table_reader->GetTableProperties() + ->creation_time); + } + // The delta between the first and last newest_key_times is 15s + uint64_t last_newest_key_time = prev_newest_key_time; + ASSERT_EQ(15, first_newest_key_time - last_newest_key_time); + + // After compaction, the newest_key_time of the output file should be the max + // of the input files + options.compaction_options_fifo.allow_compaction = true; + ASSERT_OK(TryReopen(options)); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 1); + file_metadatas = GetLevelFileMetadatas(0); + ASSERT_EQ(file_metadatas.size(), 1); + ASSERT_EQ( + file_metadatas[0]->fd.table_reader->GetTableProperties()->newest_key_time, + first_newest_key_time); + // Contrast newest_key_time with creation_time, which records the oldest + // ancestor time (15s older than newest_key_time) + ASSERT_EQ( + file_metadatas[0]->fd.table_reader->GetTableProperties()->creation_time, + last_newest_key_time); + ASSERT_EQ(file_metadatas[0]->oldest_ancester_time, last_newest_key_time); + + // Make sure TTL of 5s causes compaction + env_->MockSleepForSeconds(6); + + // The oldest input file is older than 15s + // However the newest of the compaction input files is younger than 15s, so + // we don't compact + ASSERT_OK(dbfull()->SetOptions({{"ttl", "15"}})); + ASSERT_EQ(dbfull()->GetOptions().ttl, 15); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 1); + + // Now even the youngest input file is too old + ASSERT_OK(dbfull()->SetOptions({{"ttl", "5"}})); + ASSERT_EQ(dbfull()->GetOptions().ttl, 5); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 0); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 6727839ee0..3e1a6198ea 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1735,10 +1735,11 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, cfd->internal_comparator(), cfd->internal_tbl_prop_coll_factories(), GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), mutable_cf_options.compression_opts, cfd->GetID(), cfd->GetName(), - 0 /* level */, false /* is_bottommost */, - TableFileCreationReason::kRecovery, 0 /* oldest_key_time */, - 0 /* file_creation_time */, db_id_, db_session_id_, - 0 /* target_file_size */, meta.fd.GetNumber(), kMaxSequenceNumber); + 0 /* level */, current_time /* newest_key_time */, + false /* is_bottommost */, TableFileCreationReason::kRecovery, + 0 /* oldest_key_time */, 0 /* file_creation_time */, db_id_, + db_session_id_, 0 /* target_file_size */, meta.fd.GetNumber(), + kMaxSequenceNumber); Version* version = cfd->current(); version->Ref(); uint64_t num_input_entries = 0; diff --git a/db/db_test2.cc b/db/db_test2.cc index fbe66a986b..c4590def71 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -36,18 +36,6 @@ namespace ROCKSDB_NAMESPACE { class DBTest2 : public DBTestBase { public: DBTest2() : DBTestBase("db_test2", /*env_do_fsync=*/true) {} - std::vector GetLevelFileMetadatas(int level, int cf = 0) { - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = - versions->GetColumnFamilySet()->GetColumnFamily(cf); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - return storage_info->LevelFiles(level); - } }; TEST_F(DBTest2, OpenForReadOnly) { diff --git a/db/db_test_util.cc b/db/db_test_util.cc index d444bc5193..79532a13e6 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1263,6 +1263,20 @@ Status DBTestBase::CountFiles(size_t* count) { return Status::OK(); } +std::vector DBTestBase::GetLevelFileMetadatas(int level, + int cf) { + VersionSet* const versions = dbfull()->GetVersionSet(); + assert(versions); + ColumnFamilyData* const cfd = + versions->GetColumnFamilySet()->GetColumnFamily(cf); + assert(cfd); + Version* const current = cfd->current(); + assert(current); + VersionStorageInfo* const storage_info = current->storage_info(); + assert(storage_info); + return storage_info->LevelFiles(level); +} + Status DBTestBase::Size(const Slice& start, const Slice& limit, int cf, uint64_t* size) { Range r(start, limit); diff --git a/db/db_test_util.h b/db/db_test_util.h index 36a4615344..a7dc06659e 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -1273,6 +1273,8 @@ class DBTestBase : public testing::Test { Status CountFiles(size_t* count); + std::vector GetLevelFileMetadatas(int level, int cf = 0); + Status Size(const Slice& start, const Slice& limit, uint64_t* size) { return Size(start, limit, 0, size); } diff --git a/db/event_helpers.cc b/db/event_helpers.cc index fa9c5e153c..6879e450ee 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -132,6 +132,7 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished( << table_properties.compression_name << "compression_options" << table_properties.compression_options << "creation_time" << table_properties.creation_time << "oldest_key_time" + << table_properties.newest_key_time << "newest_key_time" << table_properties.oldest_key_time << "file_creation_time" << table_properties.file_creation_time << "slow_compression_estimated_data_size" diff --git a/db/flush_job.cc b/db/flush_job.cc index f7d585a3b8..88499cd112 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -987,9 +987,10 @@ Status FlushJob::WriteLevel0Table() { cfd_->internal_comparator(), cfd_->internal_tbl_prop_coll_factories(), output_compression_, mutable_cf_options_.compression_opts, cfd_->GetID(), cfd_->GetName(), 0 /* level */, - false /* is_bottommost */, TableFileCreationReason::kFlush, - oldest_key_time, current_time, db_id_, db_session_id_, - 0 /* target_file_size */, meta_.fd.GetNumber(), + current_time /* newest_key_time */, false /* is_bottommost */, + TableFileCreationReason::kFlush, oldest_key_time, current_time, + db_id_, db_session_id_, 0 /* target_file_size */, + meta_.fd.GetNumber(), preclude_last_level_min_seqno_ == kMaxSequenceNumber ? preclude_last_level_min_seqno_ : std::min(earliest_snapshot_, preclude_last_level_min_seqno_)); diff --git a/db/repair.cc b/db/repair.cc index 1d2b81adc4..7b063e64ee 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -474,10 +474,10 @@ class Repairer { write_option, cfd->internal_comparator(), cfd->internal_tbl_prop_coll_factories(), kNoCompression, default_compression, cfd->GetID(), cfd->GetName(), -1 /* level */, - false /* is_bottommost */, TableFileCreationReason::kRecovery, - 0 /* oldest_key_time */, 0 /* file_creation_time */, - "DB Repairer" /* db_id */, db_session_id_, 0 /*target_file_size*/, - meta.fd.GetNumber()); + current_time /* newest_key_time */, false /* is_bottommost */, + TableFileCreationReason::kRecovery, 0 /* oldest_key_time */, + 0 /* file_creation_time */, "DB Repairer" /* db_id */, db_session_id_, + 0 /*target_file_size*/, meta.fd.GetNumber()); SeqnoToTimeMapping empty_seqno_to_time_mapping; status = BuildTable( diff --git a/db/repair_test.cc b/db/repair_test.cc index 8adc06f0c5..d5e1614f32 100644 --- a/db/repair_test.cc +++ b/db/repair_test.cc @@ -60,19 +60,6 @@ class RepairTest : public DBTestBase { ASSERT_GT(verify_passed, 0); SyncPoint::GetInstance()->DisableProcessing(); } - - std::vector GetLevelFileMetadatas(int level, int cf = 0) { - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = - versions->GetColumnFamilySet()->GetColumnFamily(cf); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - return storage_info->LevelFiles(level); - } }; TEST_F(RepairTest, SortRepairedDBL0ByEpochNumber) { diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index c9dfb7d0ff..e46cafa6a1 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -58,7 +58,7 @@ void MakeBuilder( ioptions, moptions, read_options, write_options, internal_comparator, internal_tbl_prop_coll_factories, options.compression, options.compression_opts, kTestColumnFamilyId, kTestColumnFamilyName, - kTestLevel); + kTestLevel, kUnknownNewestKeyTime); builder->reset(NewTableBuilder(tboptions, writable->get())); } } // namespace diff --git a/db/version_edit.h b/db/version_edit.h index 2b247e4571..34997451fe 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -107,6 +107,7 @@ class VersionSet; constexpr uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF; constexpr uint64_t kUnknownOldestAncesterTime = 0; +constexpr uint64_t kUnknownNewestKeyTime = 0; constexpr uint64_t kUnknownFileCreationTime = 0; constexpr uint64_t kUnknownEpochNumber = 0; // If `Options::allow_ingest_behind` is true, this epoch number @@ -333,6 +334,27 @@ struct FileMetaData { return kUnknownFileCreationTime; } + // Tries to get the newest key time from the current file + // Falls back on oldest ancestor time of previous (newer) file + uint64_t TryGetNewestKeyTime(FileMetaData* prev_file = nullptr) { + if (fd.table_reader != nullptr && + fd.table_reader->GetTableProperties() != nullptr) { + uint64_t newest_key_time = + fd.table_reader->GetTableProperties()->newest_key_time; + if (newest_key_time != kUnknownNewestKeyTime) { + return newest_key_time; + } + } + if (prev_file != nullptr) { + uint64_t prev_oldest_ancestor_time = + prev_file->TryGetOldestAncesterTime(); + if (prev_oldest_ancestor_time != kUnknownOldestAncesterTime) { + return prev_oldest_ancestor_time; + } + } + return kUnknownNewestKeyTime; + } + // WARNING: manual update to this function is needed // whenever a new string property is added to FileMetaData // to reduce approximation error. diff --git a/db/version_set.cc b/db/version_set.cc index 2f4892fbf6..a7bbd5af6d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -3386,28 +3386,25 @@ bool ShouldChangeFileTemperature(const ImmutableOptions& ioptions, int64_t _current_time; auto status = ioptions.clock->GetCurrentTime(&_current_time); const uint64_t current_time = static_cast(_current_time); - // We use oldest_ancestor_time of a file to be the estimate age of - // the file just older than it. This is the same logic used in + // This is the same logic used in // FIFOCompactionPicker::PickTemperatureChangeCompaction(). if (status.ok() && current_time >= ages[0].age) { uint64_t create_time_threshold = current_time - ages[0].age; Temperature target_temp; assert(files.size() >= 1); - for (size_t index = files.size() - 1; index >= 1; --index) { - FileMetaData* cur_file = files[index]; - FileMetaData* prev_file = files[index - 1]; + for (size_t index = files.size(); index >= 1; --index) { + FileMetaData* cur_file = files[index - 1]; + FileMetaData* prev_file = index < 2 ? nullptr : files[index - 2]; if (!cur_file->being_compacted) { - uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime(); - if (oldest_ancestor_time == kUnknownOldestAncesterTime) { - return false; - } - if (oldest_ancestor_time > create_time_threshold) { + uint64_t est_newest_key_time = cur_file->TryGetNewestKeyTime(prev_file); + if (est_newest_key_time == kUnknownNewestKeyTime || + est_newest_key_time > create_time_threshold) { return false; } target_temp = ages[0].temperature; for (size_t i = 1; i < ages.size(); ++i) { if (current_time >= ages[i].age && - oldest_ancestor_time <= current_time - ages[i].age) { + est_newest_key_time <= current_time - ages[i].age) { target_temp = ages[i].temperature; } } diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 9264345c5b..04b9503e77 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1328,7 +1328,7 @@ class VersionSetTestBase { &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - info.column_family, info.level), + info.column_family, info.level, kUnknownNewestKeyTime), fwriter.get())); InternalKey ikey(info.key, 0, ValueType::kTypeValue); builder->Add(ikey.Encode(), "value"); diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 84677a4d2d..00e448ba7d 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -68,6 +68,7 @@ struct TablePropertiesNames { static const std::string kCompressionOptions; static const std::string kCreationTime; static const std::string kOldestKeyTime; + static const std::string kNewestKeyTime; static const std::string kFileCreationTime; static const std::string kSlowCompressionEstimatedDataSize; static const std::string kFastCompressionEstimatedDataSize; @@ -270,6 +271,8 @@ struct TableProperties { // Timestamp of the earliest key. 0 means unknown. uint64_t oldest_key_time = 0; + // Timestamp of the newest key. 0 means unknown. + uint64_t newest_key_time = 0; // Actual SST file creation time. 0 means unknown. uint64_t file_creation_time = 0; // Estimated size of data blocks if compressed using a relatively slower diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 1a86fb26ed..6036d05131 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -282,7 +282,8 @@ TEST_F(OptionsSettableTest, TablePropertiesAllFieldsSettable) { "636F6D70617261746F725F6E616D65;num_filter_entries=0;db_id=" "64625F686F73745F6964;column_family_id=2147483647;fixed_key_len=0;fast_" "compression_estimated_data_size=0;filter_policy_name=" - "66696C7465725F706F6C6963795F6E616D65;oldest_key_time=0;column_family_" + "66696C7465725F706F6C6963795F6E616D65;oldest_key_time=0;newest_key_time=" + "0;column_family_" "name=64656661756C74;user_defined_timestamps_persisted=1;num_entries=100;" "external_sst_file_global_seqno_offset=0;num_merge_operands=0;index_key_" "is_user_key=0;key_largest_seqno=18446744073709551615;", diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index ec4a695b63..6845e515e8 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -619,6 +619,7 @@ struct BlockBasedTableBuilder::Rep { props.column_family_id = tbo.column_family_id; props.column_family_name = tbo.column_family_name; props.oldest_key_time = tbo.oldest_key_time; + props.newest_key_time = tbo.newest_key_time; props.file_creation_time = tbo.file_creation_time; props.orig_file_number = tbo.cur_file_num; props.db_id = tbo.db_id; diff --git a/table/block_based/block_based_table_reader_test.cc b/table/block_based/block_based_table_reader_test.cc index d90ee15782..414d4b1f9f 100644 --- a/table/block_based/block_based_table_reader_test.cc +++ b/table/block_based/block_based_table_reader_test.cc @@ -141,7 +141,8 @@ class BlockBasedTableReaderBaseTest : public testing::Test { TableBuilderOptions(ioptions, moptions, read_options, write_options, comparator, &factories, compression_type, compression_opts, 0 /* column_family_id */, - kDefaultColumnFamilyName, -1 /* level */), + kDefaultColumnFamilyName, -1 /* level */, + kUnknownNewestKeyTime), writer.get())); // Build table. diff --git a/table/block_based/data_block_hash_index_test.cc b/table/block_based/data_block_hash_index_test.cc index 9936a34fd0..7970ca1d9f 100644 --- a/table/block_based/data_block_hash_index_test.cc +++ b/table/block_based/data_block_hash_index_test.cc @@ -561,7 +561,7 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, &internal_tbl_prop_coll_factories, options.compression, CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - column_family_name, level_), + column_family_name, level_, kUnknownNewestKeyTime), file_writer.get())); builder->Add(ik1.Encode().ToString(), v1); diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc index ca8fcb7e50..9b31eb4302 100644 --- a/table/block_fetcher_test.cc +++ b/table/block_fetcher_test.cc @@ -83,7 +83,8 @@ class BlockFetcherTest : public testing::Test { TableBuilderOptions(ioptions, moptions, read_options, write_options, comparator, &factories, compression_type, CompressionOptions(), 0 /* column_family_id */, - kDefaultColumnFamilyName, -1 /* level */), + kDefaultColumnFamilyName, -1 /* level */, + kUnknownNewestKeyTime), writer.get())); // Build table. diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index cc8f6bfce3..28923fb563 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -105,6 +105,7 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id); Add(TablePropertiesNames::kCreationTime, props.creation_time); Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time); + Add(TablePropertiesNames::kNewestKeyTime, props.newest_key_time); if (props.file_creation_time > 0) { Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time); } @@ -368,6 +369,8 @@ Status ReadTablePropertiesHelper( &new_table_properties->creation_time}, {TablePropertiesNames::kOldestKeyTime, &new_table_properties->oldest_key_time}, + {TablePropertiesNames::kNewestKeyTime, + &new_table_properties->newest_key_time}, {TablePropertiesNames::kFileCreationTime, &new_table_properties->file_creation_time}, {TablePropertiesNames::kSlowCompressionEstimatedDataSize, diff --git a/table/mock_table.cc b/table/mock_table.cc index 14fbb3f1d0..934c7f54a7 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -25,45 +25,6 @@ void SortKVVector(KVVector* kv_vector, const Comparator* ucmp) { }); } -class MockTableReader : public TableReader { - public: - explicit MockTableReader(const KVVector& table) : table_(table) {} - - InternalIterator* NewIterator(const ReadOptions&, - const SliceTransform* prefix_extractor, - Arena* arena, bool skip_filters, - TableReaderCaller caller, - size_t compaction_readahead_size = 0, - bool allow_unprepared_value = false) override; - - Status Get(const ReadOptions& readOptions, const Slice& key, - GetContext* get_context, const SliceTransform* prefix_extractor, - bool skip_filters = false) override; - - uint64_t ApproximateOffsetOf(const ReadOptions& /*read_options*/, - const Slice& /*key*/, - TableReaderCaller /*caller*/) override { - return 0; - } - - uint64_t ApproximateSize(const ReadOptions& /*read_options*/, - const Slice& /*start*/, const Slice& /*end*/, - TableReaderCaller /*caller*/) override { - return 0; - } - - size_t ApproximateMemoryUsage() const override { return 0; } - - void SetupForCompaction() override {} - - std::shared_ptr GetTableProperties() const override; - - ~MockTableReader() = default; - - private: - const KVVector& table_; -}; - class MockTableIterator : public InternalIterator { public: explicit MockTableIterator(const KVVector& table) : table_(table) { @@ -233,17 +194,6 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key, return Status::OK(); } -std::shared_ptr MockTableReader::GetTableProperties() - const { - TableProperties* tp = new TableProperties(); - tp->num_entries = table_.size(); - tp->num_range_deletions = 0; - tp->raw_key_size = 1; - tp->raw_value_size = 1; - - return std::shared_ptr(tp); -} - MockTableFactory::MockTableFactory() : next_id_(1), corrupt_mode_(MockTableFactory::kCorruptNone) {} diff --git a/table/mock_table.h b/table/mock_table.h index af90740a26..af3a4eb46c 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -91,5 +91,55 @@ class MockTableFactory : public TableFactory { size_t key_value_size_ = 1; }; +class MockTableReader : public TableReader { + public: + explicit MockTableReader(const mock::KVVector& table) : table_(table) { + tp_.num_entries = table_.size(); + tp_.num_range_deletions = 0; + tp_.raw_key_size = 1; + tp_.raw_value_size = 1; + } + explicit MockTableReader(const mock::KVVector& table, + const TableProperties& tp) + : table_(table), tp_(tp) {} + + virtual InternalIterator* NewIterator( + const ReadOptions&, const SliceTransform* prefix_extractor, Arena* arena, + bool skip_filters, TableReaderCaller caller, + size_t compaction_readahead_size = 0, + bool allow_unprepared_value = false) override; + + virtual Status Get(const ReadOptions& readOptions, const Slice& key, + GetContext* get_context, + const SliceTransform* prefix_extractor, + bool skip_filters = false) override; + + virtual uint64_t ApproximateOffsetOf(const ReadOptions& /*read_options*/, + const Slice& /*key*/, + TableReaderCaller /*caller*/) override { + return 0; + } + + virtual uint64_t ApproximateSize(const ReadOptions& /*read_options*/, + const Slice& /*start*/, const Slice& /*end*/, + TableReaderCaller /*caller*/) override { + return 0; + } + + virtual size_t ApproximateMemoryUsage() const override { return 0; } + + virtual void SetupForCompaction() override {} + + virtual std::shared_ptr GetTableProperties() + const override { + return std::make_shared(tp_); + } + + ~MockTableReader() = default; + + private: + const KVVector& table_; + TableProperties tp_; +}; } // namespace mock } // namespace ROCKSDB_NAMESPACE diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 72729ffc3c..905eef7004 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -311,7 +311,7 @@ Status SstFileDumper::ShowCompressionSize( imoptions, moptions, read_options, write_options, ikc, &block_based_table_factories, compress_type, compress_opt, TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - column_family_name, unknown_level); + column_family_name, unknown_level, kUnknownNewestKeyTime); uint64_t num_data_blocks = 0; std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index 806da18da0..84168683f4 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -395,10 +395,10 @@ Status SstFileWriter::Open(const std::string& file_path, Temperature temp) { r->ioptions, r->mutable_cf_options, ReadOptions(), r->write_options, r->internal_comparator, &internal_tbl_prop_coll_factories, compression_type, compression_opts, cf_id, r->column_family_name, - unknown_level, false /* is_bottommost */, TableFileCreationReason::kMisc, - 0 /* oldest_key_time */, 0 /* file_creation_time */, - "SST Writer" /* db_id */, r->db_session_id, 0 /* target_file_size */, - r->next_file_number); + unknown_level, kUnknownNewestKeyTime, false /* is_bottommost */, + TableFileCreationReason::kMisc, 0 /* oldest_key_time */, + 0 /* file_creation_time */, "SST Writer" /* db_id */, r->db_session_id, + 0 /* target_file_size */, r->next_file_number); // External SST files used to each get a unique session id. Now for // slightly better uniqueness probability in constructing cache keys, we // assign fake file numbers to each file (into table properties) and keep diff --git a/table/table_builder.h b/table/table_builder.h index 0a1944e1f3..5b36f06cb5 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -108,7 +108,7 @@ struct TableBuilderOptions : public TablePropertiesCollectorFactory::Context { CompressionType _compression_type, const CompressionOptions& _compression_opts, uint32_t _column_family_id, const std::string& _column_family_name, int _level, - bool _is_bottommost = false, + const int64_t _newest_key_time, bool _is_bottommost = false, TableFileCreationReason _reason = TableFileCreationReason::kMisc, const int64_t _oldest_key_time = 0, const uint64_t _file_creation_time = 0, const std::string& _db_id = "", @@ -129,6 +129,7 @@ struct TableBuilderOptions : public TablePropertiesCollectorFactory::Context { compression_opts(_compression_opts), column_family_name(_column_family_name), oldest_key_time(_oldest_key_time), + newest_key_time(_newest_key_time), target_file_size(_target_file_size), file_creation_time(_file_creation_time), db_id(_db_id), @@ -147,6 +148,7 @@ struct TableBuilderOptions : public TablePropertiesCollectorFactory::Context { const CompressionOptions& compression_opts; const std::string& column_family_name; const int64_t oldest_key_time; + const int64_t newest_key_time; const uint64_t target_file_size; const uint64_t file_creation_time; const std::string db_id; diff --git a/table/table_properties.cc b/table/table_properties.cc index e0aff583f3..7fee67d1e9 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -141,6 +141,8 @@ std::string TableProperties::ToString(const std::string& prop_delim, AppendProperty(result, "time stamp of earliest key", oldest_key_time, prop_delim, kv_delim); + AppendProperty(result, "time stamp of newest key", newest_key_time, + prop_delim, kv_delim); AppendProperty(result, "file creation time", file_creation_time, prop_delim, kv_delim); @@ -302,6 +304,8 @@ const std::string TablePropertiesNames::kCompressionOptions = const std::string TablePropertiesNames::kCreationTime = "rocksdb.creation.time"; const std::string TablePropertiesNames::kOldestKeyTime = "rocksdb.oldest.key.time"; +const std::string TablePropertiesNames::kNewestKeyTime = + "rocksdb.newest.key.time"; const std::string TablePropertiesNames::kFileCreationTime = "rocksdb.file.creation.time"; const std::string TablePropertiesNames::kSlowCompressionEstimatedDataSize = @@ -395,6 +399,10 @@ static std::unordered_map {offsetof(struct TableProperties, oldest_key_time), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"newest_key_time", + {offsetof(struct TableProperties, newest_key_time), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, {"file_creation_time", {offsetof(struct TableProperties, file_creation_time), OptionType::kUInt64T, OptionVerificationType::kNormal, diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index 2e9094bfcb..a588f6eea0 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -100,11 +100,11 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, int unknown_level = -1; const WriteOptions write_options; tb = opts.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, read_options, write_options, - ikc, &internal_tbl_prop_coll_factories, - CompressionType::kNoCompression, - CompressionOptions(), 0 /* column_family_id */, - kDefaultColumnFamilyName, unknown_level), + TableBuilderOptions( + ioptions, moptions, read_options, write_options, ikc, + &internal_tbl_prop_coll_factories, CompressionType::kNoCompression, + CompressionOptions(), 0 /* column_family_id */, + kDefaultColumnFamilyName, unknown_level, kUnknownNewestKeyTime), file_writer.get()); } else { s = DB::Open(opts, dbname, &db); diff --git a/table/table_test.cc b/table/table_test.cc index f51ddf69ce..d4e4b3936d 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -380,11 +380,11 @@ class TableConstructor : public Constructor { const ReadOptions read_options; const WriteOptions write_options; builder.reset(moptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, read_options, write_options, - internal_comparator, - &internal_tbl_prop_coll_factories, - options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level_), + TableBuilderOptions( + ioptions, moptions, read_options, write_options, + internal_comparator, &internal_tbl_prop_coll_factories, + options.compression, options.compression_opts, kUnknownColumnFamily, + column_family_name, level_, kUnknownNewestKeyTime), file_writer_.get())); for (const auto& kv : kv_map) { @@ -4464,7 +4464,8 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, *comparator, &internal_tbl_prop_coll_factories, options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level), + kUnknownColumnFamily, column_family_name, level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4502,7 +4503,8 @@ TEST_P(BlockBasedTableTest, Crc32cFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, *comparator, &internal_tbl_prop_coll_factories, options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level), + kUnknownColumnFamily, column_family_name, level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4550,7 +4552,8 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, + kUnknownNewestKeyTime), file_writer.get())); for (char c = 'a'; c <= 'z'; ++c) { @@ -4605,7 +4608,8 @@ TEST_F(PlainTableTest, NoFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4646,7 +4650,8 @@ TEST_F(PlainTableTest, Crc32cFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4724,9 +4729,9 @@ static void DoCompressionTest(CompressionType comp) { ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3550)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3550)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7100)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3555)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3555)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7110)); c.ResetTableReader(); } @@ -5261,7 +5266,7 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), + column_family_name, -1, kUnknownNewestKeyTime), file_writer.get())); for (char c = 'a'; c <= 'z'; ++c) { @@ -5445,7 +5450,7 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), + column_family_name, -1, kUnknownNewestKeyTime), file_writer.get())); for (int i = 1; i <= 10000; ++i) { @@ -5631,7 +5636,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), + column_family_name, -1, kUnknownNewestKeyTime), file_writer.get())); for (int i = 1; i <= 10000; ++i) { @@ -6240,8 +6245,8 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, Basic) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kSnappyCompression, options.compression_opts, - kUnknownColumnFamily, "test_cf", - -1 /* level */), + kUnknownColumnFamily, "test_cf", -1 /* level */, + kUnknownNewestKeyTime), file_writer.get())); std::string key1 = "key1"; @@ -6318,7 +6323,7 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kSnappyCompression, options.compression_opts, kUnknownColumnFamily, - "test_cf", -1 /* level */), + "test_cf", -1 /* level */, kUnknownNewestKeyTime), file_writer.get())); std::string key1 = "key1"; @@ -6405,7 +6410,7 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, BasicWithCacheFull) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kSnappyCompression, options.compression_opts, kUnknownColumnFamily, - "test_cf", -1 /* level */), + "test_cf", -1 /* level */, kUnknownNewestKeyTime), file_writer.get())); std::string key1 = "key1"; diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index a5c567b38a..c513a23bc9 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -132,7 +132,7 @@ class SSTDumpToolTest : public testing::Test { &internal_tbl_prop_coll_factories, CompressionType::kNoCompression, CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, kUnknownNewestKeyTime), file_writer.get())); // Populate slightly more than 1K keys diff --git a/unreleased_history/new_features/tp_newest_key_time.md b/unreleased_history/new_features/tp_newest_key_time.md new file mode 100644 index 0000000000..1aaa773fa2 --- /dev/null +++ b/unreleased_history/new_features/tp_newest_key_time.md @@ -0,0 +1 @@ +* Adds a new table property "rocksdb.newest.key.time" which records the unix timestamp of the newest key. Uses this table property for FIFO TTL and temperature change compaction.