diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index 035195f9d3..2362cc4170 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -935,6 +935,25 @@ bool Compaction::DoesInputReferenceBlobFiles() const { return false; } +uint64_t Compaction::MaxInputFileNewestKeyTime(const InternalKey* start, + const InternalKey* end) const { + uint64_t newest_key_time = kUnknownNewestKeyTime; + const InternalKeyComparator& icmp = + column_family_data()->internal_comparator(); + for (const auto& level_files : inputs_) { + for (const auto& file : level_files.files) { + if (start != nullptr && icmp.Compare(file->largest, *start) < 0) { + continue; + } + if (end != nullptr && icmp.Compare(file->smallest, *end) > 0) { + continue; + } + newest_key_time = std::max(newest_key_time, file->TryGetNewestKeyTime()); + } + } + return newest_key_time; +} + uint64_t Compaction::MinInputFileOldestAncesterTime( const InternalKey* start, const InternalKey* end) const { uint64_t min_oldest_ancester_time = std::numeric_limits::max(); diff --git a/db/compaction/compaction.h b/db/compaction/compaction.h index d5e8f06c66..f43dc620d8 100644 --- a/db/compaction/compaction.h +++ b/db/compaction/compaction.h @@ -405,6 +405,12 @@ class Compaction { return blob_garbage_collection_age_cutoff_; } + // start and end are sub compact range. Null if no boundary. + // This is used to calculate the newest_key_time table property after + // compaction. + uint64_t MaxInputFileNewestKeyTime(const InternalKey* start, + const InternalKey* end) const; + // start and end are sub compact range. Null if no boundary. // This is used to filter out some input files' ancester's time range. uint64_t MinInputFileOldestAncesterTime(const InternalKey* start, diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 7aa2bb217f..c1e2035d26 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1914,6 +1914,10 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, oldest_ancester_time = current_time; } + uint64_t newest_key_time = sub_compact->compaction->MaxInputFileNewestKeyTime( + sub_compact->start.has_value() ? &tmp_start : nullptr, + sub_compact->end.has_value() ? &tmp_end : nullptr); + // Initialize a SubcompactionState::Output and add it to sub_compact->outputs uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber(); { @@ -1963,7 +1967,7 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, cfd->internal_tbl_prop_coll_factories(), sub_compact->compaction->output_compression(), sub_compact->compaction->output_compression_opts(), cfd->GetID(), - cfd->GetName(), sub_compact->compaction->output_level(), + cfd->GetName(), sub_compact->compaction->output_level(), newest_key_time, bottommost_level_, TableFileCreationReason::kCompaction, 0 /* oldest_key_time */, current_time, db_id_, db_session_id_, sub_compact->compaction->max_output_file_size(), file_number, diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 94ad1e2905..0bd20a5806 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -301,13 +301,13 @@ class CompactionJobTestBase : public testing::Test { const WriteOptions write_options; std::unique_ptr table_builder( cf_options_.table_factory->NewTableBuilder( - TableBuilderOptions(*cfd_->ioptions(), mutable_cf_options_, - read_options, write_options, - cfd_->internal_comparator(), - cfd_->internal_tbl_prop_coll_factories(), - CompressionType::kNoCompression, - CompressionOptions(), 0 /* column_family_id */, - kDefaultColumnFamilyName, -1 /* level */), + TableBuilderOptions( + *cfd_->ioptions(), mutable_cf_options_, read_options, + write_options, cfd_->internal_comparator(), + cfd_->internal_tbl_prop_coll_factories(), + CompressionType::kNoCompression, CompressionOptions(), + 0 /* column_family_id */, kDefaultColumnFamilyName, + -1 /* level */, kUnknownNewestKeyTime), file_writer.get())); // Build table. for (const auto& kv : contents) { diff --git a/db/compaction/compaction_picker_fifo.cc b/db/compaction/compaction_picker_fifo.cc index 12cf60e0e9..5a3a958e97 100644 --- a/db/compaction/compaction_picker_fifo.cc +++ b/db/compaction/compaction_picker_fifo.cc @@ -79,10 +79,14 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( FileMetaData* f = *ritr; assert(f); if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + uint64_t newest_key_time = f->TryGetNewestKeyTime(); uint64_t creation_time = f->fd.table_reader->GetTableProperties()->creation_time; - if (creation_time == 0 || - creation_time >= (current_time - mutable_cf_options.ttl)) { + uint64_t est_newest_key_time = newest_key_time == kUnknownNewestKeyTime + ? creation_time + : newest_key_time; + if (est_newest_key_time == kUnknownNewestKeyTime || + est_newest_key_time >= (current_time - mutable_cf_options.ttl)) { break; } } @@ -102,15 +106,19 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( } for (const auto& f : inputs[0].files) { - uint64_t creation_time = 0; assert(f); + uint64_t newest_key_time = f->TryGetNewestKeyTime(); + uint64_t creation_time = 0; if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { creation_time = f->fd.table_reader->GetTableProperties()->creation_time; } + uint64_t est_newest_key_time = newest_key_time == kUnknownNewestKeyTime + ? creation_time + : newest_key_time; ROCKS_LOG_BUFFER(log_buffer, "[%s] FIFO compaction: picking file %" PRIu64 - " with creation time %" PRIu64 " for deletion", - cf_name.c_str(), f->fd.GetNumber(), creation_time); + " with estimated newest key time %" PRIu64 " for deletion", + cf_name.c_str(), f->fd.GetNumber(), est_newest_key_time); } Compaction* c = new Compaction( @@ -350,38 +358,26 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction( Temperature compaction_target_temp = Temperature::kLastTemperature; if (current_time > min_age) { uint64_t create_time_threshold = current_time - min_age; - // We will ideally identify a file qualifying for temperature change by - // knowing the timestamp for the youngest entry in the file. However, right - // now we don't have the information. We infer it by looking at timestamp of - // the previous file's (which is just younger) oldest entry's timestamp. - // avoid index underflow assert(level_files.size() >= 1); - for (size_t index = level_files.size() - 1; index >= 1; --index) { + for (size_t index = level_files.size(); index >= 1; --index) { // Try to add cur_file to compaction inputs. - FileMetaData* cur_file = level_files[index]; - // prev_file is just younger than cur_file - FileMetaData* prev_file = level_files[index - 1]; + FileMetaData* cur_file = level_files[index - 1]; + FileMetaData* prev_file = index < 2 ? nullptr : level_files[index - 2]; if (cur_file->being_compacted) { // Should not happen since we check for // `level0_compactions_in_progress_` above. Here we simply just don't // schedule anything. return nullptr; } - uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime(); - if (oldest_ancestor_time == kUnknownOldestAncesterTime) { - // Older files might not have enough information. It is possible to - // handle these files by looking at newer files, but maintaining the - // logic isn't worth it. - break; - } - if (oldest_ancestor_time > create_time_threshold) { - // cur_file is too fresh + uint64_t est_newest_key_time = cur_file->TryGetNewestKeyTime(prev_file); + if (est_newest_key_time == kUnknownNewestKeyTime || + est_newest_key_time > create_time_threshold) { break; } Temperature cur_target_temp = ages[0].temperature; for (size_t i = 1; i < ages.size(); ++i) { if (current_time >= ages[i].age && - oldest_ancestor_time <= current_time - ages[i].age) { + est_newest_key_time <= current_time - ages[i].age) { cur_target_temp = ages[i].temperature; } } @@ -396,8 +392,8 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction( ROCKS_LOG_BUFFER( log_buffer, "[%s] FIFO compaction: picking file %" PRIu64 - " with next file's oldest time %" PRIu64 " for temperature %s.", - cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time, + " with estimated newest key time %" PRIu64 " for temperature %s.", + cf_name.c_str(), cur_file->fd.GetNumber(), est_newest_key_time, temperature_to_string[cur_target_temp].c_str()); break; } diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 3cf0c7377d..b75df61c3b 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -13,6 +13,7 @@ #include "db/compaction/compaction_picker_universal.h" #include "db/compaction/file_pri.h" #include "rocksdb/advanced_options.h" +#include "table/mock_table.h" #include "table/unique_id_impl.h" #include "test_util/testharness.h" #include "test_util/testutil.h" @@ -77,7 +78,7 @@ class CompactionPickerTestBase : public testing::Test { ioptions_.level_compaction_dynamic_level_bytes = false; } - ~CompactionPickerTestBase() override = default; + ~CompactionPickerTestBase() override { ClearFiles(); } void NewVersionStorage(int num_levels, CompactionStyle style) { DeleteVersionStorage(); @@ -103,7 +104,7 @@ class CompactionPickerTestBase : public testing::Test { void DeleteVersionStorage() { vstorage_.reset(); temp_vstorage_.reset(); - files_.clear(); + ClearFiles(); file_map_.clear(); input_files_.clear(); } @@ -115,6 +116,7 @@ class CompactionPickerTestBase : public testing::Test { size_t compensated_file_size = 0, bool marked_for_compact = false, Temperature temperature = Temperature::kUnknown, uint64_t oldest_ancestor_time = kUnknownOldestAncesterTime, + uint64_t newest_key_time = kUnknownNewestKeyTime, Slice ts_of_smallest = Slice(), Slice ts_of_largest = Slice(), uint64_t epoch_number = kUnknownEpochNumber) { assert(ts_of_smallest.size() == ucmp_->timestamp_size()); @@ -161,7 +163,12 @@ class CompactionPickerTestBase : public testing::Test { true /* user_defined_timestamps_persisted */); f->compensated_file_size = (compensated_file_size != 0) ? compensated_file_size : file_size; + // oldest_ancester_time is only used if newest_key_time is not available f->oldest_ancester_time = oldest_ancestor_time; + TableProperties tp; + tp.newest_key_time = newest_key_time; + f->fd.table_reader = new mock::MockTableReader(mock::KVVector{}, tp); + vstorage->AddFile(level, f); files_.emplace_back(f); file_map_.insert({file_number, {f, level}}); @@ -206,6 +213,15 @@ class CompactionPickerTestBase : public testing::Test { return opts; } + void ClearFiles() { + for (auto& file : files_) { + if (file->fd.table_reader != nullptr) { + delete file->fd.table_reader; + } + } + files_.clear(); + } + std::unique_ptr temp_vstorage_; }; @@ -1108,235 +1124,326 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { } TEST_F(CompactionPickerTest, FIFOToCold1) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0 /* level */, 4U /* file_number */, "260", "300", 1 * kFileSize, 0, 2500, - 2600, 0, true, Temperature::kUnknown, - threshold_time - 2000 /* oldest_ancestor_time */); - // Qualifies for compaction to kCold. - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, threshold_time - 3000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0 /* level */, 4U /* file_number */, "260", "300", 1 * kFileSize, 0, + 2500, 2600, 0, true, Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : threshold_time - 2000 /* oldest_ancestor_time */, + newestKeyTimeKnown ? threshold_time - 2000 + : kUnknownNewestKeyTime /* newest_key_time */); + // Qualifies for compaction to kCold. + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 + : kUnknownNewestKeyTime /* newest_key_time */); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToColdMaxCompactionSize) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 9; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 9; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); - // The following two files qualify for compaction to kCold. - // But only the last two should be included to respect `max_compaction_bytes`. - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, threshold_time - 3000); - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kUnknown, threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time + 100, + newestKeyTimeKnown ? threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 2000, + newestKeyTimeKnown ? threshold_time - 2000 : kUnknownNewestKeyTime); + // The following two files qualify for compaction to kCold. + // But only the last two should be included to respect + // `max_compaction_bytes`. + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 : kUnknownNewestKeyTime); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 4000, + newestKeyTimeKnown ? threshold_time - 4000 : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 5000, + newestKeyTimeKnown ? threshold_time - 5000 : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - // Compaction picker picks older files first and picks one file at a time. - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + // Compaction picker picks older files first and picks one file at a time. + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToColdWithExistingCold) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); - // The following two files qualify for compaction to kCold. - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, threshold_time - 3000); - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kCold, threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time + 100, + newestKeyTimeKnown ? threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 2000, + newestKeyTimeKnown ? threshold_time - 2000 : kUnknownNewestKeyTime); + // The following two files qualify for compaction to kCold. + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 : kUnknownNewestKeyTime); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 4000, + newestKeyTimeKnown ? threshold_time - 4000 : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kCold, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 5000, + newestKeyTimeKnown ? threshold_time - 5000 : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - // Compaction picker picks older files first and picks one file at a time. - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + // Compaction picker picks older files first and picks one file at a time. + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToColdWithHotBetweenCold) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kColdThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kColdThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kCold, kColdThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kCold, kColdThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t threshold_time = - static_cast(current_time) - kColdThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, threshold_time - 2000); - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kCold, threshold_time - 3000); - // Qualifies for compaction to kCold. - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kCold, threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t threshold_time = + static_cast(current_time) - kColdThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time + 100, + newestKeyTimeKnown ? threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 2000, + newestKeyTimeKnown ? threshold_time - 2000 : kUnknownNewestKeyTime); + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kCold, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 3000, + newestKeyTimeKnown ? threshold_time - 3000 : kUnknownNewestKeyTime); + // Qualifies for compaction to kCold. + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 4000, + newestKeyTimeKnown ? threshold_time - 4000 : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kCold, + newestKeyTimeKnown ? kUnknownOldestAncesterTime : threshold_time - 5000, + newestKeyTimeKnown ? threshold_time - 5000 : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + ASSERT_EQ(compaction->output_temperature(), Temperature::kCold); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, FIFOToHotAndWarm) { - NewVersionStorage(1, kCompactionStyleFIFO); - const uint64_t kFileSize = 100000; - const uint64_t kMaxSize = kFileSize * 100000; - uint64_t kWarmThreshold = 10000; - uint64_t kHotThreshold = 2000; + // Test fallback behavior from newest_key_time to oldest_ancestor_time + for (bool newestKeyTimeKnown : {false, true}) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 10000; + uint64_t kHotThreshold = 2000; - fifo_options_.max_table_files_size = kMaxSize; - // Test that multiple threshold works. - fifo_options_.file_temperature_age_thresholds = { - {Temperature::kHot, kHotThreshold}, {Temperature::kWarm, kWarmThreshold}}; - mutable_cf_options_.compaction_options_fifo = fifo_options_; - mutable_cf_options_.level0_file_num_compaction_trigger = 100; - mutable_cf_options_.max_compaction_bytes = kFileSize * 100; - FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + fifo_options_.max_table_files_size = kMaxSize; + // Test that multiple threshold works. + fifo_options_.file_temperature_age_thresholds = { + {Temperature::kHot, kHotThreshold}, + {Temperature::kWarm, kWarmThreshold}}; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 100; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); - int64_t current_time = 0; - ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); - uint64_t hot_threshold_time = - static_cast(current_time) - kHotThreshold; - uint64_t warm_threshold_time = - static_cast(current_time) - kWarmThreshold; - Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, - Temperature::kUnknown, static_cast(current_time) - 100); - Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, - Temperature::kUnknown, hot_threshold_time + 100); - Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, - Temperature::kUnknown, hot_threshold_time - 200); - // Qualifies for Hot - Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, - Temperature::kUnknown, warm_threshold_time - 100); - // Qualifies for Warm - Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, - Temperature::kUnknown, warm_threshold_time - 4000); - Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, - Temperature::kUnknown, warm_threshold_time - 5000); - UpdateVersionStorageInfo(); + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(¤t_time)); + uint64_t hot_threshold_time = + static_cast(current_time) - kHotThreshold; + uint64_t warm_threshold_time = + static_cast(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : static_cast(current_time) - 100, + newestKeyTimeKnown ? static_cast(current_time) - 100 + : kUnknownNewestKeyTime); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : hot_threshold_time + 100, + newestKeyTimeKnown ? hot_threshold_time + 100 : kUnknownNewestKeyTime); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : hot_threshold_time - 200, + newestKeyTimeKnown ? hot_threshold_time - 200 : kUnknownNewestKeyTime); + // Qualifies for Hot + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : warm_threshold_time - 100, + newestKeyTimeKnown ? warm_threshold_time - 100 : kUnknownNewestKeyTime); + // Qualifies for Warm + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : warm_threshold_time - 4000, + newestKeyTimeKnown ? warm_threshold_time - 4000 + : kUnknownNewestKeyTime); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kUnknown, + newestKeyTimeKnown ? kUnknownOldestAncesterTime + : warm_threshold_time - 5000, + newestKeyTimeKnown ? warm_threshold_time - 5000 + : kUnknownNewestKeyTime); + UpdateVersionStorageInfo(); - ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); - std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, mutable_db_options_, - /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, - vstorage_.get(), &log_buffer_)); - ASSERT_TRUE(compaction.get() != nullptr); - ASSERT_EQ(compaction->compaction_reason(), - CompactionReason::kChangeTemperature); - // Compaction picker picks older files first and picks one file at a time. - ASSERT_EQ(compaction->output_temperature(), Temperature::kWarm); - ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction( + fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, + /*existing_snapshots=*/{}, /* snapshot_checker */ nullptr, + vstorage_.get(), &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kChangeTemperature); + // Compaction picker picks older files first and picks one file at a time. + ASSERT_EQ(compaction->output_temperature(), Temperature::kWarm); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + } } TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) { @@ -3097,18 +3204,21 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) { Add(0, 1U, "150", "200", kFileSize, 0, 500, 550, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 3); Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 2); Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 1); Add(3, 5U, "010", "080", 8 * kFileSize, 0, 200, 251); @@ -3136,6 +3246,7 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) { Add(0, 7U, "150", "200", kFileSize, 0, 551, 600, 0, true, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 4); UpdateVersionStorageInfo(); @@ -3163,6 +3274,7 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap2) { Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300, 0, true, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 1); Add(3, 5U, "240", "290", 8 * kFileSize, 0, 201, 250); @@ -3189,12 +3301,14 @@ TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap2) { Add(0, 1U, "150", "200", kFileSize, 0, 500, 550, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 3); Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 2); UpdateVersionStorageInfo(); @@ -3371,23 +3485,27 @@ TEST_F(CompactionPickerTest, UniversalMarkedL0Overlap2) { /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 4); Add(0, 5U, "240", "290", 2 * kFileSize, 0, 201, 250, 0, true, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 3); Add(0, 3U, "301", "350", 4 * kFileSize, 0, 101, 150, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 2); Add(0, 6U, "501", "750", 8 * kFileSize, 0, 50, 100, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 1); UpdateVersionStorageInfo(); @@ -3414,12 +3532,14 @@ TEST_F(CompactionPickerTest, UniversalMarkedL0Overlap2) { /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 6); Add(0, 2U, "201", "250", kFileSize, 0, 401, 450, /*compensated_file_size*/ 0, /*marked_for_compact*/ false, /* temperature*/ Temperature::kUnknown, /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, /*ts_of_smallest*/ Slice(), /*ts_of_largest*/ Slice(), /*epoch_number*/ 5); UpdateVersionStorageInfo(); @@ -3653,7 +3773,8 @@ TEST_F(CompactionPickerU64TsTest, Overlap) { /*file_size=*/1U, /*path_id=*/0, /*smallest_seq=*/100, /*largest_seq=*/100, /*compensated_file_size=*/0, /*marked_for_compact=*/false, /*temperature=*/Temperature::kUnknown, - /*oldest_ancestor_time=*/kUnknownOldestAncesterTime, ts1, ts2); + /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, ts1, ts2); UpdateVersionStorageInfo(); } @@ -3719,11 +3840,13 @@ TEST_F(CompactionPickerU64TsTest, CannotTrivialMoveUniversal) { Add(1, 1U, "150", "150", kFileSize, /*path_id=*/0, /*smallest_seq=*/100, /*largest_seq=*/100, /*compensated_file_size=*/kFileSize, /*marked_for_compact=*/false, Temperature::kUnknown, - kUnknownOldestAncesterTime, ts1, ts2); + /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, ts1, ts2); Add(2, 2U, "150", "150", kFileSize, /*path_id=*/0, /*smallest_seq=*/100, /*largest_seq=*/100, /*compensated_file_size=*/kFileSize, /*marked_for_compact=*/false, Temperature::kUnknown, - kUnknownOldestAncesterTime, ts3, ts4); + /*oldest_ancestor_time*/ kUnknownOldestAncesterTime, + /*newest_key_time*/ kUnknownNewestKeyTime, ts3, ts4); UpdateVersionStorageInfo(); std::unique_ptr compaction( diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 2a8e19e7bc..c71cf2e0c1 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -10623,6 +10623,97 @@ TEST_F(DBCompactionTest, ReleaseCompactionDuringManifestWrite) { SyncPoint::GetInstance()->ClearAllCallBacks(); } +TEST_F(DBCompactionTest, RecordNewestKeyTimeForTtlCompaction) { + Options options; + SetTimeElapseOnlySleepOnReopen(&options); + options.env = CurrentOptions().env; + options.compaction_style = kCompactionStyleFIFO; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + options.write_buffer_size = 10 << 10; // 10KB + options.arena_block_size = 4096; + options.compression = kNoCompression; + options.create_if_missing = true; + options.compaction_options_fifo.allow_compaction = false; + options.num_levels = 1; + env_->SetMockSleep(); + options.env = env_; + options.ttl = 1 * 60 * 60; // 1 hour + ASSERT_OK(TryReopen(options)); + + // Generate and flush 4 files, each about 10KB + // Compaction is manually disabled at this point so we can check + // each file's newest_key_time + Random rnd(301); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 10; j++) { + ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); + } + ASSERT_OK(Flush()); + env_->MockSleepForSeconds(5); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 4); + + // Check that we are populating newest_key_time on flush + std::vector file_metadatas = GetLevelFileMetadatas(0); + ASSERT_EQ(file_metadatas.size(), 4); + uint64_t first_newest_key_time = + file_metadatas[0]->fd.table_reader->GetTableProperties()->newest_key_time; + ASSERT_NE(first_newest_key_time, kUnknownNewestKeyTime); + // Check that the newest_key_times are in expected ordering + uint64_t prev_newest_key_time = first_newest_key_time; + for (size_t idx = 1; idx < file_metadatas.size(); idx++) { + uint64_t newest_key_time = file_metadatas[idx] + ->fd.table_reader->GetTableProperties() + ->newest_key_time; + + ASSERT_LT(newest_key_time, prev_newest_key_time); + prev_newest_key_time = newest_key_time; + ASSERT_EQ(newest_key_time, file_metadatas[idx] + ->fd.table_reader->GetTableProperties() + ->creation_time); + } + // The delta between the first and last newest_key_times is 15s + uint64_t last_newest_key_time = prev_newest_key_time; + ASSERT_EQ(15, first_newest_key_time - last_newest_key_time); + + // After compaction, the newest_key_time of the output file should be the max + // of the input files + options.compaction_options_fifo.allow_compaction = true; + ASSERT_OK(TryReopen(options)); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 1); + file_metadatas = GetLevelFileMetadatas(0); + ASSERT_EQ(file_metadatas.size(), 1); + ASSERT_EQ( + file_metadatas[0]->fd.table_reader->GetTableProperties()->newest_key_time, + first_newest_key_time); + // Contrast newest_key_time with creation_time, which records the oldest + // ancestor time (15s older than newest_key_time) + ASSERT_EQ( + file_metadatas[0]->fd.table_reader->GetTableProperties()->creation_time, + last_newest_key_time); + ASSERT_EQ(file_metadatas[0]->oldest_ancester_time, last_newest_key_time); + + // Make sure TTL of 5s causes compaction + env_->MockSleepForSeconds(6); + + // The oldest input file is older than 15s + // However the newest of the compaction input files is younger than 15s, so + // we don't compact + ASSERT_OK(dbfull()->SetOptions({{"ttl", "15"}})); + ASSERT_EQ(dbfull()->GetOptions().ttl, 15); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 1); + + // Now even the youngest input file is too old + ASSERT_OK(dbfull()->SetOptions({{"ttl", "5"}})); + ASSERT_EQ(dbfull()->GetOptions().ttl, 5); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(NumTableFilesAtLevel(0), 0); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 6727839ee0..3e1a6198ea 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1735,10 +1735,11 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, cfd->internal_comparator(), cfd->internal_tbl_prop_coll_factories(), GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), mutable_cf_options.compression_opts, cfd->GetID(), cfd->GetName(), - 0 /* level */, false /* is_bottommost */, - TableFileCreationReason::kRecovery, 0 /* oldest_key_time */, - 0 /* file_creation_time */, db_id_, db_session_id_, - 0 /* target_file_size */, meta.fd.GetNumber(), kMaxSequenceNumber); + 0 /* level */, current_time /* newest_key_time */, + false /* is_bottommost */, TableFileCreationReason::kRecovery, + 0 /* oldest_key_time */, 0 /* file_creation_time */, db_id_, + db_session_id_, 0 /* target_file_size */, meta.fd.GetNumber(), + kMaxSequenceNumber); Version* version = cfd->current(); version->Ref(); uint64_t num_input_entries = 0; diff --git a/db/db_test2.cc b/db/db_test2.cc index fbe66a986b..c4590def71 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -36,18 +36,6 @@ namespace ROCKSDB_NAMESPACE { class DBTest2 : public DBTestBase { public: DBTest2() : DBTestBase("db_test2", /*env_do_fsync=*/true) {} - std::vector GetLevelFileMetadatas(int level, int cf = 0) { - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = - versions->GetColumnFamilySet()->GetColumnFamily(cf); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - return storage_info->LevelFiles(level); - } }; TEST_F(DBTest2, OpenForReadOnly) { diff --git a/db/db_test_util.cc b/db/db_test_util.cc index d444bc5193..79532a13e6 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1263,6 +1263,20 @@ Status DBTestBase::CountFiles(size_t* count) { return Status::OK(); } +std::vector DBTestBase::GetLevelFileMetadatas(int level, + int cf) { + VersionSet* const versions = dbfull()->GetVersionSet(); + assert(versions); + ColumnFamilyData* const cfd = + versions->GetColumnFamilySet()->GetColumnFamily(cf); + assert(cfd); + Version* const current = cfd->current(); + assert(current); + VersionStorageInfo* const storage_info = current->storage_info(); + assert(storage_info); + return storage_info->LevelFiles(level); +} + Status DBTestBase::Size(const Slice& start, const Slice& limit, int cf, uint64_t* size) { Range r(start, limit); diff --git a/db/db_test_util.h b/db/db_test_util.h index 36a4615344..a7dc06659e 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -1273,6 +1273,8 @@ class DBTestBase : public testing::Test { Status CountFiles(size_t* count); + std::vector GetLevelFileMetadatas(int level, int cf = 0); + Status Size(const Slice& start, const Slice& limit, uint64_t* size) { return Size(start, limit, 0, size); } diff --git a/db/event_helpers.cc b/db/event_helpers.cc index fa9c5e153c..6879e450ee 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -132,6 +132,7 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished( << table_properties.compression_name << "compression_options" << table_properties.compression_options << "creation_time" << table_properties.creation_time << "oldest_key_time" + << table_properties.newest_key_time << "newest_key_time" << table_properties.oldest_key_time << "file_creation_time" << table_properties.file_creation_time << "slow_compression_estimated_data_size" diff --git a/db/flush_job.cc b/db/flush_job.cc index f7d585a3b8..88499cd112 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -987,9 +987,10 @@ Status FlushJob::WriteLevel0Table() { cfd_->internal_comparator(), cfd_->internal_tbl_prop_coll_factories(), output_compression_, mutable_cf_options_.compression_opts, cfd_->GetID(), cfd_->GetName(), 0 /* level */, - false /* is_bottommost */, TableFileCreationReason::kFlush, - oldest_key_time, current_time, db_id_, db_session_id_, - 0 /* target_file_size */, meta_.fd.GetNumber(), + current_time /* newest_key_time */, false /* is_bottommost */, + TableFileCreationReason::kFlush, oldest_key_time, current_time, + db_id_, db_session_id_, 0 /* target_file_size */, + meta_.fd.GetNumber(), preclude_last_level_min_seqno_ == kMaxSequenceNumber ? preclude_last_level_min_seqno_ : std::min(earliest_snapshot_, preclude_last_level_min_seqno_)); diff --git a/db/repair.cc b/db/repair.cc index 1d2b81adc4..7b063e64ee 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -474,10 +474,10 @@ class Repairer { write_option, cfd->internal_comparator(), cfd->internal_tbl_prop_coll_factories(), kNoCompression, default_compression, cfd->GetID(), cfd->GetName(), -1 /* level */, - false /* is_bottommost */, TableFileCreationReason::kRecovery, - 0 /* oldest_key_time */, 0 /* file_creation_time */, - "DB Repairer" /* db_id */, db_session_id_, 0 /*target_file_size*/, - meta.fd.GetNumber()); + current_time /* newest_key_time */, false /* is_bottommost */, + TableFileCreationReason::kRecovery, 0 /* oldest_key_time */, + 0 /* file_creation_time */, "DB Repairer" /* db_id */, db_session_id_, + 0 /*target_file_size*/, meta.fd.GetNumber()); SeqnoToTimeMapping empty_seqno_to_time_mapping; status = BuildTable( diff --git a/db/repair_test.cc b/db/repair_test.cc index 8adc06f0c5..d5e1614f32 100644 --- a/db/repair_test.cc +++ b/db/repair_test.cc @@ -60,19 +60,6 @@ class RepairTest : public DBTestBase { ASSERT_GT(verify_passed, 0); SyncPoint::GetInstance()->DisableProcessing(); } - - std::vector GetLevelFileMetadatas(int level, int cf = 0) { - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = - versions->GetColumnFamilySet()->GetColumnFamily(cf); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - return storage_info->LevelFiles(level); - } }; TEST_F(RepairTest, SortRepairedDBL0ByEpochNumber) { diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index c9dfb7d0ff..e46cafa6a1 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -58,7 +58,7 @@ void MakeBuilder( ioptions, moptions, read_options, write_options, internal_comparator, internal_tbl_prop_coll_factories, options.compression, options.compression_opts, kTestColumnFamilyId, kTestColumnFamilyName, - kTestLevel); + kTestLevel, kUnknownNewestKeyTime); builder->reset(NewTableBuilder(tboptions, writable->get())); } } // namespace diff --git a/db/version_edit.h b/db/version_edit.h index 2b247e4571..34997451fe 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -107,6 +107,7 @@ class VersionSet; constexpr uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF; constexpr uint64_t kUnknownOldestAncesterTime = 0; +constexpr uint64_t kUnknownNewestKeyTime = 0; constexpr uint64_t kUnknownFileCreationTime = 0; constexpr uint64_t kUnknownEpochNumber = 0; // If `Options::allow_ingest_behind` is true, this epoch number @@ -333,6 +334,27 @@ struct FileMetaData { return kUnknownFileCreationTime; } + // Tries to get the newest key time from the current file + // Falls back on oldest ancestor time of previous (newer) file + uint64_t TryGetNewestKeyTime(FileMetaData* prev_file = nullptr) { + if (fd.table_reader != nullptr && + fd.table_reader->GetTableProperties() != nullptr) { + uint64_t newest_key_time = + fd.table_reader->GetTableProperties()->newest_key_time; + if (newest_key_time != kUnknownNewestKeyTime) { + return newest_key_time; + } + } + if (prev_file != nullptr) { + uint64_t prev_oldest_ancestor_time = + prev_file->TryGetOldestAncesterTime(); + if (prev_oldest_ancestor_time != kUnknownOldestAncesterTime) { + return prev_oldest_ancestor_time; + } + } + return kUnknownNewestKeyTime; + } + // WARNING: manual update to this function is needed // whenever a new string property is added to FileMetaData // to reduce approximation error. diff --git a/db/version_set.cc b/db/version_set.cc index 2f4892fbf6..a7bbd5af6d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -3386,28 +3386,25 @@ bool ShouldChangeFileTemperature(const ImmutableOptions& ioptions, int64_t _current_time; auto status = ioptions.clock->GetCurrentTime(&_current_time); const uint64_t current_time = static_cast(_current_time); - // We use oldest_ancestor_time of a file to be the estimate age of - // the file just older than it. This is the same logic used in + // This is the same logic used in // FIFOCompactionPicker::PickTemperatureChangeCompaction(). if (status.ok() && current_time >= ages[0].age) { uint64_t create_time_threshold = current_time - ages[0].age; Temperature target_temp; assert(files.size() >= 1); - for (size_t index = files.size() - 1; index >= 1; --index) { - FileMetaData* cur_file = files[index]; - FileMetaData* prev_file = files[index - 1]; + for (size_t index = files.size(); index >= 1; --index) { + FileMetaData* cur_file = files[index - 1]; + FileMetaData* prev_file = index < 2 ? nullptr : files[index - 2]; if (!cur_file->being_compacted) { - uint64_t oldest_ancestor_time = prev_file->TryGetOldestAncesterTime(); - if (oldest_ancestor_time == kUnknownOldestAncesterTime) { - return false; - } - if (oldest_ancestor_time > create_time_threshold) { + uint64_t est_newest_key_time = cur_file->TryGetNewestKeyTime(prev_file); + if (est_newest_key_time == kUnknownNewestKeyTime || + est_newest_key_time > create_time_threshold) { return false; } target_temp = ages[0].temperature; for (size_t i = 1; i < ages.size(); ++i) { if (current_time >= ages[i].age && - oldest_ancestor_time <= current_time - ages[i].age) { + est_newest_key_time <= current_time - ages[i].age) { target_temp = ages[i].temperature; } } diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 9264345c5b..04b9503e77 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1328,7 +1328,7 @@ class VersionSetTestBase { &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - info.column_family, info.level), + info.column_family, info.level, kUnknownNewestKeyTime), fwriter.get())); InternalKey ikey(info.key, 0, ValueType::kTypeValue); builder->Add(ikey.Encode(), "value"); diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 84677a4d2d..00e448ba7d 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -68,6 +68,7 @@ struct TablePropertiesNames { static const std::string kCompressionOptions; static const std::string kCreationTime; static const std::string kOldestKeyTime; + static const std::string kNewestKeyTime; static const std::string kFileCreationTime; static const std::string kSlowCompressionEstimatedDataSize; static const std::string kFastCompressionEstimatedDataSize; @@ -270,6 +271,8 @@ struct TableProperties { // Timestamp of the earliest key. 0 means unknown. uint64_t oldest_key_time = 0; + // Timestamp of the newest key. 0 means unknown. + uint64_t newest_key_time = 0; // Actual SST file creation time. 0 means unknown. uint64_t file_creation_time = 0; // Estimated size of data blocks if compressed using a relatively slower diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 1a86fb26ed..6036d05131 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -282,7 +282,8 @@ TEST_F(OptionsSettableTest, TablePropertiesAllFieldsSettable) { "636F6D70617261746F725F6E616D65;num_filter_entries=0;db_id=" "64625F686F73745F6964;column_family_id=2147483647;fixed_key_len=0;fast_" "compression_estimated_data_size=0;filter_policy_name=" - "66696C7465725F706F6C6963795F6E616D65;oldest_key_time=0;column_family_" + "66696C7465725F706F6C6963795F6E616D65;oldest_key_time=0;newest_key_time=" + "0;column_family_" "name=64656661756C74;user_defined_timestamps_persisted=1;num_entries=100;" "external_sst_file_global_seqno_offset=0;num_merge_operands=0;index_key_" "is_user_key=0;key_largest_seqno=18446744073709551615;", diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index ec4a695b63..6845e515e8 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -619,6 +619,7 @@ struct BlockBasedTableBuilder::Rep { props.column_family_id = tbo.column_family_id; props.column_family_name = tbo.column_family_name; props.oldest_key_time = tbo.oldest_key_time; + props.newest_key_time = tbo.newest_key_time; props.file_creation_time = tbo.file_creation_time; props.orig_file_number = tbo.cur_file_num; props.db_id = tbo.db_id; diff --git a/table/block_based/block_based_table_reader_test.cc b/table/block_based/block_based_table_reader_test.cc index d90ee15782..414d4b1f9f 100644 --- a/table/block_based/block_based_table_reader_test.cc +++ b/table/block_based/block_based_table_reader_test.cc @@ -141,7 +141,8 @@ class BlockBasedTableReaderBaseTest : public testing::Test { TableBuilderOptions(ioptions, moptions, read_options, write_options, comparator, &factories, compression_type, compression_opts, 0 /* column_family_id */, - kDefaultColumnFamilyName, -1 /* level */), + kDefaultColumnFamilyName, -1 /* level */, + kUnknownNewestKeyTime), writer.get())); // Build table. diff --git a/table/block_based/data_block_hash_index_test.cc b/table/block_based/data_block_hash_index_test.cc index 9936a34fd0..7970ca1d9f 100644 --- a/table/block_based/data_block_hash_index_test.cc +++ b/table/block_based/data_block_hash_index_test.cc @@ -561,7 +561,7 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, &internal_tbl_prop_coll_factories, options.compression, CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - column_family_name, level_), + column_family_name, level_, kUnknownNewestKeyTime), file_writer.get())); builder->Add(ik1.Encode().ToString(), v1); diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc index ca8fcb7e50..9b31eb4302 100644 --- a/table/block_fetcher_test.cc +++ b/table/block_fetcher_test.cc @@ -83,7 +83,8 @@ class BlockFetcherTest : public testing::Test { TableBuilderOptions(ioptions, moptions, read_options, write_options, comparator, &factories, compression_type, CompressionOptions(), 0 /* column_family_id */, - kDefaultColumnFamilyName, -1 /* level */), + kDefaultColumnFamilyName, -1 /* level */, + kUnknownNewestKeyTime), writer.get())); // Build table. diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index cc8f6bfce3..28923fb563 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -105,6 +105,7 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id); Add(TablePropertiesNames::kCreationTime, props.creation_time); Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time); + Add(TablePropertiesNames::kNewestKeyTime, props.newest_key_time); if (props.file_creation_time > 0) { Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time); } @@ -368,6 +369,8 @@ Status ReadTablePropertiesHelper( &new_table_properties->creation_time}, {TablePropertiesNames::kOldestKeyTime, &new_table_properties->oldest_key_time}, + {TablePropertiesNames::kNewestKeyTime, + &new_table_properties->newest_key_time}, {TablePropertiesNames::kFileCreationTime, &new_table_properties->file_creation_time}, {TablePropertiesNames::kSlowCompressionEstimatedDataSize, diff --git a/table/mock_table.cc b/table/mock_table.cc index 14fbb3f1d0..934c7f54a7 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -25,45 +25,6 @@ void SortKVVector(KVVector* kv_vector, const Comparator* ucmp) { }); } -class MockTableReader : public TableReader { - public: - explicit MockTableReader(const KVVector& table) : table_(table) {} - - InternalIterator* NewIterator(const ReadOptions&, - const SliceTransform* prefix_extractor, - Arena* arena, bool skip_filters, - TableReaderCaller caller, - size_t compaction_readahead_size = 0, - bool allow_unprepared_value = false) override; - - Status Get(const ReadOptions& readOptions, const Slice& key, - GetContext* get_context, const SliceTransform* prefix_extractor, - bool skip_filters = false) override; - - uint64_t ApproximateOffsetOf(const ReadOptions& /*read_options*/, - const Slice& /*key*/, - TableReaderCaller /*caller*/) override { - return 0; - } - - uint64_t ApproximateSize(const ReadOptions& /*read_options*/, - const Slice& /*start*/, const Slice& /*end*/, - TableReaderCaller /*caller*/) override { - return 0; - } - - size_t ApproximateMemoryUsage() const override { return 0; } - - void SetupForCompaction() override {} - - std::shared_ptr GetTableProperties() const override; - - ~MockTableReader() = default; - - private: - const KVVector& table_; -}; - class MockTableIterator : public InternalIterator { public: explicit MockTableIterator(const KVVector& table) : table_(table) { @@ -233,17 +194,6 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key, return Status::OK(); } -std::shared_ptr MockTableReader::GetTableProperties() - const { - TableProperties* tp = new TableProperties(); - tp->num_entries = table_.size(); - tp->num_range_deletions = 0; - tp->raw_key_size = 1; - tp->raw_value_size = 1; - - return std::shared_ptr(tp); -} - MockTableFactory::MockTableFactory() : next_id_(1), corrupt_mode_(MockTableFactory::kCorruptNone) {} diff --git a/table/mock_table.h b/table/mock_table.h index af90740a26..af3a4eb46c 100644 --- a/table/mock_table.h +++ b/table/mock_table.h @@ -91,5 +91,55 @@ class MockTableFactory : public TableFactory { size_t key_value_size_ = 1; }; +class MockTableReader : public TableReader { + public: + explicit MockTableReader(const mock::KVVector& table) : table_(table) { + tp_.num_entries = table_.size(); + tp_.num_range_deletions = 0; + tp_.raw_key_size = 1; + tp_.raw_value_size = 1; + } + explicit MockTableReader(const mock::KVVector& table, + const TableProperties& tp) + : table_(table), tp_(tp) {} + + virtual InternalIterator* NewIterator( + const ReadOptions&, const SliceTransform* prefix_extractor, Arena* arena, + bool skip_filters, TableReaderCaller caller, + size_t compaction_readahead_size = 0, + bool allow_unprepared_value = false) override; + + virtual Status Get(const ReadOptions& readOptions, const Slice& key, + GetContext* get_context, + const SliceTransform* prefix_extractor, + bool skip_filters = false) override; + + virtual uint64_t ApproximateOffsetOf(const ReadOptions& /*read_options*/, + const Slice& /*key*/, + TableReaderCaller /*caller*/) override { + return 0; + } + + virtual uint64_t ApproximateSize(const ReadOptions& /*read_options*/, + const Slice& /*start*/, const Slice& /*end*/, + TableReaderCaller /*caller*/) override { + return 0; + } + + virtual size_t ApproximateMemoryUsage() const override { return 0; } + + virtual void SetupForCompaction() override {} + + virtual std::shared_ptr GetTableProperties() + const override { + return std::make_shared(tp_); + } + + ~MockTableReader() = default; + + private: + const KVVector& table_; + TableProperties tp_; +}; } // namespace mock } // namespace ROCKSDB_NAMESPACE diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 72729ffc3c..905eef7004 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -311,7 +311,7 @@ Status SstFileDumper::ShowCompressionSize( imoptions, moptions, read_options, write_options, ikc, &block_based_table_factories, compress_type, compress_opt, TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - column_family_name, unknown_level); + column_family_name, unknown_level, kUnknownNewestKeyTime); uint64_t num_data_blocks = 0; std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index 806da18da0..84168683f4 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -395,10 +395,10 @@ Status SstFileWriter::Open(const std::string& file_path, Temperature temp) { r->ioptions, r->mutable_cf_options, ReadOptions(), r->write_options, r->internal_comparator, &internal_tbl_prop_coll_factories, compression_type, compression_opts, cf_id, r->column_family_name, - unknown_level, false /* is_bottommost */, TableFileCreationReason::kMisc, - 0 /* oldest_key_time */, 0 /* file_creation_time */, - "SST Writer" /* db_id */, r->db_session_id, 0 /* target_file_size */, - r->next_file_number); + unknown_level, kUnknownNewestKeyTime, false /* is_bottommost */, + TableFileCreationReason::kMisc, 0 /* oldest_key_time */, + 0 /* file_creation_time */, "SST Writer" /* db_id */, r->db_session_id, + 0 /* target_file_size */, r->next_file_number); // External SST files used to each get a unique session id. Now for // slightly better uniqueness probability in constructing cache keys, we // assign fake file numbers to each file (into table properties) and keep diff --git a/table/table_builder.h b/table/table_builder.h index 0a1944e1f3..5b36f06cb5 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -108,7 +108,7 @@ struct TableBuilderOptions : public TablePropertiesCollectorFactory::Context { CompressionType _compression_type, const CompressionOptions& _compression_opts, uint32_t _column_family_id, const std::string& _column_family_name, int _level, - bool _is_bottommost = false, + const int64_t _newest_key_time, bool _is_bottommost = false, TableFileCreationReason _reason = TableFileCreationReason::kMisc, const int64_t _oldest_key_time = 0, const uint64_t _file_creation_time = 0, const std::string& _db_id = "", @@ -129,6 +129,7 @@ struct TableBuilderOptions : public TablePropertiesCollectorFactory::Context { compression_opts(_compression_opts), column_family_name(_column_family_name), oldest_key_time(_oldest_key_time), + newest_key_time(_newest_key_time), target_file_size(_target_file_size), file_creation_time(_file_creation_time), db_id(_db_id), @@ -147,6 +148,7 @@ struct TableBuilderOptions : public TablePropertiesCollectorFactory::Context { const CompressionOptions& compression_opts; const std::string& column_family_name; const int64_t oldest_key_time; + const int64_t newest_key_time; const uint64_t target_file_size; const uint64_t file_creation_time; const std::string db_id; diff --git a/table/table_properties.cc b/table/table_properties.cc index e0aff583f3..7fee67d1e9 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -141,6 +141,8 @@ std::string TableProperties::ToString(const std::string& prop_delim, AppendProperty(result, "time stamp of earliest key", oldest_key_time, prop_delim, kv_delim); + AppendProperty(result, "time stamp of newest key", newest_key_time, + prop_delim, kv_delim); AppendProperty(result, "file creation time", file_creation_time, prop_delim, kv_delim); @@ -302,6 +304,8 @@ const std::string TablePropertiesNames::kCompressionOptions = const std::string TablePropertiesNames::kCreationTime = "rocksdb.creation.time"; const std::string TablePropertiesNames::kOldestKeyTime = "rocksdb.oldest.key.time"; +const std::string TablePropertiesNames::kNewestKeyTime = + "rocksdb.newest.key.time"; const std::string TablePropertiesNames::kFileCreationTime = "rocksdb.file.creation.time"; const std::string TablePropertiesNames::kSlowCompressionEstimatedDataSize = @@ -395,6 +399,10 @@ static std::unordered_map {offsetof(struct TableProperties, oldest_key_time), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"newest_key_time", + {offsetof(struct TableProperties, newest_key_time), + OptionType::kUInt64T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, {"file_creation_time", {offsetof(struct TableProperties, file_creation_time), OptionType::kUInt64T, OptionVerificationType::kNormal, diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index 2e9094bfcb..a588f6eea0 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -100,11 +100,11 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, int unknown_level = -1; const WriteOptions write_options; tb = opts.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, read_options, write_options, - ikc, &internal_tbl_prop_coll_factories, - CompressionType::kNoCompression, - CompressionOptions(), 0 /* column_family_id */, - kDefaultColumnFamilyName, unknown_level), + TableBuilderOptions( + ioptions, moptions, read_options, write_options, ikc, + &internal_tbl_prop_coll_factories, CompressionType::kNoCompression, + CompressionOptions(), 0 /* column_family_id */, + kDefaultColumnFamilyName, unknown_level, kUnknownNewestKeyTime), file_writer.get()); } else { s = DB::Open(opts, dbname, &db); diff --git a/table/table_test.cc b/table/table_test.cc index f51ddf69ce..d4e4b3936d 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -380,11 +380,11 @@ class TableConstructor : public Constructor { const ReadOptions read_options; const WriteOptions write_options; builder.reset(moptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, read_options, write_options, - internal_comparator, - &internal_tbl_prop_coll_factories, - options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level_), + TableBuilderOptions( + ioptions, moptions, read_options, write_options, + internal_comparator, &internal_tbl_prop_coll_factories, + options.compression, options.compression_opts, kUnknownColumnFamily, + column_family_name, level_, kUnknownNewestKeyTime), file_writer_.get())); for (const auto& kv : kv_map) { @@ -4464,7 +4464,8 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, *comparator, &internal_tbl_prop_coll_factories, options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level), + kUnknownColumnFamily, column_family_name, level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4502,7 +4503,8 @@ TEST_P(BlockBasedTableTest, Crc32cFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, *comparator, &internal_tbl_prop_coll_factories, options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level), + kUnknownColumnFamily, column_family_name, level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4550,7 +4552,8 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, + kUnknownNewestKeyTime), file_writer.get())); for (char c = 'a'; c <= 'z'; ++c) { @@ -4605,7 +4608,8 @@ TEST_F(PlainTableTest, NoFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4646,7 +4650,8 @@ TEST_F(PlainTableTest, Crc32cFileChecksum) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, + kUnknownNewestKeyTime), f.GetFileWriter())); ASSERT_OK(f.ResetTableBuilder(std::move(builder))); f.AddKVtoKVMap(1000); @@ -4724,9 +4729,9 @@ static void DoCompressionTest(CompressionType comp) { ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3550)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3550)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7100)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3555)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3555)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7110)); c.ResetTableReader(); } @@ -5261,7 +5266,7 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), + column_family_name, -1, kUnknownNewestKeyTime), file_writer.get())); for (char c = 'a'; c <= 'z'; ++c) { @@ -5445,7 +5450,7 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), + column_family_name, -1, kUnknownNewestKeyTime), file_writer.get())); for (int i = 1; i <= 10000; ++i) { @@ -5631,7 +5636,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), + column_family_name, -1, kUnknownNewestKeyTime), file_writer.get())); for (int i = 1; i <= 10000; ++i) { @@ -6240,8 +6245,8 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, Basic) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kSnappyCompression, options.compression_opts, - kUnknownColumnFamily, "test_cf", - -1 /* level */), + kUnknownColumnFamily, "test_cf", -1 /* level */, + kUnknownNewestKeyTime), file_writer.get())); std::string key1 = "key1"; @@ -6318,7 +6323,7 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kSnappyCompression, options.compression_opts, kUnknownColumnFamily, - "test_cf", -1 /* level */), + "test_cf", -1 /* level */, kUnknownNewestKeyTime), file_writer.get())); std::string key1 = "key1"; @@ -6405,7 +6410,7 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, BasicWithCacheFull) { TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc, &internal_tbl_prop_coll_factories, kSnappyCompression, options.compression_opts, kUnknownColumnFamily, - "test_cf", -1 /* level */), + "test_cf", -1 /* level */, kUnknownNewestKeyTime), file_writer.get())); std::string key1 = "key1"; diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index a5c567b38a..c513a23bc9 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -132,7 +132,7 @@ class SSTDumpToolTest : public testing::Test { &internal_tbl_prop_coll_factories, CompressionType::kNoCompression, CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - column_family_name, unknown_level), + column_family_name, unknown_level, kUnknownNewestKeyTime), file_writer.get())); // Populate slightly more than 1K keys diff --git a/unreleased_history/new_features/tp_newest_key_time.md b/unreleased_history/new_features/tp_newest_key_time.md new file mode 100644 index 0000000000..1aaa773fa2 --- /dev/null +++ b/unreleased_history/new_features/tp_newest_key_time.md @@ -0,0 +1 @@ +* Adds a new table property "rocksdb.newest.key.time" which records the unix timestamp of the newest key. Uses this table property for FIFO TTL and temperature change compaction.