Fix no internal time recorded for small preclude_last_level (#10829)

Summary:
When the `preclude_last_level_data_seconds` or
`preserve_internal_time_seconds` is smaller than 100 (seconds), no seqno->time information was recorded.
Also make sure all data will be compacted to the last level even if there's no write to record the time information.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10829

Test Plan: added unittest

Reviewed By: siying

Differential Revision: D40443934

Pulled By: jay-zhuang

fbshipit-source-id: 2ecf1361daf9f3e5c3385aee6dc924fa59e2813a
This commit is contained in:
Jay Zhuang 2022-10-20 17:11:38 -07:00 committed by Facebook GitHub Bot
parent 865d5576ad
commit 1663f77d2a
3 changed files with 59 additions and 2 deletions

View file

@ -1104,7 +1104,7 @@ void CompactionIterator::DecideOutputLevel() {
// if the key is newer than the cutoff sequence or within the earliest
// snapshot, it should output to the penultimate level.
if (ikey_.sequence >= preclude_last_level_min_seqno_ ||
if (ikey_.sequence > preclude_last_level_min_seqno_ ||
ikey_.sequence > earliest_snapshot_) {
output_to_penultimate_level_ = true;
}

View file

@ -1434,6 +1434,60 @@ TEST_F(PrecludeLastLevelTest, MigrationFromPreserveTimePartial) {
Close();
}
TEST_F(PrecludeLastLevelTest, SmallPrecludeTime) {
const int kNumTrigger = 4;
const int kNumLevels = 7;
const int kNumKeys = 100;
Options options = CurrentOptions();
options.compaction_style = kCompactionStyleUniversal;
options.preclude_last_level_data_seconds = 60;
options.preserve_internal_time_seconds = 0;
options.env = mock_env_.get();
options.level0_file_num_compaction_trigger = kNumTrigger;
options.num_levels = kNumLevels;
options.last_level_temperature = Temperature::kCold;
DestroyAndReopen(options);
Random rnd(301);
dbfull()->TEST_WaitForPeridicTaskRun([&] {
mock_clock_->MockSleepForSeconds(static_cast<int>(rnd.Uniform(10) + 1));
});
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(Put(Key(i), rnd.RandomString(100)));
dbfull()->TEST_WaitForPeridicTaskRun([&] {
mock_clock_->MockSleepForSeconds(static_cast<int>(rnd.Uniform(2)));
});
}
ASSERT_OK(Flush());
TablePropertiesCollection tables_props;
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
ASSERT_EQ(tables_props.size(), 1);
ASSERT_FALSE(tables_props.begin()->second->seqno_to_time_mapping.empty());
SeqnoToTimeMapping tp_mapping;
ASSERT_OK(
tp_mapping.Add(tables_props.begin()->second->seqno_to_time_mapping));
ASSERT_OK(tp_mapping.Sort());
ASSERT_FALSE(tp_mapping.Empty());
auto seqs = tp_mapping.TEST_GetInternalMapping();
ASSERT_FALSE(seqs.empty());
// Wait more than preclude_last_level time, then make sure all the data is
// compacted to the last level even there's no write (no seqno -> time
// information was flushed to any SST).
mock_clock_->MockSleepForSeconds(100);
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel());
ASSERT_EQ(GetSstSizeHelper(Temperature::kUnknown), 0);
ASSERT_GT(GetSstSizeHelper(Temperature::kCold), 0);
Close();
}
#endif // !defined(ROCKSDB_LITE)
} // namespace ROCKSDB_NAMESPACE

View file

@ -865,8 +865,11 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker() {
uint64_t seqno_time_cadence = 0;
if (min_time_duration != std::numeric_limits<uint64_t>::max()) {
// round up to 1 when the time_duration is smaller than
// kMaxSeqnoTimePairsPerCF
seqno_time_cadence =
min_time_duration / SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF;
(min_time_duration + SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF - 1) /
SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF;
}
Status s;