mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-27 20:43:57 +00:00
1104eaa35e
Summary: This PR adds support for the `TimedPut` API. We introduced a new type `kTypeValuePreferredSeqno` for entries added to the DB via the `TimedPut` API. The life cycle of such an entry on the write/flush/compaction paths is: 1) It is initially added to the memtable as: `<user_key, seq, kTypeValuePreferredSeqno>: {value, write_unix_time}` 2) When it's flushed to L0 sst files, it's converted to: `<user_key, seq, kTypeValuePreferredSeqno>: {value, preferred_seqno}` when we have easy access to the seqno to time mapping. 3) During compaction, if certain conditions are met, we swap in the `preferred_seqno` and the entry will become: `<user_key, preferred_seqno, kTypeValue>: value`. This step helps fast track these entries to the cold tier if they are eligible after the sequence number swap. On the read path: A `kTypeValuePreferredSeqno` entry acts the same as a `kTypeValue` entry; the `write_unix_time`/`preferred_seqno` part packed in the value is completely ignored. Needed follow ups: 1) The seqno to time mapping accessible in flush needs to be extended to cover the `write_unix_time` for possible `kTypeValuePreferredSeqno` entries. This also means we need to track these `write_unix_time` values in the memtable. 2) Compaction filter support for the new `kTypeValuePreferredSeqno` type for feature parity with other `kTypeValue` and equivalent types. 3) Stress test coverage for the feature. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12419 Test Plan: Added unit tests Reviewed By: pdillinger Differential Revision: D54920296 Pulled By: jowlyzhang fbshipit-source-id: c8b43f7a7c465e569141770e93c748371ff1da9e
1652 lines
57 KiB
C++
1652 lines
57 KiB
C++
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
//
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#include "db/db_test_util.h"
|
|
#include "db/periodic_task_scheduler.h"
|
|
#include "db/seqno_to_time_mapping.h"
|
|
#include "port/stack_trace.h"
|
|
#include "rocksdb/iostats_context.h"
|
|
#include "rocksdb/utilities/debug.h"
|
|
#include "test_util/mock_time_env.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class SeqnoTimeTest : public DBTestBase {
|
|
public:
|
|
SeqnoTimeTest() : DBTestBase("seqno_time_test", /*env_do_fsync=*/false) {
|
|
mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
|
|
mock_clock_->SetCurrentTime(kMockStartTime);
|
|
mock_env_ = std::make_unique<CompositeEnvWrapper>(env_, mock_clock_);
|
|
}
|
|
|
|
protected:
|
|
std::unique_ptr<Env> mock_env_;
|
|
std::shared_ptr<MockSystemClock> mock_clock_;
|
|
|
|
// Sufficient starting time that preserve time doesn't under-flow into
|
|
// pre-history
|
|
static constexpr uint32_t kMockStartTime = 10000000;
|
|
|
|
void SetUp() override {
|
|
mock_clock_->InstallTimedWaitFixCallback();
|
|
SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::StartPeriodicTaskScheduler:Init",
|
|
[mock_clock = mock_clock_](void* arg) {
|
|
auto periodic_task_scheduler_ptr =
|
|
static_cast<PeriodicTaskScheduler*>(arg);
|
|
periodic_task_scheduler_ptr->TEST_OverrideTimer(mock_clock.get());
|
|
});
|
|
mock_clock_->SetCurrentTime(kMockStartTime);
|
|
}
|
|
|
|
// make sure the file is not in cache, otherwise it won't have IO info
|
|
void AssertKeyTemperature(int key_id, Temperature expected_temperature) {
|
|
get_iostats_context()->Reset();
|
|
IOStatsContext* iostats = get_iostats_context();
|
|
std::string result = Get(Key(key_id));
|
|
ASSERT_FALSE(result.empty());
|
|
ASSERT_GT(iostats->bytes_read, 0);
|
|
switch (expected_temperature) {
|
|
case Temperature::kUnknown:
|
|
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count,
|
|
0);
|
|
ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read,
|
|
0);
|
|
break;
|
|
case Temperature::kCold:
|
|
ASSERT_GT(iostats->file_io_stats_by_temperature.cold_file_read_count,
|
|
0);
|
|
ASSERT_GT(iostats->file_io_stats_by_temperature.cold_file_bytes_read,
|
|
0);
|
|
break;
|
|
default:
|
|
// the test only support kCold now for the bottommost temperature
|
|
FAIL();
|
|
}
|
|
}
|
|
};
|
|
|
|
// Universal compaction with preclude_last_level_data_seconds = 10000 and a
// cold last level: checks that recently written keys are served from
// non-cold files and that, as the mock clock advances, more and more keys
// are served from cold files.
TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) {
  const int kNumTrigger = 4;
  const int kNumLevels = 7;
  const int kNumKeys = 100;
  const int kKeyPerSec = 10;

  Options options = CurrentOptions();
  options.env = mock_env_.get();
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = kNumLevels;
  options.last_level_temperature = Temperature::kCold;
  options.preclude_last_level_data_seconds = 10000;
  DestroyAndReopen(options);

  // Moves the mock clock forward by `seconds` from inside
  // TEST_WaitForPeriodicTaskRun.
  auto advance_clock = [&](int seconds) {
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(seconds); });
  };

  // Phase 1: overlapping files, enough of them to trigger a compaction.
  int sst_num = 0;
  while (sst_num < kNumTrigger) {
    const int first_key = sst_num * (kNumKeys - 1);
    for (int k = 0; k < kNumKeys; ++k) {
      ASSERT_OK(Put(Key(first_key + k), "value"));
      advance_clock(kKeyPerSec);
    }
    ASSERT_OK(Flush());
    ++sst_num;
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Everything is still hot, so output only goes to the penultimate level.
  ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel());
  ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);

  // An arbitrary key should be hot (kUnknown).
  AssertKeyTemperature(20, Temperature::kUnknown);

  // Phase 2: keep writing; data stays hot until the 10th SST because one key
  // is written every 10 seconds, an SST holds 100 keys (1000 seconds), and
  // preclude_last_level_data_seconds is 10k.
  while (sst_num < kNumTrigger * 2) {
    const int first_key = sst_num * (kNumKeys - 1);
    for (int k = 0; k < kNumKeys; ++k) {
      ASSERT_OK(Put(Key(first_key + k), "value"));
      advance_clock(kKeyPerSec);
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
    ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
    ++sst_num;
  }

  // Phase 3: from here on there is both hot and cold data.
  while (sst_num < kNumTrigger * 3) {
    const int first_key = sst_num * (kNumKeys - 1);
    for (int k = 0; k < kNumKeys; ++k) {
      ASSERT_OK(Put(Key(first_key + k), "value"));
      advance_clock(kKeyPerSec);
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ++sst_num;
  }

  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  const uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
  const uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold);
  ASSERT_GT(hot_data_size, 0);
  ASSERT_GT(cold_data_size, 0);
  // The first few keys should be cold.
  AssertKeyTemperature(20, Temperature::kCold);

  for (int round = 0; round < 30; ++round) {
    advance_clock(20 * kKeyPerSec);
    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

    // The hot/cold cut-off should lie between round * 20 + 200 and
    // round * 20 + 250.
    const int cold_key = round * 20 + 200;
    const int hot_key = round * 20 + 250;
    AssertKeyTemperature(hot_key, Temperature::kUnknown);
    AssertKeyTemperature(cold_key, Temperature::kCold);
  }

  ASSERT_LT(GetSstSizeHelper(Temperature::kUnknown), hot_data_size);
  ASSERT_GT(GetSstSizeHelper(Temperature::kCold), cold_data_size);

  // Wait more; most data should be cold afterwards. It may not be all of it:
  // with no new data written to SST, the compaction gets no new seqno->time
  // sampling to date the last few entries.
  for (int round = 0; round < 5; ++round) {
    advance_clock(1000);
    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  }

  // Any key close to the end should be cold.
  AssertKeyTemperature(1000, Temperature::kCold);

  // Close explicitly, because the env is a local variable which will be
  // released first.
  Close();
}
|
|
|
|
// Level compaction (dynamic level bytes, auto compaction disabled) with
// preclude_last_level_data_seconds = 10000 and a cold last level: checks
// that data is split into hot and cold files, and that the cold share grows
// as the mock clock advances.
TEST_F(SeqnoTimeTest, TemperatureBasicLevel) {
  const int kNumLevels = 7;
  const int kNumKeys = 100;

  Options options = CurrentOptions();
  options.env = mock_env_.get();
  options.num_levels = kNumLevels;
  options.level_compaction_dynamic_level_bytes = true;
  options.last_level_temperature = Temperature::kCold;
  options.preclude_last_level_data_seconds = 10000;
  // TODO(zjay): for level compaction, auto-compaction may get stuck in a dead
  // loop if the penultimate level score > 1 but the hot data is not cold
  // enough to compact to the last level, which keeps triggering compaction.
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // Moves the mock clock forward by `seconds` from inside
  // TEST_WaitForPeriodicTaskRun.
  auto advance_clock = [&](int seconds) {
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(seconds); });
  };

  // Write overlapping files.
  int sst_num = 0;
  while (sst_num < 4) {
    const int first_key = sst_num * (kNumKeys - 1);
    for (int k = 0; k < kNumKeys; ++k) {
      ASSERT_OK(Put(Key(first_key + k), "value"));
      advance_clock(10);
    }
    ASSERT_OK(Flush());
    ++sst_num;
  }

  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  // Everything is hot, so output only goes to the penultimate level.
  ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel());
  ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);

  // An arbitrary key should be hot (kUnknown).
  AssertKeyTemperature(20, Temperature::kUnknown);

  // Add more data so that hot and cold data are mixed.
  while (sst_num < 14) {
    const int first_key = sst_num * (kNumKeys - 1);
    for (int k = 0; k < kNumKeys; ++k) {
      ASSERT_OK(Put(Key(first_key + k), "value"));
      advance_clock(10);
    }
    ASSERT_OK(Flush());
    ++sst_num;
  }

  // Second to last level.
  MoveFilesToLevel(5);
  ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);

  // Compacting to the last level should split the hot and cold data.
  MoveFilesToLevel(6);
  uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
  uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold);
  ASSERT_GT(hot_data_size, 0);
  ASSERT_GT(cold_data_size, 0);
  // The first few keys should be cold.
  AssertKeyTemperature(20, Temperature::kCold);

  // With every wait, cold data should grow and hot data should shrink.
  for (int round = 0; round < 30; ++round) {
    advance_clock(200);
    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

    const uint64_t prev_hot_size = hot_data_size;
    const uint64_t prev_cold_size = cold_data_size;
    hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
    cold_data_size = GetSstSizeHelper(Temperature::kCold);
    ASSERT_LT(hot_data_size, prev_hot_size);
    ASSERT_GT(cold_data_size, prev_cold_size);

    // The hot/cold cut-off key should be around round * 20 + 400 -> 450.
    const int cold_key = round * 20 + 400;
    const int hot_key = round * 20 + 450;
    AssertKeyTemperature(hot_key, Temperature::kUnknown);
    AssertKeyTemperature(cold_key, Temperature::kCold);
  }

  // Wait more; most data should be cold afterwards. Hot data might not be
  // empty: without new writes there is no seqno->time sampling available to
  // the compaction.
  for (int round = 0; round < 5; ++round) {
    advance_clock(1000);
    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  }

  // Any key close to the end should be cold.
  AssertKeyTemperature(1000, Temperature::kCold);

  Close();
}
|
|
|
|
// Selects which option(s) a parameterized test uses to turn on
// seqno-to-time tracking.
enum class SeqnoTimeTestType : char {
  // Only preserve_internal_time_seconds is set.
  kTrackInternalTimeSeconds = 0,
  // Only preclude_last_level_data_seconds is set.
  kPrecludeLastLevel,
  // Both are set; preserve_internal_time_seconds gets the smaller value.
  kBothSetTrackSmaller,
};
|
|
|
|
class SeqnoTimeTablePropTest
|
|
: public SeqnoTimeTest,
|
|
public ::testing::WithParamInterface<SeqnoTimeTestType> {
|
|
public:
|
|
SeqnoTimeTablePropTest() : SeqnoTimeTest() {}
|
|
|
|
void SetTrackTimeDurationOptions(uint64_t track_time_duration,
|
|
Options& options) const {
|
|
// either option set will enable the time tracking feature
|
|
switch (GetParam()) {
|
|
case SeqnoTimeTestType::kTrackInternalTimeSeconds:
|
|
options.preclude_last_level_data_seconds = 0;
|
|
options.preserve_internal_time_seconds = track_time_duration;
|
|
break;
|
|
case SeqnoTimeTestType::kPrecludeLastLevel:
|
|
options.preclude_last_level_data_seconds = track_time_duration;
|
|
options.preserve_internal_time_seconds = 0;
|
|
break;
|
|
case SeqnoTimeTestType::kBothSetTrackSmaller:
|
|
options.preclude_last_level_data_seconds = track_time_duration;
|
|
options.preserve_internal_time_seconds = track_time_duration / 10;
|
|
break;
|
|
}
|
|
}
|
|
};
|
|
|
|
// Run every SeqnoTimeTablePropTest once per way of enabling time tracking.
// The order of the values is kept as-is.
INSTANTIATE_TEST_CASE_P(
    SeqnoTimeTablePropTest, SeqnoTimeTablePropTest,
    ::testing::Values(SeqnoTimeTestType::kTrackInternalTimeSeconds,
                      SeqnoTimeTestType::kPrecludeLastLevel,
                      SeqnoTimeTestType::kBothSetTrackSmaller));
|
|
|
|
// Writes four batches of keys at different rates, flushes after each, and
// checks the seqno-to-time mapping stored in each new file's table
// properties: its entry count and, for most batches, how closely
// GetProximalSeqnoBeforeTime tracks a linear seqno/time interpolation.
// Finally re-enables auto compaction and checks the compaction output.
TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) {
  Options options = CurrentOptions();
  SetTrackTimeDurationOptions(10000, options);
  options.env = mock_env_.get();
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // Moves the mock clock forward by `seconds` from inside
  // TEST_WaitForPeriodicTaskRun.
  auto advance_clock = [&](int seconds) {
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(seconds); });
  };

  std::set<uint64_t> checked_file_nums;
  TablePropertiesCollection tables_props;
  SeqnoToTimeMapping tp_mapping;

  // Returns the first entry of `tables_props` whose original file number
  // has not been recorded in `checked_file_nums`, or end() if none.
  auto find_unchecked_file = [&]() {
    auto iter = tables_props.begin();
    while (iter != tables_props.end() &&
           checked_file_nums.count(iter->second->orig_file_number) != 0) {
      ++iter;
    }
    return iter;
  };

  SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber() + 1;
  uint64_t start_time = mock_clock_->NowSeconds();

  // Checks GetProximalSeqnoBeforeTime at every percent of the time window
  // since the previous call (or test start) against a linear interpolation
  // of sequence numbers, then starts a new window at the current seqno/time.
  auto ValidateProximalSeqnos = [&](const char* name, double fuzz_ratio) {
    const SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber() + 1;
    const uint64_t end_time = mock_clock_->NowSeconds();
    const SequenceNumber seq_span = seq_end - start_seq;
    const uint64_t time_span = end_time - start_time;
    const uint64_t seqno_fuzz =
        static_cast<uint64_t>(seq_span * fuzz_ratio + 0.999999);
    for (unsigned time_pct = 0; time_pct <= 100; time_pct++) {
      SCOPED_TRACE("name=" + std::string(name) +
                   " time_pct=" + std::to_string(time_pct));
      // Validate the important proximal API (GetProximalSeqnoBeforeTime)
      const uint64_t t = start_time + time_pct * time_span / 100;
      const auto seqno_reported = tp_mapping.GetProximalSeqnoBeforeTime(t);
      const auto seqno_expected = start_seq + time_pct * seq_span / 100;
      EXPECT_LE(seqno_reported, seqno_expected);
      // The lower bound is only checked for times less than 10000 seconds
      // before the end of the window.
      if (end_time - t < 10000) {
        EXPECT_LE(seqno_expected, seqno_reported + seqno_fuzz);
      }
    }
    start_seq = seq_end;
    start_time = end_time;
  };

  // ---- Batch "a": one key every 10 seconds ----
  for (int i = 0; i < 200; ++i) {
    ASSERT_OK(Put(Key(i), "value"));
    advance_clock(10);
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_EQ(tables_props.size(), 1);
  auto it = tables_props.begin();
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  ASSERT_FALSE(tp_mapping.Empty());
  auto entries = tp_mapping.TEST_GetInternalMapping();
  // About ~20 seqs->time entries, because the sample rate is 10000/100 and
  // 2k of time passes. Add (roughly) one for the starting entry.
  // Revised: with automatic pre-population of mappings, some of these
  // entries might be purged to keep the DB mapping within capacity.
  EXPECT_GE(entries.size(), 20 / 2);
  EXPECT_LE(entries.size(), 22);

  ValidateProximalSeqnos("a", 0.1);

  checked_file_nums.insert(it->second->orig_file_number);

  // ---- Batch "b": one key every second ----
  for (int i = 0; i < 200; ++i) {
    ASSERT_OK(Put(Key(i + 190), "value"));
    advance_clock(1);
  }

  ASSERT_OK(Flush());
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_EQ(tables_props.size(), 2);
  it = find_unchecked_file();
  ASSERT_TRUE(it != tables_props.end());

  tp_mapping.Clear();
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  entries = tp_mapping.TEST_GetInternalMapping();
  // Only a few time samples are expected.
  ASSERT_GE(entries.size(), 1);
  ASSERT_LE(entries.size(), 3);

  // High fuzz ratio because of the low number of samples.
  ValidateProximalSeqnos("b", 0.5);

  checked_file_nums.insert(it->second->orig_file_number);

  // ---- Batch "c": one key every 200 seconds ----
  for (int i = 0; i < 200; ++i) {
    ASSERT_OK(Put(Key(i + 380), "value"));
    advance_clock(200);
  }
  ASSERT_OK(Flush());
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_EQ(tables_props.size(), 3);
  it = find_unchecked_file();
  ASSERT_TRUE(it != tables_props.end());

  tp_mapping.Clear();
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  entries = tp_mapping.TEST_GetInternalMapping();
  // For the preserved time span, only 10000/200=50 (+1) entries were
  // recorded.
  ASSERT_GE(entries.size(), 50);
  ASSERT_LE(entries.size(), 51);

  ValidateProximalSeqnos("c", 0.04);

  checked_file_nums.insert(it->second->orig_file_number);

  // ---- Batch "d": one key every 100 seconds ----
  for (int i = 0; i < 200; ++i) {
    ASSERT_OK(Put(Key(i + 570), "value"));
    advance_clock(100);
  }
  ASSERT_OK(Flush());
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_EQ(tables_props.size(), 4);
  it = find_unchecked_file();
  ASSERT_TRUE(it != tables_props.end());
  tp_mapping.Clear();
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  entries = tp_mapping.TEST_GetInternalMapping();
  // For the preserved time span, max entries were recorded and preserved
  // (10000/100=100 (+1)).
  ASSERT_GE(entries.size(), 99);
  ASSERT_LE(entries.size(), 101);

  checked_file_nums.insert(it->second->orig_file_number);

  // ---- Re-enable compaction and inspect a file not seen before ----
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "false"},
  }));

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_GE(tables_props.size(), 1);
  it = find_unchecked_file();
  ASSERT_TRUE(it != tables_props.end());
  tp_mapping.Clear();
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  entries = tp_mapping.TEST_GetInternalMapping();
  ASSERT_GE(entries.size(), 99);
  ASSERT_LE(entries.size(), 101);

  ValidateProximalSeqnos("d", 0.02);

  ASSERT_OK(db_->Close());
}
|
|
|
|
// Exercises seqno-to-time tracking with several column families:
//  - the default CF never enables tracking, so its files carry no mapping;
//  - CF "one" enables tracking with a 10000 second duration;
//  - CF "two" enables tracking with a 1000000 second duration.
// Checks the periodic kRecordSeqnoTime task, the size of the in-memory
// mapping, and the mapping stored in flushed and compacted files as CFs are
// created and dropped.
TEST_P(SeqnoTimeTablePropTest, MultiCFs) {
  Options options = CurrentOptions();
  options.preclude_last_level_data_seconds = 0;
  options.preserve_internal_time_seconds = 0;
  options.env = mock_env_.get();
  options.stats_dump_period_sec = 0;
  options.stats_persist_period_sec = 0;
  ReopenWithColumnFamilies({"default"}, options);

  // No CF has tracking enabled yet, so the recording task must be absent.
  const PeriodicTaskScheduler& scheduler =
      dbfull()->TEST_GetPeriodicTaskScheduler();
  ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));

  // Write some data and increase the current time
  for (int i = 0; i < 200; i++) {
    ASSERT_OK(Put(Key(i), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  }
  ASSERT_OK(Flush());
  TablePropertiesCollection tables_props;
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_EQ(tables_props.size(), 1);
  auto it = tables_props.begin();
  ASSERT_TRUE(it->second->seqno_to_time_mapping.empty());

  ASSERT_TRUE(dbfull()->TEST_GetSeqnoToTimeMapping().Empty());

  // Adding a CF with tracking enabled must start the recording task.
  Options options_1 = options;
  SetTrackTimeDurationOptions(10000, options_1);
  CreateColumnFamilies({"one"}, options_1);
  ASSERT_TRUE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));

  // Write some data to the default CF (without preclude_last_level feature)
  for (int i = 0; i < 200; i++) {
    ASSERT_OK(Put(Key(i), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  }
  ASSERT_OK(Flush());

  // Write some data to the CF one
  for (int i = 0; i < 20; i++) {
    ASSERT_OK(Put(1, Key(i), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
  }
  ASSERT_OK(Flush(1));
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[1], &tables_props));
  ASSERT_EQ(tables_props.size(), 1);
  it = tables_props.begin();
  SeqnoToTimeMapping tp_mapping;
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  ASSERT_FALSE(tp_mapping.Empty());
  auto seqs = tp_mapping.TEST_GetInternalMapping();
  ASSERT_GE(seqs.size(), 1);
  ASSERT_LE(seqs.size(), 4);

  // Create one more CF with larger preclude_last_level time
  Options options_2 = options;
  SetTrackTimeDurationOptions(1000000, options_2);  // 1m
  CreateColumnFamilies({"two"}, options_2);

  // Add more data to CF "two" to fill the in memory mapping
  for (int i = 0; i < 2000; i++) {
    ASSERT_OK(Put(2, Key(i), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  }
  seqs = dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping();
  ASSERT_GE(seqs.size(), 1000 - 1);
  // Non-strict limit can exceed capacity by a reasonable fraction
  ASSERT_LE(seqs.size(), 1000 * 9 / 8);

  ASSERT_OK(Flush(2));
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props));
  ASSERT_EQ(tables_props.size(), 1);
  it = tables_props.begin();
  tp_mapping.Clear();
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  seqs = tp_mapping.TEST_GetInternalMapping();
  // the max encoded entries is 100
  ASSERT_GE(seqs.size(), 100 - 1);
  ASSERT_LE(seqs.size(), 100 + 1);

  // Write some data to the default CF after all memtables of the tracking
  // CFs have been flushed. The in-memory seqno->time mapping is deliberately
  // not inspected at this point: the value previously fetched here was
  // overwritten before it was ever read.
  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Put(0, Key(i), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  }
  ASSERT_OK(Flush(0));

  // trigger compaction for CF "two" and make sure the compaction output has
  // seqno_to_time_mapping
  for (int j = 0; j < 3; j++) {
    for (int i = 0; i < 200; i++) {
      ASSERT_OK(Put(2, Key(i), "value"));
      dbfull()->TEST_WaitForPeriodicTaskRun(
          [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
    }
    ASSERT_OK(Flush(2));
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props));
  ASSERT_EQ(tables_props.size(), 1);
  it = tables_props.begin();
  tp_mapping.Clear();
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  seqs = tp_mapping.TEST_GetInternalMapping();
  ASSERT_GE(seqs.size(), 99);
  ASSERT_LE(seqs.size(), 101);

  // The default CF has tracking disabled, so its compaction output must not
  // carry a mapping.
  for (int i = 0; i < 200; i++) {
    ASSERT_OK(Put(0, Key(i), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  }
  ASSERT_OK(Flush(0));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[0], &tables_props));
  ASSERT_EQ(tables_props.size(), 1);
  it = tables_props.begin();
  ASSERT_TRUE(it->second->seqno_to_time_mapping.empty());

  // Write some data to CF "two", but don't flush to accumulate
  for (int i = 0; i < 1000; i++) {
    ASSERT_OK(Put(2, Key(i), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  }
  ASSERT_GE(
      dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
      500);
  // After dropping CF "one", the in-memory mapping will be changed to only
  // follow CF "two" options.
  ASSERT_OK(db_->DropColumnFamily(handles_[1]));
  ASSERT_LE(
      dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
      100 + 5);

  // After dropping CF "two", the in-memory mapping is also cleared.
  ASSERT_OK(db_->DropColumnFamily(handles_[2]));
  ASSERT_EQ(
      dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
      0);

  // And the timer worker is stopped
  ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));
  Close();
}
|
|
|
|
// Opens two DB instances with time tracking enabled and checks that the
// second instance accumulates seqno-to-time entries while data is written
// to it and the mock clock advances.
TEST_P(SeqnoTimeTablePropTest, MultiInstancesBasic) {
  const int kInstanceNum = 2;

  Options options = CurrentOptions();
  SetTrackTimeDurationOptions(10000, options);
  options.env = mock_env_.get();
  options.stats_dump_period_sec = 0;
  options.stats_persist_period_sec = 0;

  std::vector<DB*> dbs(kInstanceNum);
  for (int idx = 0; idx < kInstanceNum; ++idx) {
    const std::string db_path = test::PerThreadDBPath(std::to_string(idx));
    ASSERT_OK(DB::Open(options, db_path, &dbs[idx]));
  }

  // Make sure the second instance has the worker enabled
  auto* second_db = static_cast_with_check<DBImpl>(dbs[1]);
  WriteOptions write_opts;
  for (int key_id = 0; key_id < 200; ++key_id) {
    ASSERT_OK(second_db->Put(write_opts, Key(key_id), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(100); });
  }
  SeqnoToTimeMapping seqno_to_time_mapping =
      second_db->TEST_GetSeqnoToTimeMapping();
  ASSERT_GT(seqno_to_time_mapping.Size(), 10);

  // The instances were opened directly through DB::Open, so close and
  // delete them here.
  for (DB* db : dbs) {
    ASSERT_OK(db->Close());
    delete db;
  }
}
|
|
|
|
// Universal compaction with time tracking enabled: checks that flushed and
// compacted files carry a seqno-to-time mapping, that sequence numbers are
// not zeroed while the data is recent, and that they are zeroed (and data
// ends up on the last level) once the mock clock makes the data old enough.
TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) {
  const int kNumTrigger = 4;
  const int kNumLevels = 7;
  const int kNumKeys = 100;

  Options options = CurrentOptions();
  SetTrackTimeDurationOptions(10000, options);
  options.env = mock_env_.get();
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = kNumLevels;

  DestroyAndReopen(options);

  // Counts how often the compaction iterator reports zeroing a seqno.
  std::atomic_uint64_t num_seqno_zeroing{0};

  SyncPoint* const sync_point = SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
  sync_point->SetCallBack("CompactionIterator::PrepareOutput:ZeroingSeq",
                          [&](void* /*arg*/) { num_seqno_zeroing++; });
  sync_point->EnableProcessing();

  // Moves the mock clock forward by `seconds` from inside
  // TEST_WaitForPeriodicTaskRun.
  auto advance_clock = [&](int seconds) {
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(seconds); });
  };

  // Write one file fewer than the compaction trigger.
  int sst_num = 0;
  while (sst_num < kNumTrigger - 1) {
    const int first_key = sst_num * (kNumKeys - 1);
    for (int k = 0; k < kNumKeys; ++k) {
      ASSERT_OK(Put(Key(first_key + k), "value"));
      advance_clock(10);
    }
    ASSERT_OK(Flush());
    ++sst_num;
  }
  TablePropertiesCollection tables_props;
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_EQ(tables_props.size(), 3);
  for (const auto& props : tables_props) {
    const auto& encoded_mapping = props.second->seqno_to_time_mapping;
    ASSERT_FALSE(encoded_mapping.empty());
    SeqnoToTimeMapping file_mapping;
    ASSERT_OK(file_mapping.DecodeFrom(encoded_mapping));
    ASSERT_TRUE(file_mapping.TEST_IsEnforced());
    ASSERT_FALSE(file_mapping.Empty());
    auto entries = file_mapping.TEST_GetInternalMapping();
    // Add (roughly) one for starting entry.
    ASSERT_GE(entries.size(), 10);
    ASSERT_LE(entries.size(), 10 + 2);
  }

  // One more file triggers a compaction.
  {
    const int first_key = sst_num * (kNumKeys - 1);
    for (int k = 0; k < kNumKeys; ++k) {
      ASSERT_OK(Put(Key(first_key + k), "value"));
      advance_clock(10);
    }
  }
  sst_num++;
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  tables_props.clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  ASSERT_EQ(tables_props.size(), 1);

  auto it = tables_props.begin();
  SeqnoToTimeMapping tp_mapping;
  ASSERT_FALSE(it->second->seqno_to_time_mapping.empty());
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());

  // Compact to the last level.
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  // With the preclude feature on, all data must land on the penultimate
  // level; otherwise it must land on the last level.
  const bool preclude_enabled = options.preclude_last_level_data_seconds > 0;
  if (preclude_enabled) {
    ASSERT_GT(NumTableFilesAtLevel(5), 0);
    ASSERT_EQ(NumTableFilesAtLevel(6), 0);
  } else {
    ASSERT_EQ(NumTableFilesAtLevel(5), 0);
    ASSERT_GT(NumTableFilesAtLevel(6), 0);
  }

  // Whether or not the file is on the last level, it should keep the time
  // information, and no sequence number should have been zeroed.
  tables_props.clear();
  tp_mapping.Clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));

  ASSERT_EQ(tables_props.size(), 1);
  ASSERT_EQ(num_seqno_zeroing, 0);

  it = tables_props.begin();
  ASSERT_FALSE(it->second->seqno_to_time_mapping.empty());
  ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  ASSERT_TRUE(tp_mapping.TEST_IsEnforced());

  // Make half of the data expired.
  mock_clock_->MockSleepForSeconds(8000);
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  tables_props.clear();
  tp_mapping.Clear();
  ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));

  if (preclude_enabled) {
    ASSERT_EQ(tables_props.size(), 2);
  } else {
    ASSERT_EQ(tables_props.size(), 1);
  }
  ASSERT_GT(num_seqno_zeroing, 0);
  std::vector<KeyVersion> key_versions;
  ASSERT_OK(GetAllKeyVersions(db_, Slice(), Slice(),
                              std::numeric_limits<size_t>::max(),
                              &key_versions));
  // There must be more than 300 keys; the first 100 have their seqno zeroed
  // out and the last 100 do not.
  ASSERT_GT(key_versions.size(), 300);
  for (int i = 0; i < 100; ++i) {
    ASSERT_EQ(key_versions[i].sequence, 0);
  }
  const size_t last_index = key_versions.size() - 1;
  for (size_t back = 0; back < 100; ++back) {
    ASSERT_GT(key_versions[last_index - back].sequence, 0);
  }

  // Make all data expired and compact again to push it to the last level,
  // regardless of whether the tiering feature is enabled.
  mock_clock_->MockSleepForSeconds(20000);

  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  ASSERT_GT(num_seqno_zeroing, 0);
  ASSERT_GT(NumTableFilesAtLevel(6), 0);

  Close();
}
|
|
|
|
// Checks when the DB-wide seqno-to-time mapping is (and is not) pre-populated:
// a tracking CF added after open, read-only opens, a brand new DB opened with
// tracking options, and a brand new DB where only a non-default CF tracks.
TEST_P(SeqnoTimeTablePropTest, PrePopulateInDB) {
  Options base_options = CurrentOptions();
  base_options.env = mock_env_.get();
  base_options.disable_auto_compactions = true;
  base_options.create_missing_column_families = true;

  constexpr uint32_t kPreserveSecs = 1234567;
  Options track_options = base_options;
  SetTrackTimeDurationOptions(kPreserveSecs, track_options);

  // Each write below is followed by a mock sleep of roughly 1% of the
  // preserve duration, so a batch of 20 writes spans about 20% of it.
  constexpr int kNumWrites = 20;
  constexpr int kSleepSecsPerWrite = static_cast<int>(kPreserveSecs / 99);

  SeqnoToTimeMapping mapping;
  SequenceNumber newest_seqno;
  uint64_t t_begin;
  uint64_t t_finish;

  // #### DB#1, #2: No pre-population without preserve/preclude ####
  // #### But a single entry is added when preserve/preclude enabled ####
  for (bool with_write : {false, true}) {
    SCOPED_TRACE("with_write=" + std::to_string(with_write));
    DestroyAndReopen(base_options);
    mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
    ASSERT_TRUE(mapping.Empty());
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);

    if (!with_write) {
      // FIXME: currently, starting entry after CreateColumnFamily requires
      // non-zero seqno
      ASSERT_OK(Delete("blah"));
    } else {
      // Writes made before a CF with the preserve/preclude option exists
      // must not prevent the mapping from getting its starting entry.
      ASSERT_OK(Put("foo", "bar"));
      ASSERT_OK(Flush());
    }

    // Unfortunately, adding a CF with the preserve/preclude option after
    // open does not reserve seqnos with pre-populated time mappings.
    CreateColumnFamilies({"one"}, track_options);

    // So there is no pre-population here, only a single starting entry.
    mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
    newest_seqno = db_->GetLatestSequenceNumber();
    t_begin = mock_clock_->NowSeconds();
    ASSERT_EQ(mapping.Size(), 1);
    ASSERT_EQ(newest_seqno, 1U);
    // The current time resolves to the starting entry's seqno ...
    ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin), 1U);
    // ... and anything older is unknown.
    ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin - 1),
              kUnknownSeqnoBeforeAll);

    // Writes proceed normally while about 20% of the preserve time passes.
    for (int key_idx = 0; key_idx < kNumWrites; ++key_idx) {
      ASSERT_OK(Put(Key(key_idx), "value"));
      dbfull()->TEST_WaitForPeriodicTaskRun(
          [&] { mock_clock_->MockSleepForSeconds(kSleepSecsPerWrite); });
    }
    ASSERT_OK(Flush());

    // The mapping gains entries as time passes.
    mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
    newest_seqno = db_->GetLatestSequenceNumber();
    t_finish = mock_clock_->NowSeconds();
    ASSERT_EQ(mapping.Size(), 21);
    ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_finish), newest_seqno);
    ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin), 1U);
    ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin - 1),
              kUnknownSeqnoBeforeAll);
  }

  // ### DB#3, #4: Read-only DB with preserve/preclude after not ####
  // Read-only DBs don't need the mapping in the DB state (though it wouldn't
  // hurt anything); make sure opening them causes no issues.
  for (bool with_write : {false, true}) {
    SCOPED_TRACE("with_write=" + std::to_string(with_write));
    DestroyAndReopen(base_options);
    if (with_write) {
      ASSERT_OK(Put("foo", "bar"));
      ASSERT_OK(Flush());
    }

    // First read-only without tracking options, then read-only with them;
    // the expectations are identical for both.
    for (const Options* ro_options : {&base_options, &track_options}) {
      ASSERT_OK(ReadOnlyReopen(*ro_options));
      if (with_write) {
        ASSERT_EQ(Get("foo"), "bar");
      }
      mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
      ASSERT_EQ(mapping.Size(), 0);
      if (!with_write) {
        ASSERT_EQ(db_->GetLatestSequenceNumber(), 0);
      }
    }

    if (!with_write) {
      // Even a read-write re-open does not pre-populate, because
      // pre-population only applies to new DBs.
      Reopen(track_options);
      mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
      ASSERT_EQ(mapping.Size(), 0);
      ASSERT_EQ(db_->GetLatestSequenceNumber(), 0);
    }
  }

  // #### DB#5: Destroy and open with preserve/preclude option ####
  DestroyAndReopen(track_options);

  // A new DB opened with tracking options is pre-populated.
  constexpr auto kPrePopPairs = kMaxSeqnoTimePairsPerSST;
  mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
  newest_seqno = db_->GetLatestSequenceNumber();
  t_begin = mock_clock_->NowSeconds();
  ASSERT_EQ(mapping.Size(), kPrePopPairs);
  // One non-zero sequence number per pre-populated pair (this could be
  // revised if we want to use interpolation for better approximate time
  // mappings with no guarantee of erring in just one direction).
  ASSERT_EQ(newest_seqno, kPrePopPairs);
  // The current time resolves to the last pre-allocated seqno ...
  ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin), newest_seqno);
  // ... and the oldest tracked time resolves to the first one.
  ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin - kPreserveSecs), 1);

  // In more detail: the pre-allocated seqnos should be spread uniformly over
  // the tracked time.
  for (auto ratio : {0.0, 0.433, 0.678, 0.987, 1.0}) {
    // Query time is rounded up ...
    const uint64_t elapsed =
        static_cast<uint64_t>(ratio * kPreserveSecs + 0.9999999);
    const uint64_t query_time = t_begin - kPreserveSecs + elapsed;
    // ... while the estimated seqno is rounded down.
    const SequenceNumber want_seqno =
        static_cast<SequenceNumber>(ratio * (newest_seqno - 1)) + 1;
    ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(query_time), want_seqno);
  }

  // Writes proceed normally while about 20% of the preserve time passes.
  for (int key_idx = 0; key_idx < kNumWrites; ++key_idx) {
    ASSERT_OK(Put(Key(key_idx), "value"));
    dbfull()->TEST_WaitForPeriodicTaskRun(
        [&] { mock_clock_->MockSleepForSeconds(kSleepSecsPerWrite); });
  }
  ASSERT_OK(Flush());

  // Some pre-populated mappings are still visible, though some are displaced.
  mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
  newest_seqno = db_->GetLatestSequenceNumber();
  t_finish = mock_clock_->NowSeconds();
  ASSERT_GE(mapping.Size(), kPrePopPairs);
  ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_finish), newest_seqno);
  ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin - kPreserveSecs / 2),
            kPrePopPairs / 2);
  ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin - kPreserveSecs),
            kUnknownSeqnoBeforeAll);

  // Read-only DBs don't need the mapping in the DB state (though it wouldn't
  // hurt anything); make sure opening one causes no issues.
  ASSERT_OK(ReadOnlyReopen(track_options));
  ASSERT_EQ(Get(Key(0)), "value");
  mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
  ASSERT_EQ(mapping.Size(), 0);

  // #### DB#6: Destroy and open+create an extra CF with preserve/preclude ####
  // (default CF does not have the option)
  Destroy(track_options);
  ReopenWithColumnFamilies({"default", "one"},
                           List({base_options, track_options}));

  // Pre-population happens here too (checked less exhaustively).
  mapping = dbfull()->TEST_GetSeqnoToTimeMapping();
  newest_seqno = db_->GetLatestSequenceNumber();
  t_begin = mock_clock_->NowSeconds();
  ASSERT_EQ(mapping.Size(), kPrePopPairs);
  // One non-zero sequence number per pre-populated pair (this could be
  // revised if we want to use interpolation for better approximate time
  // mappings with no guarantee of erring in just one direction).
  ASSERT_EQ(newest_seqno, kPrePopPairs);
  // The current time resolves to the last pre-allocated seqno ...
  ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin), newest_seqno);
  // ... and the oldest tracked time resolves to the first one.
  ASSERT_EQ(mapping.GetProximalSeqnoBeforeTime(t_begin - kPreserveSecs), 1);

  // Even with no writes and a re-open without tracking options, sequence
  // numbers must not go backward into the pre-allocated range.
  // (Future work: persist the mapping)
  ReopenWithColumnFamilies({"default", "one"},
                           List({base_options, base_options}));
  ASSERT_EQ(newest_seqno, db_->GetLatestSequenceNumber());

  Close();
}
|
|
|
|
// Covers the accept / reject / replace-last-entry outcomes of
// SeqnoToTimeMapping::Append and their effect on Size().
TEST_F(SeqnoTimeTest, MappingAppend) {
  using Pair = SeqnoToTimeMapping::SeqnoTimePair;
  SeqnoToTimeMapping mapping;
  mapping.SetMaxTimeSpan(100).SetCapacity(10);

  // seqno == 0 is ignored, as it may mean the seqno was zeroed out
  ASSERT_FALSE(mapping.Append(0, 100));

  ASSERT_TRUE(mapping.Append(3, 200));
  auto expected_size = mapping.Size();
  // A regular append grows the mapping by one.
  ASSERT_TRUE(mapping.Append(10, 300));
  ++expected_size;
  ASSERT_EQ(expected_size, mapping.Size());

  // Same seqno with a newer time is rejected, because accepting it would
  // make GetProximalSeqnoBeforeTime queries worse (see later test).
  ASSERT_FALSE(mapping.Append(10, 301));
  ASSERT_EQ(expected_size, mapping.Size());
  ASSERT_EQ(mapping.TEST_GetLastEntry(), Pair({10, 300}));

  // Same or newer seqno with the same or an older time (relative to the last
  // successfully added entry) replaces the last entry. Append reports false
  // and the size is unchanged, yet GetProximalSeqnoBeforeTime improves.
  ASSERT_FALSE(mapping.Append(10, 299));
  ASSERT_EQ(expected_size, mapping.Size());
  ASSERT_EQ(mapping.TEST_GetLastEntry(), Pair({10, 299}));

  ASSERT_FALSE(mapping.Append(11, 299));
  ASSERT_EQ(expected_size, mapping.Size());
  ASSERT_EQ(mapping.TEST_GetLastEntry(), Pair({11, 299}));

  ASSERT_FALSE(mapping.Append(11, 250));
  ASSERT_EQ(expected_size, mapping.Size());
  ASSERT_EQ(mapping.TEST_GetLastEntry(), Pair({11, 250}));
}
|
|
|
|
// Exercises SetCapacity: shrinking trims existing entries, capacity 1 is
// coerced to 2, capacity 0 discards everything, UINT64_MAX is unlimited.
TEST_F(SeqnoTimeTest, CapacityLimits) {
  using Pair = SeqnoToTimeMapping::SeqnoTimePair;
  SeqnoToTimeMapping mapping;

  mapping.SetCapacity(3);
  EXPECT_TRUE(mapping.Append(10, 300));
  EXPECT_TRUE(mapping.Append(20, 400));
  EXPECT_TRUE(mapping.Append(30, 500));
  EXPECT_TRUE(mapping.Append(40, 600));
  // With a capacity as small as 3, the non-strict limit coincides with the
  // strict limit, so the fourth append leaves exactly 3 entries.
  EXPECT_EQ(3U, mapping.Size());
  EXPECT_EQ(mapping.TEST_GetLastEntry(), Pair({40, 600}));

  // Capacity 2 behaves the same way.
  mapping.SetCapacity(2);
  EXPECT_EQ(2U, mapping.Size());
  EXPECT_EQ(mapping.TEST_GetLastEntry(), Pair({40, 600}));

  EXPECT_TRUE(mapping.Append(50, 700));
  EXPECT_EQ(2U, mapping.Size());
  EXPECT_EQ(mapping.TEST_GetLastEntry(), Pair({50, 700}));

  // Capacity 1 is difficult to work with internally, so it is coerced to 2.
  mapping.SetCapacity(1);
  EXPECT_EQ(2U, mapping.Size());
  EXPECT_EQ(mapping.TEST_GetLastEntry(), Pair({50, 700}));

  EXPECT_TRUE(mapping.Append(60, 800));
  EXPECT_EQ(2U, mapping.Size());
  EXPECT_EQ(mapping.TEST_GetLastEntry(), Pair({60, 800}));

  // Capacity 0 throws everything away and rejects further appends.
  mapping.SetCapacity(0);
  EXPECT_EQ(0U, mapping.Size());

  EXPECT_FALSE(mapping.Append(70, 900));
  EXPECT_EQ(0U, mapping.Size());

  // Unlimited capacity keeps every appended entry.
  constexpr unsigned kManyEntries = 10101U;
  mapping.SetCapacity(UINT64_MAX);
  for (unsigned n = 1; n <= kManyEntries; ++n) {
    EXPECT_TRUE(mapping.Append(n, 11U * n));
  }
  EXPECT_EQ(kManyEntries, mapping.Size());
}
|
|
|
|
// Exercises SetMaxTimeSpan: no limit by default, trimming when the limit is
// tightened, and trimming as newer entries are appended.
TEST_F(SeqnoTimeTest, TimeSpanLimits) {
  SeqnoToTimeMapping mapping;

  // By default there is no time span limit: times spanning 2^1 .. 2^63 are
  // all retained.
  for (unsigned shift = 1; shift <= 63U; ++shift) {
    EXPECT_TRUE(mapping.Append(1000 + shift, uint64_t{1} << shift));
  }
  EXPECT_EQ(63U, mapping.Size());

  mapping.Clear();

  // Explicitly requesting "no limit" behaves the same.
  mapping.SetMaxTimeSpan(UINT64_MAX);
  for (unsigned shift = 1; shift <= 63U; ++shift) {
    EXPECT_TRUE(mapping.Append(1000 + shift, uint64_t{1} << shift));
  }
  EXPECT_EQ(63U, mapping.Size());

  // While the configured max time span is non-zero, 2 entries are generally
  // kept ...
  mapping.SetMaxTimeSpan(10);
  EXPECT_EQ(2U, mapping.Size());

  mapping.SetMaxTimeSpan(1);
  EXPECT_EQ(2U, mapping.Size());

  // ... but a zero max time span goes down to a single entry.
  mapping.SetMaxTimeSpan(0);
  EXPECT_EQ(1U, mapping.Size());

  EXPECT_TRUE(mapping.Append(2000, (uint64_t{1} << 63) + 42U));
  EXPECT_EQ(1U, mapping.Size());

  mapping.Clear();

  // More typical behavior. Note that one entry at or beyond the max span is
  // kept; the per-step notes below give the arithmetic behind each size.
  mapping.SetMaxTimeSpan(100);
  EXPECT_TRUE(mapping.Append(1001, 123));
  EXPECT_TRUE(mapping.Append(1002, 134));
  EXPECT_TRUE(mapping.Append(1003, 150));
  EXPECT_TRUE(mapping.Append(1004, 189));
  EXPECT_TRUE(mapping.Append(1005, 220));
  EXPECT_EQ(5U, mapping.Size());
  // 233 - 123 = 110: time 123 is beyond the span but is the one such entry.
  EXPECT_TRUE(mapping.Append(1006, 233));
  EXPECT_EQ(6U, mapping.Size());
  // 234 - 134 = 100: time 134 is now at the span, so time 123 is dropped.
  EXPECT_TRUE(mapping.Append(1007, 234));
  EXPECT_EQ(6U, mapping.Size());
  // 235 - 150 = 85: still within the span, so nothing is dropped.
  EXPECT_TRUE(mapping.Append(1008, 235));
  EXPECT_EQ(7U, mapping.Size());
  // 300 - 189 = 111: times 134 and 150 are dropped.
  EXPECT_TRUE(mapping.Append(1009, 300));
  EXPECT_EQ(6U, mapping.Size());
  // 350 - 235 = 115: only times 235, 300 and 350 remain.
  EXPECT_TRUE(mapping.Append(1010, 350));
  EXPECT_EQ(3U, mapping.Size());
  // 470 - 350 = 120: only times 350 and 470 remain.
  EXPECT_TRUE(mapping.Append(1011, 470));
  EXPECT_EQ(2U, mapping.Size());
}
|
|
|
|
// Checks GetProximalTimeBeforeSeqno / GetProximalSeqnoBeforeTime around
// entry boundaries, and how redundant or merged Append calls affect them.
TEST_F(SeqnoTimeTest, ProximalFunctions) {
  // A query value far beyond anything stored; used both as a seqno and as a
  // time.
  constexpr uint64_t kFarBeyond = 1000000000000U;

  SeqnoToTimeMapping mapping;
  mapping.SetCapacity(10);

  // An empty mapping knows nothing.
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(1), kUnknownTimeBeforeAll);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(kFarBeyond),
            kUnknownTimeBeforeAll);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(1), kUnknownSeqnoBeforeAll);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(kFarBeyond),
            kUnknownSeqnoBeforeAll);

  // (Taken from example in SeqnoToTimeMapping class comment)
  // Time 500 is after seqno 10 and before seqno 11
  EXPECT_TRUE(mapping.Append(10, 500));

  // Seqno too early
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(9), kUnknownTimeBeforeAll);
  // All that is known is that time 500 comes after seqno 10
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll);
  // Found
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(11), 500U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(kFarBeyond), 500U);

  // Time too early
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(499), kUnknownSeqnoBeforeAll);
  // Found
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(500), 10U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(501), 10U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(kFarBeyond), 10U);

  // More samples
  EXPECT_TRUE(mapping.Append(20, 600));
  EXPECT_TRUE(mapping.Append(30, 700));
  EXPECT_EQ(mapping.Size(), 3U);

  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(11), 500U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(20), 500U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(21), 600U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(30), 600U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(31), 700U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(kFarBeyond), 700U);

  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(499), kUnknownSeqnoBeforeAll);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(500), 10U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(501), 10U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(599), 10U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(600), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(601), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(699), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(700), 30U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(701), 30U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(kFarBeyond), 30U);

  // A redundant sample is ignored
  EXPECT_EQ(mapping.Size(), 3U);
  EXPECT_FALSE(mapping.Append(30, 700));
  EXPECT_EQ(mapping.Size(), 3U);

  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(30), 600U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(31), 700U);

  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(699), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(700), 30U);

  // A later sample with the same seqno is ignored too: that gives the best
  // GetProximalSeqnoBeforeTime results while saving entries in
  // SeqnoToTimeMapping.
  EXPECT_FALSE(mapping.Append(30, 800));

  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(30), 600U);
  // Could return 800, but saving space in SeqnoToTimeMapping instead.
  // Can reconsider if/when GetProximalTimeBeforeSeqno is used in
  // production.
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(31), 700U);

  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(699), 20U);
  // If the existing {30, 700} entry were replaced with {30, 800}, this
  // would return seqno 20 instead of 30, which would preclude more than
  // necessary for "preclude_last_level_data_seconds" feature.
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(700), 30U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(800), 30U);

  // Appending a genuinely newer sample still works
  EXPECT_TRUE(mapping.Append(40, 900));

  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(30), 600U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(41), 900U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(899), 30U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(900), 40U);

  // A burst of writes within a short time is an opportunity for better
  // GetProximalSeqnoBeforeTime() results, at the expense of
  // GetProximalTimeBeforeSeqno(). The false return indicates a merge with
  // the previous entry.
  EXPECT_FALSE(mapping.Append(50, 900));

  // These are subject to later revision depending on priorities
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(49), 700U);
  EXPECT_EQ(mapping.GetProximalTimeBeforeSeqno(51), 900U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(899), 30U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(900), 50U);
}
|
|
|
|
// Checks SeqnoToTimeMapping::PrePopulate for the minimal two-point case, a
// small seqno range, and a seqno range much larger than the configured
// capacity.
TEST_F(SeqnoTimeTest, PrePopulate) {
  SeqnoToTimeMapping test;
  test.SetMaxTimeSpan(100).SetCapacity(10);

  EXPECT_EQ(test.Size(), 0U);

  // Smallest case is like two Appends
  test.PrePopulate(10, 11, 500, 600);

  EXPECT_EQ(test.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll);
  EXPECT_EQ(test.GetProximalTimeBeforeSeqno(11), 500U);
  EXPECT_EQ(test.GetProximalTimeBeforeSeqno(12), 600U);

  test.Clear();

  // Populate a small range. Both populated ranges below cover the times
  // [kTimeIncrement, 2 * kTimeIncrement].
  constexpr uint64_t kTimeIncrement = 1234567;
  constexpr SequenceNumber kSmallRangeLast = 12;
  test.PrePopulate(1, kSmallRangeLast, kTimeIncrement, kTimeIncrement * 2);

  for (uint64_t i = 0; i <= kSmallRangeLast; ++i) {
    // NOTE: with 1 and 12 as the pre-populated end points, the duration is
    // broken into 11 equal(-ish) spans. The query is one second before the
    // i-th span boundary, so for i == 0 it precedes the first populated time
    // and the expected result is 0.
    uint64_t t = kTimeIncrement + (i * kTimeIncrement) / 11 - 1;
    EXPECT_EQ(test.GetProximalSeqnoBeforeTime(t), i);
  }

  test.Clear();

  // Populate an excessively large range (in the future we might want to
  // interpolate estimated times for seqnos between entries)
  constexpr SequenceNumber kLargeRangeLast = 34567;
  test.PrePopulate(1, kLargeRangeLast, kTimeIncrement, kTimeIncrement * 2);

  for (auto ratio : {0.0, 0.433, 0.678, 0.987, 1.0}) {
    // Round up query time
    uint64_t t = kTimeIncrement +
                 static_cast<uint64_t>(ratio * kTimeIncrement + 0.9999999);
    // Round down estimated seqno
    SequenceNumber s =
        static_cast<SequenceNumber>(ratio * (kLargeRangeLast - 1)) + 1;
    // Match
    // TODO: for now this is exact, but in the future might need approximation
    // bounds to account for limited samples.
    EXPECT_EQ(test.GetProximalSeqnoBeforeTime(t), s);
  }
}
|
|
|
|
// Checks how many entries CopyFromSeqnoRange copies for a variety of seqno
// ranges, from an empty, a single-entry and a five-entry source mapping.
TEST_F(SeqnoTimeTest, CopyFromSeqnoRange) {
  SeqnoToTimeMapping source;
  SeqnoToTimeMapping dest;

  // Resets `dest`, copies [from_seqno, to_seqno] out of `source` and reports
  // how many entries ended up in `dest`.
  auto copied_count = [&](SequenceNumber from_seqno, SequenceNumber to_seqno) {
    dest.Clear();
    dest.CopyFromSeqnoRange(source, from_seqno, to_seqno);
    return dest.Size();
  };

  // With zero to draw from
  EXPECT_EQ(copied_count(0, 1000000), 0U);
  EXPECT_EQ(copied_count(100, 100), 0U);
  EXPECT_EQ(copied_count(kMaxSequenceNumber, 0), 0U);

  // With one to draw from
  EXPECT_TRUE(source.Append(10, 500));

  EXPECT_EQ(copied_count(0, 1000000), 1U);

  // Includes one entry before range
  EXPECT_EQ(copied_count(100, 100), 1U);

  // Includes one entry before range (even if somewhat nonsensical)
  EXPECT_EQ(copied_count(kMaxSequenceNumber, 0), 1U);

  EXPECT_EQ(copied_count(0, 9), 0U);
  EXPECT_EQ(copied_count(0, 10), 1U);

  // With more to draw from
  EXPECT_TRUE(source.Append(20, 600));
  EXPECT_TRUE(source.Append(30, 700));
  EXPECT_TRUE(source.Append(40, 800));
  EXPECT_TRUE(source.Append(50, 900));

  EXPECT_EQ(copied_count(0, 1000000), 5U);

  // Includes one entry before range
  EXPECT_EQ(copied_count(100, 100), 1U);
  EXPECT_EQ(copied_count(19, 19), 1U);

  // Includes one entry before range (even if somewhat nonsensical)
  EXPECT_EQ(copied_count(kMaxSequenceNumber, 0), 1U);

  EXPECT_EQ(copied_count(0, 9), 0U);
  EXPECT_EQ(copied_count(0, 10), 1U);
  EXPECT_EQ(copied_count(20, 20), 2U);
  EXPECT_EQ(copied_count(20, 29), 2U);
  EXPECT_EQ(copied_count(20, 30), 3U);
}
|
|
|
|
// Checks that Enforce(now) purges entries that have aged out of the max time
// span relative to `now`, while always retaining the newest entry.
TEST_F(SeqnoTimeTest, EnforceWithNow) {
  constexpr uint64_t kMaxTimeSpan = 420;
  SeqnoToTimeMapping mapping;
  mapping.SetMaxTimeSpan(kMaxTimeSpan).SetCapacity(10);

  EXPECT_EQ(mapping.Size(), 0U);

  // Enforcing an empty mapping is safe and leaves it empty.
  mapping.Enforce(/*now=*/500);

  EXPECT_EQ(mapping.Size(), 0U);

  // (Taken from example in SeqnoToTimeMapping class comment)
  // Time 500 is after seqno 10 and before seqno 11
  EXPECT_TRUE(mapping.Append(10, 500));
  EXPECT_TRUE(mapping.Append(20, 600));
  EXPECT_TRUE(mapping.Append(30, 700));
  EXPECT_TRUE(mapping.Append(40, 800));
  EXPECT_TRUE(mapping.Append(50, 900));

  EXPECT_EQ(mapping.Size(), 5U);

  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(500), 10U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(599), 10U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(600), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(699), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(700), 30U);
  // etc.

  // The first entry must be kept for these values of `now`.
  mapping.Enforce(/*now=*/500 + kMaxTimeSpan);
  EXPECT_EQ(mapping.Size(), 5U);
  mapping.Enforce(/*now=*/599 + kMaxTimeSpan);
  EXPECT_EQ(mapping.Size(), 5U);

  // One second later the first entry is purged.
  mapping.Enforce(/*now=*/600 + kMaxTimeSpan);
  EXPECT_EQ(mapping.Size(), 4U);

  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(500), kUnknownSeqnoBeforeAll);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(599), kUnknownSeqnoBeforeAll);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(600), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(699), 20U);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(700), 30U);

  // No effect
  mapping.Enforce(/*now=*/600 + kMaxTimeSpan);
  EXPECT_EQ(mapping.Size(), 4U);
  mapping.Enforce(/*now=*/699 + kMaxTimeSpan);
  EXPECT_EQ(mapping.Size(), 4U);

  // Purges the next two entries
  mapping.Enforce(/*now=*/899 + kMaxTimeSpan);
  EXPECT_EQ(mapping.Size(), 2U);

  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(799), kUnknownSeqnoBeforeAll);
  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(899), 40U);

  // The last entry is always kept, to have a non-trivial seqno bound
  mapping.Enforce(/*now=*/10000000);
  EXPECT_EQ(mapping.Size(), 1U);

  EXPECT_EQ(mapping.GetProximalSeqnoBeforeTime(10000000), 50U);
}
|
|
|
|
// Checks that Enforce() after AddUnenforced() de-duplicates, merges and
// orders entries, by comparing against the expected internal mapping.
TEST_F(SeqnoTimeTest, Sort) {
  using PairDeque = std::deque<SeqnoToTimeMapping::SeqnoTimePair>;
  SeqnoToTimeMapping mapping;

  // single entry
  mapping.AddUnenforced(10, 11);
  mapping.Enforce();
  ASSERT_EQ(mapping.Size(), 1);

  // duplicate is ignored
  mapping.AddUnenforced(10, 11);
  mapping.Enforce();
  ASSERT_EQ(mapping.Size(), 1);

  // add some revised mappings for that seqno
  mapping.AddUnenforced(10, 10);
  mapping.AddUnenforced(10, 12);

  // The older time is kept, which currently favors
  // GetProximalSeqnoBeforeTime over GetProximalTimeBeforeSeqno.
  mapping.Enforce();
  PairDeque want;
  want.emplace_back(10, 10);
  ASSERT_EQ(want, mapping.TEST_GetInternalMapping());

  // An inconsistent / unuseful mapping leaves the result unchanged
  mapping.AddUnenforced(9, 11);
  mapping.Enforce();
  ASSERT_EQ(want, mapping.TEST_GetInternalMapping());

  // A mapping that is considered more useful (for
  // GetProximalSeqnoBeforeTime) replaces the existing one
  mapping.AddUnenforced(11, 9);
  mapping.Enforce();
  want.clear();
  want.emplace_back(11, 9);
  ASSERT_EQ(want, mapping.TEST_GetInternalMapping());

  // Add more good, non-mergeable entries
  mapping.AddUnenforced(1, 5);
  mapping.AddUnenforced(100, 100);
  mapping.Enforce();
  want.clear();
  want.emplace_back(1, 5);
  want.emplace_back(11, 9);
  want.emplace_back(100, 100);
  ASSERT_EQ(want, mapping.TEST_GetInternalMapping());
}
|
|
|
|
// Round-trips a mapping through EncodeTo / DecodeFrom: empty, full-size, and
// reduced to a smaller capacity, checking that the reduced mapping still
// approximates the original within a bounded seqno error.
TEST_F(SeqnoTimeTest, EncodeDecodeBasic) {
  constexpr uint32_t kOriginalSamples = 1000;
  SeqnoToTimeMapping original;
  original.SetCapacity(kOriginalSamples);

  // An empty mapping encodes to an empty string, which decodes back to an
  // empty mapping.
  std::string encoded;
  original.EncodeTo(encoded);
  ASSERT_TRUE(encoded.empty());

  ASSERT_OK(original.DecodeFrom(encoded));
  ASSERT_EQ(original.Size(), 0U);

  Random rnd(123);
  for (uint32_t seqno = 1; seqno <= kOriginalSamples; ++seqno) {
    ASSERT_TRUE(original.Append(seqno, seqno * 10 + rnd.Uniform(10)));
  }
  encoded.clear();
  original.EncodeTo(encoded);
  ASSERT_FALSE(encoded.empty());

  // A full-size round trip reproduces the mapping exactly.
  SeqnoToTimeMapping decoded;
  ASSERT_OK(decoded.DecodeFrom(encoded));
  ASSERT_TRUE(decoded.TEST_IsEnforced());
  ASSERT_EQ(original.Size(), decoded.Size());
  ASSERT_EQ(original.TEST_GetInternalMapping(),
            decoded.TEST_GetInternalMapping());

  // Encode a reduced set of mappings from a capacity-limited copy.
  constexpr uint32_t kReducedSize = 51U;
  encoded.clear();
  SeqnoToTimeMapping(original).SetCapacity(kReducedSize).EncodeTo(encoded);

  decoded.Clear();
  ASSERT_OK(decoded.DecodeFrom(encoded));
  ASSERT_TRUE(decoded.TEST_IsEnforced());
  ASSERT_EQ(decoded.Size(), kReducedSize);

  // Allowed seqno error of the reduced mapping relative to the original.
  constexpr uint32_t kSeqnoFuzz = kOriginalSamples * 3 / 2 / kReducedSize;

  for (uint64_t t = 1; t <= kOriginalSamples * 11; t += 1 + t / 100) {
    SCOPED_TRACE("t=" + std::to_string(t));
    // `original` has the more accurate time mapping, but the reduced set
    // should nicely span and approximate the whole range
    auto exact_seqno = original.GetProximalSeqnoBeforeTime(t);
    auto approx_seqno = decoded.GetProximalSeqnoBeforeTime(t);
    // The oldest entry should be preserved exactly
    ASSERT_EQ(exact_seqno == kUnknownSeqnoBeforeAll,
              approx_seqno == kUnknownSeqnoBeforeAll);
    // The newest entry should be preserved exactly
    ASSERT_EQ(exact_seqno == kOriginalSamples,
              approx_seqno == kOriginalSamples);

    // Approximate seqno before time should err toward older seqno to avoid
    // classifying data as old too early, but should be within a reasonable
    // bound.
    EXPECT_GE(approx_seqno + kSeqnoFuzz, exact_seqno);
    EXPECT_GE(exact_seqno, approx_seqno);
  }
}
|
|
|
|
// Checks which entries survive when a mapping is encoded through a copy with
// a smaller capacity: the first and last entries are kept, and interior
// entries are dropped so as to minimize the remaining time gaps.
TEST_F(SeqnoTimeTest, EncodeDecodeMinimizeTimeGaps) {
  SeqnoToTimeMapping test;
  test.SetCapacity(10);

  // Setup appends are asserted so that a rejected or merged entry fails here
  // instead of surfacing as a confusing mismatch further down.
  ASSERT_TRUE(test.Append(1, 10));
  ASSERT_TRUE(test.Append(5, 17));
  ASSERT_TRUE(test.Append(6, 25));
  ASSERT_TRUE(test.Append(8, 30));

  std::string output;
  SeqnoToTimeMapping(test).SetCapacity(3).EncodeTo(output);

  SeqnoToTimeMapping decoded;
  ASSERT_OK(decoded.DecodeFrom(output));
  ASSERT_TRUE(decoded.TEST_IsEnforced());

  ASSERT_EQ(decoded.Size(), 3U);

  auto seqs = decoded.TEST_GetInternalMapping();
  std::deque<SeqnoToTimeMapping::SeqnoTimePair> expected;
  expected.emplace_back(1, 10);
  expected.emplace_back(5, 17);
  expected.emplace_back(8, 30);
  ASSERT_EQ(expected, seqs);

  // Add a few entries with much larger times
  ASSERT_TRUE(test.Append(10, 100));
  ASSERT_TRUE(test.Append(13, 200));
  ASSERT_TRUE(test.Append(40, 250));
  ASSERT_TRUE(test.Append(70, 300));

  output.clear();
  SeqnoToTimeMapping(test).SetCapacity(4).EncodeTo(output);
  decoded.Clear();
  ASSERT_OK(decoded.DecodeFrom(output));
  ASSERT_TRUE(decoded.TEST_IsEnforced());
  ASSERT_EQ(decoded.Size(), 4U);

  expected.clear();
  // Except for beginning and end, entries are removed that minimize the
  // remaining time gaps, regardless of seqno gaps.
  expected.emplace_back(1, 10);
  expected.emplace_back(10, 100);
  expected.emplace_back(13, 200);
  expected.emplace_back(70, 300);
  seqs = decoded.TEST_GetInternalMapping();
  ASSERT_EQ(expected, seqs);
}
|
|
|
|
// Round-trips a value and a write time through PackValueAndWriteTime and the
// matching parse helpers.
// NOTE(review): the suite is named PackValueAndSeqnoTest but the body
// exercises the *WriteTime* helpers; the names of this test and
// PackValueAndWriteTimeTest look swapped -- confirm before renaming.
TEST(PackValueAndSeqnoTest, Basic) {
  constexpr uint64_t kWriteTime = 30u;
  std::string buffer;
  Slice packed = PackValueAndWriteTime("foo", kWriteTime, &buffer);

  auto [value, parsed_write_time] = ParsePackedValueWithWriteTime(packed);
  ASSERT_EQ(value, "foo");
  ASSERT_EQ(parsed_write_time, kWriteTime);

  // The value-only parser must recover the same value.
  ASSERT_EQ(ParsePackedValueForValue(packed), "foo");
}
|
|
|
|
// Round-trips a value and a sequence number through PackValueAndSeqno and the
// matching parse helpers.
// NOTE(review): the suite is named PackValueAndWriteTimeTest but the body
// exercises the *Seqno* helpers; the names of this test and
// PackValueAndSeqnoTest look swapped -- confirm before renaming.
TEST(PackValueAndWriteTimeTest, Basic) {
  constexpr SequenceNumber kSeqno = 30u;
  std::string buffer;
  Slice packed = PackValueAndSeqno("foo", kSeqno, &buffer);

  auto [value, parsed_seqno] = ParsePackedValueWithSeqno(packed);
  ASSERT_EQ(value, "foo");
  ASSERT_EQ(parsed_seqno, kSeqno);

  // The value-only parser must recover the same value.
  ASSERT_EQ(ParsePackedValueForValue(packed), "foo");
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|