// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/compaction/compaction.h"
|
|
|
|
#include "db/db_test_util.h"
|
|
|
|
#include "port/stack_trace.h"
|
2021-09-08 14:45:59 +00:00
|
|
|
#include "test_util/testutil.h"
|
2020-04-10 23:03:33 +00:00
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {

namespace {
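// Encode `key` as a fixed-width 64-bit integer. PutFixed64 writes
// little-endian, so the bytes are reversed to obtain a big-endian encoding
// that sorts numerically under the bytewise comparator.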
std::string Key1(uint64_t key) {
  std::string ret;
  PutFixed64(&ret, key);
  std::reverse(ret.begin(), ret.end());
  return ret;
}
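
// Encode `ts` as the fixed-width 64-bit timestamp expected by
// BytewiseComparatorWithU64TsWrapper().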
std::string Timestamp(uint64_t ts) {
  std::string ret;
  PutFixed64(&ret, ts);
  return ret;
}
}  // anonymous namespace
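
// Test fixture providing a Get() helper that reads at an explicit
// user-defined timestamp.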
class TimestampCompatibleCompactionTest : public DBTestBase {
 public:
  TimestampCompatibleCompactionTest()
      : DBTestBase("ts_compatible_compaction_test", /*env_do_fsync=*/true) {}

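  // Read `key` at timestamp `ts`. With a user-defined-timestamp comparator,
  // every read must supply a timestamp via ReadOptions::timestamp.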
  std::string Get(const std::string& key, uint64_t ts) {
    ReadOptions read_opts;
    std::string ts_str = Timestamp(ts);
    Slice ts_slice = ts_str;
    read_opts.timestamp = &ts_slice;
    std::string value;
    Status s = db_->Get(read_opts, key, &value);
    if (s.IsNotFound()) {
      value.assign("NOT_FOUND");
    } else if (!s.ok()) {
      value.assign(s.ToString());
    }
    return value;
  }
};

TEST_F(TimestampCompatibleCompactionTest, UserKeyCrossFileBoundary) {
  Options options = CurrentOptions();
  options.env = env_;
  options.compaction_style = kCompactionStyleLevel;
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.level0_file_num_compaction_trigger = 3;
  constexpr size_t kNumKeysPerFile = 101;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);
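
  // Hook the level compaction picker so the test can inspect the compaction
  // it picks before the compaction runs.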
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        const auto* compaction = static_cast<Compaction*>(arg);
        ASSERT_NE(nullptr, compaction);
        ASSERT_EQ(0, compaction->start_level());
        ASSERT_EQ(1, compaction->num_input_levels());
        // Check that all 3 L0 ssts are picked for level compaction.
        ASSERT_EQ(3, compaction->num_input_files(0));
      });
  SyncPoint::GetInstance()->EnableProcessing();

  // Write an L0 with keys 0, 1, ..., 99 with ts from 100 to 199.
  uint64_t ts = 100;
  uint64_t key = 0;
  WriteOptions write_opts;
  for (; key < kNumKeysPerFile - 1; ++key, ++ts) {
    std::string ts_str = Timestamp(ts);
    ASSERT_OK(
        db_->Put(write_opts, Key1(key), ts_str, "foo_" + std::to_string(key)));
  }
  // Flush, then write another L0 that contains only user key 99, at newer
  // timestamps.
  ASSERT_OK(Flush());
  uint64_t saved_read_ts1 = ts++;
  key = 99;
  for (int i = 0; i < 4; ++i, ++ts) {
    std::string ts_str = Timestamp(ts);
    ASSERT_OK(
        db_->Put(write_opts, Key1(key), ts_str, "bar_" + std::to_string(key)));
  }
  ASSERT_OK(Flush());
  uint64_t saved_read_ts2 = ts++;
  // Write another L0 with keys 99, 100, 101, ..., 150.
  for (; key <= 150; ++key, ++ts) {
    std::string ts_str = Timestamp(ts);
    ASSERT_OK(db_->Put(write_opts, Key1(key), ts_str,
                       "foo1_" + std::to_string(key)));
  }
  ASSERT_OK(Flush());
  // Wait for compaction to finish.
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  uint64_t read_ts = ts;
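  // User key 99 appears in all three flushed files; a read at each saved
  // timestamp must still see the value that was newest at that timestamp.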
  ASSERT_EQ("foo_99", Get(Key1(99), saved_read_ts1));
  ASSERT_EQ("bar_99", Get(Key1(99), saved_read_ts2));
  ASSERT_EQ("foo1_99", Get(Key1(99), read_ts));
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(TimestampCompatibleCompactionTest, MultipleSubCompactions) {
  Options options = CurrentOptions();
  options.env = env_;
  options.compaction_style = kCompactionStyleUniversal;
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.level0_file_num_compaction_trigger = 3;
  options.max_subcompactions = 3;
  options.target_file_size_base = 1024;
  options.statistics = CreateDBStatistics();
  DestroyAndReopen(options);
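
  // With target_file_size_base at 1 KB and max_subcompactions = 3, the manual
  // compaction below should be split across multiple subcompactions.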

  uint64_t ts = 100;
  uint64_t key = 0;
  WriteOptions write_opts;

  // Write keys 0, 1, ..., 499 with ts from 100 to 599.
  {
    for (; key <= 499; ++key, ++ts) {
      std::string ts_str = Timestamp(ts);
      ASSERT_OK(db_->Put(write_opts, Key1(key), ts_str,
                         "foo_" + std::to_string(key)));
    }
    // Flush so that this batch forms its own L0 file.
    ASSERT_OK(Flush());
  }

  // Write keys 500, ..., 999 with ts from 600 to 1099.
  {
    for (; key <= 999; ++key, ++ts) {
      std::string ts_str = Timestamp(ts);
      ASSERT_OK(db_->Put(write_opts, Key1(key), ts_str,
                         "foo_" + std::to_string(key)));
    }
    ASSERT_OK(Flush());
  }

  // Run a manual L0 -> L1 compaction; it runs to completion before returning.
  {
    ASSERT_OK(dbfull()->RunManualCompaction(
        static_cast_with_check<ColumnFamilyHandleImpl>(
            db_->DefaultColumnFamily())
            ->cfd(),
        0 /* input_level */, 1 /* output_level */, CompactRangeOptions(),
        nullptr /* begin */, nullptr /* end */, true /* exclusive */,
        true /* disallow_trivial_move */,
        std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
        "" /*trim_ts*/));
  }

  // Check stats to verify that multiple subcompactions were scheduled, which
  // requires the subcompaction boundaries to be non-null.
  {
    HistogramData num_sub_compactions;
    options.statistics->histogramData(NUM_SUBCOMPACTIONS_SCHEDULED,
                                      &num_sub_compactions);
    ASSERT_GT(num_sub_compactions.sum, 1);
  }

  for (key = 0; key <= 999; ++key) {
    ASSERT_EQ("foo_" + std::to_string(key), Get(Key1(key), ts));
  }
}

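// An SstPartitioner that requests a cut after every key and disallows trivial
// moves, so each compaction output file ends up containing a single user key.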
class TestFilePartitioner : public SstPartitioner {
 public:
  explicit TestFilePartitioner() = default;
  ~TestFilePartitioner() override = default;

  const char* Name() const override { return "TestFilePartitioner"; }
  PartitionerResult ShouldPartition(
      const PartitionerRequest& /*request*/) override {
    return PartitionerResult::kRequired;
  }
  bool CanDoTrivialMove(const Slice& /*smallest_user_key*/,
                        const Slice& /*largest_user_key*/) override {
    return false;
  }
};

class TestFilePartitionerFactory : public SstPartitionerFactory {
 public:
  explicit TestFilePartitionerFactory() = default;
  std::unique_ptr<SstPartitioner> CreatePartitioner(
      const SstPartitioner::Context& /*context*/) const override {
    std::unique_ptr<SstPartitioner> ret =
        std::make_unique<TestFilePartitioner>();
    return ret;
  }
  const char* Name() const override { return "TestFilePartitionerFactory"; }
};

TEST_F(TimestampCompatibleCompactionTest, CompactFilesRangeCheckL0) {
  Options options = CurrentOptions();
  options.env = env_;
  options.sst_partitioner_factory =
      std::make_shared<TestFilePartitionerFactory>();
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
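
  // Every file written below overwrites the same user key at increasing
  // timestamps, so all resulting L0 files overlap in user-key range.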

  constexpr int kNumFiles = 10;
  constexpr int kKeysPerFile = 2;
  const std::string user_key = "foo";
  constexpr uint64_t start_ts = 10000;

  uint64_t cur_ts = start_ts;
  for (int k = 0; k < kNumFiles; ++k) {
    for (int i = 0; i < kKeysPerFile; ++i) {
      ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts),
                         "v" + std::to_string(i)));
      ++cur_ts;
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }

  std::vector<std::string> input_files{};
  {
    std::vector<std::string> files;
    ASSERT_OK(env_->GetChildren(dbname_, &files));
    for (const auto& f : files) {
      uint64_t file_num = 0;
      FileType file_type = FileType::kWalFile;
      if (!ParseFileName(f, &file_num, &file_type) ||
          file_type != FileType::kTableFile) {
        continue;
      }
      input_files.emplace_back(f);
    }
    // Sorting here by name, which also happens to sort by file generation
    // order.
    std::sort(input_files.begin(), input_files.end());
    assert(static_cast<size_t>(kNumFiles) == input_files.size());
    std::vector<std::string> tmp;
    tmp.emplace_back(input_files[input_files.size() / 2]);
    input_files.swap(tmp);
  }
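
  // Compact only the middle file. Because every L0 file overlaps on the same
  // user key, CompactFiles is expected to also pull the older overlapping L0
  // files into the compaction, as checked below.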
  {
    std::vector<std::string> output_file_names;
    CompactionJobInfo compaction_job_info;
    ASSERT_OK(db_->CompactFiles(CompactionOptions(), input_files,
                                /*output_level=*/1, /*output_path_id=*/-1,
                                &output_file_names, &compaction_job_info));
    // We expect that all L0 files older than the provided input file were
    // also included in the compaction.
    ASSERT_EQ(static_cast<size_t>(kNumFiles / 2 + 1),
              compaction_job_info.input_files.size());
  }
}

TEST_F(TimestampCompatibleCompactionTest, CompactFilesRangeCheckL1) {
  Options options = CurrentOptions();
  options.env = env_;
  options.sst_partitioner_factory =
      std::make_shared<TestFilePartitionerFactory>();
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();

  constexpr int kNumFiles = 4;
  options.level0_file_num_compaction_trigger = kNumFiles;

  DestroyAndReopen(options);
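
  // With the forced partitioner, the automatic L0 -> L1 compaction triggered
  // below should produce one L1 file per key.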

  constexpr int kKeysPerFile = 2;
  const std::string user_key = "foo";
  constexpr uint64_t start_ts = 10000;

  uint64_t cur_ts = start_ts;
  // Generate some initial files in both L0 and L1.
  for (int k = 0; k < kNumFiles; ++k) {
    for (int i = 0; i < kKeysPerFile; ++i) {
      ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts),
                         "v" + std::to_string(i)));
      ++cur_ts;
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ(0, NumTableFilesAtLevel(/*level=*/0, /*cf=*/0));
  ASSERT_EQ(kNumFiles * kKeysPerFile,
            NumTableFilesAtLevel(/*level=*/1, /*cf=*/0));

  constexpr int additional_l0s = 2;
  for (int i = 0; i < additional_l0s; ++i, ++cur_ts) {
    ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts), "v"));
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(additional_l0s, NumTableFilesAtLevel(/*level=*/0, /*cf=*/0));
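
  // Pick all L0 files plus a single L1 file as the caller-provided inputs.
  // Since every file overlaps on the same user key, CompactFiles is expected
  // to expand the input set to include the remaining L1 files as well.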
  std::vector<std::string> inputs;
  {
    std::vector<LiveFileMetaData> fmetas;
    db_->GetLiveFilesMetaData(&fmetas);
    bool included_one_l1 = false;
    for (const auto& meta : fmetas) {
      if (meta.level == 0) {
        inputs.emplace_back(meta.relative_filename);
      } else if (!included_one_l1) {
        inputs.emplace_back(meta.relative_filename);
        included_one_l1 = true;
      }
    }
  }
  ASSERT_EQ(static_cast<size_t>(3), inputs.size());
  {
    std::vector<std::string> output_file_names;
    CompactionJobInfo compaction_job_info;

    ASSERT_OK(db_->CompactFiles(CompactionOptions(), inputs, /*output_level=*/1,
                                /*output_path_id=*/-1, &output_file_names,
                                &compaction_job_info));
    ASSERT_EQ(kNumFiles * kKeysPerFile + 2, output_file_names.size());
    ASSERT_EQ(kNumFiles * kKeysPerFile + 2,
              static_cast<int>(compaction_job_info.input_files.size()));
  }
}

TEST_F(TimestampCompatibleCompactionTest, EmptyCompactionOutput) {
  Options options = CurrentOptions();
  options.env = env_;
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  DestroyAndReopen(options);
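
  // The only write is a range tombstone, so once the tombstone is dropped the
  // compaction below produces no output file at all.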
  std::string ts_str = Timestamp(1);
  WriteOptions wopts;
  ASSERT_OK(
      db_->DeleteRange(wopts, db_->DefaultColumnFamily(), "k1", "k3", ts_str));
  ASSERT_OK(Flush());

  ts_str = Timestamp(3);
  Slice ts = ts_str;
  CompactRangeOptions cro;
  // The range tombstone is older than full_history_ts_low, so it will be
  // dropped during the compaction.
  cro.full_history_ts_low = &ts;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
}

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}