2020-04-10 23:03:33 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "db/compaction/compaction.h"
|
|
|
|
#include "db/db_test_util.h"
|
|
|
|
#include "port/stack_trace.h"
|
2021-09-08 14:45:59 +00:00
|
|
|
#include "test_util/testutil.h"
|
2020-04-10 23:03:33 +00:00
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
std::string Key1(uint64_t key) {
|
|
|
|
std::string ret;
|
|
|
|
PutFixed64(&ret, key);
|
|
|
|
std::reverse(ret.begin(), ret.end());
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string Timestamp(uint64_t ts) {
|
|
|
|
std::string ret;
|
|
|
|
PutFixed64(&ret, ts);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
|
|
class TimestampCompatibleCompactionTest : public DBTestBase {
|
|
|
|
public:
|
|
|
|
TimestampCompatibleCompactionTest()
|
2021-07-23 15:37:27 +00:00
|
|
|
: DBTestBase("ts_compatible_compaction_test", /*env_do_fsync=*/true) {}
|
2020-04-10 23:03:33 +00:00
|
|
|
|
|
|
|
std::string Get(const std::string& key, uint64_t ts) {
|
|
|
|
ReadOptions read_opts;
|
|
|
|
std::string ts_str = Timestamp(ts);
|
|
|
|
Slice ts_slice = ts_str;
|
|
|
|
read_opts.timestamp = &ts_slice;
|
|
|
|
std::string value;
|
|
|
|
Status s = db_->Get(read_opts, key, &value);
|
|
|
|
if (s.IsNotFound()) {
|
|
|
|
value.assign("NOT_FOUND");
|
|
|
|
} else if (!s.ok()) {
|
|
|
|
value.assign(s.ToString());
|
|
|
|
}
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
TEST_F(TimestampCompatibleCompactionTest, UserKeyCrossFileBoundary) {
|
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.env = env_;
|
|
|
|
options.compaction_style = kCompactionStyleLevel;
|
2022-02-08 20:14:25 +00:00
|
|
|
options.comparator = test::BytewiseComparatorWithU64TsWrapper();
|
2020-04-10 23:03:33 +00:00
|
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
|
|
constexpr size_t kNumKeysPerFile = 101;
|
2021-09-08 14:45:59 +00:00
|
|
|
options.memtable_factory.reset(
|
|
|
|
test::NewSpecialSkipListFactory(kNumKeysPerFile));
|
2020-04-10 23:03:33 +00:00
|
|
|
DestroyAndReopen(options);
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
|
|
SyncPoint::GetInstance()->SetCallBack(
|
|
|
|
"LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
|
|
|
|
const auto* compaction = reinterpret_cast<Compaction*>(arg);
|
|
|
|
ASSERT_NE(nullptr, compaction);
|
|
|
|
ASSERT_EQ(0, compaction->start_level());
|
|
|
|
ASSERT_EQ(1, compaction->num_input_levels());
|
|
|
|
// Check that all 3 L0 ssts are picked for level compaction.
|
|
|
|
ASSERT_EQ(3, compaction->num_input_files(0));
|
|
|
|
});
|
|
|
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
// Write a L0 with keys 0, 1, ..., 99 with ts from 100 to 199.
|
|
|
|
uint64_t ts = 100;
|
|
|
|
uint64_t key = 0;
|
|
|
|
WriteOptions write_opts;
|
|
|
|
for (; key < kNumKeysPerFile - 1; ++key, ++ts) {
|
|
|
|
std::string ts_str = Timestamp(ts);
|
Revise APIs related to user-defined timestamp (#8946)
Summary:
ajkr reminded me that we have a rule of not including per-kv related data in `WriteOptions`.
Namely, `WriteOptions` should not include information about "what-to-write", but should just
include information about "how-to-write".
According to this rule, `WriteOptions::timestamp` (experimental) is clearly a violation. Therefore,
this PR removes `WriteOptions::timestamp` for compliance.
After the removal, we need to pass timestamp info via another set of APIs. This PR proposes a set
of overloaded functions `Put(write_opts, key, value, ts)`, `Delete(write_opts, key, ts)`, and
`SingleDelete(write_opts, key, ts)`. Planned to add `Write(write_opts, batch, ts)`, but its complexity
made me reconsider doing it in another PR (maybe).
For better checking and returning error early, we also add a new set of APIs to `WriteBatch` that take
extra `timestamp` information when writing to `WriteBatch`es.
These set of APIs in `WriteBatchWithIndex` are currently not supported, and are on our TODO list.
Removed `WriteBatch::AssignTimestamps()` and renamed `WriteBatch::AssignTimestamp()` to
`WriteBatch::UpdateTimestamps()` since this method require that all keys have space for timestamps
allocated already and multiple timestamps can be updated.
The constructor of `WriteBatch` now takes a fourth argument `default_cf_ts_sz` which is the timestamp
size of the default column family. This will be used to allocate space when calling APIs that do not
specify a column family handle.
Also, updated `DB::Get()`, `DB::MultiGet()`, `DB::NewIterator()`, `DB::NewIterators()` methods, replacing
some assertions about timestamp to returning Status code.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8946
Test Plan:
make check
./db_bench -benchmarks=fillseq,fillrandom,readrandom,readseq,deleterandom -user_timestamp_size=8
./db_stress --user_timestamp_size=8 -nooverwritepercent=0 -test_secondary=0 -secondary_catch_up_one_in=0 -continuous_verification_interval=0
Make sure there is no perf regression by running the following
```
./db_bench_opt -db=/dev/shm/rocksdb -use_existing_db=0 -level0_stop_writes_trigger=256 -level0_slowdown_writes_trigger=256 -level0_file_num_compaction_trigger=256 -disable_wal=1 -duration=10 -benchmarks=fillrandom
```
Before this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.831 micros/op 546235 ops/sec; 60.4 MB/s
```
After this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.820 micros/op 549404 ops/sec; 60.8 MB/s
```
Reviewed By: ltamasi
Differential Revision: D33721359
Pulled By: riversand963
fbshipit-source-id: c131561534272c120ffb80711d42748d21badf09
2022-02-02 06:17:46 +00:00
|
|
|
ASSERT_OK(
|
|
|
|
db_->Put(write_opts, Key1(key), ts_str, "foo_" + std::to_string(key)));
|
2020-04-10 23:03:33 +00:00
|
|
|
}
|
|
|
|
// Write another L0 with keys 99 with newer ts.
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
uint64_t saved_read_ts1 = ts++;
|
|
|
|
key = 99;
|
|
|
|
for (int i = 0; i < 4; ++i, ++ts) {
|
|
|
|
std::string ts_str = Timestamp(ts);
|
Revise APIs related to user-defined timestamp (#8946)
Summary:
ajkr reminded me that we have a rule of not including per-kv related data in `WriteOptions`.
Namely, `WriteOptions` should not include information about "what-to-write", but should just
include information about "how-to-write".
According to this rule, `WriteOptions::timestamp` (experimental) is clearly a violation. Therefore,
this PR removes `WriteOptions::timestamp` for compliance.
After the removal, we need to pass timestamp info via another set of APIs. This PR proposes a set
of overloaded functions `Put(write_opts, key, value, ts)`, `Delete(write_opts, key, ts)`, and
`SingleDelete(write_opts, key, ts)`. Planned to add `Write(write_opts, batch, ts)`, but its complexity
made me reconsider doing it in another PR (maybe).
For better checking and returning error early, we also add a new set of APIs to `WriteBatch` that take
extra `timestamp` information when writing to `WriteBatch`es.
These set of APIs in `WriteBatchWithIndex` are currently not supported, and are on our TODO list.
Removed `WriteBatch::AssignTimestamps()` and renamed `WriteBatch::AssignTimestamp()` to
`WriteBatch::UpdateTimestamps()` since this method require that all keys have space for timestamps
allocated already and multiple timestamps can be updated.
The constructor of `WriteBatch` now takes a fourth argument `default_cf_ts_sz` which is the timestamp
size of the default column family. This will be used to allocate space when calling APIs that do not
specify a column family handle.
Also, updated `DB::Get()`, `DB::MultiGet()`, `DB::NewIterator()`, `DB::NewIterators()` methods, replacing
some assertions about timestamp to returning Status code.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8946
Test Plan:
make check
./db_bench -benchmarks=fillseq,fillrandom,readrandom,readseq,deleterandom -user_timestamp_size=8
./db_stress --user_timestamp_size=8 -nooverwritepercent=0 -test_secondary=0 -secondary_catch_up_one_in=0 -continuous_verification_interval=0
Make sure there is no perf regression by running the following
```
./db_bench_opt -db=/dev/shm/rocksdb -use_existing_db=0 -level0_stop_writes_trigger=256 -level0_slowdown_writes_trigger=256 -level0_file_num_compaction_trigger=256 -disable_wal=1 -duration=10 -benchmarks=fillrandom
```
Before this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.831 micros/op 546235 ops/sec; 60.4 MB/s
```
After this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.820 micros/op 549404 ops/sec; 60.8 MB/s
```
Reviewed By: ltamasi
Differential Revision: D33721359
Pulled By: riversand963
fbshipit-source-id: c131561534272c120ffb80711d42748d21badf09
2022-02-02 06:17:46 +00:00
|
|
|
ASSERT_OK(
|
|
|
|
db_->Put(write_opts, Key1(key), ts_str, "bar_" + std::to_string(key)));
|
2020-04-10 23:03:33 +00:00
|
|
|
}
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
uint64_t saved_read_ts2 = ts++;
|
|
|
|
// Write another L0 with keys 99, 100, 101, ..., 150
|
|
|
|
for (; key <= 150; ++key, ++ts) {
|
|
|
|
std::string ts_str = Timestamp(ts);
|
Revise APIs related to user-defined timestamp (#8946)
Summary:
ajkr reminded me that we have a rule of not including per-kv related data in `WriteOptions`.
Namely, `WriteOptions` should not include information about "what-to-write", but should just
include information about "how-to-write".
According to this rule, `WriteOptions::timestamp` (experimental) is clearly a violation. Therefore,
this PR removes `WriteOptions::timestamp` for compliance.
After the removal, we need to pass timestamp info via another set of APIs. This PR proposes a set
of overloaded functions `Put(write_opts, key, value, ts)`, `Delete(write_opts, key, ts)`, and
`SingleDelete(write_opts, key, ts)`. Planned to add `Write(write_opts, batch, ts)`, but its complexity
made me reconsider doing it in another PR (maybe).
For better checking and returning error early, we also add a new set of APIs to `WriteBatch` that take
extra `timestamp` information when writing to `WriteBatch`es.
These set of APIs in `WriteBatchWithIndex` are currently not supported, and are on our TODO list.
Removed `WriteBatch::AssignTimestamps()` and renamed `WriteBatch::AssignTimestamp()` to
`WriteBatch::UpdateTimestamps()` since this method require that all keys have space for timestamps
allocated already and multiple timestamps can be updated.
The constructor of `WriteBatch` now takes a fourth argument `default_cf_ts_sz` which is the timestamp
size of the default column family. This will be used to allocate space when calling APIs that do not
specify a column family handle.
Also, updated `DB::Get()`, `DB::MultiGet()`, `DB::NewIterator()`, `DB::NewIterators()` methods, replacing
some assertions about timestamp to returning Status code.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8946
Test Plan:
make check
./db_bench -benchmarks=fillseq,fillrandom,readrandom,readseq,deleterandom -user_timestamp_size=8
./db_stress --user_timestamp_size=8 -nooverwritepercent=0 -test_secondary=0 -secondary_catch_up_one_in=0 -continuous_verification_interval=0
Make sure there is no perf regression by running the following
```
./db_bench_opt -db=/dev/shm/rocksdb -use_existing_db=0 -level0_stop_writes_trigger=256 -level0_slowdown_writes_trigger=256 -level0_file_num_compaction_trigger=256 -disable_wal=1 -duration=10 -benchmarks=fillrandom
```
Before this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.831 micros/op 546235 ops/sec; 60.4 MB/s
```
After this PR
```
DB path: [/dev/shm/rocksdb]
fillrandom : 1.820 micros/op 549404 ops/sec; 60.8 MB/s
```
Reviewed By: ltamasi
Differential Revision: D33721359
Pulled By: riversand963
fbshipit-source-id: c131561534272c120ffb80711d42748d21badf09
2022-02-02 06:17:46 +00:00
|
|
|
ASSERT_OK(
|
|
|
|
db_->Put(write_opts, Key1(key), ts_str, "foo1_" + std::to_string(key)));
|
2020-04-10 23:03:33 +00:00
|
|
|
}
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
// Wait for compaction to finish
|
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
|
|
|
uint64_t read_ts = ts;
|
|
|
|
ASSERT_EQ("foo_99", Get(Key1(99), saved_read_ts1));
|
|
|
|
ASSERT_EQ("bar_99", Get(Key1(99), saved_read_ts2));
|
|
|
|
ASSERT_EQ("foo1_99", Get(Key1(99), read_ts));
|
|
|
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|