2016-09-07 22:41:54 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2016-09-07 22:41:54 +00:00
|
|
|
|
2017-02-28 19:05:08 +00:00
|
|
|
#include <functional>
|
2023-01-27 22:51:39 +00:00
|
|
|
#include <memory>
|
2020-04-09 04:19:49 +00:00
|
|
|
|
2016-09-07 22:41:54 +00:00
|
|
|
#include "db/db_test_util.h"
|
2020-04-09 04:19:49 +00:00
|
|
|
#include "db/dbformat.h"
|
2019-05-30 03:44:08 +00:00
|
|
|
#include "file/filename.h"
|
Implement XXH3 block checksum type (#9069)
Summary:
XXH3 - latest hash function that is extremely fast on large
data, easily faster than crc32c on most any x86_64 hardware. In
integrating this hash function, I have handled the compression type byte
in a non-standard way to avoid using the streaming API (extra data
movement and active code size because of hash function complexity). This
approach got a thumbs-up from Yann Collet.
Existing functionality change:
* reject bad ChecksumType in options with InvalidArgument
This change split off from https://github.com/facebook/rocksdb/issues/9058 because context-aware checksum is
likely to be handled through different configuration than ChecksumType.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9069
Test Plan:
tests updated, and substantially expanded. Unit tests now check
that we don't accidentally change the values generated by the checksum
algorithms ("schema test") and that we properly handle
invalid/unrecognized checksum types in options or in file footer.
DBTestBase::ChangeOptions (etc.) updated from two to one configuration
changing from default CRC32c ChecksumType. The point of this test code
is to detect possible interactions among features, and the likelihood of
some bad interaction being detected by including configurations other
than XXH3 and CRC32c--and then not detected by stress/crash test--is
extremely low.
Stress/crash test also updated (manual run long enough to see it accepts
new checksum type). db_bench also updated for microbenchmarking
checksums.
### Performance microbenchmark (PORTABLE=0 DEBUG_LEVEL=0, Broadwell processor)
./db_bench -benchmarks=crc32c,xxhash,xxhash64,xxh3,crc32c,xxhash,xxhash64,xxh3,crc32c,xxhash,xxhash64,xxh3
crc32c : 0.200 micros/op 5005220 ops/sec; 19551.6 MB/s (4096 per op)
xxhash : 0.807 micros/op 1238408 ops/sec; 4837.5 MB/s (4096 per op)
xxhash64 : 0.421 micros/op 2376514 ops/sec; 9283.3 MB/s (4096 per op)
xxh3 : 0.171 micros/op 5858391 ops/sec; 22884.3 MB/s (4096 per op)
crc32c : 0.206 micros/op 4859566 ops/sec; 18982.7 MB/s (4096 per op)
xxhash : 0.793 micros/op 1260850 ops/sec; 4925.2 MB/s (4096 per op)
xxhash64 : 0.410 micros/op 2439182 ops/sec; 9528.1 MB/s (4096 per op)
xxh3 : 0.161 micros/op 6202872 ops/sec; 24230.0 MB/s (4096 per op)
crc32c : 0.203 micros/op 4924686 ops/sec; 19237.1 MB/s (4096 per op)
xxhash : 0.839 micros/op 1192388 ops/sec; 4657.8 MB/s (4096 per op)
xxhash64 : 0.424 micros/op 2357391 ops/sec; 9208.6 MB/s (4096 per op)
xxh3 : 0.162 micros/op 6182678 ops/sec; 24151.1 MB/s (4096 per op)
As you can see, especially once warmed up, xxh3 is fastest.
### Performance macrobenchmark (PORTABLE=0 DEBUG_LEVEL=0, Broadwell processor)
Test
for I in `seq 1 50`; do for CHK in 0 1 2 3 4; do TEST_TMPDIR=/dev/shm/rocksdb$CHK ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=30000000 -checksum_type=$CHK 2>&1 | grep 'micros/op' | tee -a results-$CHK & done; wait; done
Results (ops/sec)
for FILE in results*; do echo -n "$FILE "; awk '{ s += $5; c++; } END { print 1.0 * s / c; }' < $FILE; done
results-0 252118 # kNoChecksum
results-1 251588 # kCRC32c
results-2 251863 # kxxHash
results-3 252016 # kxxHash64
results-4 252038 # kXXH3
Reviewed By: mrambacher
Differential Revision: D31905249
Pulled By: pdillinger
fbshipit-source-id: cb9b998ebe2523fc7c400eedf62124a78bf4b4d1
2021-10-29 05:13:47 +00:00
|
|
|
#include "options/options_helper.h"
|
2016-09-07 22:41:54 +00:00
|
|
|
#include "port/port.h"
|
|
|
|
#include "port/stack_trace.h"
|
2020-06-19 23:16:57 +00:00
|
|
|
#include "rocksdb/sst_file_reader.h"
|
2016-09-07 22:41:54 +00:00
|
|
|
#include "rocksdb/sst_file_writer.h"
|
2019-05-30 18:21:38 +00:00
|
|
|
#include "test_util/testutil.h"
|
2020-07-09 21:33:42 +00:00
|
|
|
#include "util/random.h"
|
2021-03-26 05:08:16 +00:00
|
|
|
#include "util/thread_guard.h"
|
2020-07-09 21:33:42 +00:00
|
|
|
#include "utilities/fault_injection_env.h"
|
2016-09-07 22:41:54 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2016-09-07 22:41:54 +00:00
|
|
|
|
2019-05-24 04:54:23 +00:00
|
|
|
// A test environment that can be configured to fail the Link operation.
|
2023-01-27 22:51:39 +00:00
|
|
|
class ExternalSSTTestFS : public FileSystemWrapper {
|
2019-05-24 04:54:23 +00:00
|
|
|
public:
|
2023-01-27 22:51:39 +00:00
|
|
|
ExternalSSTTestFS(const std::shared_ptr<FileSystem>& t, bool fail_link)
|
|
|
|
: FileSystemWrapper(t), fail_link_(fail_link) {}
|
|
|
|
static const char* kClassName() { return "ExternalSSTTestFS"; }
|
2022-01-05 00:44:54 +00:00
|
|
|
const char* Name() const override { return kClassName(); }
|
2019-05-24 04:54:23 +00:00
|
|
|
|
2023-01-27 22:51:39 +00:00
|
|
|
IOStatus LinkFile(const std::string& s, const std::string& t,
|
|
|
|
const IOOptions& options, IODebugContext* dbg) override {
|
2019-05-24 04:54:23 +00:00
|
|
|
if (fail_link_) {
|
2023-01-27 22:51:39 +00:00
|
|
|
return IOStatus::NotSupported("Link failed");
|
2019-05-24 04:54:23 +00:00
|
|
|
}
|
2023-01-27 22:51:39 +00:00
|
|
|
return target()->LinkFile(s, t, options, dbg);
|
2019-05-24 04:54:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void set_fail_link(bool fail_link) { fail_link_ = fail_link; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
bool fail_link_;
|
|
|
|
};
|
|
|
|
|
2021-12-10 16:12:09 +00:00
|
|
|
class ExternalSSTFileTestBase : public DBTestBase {
|
|
|
|
public:
|
|
|
|
ExternalSSTFileTestBase()
|
|
|
|
: DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) {
|
|
|
|
sst_files_dir_ = dbname_ + "/sst_files/";
|
|
|
|
DestroyAndRecreateExternalSSTFilesDir();
|
|
|
|
}
|
|
|
|
|
|
|
|
void DestroyAndRecreateExternalSSTFilesDir() {
|
|
|
|
ASSERT_OK(DestroyDir(env_, sst_files_dir_));
|
|
|
|
ASSERT_OK(env_->CreateDir(sst_files_dir_));
|
|
|
|
}
|
|
|
|
|
|
|
|
~ExternalSSTFileTestBase() override {
|
|
|
|
DestroyDir(env_, sst_files_dir_).PermitUncheckedError();
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
std::string sst_files_dir_;
|
|
|
|
};
|
|
|
|
|
2019-05-24 04:54:23 +00:00
|
|
|
class ExternSSTFileLinkFailFallbackTest
|
2021-12-10 16:12:09 +00:00
|
|
|
: public ExternalSSTFileTestBase,
|
2019-05-24 04:54:23 +00:00
|
|
|
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
|
|
|
public:
|
2023-01-27 22:51:39 +00:00
|
|
|
ExternSSTFileLinkFailFallbackTest() {
|
|
|
|
fs_ = std::make_shared<ExternalSSTTestFS>(env_->GetFileSystem(), true);
|
|
|
|
test_env_.reset(new CompositeEnvWrapper(env_, fs_));
|
2019-05-24 04:54:23 +00:00
|
|
|
options_ = CurrentOptions();
|
|
|
|
options_.disable_auto_compactions = true;
|
2023-01-27 22:51:39 +00:00
|
|
|
options_.env = test_env_.get();
|
2019-05-24 04:54:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void TearDown() override {
|
|
|
|
delete db_;
|
|
|
|
db_ = nullptr;
|
|
|
|
ASSERT_OK(DestroyDB(dbname_, options_));
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
Options options_;
|
2023-01-27 22:51:39 +00:00
|
|
|
std::shared_ptr<ExternalSSTTestFS> fs_;
|
|
|
|
std::unique_ptr<Env> test_env_;
|
2019-05-24 04:54:23 +00:00
|
|
|
};
|
|
|
|
|
2019-01-30 00:16:53 +00:00
|
|
|
class ExternalSSTFileTest
|
2021-12-10 16:12:09 +00:00
|
|
|
: public ExternalSSTFileTestBase,
|
2019-01-30 00:16:53 +00:00
|
|
|
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
2016-09-07 22:41:54 +00:00
|
|
|
public:
|
2024-01-05 19:53:57 +00:00
|
|
|
ExternalSSTFileTest() = default;
|
2016-09-07 22:41:54 +00:00
|
|
|
|
2019-02-13 03:07:25 +00:00
|
|
|
Status GenerateOneExternalFile(
|
|
|
|
const Options& options, ColumnFamilyHandle* cfh,
|
|
|
|
std::vector<std::pair<std::string, std::string>>& data, int file_id,
|
|
|
|
bool sort_data, std::string* external_file_path,
|
|
|
|
std::map<std::string, std::string>* true_data) {
|
|
|
|
// Generate a file id if not provided
|
|
|
|
if (-1 == file_id) {
|
|
|
|
file_id = (++last_file_id_);
|
|
|
|
}
|
|
|
|
// Sort data if asked to do so
|
|
|
|
if (sort_data) {
|
|
|
|
std::sort(data.begin(), data.end(),
|
|
|
|
[&](const std::pair<std::string, std::string>& e1,
|
|
|
|
const std::pair<std::string, std::string>& e2) {
|
|
|
|
return options.comparator->Compare(e1.first, e2.first) < 0;
|
|
|
|
});
|
|
|
|
auto uniq_iter = std::unique(
|
|
|
|
data.begin(), data.end(),
|
|
|
|
[&](const std::pair<std::string, std::string>& e1,
|
|
|
|
const std::pair<std::string, std::string>& e2) {
|
|
|
|
return options.comparator->Compare(e1.first, e2.first) == 0;
|
|
|
|
});
|
|
|
|
data.resize(uniq_iter - data.begin());
|
|
|
|
}
|
2022-05-06 20:03:58 +00:00
|
|
|
std::string file_path = sst_files_dir_ + std::to_string(file_id);
|
2019-02-13 03:07:25 +00:00
|
|
|
SstFileWriter sst_file_writer(EnvOptions(), options, cfh);
|
|
|
|
Status s = sst_file_writer.Open(file_path);
|
|
|
|
if (!s.ok()) {
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
for (const auto& entry : data) {
|
|
|
|
s = sst_file_writer.Put(entry.first, entry.second);
|
|
|
|
if (!s.ok()) {
|
2020-12-22 23:08:17 +00:00
|
|
|
sst_file_writer.Finish().PermitUncheckedError();
|
2019-02-13 03:07:25 +00:00
|
|
|
return s;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s = sst_file_writer.Finish();
|
|
|
|
if (s.ok() && external_file_path != nullptr) {
|
|
|
|
*external_file_path = file_path;
|
|
|
|
}
|
|
|
|
if (s.ok() && nullptr != true_data) {
|
|
|
|
for (const auto& entry : data) {
|
|
|
|
true_data->insert({entry.first, entry.second});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
Status GenerateAndAddExternalFile(
|
|
|
|
const Options options,
|
|
|
|
std::vector<std::pair<std::string, std::string>> data, int file_id = -1,
|
2018-11-01 23:21:30 +00:00
|
|
|
bool allow_global_seqno = false, bool write_global_seqno = false,
|
2019-04-08 18:12:25 +00:00
|
|
|
bool verify_checksums_before_ingest = true, bool ingest_behind = false,
|
|
|
|
bool sort_data = false,
|
2016-12-06 21:56:17 +00:00
|
|
|
std::map<std::string, std::string>* true_data = nullptr,
|
|
|
|
ColumnFamilyHandle* cfh = nullptr) {
|
2016-10-21 00:05:32 +00:00
|
|
|
// Generate a file id if not provided
|
|
|
|
if (file_id == -1) {
|
|
|
|
file_id = last_file_id_ + 1;
|
|
|
|
last_file_id_++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sort data if asked to do so
|
|
|
|
if (sort_data) {
|
|
|
|
std::sort(data.begin(), data.end(),
|
|
|
|
[&](const std::pair<std::string, std::string>& e1,
|
|
|
|
const std::pair<std::string, std::string>& e2) {
|
|
|
|
return options.comparator->Compare(e1.first, e2.first) < 0;
|
|
|
|
});
|
|
|
|
auto uniq_iter = std::unique(
|
|
|
|
data.begin(), data.end(),
|
|
|
|
[&](const std::pair<std::string, std::string>& e1,
|
|
|
|
const std::pair<std::string, std::string>& e2) {
|
|
|
|
return options.comparator->Compare(e1.first, e2.first) == 0;
|
|
|
|
});
|
|
|
|
data.resize(uniq_iter - data.begin());
|
|
|
|
}
|
2022-05-06 20:03:58 +00:00
|
|
|
std::string file_path = sst_files_dir_ + std::to_string(file_id);
|
2017-03-13 18:17:19 +00:00
|
|
|
SstFileWriter sst_file_writer(EnvOptions(), options, cfh);
|
2016-10-18 23:59:37 +00:00
|
|
|
|
|
|
|
Status s = sst_file_writer.Open(file_path);
|
|
|
|
if (!s.ok()) {
|
|
|
|
return s;
|
|
|
|
}
|
2016-10-21 00:05:32 +00:00
|
|
|
for (auto& entry : data) {
|
2017-05-26 19:05:19 +00:00
|
|
|
s = sst_file_writer.Put(entry.first, entry.second);
|
2016-10-18 23:59:37 +00:00
|
|
|
if (!s.ok()) {
|
2020-12-22 23:08:17 +00:00
|
|
|
sst_file_writer.Finish().PermitUncheckedError();
|
2016-10-18 23:59:37 +00:00
|
|
|
return s;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s = sst_file_writer.Finish();
|
|
|
|
|
|
|
|
if (s.ok()) {
|
2016-10-21 00:05:32 +00:00
|
|
|
IngestExternalFileOptions ifo;
|
|
|
|
ifo.allow_global_seqno = allow_global_seqno;
|
2018-11-01 23:21:30 +00:00
|
|
|
ifo.write_global_seqno = allow_global_seqno ? write_global_seqno : false;
|
2019-01-30 00:16:53 +00:00
|
|
|
ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
|
2019-04-08 18:12:25 +00:00
|
|
|
ifo.ingest_behind = ingest_behind;
|
2017-05-17 18:32:26 +00:00
|
|
|
if (cfh) {
|
|
|
|
s = db_->IngestExternalFile(cfh, {file_path}, ifo);
|
|
|
|
} else {
|
|
|
|
s = db_->IngestExternalFile({file_path}, ifo);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s.ok() && true_data) {
|
|
|
|
for (auto& entry : data) {
|
|
|
|
(*true_data)[entry.first] = entry.second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2019-02-13 03:07:25 +00:00
|
|
|
Status GenerateAndAddExternalFiles(
|
|
|
|
const Options& options,
|
|
|
|
const std::vector<ColumnFamilyHandle*>& column_families,
|
|
|
|
const std::vector<IngestExternalFileOptions>& ifos,
|
|
|
|
std::vector<std::vector<std::pair<std::string, std::string>>>& data,
|
|
|
|
int file_id, bool sort_data,
|
|
|
|
std::vector<std::map<std::string, std::string>>& true_data) {
|
|
|
|
if (-1 == file_id) {
|
|
|
|
file_id = (++last_file_id_);
|
|
|
|
}
|
|
|
|
// Generate external SST files, one for each column family
|
|
|
|
size_t num_cfs = column_families.size();
|
|
|
|
assert(ifos.size() == num_cfs);
|
|
|
|
assert(data.size() == num_cfs);
|
|
|
|
std::vector<IngestExternalFileArg> args(num_cfs);
|
|
|
|
for (size_t i = 0; i != num_cfs; ++i) {
|
|
|
|
std::string external_file_path;
|
2020-12-22 23:08:17 +00:00
|
|
|
Status s = GenerateOneExternalFile(
|
2019-02-13 03:07:25 +00:00
|
|
|
options, column_families[i], data[i], file_id, sort_data,
|
|
|
|
&external_file_path,
|
|
|
|
true_data.size() == num_cfs ? &true_data[i] : nullptr);
|
|
|
|
if (!s.ok()) {
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
++file_id;
|
|
|
|
|
|
|
|
args[i].column_family = column_families[i];
|
|
|
|
args[i].external_files.push_back(external_file_path);
|
|
|
|
args[i].options = ifos[i];
|
|
|
|
}
|
2020-12-22 23:08:17 +00:00
|
|
|
return db_->IngestExternalFiles(args);
|
2019-02-13 03:07:25 +00:00
|
|
|
}
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
Status GenerateAndAddExternalFile(
|
|
|
|
const Options options, std::vector<std::pair<int, std::string>> data,
|
2018-11-01 23:21:30 +00:00
|
|
|
int file_id = -1, bool allow_global_seqno = false,
|
2019-01-30 00:16:53 +00:00
|
|
|
bool write_global_seqno = false,
|
2019-04-08 18:12:25 +00:00
|
|
|
bool verify_checksums_before_ingest = true, bool ingest_behind = false,
|
|
|
|
bool sort_data = false,
|
2016-12-06 21:56:17 +00:00
|
|
|
std::map<std::string, std::string>* true_data = nullptr,
|
|
|
|
ColumnFamilyHandle* cfh = nullptr) {
|
2016-10-21 00:05:32 +00:00
|
|
|
std::vector<std::pair<std::string, std::string>> file_data;
|
|
|
|
for (auto& entry : data) {
|
|
|
|
file_data.emplace_back(Key(entry.first), entry.second);
|
|
|
|
}
|
2019-04-08 18:12:25 +00:00
|
|
|
return GenerateAndAddExternalFile(options, file_data, file_id,
|
|
|
|
allow_global_seqno, write_global_seqno,
|
|
|
|
verify_checksums_before_ingest,
|
|
|
|
ingest_behind, sort_data, true_data, cfh);
|
2016-10-21 00:05:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Status GenerateAndAddExternalFile(
|
|
|
|
const Options options, std::vector<int> keys, int file_id = -1,
|
2018-11-01 23:21:30 +00:00
|
|
|
bool allow_global_seqno = false, bool write_global_seqno = false,
|
2019-04-08 18:12:25 +00:00
|
|
|
bool verify_checksums_before_ingest = true, bool ingest_behind = false,
|
|
|
|
bool sort_data = false,
|
2016-12-06 21:56:17 +00:00
|
|
|
std::map<std::string, std::string>* true_data = nullptr,
|
|
|
|
ColumnFamilyHandle* cfh = nullptr) {
|
2016-10-21 00:05:32 +00:00
|
|
|
std::vector<std::pair<std::string, std::string>> file_data;
|
|
|
|
for (auto& k : keys) {
|
2022-05-06 20:03:58 +00:00
|
|
|
file_data.emplace_back(Key(k), Key(k) + std::to_string(file_id));
|
2016-10-21 00:05:32 +00:00
|
|
|
}
|
2019-04-08 18:12:25 +00:00
|
|
|
return GenerateAndAddExternalFile(options, file_data, file_id,
|
|
|
|
allow_global_seqno, write_global_seqno,
|
|
|
|
verify_checksums_before_ingest,
|
|
|
|
ingest_behind, sort_data, true_data, cfh);
|
2016-10-21 00:05:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Status DeprecatedAddFile(const std::vector<std::string>& files,
|
|
|
|
bool move_files = false,
|
2018-11-01 23:21:30 +00:00
|
|
|
bool skip_snapshot_check = false,
|
|
|
|
bool skip_write_global_seqno = false) {
|
2016-10-21 00:05:32 +00:00
|
|
|
IngestExternalFileOptions opts;
|
|
|
|
opts.move_files = move_files;
|
|
|
|
opts.snapshot_consistency = !skip_snapshot_check;
|
|
|
|
opts.allow_global_seqno = false;
|
|
|
|
opts.allow_blocking_flush = false;
|
2018-11-01 23:21:30 +00:00
|
|
|
opts.write_global_seqno = !skip_write_global_seqno;
|
2016-10-21 00:05:32 +00:00
|
|
|
return db_->IngestExternalFile(files, opts);
|
|
|
|
}
|
|
|
|
|
2016-09-07 22:41:54 +00:00
|
|
|
protected:
|
2016-10-21 00:05:32 +00:00
|
|
|
int last_file_id_ = 0;
|
2016-09-07 22:41:54 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Writes eight SST files with one SstFileWriter, checking the reported
// ExternalSstFileInfo of each, then ingests them in various DB states and
// verifies which ingestions are accepted and what the DB returns afterwards.
TEST_F(ExternalSSTFileTest, Basic) {
  do {
    Options options = CurrentOptions();

    SstFileWriter writer(EnvOptions(), options);

    // No file has been opened yet, so the writer must report a size of 0.
    ASSERT_EQ(writer.FileSize(), 0);

    // file1.sst: puts for keys 0 .. 99
    const std::string file1 = sst_files_dir_ + "file1.sst";
    ASSERT_OK(writer.Open(file1));
    for (int k = 0; k < 100; ++k) {
      ASSERT_OK(writer.Put(Key(k), Key(k) + "_val"));
    }
    ExternalSstFileInfo info1;
    ASSERT_OK(writer.Finish(&info1));

    // A successfully written file has a non-zero size.
    ASSERT_GT(writer.FileSize(), 0);

    ASSERT_EQ(info1.file_path, file1);
    ASSERT_EQ(info1.num_entries, 100);
    ASSERT_EQ(info1.smallest_key, Key(0));
    ASSERT_EQ(info1.largest_key, Key(99));
    ASSERT_EQ(info1.num_range_del_entries, 0);
    ASSERT_EQ(info1.smallest_range_del_key, "");
    ASSERT_EQ(info1.largest_range_del_key, "");
    // The writer is already finished, so no further value can be added.
    ASSERT_NOK(writer.Put(Key(100), "bad_val"));

    // file2.sst: puts for keys 100 .. 199
    const std::string file2 = sst_files_dir_ + "file2.sst";
    ASSERT_OK(writer.Open(file2));
    for (int k = 100; k < 200; ++k) {
      ASSERT_OK(writer.Put(Key(k), Key(k) + "_val"));
    }
    // Key 99 does not sort after the last added key, so it is rejected.
    ASSERT_NOK(writer.Put(Key(99), "bad_val"));
    ExternalSstFileInfo info2;
    ASSERT_OK(writer.Finish(&info2));
    ASSERT_EQ(info2.file_path, file2);
    ASSERT_EQ(info2.num_entries, 100);
    ASSERT_EQ(info2.smallest_key, Key(100));
    ASSERT_EQ(info2.largest_key, Key(199));

    // file3.sst: puts for keys 195 .. 299 (key range overlaps file2)
    const std::string file3 = sst_files_dir_ + "file3.sst";
    ASSERT_OK(writer.Open(file3));
    for (int k = 195; k < 300; ++k) {
      ASSERT_OK(writer.Put(Key(k), Key(k) + "_val_overlap"));
    }
    ExternalSstFileInfo info3;
    ASSERT_OK(writer.Finish(&info3));

    // The size stays non-zero after a successful Finish().
    ASSERT_GT(writer.FileSize(), 0);
    ASSERT_EQ(info3.file_path, file3);
    ASSERT_EQ(info3.num_entries, 105);
    ASSERT_EQ(info3.smallest_key, Key(195));
    ASSERT_EQ(info3.largest_key, Key(299));

    // file4.sst: puts for keys 30 .. 39 (key range overlaps file1)
    const std::string file4 = sst_files_dir_ + "file4.sst";
    ASSERT_OK(writer.Open(file4));
    for (int k = 30; k < 40; ++k) {
      ASSERT_OK(writer.Put(Key(k), Key(k) + "_val_overlap"));
    }
    ExternalSstFileInfo info4;
    ASSERT_OK(writer.Finish(&info4));
    ASSERT_EQ(info4.file_path, file4);
    ASSERT_EQ(info4.num_entries, 10);
    ASSERT_EQ(info4.smallest_key, Key(30));
    ASSERT_EQ(info4.largest_key, Key(39));

    // file5.sst: puts for keys 400 .. 499
    const std::string file5 = sst_files_dir_ + "file5.sst";
    ASSERT_OK(writer.Open(file5));
    for (int k = 400; k < 500; ++k) {
      ASSERT_OK(writer.Put(Key(k), Key(k) + "_val"));
    }
    ExternalSstFileInfo info5;
    ASSERT_OK(writer.Finish(&info5));
    ASSERT_EQ(info5.file_path, file5);
    ASSERT_EQ(info5.num_entries, 100);
    ASSERT_EQ(info5.smallest_key, Key(400));
    ASSERT_EQ(info5.largest_key, Key(499));

    // file6.sst: a single range deletion 400 => 500, no point entries
    const std::string file6 = sst_files_dir_ + "file6.sst";
    ASSERT_OK(writer.Open(file6));
    ASSERT_OK(writer.DeleteRange(Key(400), Key(500)));
    ExternalSstFileInfo info6;
    ASSERT_OK(writer.Finish(&info6));
    ASSERT_EQ(info6.file_path, file6);
    ASSERT_EQ(info6.num_entries, 0);
    ASSERT_EQ(info6.smallest_key, "");
    ASSERT_EQ(info6.largest_key, "");
    ASSERT_EQ(info6.num_range_del_entries, 1);
    ASSERT_EQ(info6.smallest_range_del_key, Key(400));
    ASSERT_EQ(info6.largest_range_del_key, Key(500));

    // file7.sst: range deletions 500 => 550 and 525 => 575, interleaved with
    // puts for the even keys 520 .. 598
    const std::string file7 = sst_files_dir_ + "file7.sst";
    ASSERT_OK(writer.Open(file7));
    ASSERT_OK(writer.DeleteRange(Key(500), Key(550)));
    for (int k = 520; k < 560; k += 2) {
      ASSERT_OK(writer.Put(Key(k), Key(k) + "_val"));
    }
    ASSERT_OK(writer.DeleteRange(Key(525), Key(575)));
    for (int k = 560; k < 600; k += 2) {
      ASSERT_OK(writer.Put(Key(k), Key(k) + "_val"));
    }
    ExternalSstFileInfo info7;
    ASSERT_OK(writer.Finish(&info7));
    ASSERT_EQ(info7.file_path, file7);
    ASSERT_EQ(info7.num_entries, 40);
    ASSERT_EQ(info7.smallest_key, Key(520));
    ASSERT_EQ(info7.largest_key, Key(598));
    ASSERT_EQ(info7.num_range_del_entries, 2);
    ASSERT_EQ(info7.smallest_range_del_key, Key(500));
    ASSERT_EQ(info7.largest_range_del_key, Key(575));

    // file8.sst: a single range deletion 600 => 700, no point entries
    const std::string file8 = sst_files_dir_ + "file8.sst";
    ASSERT_OK(writer.Open(file8));
    ASSERT_OK(writer.DeleteRange(Key(600), Key(700)));
    ExternalSstFileInfo info8;
    ASSERT_OK(writer.Finish(&info8));
    ASSERT_EQ(info8.file_path, file8);
    ASSERT_EQ(info8.num_entries, 0);
    ASSERT_EQ(info8.smallest_key, "");
    ASSERT_EQ(info8.largest_key, "");
    ASSERT_EQ(info8.num_range_del_entries, 1);
    ASSERT_EQ(info8.smallest_range_del_key, Key(600));
    ASSERT_EQ(info8.largest_range_del_key, Key(700));

    // Cannot create an empty sst file: Finish() is called again here without
    // opening a new file first and has to fail.
    std::string file_empty = sst_files_dir_ + "file_empty.sst";
    ExternalSstFileInfo info_empty;
    ASSERT_NOK(writer.Finish(&info_empty));

    DestroyAndReopen(options);
    // Ingest file1 by path into the fresh DB.
    ASSERT_OK(DeprecatedAddFile({file1}));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    for (int k = 0; k < 100; ++k) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }

    // Adding a file while a snapshot is held must fail.
    const Snapshot* snapshot = db_->GetSnapshot();
    if (snapshot != nullptr) {
      ASSERT_NOK(DeprecatedAddFile({file2}));
      db_->ReleaseSnapshot(snapshot);
    }
    // Once the snapshot is released the same file can be added.
    ASSERT_OK(DeprecatedAddFile({file2}));

    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    for (int k = 0; k < 200; ++k) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }

    // file3 overlaps keys already in the DB (195 .. 199).
    ASSERT_NOK(DeprecatedAddFile({file3}));

    // file4 overlaps keys already in the DB (30 .. 39).
    ASSERT_NOK(DeprecatedAddFile({file4}));

    // Overwrite the values of the keys divisible by 5.
    for (int k = 0; k < 200; k += 5) {
      ASSERT_OK(Put(Key(k), Key(k) + "_val_new"));
    }
    ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);

    // Key range of file5 (400 => 499) doesn't overlap with any keys in DB
    ASSERT_OK(DeprecatedAddFile({file5}));

    // file6's range deletion (400 => 500) overlaps the keys just added.
    ASSERT_NOK(DeprecatedAddFile({file6}));

    // Key range of file7 (500 => 598) doesn't overlap with any keys in DB
    ASSERT_OK(DeprecatedAddFile({file7}));

    // Key range of file8 (600 => 700) doesn't overlap with any keys in DB
    ASSERT_OK(DeprecatedAddFile({file8}));

    // Values must be correct both before and after flush/compaction, hence
    // two rounds of verification with a flush + full compaction after each.
    for (int round = 0; round < 2; ++round) {
      for (int k = 0; k < 200; ++k) {
        const std::string expected =
            (k % 5 == 0) ? Key(k) + "_val_new" : Key(k) + "_val";
        ASSERT_EQ(Get(Key(k)), expected);
      }
      for (int k = 400; k < 500; ++k) {
        const std::string expected = Key(k) + "_val";
        ASSERT_EQ(Get(Key(k)), expected);
      }
      for (int k = 500; k < 600; ++k) {
        // file7 only holds puts for the even keys starting at 520.
        const bool absent = (k < 520) || (k % 2 == 1);
        const std::string expected =
            absent ? std::string("NOT_FOUND") : Key(k) + "_val";
        ASSERT_EQ(Get(Key(k)), expected);
      }
      ASSERT_OK(Flush());
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
    }

    Close();
    options.disable_auto_compactions = true;
    Reopen(options);

    // Delete every key in 400 .. 499.
    for (int k = 400; k < 500; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
    // The keys are deleted, but file5 still cannot be added because of the
    // tombstones those deletions left behind.
    ASSERT_NOK(DeprecatedAddFile({file5}));

    // Compacting the DB will remove the tombstones
    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

    // Now we can add the file
    ASSERT_OK(DeprecatedAddFile({file5}));

    // Verify values of file5 in DB
    for (int k = 400; k < 500; ++k) {
      const std::string expected = Key(k) + "_val";
      ASSERT_EQ(Get(Key(k)), expected);
    }
    DestroyAndRecreateExternalSSTFilesDir();
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction |
                         kRangeDelSkipConfigs));
}
|
2018-11-01 23:21:30 +00:00
|
|
|
|
2023-08-11 01:16:10 +00:00
|
|
|
// Writes ten wide-column entities into one SST file, checks the reported
// ExternalSstFileInfo, ingests the file and reads every entity back.
TEST_F(ExternalSSTFileTest, BasicWideColumn) {
  do {
    Options options = CurrentOptions();

    SstFileWriter writer(EnvOptions(), options);

    // No file has been opened yet, so the writer must report a size of 0.
    ASSERT_EQ(writer.FileSize(), 0);

    const std::string sst_path = sst_files_dir_ + "wide_column_file.sst";
    ASSERT_OK(writer.Open(sst_path));
    for (int k = 0; k < 10; ++k) {
      const std::string attr1_val = Key(k) + "_attr_1_val";
      const std::string attr2_val = Key(k) + "_attr_2_val";
      WideColumns columns{{"attr_1", attr1_val}, {"attr_2", attr2_val}};
      ASSERT_OK(writer.PutEntity(Key(k), columns));
    }
    ExternalSstFileInfo info;
    ASSERT_OK(writer.Finish(&info));

    // A successfully written file has a non-zero size.
    ASSERT_GT(writer.FileSize(), 0);

    ASSERT_EQ(info.file_path, sst_path);
    ASSERT_EQ(info.num_entries, 10);
    ASSERT_EQ(info.smallest_key, Key(0));
    ASSERT_EQ(info.largest_key, Key(9));
    ASSERT_EQ(info.num_range_del_entries, 0);
    ASSERT_EQ(info.smallest_range_del_key, "");
    ASSERT_EQ(info.largest_range_del_key, "");

    DestroyAndReopen(options);
    // Ingest the file by path into the fresh DB.
    ASSERT_OK(DeprecatedAddFile({sst_path}));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    for (int k = 0; k < 10; ++k) {
      PinnableWideColumns entity;
      ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
                               Key(k), &entity));
      const std::string attr1_val = Key(k) + "_attr_1_val";
      const std::string attr2_val = Key(k) + "_attr_2_val";
      WideColumns expected_columns{{"attr_1", attr1_val},
                                   {"attr_2", attr2_val}};
      ASSERT_EQ(entity.columns(), expected_columns);
    }

  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction |
                         kRangeDelSkipConfigs));
}
|
|
|
|
|
Wide Column support in ldb (#11754)
Summary:
wide_columns can now be pretty-printed in the following commands
- `./ldb dump_wal`
- `./ldb dump`
- `./ldb idump`
- `./ldb dump_live_files`
- `./ldb scan`
- `./sst_dump --command=scan`
There are opportunities to refactor to reduce some nearly identical code. This PR is initial change to add wide column support in `ldb` and `sst_dump` tool. More PRs to come for the refactor.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11754
Test Plan:
**New Tests added**
- `WideColumnsHelperTest::DumpWideColumns`
- `WideColumnsHelperTest::DumpSliceAsWideColumns`
**Changes added to existing tests**
- `ExternalSSTFileTest::BasicMixed` added to cover mixed case (This test should have been added in https://github.com/facebook/rocksdb/issues/11688). This test does not verify the ldb or sst_dump output. This test was used to create test SST files having some rows with wide columns and some without and the generated SST files were used to manually test sst_dump_tool.
- `createSST()` in `sst_dump_test` now takes `wide_column_one_in` to add wide column value in SST
**dump_wal**
```
./ldb dump_wal --walfile=/tmp/rocksdbtest-226125/db_wide_basic_test_2675429_2308393776696827948/000004.log --print_value --header
```
```
Sequence,Count,ByteSize,Physical Offset,Key(s) : value
1,1,59,0,PUT_ENTITY(0) : 0x:0x68656C6C6F 0x617474725F6E616D6531:0x666F6F 0x617474725F6E616D6532:0x626172
2,1,34,42,PUT_ENTITY(0) : 0x617474725F6F6E65:0x74776F 0x617474725F7468726565:0x666F7572
3,1,17,7d,PUT(0) : 0x7468697264 : 0x62617A
```
**idump**
```
./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ idump
```
```
'first' seq:1, type:22 => :hello attr_name1:foo attr_name2:bar
'second' seq:2, type:22 => attr_one:two attr_three:four
'third' seq:3, type:1 => baz
Internal keys in range: 3
```
**SST Dump from dump_live_files**
```
./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ compact
./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ dump_live_files
```
```
...
==============================
SST Files
==============================
/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst level:1
------------------------------
Process /tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst
Sst file format: block-based
'first' seq:0, type:22 => :hello attr_name1:foo attr_name2:bar
'second' seq:0, type:22 => attr_one:two attr_three:four
'third' seq:0, type:1 => baz
...
```
**dump**
```
./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ dump
```
```
first ==> :hello attr_name1:foo attr_name2:bar
second ==> attr_one:two attr_three:four
third ==> baz
Keys in range: 3
```
**scan**
```
./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ scan
```
```
first : :hello attr_name1:foo attr_name2:bar
second : attr_one:two attr_three:four
third : baz
```
**sst_dump**
```
./sst_dump --file=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst --command=scan
```
```
options.env is 0x7ff54b296000
Process /tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst
Sst file format: block-based
from [] to []
'first' seq:0, type:22 => :hello attr_name1:foo attr_name2:bar
'second' seq:0, type:22 => attr_one:two attr_three:four
'third' seq:0, type:1 => baz
```
Reviewed By: ltamasi
Differential Revision: D48837999
Pulled By: jaykorean
fbshipit-source-id: b0280f0589d2b9716bb9b50530ffcabb397d140f
2023-08-30 19:45:52 +00:00
|
|
|
// Writes one external SST file that interleaves wide-column entities (every
// fifth key, via PutEntity) with plain key/values (via Put), ingests it into
// a freshly reopened DB, and reads every key back through the matching API
// (GetEntity for entities, Get for plain values). Repeated for each option
// configuration that ChangeOptions() selects.
TEST_F(ExternalSSTFileTest, BasicMixed) {
  // Number of keys written to the file. The read-back loop uses the same
  // bound so that every ingested entry is verified, not just a prefix.
  constexpr int kNumKeys = 100;

  do {
    Options options = CurrentOptions();

    SstFileWriter sst_file_writer(EnvOptions(), options);

    // Current file size should be 0 after sst_file_writer init and before
    // opening a file.
    ASSERT_EQ(sst_file_writer.FileSize(), 0);

    std::string file = sst_files_dir_ + "mixed_file.sst";
    ASSERT_OK(sst_file_writer.Open(file));
    for (int k = 0; k < kNumKeys; k++) {
      if (k % 5 == 0) {
        // Every fifth key is a two-column entity.
        std::string val1 = Key(k) + "_attr_1_val";
        std::string val2 = Key(k) + "_attr_2_val";
        WideColumns columns{{"attr_1", val1}, {"attr_2", val2}};
        ASSERT_OK(sst_file_writer.PutEntity(Key(k), columns));
      } else {
        ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
      }
    }
    ExternalSstFileInfo file_info;
    ASSERT_OK(sst_file_writer.Finish(&file_info));

    // Current file size should be non-zero after a successful write.
    ASSERT_GT(sst_file_writer.FileSize(), 0);

    ASSERT_EQ(file_info.file_path, file);
    ASSERT_EQ(file_info.num_entries, kNumKeys);
    ASSERT_EQ(file_info.smallest_key, Key(0));
    ASSERT_EQ(file_info.largest_key, Key(kNumKeys - 1));
    ASSERT_EQ(file_info.num_range_del_entries, 0);
    ASSERT_EQ(file_info.smallest_range_del_key, "");
    ASSERT_EQ(file_info.largest_range_del_key, "");

    DestroyAndReopen(options);
    // Add file using file path
    ASSERT_OK(DeprecatedAddFile({file}));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);

    // Verify every key that was written, using the API that matches how the
    // key was written.
    for (int k = 0; k < kNumKeys; k++) {
      if (k % 5 == 0) {
        PinnableWideColumns result;
        ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
                                 Key(k), &result));
        std::string val1 = Key(k) + "_attr_1_val";
        std::string val2 = Key(k) + "_attr_2_val";
        WideColumns expected_columns{{"attr_1", val1}, {"attr_2", val2}};
        ASSERT_EQ(result.columns(), expected_columns);
      } else {
        ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
      }
    }
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction |
                         kRangeDelSkipConfigs));
}
|
|
|
|
|
2016-09-07 22:41:54 +00:00
|
|
|
class SstFileWriterCollector : public TablePropertiesCollector {
|
|
|
|
public:
|
|
|
|
explicit SstFileWriterCollector(const std::string prefix) : prefix_(prefix) {
|
|
|
|
name_ = prefix_ + "_SstFileWriterCollector";
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* Name() const override { return name_.c_str(); }
|
|
|
|
|
|
|
|
Status Finish(UserCollectedProperties* properties) override {
|
2018-02-01 21:54:50 +00:00
|
|
|
std::string count = std::to_string(count_);
|
2016-09-07 22:41:54 +00:00
|
|
|
*properties = UserCollectedProperties{
|
|
|
|
{prefix_ + "_SstFileWriterCollector", "YES"},
|
2018-02-01 21:54:50 +00:00
|
|
|
{prefix_ + "_Count", count},
|
2016-09-07 22:41:54 +00:00
|
|
|
};
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
2018-03-05 21:08:17 +00:00
|
|
|
Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/,
|
|
|
|
EntryType /*type*/, SequenceNumber /*seq*/,
|
|
|
|
uint64_t /*file_size*/) override {
|
2016-09-07 22:41:54 +00:00
|
|
|
++count_;
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
UserCollectedProperties GetReadableProperties() const override {
|
2016-09-07 22:41:54 +00:00
|
|
|
return UserCollectedProperties{};
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
uint32_t count_ = 0;
|
|
|
|
std::string prefix_;
|
|
|
|
std::string name_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class SstFileWriterCollectorFactory : public TablePropertiesCollectorFactory {
|
|
|
|
public:
|
|
|
|
explicit SstFileWriterCollectorFactory(std::string prefix)
|
|
|
|
: prefix_(prefix), num_created_(0) {}
|
2019-02-14 21:52:47 +00:00
|
|
|
TablePropertiesCollector* CreateTablePropertiesCollector(
|
2018-03-05 21:08:17 +00:00
|
|
|
TablePropertiesCollectorFactory::Context /*context*/) override {
|
2016-09-07 22:41:54 +00:00
|
|
|
num_created_++;
|
|
|
|
return new SstFileWriterCollector(prefix_);
|
|
|
|
}
|
|
|
|
const char* Name() const override { return "SstFileWriterCollectorFactory"; }
|
|
|
|
|
|
|
|
std::string prefix_;
|
|
|
|
uint32_t num_created_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Builds seven external SST files (five with point keys, two containing only
// range deletions) and exercises ingestion of file *lists*: lists whose
// members overlap each other are rejected, non-overlapping lists are added,
// the user-collected table properties are visible afterwards, and ingestion
// interacts as expected with snapshots, overwrites, deletes and compaction.
TEST_F(ExternalSSTFileTest, AddList) {
  do {
    Options options = CurrentOptions();

    // Two property collectors so that each written file carries both the
    // "abc_*" and "xyz_*" user properties.
    auto abc_collector = std::make_shared<SstFileWriterCollectorFactory>("abc");
    auto xyz_collector = std::make_shared<SstFileWriterCollectorFactory>("xyz");
    options.table_properties_collector_factories.push_back(abc_collector);
    options.table_properties_collector_factories.push_back(xyz_collector);

    SstFileWriter sst_file_writer(EnvOptions(), options);

    // file1.sst (0 => 99)
    const std::string file1 = sst_files_dir_ + "file1.sst";
    ASSERT_OK(sst_file_writer.Open(file1));
    for (int k = 0; k < 100; ++k) {
      ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
    }
    ExternalSstFileInfo file1_info;
    ASSERT_OK(sst_file_writer.Finish(&file1_info));
    ASSERT_EQ(file1_info.file_path, file1);
    ASSERT_EQ(file1_info.num_entries, 100);
    ASSERT_EQ(file1_info.smallest_key, Key(0));
    ASSERT_EQ(file1_info.largest_key, Key(99));
    // The writer has already been finished, so this Put must be rejected.
    ASSERT_NOK(sst_file_writer.Put(Key(100), "bad_val"));

    // file2.sst (100 => 199)
    const std::string file2 = sst_files_dir_ + "file2.sst";
    ASSERT_OK(sst_file_writer.Open(file2));
    for (int k = 100; k < 200; ++k) {
      ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
    }
    // Key(99) does not sort after the last added key, so it must be rejected.
    ASSERT_NOK(sst_file_writer.Put(Key(99), "bad_val"));
    ExternalSstFileInfo file2_info;
    ASSERT_OK(sst_file_writer.Finish(&file2_info));
    ASSERT_EQ(file2_info.file_path, file2);
    ASSERT_EQ(file2_info.num_entries, 100);
    ASSERT_EQ(file2_info.smallest_key, Key(100));
    ASSERT_EQ(file2_info.largest_key, Key(199));

    // file3.sst (195 => 199)
    // The keys of this file overlap with those of file2.
    const std::string file3 = sst_files_dir_ + "file3.sst";
    ASSERT_OK(sst_file_writer.Open(file3));
    for (int k = 195; k < 200; ++k) {
      ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap"));
    }
    ExternalSstFileInfo file3_info;
    ASSERT_OK(sst_file_writer.Finish(&file3_info));
    ASSERT_EQ(file3_info.file_path, file3);
    ASSERT_EQ(file3_info.num_entries, 5);
    ASSERT_EQ(file3_info.smallest_key, Key(195));
    ASSERT_EQ(file3_info.largest_key, Key(199));

    // file4.sst (30 => 39)
    // The keys of this file overlap with those of file1.
    const std::string file4 = sst_files_dir_ + "file4.sst";
    ASSERT_OK(sst_file_writer.Open(file4));
    for (int k = 30; k < 40; ++k) {
      ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap"));
    }
    ExternalSstFileInfo file4_info;
    ASSERT_OK(sst_file_writer.Finish(&file4_info));
    ASSERT_EQ(file4_info.file_path, file4);
    ASSERT_EQ(file4_info.num_entries, 10);
    ASSERT_EQ(file4_info.smallest_key, Key(30));
    ASSERT_EQ(file4_info.largest_key, Key(39));

    // file5.sst (200 => 299)
    const std::string file5 = sst_files_dir_ + "file5.sst";
    ASSERT_OK(sst_file_writer.Open(file5));
    for (int k = 200; k < 300; ++k) {
      ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
    }
    ExternalSstFileInfo file5_info;
    ASSERT_OK(sst_file_writer.Finish(&file5_info));
    ASSERT_EQ(file5_info.file_path, file5);
    ASSERT_EQ(file5_info.num_entries, 100);
    ASSERT_EQ(file5_info.smallest_key, Key(200));
    ASSERT_EQ(file5_info.largest_key, Key(299));

    // file6.sst (delete 0 => 100), written as two overlapping range
    // deletions and no point keys.
    const std::string file6 = sst_files_dir_ + "file6.sst";
    ASSERT_OK(sst_file_writer.Open(file6));
    ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(75)));
    ASSERT_OK(sst_file_writer.DeleteRange(Key(25), Key(100)));
    ExternalSstFileInfo file6_info;
    ASSERT_OK(sst_file_writer.Finish(&file6_info));
    ASSERT_EQ(file6_info.file_path, file6);
    ASSERT_EQ(file6_info.num_entries, 0);
    ASSERT_EQ(file6_info.smallest_key, "");
    ASSERT_EQ(file6_info.largest_key, "");
    ASSERT_EQ(file6_info.num_range_del_entries, 2);
    ASSERT_EQ(file6_info.smallest_range_del_key, Key(0));
    ASSERT_EQ(file6_info.largest_range_del_key, Key(100));

    // file7.sst (delete 99 => 201)
    const std::string file7 = sst_files_dir_ + "file7.sst";
    ASSERT_OK(sst_file_writer.Open(file7));
    ASSERT_OK(sst_file_writer.DeleteRange(Key(99), Key(201)));
    ExternalSstFileInfo file7_info;
    ASSERT_OK(sst_file_writer.Finish(&file7_info));
    ASSERT_EQ(file7_info.file_path, file7);
    ASSERT_EQ(file7_info.num_entries, 0);
    ASSERT_EQ(file7_info.smallest_key, "");
    ASSERT_EQ(file7_info.largest_key, "");
    ASSERT_EQ(file7_info.num_range_del_entries, 1);
    ASSERT_EQ(file7_info.smallest_range_del_key, Key(99));
    ASSERT_EQ(file7_info.largest_range_del_key, Key(201));

    // list 1 has internal key range conflict
    std::vector<std::string> file_list0{file1, file2};
    std::vector<std::string> file_list1{file3, file2, file1};
    std::vector<std::string> file_list2{file5};
    std::vector<std::string> file_list3{file3, file4};
    std::vector<std::string> file_list4{file5, file7};
    std::vector<std::string> file_list5{file6, file7};

    DestroyAndReopen(options);

    // These lists of files have key ranges that overlap with each other
    ASSERT_NOK(DeprecatedAddFile(file_list1));
    // Both of the following overlap on the range deletion tombstone.
    ASSERT_NOK(DeprecatedAddFile(file_list4));
    ASSERT_NOK(DeprecatedAddFile(file_list5));

    // Add files using file path list
    ASSERT_OK(DeprecatedAddFile(file_list0));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    for (int k = 0; k < 200; ++k) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }

    // Both ingested files must expose the properties of both collectors.
    TablePropertiesCollection props;
    ASSERT_OK(db_->GetPropertiesOfAllTables(&props));
    ASSERT_EQ(props.size(), 2);
    for (const auto& table_entry : props) {
      // Copied on purpose: operator[] below needs a mutable map.
      auto collected = table_entry.second->user_collected_properties;
      ASSERT_EQ(collected["abc_SstFileWriterCollector"], "YES");
      ASSERT_EQ(collected["xyz_SstFileWriterCollector"], "YES");
      ASSERT_EQ(collected["abc_Count"], "100");
      ASSERT_EQ(collected["xyz_Count"], "100");
    }

    // Adding a file while holding a snapshot will fail
    const Snapshot* snapshot = db_->GetSnapshot();
    if (snapshot != nullptr) {
      ASSERT_NOK(DeprecatedAddFile(file_list2));
      db_->ReleaseSnapshot(snapshot);
    }
    // We can add the file after releasing the snapshot
    ASSERT_OK(DeprecatedAddFile(file_list2));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    for (int k = 0; k < 300; ++k) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }

    ASSERT_OK(db_->GetPropertiesOfAllTables(&props));
    ASSERT_EQ(props.size(), 3);
    for (const auto& table_entry : props) {
      auto collected = table_entry.second->user_collected_properties;
      ASSERT_EQ(collected["abc_SstFileWriterCollector"], "YES");
      ASSERT_EQ(collected["xyz_SstFileWriterCollector"], "YES");
      ASSERT_EQ(collected["abc_Count"], "100");
      ASSERT_EQ(collected["xyz_Count"], "100");
    }

    // This file list has overlapping values with the existing data
    ASSERT_NOK(DeprecatedAddFile(file_list3));

    // Overwrite values of keys divisible by 5
    for (int k = 0; k < 200; k += 5) {
      ASSERT_OK(Put(Key(k), Key(k) + "_val_new"));
    }
    ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);

    // Make sure values are correct before and after flush/compaction
    for (int pass = 0; pass < 2; ++pass) {
      for (int k = 0; k < 200; ++k) {
        const std::string expected =
            (k % 5 == 0) ? Key(k) + "_val_new" : Key(k) + "_val";
        ASSERT_EQ(Get(Key(k)), expected);
      }
      for (int k = 200; k < 300; ++k) {
        ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
      }
      ASSERT_OK(Flush());
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
    }

    // Delete keys in range (200 => 299)
    for (int k = 200; k < 300; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
    // We deleted range (200 => 299) but cannot add file5 because
    // of the range tombstones
    ASSERT_NOK(DeprecatedAddFile(file_list2));

    // Compacting the DB will remove the tombstones
    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

    // Now we can add the file
    ASSERT_OK(DeprecatedAddFile(file_list2));

    // Verify values of file5 in DB
    for (int k = 200; k < 300; ++k) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }
    DestroyAndRecreateExternalSSTFilesDir();
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction |
                         kRangeDelSkipConfigs));
}
|
|
|
|
|
|
|
|
// Ingesting a list of files is all-or-nothing: when one path in the list
// refers to a file that was never written, the call fails and none of the
// valid files' keys become visible. Once the bad path is removed, the same
// list ingests successfully.
TEST_F(ExternalSSTFileTest, AddListAtomicity) {
  do {
    Options options = CurrentOptions();

    SstFileWriter sst_file_writer(EnvOptions(), options);

    // files[0].sst (0 => 99)
    // files[1].sst (100 => 199)
    // ...
    // file[8].sst (800 => 899)
    int num_files = 9;
    std::vector<std::string> files(num_files);
    std::vector<ExternalSstFileInfo> files_info(num_files);
    for (int i = 0; i < num_files; i++) {
      const int first_key = i * 100;
      const int end_key = (i + 1) * 100;  // exclusive

      files[i] = sst_files_dir_ + "file" + std::to_string(i) + ".sst";
      ASSERT_OK(sst_file_writer.Open(files[i]));
      for (int k = first_key; k < end_key; k++) {
        ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
      }
      ASSERT_OK(sst_file_writer.Finish(&files_info[i]));
      ASSERT_EQ(files_info[i].file_path, files[i]);
      ASSERT_EQ(files_info[i].num_entries, 100);
      ASSERT_EQ(files_info[i].smallest_key, Key(first_key));
      ASSERT_EQ(files_info[i].largest_key, Key(end_key - 1));
    }

    // Append a path for which no file was written; the whole list must fail
    // and leave the DB without any of the keys.
    files.push_back(sst_files_dir_ + "file" + std::to_string(num_files) +
                    ".sst");
    ASSERT_NOK(DeprecatedAddFile(files));
    for (int k = 0; k < num_files * 100; k++) {
      ASSERT_EQ("NOT_FOUND", Get(Key(k)));
    }

    // Without the bad path, every file is ingested.
    files.pop_back();
    ASSERT_OK(DeprecatedAddFile(files));
    for (int k = 0; k < num_files * 100; k++) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }
    DestroyAndRecreateExternalSSTFilesDir();
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));
}
|
|
|
|
// This test reproduces a bug that can happen in some cases if the DB started
|
|
|
|
// purging obsolete files when we are adding an external sst file.
|
|
|
|
// This situation may result in deleting the file while it's being added.
|
|
|
|
// Regression test: while an external file is being ingested, a sync-point
// callback writes, flushes and compacts the DB. The ingested file must still
// be readable afterwards.
TEST_F(ExternalSSTFileTest, PurgeObsoleteFilesBug) {
  Options options = CurrentOptions();
  SstFileWriter sst_file_writer(EnvOptions(), options);

  // file1.sst (0 => 499)
  std::string sst_file_path = sst_files_dir_ + "file1.sst";
  ASSERT_OK(sst_file_writer.Open(sst_file_path));
  for (int i = 0; i < 500; i++) {
    const std::string key = Key(i);
    ASSERT_OK(sst_file_writer.Put(key, key + "_val"));
  }

  ExternalSstFileInfo sst_file_info;
  ASSERT_OK(sst_file_writer.Finish(&sst_file_info));

  options.delete_obsolete_files_period_micros = 0;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // Runs when the ingestion job reaches the "FileAdded" sync point: create
  // and then compact away two small flushed files.
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "ExternalSstFileIngestionJob::Prepare:FileAdded", [&](void* /* arg */) {
        ASSERT_OK(Put("aaa", "bbb"));
        ASSERT_OK(Flush());
        ASSERT_OK(Put("aaa", "xxx"));
        ASSERT_OK(Flush());
        ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      });
  sync_point->EnableProcessing();

  ASSERT_OK(DeprecatedAddFile({sst_file_path}));

  // Every key of the ingested file must be readable.
  for (int i = 0; i < 500; i++) {
    const std::string key = Key(i);
    ASSERT_EQ(Get(key), key + "_val");
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
|
|
|
|
|
|
|
|
// Ingestion is refused while a snapshot is held unless the third argument of
// DeprecatedAddFile (skip_snapshot_check) is true. With the check skipped,
// the ingested keys are visible even through a snapshot taken before the
// ingestion.
TEST_F(ExternalSSTFileTest, SkipSnapshot) {
  Options options = CurrentOptions();

  SstFileWriter sst_file_writer(EnvOptions(), options);

  // file1.sst (0 => 99)
  const std::string file1 = sst_files_dir_ + "file1.sst";
  ASSERT_OK(sst_file_writer.Open(file1));
  for (int k = 0; k < 100; ++k) {
    ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file1_info;
  ASSERT_OK(sst_file_writer.Finish(&file1_info));
  ASSERT_EQ(file1_info.file_path, file1);
  ASSERT_EQ(file1_info.num_entries, 100);
  ASSERT_EQ(file1_info.smallest_key, Key(0));
  ASSERT_EQ(file1_info.largest_key, Key(99));

  // file2.sst (100 => 299)
  const std::string file2 = sst_files_dir_ + "file2.sst";
  ASSERT_OK(sst_file_writer.Open(file2));
  for (int k = 100; k < 300; ++k) {
    ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file2_info;
  ASSERT_OK(sst_file_writer.Finish(&file2_info));
  ASSERT_EQ(file2_info.file_path, file2);
  ASSERT_EQ(file2_info.num_entries, 200);
  ASSERT_EQ(file2_info.smallest_key, Key(100));
  ASSERT_EQ(file2_info.largest_key, Key(299));

  ASSERT_OK(DeprecatedAddFile({file1}));

  const Snapshot* s1 = db_->GetSnapshot();
  if (s1 != nullptr) {
    // Adding the file fails while a snapshot is held and
    // skip_snapshot_check keeps its default of false.
    ASSERT_NOK(DeprecatedAddFile({file2}));

    // Adding the file succeeds with skip_snapshot_check set to true, even
    // though the DB still holds the snapshot.
    ASSERT_OK(DeprecatedAddFile({file2}, false, true));
    db_->ReleaseSnapshot(s1);
  }

  // file3.sst (300 => 399)
  const std::string file3 = sst_files_dir_ + "file3.sst";
  ASSERT_OK(sst_file_writer.Open(file3));
  for (int k = 300; k < 400; ++k) {
    ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file3_info;
  ASSERT_OK(sst_file_writer.Finish(&file3_info));
  ASSERT_EQ(file3_info.file_path, file3);
  ASSERT_EQ(file3_info.num_entries, 100);
  ASSERT_EQ(file3_info.smallest_key, Key(300));
  ASSERT_EQ(file3_info.largest_key, Key(399));

  // Key(300) is absent before file3 is ingested; after ingesting with the
  // snapshot check skipped it is visible both directly and through the
  // snapshot taken beforehand.
  ASSERT_EQ(Get(Key(300)), "NOT_FOUND");
  const Snapshot* s2 = db_->GetSnapshot();
  ASSERT_OK(DeprecatedAddFile({file3}, false, true));
  const std::string expected_300 = Key(300) + "_val";
  ASSERT_EQ(Get(Key(300)), expected_300);
  ASSERT_EQ(Get(Key(300), s2), expected_300);

  db_->ReleaseSnapshot(s2);
}
|
|
|
|
|
|
|
|
TEST_F(ExternalSSTFileTest, MultiThreaded) {
|
2020-08-24 18:24:22 +00:00
|
|
|
env_->skip_fsync_ = true;
|
2016-09-07 22:41:54 +00:00
|
|
|
// Bulk load 10 files every file contain 1000 keys
|
|
|
|
int num_files = 10;
|
|
|
|
int keys_per_file = 1000;
|
|
|
|
|
|
|
|
// Generate file names
|
|
|
|
std::vector<std::string> file_names;
|
|
|
|
for (int i = 0; i < num_files; i++) {
|
2022-05-06 20:03:58 +00:00
|
|
|
std::string file_name = "file_" + std::to_string(i) + ".sst";
|
2016-09-07 22:41:54 +00:00
|
|
|
file_names.push_back(sst_files_dir_ + file_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
do {
|
|
|
|
Options options = CurrentOptions();
|
Add missing range conflict check between file ingestion and RefitLevel() (#10988)
Summary:
**Context:**
File ingestion never checks whether the key range it acts on overlaps with an ongoing RefitLevel() (used in `CompactRange()` with `change_level=true`). That's because RefitLevel() doesn't register and make its key range known to file ingestion. Though it checks overlapping with other compactions by https://github.com/facebook/rocksdb/blob/7.8.fb/db/external_sst_file_ingestion_job.cc#L998.
RefitLevel() (used in `CompactRange()` with `change_level=true`) doesn't check whether the key range it acts on overlaps with an ongoing file ingestion. That's because file ingestion does not register and make its key range known to other compactions.
- Note that non-refitlevel-compaction (e.g, manual compaction w/o RefitLevel() or general compaction) also does not check key range overlap with ongoing file ingestion for the same reason.
- But it's fine. Credited to cbi42's discovery, `WaitForIngestFile` was called by background and foreground compactions. They were introduced in https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.
- Regardless, this PR registers file ingestion like a compaction is a general approach that will also add range conflict check between file ingestion and non-refitlevel-compaction, though it has not been the issue motivated this PR.
Above are bugs resulting in two bad consequences:
- If file ingestion and RefitLevel() creates files in the same level, then range-overlapped files will be created at that level and caught as corruption by `force_consistency_checks=true`
- If file ingestion and RefitLevel() creates file in different levels, then with one further compaction on the ingested file, it can result in two same keys both with seqno 0 in two different levels. Then with iterator's [optimization](https://github.com/facebook/rocksdb/blame/c62f3221698fd273b673d4f7e54eabb8329a4369/db/db_iter.cc#L342-L343) that assumes no two same keys both with seqno 0, it will either break this assertion in debug build or, even worst, return value of this same key for the key after it, which is the wrong value to return, in release build.
Therefore we decide to introduce range conflict check for file ingestion and RefitLevel() inspired from the existing range conflict check among compactions.
**Summary:**
- Treat file ingestion job and RefitLevel() as `Compaction` of new compaction reasons: `CompactionReason::kExternalSstIngestion` and `CompactionReason::kRefitLevel` and register/unregister them. File ingestion is treated as compaction from L0 to different levels and RefitLevel() as compaction from source level to target level.
- Check for `RangeOverlapWithCompaction` with other ongoing compactions, `RegisterCompaction()` on this "compaction" before changing the LSM state in `VersionStorageInfo`, and `UnregisterCompaction()` after changing.
- Replace scattered fixes (https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.) that prevents overlapping between file ingestion and non-refit-level compaction with this fix cuz those practices are easy to overlook.
- Misc: logic cleanup, see PR comments
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10988
Test Plan:
- New unit test `DBCompactionTestWithOngoingFileIngestionParam*` that failed pre-fix and passed afterwards.
- Made compatible with existing tests, see PR comments
- make check
- [Ongoing] Stress test rehearsal with normal value and aggressive CI value https://github.com/facebook/rocksdb/pull/10761
Reviewed By: cbi42
Differential Revision: D41535685
Pulled By: hx235
fbshipit-source-id: 549833a577ba1496d20a870583d4caa737da1258
2022-12-29 23:05:36 +00:00
|
|
|
options.disable_auto_compactions = true;
|
2016-09-07 22:41:54 +00:00
|
|
|
std::atomic<int> thread_num(0);
|
|
|
|
std::function<void()> write_file_func = [&]() {
|
|
|
|
int file_idx = thread_num.fetch_add(1);
|
|
|
|
int range_start = file_idx * keys_per_file;
|
|
|
|
int range_end = range_start + keys_per_file;
|
|
|
|
|
2017-03-13 18:17:19 +00:00
|
|
|
SstFileWriter sst_file_writer(EnvOptions(), options);
|
2016-09-07 22:41:54 +00:00
|
|
|
|
|
|
|
ASSERT_OK(sst_file_writer.Open(file_names[file_idx]));
|
|
|
|
|
|
|
|
for (int k = range_start; k < range_end; k++) {
|
2017-05-26 19:05:19 +00:00
|
|
|
ASSERT_OK(sst_file_writer.Put(Key(k), Key(k)));
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(sst_file_writer.Finish());
|
2016-09-07 22:41:54 +00:00
|
|
|
};
|
|
|
|
// Write num_files files in parallel
|
2017-02-06 22:43:55 +00:00
|
|
|
std::vector<port::Thread> sst_writer_threads;
|
2016-09-07 22:41:54 +00:00
|
|
|
for (int i = 0; i < num_files; ++i) {
|
|
|
|
sst_writer_threads.emplace_back(write_file_func);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto& t : sst_writer_threads) {
|
|
|
|
t.join();
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "Wrote %d files (%d keys)\n", num_files,
|
|
|
|
num_files * keys_per_file);
|
|
|
|
|
|
|
|
thread_num.store(0);
|
|
|
|
std::atomic<int> files_added(0);
|
|
|
|
// Thread 0 -> Load {f0,f1}
|
|
|
|
// Thread 1 -> Load {f0,f1}
|
|
|
|
// Thread 2 -> Load {f2,f3}
|
|
|
|
// Thread 3 -> Load {f2,f3}
|
|
|
|
// Thread 4 -> Load {f4,f5}
|
|
|
|
// Thread 5 -> Load {f4,f5}
|
|
|
|
// ...
|
|
|
|
std::function<void()> load_file_func = [&]() {
|
|
|
|
// We intentionally add every file twice, and assert that it was added
|
|
|
|
// only once and the other add failed
|
|
|
|
int thread_id = thread_num.fetch_add(1);
|
|
|
|
int file_idx = (thread_id / 2) * 2;
|
|
|
|
// sometimes we use copy, sometimes link .. the result should be the same
|
|
|
|
bool move_file = (thread_id % 3 == 0);
|
|
|
|
|
|
|
|
std::vector<std::string> files_to_add;
|
|
|
|
|
|
|
|
files_to_add = {file_names[file_idx]};
|
|
|
|
if (static_cast<size_t>(file_idx + 1) < file_names.size()) {
|
|
|
|
files_to_add.push_back(file_names[file_idx + 1]);
|
|
|
|
}
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
Status s = DeprecatedAddFile(files_to_add, move_file);
|
2016-09-07 22:41:54 +00:00
|
|
|
if (s.ok()) {
|
|
|
|
files_added += static_cast<int>(files_to_add.size());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Bulk load num_files files in parallel
|
2017-02-06 22:43:55 +00:00
|
|
|
std::vector<port::Thread> add_file_threads;
|
2016-09-07 22:41:54 +00:00
|
|
|
DestroyAndReopen(options);
|
|
|
|
for (int i = 0; i < num_files; ++i) {
|
|
|
|
add_file_threads.emplace_back(load_file_func);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto& t : add_file_threads) {
|
|
|
|
t.join();
|
|
|
|
}
|
|
|
|
ASSERT_EQ(files_added.load(), num_files);
|
|
|
|
fprintf(stderr, "Loaded %d files (%d keys)\n", num_files,
|
|
|
|
num_files * keys_per_file);
|
|
|
|
|
|
|
|
// Overwrite values of keys divisible by 100
|
|
|
|
for (int k = 0; k < num_files * keys_per_file; k += 100) {
|
|
|
|
std::string key = Key(k);
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(Put(key, key + "_new"));
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
// Make sure the values are correct before and after flush/compaction
|
|
|
|
for (int k = 0; k < num_files * keys_per_file; ++k) {
|
|
|
|
std::string key = Key(k);
|
|
|
|
std::string value = (k % 100 == 0) ? (key + "_new") : key;
|
|
|
|
ASSERT_EQ(Get(key), value);
|
|
|
|
}
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "Verified %d values\n", num_files * keys_per_file);
|
|
|
|
DestroyAndRecreateExternalSSTFilesDir();
|
2017-04-26 20:28:39 +00:00
|
|
|
} while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Processes 100 random key ranges that may overlap each other. Every fifth
// range (except the first) is written with Put(); all others are written to
// an SST file and ingested with DeprecatedAddFile(). For each ingestion the
// test asserts success or failure based on whether the range collides with
// data recorded in `true_data`; for the universal-compaction configurations
// the values captured from the ingestion sync points are also taken into
// account. After all ranges are processed, every key in `true_data` is read
// back and compared. The whole scenario is repeated for each configuration
// produced by ChangeOptions().
TEST_F(ExternalSSTFileTest, OverlappingRanges) {
  env_->skip_fsync_ = true;
  Random rnd(301);

  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();

  // Each callback copies the value handed to its sync point into a local so
  // the test can inspect it after an ingestion attempt.
  SequenceNumber assigned_seqno = 0;
  sync_point->SetCallBack(
      "ExternalSstFileIngestionJob::Run", [&assigned_seqno](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        assigned_seqno = *(static_cast<SequenceNumber*>(arg));
      });
  bool need_flush = false;
  sync_point->SetCallBack(
      "DBImpl::IngestExternalFile:NeedFlush", [&need_flush](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        need_flush = *(static_cast<bool*>(arg));
      });
  bool overlap_with_db = false;
  sync_point->SetCallBack(
      "ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile",
      [&overlap_with_db](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        overlap_with_db = *(static_cast<bool*>(arg));
      });
  sync_point->EnableProcessing();

  do {
    Options options = CurrentOptions();
    DestroyAndReopen(options);

    SstFileWriter sst_file_writer(EnvOptions(), options);

    printf("Option config = %d\n", option_config_);

    // Build the list of [start, end] ranges up front. The order of the two
    // rnd.Uniform() calls matters for reproducing the same sequence.
    std::vector<std::pair<int, int>> key_ranges;
    for (int i = 0; i < 100; i++) {
      int range_start = rnd.Uniform(20000);
      int keys_per_range = 10 + rnd.Uniform(41);

      key_ranges.emplace_back(range_start, range_start + keys_per_range);
    }

    const bool is_universal_config =
        option_config_ == kUniversalCompaction ||
        option_config_ == kUniversalCompactionMultiLevel ||
        option_config_ == kUniversalSubcompactions;

    int memtable_add = 0;
    int success_add_file = 0;
    int failed_add_file = 0;
    std::map<std::string, std::string> true_data;
    for (size_t i = 0; i < key_ranges.size(); i++) {
      const int range_start = key_ranges[i].first;
      const int range_end = key_ranges[i].second;

      Status s;
      std::string range_val = "range_" + std::to_string(i);

      // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
      if (i && i % 5 == 0) {
        // Use DB::Put to insert range (insert into memtable)
        range_val += "_put";
        for (int k = range_start; k <= range_end; k++) {
          s = Put(Key(k), range_val);
          ASSERT_OK(s);
        }
        memtable_add++;
      } else {
        // Use DB::AddFile to insert range
        range_val += "_add_file";

        // Generate the file containing the range
        std::string file_name = sst_files_dir_ + env_->GenerateUniqueId();
        s = sst_file_writer.Open(file_name);
        ASSERT_OK(s);
        for (int k = range_start; k <= range_end; k++) {
          s = sst_file_writer.Put(Key(k), range_val);
          ASSERT_OK(s);
        }
        ExternalSstFileInfo file_info;
        s = sst_file_writer.Finish(&file_info);
        ASSERT_OK(s);

        // Insert the generated file
        s = DeprecatedAddFile({file_name});

        // The range collides with tracked data if the first tracked key at
        // or after range_start is still within [range_start, range_end].
        auto it = true_data.lower_bound(Key(range_start));
        bool expect_failure =
            (it != true_data.end() && it->first <= Key(range_end));
        if (is_universal_config) {
          // For these configs the ingestion is also expected to fail when
          // any of the sync-point values captured above is set.
          expect_failure = expect_failure || need_flush ||
                           assigned_seqno > 0 || overlap_with_db;
        }

        if (expect_failure) {
          // This range overlap with data already exist in DB
          ASSERT_NOK(s);
          failed_add_file++;
        } else {
          ASSERT_OK(s);
          success_add_file++;
        }
      }

      if (s.ok()) {
        // Update true_data map to include the new inserted data
        for (int k = range_start; k <= range_end; k++) {
          true_data[Key(k)] = range_val;
        }
      }

      // Flush / Compact the DB
      if (i && i % 50 == 0) {
        ASSERT_OK(Flush());
      }
      if (i && i % 75 == 0) {
        ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      }
    }

    printf("Total: %" ROCKSDB_PRIszt
           " ranges\n"
           "AddFile()|Success: %d ranges\n"
           "AddFile()|RangeConflict: %d ranges\n"
           "Put(): %d ranges\n",
           key_ranges.size(), success_add_file, failed_add_file, memtable_add);

    // Verify the correctness of the data
    for (const auto& kv : true_data) {
      ASSERT_EQ(Get(kv.first), kv.second);
    }
    printf("keys/values verified\n");
    DestroyAndRecreateExternalSSTFilesDir();
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));

  // The callbacks capture locals of this test by reference; stop processing
  // and drop them before those locals go out of scope.
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
|
|
|
|
2018-11-01 23:21:30 +00:00
|
|
|
TEST_P(ExternalSSTFileTest, PickedLevel) {
|
2020-08-24 18:24:22 +00:00
|
|
|
env_->skip_fsync_ = true;
|
2016-09-07 22:41:54 +00:00
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.disable_auto_compactions = false;
|
|
|
|
options.level0_file_num_compaction_trigger = 4;
|
|
|
|
options.num_levels = 4;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
std::map<std::string, std::string> true_data;
|
2016-09-07 22:41:54 +00:00
|
|
|
|
|
|
|
// File 0 will go to last level (L3)
|
2019-01-30 00:16:53 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false, true,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "0,0,0,1");
|
|
|
|
|
|
|
|
// File 1 will go to level L2 (since it overlap with file 0 in L3)
|
2019-01-30 00:16:53 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, true,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "0,0,1,1");
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
|
2016-09-07 22:41:54 +00:00
|
|
|
{"ExternalSSTFileTest::PickedLevel:0", "BackgroundCallCompaction:0"},
|
|
|
|
{"DBImpl::BackgroundCompaction:Start",
|
|
|
|
"ExternalSSTFileTest::PickedLevel:1"},
|
|
|
|
{"ExternalSSTFileTest::PickedLevel:2",
|
|
|
|
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun"},
|
|
|
|
});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
2016-09-07 22:41:54 +00:00
|
|
|
|
|
|
|
// Flush 4 files containing the same keys
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
|
|
ASSERT_OK(Put(Key(3), Key(3) + "put"));
|
|
|
|
ASSERT_OK(Put(Key(8), Key(8) + "put"));
|
2016-10-21 00:05:32 +00:00
|
|
|
true_data[Key(3)] = Key(3) + "put";
|
|
|
|
true_data[Key(8)] = Key(8) + "put";
|
2016-09-07 22:41:54 +00:00
|
|
|
ASSERT_OK(Flush());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for BackgroundCompaction() to be called
|
|
|
|
TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:0");
|
|
|
|
TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:1");
|
|
|
|
|
|
|
|
EXPECT_EQ(FilesPerLevel(), "4,0,1,1");
|
|
|
|
|
|
|
|
// This file overlaps with file 0 (L3), file 1 (L2) and the
|
|
|
|
// output of compaction going to L1
|
Add missing range conflict check between file ingestion and RefitLevel() (#10988)
Summary:
**Context:**
File ingestion never checks whether the key range it acts on overlaps with an ongoing RefitLevel() (used in `CompactRange()` with `change_level=true`). That's because RefitLevel() doesn't register and make its key range known to file ingestion. Though it checks overlapping with other compactions by https://github.com/facebook/rocksdb/blob/7.8.fb/db/external_sst_file_ingestion_job.cc#L998.
RefitLevel() (used in `CompactRange()` with `change_level=true`) doesn't check whether the key range it acts on overlaps with an ongoing file ingestion. That's because file ingestion does not register and make its key range known to other compactions.
- Note that non-refitlevel-compaction (e.g, manual compaction w/o RefitLevel() or general compaction) also does not check key range overlap with ongoing file ingestion for the same reason.
- But it's fine. Credited to cbi42's discovery, `WaitForIngestFile` was called by background and foreground compactions. They were introduced in https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.
- Regardless, this PR registers file ingestion like a compaction is a general approach that will also add range conflict check between file ingestion and non-refitlevel-compaction, though it has not been the issue motivated this PR.
Above are bugs resulting in two bad consequences:
- If file ingestion and RefitLevel() creates files in the same level, then range-overlapped files will be created at that level and caught as corruption by `force_consistency_checks=true`
- If file ingestion and RefitLevel() creates file in different levels, then with one further compaction on the ingested file, it can result in two same keys both with seqno 0 in two different levels. Then with iterator's [optimization](https://github.com/facebook/rocksdb/blame/c62f3221698fd273b673d4f7e54eabb8329a4369/db/db_iter.cc#L342-L343) that assumes no two same keys both with seqno 0, it will either break this assertion in debug build or, even worst, return value of this same key for the key after it, which is the wrong value to return, in release build.
Therefore we decide to introduce range conflict check for file ingestion and RefitLevel() inspired from the existing range conflict check among compactions.
**Summary:**
- Treat file ingestion job and RefitLevel() as `Compaction` of new compaction reasons: `CompactionReason::kExternalSstIngestion` and `CompactionReason::kRefitLevel` and register/unregister them. File ingestion is treated as compaction from L0 to different levels and RefitLevel() as compaction from source level to target level.
- Check for `RangeOverlapWithCompaction` with other ongoing compactions, `RegisterCompaction()` on this "compaction" before changing the LSM state in `VersionStorageInfo`, and `UnregisterCompaction()` after changing.
- Replace scattered fixes (https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.) that prevents overlapping between file ingestion and non-refit-level compaction with this fix cuz those practices are easy to overlook.
- Misc: logic cleanup, see PR comments
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10988
Test Plan:
- New unit test `DBCompactionTestWithOngoingFileIngestionParam*` that failed pre-fix and passed afterwards.
- Made compatible with existing tests, see PR comments
- make check
- [Ongoing] Stress test rehearsal with normal value and aggressive CI value https://github.com/facebook/rocksdb/pull/10761
Reviewed By: cbi42
Differential Revision: D41535685
Pulled By: hx235
fbshipit-source-id: 549833a577ba1496d20a870583d4caa737da1258
2022-12-29 23:05:36 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1,
|
|
|
|
true /* allow_global_seqno */, false,
|
|
|
|
true, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "5,0,1,1");
|
|
|
|
|
|
|
|
// This file does not overlap with any file or with the running compaction
|
2016-10-21 00:05:32 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "5,0,1,2");
|
|
|
|
|
|
|
|
// Hold compaction from finishing
|
|
|
|
TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:2");
|
|
|
|
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "1,1,1,2");
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
size_t kcnt = 0;
|
|
|
|
VerifyDBFromMap(true_data, &kcnt, false);
|
2016-09-07 22:41:54 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
|
2018-07-17 21:04:18 +00:00
|
|
|
TEST_F(ExternalSSTFileTest, IngestNonExistingFile) {
|
|
|
|
Options options = CurrentOptions();
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
|
|
|
Status s = db_->IngestExternalFile({"non_existing_file"},
|
|
|
|
IngestExternalFileOptions());
|
|
|
|
ASSERT_NOK(s);
|
|
|
|
|
|
|
|
// Verify file deletion is not impacted (verify a bug fix)
|
|
|
|
ASSERT_OK(Put(Key(1), Key(1)));
|
|
|
|
ASSERT_OK(Put(Key(9), Key(9)));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_OK(Put(Key(1), Key(1)));
|
|
|
|
ASSERT_OK(Put(Key(9), Key(9)));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
Remove wait_unscheduled from waitForCompact internal API (#11443)
Summary:
Context:
In pull request https://github.com/facebook/rocksdb/issues/11436, we are introducing a new public API `waitForCompact(const WaitForCompactOptions& wait_for_compact_options)`. This API invokes the internal implementation `waitForCompact(bool wait_unscheduled=false)`. The unscheduled parameter indicates the compactions that are not yet scheduled but are required to process items in the queue.
In certain cases, we are unable to wait for compactions, such as during a shutdown or when background jobs are paused. It is important to return the appropriate status in these scenarios. For all other cases, we should wait for all compaction and flush jobs, including the unscheduled ones. The primary purpose of this new API is to wait until the system has resolved its compaction debt. Currently, the usage of `wait_unscheduled` is limited to test code.
This pull request eliminates the usage of wait_unscheduled. The internal `waitForCompact()` API now waits for unscheduled compactions unless the db is undergoing a shutdown. In the event of a shutdown, the API returns `Status::ShutdownInProgress()`.
Additionally, a new parameter, `abort_on_pause`, has been introduced with a default value of `false`. This parameter addresses the possibility of waiting indefinitely for unscheduled jobs if `PauseBackgroundWork()` was called before `waitForCompact()` is invoked. By setting `abort_on_pause` to `true`, the API will immediately return `Status::Aborted`.
Furthermore, all tests that previously called `waitForCompact(true)` have been fixed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11443
Test Plan:
Existing tests that involve a shutdown in progress:
- DBCompactionTest::CompactRangeShutdownWhileDelayed
- DBTestWithParam::PreShutdownMultipleCompaction
- DBTestWithParam::PreShutdownCompactionMiddle
Reviewed By: pdillinger
Differential Revision: D45923426
Pulled By: jaykorean
fbshipit-source-id: 7dc93fe6a6841a7d9d2d72866fa647090dba8eae
2023-05-18 01:13:50 +00:00
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
2019-01-30 00:16:53 +00:00
|
|
|
|
2018-07-17 21:04:18 +00:00
|
|
|
// After full compaction, there should be only 1 file.
|
|
|
|
std::vector<std::string> files;
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(env_->GetChildren(dbname_, &files));
|
2018-07-17 21:04:18 +00:00
|
|
|
int num_sst_files = 0;
|
|
|
|
for (auto& f : files) {
|
|
|
|
uint64_t number;
|
|
|
|
FileType type;
|
|
|
|
if (ParseFileName(f, &number, &type) && type == kTableFile) {
|
|
|
|
num_sst_files++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ASSERT_EQ(1, num_sst_files);
|
|
|
|
}
|
|
|
|
|
2021-07-07 18:13:09 +00:00
|
|
|
#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
2016-09-28 22:42:06 +00:00
|
|
|
TEST_F(ExternalSSTFileTest, CompactDuringAddFileRandom) {
|
2020-08-24 18:24:22 +00:00
|
|
|
env_->skip_fsync_ = true;
|
2016-09-28 22:42:06 +00:00
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.disable_auto_compactions = false;
|
|
|
|
options.level0_file_num_compaction_trigger = 2;
|
|
|
|
options.num_levels = 2;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
|
|
|
std::function<void()> bg_compact = [&]() {
|
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
|
|
|
};
|
|
|
|
|
|
|
|
int range_id = 0;
|
|
|
|
std::vector<int> file_keys;
|
|
|
|
std::function<void()> bg_addfile = [&]() {
|
Add missing range conflict check between file ingestion and RefitLevel() (#10988)
Summary:
**Context:**
File ingestion never checks whether the key range it acts on overlaps with an ongoing RefitLevel() (used in `CompactRange()` with `change_level=true`). That's because RefitLevel() doesn't register and make its key range known to file ingestion. Though it checks overlapping with other compactions by https://github.com/facebook/rocksdb/blob/7.8.fb/db/external_sst_file_ingestion_job.cc#L998.
RefitLevel() (used in `CompactRange()` with `change_level=true`) doesn't check whether the key range it acts on overlaps with an ongoing file ingestion. That's because file ingestion does not register and make its key range known to other compactions.
- Note that non-refitlevel-compaction (e.g, manual compaction w/o RefitLevel() or general compaction) also does not check key range overlap with ongoing file ingestion for the same reason.
- But it's fine. Credited to cbi42's discovery, `WaitForIngestFile` was called by background and foreground compactions. They were introduced in https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.
- Regardless, this PR registers file ingestion like a compaction is a general approach that will also add range conflict check between file ingestion and non-refitlevel-compaction, though it has not been the issue motivated this PR.
Above are bugs resulting in two bad consequences:
- If file ingestion and RefitLevel() creates files in the same level, then range-overlapped files will be created at that level and caught as corruption by `force_consistency_checks=true`
- If file ingestion and RefitLevel() creates file in different levels, then with one further compaction on the ingested file, it can result in two same keys both with seqno 0 in two different levels. Then with iterator's [optimization](https://github.com/facebook/rocksdb/blame/c62f3221698fd273b673d4f7e54eabb8329a4369/db/db_iter.cc#L342-L343) that assumes no two same keys both with seqno 0, it will either break this assertion in debug build or, even worst, return value of this same key for the key after it, which is the wrong value to return, in release build.
Therefore we decide to introduce range conflict check for file ingestion and RefitLevel() inspired from the existing range conflict check among compactions.
**Summary:**
- Treat file ingestion job and RefitLevel() as `Compaction` of new compaction reasons: `CompactionReason::kExternalSstIngestion` and `CompactionReason::kRefitLevel` and register/unregister them. File ingestion is treated as compaction from L0 to different levels and RefitLevel() as compaction from source level to target level.
- Check for `RangeOverlapWithCompaction` with other ongoing compactions, `RegisterCompaction()` on this "compaction" before changing the LSM state in `VersionStorageInfo`, and `UnregisterCompaction()` after changing.
- Replace scattered fixes (https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.) that prevents overlapping between file ingestion and non-refit-level compaction with this fix cuz those practices are easy to overlook.
- Misc: logic cleanup, see PR comments
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10988
Test Plan:
- New unit test `DBCompactionTestWithOngoingFileIngestionParam*` that failed pre-fix and passed afterwards.
- Made compatible with existing tests, see PR comments
- make check
- [Ongoing] Stress test rehearsal with normal value and aggressive CI value https://github.com/facebook/rocksdb/pull/10761
Reviewed By: cbi42
Differential Revision: D41535685
Pulled By: hx235
fbshipit-source-id: 549833a577ba1496d20a870583d4caa737da1258
2022-12-29 23:05:36 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, file_keys, range_id,
|
|
|
|
true /* allow_global_seqno */));
|
2016-09-28 22:42:06 +00:00
|
|
|
};
|
|
|
|
|
2018-07-12 21:39:07 +00:00
|
|
|
const int num_of_ranges = 1000;
|
2017-02-06 22:43:55 +00:00
|
|
|
std::vector<port::Thread> threads;
|
2018-07-12 21:39:07 +00:00
|
|
|
while (range_id < num_of_ranges) {
|
2016-10-18 23:59:37 +00:00
|
|
|
int range_start = range_id * 10;
|
2016-09-28 22:42:06 +00:00
|
|
|
int range_end = range_start + 10;
|
|
|
|
|
|
|
|
file_keys.clear();
|
|
|
|
for (int k = range_start + 1; k < range_end; k++) {
|
|
|
|
file_keys.push_back(k);
|
|
|
|
}
|
|
|
|
ASSERT_OK(Put(Key(range_start), Key(range_start)));
|
|
|
|
ASSERT_OK(Put(Key(range_end), Key(range_end)));
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
|
|
|
|
if (range_id % 10 == 0) {
|
|
|
|
threads.emplace_back(bg_compact);
|
|
|
|
}
|
|
|
|
threads.emplace_back(bg_addfile);
|
|
|
|
|
|
|
|
for (auto& t : threads) {
|
|
|
|
t.join();
|
|
|
|
}
|
|
|
|
threads.clear();
|
|
|
|
|
|
|
|
range_id++;
|
|
|
|
}
|
2016-10-18 23:59:37 +00:00
|
|
|
|
2018-07-12 21:39:07 +00:00
|
|
|
for (int rid = 0; rid < num_of_ranges; rid++) {
|
2016-10-18 23:59:37 +00:00
|
|
|
int range_start = rid * 10;
|
|
|
|
int range_end = range_start + 10;
|
|
|
|
|
|
|
|
ASSERT_EQ(Get(Key(range_start)), Key(range_start)) << rid;
|
|
|
|
ASSERT_EQ(Get(Key(range_end)), Key(range_end)) << rid;
|
|
|
|
for (int k = range_start + 1; k < range_end; k++) {
|
2022-05-06 20:03:58 +00:00
|
|
|
std::string v = Key(k) + std::to_string(rid);
|
2016-10-18 23:59:37 +00:00
|
|
|
ASSERT_EQ(Get(Key(k)), v) << rid;
|
|
|
|
}
|
|
|
|
}
|
2016-09-28 22:42:06 +00:00
|
|
|
}
|
2021-07-07 18:13:09 +00:00
|
|
|
#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
2016-09-28 22:42:06 +00:00
|
|
|
|
2016-09-07 22:41:54 +00:00
|
|
|
TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
|
2020-08-24 18:24:22 +00:00
|
|
|
env_->skip_fsync_ = true;
|
2016-09-07 22:41:54 +00:00
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.disable_auto_compactions = false;
|
|
|
|
options.level0_file_num_compaction_trigger = 4;
|
|
|
|
options.level_compaction_dynamic_level_bytes = true;
|
|
|
|
options.num_levels = 4;
|
|
|
|
DestroyAndReopen(options);
|
2016-10-21 00:05:32 +00:00
|
|
|
std::map<std::string, std::string> true_data;
|
2016-09-07 22:41:54 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
|
2016-09-07 22:41:54 +00:00
|
|
|
{"ExternalSSTFileTest::PickedLevelDynamic:0",
|
|
|
|
"BackgroundCallCompaction:0"},
|
|
|
|
{"DBImpl::BackgroundCompaction:Start",
|
|
|
|
"ExternalSSTFileTest::PickedLevelDynamic:1"},
|
|
|
|
{"ExternalSSTFileTest::PickedLevelDynamic:2",
|
|
|
|
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun"},
|
|
|
|
});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
2016-09-07 22:41:54 +00:00
|
|
|
|
|
|
|
// Flush 4 files containing the same keys
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
|
|
for (int k = 20; k <= 30; k++) {
|
|
|
|
ASSERT_OK(Put(Key(k), Key(k) + "put"));
|
2016-10-21 00:05:32 +00:00
|
|
|
true_data[Key(k)] = Key(k) + "put";
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
for (int k = 50; k <= 60; k++) {
|
|
|
|
ASSERT_OK(Put(Key(k), Key(k) + "put"));
|
2016-10-21 00:05:32 +00:00
|
|
|
true_data[Key(k)] = Key(k) + "put";
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for BackgroundCompaction() to be called
|
|
|
|
TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:0");
|
|
|
|
TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:1");
|
|
|
|
|
|
|
|
// This file overlaps with the output of the compaction (going to L3)
|
|
|
|
// so the file will be added to L0 since L3 is the base level
|
Add missing range conflict check between file ingestion and RefitLevel() (#10988)
Summary:
**Context:**
File ingestion never checks whether the key range it acts on overlaps with an ongoing RefitLevel() (used in `CompactRange()` with `change_level=true`). That's because RefitLevel() doesn't register and make its key range known to file ingestion. Though it checks overlapping with other compactions by https://github.com/facebook/rocksdb/blob/7.8.fb/db/external_sst_file_ingestion_job.cc#L998.
RefitLevel() (used in `CompactRange()` with `change_level=true`) doesn't check whether the key range it acts on overlaps with an ongoing file ingestion. That's because file ingestion does not register and make its key range known to other compactions.
- Note that non-refitlevel-compaction (e.g, manual compaction w/o RefitLevel() or general compaction) also does not check key range overlap with ongoing file ingestion for the same reason.
- But it's fine. Credited to cbi42's discovery, `WaitForIngestFile` was called by background and foreground compactions. They were introduced in https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.
- Regardless, this PR registers file ingestion like a compaction is a general approach that will also add range conflict check between file ingestion and non-refitlevel-compaction, though it has not been the issue motivated this PR.
Above are bugs resulting in two bad consequences:
- If file ingestion and RefitLevel() creates files in the same level, then range-overlapped files will be created at that level and caught as corruption by `force_consistency_checks=true`
- If file ingestion and RefitLevel() creates file in different levels, then with one further compaction on the ingested file, it can result in two same keys both with seqno 0 in two different levels. Then with iterator's [optimization](https://github.com/facebook/rocksdb/blame/c62f3221698fd273b673d4f7e54eabb8329a4369/db/db_iter.cc#L342-L343) that assumes no two same keys both with seqno 0, it will either break this assertion in debug build or, even worst, return value of this same key for the key after it, which is the wrong value to return, in release build.
Therefore we decide to introduce range conflict check for file ingestion and RefitLevel() inspired from the existing range conflict check among compactions.
**Summary:**
- Treat file ingestion job and RefitLevel() as `Compaction` of new compaction reasons: `CompactionReason::kExternalSstIngestion` and `CompactionReason::kRefitLevel` and register/unregister them. File ingestion is treated as compaction from L0 to different levels and RefitLevel() as compaction from source level to target level.
- Check for `RangeOverlapWithCompaction` with other ongoing compactions, `RegisterCompaction()` on this "compaction" before changing the LSM state in `VersionStorageInfo`, and `UnregisterCompaction()` after changing.
- Replace scattered fixes (https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.) that prevents overlapping between file ingestion and non-refit-level compaction with this fix cuz those practices are easy to overlook.
- Misc: logic cleanup, see PR comments
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10988
Test Plan:
- New unit test `DBCompactionTestWithOngoingFileIngestionParam*` that failed pre-fix and passed afterwards.
- Made compatible with existing tests, see PR comments
- make check
- [Ongoing] Stress test rehearsal with normal value and aggressive CI value https://github.com/facebook/rocksdb/pull/10761
Reviewed By: cbi42
Differential Revision: D41535685
Pulled By: hx235
fbshipit-source-id: 549833a577ba1496d20a870583d4caa737da1258
2022-12-29 23:05:36 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {31, 32, 33, 34}, -1,
|
|
|
|
true /* allow_global_seqno */, false,
|
|
|
|
true, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "5");
|
|
|
|
|
|
|
|
// This file does not overlap with the current running compactiong
|
2016-10-21 00:05:32 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
|
2019-04-08 18:12:25 +00:00
|
|
|
true, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "5,0,0,1");
|
|
|
|
|
|
|
|
// Hold compaction from finishing
|
|
|
|
TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:2");
|
|
|
|
|
|
|
|
// Output of the compaction will go to L3
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
2016-09-07 22:41:54 +00:00
|
|
|
EXPECT_EQ(FilesPerLevel(), "1,0,0,2");
|
|
|
|
|
|
|
|
Close();
|
|
|
|
options.disable_auto_compactions = true;
|
|
|
|
Reopen(options);
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 15, 19}, -1, false, false,
|
2019-04-08 18:12:25 +00:00
|
|
|
true, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
ASSERT_EQ(FilesPerLevel(), "1,0,0,3");
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1001, 1002}, -1, false,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, true, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
ASSERT_EQ(FilesPerLevel(), "1,0,0,4");
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {500, 600, 700}, -1, false,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, true, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
ASSERT_EQ(FilesPerLevel(), "1,0,0,5");
|
|
|
|
|
|
|
|
// File 5 overlaps with file 2 (L3 / base level)
|
2019-01-30 00:16:53 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false, true,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
ASSERT_EQ(FilesPerLevel(), "2,0,0,5");
|
|
|
|
|
|
|
|
// File 6 overlaps with file 2 (L3 / base level) and file 5 (L0)
|
2019-01-30 00:16:53 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, true,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
ASSERT_EQ(FilesPerLevel(), "3,0,0,5");
|
|
|
|
|
|
|
|
// Verify data in files
|
2016-10-21 00:05:32 +00:00
|
|
|
size_t kcnt = 0;
|
|
|
|
VerifyDBFromMap(true_data, &kcnt, false);
|
2016-09-07 22:41:54 +00:00
|
|
|
|
|
|
|
// Write range [5 => 10] to L0
|
|
|
|
for (int i = 5; i <= 10; i++) {
|
|
|
|
std::string k = Key(i);
|
|
|
|
std::string v = k + "put";
|
|
|
|
ASSERT_OK(Put(k, v));
|
2016-10-21 00:05:32 +00:00
|
|
|
true_data[k] = v;
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_EQ(FilesPerLevel(), "4,0,0,5");
|
|
|
|
|
|
|
|
// File 7 overlaps with file 4 (L3)
|
2016-10-21 00:05:32 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {650, 651, 652}, -1, false,
|
2019-04-08 18:12:25 +00:00
|
|
|
false, true, false, false, &true_data));
|
2016-09-07 22:41:54 +00:00
|
|
|
ASSERT_EQ(FilesPerLevel(), "5,0,0,5");
|
|
|
|
|
2016-10-21 00:05:32 +00:00
|
|
|
VerifyDBFromMap(true_data, &kcnt, false);
|
2016-09-07 22:41:54 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
2016-09-07 22:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Writes ten SST files using a reverse-bytewise comparator and then checks
// which batches of those files DeprecatedAddFile() accepts or rejects.
TEST_F(ExternalSSTFileTest, AddExternalSstFileWithCustomCompartor) {
  Options options = CurrentOptions();
  options.comparator = ReverseBytewiseComparator();
  DestroyAndReopen(options);

  SstFileWriter writer(EnvOptions(), options);

  // File number n holds the 15 keys [10 * n, 10 * n + 14], written from the
  // highest key down to the lowest:
  //   file 0: {14 -> 0}
  //   file 1: {24 -> 10}
  //   file 2: {34 -> 20}
  //   file 3: {44 -> 30}
  //   ..
  std::vector<std::string> sst_paths;
  for (int file_idx = 0; file_idx < 10; ++file_idx) {
    const std::string path = sst_files_dir_ + env_->GenerateUniqueId();
    ASSERT_OK(writer.Open(path));

    const int lowest_key = file_idx * 10;
    const int highest_key = lowest_key + 14;
    for (int key = highest_key; key >= lowest_key; --key) {
      ASSERT_OK(writer.Put(Key(key), Key(key)));
    }

    ExternalSstFileInfo info;
    ASSERT_OK(writer.Finish(&info));
    sst_paths.push_back(path);
  }

  // The 2nd and 3rd files of this batch (files 4 and 5) overlap with each
  // other, so the batch is rejected.
  const std::vector<std::string> self_overlapping = {
      sst_paths[0], sst_paths[4], sst_paths[5], sst_paths[7]};
  ASSERT_NOK(DeprecatedAddFile(self_overlapping));

  // These 2 files don't overlap with each other.
  const std::vector<std::string> first_batch = {sst_paths[0], sst_paths[2]};
  ASSERT_OK(DeprecatedAddFile(first_batch));

  // These 2 files don't overlap with each other but overlap with keys that
  // are already in the DB.
  const std::vector<std::string> db_overlapping = {sst_paths[3], sst_paths[7]};
  ASSERT_NOK(DeprecatedAddFile(db_overlapping));

  // Files don't overlap with each other or with the DB key range.
  const std::vector<std::string> second_batch = {sst_paths[4], sst_paths[6],
                                                 sst_paths[8]};
  ASSERT_OK(DeprecatedAddFile(second_batch));

  // Only the even-numbered files were ingested, so within every window of 20
  // keys the first 15 are present and the remaining 5 are missing.
  for (int key = 0; key < 100; ++key) {
    const bool ingested = (key % 20 <= 14);
    const std::string expected = ingested ? Key(key) : "NOT_FOUND";
    ASSERT_EQ(Get(Key(key)), expected);
  }
}
|
|
|
|
|
2016-10-13 17:49:06 +00:00
|
|
|
TEST_F(ExternalSSTFileTest, AddFileTrivialMoveBug) {
|
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.num_levels = 3;
|
|
|
|
options.IncreaseParallelism(20);
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 4}, 1)); // L3
|
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 3}, 2)); // L2
|
|
|
|
|
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {10, 14}, 3)); // L3
|
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {12, 13}, 4)); // L2
|
|
|
|
|
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {20, 24}, 5)); // L3
|
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {22, 23}, 6)); // L2
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
2018-04-13 00:55:14 +00:00
|
|
|
"CompactionJob::Run():Start", [&](void* /*arg*/) {
|
Add missing range conflict check between file ingestion and RefitLevel() (#10988)
Summary:
**Context:**
File ingestion never checks whether the key range it acts on overlaps with an ongoing RefitLevel() (used in `CompactRange()` with `change_level=true`). That's because RefitLevel() doesn't register and make its key range known to file ingestion. Though it checks overlapping with other compactions by https://github.com/facebook/rocksdb/blob/7.8.fb/db/external_sst_file_ingestion_job.cc#L998.
RefitLevel() (used in `CompactRange()` with `change_level=true`) doesn't check whether the key range it acts on overlaps with an ongoing file ingestion. That's because file ingestion does not register and make its key range known to other compactions.
- Note that non-refitlevel-compaction (e.g, manual compaction w/o RefitLevel() or general compaction) also does not check key range overlap with ongoing file ingestion for the same reason.
- But it's fine. Credited to cbi42's discovery, `WaitForIngestFile` was called by background and foreground compactions. They were introduced in https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.
- Regardless, this PR registers file ingestion like a compaction is a general approach that will also add range conflict check between file ingestion and non-refitlevel-compaction, though it has not been the issue motivated this PR.
Above are bugs resulting in two bad consequences:
- If file ingestion and RefitLevel() creates files in the same level, then range-overlapped files will be created at that level and caught as corruption by `force_consistency_checks=true`
- If file ingestion and RefitLevel() creates file in different levels, then with one further compaction on the ingested file, it can result in two same keys both with seqno 0 in two different levels. Then with iterator's [optimization](https://github.com/facebook/rocksdb/blame/c62f3221698fd273b673d4f7e54eabb8329a4369/db/db_iter.cc#L342-L343) that assumes no two same keys both with seqno 0, it will either break this assertion in debug build or, even worst, return value of this same key for the key after it, which is the wrong value to return, in release build.
Therefore we decide to introduce range conflict check for file ingestion and RefitLevel() inspired from the existing range conflict check among compactions.
**Summary:**
- Treat file ingestion job and RefitLevel() as `Compaction` of new compaction reasons: `CompactionReason::kExternalSstIngestion` and `CompactionReason::kRefitLevel` and register/unregister them. File ingestion is treated as compaction from L0 to different levels and RefitLevel() as compaction from source level to target level.
- Check for `RangeOverlapWithCompaction` with other ongoing compactions, `RegisterCompaction()` on this "compaction" before changing the LSM state in `VersionStorageInfo`, and `UnregisterCompaction()` after changing.
- Replace scattered fixes (https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.) that prevents overlapping between file ingestion and non-refit-level compaction with this fix cuz those practices are easy to overlook.
- Misc: logic cleanup, see PR comments
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10988
Test Plan:
- New unit test `DBCompactionTestWithOngoingFileIngestionParam*` that failed pre-fix and passed afterwards.
- Made compatible with existing tests, see PR comments
- make check
- [Ongoing] Stress test rehearsal with normal value and aggressive CI value https://github.com/facebook/rocksdb/pull/10761
Reviewed By: cbi42
Differential Revision: D41535685
Pulled By: hx235
fbshipit-source-id: 549833a577ba1496d20a870583d4caa737da1258
2022-12-29 23:05:36 +00:00
|
|
|
// Fit in L3 but will overlap with the compaction output so will be
|
|
|
|
// added to L2. Prior to the fix, a compaction will then trivially move
|
|
|
|
// this file to L3 and break LSM consistency
|
Skip deleted WALs during recovery
Summary:
This patch record min log number to keep to the manifest while flushing SST files to ignore them and any WAL older than them during recovery. This is to avoid scenarios when we have a gap between the WAL files are fed to the recovery procedure. The gap could happen by for example out-of-order WAL deletion. Such gap could cause problems in 2PC recovery where the prepared and commit entry are placed into two separate WAL and gap in the WALs could result into not processing the WAL with the commit entry and hence breaking the 2PC recovery logic.
Before the commit, for 2PC case, we determined which log number to keep in FindObsoleteFiles(). We looked at the earliest logs with outstanding prepare entries, or prepare entries whose respective commit or abort are in memtable. With the commit, the same calculation is done while we apply the SST flush. Just before installing the flush file, we precompute the earliest log file to keep after the flush finishes using the same logic (but skipping the memtables just flushed), record this information to the manifest entry for this new flushed SST file. This pre-computed value is also remembered in memory, and will later be used to determine whether a log file can be deleted. This value is unlikely to change until next flush because the commit entry will stay in memtable. (In WritePrepared, we could have removed the older log files as soon as all prepared entries are committed. It's not yet done anyway. Even if we do it, the only thing we loss with this new approach is earlier log deletion between two flushes, which does not guarantee to happen anyway because the obsolete file clean-up function is only executed after flush or compaction)
This min log number to keep is stored in the manifest using the safely-ignore customized field of AddFile entry, in order to guarantee that the DB generated using newer release can be opened by previous releases no older than 4.2.
Closes https://github.com/facebook/rocksdb/pull/3765
Differential Revision: D7747618
Pulled By: siying
fbshipit-source-id: d00c92105b4f83852e9754a1b70d6b64cb590729
2018-05-03 22:35:11 +00:00
|
|
|
static std::atomic<bool> called = {false};
|
|
|
|
if (!called) {
|
|
|
|
called = true;
|
|
|
|
ASSERT_OK(dbfull()->SetOptions({{"max_bytes_for_level_base", "1"}}));
|
Add missing range conflict check between file ingestion and RefitLevel() (#10988)
Summary:
**Context:**
File ingestion never checks whether the key range it acts on overlaps with an ongoing RefitLevel() (used in `CompactRange()` with `change_level=true`). That's because RefitLevel() doesn't register and make its key range known to file ingestion. Though it checks overlapping with other compactions by https://github.com/facebook/rocksdb/blob/7.8.fb/db/external_sst_file_ingestion_job.cc#L998.
RefitLevel() (used in `CompactRange()` with `change_level=true`) doesn't check whether the key range it acts on overlaps with an ongoing file ingestion. That's because file ingestion does not register and make its key range known to other compactions.
- Note that non-refitlevel-compaction (e.g, manual compaction w/o RefitLevel() or general compaction) also does not check key range overlap with ongoing file ingestion for the same reason.
- But it's fine. Credited to cbi42's discovery, `WaitForIngestFile` was called by background and foreground compactions. They were introduced in https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.
- Regardless, this PR registers file ingestion like a compaction is a general approach that will also add range conflict check between file ingestion and non-refitlevel-compaction, though it has not been the issue motivated this PR.
Above are bugs resulting in two bad consequences:
- If file ingestion and RefitLevel() creates files in the same level, then range-overlapped files will be created at that level and caught as corruption by `force_consistency_checks=true`
- If file ingestion and RefitLevel() creates file in different levels, then with one further compaction on the ingested file, it can result in two same keys both with seqno 0 in two different levels. Then with iterator's [optimization](https://github.com/facebook/rocksdb/blame/c62f3221698fd273b673d4f7e54eabb8329a4369/db/db_iter.cc#L342-L343) that assumes no two same keys both with seqno 0, it will either break this assertion in debug build or, even worst, return value of this same key for the key after it, which is the wrong value to return, in release build.
Therefore we decide to introduce range conflict check for file ingestion and RefitLevel() inspired from the existing range conflict check among compactions.
**Summary:**
- Treat file ingestion job and RefitLevel() as `Compaction` of new compaction reasons: `CompactionReason::kExternalSstIngestion` and `CompactionReason::kRefitLevel` and register/unregister them. File ingestion is treated as compaction from L0 to different levels and RefitLevel() as compaction from source level to target level.
- Check for `RangeOverlapWithCompaction` with other ongoing compactions, `RegisterCompaction()` on this "compaction" before changing the LSM state in `VersionStorageInfo`, and `UnregisterCompaction()` after changing.
- Replace scattered fixes (https://github.com/facebook/rocksdb/commit/0f88160f67d36ea30e3aca3a3cef924c3a009be6, https://github.com/facebook/rocksdb/commit/5c64fb67d2fc198f1a73ff3ae543749a6a41f513 and https://github.com/facebook/rocksdb/commit/87dfc1d23e0e16ff73e15f63c6fa0fb3b3fc8c8c.) that prevents overlapping between file ingestion and non-refit-level compaction with this fix cuz those practices are easy to overlook.
- Misc: logic cleanup, see PR comments
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10988
Test Plan:
- New unit test `DBCompactionTestWithOngoingFileIngestionParam*` that failed pre-fix and passed afterwards.
- Made compatible with existing tests, see PR comments
- make check
- [Ongoing] Stress test rehearsal with normal value and aggressive CI value https://github.com/facebook/rocksdb/pull/10761
Reviewed By: cbi42
Differential Revision: D41535685
Pulled By: hx235
fbshipit-source-id: 549833a577ba1496d20a870583d4caa737da1258
2022-12-29 23:05:36 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {15, 16}, 7,
|
|
|
|
true /* allow_global_seqno */));
|
Skip deleted WALs during recovery
Summary:
This patch record min log number to keep to the manifest while flushing SST files to ignore them and any WAL older than them during recovery. This is to avoid scenarios when we have a gap between the WAL files are fed to the recovery procedure. The gap could happen by for example out-of-order WAL deletion. Such gap could cause problems in 2PC recovery where the prepared and commit entry are placed into two separate WAL and gap in the WALs could result into not processing the WAL with the commit entry and hence breaking the 2PC recovery logic.
Before the commit, for 2PC case, we determined which log number to keep in FindObsoleteFiles(). We looked at the earliest logs with outstanding prepare entries, or prepare entries whose respective commit or abort are in memtable. With the commit, the same calculation is done while we apply the SST flush. Just before installing the flush file, we precompute the earliest log file to keep after the flush finishes using the same logic (but skipping the memtables just flushed), record this information to the manifest entry for this new flushed SST file. This pre-computed value is also remembered in memory, and will later be used to determine whether a log file can be deleted. This value is unlikely to change until next flush because the commit entry will stay in memtable. (In WritePrepared, we could have removed the older log files as soon as all prepared entries are committed. It's not yet done anyway. Even if we do it, the only thing we loss with this new approach is earlier log deletion between two flushes, which does not guarantee to happen anyway because the obsolete file clean-up function is only executed after flush or compaction)
This min log number to keep is stored in the manifest using the safely-ignore customized field of AddFile entry, in order to guarantee that the DB generated using newer release can be opened by previous releases no older than 4.2.
Closes https://github.com/facebook/rocksdb/pull/3765
Differential Revision: D7747618
Pulled By: siying
fbshipit-source-id: d00c92105b4f83852e9754a1b70d6b64cb590729
2018-05-03 22:35:11 +00:00
|
|
|
}
|
2016-10-13 17:49:06 +00:00
|
|
|
});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
2016-10-13 17:49:06 +00:00
|
|
|
|
|
|
|
CompactRangeOptions cro;
|
|
|
|
cro.exclusive_manual_compaction = false;
|
|
|
|
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
|
|
|
|
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
2016-10-13 17:49:06 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
2016-10-13 17:49:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Ingests four files with nested key ranges and then runs a full manual
// compaction over them, expecting every step to succeed.
TEST_F(ExternalSSTFileTest, CompactAddedFiles) {
  Options options = CurrentOptions();
  options.num_levels = 3;
  DestroyAndReopen(options);

  // Each range is nested inside the previous one. The original annotations
  // label the four files as landing in L3, L2, L1 and L0 respectively.
  // NOTE(review): num_levels is 3 here, so those labels look off by one --
  // confirm against the ingestion level-picking logic.
  const std::vector<std::vector<int>> nested_ranges = {
      {1, 10}, {2, 9}, {3, 8}, {4, 7}};
  int file_id = 0;
  for (const auto& keys : nested_ranges) {
    ++file_id;  // file ids run 1..4, in ingestion order
    ASSERT_OK(GenerateAndAddExternalFile(options, keys, file_id));
  }

  const CompactRangeOptions full_range_opts;
  ASSERT_OK(db_->CompactRange(full_range_opts, nullptr, nullptr));
}
|
2016-10-18 23:59:37 +00:00
|
|
|
|
|
|
|
// Builds an SST file from six long keys with distinct short prefixes and a
// common 100-character tail, then ingests it.
TEST_F(ExternalSSTFileTest, SstFileWriterNonSharedKeys) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);

  const std::string sst_path = sst_files_dir_ + "/not_shared";
  SstFileWriter writer(EnvOptions(), options);

  const std::string tail(100, 'X');
  ASSERT_OK(writer.Open(sst_path));

  // Prefixes are listed in ascending key order, which is the order in which
  // they are handed to the writer.
  const std::vector<std::string> prefixes = {"A",   "BB",   "CC",
                                             "CXD", "CZZZ", "ZAAAX"};
  for (const std::string& prefix : prefixes) {
    ASSERT_OK(writer.Put(prefix + tail, "VAL"));
  }

  ASSERT_OK(writer.Finish());
  ASSERT_OK(DeprecatedAddFile({sst_path}));
}
|
|
|
|
|
2019-12-12 22:05:48 +00:00
|
|
|
TEST_F(ExternalSSTFileTest, WithUnorderedWrite) {
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
SyncPoint::GetInstance()->LoadDependency(
|
|
|
|
{{"DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL",
|
|
|
|
"ExternalSSTFileTest::WithUnorderedWrite:WaitWriteWAL"},
|
|
|
|
{"DBImpl::WaitForPendingWrites:BeforeBlock",
|
|
|
|
"DBImpl::WriteImpl:BeforeUnorderedWriteMemtable"}});
|
|
|
|
SyncPoint::GetInstance()->SetCallBack(
|
Prefer static_cast in place of most reinterpret_cast (#12308)
Summary:
The following are risks associated with pointer-to-pointer reinterpret_cast:
* Can produce the "wrong result" (crash or memory corruption). IIRC, in theory this can happen for any up-cast or down-cast for a non-standard-layout type, though in practice would only happen for multiple inheritance cases (where the base class pointer might be "inside" the derived object). We don't use multiple inheritance a lot, but we do.
* Can mask useful compiler errors upon code change, including converting between unrelated pointer types that you are expecting to be related, and converting between pointer and scalar types unintentionally.
I can only think of some obscure cases where static_cast could be troublesome when it compiles as a replacement:
* Going through `void*` could plausibly cause unnecessary or broken pointer arithmetic. Suppose we have
`struct Derived: public Base1, public Base2`. If we have `Derived*` -> `void*` -> `Base2*` -> `Derived*` through reinterpret casts, this could plausibly work (though technical UB) assuming the `Base2*` is not dereferenced. Changing to static cast could introduce breaking pointer arithmetic.
* Unnecessary (but safe) pointer arithmetic could arise in a case like `Derived*` -> `Base2*` -> `Derived*` where before the Base2 pointer might not have been dereferenced. This could potentially affect performance.
With some light scripting, I tried replacing pointer-to-pointer reinterpret_casts with static_cast and kept the cases that still compile. Most occurrences of reinterpret_cast have successfully been changed (except for java/ and third-party/). 294 changed, 257 remain.
A couple of related interventions included here:
* Previously Cache::Handle was not actually derived from in the implementations and just used as a `void*` stand-in with reinterpret_cast. Now there is a relationship to allow static_cast. In theory, this could introduce pointer arithmetic (as described above) but is unlikely without multiple inheritance AND non-empty Cache::Handle.
* Remove some unnecessary casts to void* as this is allowed to be implicit (for better or worse).
Most of the remaining reinterpret_casts are for converting to/from raw bytes of objects. We could consider better idioms for these patterns in follow-up work.
I wish there were a way to implement a template variant of static_cast that would only compile if no pointer arithmetic is generated, but best I can tell, this is not possible. AFAIK the best you could do is a dynamic check that the void* conversion after the static cast is unchanged.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12308
Test Plan: existing tests, CI
Reviewed By: ltamasi
Differential Revision: D53204947
Pulled By: pdillinger
fbshipit-source-id: 9de23e618263b0d5b9820f4e15966876888a16e2
2024-02-07 18:44:11 +00:00
|
|
|
"DBImpl::IngestExternalFile:NeedFlush",
|
|
|
|
[&](void* need_flush) { ASSERT_TRUE(*static_cast<bool*>(need_flush)); });
|
2019-12-12 22:05:48 +00:00
|
|
|
|
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.unordered_write = true;
|
|
|
|
DestroyAndReopen(options);
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(Put("foo", "v1"));
|
2019-12-12 22:05:48 +00:00
|
|
|
SyncPoint::GetInstance()->EnableProcessing();
|
2020-12-22 23:08:17 +00:00
|
|
|
port::Thread writer([&]() { ASSERT_OK(Put("bar", "v2")); });
|
2019-12-12 22:05:48 +00:00
|
|
|
|
|
|
|
TEST_SYNC_POINT("ExternalSSTFileTest::WithUnorderedWrite:WaitWriteWAL");
|
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(options, {{"bar", "v3"}}, -1,
|
|
|
|
true /* allow_global_seqno */));
|
|
|
|
ASSERT_EQ(Get("bar"), "v3");
|
|
|
|
|
|
|
|
writer.join();
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
|
|
}
|
|
|
|
|
2021-07-07 18:13:09 +00:00
|
|
|
#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
2018-11-01 23:21:30 +00:00
|
|
|
TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
|
2020-06-29 23:55:35 +00:00
|
|
|
env_->skip_fsync_ = true;
|
2016-10-21 00:05:32 +00:00
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.IncreaseParallelism(20);
|
|
|
|
options.level0_slowdown_writes_trigger = 256;
|
|
|
|
options.level0_stop_writes_trigger = 256;
|
|
|
|
|
2019-01-30 00:16:53 +00:00
|
|
|
bool write_global_seqno = std::get<0>(GetParam());
|
|
|
|
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
2016-10-21 00:05:32 +00:00
|
|
|
for (int iter = 0; iter < 2; iter++) {
|
|
|
|
bool write_to_memtable = (iter == 0);
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
|
|
|
|
Random rnd(301);
|
|
|
|
std::map<std::string, std::string> true_data;
|
2018-07-14 00:25:34 +00:00
|
|
|
for (int i = 0; i < 500; i++) {
|
2016-10-21 00:05:32 +00:00
|
|
|
std::vector<std::pair<std::string, std::string>> random_data;
|
|
|
|
for (int j = 0; j < 100; j++) {
|
2020-07-09 21:33:42 +00:00
|
|
|
std::string k = rnd.RandomString(rnd.Next() % 20);
|
|
|
|
std::string v = rnd.RandomString(rnd.Next() % 50);
|
2016-10-21 00:05:32 +00:00
|
|
|
random_data.emplace_back(k, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (write_to_memtable && rnd.OneIn(4)) {
|
|
|
|
// 25% of writes go through memtable
|
|
|
|
for (auto& entry : random_data) {
|
|
|
|
ASSERT_OK(Put(entry.first, entry.second));
|
|
|
|
true_data[entry.first] = entry.second;
|
|
|
|
}
|
|
|
|
} else {
|
2019-01-30 00:16:53 +00:00
|
|
|
ASSERT_OK(GenerateAndAddExternalFile(
|
|
|
|
options, random_data, -1, true, write_global_seqno,
|
2019-04-08 18:12:25 +00:00
|
|
|
verify_checksums_before_ingest, false, true, &true_data));
|
2016-10-21 00:05:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
size_t kcnt = 0;
|
|
|
|
VerifyDBFromMap(true_data, &kcnt, false);
|
2020-12-22 23:08:17 +00:00
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
2016-10-21 00:05:32 +00:00
|
|
|
VerifyDBFromMap(true_data, &kcnt, false);
|
|
|
|
}
|
|
|
|
}
|
2021-07-07 18:13:09 +00:00
|
|
|
#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
2016-10-21 00:05:32 +00:00
|
|
|
|
2018-11-01 23:21:30 +00:00
|
|
|
// Ingests four key ranges with global sequence numbers allowed into a
// five-level DB with auto compactions disabled, checking the per-level file
// counts after each ingestion.
TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
  Options options = CurrentOptions();
  options.num_levels = 5;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  std::map<std::string, std::string> expected;

  // Insert 100 -> 200 into the memtable
  for (int key = 100; key <= 200; ++key) {
    ASSERT_OK(Put(Key(key), "memtable"));
    expected[Key(key)] = "memtable";
  }

  const bool write_global_seqno = std::get<0>(GetParam());
  const bool verify_checksums_before_ingest = std::get<1>(GetParam());

  // Builds a file holding keys [first, last], all mapped to `value`, and
  // ingests it; `expected` is passed along so it can be kept in step.
  auto ingest_range = [&](int first, int last, const std::string& value) {
    std::vector<std::pair<std::string, std::string>> contents;
    for (int key = first; key <= last; ++key) {
      contents.emplace_back(Key(key), value);
    }
    return GenerateAndAddExternalFile(
        options, contents, -1, true, write_global_seqno,
        verify_checksums_before_ingest, false, false, &expected);
  };

  // Insert 0 -> 20 using AddFile.
  // This file doesn't overlap with anything in the DB, so it goes to L4.
  ASSERT_OK(ingest_range(0, 20, "L4"));
  ASSERT_EQ("0,0,0,0,1", FilesPerLevel());

  // Insert 80 -> 130 using AddFile.
  // This file overlaps with the memtable, so it will flush it and add
  // itself to L0.
  ASSERT_OK(ingest_range(80, 130, "L0"));
  ASSERT_EQ("2,0,0,0,1", FilesPerLevel());

  // Insert 30 -> 50 using AddFile.
  // This file doesn't overlap with anything in the DB and fits in L4 as well.
  ASSERT_OK(ingest_range(30, 50, "L4"));
  ASSERT_EQ("2,0,0,0,2", FilesPerLevel());

  // Insert 10 -> 40 using AddFile.
  // This file overlaps with files in L4, so it is ingested in L3.
  ASSERT_OK(ingest_range(10, 40, "L3"));
  ASSERT_EQ("2,0,0,1,2", FilesPerLevel());

  size_t key_count = 0;
  VerifyDBFromMap(expected, &key_count, false);
}
|
|
|
|
|
2024-01-25 07:30:08 +00:00
|
|
|
// Universal-compaction counterpart of the assigned-level test: ingests four
// key ranges while a snapshot is held and checks the per-level file counts
// after each ingestion.
TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedUniversal) {
  const bool write_global_seqno = std::get<0>(GetParam());
  const bool verify_checksums_before_ingest = std::get<1>(GetParam());

  Options options = CurrentOptions();
  options.num_levels = 5;
  options.compaction_style = kCompactionStyleUniversal;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  std::map<std::string, std::string> expected;

  // Write 200 -> 250 into the bottommost level
  for (int key = 200; key <= 250; ++key) {
    ASSERT_OK(Put(Key(key), "bottommost"));
    expected[Key(key)] = "bottommost";
  }
  CompactRangeOptions force_bottommost;
  force_bottommost.bottommost_level_compaction =
      BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(force_bottommost, nullptr, nullptr));
  ASSERT_EQ("0,0,0,0,1", FilesPerLevel());

  // Take a snapshot to enforce global sequence number.
  const Snapshot* snapshot = db_->GetSnapshot();

  // Insert 100 -> 200 into the memtable
  for (int key = 100; key <= 200; ++key) {
    ASSERT_OK(Put(Key(key), "memtable"));
    expected[Key(key)] = "memtable";
  }

  // Builds a file holding keys [first, last], all mapped to `value`, and
  // ingests it; `expected` is passed along so it can be kept in step.
  auto ingest_range = [&](int first, int last, const std::string& value) {
    std::vector<std::pair<std::string, std::string>> contents;
    for (int key = first; key <= last; ++key) {
      contents.emplace_back(Key(key), value);
    }
    return GenerateAndAddExternalFile(
        options, contents, -1, true, write_global_seqno,
        verify_checksums_before_ingest, false, false, &expected);
  };

  // Insert 0 -> 20 using AddFile.
  // This file doesn't overlap with anything in the DB, so it goes to L4.
  ASSERT_OK(ingest_range(0, 20, "L4"));
  ASSERT_EQ("0,0,0,0,2", FilesPerLevel());

  // Insert 80 -> 130 using AddFile.
  // This file overlaps with the memtable, so it will flush it and add
  // itself to L0.
  ASSERT_OK(ingest_range(80, 130, "L0"));
  ASSERT_EQ("2,0,0,0,2", FilesPerLevel());

  // Insert 30 -> 50 using AddFile.
  // This file doesn't overlap with anything in the DB and fits in L4 as well.
  ASSERT_OK(ingest_range(30, 50, "L4"));
  ASSERT_EQ("2,0,0,0,3", FilesPerLevel());

  // Insert 10 -> 40 using AddFile.
  // This file overlaps with files in L4, so it is ingested into the last
  // non-overlapping and non-empty level; in this case, that is L0.
  ASSERT_OK(ingest_range(10, 40, "L3"));
  ASSERT_EQ("3,0,0,0,3", FilesPerLevel());

  size_t key_count = 0;
  VerifyDBFromMap(expected, &key_count, false);
  db_->ReleaseSnapshot(snapshot);
}
|
|
|
|
|
2018-11-01 23:21:30 +00:00
|
|
|
// Ingests files whose keys either miss or hit the keys currently in the
// memtable, and uses the active-memtable entry count to check whether the
// ingestion flushed the memtable.
TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
  const bool write_global_seqno = std::get<0>(GetParam());
  const bool verify_checksums_before_ingest = std::get<1>(GetParam());
  const auto& active_entries_prop = DB::Properties::kNumEntriesActiveMemTable;

  Options options = CurrentOptions();
  DestroyAndReopen(options);

  std::map<std::string, std::string> true_data;
  uint64_t entries_in_memtable = 0;

  for (int key : {10, 20, 40, 80}) {
    ASSERT_OK(Put(Key(key), "memtable"));
    true_data[Key(key)] = "memtable";
  }
  ASSERT_TRUE(db_->GetIntProperty(active_entries_prop, &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  // No need for flush
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {90, 100, 110}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(active_entries_prop, &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  // This file will flush the memtable
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {19, 20, 21}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(active_entries_prop, &entries_in_memtable));
  ASSERT_EQ(entries_in_memtable, 0);

  for (int key : {200, 201, 205, 206}) {
    ASSERT_OK(Put(Key(key), "memtable"));
    true_data[Key(key)] = "memtable";
  }
  ASSERT_TRUE(db_->GetIntProperty(active_entries_prop, &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  // No need for flush: the keys of this file fit between the memtable keys
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {202, 203, 204}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(active_entries_prop, &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  // This file will flush the memtable
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {206, 207}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(active_entries_prop, &entries_in_memtable));
  ASSERT_EQ(entries_in_memtable, 0);

  size_t kcnt = 0;
  VerifyDBFromMap(true_data, &kcnt, false);
}
|
|
|
|
|
2018-11-01 23:21:30 +00:00
|
|
|
// Ingests the same two keys twice, overwrites them through the memtable, and
// checks that reads return the memtable values both before and after the
// memtable is flushed.
TEST_P(ExternalSSTFileTest, L0SortingIssue) {
  Options options = CurrentOptions();
  options.num_levels = 2;
  DestroyAndReopen(options);

  ASSERT_OK(Put(Key(1), "memtable"));
  ASSERT_OK(Put(Key(10), "memtable"));

  bool write_global_seqno = std::get<0>(GetParam());
  bool verify_checksums_before_ingest = std::get<1>(GetParam());

  // No Flush needed, No global seqno needed, Ingest in L1
  ASSERT_OK(
      GenerateAndAddExternalFile(options, {7, 8}, -1, true, write_global_seqno,
                                 verify_checksums_before_ingest, false, false));
  // No Flush needed, but need a global seqno, Ingest in L0
  ASSERT_OK(
      GenerateAndAddExternalFile(options, {7, 8}, -1, true, write_global_seqno,
                                 verify_checksums_before_ingest, false, false));
  // Diagnostic output only; the level layout is not asserted here.
  printf("%s\n", FilesPerLevel().c_str());

  // Overwrite what we added using external files
  ASSERT_OK(Put(Key(7), "memtable"));
  ASSERT_OK(Put(Key(8), "memtable"));

  // Read values from memtable
  ASSERT_EQ(Get(Key(7)), "memtable");
  ASSERT_EQ(Get(Key(8)), "memtable");

  // Flush and read from L0
  ASSERT_OK(Flush());
  printf("%s\n", FilesPerLevel().c_str());
  ASSERT_EQ(Get(Key(7)), "memtable");
  ASSERT_EQ(Get(Key(8)), "memtable");
}
|
2016-11-09 18:37:51 +00:00
|
|
|
|
|
|
|
// Orchestrates, via sync points, a blocked writer, a file ingestion and a
// compaction, then checks that both background threads finish within the
// polling window (otherwise the deadlock bug has been hit).
TEST_F(ExternalSSTFileTest, CompactionDeadlock) {
  Options options = CurrentOptions();
  options.num_levels = 2;
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 4;
  options.level0_stop_writes_trigger = 4;
  DestroyAndReopen(options);

  // Atomic counter of currently running bg threads
  std::atomic<int> running_threads(0);

  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->LoadDependency({
      {"DBImpl::DelayWrite:Wait", "ExternalSSTFileTest::DeadLock:0"},
      {"ExternalSSTFileTest::DeadLock:1", "DBImpl::AddFile:Start"},
      {"DBImpl::AddFile:MutexLock", "ExternalSSTFileTest::DeadLock:2"},
      {"ExternalSSTFileTest::DeadLock:3", "BackgroundCallCompaction:0"},
  });
  sync_point->EnableProcessing();

  // Start ingesting an external file in the background
  ROCKSDB_NAMESPACE::port::Thread bg_ingest_file([&]() {
    running_threads.fetch_add(1);
    ASSERT_OK(GenerateAndAddExternalFile(options, {5, 6}));
    running_threads.fetch_sub(1);
  });

  // Create four L0 files, one flush per key.
  for (int key = 1; key <= 4; ++key) {
    ASSERT_OK(Put(Key(key), "memtable"));
    ASSERT_OK(Flush());
  }

  // This thread will try to insert into the memtable but since we have 4 L0
  // files this thread will be blocked and hold the writer thread
  ROCKSDB_NAMESPACE::port::Thread bg_block_put([&]() {
    running_threads.fetch_add(1);
    ASSERT_OK(Put(Key(10), "memtable"));
    running_threads.fetch_sub(1);
  });

  // Make sure DelayWrite is called first
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:0");

  // `DBImpl::AddFile:Start` will wait until we get here
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:1");

  // Wait for IngestExternalFile() to start and acquire mutex
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:2");

  // Now let compaction start
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:3");

  // Wait for max 5 seconds (10 polls, 500ms apart); if not all bg threads
  // have finished by then, we hit the deadlock bug
  const int kMaxPolls = 10;
  const int kPollIntervalMicros = 500000;
  for (int poll = 0; poll < kMaxPolls && running_threads.load() != 0; ++poll) {
    // Make sure we do a "real sleep", not a mock one.
    SystemClock::Default()->SleepForMicroseconds(kPollIntervalMicros);
  }

  ASSERT_EQ(running_threads.load(), 0);

  bg_ingest_file.join();
  bg_block_put.join();
}
|
|
|
|
|
2016-11-13 04:03:39 +00:00
|
|
|
// Destroys SstFileWriter objects that were opened but never successfully
// finished: once without calling Finish() at all, and once after a Finish()
// call that is expected to fail.
TEST_F(ExternalSSTFileTest, DirtyExit) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  const std::string file_path = sst_files_dir_ + "/dirty_exit";

  // Destruct SstFileWriter without calling Finish()
  {
    SstFileWriter unfinished_writer(EnvOptions(), options);
    ASSERT_OK(unfinished_writer.Open(file_path));
  }

  // Destruct SstFileWriter with a failing Finish
  {
    SstFileWriter failing_writer(EnvOptions(), options);
    ASSERT_OK(failing_writer.Open(file_path));
    ASSERT_NOK(failing_writer.Finish());
  }
}
|
|
|
|
|
2016-12-05 22:16:23 +00:00
|
|
|
// Writes SST files with writers bound to specific column families (and one
// bound to none), then checks which column families accept each file on
// ingestion, including after a column family has been dropped.
TEST_F(ExternalSSTFileTest, FileWithCFInfo) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"koko", "toto"}, options);

  SstFileWriter sfw_default(EnvOptions(), options, handles_[0]);
  SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]);
  // NOTE(review): sfw_cf2 is constructed but never opened or written to in
  // this test.
  SstFileWriter sfw_cf2(EnvOptions(), options, handles_[2]);
  SstFileWriter sfw_unknown(EnvOptions(), options);

  // default_cf.sst
  const std::string cf_default_sst = sst_files_dir_ + "/default_cf.sst";
  ASSERT_OK(sfw_default.Open(cf_default_sst));
  ASSERT_OK(sfw_default.Put("K1", "V1"));
  ASSERT_OK(sfw_default.Put("K2", "V2"));
  ASSERT_OK(sfw_default.Finish());

  // cf1.sst
  const std::string cf1_sst = sst_files_dir_ + "/cf1.sst";
  ASSERT_OK(sfw_cf1.Open(cf1_sst));
  ASSERT_OK(sfw_cf1.Put("K3", "V1"));
  ASSERT_OK(sfw_cf1.Put("K4", "V2"));
  ASSERT_OK(sfw_cf1.Finish());

  // cf_unknown.sst
  const std::string unknown_sst = sst_files_dir_ + "/cf_unknown.sst";
  ASSERT_OK(sfw_unknown.Open(unknown_sst));
  ASSERT_OK(sfw_unknown.Put("K5", "V1"));
  ASSERT_OK(sfw_unknown.Put("K6", "V2"));
  ASSERT_OK(sfw_unknown.Finish());

  IngestExternalFileOptions ifo;

  // cf1.sst: rejected where the SST's CF doesn't match, accepted by cf1.
  for (int cf : {0, 2}) {
    ASSERT_NOK(db_->IngestExternalFile(handles_[cf], {cf1_sst}, ifo));
  }
  ASSERT_OK(db_->IngestExternalFile(handles_[1], {cf1_sst}, ifo));

  // default_cf.sst: rejected where the SST's CF doesn't match, accepted by
  // the default CF.
  for (int cf : {1, 2}) {
    ASSERT_NOK(db_->IngestExternalFile(handles_[cf], {cf_default_sst}, ifo));
  }
  ASSERT_OK(db_->IngestExternalFile(handles_[0], {cf_default_sst}, ifo));

  // SST CF unknown: accepted by every column family.
  for (int cf : {1, 2, 0}) {
    ASSERT_OK(db_->IngestExternalFile(handles_[cf], {unknown_sst}, ifo));
  }

  // Cannot ingest a file into a dropped CF
  ASSERT_OK(db_->DropColumnFamily(handles_[1]));
  ASSERT_NOK(db_->IngestExternalFile(handles_[1], {unknown_sst}, ifo));

  // CF was not dropped, ok to Ingest
  ASSERT_OK(db_->IngestExternalFile(handles_[2], {unknown_sst}, ifo));
}
|
|
|
|
|
2018-04-13 17:47:54 +00:00
|
|
|
/*
|
2019-05-24 04:54:23 +00:00
|
|
|
* Test and verify the functionality of ingestion_options.move_files and
|
|
|
|
* ingestion_options.failed_move_fall_back_to_copy
|
2018-04-13 17:47:54 +00:00
|
|
|
*/
|
2019-05-24 04:54:23 +00:00
|
|
|
TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) {
|
|
|
|
const bool fail_link = std::get<0>(GetParam());
|
|
|
|
const bool failed_move_fall_back_to_copy = std::get<1>(GetParam());
|
2023-01-27 22:51:39 +00:00
|
|
|
fs_->set_fail_link(fail_link);
|
2019-05-24 04:54:23 +00:00
|
|
|
const EnvOptions env_options;
|
|
|
|
DestroyAndReopen(options_);
|
2018-04-13 17:47:54 +00:00
|
|
|
const int kNumKeys = 10000;
|
2019-05-24 04:54:23 +00:00
|
|
|
IngestExternalFileOptions ifo;
|
|
|
|
ifo.move_files = true;
|
|
|
|
ifo.failed_move_fall_back_to_copy = failed_move_fall_back_to_copy;
|
2018-04-13 17:47:54 +00:00
|
|
|
|
|
|
|
std::string file_path = sst_files_dir_ + "file1.sst";
|
|
|
|
// Create SstFileWriter for default column family
|
2019-05-24 04:54:23 +00:00
|
|
|
SstFileWriter sst_file_writer(env_options, options_);
|
2018-04-13 17:47:54 +00:00
|
|
|
ASSERT_OK(sst_file_writer.Open(file_path));
|
|
|
|
for (int i = 0; i < kNumKeys; i++) {
|
|
|
|
ASSERT_OK(sst_file_writer.Put(Key(i), Key(i) + "_value"));
|
|
|
|
}
|
|
|
|
ASSERT_OK(sst_file_writer.Finish());
|
|
|
|
uint64_t file_size = 0;
|
|
|
|
ASSERT_OK(env_->GetFileSize(file_path, &file_size));
|
|
|
|
|
2019-05-24 04:54:23 +00:00
|
|
|
bool copyfile = false;
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
2019-05-24 04:54:23 +00:00
|
|
|
"ExternalSstFileIngestionJob::Prepare:CopyFile",
|
|
|
|
[&](void* /* arg */) { copyfile = true; });
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
2019-05-24 04:54:23 +00:00
|
|
|
|
|
|
|
const Status s = db_->IngestExternalFile({file_path}, ifo);
|
2018-04-13 17:47:54 +00:00
|
|
|
|
|
|
|
ColumnFamilyHandleImpl* cfh =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily());
|
|
|
|
ColumnFamilyData* cfd = cfh->cfd();
|
|
|
|
const InternalStats* internal_stats_ptr = cfd->internal_stats();
|
|
|
|
const std::vector<InternalStats::CompactionStats>& comp_stats =
|
|
|
|
internal_stats_ptr->TEST_GetCompactionStats();
|
|
|
|
uint64_t bytes_copied = 0;
|
|
|
|
uint64_t bytes_moved = 0;
|
|
|
|
for (const auto& stats : comp_stats) {
|
|
|
|
bytes_copied += stats.bytes_written;
|
|
|
|
bytes_moved += stats.bytes_moved;
|
|
|
|
}
|
2019-05-24 04:54:23 +00:00
|
|
|
|
|
|
|
if (!fail_link) {
|
|
|
|
// Link operation succeeds. External SST should be moved.
|
|
|
|
ASSERT_OK(s);
|
2018-04-13 17:47:54 +00:00
|
|
|
ASSERT_EQ(0, bytes_copied);
|
|
|
|
ASSERT_EQ(file_size, bytes_moved);
|
2019-05-24 04:54:23 +00:00
|
|
|
ASSERT_FALSE(copyfile);
|
2018-04-13 17:47:54 +00:00
|
|
|
} else {
|
2019-05-24 04:54:23 +00:00
|
|
|
// Link operation fails.
|
|
|
|
ASSERT_EQ(0, bytes_moved);
|
|
|
|
if (failed_move_fall_back_to_copy) {
|
|
|
|
ASSERT_OK(s);
|
|
|
|
// Copy file is true since a failed link falls back to copy file.
|
|
|
|
ASSERT_TRUE(copyfile);
|
|
|
|
ASSERT_EQ(file_size, bytes_copied);
|
|
|
|
} else {
|
|
|
|
ASSERT_TRUE(s.IsNotSupported());
|
|
|
|
// Copy file is false since a failed link does not fall back to copy file.
|
|
|
|
ASSERT_FALSE(copyfile);
|
|
|
|
ASSERT_EQ(0, bytes_copied);
|
|
|
|
}
|
2018-04-13 17:47:54 +00:00
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
2018-04-13 17:47:54 +00:00
|
|
|
}
|
|
|
|
|
2016-12-06 21:56:17 +00:00
|
|
|
class TestIngestExternalFileListener : public EventListener {
|
|
|
|
public:
|
2018-03-05 21:08:17 +00:00
|
|
|
void OnExternalFileIngested(DB* /*db*/,
|
2016-12-06 21:56:17 +00:00
|
|
|
const ExternalFileIngestionInfo& info) override {
|
|
|
|
ingested_files.push_back(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<ExternalFileIngestionInfo> ingested_files;
|
|
|
|
};
|
|
|
|
|
2018-11-01 23:21:30 +00:00
|
|
|
// Ingests one file into each of three column families and checks the
// information reported to a TestIngestExternalFileListener after each
// ingestion.
TEST_P(ExternalSSTFileTest, IngestionListener) {
  Options options = CurrentOptions();
  // The listener is handed to options.listeners and is not deleted here.
  auto* listener = new TestIngestExternalFileListener();
  options.listeners.emplace_back(listener);
  CreateAndReopenWithCF({"koko", "toto"}, options);

  const bool write_global_seqno = std::get<0>(GetParam());
  const bool verify_checksums_before_ingest = std::get<1>(GetParam());

  // One entry per column family, in ingestion order. `cf_id` is both the
  // index into handles_ and the expected column family id.
  struct ExpectedIngestion {
    uint64_t cf_id;
    std::string cf_name;
  };
  const ExpectedIngestion kExpected[] = {
      {0, "default"},
      {1, "koko"},
      {2, "toto"},
  };

  for (const ExpectedIngestion& expected : kExpected) {
    ASSERT_OK(GenerateAndAddExternalFile(
        options, {1, 2}, -1, true, write_global_seqno,
        verify_checksums_before_ingest, false, true, nullptr,
        handles_[expected.cf_id]));

    // Exactly one new notification per ingestion.
    ASSERT_EQ(listener->ingested_files.size(), expected.cf_id + 1);
    const ExternalFileIngestionInfo& latest = listener->ingested_files.back();
    ASSERT_EQ(latest.cf_name, expected.cf_name);
    ASSERT_EQ(latest.global_seqno, 0);
    ASSERT_EQ(latest.table_properties.column_family_id, expected.cf_id);
    ASSERT_EQ(latest.table_properties.column_family_name, expected.cf_name);
  }
}
|
2016-12-08 21:30:09 +00:00
|
|
|
|
|
|
|
// Overwrites every key through file ingestion while a snapshot is held, and
// checks that the snapshot still reads the old values while plain reads see
// the ingested ones.
TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  const int kNumKeys = 10000;

  // Insert keys using normal path and take a snapshot
  for (int k = 0; k < kNumKeys; ++k) {
    ASSERT_OK(Put(Key(k), Key(k) + "_V1"));
  }
  const Snapshot* snap = db_->GetSnapshot();

  // Overwrite all keys using IngestExternalFile
  const std::string sst_file_path = sst_files_dir_ + "file1.sst";
  SstFileWriter sst_file_writer(EnvOptions(), options);
  ASSERT_OK(sst_file_writer.Open(sst_file_path));
  for (int k = 0; k < kNumKeys; ++k) {
    ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_V2"));
  }
  ASSERT_OK(sst_file_writer.Finish());

  IngestExternalFileOptions ifo;
  ifo.move_files = true;
  ASSERT_OK(db_->IngestExternalFile({sst_file_path}, ifo));

  for (int k = 0; k < kNumKeys; ++k) {
    const std::string key = Key(k);
    ASSERT_EQ(Get(key, snap), key + "_V1");
    ASSERT_EQ(Get(key), key + "_V2");
  }

  db_->ReleaseSnapshot(snap);
}
|
2017-05-17 18:32:26 +00:00
|
|
|
|
2018-11-01 23:21:30 +00:00
|
|
|
// Exercises ingestion with ingest_behind set: it must fail unless
// options.allow_ingest_behind is enabled, the ingested file must land in the
// last level, and that file must stay untouched by compactions until
// allow_ingest_behind is turned off again.
TEST_P(ExternalSSTFileTest, IngestBehind) {
  const bool allow_global_seqno = true;
  const bool ingest_behind = true;
  const bool write_global_seqno = std::get<0>(GetParam());
  const bool verify_checksums_before_ingest = std::get<1>(GetParam());

  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = 3;
  options.disable_auto_compactions = false;
  DestroyAndReopen(options);

  std::vector<std::pair<std::string, std::string>> file_data;
  std::map<std::string, std::string> true_data;

  // Insert 100 -> 200 into the memtable
  for (int k = 100; k <= 200; ++k) {
    ASSERT_OK(Put(Key(k), "memtable"));
  }

  // Prepare 0 -> 20 for IngestExternalFile
  for (int k = 0; k <= 20; ++k) {
    file_data.emplace_back(Key(k), "ingest_behind");
    true_data[Key(k)] = "ingest_behind";
  }

  // Can't ingest behind since allow_ingest_behind isn't set to true
  ASSERT_NOK(GenerateAndAddExternalFile(
      options, file_data, -1, allow_global_seqno, write_global_seqno,
      verify_checksums_before_ingest, ingest_behind, false /*sort_data*/,
      &true_data));

  options.allow_ingest_behind = true;
  // check that we still can open the DB, as num_levels should be
  // sanitized to 3
  options.num_levels = 2;
  DestroyAndReopen(options);

  options.num_levels = 3;
  DestroyAndReopen(options);
  true_data.clear();
  // Insert 100 -> 200 into the memtable
  for (int k = 100; k <= 200; ++k) {
    ASSERT_OK(Put(Key(k), "memtable"));
    true_data[Key(k)] = "memtable";
  }
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  // Universal picker should go at second from the bottom level
  ASSERT_EQ("0,1", FilesPerLevel());
  ASSERT_OK(GenerateAndAddExternalFile(
      options, file_data, -1, allow_global_seqno, write_global_seqno,
      verify_checksums_before_ingest, ingest_behind, false /*sort_data*/,
      &true_data));
  ASSERT_EQ("0,1,1", FilesPerLevel());
  // this time ingest should fail as the file doesn't fit to the bottom level
  ASSERT_NOK(GenerateAndAddExternalFile(
      options, file_data, -1, allow_global_seqno, write_global_seqno,
      verify_checksums_before_ingest, ingest_behind, false /*sort_data*/,
      &true_data));
  ASSERT_EQ("0,1,1", FilesPerLevel());

  // Remember which file sits in the last level so later checks can tell
  // whether it was rewritten.
  std::vector<std::vector<FileMetaData>> level_to_files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
  const uint64_t ingested_file_number = level_to_files[2][0].fd.GetNumber();
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  // Last level should not be compacted
  ASSERT_EQ("0,1,1", FilesPerLevel());
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
  ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());
  size_t kcnt = 0;
  VerifyDBFromMap(true_data, &kcnt, false);

  // Auto-compaction should not include the last level.
  // Trigger compaction if size amplification exceeds 110%.
  options.compaction_options_universal.max_size_amplification_percent = 110;
  options.level0_file_num_compaction_trigger = 4;
  ASSERT_OK(TryReopen(options));
  Random rnd(301);
  for (int round = 0; round < 4; ++round) {
    for (int k = 0; k < 10; ++k) {
      true_data[Key(k)] = rnd.RandomString(1000);
      ASSERT_OK(Put(Key(k), true_data[Key(k)]));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
  ASSERT_EQ(1, level_to_files[2].size());
  ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());

  // Turning off the option allows DB to compact ingested files.
  options.allow_ingest_behind = false;
  ASSERT_OK(TryReopen(options));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
  ASSERT_EQ(1, level_to_files[2].size());
  ASSERT_NE(ingested_file_number, level_to_files[2][0].fd.GetNumber());
  VerifyDBFromMap(true_data, &kcnt, false);
}
|
2018-01-22 22:37:37 +00:00
|
|
|
|
|
|
|
// Ingests one SST file written with filters and one written with
// skip_filters set, and uses the BLOCK_CACHE_FILTER_ADD ticker to check
// whether a filter block was added to the block cache when reading from each.
TEST_F(ExternalSSTFileTest, SkipBloomFilter) {
  Options options = CurrentOptions();

  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  table_options.cache_index_and_filter_blocks = true;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  // Create external SST file and include bloom filters
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  DestroyAndReopen(options);
  {
    const std::string file_path = sst_files_dir_ + "sst_with_bloom.sst";
    SstFileWriter sst_file_writer(EnvOptions(), options);
    ASSERT_OK(sst_file_writer.Open(file_path));
    ASSERT_OK(sst_file_writer.Put("Key1", "Value1"));
    ASSERT_OK(sst_file_writer.Finish());

    ASSERT_OK(
        db_->IngestExternalFile({file_path}, IngestExternalFileOptions()));

    ASSERT_EQ(Get("Key1"), "Value1");
    const uint64_t filter_adds =
        options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD);
    ASSERT_GE(filter_adds, 1);
  }

  // Create external SST file but skip bloom filters
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  DestroyAndReopen(options);
  {
    const std::string file_path = sst_files_dir_ + "sst_with_no_bloom.sst";
    SstFileWriter sst_file_writer(EnvOptions(), options, nullptr, true,
                                  Env::IOPriority::IO_TOTAL,
                                  true /* skip_filters */);
    ASSERT_OK(sst_file_writer.Open(file_path));
    ASSERT_OK(sst_file_writer.Put("Key1", "Value1"));
    ASSERT_OK(sst_file_writer.Finish());

    ASSERT_OK(
        db_->IngestExternalFile({file_path}, IngestExternalFileOptions()));

    ASSERT_EQ(Get("Key1"), "Value1");
    const uint64_t filter_adds =
        options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD);
    ASSERT_EQ(filter_adds, 0);
  }
}
|
|
|
|
|
2019-02-14 18:16:12 +00:00
|
|
|
// Generates and ingests an external file with ZSTD dictionary compression
// configured, and checks via a sync point callback that exactly one
// compression dictionary block was written. Returns early (without checking
// anything) when ZSTD is not supported.
TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) {
  if (!ZSTD_Supported()) {
    return;
  }
  const int kNumEntries = 1 << 10;
  const int kNumBytesPerEntry = 1 << 10;

  Options options = CurrentOptions();
  options.compression = kZSTD;
  options.compression_opts.max_dict_bytes = 1 << 14;        // 16KB
  options.compression_opts.zstd_max_train_bytes = 1 << 18;  // 256KB
  DestroyAndReopen(options);

  // Incremented each time the dictionary-block sync point fires.
  std::atomic<int> num_compression_dicts(0);
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict",
      [&](void* /* arg */) { ++num_compression_dicts; });
  sync_point->EnableProcessing();

  Random rnd(301);
  std::vector<std::pair<std::string, std::string>> random_data;
  for (int k = 0; k < kNumEntries; ++k) {
    std::string value = rnd.RandomString(kNumBytesPerEntry);
    random_data.emplace_back(Key(k), std::move(value));
  }
  ASSERT_OK(GenerateAndAddExternalFile(options, std::move(random_data)));
  ASSERT_EQ(1, num_compression_dicts);
}
|
|
|
|
|
2021-12-10 16:12:09 +00:00
|
|
|
class ExternalSSTBlockChecksumTest
|
|
|
|
: public ExternalSSTFileTestBase,
|
|
|
|
public testing::WithParamInterface<uint32_t> {};
|
|
|
|
|
|
|
|
INSTANTIATE_TEST_CASE_P(FormatVersions, ExternalSSTBlockChecksumTest,
|
|
|
|
testing::ValuesIn(test::kFooterFormatVersionsToTest));
|
|
|
|
|
2020-06-19 23:16:57 +00:00
|
|
|
// Very slow, not worth the cost to run regularly
|
2021-12-10 16:12:09 +00:00
|
|
|
TEST_P(ExternalSSTBlockChecksumTest, DISABLED_HugeBlockChecksum) {
|
|
|
|
BlockBasedTableOptions table_options;
|
|
|
|
table_options.format_version = GetParam();
|
Implement XXH3 block checksum type (#9069)
Summary:
XXH3 - latest hash function that is extremely fast on large
data, easily faster than crc32c on most any x86_64 hardware. In
integrating this hash function, I have handled the compression type byte
in a non-standard way to avoid using the streaming API (extra data
movement and active code size because of hash function complexity). This
approach got a thumbs-up from Yann Collet.
Existing functionality change:
* reject bad ChecksumType in options with InvalidArgument
This change split off from https://github.com/facebook/rocksdb/issues/9058 because context-aware checksum is
likely to be handled through different configuration than ChecksumType.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9069
Test Plan:
tests updated, and substantially expanded. Unit tests now check
that we don't accidentally change the values generated by the checksum
algorithms ("schema test") and that we properly handle
invalid/unrecognized checksum types in options or in file footer.
DBTestBase::ChangeOptions (etc.) updated from two to one configuration
changing from default CRC32c ChecksumType. The point of this test code
is to detect possible interactions among features, and the likelihood of
some bad interaction being detected by including configurations other
than XXH3 and CRC32c--and then not detected by stress/crash test--is
extremely low.
Stress/crash test also updated (manual run long enough to see it accepts
new checksum type). db_bench also updated for microbenchmarking
checksums.
### Performance microbenchmark (PORTABLE=0 DEBUG_LEVEL=0, Broadwell processor)
./db_bench -benchmarks=crc32c,xxhash,xxhash64,xxh3,crc32c,xxhash,xxhash64,xxh3,crc32c,xxhash,xxhash64,xxh3
crc32c : 0.200 micros/op 5005220 ops/sec; 19551.6 MB/s (4096 per op)
xxhash : 0.807 micros/op 1238408 ops/sec; 4837.5 MB/s (4096 per op)
xxhash64 : 0.421 micros/op 2376514 ops/sec; 9283.3 MB/s (4096 per op)
xxh3 : 0.171 micros/op 5858391 ops/sec; 22884.3 MB/s (4096 per op)
crc32c : 0.206 micros/op 4859566 ops/sec; 18982.7 MB/s (4096 per op)
xxhash : 0.793 micros/op 1260850 ops/sec; 4925.2 MB/s (4096 per op)
xxhash64 : 0.410 micros/op 2439182 ops/sec; 9528.1 MB/s (4096 per op)
xxh3 : 0.161 micros/op 6202872 ops/sec; 24230.0 MB/s (4096 per op)
crc32c : 0.203 micros/op 4924686 ops/sec; 19237.1 MB/s (4096 per op)
xxhash : 0.839 micros/op 1192388 ops/sec; 4657.8 MB/s (4096 per op)
xxhash64 : 0.424 micros/op 2357391 ops/sec; 9208.6 MB/s (4096 per op)
xxh3 : 0.162 micros/op 6182678 ops/sec; 24151.1 MB/s (4096 per op)
As you can see, especially once warmed up, xxh3 is fastest.
### Performance macrobenchmark (PORTABLE=0 DEBUG_LEVEL=0, Broadwell processor)
Test
for I in `seq 1 50`; do for CHK in 0 1 2 3 4; do TEST_TMPDIR=/dev/shm/rocksdb$CHK ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=30000000 -checksum_type=$CHK 2>&1 | grep 'micros/op' | tee -a results-$CHK & done; wait; done
Results (ops/sec)
for FILE in results*; do echo -n "$FILE "; awk '{ s += $5; c++; } END { print 1.0 * s / c; }' < $FILE; done
results-0 252118 # kNoChecksum
results-1 251588 # kCRC32c
results-2 251863 # kxxHash
results-3 252016 # kxxHash64
results-4 252038 # kXXH3
Reviewed By: mrambacher
Differential Revision: D31905249
Pulled By: pdillinger
fbshipit-source-id: cb9b998ebe2523fc7c400eedf62124a78bf4b4d1
2021-10-29 05:13:47 +00:00
|
|
|
for (auto t : GetSupportedChecksums()) {
|
|
|
|
table_options.checksum = t;
|
2020-06-19 23:16:57 +00:00
|
|
|
Options options = CurrentOptions();
|
|
|
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
|
|
|
|
|
|
SstFileWriter sst_file_writer(EnvOptions(), options);
|
|
|
|
|
|
|
|
// 2^32 - 1, will lead to data block with more than 2^32 bytes
|
2022-05-05 20:08:21 +00:00
|
|
|
size_t huge_size = std::numeric_limits<uint32_t>::max();
|
2020-06-19 23:16:57 +00:00
|
|
|
|
|
|
|
std::string f = sst_files_dir_ + "f.sst";
|
|
|
|
ASSERT_OK(sst_file_writer.Open(f));
|
|
|
|
{
|
|
|
|
Random64 r(123);
|
|
|
|
std::string huge(huge_size, 0);
|
|
|
|
for (size_t j = 0; j + 7 < huge_size; j += 8) {
|
|
|
|
EncodeFixed64(&huge[j], r.Next());
|
|
|
|
}
|
|
|
|
ASSERT_OK(sst_file_writer.Put("Huge", huge));
|
|
|
|
}
|
|
|
|
|
|
|
|
ExternalSstFileInfo f_info;
|
|
|
|
ASSERT_OK(sst_file_writer.Finish(&f_info));
|
|
|
|
ASSERT_GT(f_info.file_size, uint64_t{huge_size} + 10);
|
|
|
|
|
|
|
|
SstFileReader sst_file_reader(options);
|
|
|
|
ASSERT_OK(sst_file_reader.Open(f));
|
|
|
|
ASSERT_OK(sst_file_reader.VerifyChecksum());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-13 03:07:25 +00:00
|
|
|
// Ingests one external file into each of three column families with a single
// call, then reopens the DB and checks that every recorded key/value pair is
// readable from its column family.
TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_Success) {
  using KVList = std::vector<std::pair<std::string, std::string>>;

  auto fault_injection_env = std::make_unique<FaultInjectionTestEnv>(env_);
  Options options = CurrentOptions();
  options.env = fault_injection_env.get();
  CreateAndReopenWithCF({"pikachu", "eevee"}, options);

  // Exercise different situations in different column families: two are empty
  // (so no new sequence number is needed), but at least one overlaps with the
  // DB and needs to bump the sequence number.
  ASSERT_OK(db_->Put(WriteOptions(), "foo1", "oldvalue"));

  std::vector<ColumnFamilyHandle*> column_families{handles_[0], handles_[1],
                                                   handles_[2]};

  // Every column family uses the same ingestion options.
  IngestExternalFileOptions ifo_template;
  ifo_template.allow_global_seqno = true;  // Always allow global_seqno
  // May or may not write global_seqno
  ifo_template.write_global_seqno = std::get<0>(GetParam());
  // Whether to verify checksums before ingestion
  ifo_template.verify_checksums_before_ingest = std::get<1>(GetParam());
  std::vector<IngestExternalFileOptions> ifos(column_families.size(),
                                              ifo_template);

  // One list of key/value pairs per column family.
  std::vector<KVList> data{
      KVList{{"foo1", "fv1"}, {"foo2", "fv2"}},
      KVList{{"bar1", "bv1"}, {"bar2", "bv2"}},
      KVList{{"bar3", "bv3"}, {"bar4", "bv4"}},
  };

  // Sized up front so the per-column-family maps never get re-allocated.
  std::vector<std::map<std::string, std::string>> true_data(
      column_families.size());
  ASSERT_OK(GenerateAndAddExternalFiles(options, column_families, ifos, data,
                                        -1, true, true_data));
  Close();
  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"},
                           options);
  ASSERT_EQ(3, handles_.size());

  // true_data[i] holds the expected contents for column family i.
  for (size_t cf_idx = 0; cf_idx < true_data.size(); ++cf_idx) {
    const int cf = static_cast<int>(cf_idx);
    for (const auto& expected : true_data[cf_idx]) {
      ASSERT_EQ(expected.second, Get(cf, expected.first));
    }
  }
  Close();
  Destroy(options, true /* delete_cf_paths */);
}
|
|
|
|
|
|
|
|
// Iterators created on a snapshot taken before a multi-column-family
// ingestion must keep returning only the pre-ingestion data while the
// ingestion runs on another thread. After a reopen, all ingested data must be
// visible in every column family.
TEST_P(ExternalSSTFileTest,
       IngestFilesIntoMultipleColumnFamilies_NoMixedStateWithSnapshot) {
  using KVList = std::vector<std::pair<std::string, std::string>>;
  using KVMap = std::map<std::string, std::string>;

  auto fault_injection_env = std::make_unique<FaultInjectionTestEnv>(env_);

  // Interleave this test's "BeforeRead"/"AfterRead" points with the
  // InstallSVForFirstCF points inside the ingestion code.
  SyncPoint* const sync_point = SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
  sync_point->LoadDependency({
      {"DBImpl::IngestExternalFiles:InstallSVForFirstCF:0",
       "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:"
       "BeforeRead"},
      {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:"
       "AfterRead",
       "DBImpl::IngestExternalFiles:InstallSVForFirstCF:1"},
  });
  sync_point->EnableProcessing();

  Options options = CurrentOptions();
  options.env = fault_injection_env.get();
  CreateAndReopenWithCF({"pikachu", "eevee"}, options);

  // Contents written (and flushed) to each column family before ingestion.
  const std::vector<KVMap> data_before_ingestion = {
      {{"foo1", "fv1_0"}, {"foo2", "fv2_0"}, {"foo3", "fv3_0"}},
      {{"bar1", "bv1_0"}, {"bar2", "bv2_0"}, {"bar3", "bv3_0"}},
      {{"bar4", "bv4_0"}, {"bar5", "bv5_0"}, {"bar6", "bv6_0"}}};
  for (size_t cf_idx = 0; cf_idx != handles_.size(); ++cf_idx) {
    const int cf = static_cast<int>(cf_idx);
    for (const auto& entry : data_before_ingestion[cf_idx]) {
      ASSERT_OK(Put(cf, entry.first, entry.second));
    }
    ASSERT_OK(Flush(cf));
  }

  std::vector<ColumnFamilyHandle*> column_families{handles_[0], handles_[1],
                                                   handles_[2]};

  // Every column family uses the same ingestion options.
  IngestExternalFileOptions ifo_template;
  ifo_template.allow_global_seqno = true;  // Always allow global_seqno
  // May or may not write global_seqno
  ifo_template.write_global_seqno = std::get<0>(GetParam());
  // Whether to verify checksums before ingestion
  ifo_template.verify_checksums_before_ingest = std::get<1>(GetParam());
  std::vector<IngestExternalFileOptions> ifos(column_families.size(),
                                              ifo_template);

  // One list of key/value pairs to ingest per column family.
  std::vector<KVList> data{
      KVList{{"foo1", "fv1"}, {"foo2", "fv2"}},
      KVList{{"bar1", "bv1"}, {"bar2", "bv2"}},
      KVList{{"bar3", "bv3"}, {"bar4", "bv4"}},
  };

  // Sized up front so the per-column-family maps never get re-allocated.
  std::vector<KVMap> true_data(column_families.size());

  // Take snapshot before ingestion starts
  ReadOptions read_opts;
  read_opts.total_order_seek = true;
  read_opts.snapshot = dbfull()->GetSnapshot();
  std::vector<Iterator*> iters(handles_.size());

  // Range scan checks first kv of each CF before ingestion starts.
  for (size_t cf_idx = 0; cf_idx != handles_.size(); ++cf_idx) {
    iters[cf_idx] = dbfull()->NewIterator(read_opts, handles_[cf_idx]);
    Iterator* const iter = iters[cf_idx];
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    const std::string key = iter->key().ToString();
    const std::string value = iter->value().ToString();
    const KVMap& expected = data_before_ingestion[cf_idx];
    const auto found = expected.find(key);
    ASSERT_NE(expected.end(), found);
    ASSERT_EQ(found->second, value);
    iter->Next();
  }

  port::Thread ingest_thread([&]() {
    ASSERT_OK(GenerateAndAddExternalFiles(options, column_families, ifos, data,
                                          -1, true, true_data));
  });
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:"
      "BeforeRead");

  // Should see only data before ingestion
  for (size_t cf_idx = 0; cf_idx != handles_.size(); ++cf_idx) {
    const KVMap& expected = data_before_ingestion[cf_idx];
    Iterator* const iter = iters[cf_idx];
    while (iter->Valid()) {
      const std::string key = iter->key().ToString();
      const std::string value = iter->value().ToString();
      const auto found = expected.find(key);
      ASSERT_NE(expected.end(), found);
      ASSERT_EQ(found->second, value);
      iter->Next();
    }
  }
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:"
      "AfterRead");
  ingest_thread.join();

  // Release the iterators and then the snapshot they were reading from.
  for (Iterator* iter : iters) {
    ASSERT_OK(iter->status());
    delete iter;
  }
  iters.clear();
  dbfull()->ReleaseSnapshot(read_opts.snapshot);

  Close();
  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"},
                           options);
  // Should see consistent state after ingestion for all column families even
  // without snapshot.
  ASSERT_EQ(3, handles_.size());
  for (size_t cf_idx = 0; cf_idx < true_data.size(); ++cf_idx) {
    const int cf = static_cast<int>(cf_idx);
    for (const auto& expected : true_data[cf_idx]) {
      ASSERT_EQ(expected.second, Get(cf, expected.first));
    }
  }
  Close();
  Destroy(options, true /* delete_cf_paths */);
}
|
|
|
|
|
|
|
|
// Deactivates the fault-injection filesystem between the two
// "BeforeLastJobPrepare" sync points of a multi-column-family ingestion,
// expects the ingestion to fail, and checks after a reopen that none of the
// recorded keys is visible in any column family.
TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_PrepareFail) {
  using KVList = std::vector<std::pair<std::string, std::string>>;

  auto fault_injection_env = std::make_unique<FaultInjectionTestEnv>(env_);
  Options options = CurrentOptions();
  options.env = fault_injection_env.get();

  // Let the test thread act between the ":0" and ":1" points of the
  // ingestion code.
  SyncPoint* const sync_point = SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
  sync_point->LoadDependency({
      {"DBImpl::IngestExternalFiles:BeforeLastJobPrepare:0",
       "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_PrepareFail:"
       "0"},
      {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies:PrepareFail:"
       "1",
       "DBImpl::IngestExternalFiles:BeforeLastJobPrepare:1"},
  });
  sync_point->EnableProcessing();

  CreateAndReopenWithCF({"pikachu", "eevee"}, options);

  std::vector<ColumnFamilyHandle*> column_families{handles_[0], handles_[1],
                                                   handles_[2]};

  // Every column family uses the same ingestion options.
  IngestExternalFileOptions ifo_template;
  ifo_template.allow_global_seqno = true;  // Always allow global_seqno
  // May or may not write global_seqno
  ifo_template.write_global_seqno = std::get<0>(GetParam());
  // Whether to verify block checksums before ingest
  ifo_template.verify_checksums_before_ingest = std::get<1>(GetParam());
  std::vector<IngestExternalFileOptions> ifos(column_families.size(),
                                              ifo_template);

  // One list of key/value pairs to ingest per column family.
  std::vector<KVList> data{
      KVList{{"foo1", "fv1"}, {"foo2", "fv2"}},
      KVList{{"bar1", "bv1"}, {"bar2", "bv2"}},
      KVList{{"bar3", "bv3"}, {"bar4", "bv4"}},
  };

  // Sized up front so the per-column-family maps never get re-allocated.
  std::vector<std::map<std::string, std::string>> true_data(
      column_families.size());

  port::Thread ingest_thread([&]() {
    ASSERT_NOK(GenerateAndAddExternalFiles(options, column_families, ifos, data,
                                           -1, true, true_data));
  });
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_PrepareFail:"
      "0");
  fault_injection_env->SetFilesystemActive(false);
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies:PrepareFail:"
      "1");
  ingest_thread.join();

  fault_injection_env->SetFilesystemActive(true);
  Close();
  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"},
                           options);
  ASSERT_EQ(3, handles_.size());

  // Nothing recorded in true_data may be readable after the failed ingestion.
  for (size_t cf_idx = 0; cf_idx < true_data.size(); ++cf_idx) {
    const int cf = static_cast<int>(cf_idx);
    for (const auto& entry : true_data[cf_idx]) {
      ASSERT_EQ("NOT_FOUND", Get(cf, entry.first));
    }
  }
  Close();
  Destroy(options, true /* delete_cf_paths */);
}
|
|
|
|
|
|
|
|
// Deactivates the fault-injection filesystem between the two "BeforeJobsRun"
// sync points of a multi-column-family ingestion, expects the ingestion to
// fail, and checks after a reopen that none of the recorded keys is visible
// in any column family.
TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_CommitFail) {
  using KVList = std::vector<std::pair<std::string, std::string>>;

  auto fault_injection_env = std::make_unique<FaultInjectionTestEnv>(env_);
  Options options = CurrentOptions();
  options.env = fault_injection_env.get();

  // Let the test thread act between the ":0" and ":1" points of the
  // ingestion code.
  SyncPoint* const sync_point = SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
  sync_point->LoadDependency({
      {"DBImpl::IngestExternalFiles:BeforeJobsRun:0",
       "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:"
       "0"},
      {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:"
       "1",
       "DBImpl::IngestExternalFiles:BeforeJobsRun:1"},
  });
  sync_point->EnableProcessing();

  CreateAndReopenWithCF({"pikachu", "eevee"}, options);

  std::vector<ColumnFamilyHandle*> column_families{handles_[0], handles_[1],
                                                   handles_[2]};

  // Every column family uses the same ingestion options.
  IngestExternalFileOptions ifo_template;
  ifo_template.allow_global_seqno = true;  // Always allow global_seqno
  // May or may not write global_seqno
  ifo_template.write_global_seqno = std::get<0>(GetParam());
  // Whether to verify block checksums before ingestion
  ifo_template.verify_checksums_before_ingest = std::get<1>(GetParam());
  std::vector<IngestExternalFileOptions> ifos(column_families.size(),
                                              ifo_template);

  // One list of key/value pairs to ingest per column family.
  std::vector<KVList> data{
      KVList{{"foo1", "fv1"}, {"foo2", "fv2"}},
      KVList{{"bar1", "bv1"}, {"bar2", "bv2"}},
      KVList{{"bar3", "bv3"}, {"bar4", "bv4"}},
  };

  // Sized up front so the per-column-family maps never get re-allocated.
  std::vector<std::map<std::string, std::string>> true_data(
      column_families.size());

  port::Thread ingest_thread([&]() {
    ASSERT_NOK(GenerateAndAddExternalFiles(options, column_families, ifos, data,
                                           -1, true, true_data));
  });
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:"
      "0");
  fault_injection_env->SetFilesystemActive(false);
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:"
      "1");
  ingest_thread.join();

  fault_injection_env->SetFilesystemActive(true);
  Close();
  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"},
                           options);
  ASSERT_EQ(3, handles_.size());

  // Nothing recorded in true_data may be readable after the failed ingestion.
  for (size_t cf_idx = 0; cf_idx < true_data.size(); ++cf_idx) {
    const int cf = static_cast<int>(cf_idx);
    for (const auto& entry : true_data[cf_idx]) {
      ASSERT_EQ("NOT_FOUND", Get(cf, entry.first));
    }
  }
  Close();
  Destroy(options, true /* delete_cf_paths */);
}
|
|
|
|
|
|
|
|
// Deactivates the fault-injection filesystem between the two
// "BeforeWriteLastVersionEdit" sync points of the manifest write, expects the
// ingestion to fail, drops unsynced file data, and checks after a reopen that
// none of the recorded keys is visible in any column family.
TEST_P(ExternalSSTFileTest,
       IngestFilesIntoMultipleColumnFamilies_PartialManifestWriteFail) {
  using KVList = std::vector<std::pair<std::string, std::string>>;

  auto fault_injection_env = std::make_unique<FaultInjectionTestEnv>(env_);
  Options options = CurrentOptions();
  options.env = fault_injection_env.get();

  CreateAndReopenWithCF({"pikachu", "eevee"}, options);

  // Sync points are configured only after the column families exist.
  SyncPoint* const sync_point = SyncPoint::GetInstance();
  sync_point->ClearTrace();
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
  sync_point->LoadDependency({
      {"VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0",
       "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_"
       "PartialManifestWriteFail:0"},
      {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_"
       "PartialManifestWriteFail:1",
       "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:1"},
  });
  sync_point->EnableProcessing();

  std::vector<ColumnFamilyHandle*> column_families{handles_[0], handles_[1],
                                                   handles_[2]};

  // Every column family uses the same ingestion options.
  IngestExternalFileOptions ifo_template;
  ifo_template.allow_global_seqno = true;  // Always allow global_seqno
  // May or may not write global_seqno
  ifo_template.write_global_seqno = std::get<0>(GetParam());
  // Whether to verify block checksums before ingestion
  ifo_template.verify_checksums_before_ingest = std::get<1>(GetParam());
  std::vector<IngestExternalFileOptions> ifos(column_families.size(),
                                              ifo_template);

  // One list of key/value pairs to ingest per column family.
  std::vector<KVList> data{
      KVList{{"foo1", "fv1"}, {"foo2", "fv2"}},
      KVList{{"bar1", "bv1"}, {"bar2", "bv2"}},
      KVList{{"bar3", "bv3"}, {"bar4", "bv4"}},
  };

  // Sized up front so the per-column-family maps never get re-allocated.
  std::vector<std::map<std::string, std::string>> true_data(
      column_families.size());

  port::Thread ingest_thread([&]() {
    ASSERT_NOK(GenerateAndAddExternalFiles(options, column_families, ifos, data,
                                           -1, true, true_data));
  });
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_"
      "PartialManifestWriteFail:0");
  fault_injection_env->SetFilesystemActive(false);
  TEST_SYNC_POINT(
      "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_"
      "PartialManifestWriteFail:1");
  ingest_thread.join();

  ASSERT_OK(fault_injection_env->DropUnsyncedFileData());
  fault_injection_env->SetFilesystemActive(true);
  Close();
  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"},
                           options);
  ASSERT_EQ(3, handles_.size());

  // Nothing recorded in true_data may be readable after the failed ingestion.
  for (size_t cf_idx = 0; cf_idx < true_data.size(); ++cf_idx) {
    const int cf = static_cast<int>(cf_idx);
    for (const auto& entry : true_data[cf_idx]) {
      ASSERT_EQ("NOT_FOUND", Get(cf, entry.first));
    }
  }
  Close();
  Destroy(options, true /* delete_cf_paths */);
}
|
|
|
|
|
2019-11-15 21:59:03 +00:00
|
|
|
// With two_write_queues enabled, ingests a file whose only key overlaps a key
// still sitting in the memtable, and expects the ingestion to succeed.
TEST_P(ExternalSSTFileTest, IngestFilesTriggerFlushingWithTwoWriteQueue) {
  Options options = CurrentOptions();
  options.two_write_queues = true;
  // Use large buffer to avoid memtable flush
  options.write_buffer_size = 1024 * 1024;
  DestroyAndReopen(options);

  // Populate the memtable.
  for (const char* key : {"1000", "1001", "9999"}) {
    ASSERT_OK(dbfull()->Put(WriteOptions(), key, "v1"));
  }

  // Ingest one key that overlaps with the keys in the memtable.
  // It will trigger flushing memtable and require this thread is
  // currently at the front of the 2nd writer queue. We must make
  // sure that it won't enter the 2nd writer queue for the second time.
  std::vector<std::pair<std::string, std::string>> overlapping;
  overlapping.push_back({"1001", "v2"});
  ASSERT_OK(GenerateAndAddExternalFile(options, overlapping, -1, true));
}
|
|
|
|
|
2020-05-28 17:49:02 +00:00
|
|
|
// Ingests a file holding two keys, where the second key starts with the first
// key followed by the 8 bytes of a packed (seqno 0, kTypeValue) footer, while
// a snapshot is held. Both keys must then read back with the written value.
TEST_P(ExternalSSTFileTest, DeltaEncodingWhileGlobalSeqnoPresent) {
  constexpr size_t kValueSize = 8;

  Options options = CurrentOptions();
  DestroyAndReopen(options);

  Random rnd(301);
  const std::string value = rnd.RandomString(kValueSize);

  // Write some keys first so that the global seqno is larger than zero.
  for (int n = 0; n != 10; ++n) {
    ASSERT_OK(Put("ab" + Key(n), value));
  }
  // Get a Snapshot to make RocksDB assign global seqno to ingested sst files.
  const auto* snapshot = dbfull()->GetSnapshot();

  // key2 = key1 + packed footer + extra suffix. The tail of every internal
  // key is composed as (seqno << 8 | value_type), and here `1` represents
  // ValueType::kTypeValue.
  const std::string key1 = "ab";
  std::string key2 = key1;
  PutFixed64(&key2, PackSequenceAndType(0, kTypeValue));
  key2.append("cdefghijkl");

  const std::string sst_path = sst_files_dir_ + "test_file";
  ROCKSDB_NAMESPACE::SstFileWriter writer(EnvOptions(), options);
  ASSERT_OK(writer.Open(sst_path));
  ASSERT_OK(writer.Put(key1, value));
  ASSERT_OK(writer.Put(key2, value));

  ExternalSstFileInfo file_info;
  ASSERT_OK(writer.Finish(&file_info));

  ASSERT_OK(dbfull()->IngestExternalFile({file_info.file_path},
                                         IngestExternalFileOptions()));
  dbfull()->ReleaseSnapshot(snapshot);

  ASSERT_EQ(value, Get(key1));
  // The original author marked this lookup as the one where an error shows
  // up, presumably when the scenario under test is mishandled.
  ASSERT_EQ(value, Get(key2));
}
|
|
|
|
|
2020-05-28 17:49:02 +00:00
|
|
|
// Regression test for bug where global seqno corrupted the shared bytes
// buffer when switching from reverse iteration to forward iteration.
//
// Writes key0..key3 into an external file, ingests it after a regular Put has
// made the global seqno non-zero, then seeks to key3, steps back to key2 and
// forward again, expecting to land on key3.
TEST_P(ExternalSSTFileTest,
       DeltaEncodingWhileGlobalSeqnoPresentIteratorSwitch) {
  constexpr size_t kValueSize = 8;
  Options options = CurrentOptions();

  Random rnd(301);
  std::string value = rnd.RandomString(kValueSize);

  std::string key0 = "aa";
  std::string key1 = "ab";
  // Make the prefix of key2 is same with key1 add zero seqno. The tail of every
  // key is composed as (seqno << 8 | value_type), and here `1` represents
  // ValueType::kTypeValue
  std::string key2 = "ab";
  PutFixed64(&key2, PackSequenceAndType(0, kTypeValue));
  key2 += "cdefghijkl";
  std::string key3 = key2 + "_";

  // Write some key to make global seqno larger than zero
  ASSERT_OK(Put(key0, value));

  std::string fname = sst_files_dir_ + "test_file";
  ROCKSDB_NAMESPACE::SstFileWriter writer(EnvOptions(), options);
  ASSERT_OK(writer.Open(fname));

  // key0 is a dummy to ensure the turnaround point (key1) comes from Prev
  // cache rather than block (restart keys are pinned in block).
  ASSERT_OK(writer.Put(key0, value));
  ASSERT_OK(writer.Put(key1, value));
  ASSERT_OK(writer.Put(key2, value));
  ASSERT_OK(writer.Put(key3, value));

  ExternalSstFileInfo info;
  ASSERT_OK(writer.Finish(&info));

  ASSERT_OK(dbfull()->IngestExternalFile({info.file_path},
                                         IngestExternalFileOptions()));
  ReadOptions read_opts;
  // Prevents Seek() when switching directions, which circumvents the bug.
  read_opts.total_order_seek = true;
  // Held in a unique_ptr so the iterator is released even when one of the
  // fatal assertions below returns from the test body early.
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
  // Scan backwards to key2. File iterator will then be positioned at key1.
  iter->Seek(key3);
  // key() may only be read from a valid iterator, so check Valid() first.
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(key3, iter->key());
  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(key2, iter->key());
  // Scan forwards and make sure key3 is present. Previously key3 would be
  // corrupted by the global seqno from key1.
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(key3, iter->key());
  ASSERT_OK(iter->status());
}
|
|
|
|
|
Add support to bulk load external files with user-defined timestamps (#12343)
Summary:
This PR adds initial support to bulk loading external sst files with user-defined timestamps.
To ensure this invariant is met while ingesting external files:
assume there are two internal keys: <K, ts1, seq1> and <K, ts2, seq2>, the following should hold:
ts1 < ts2 iff. seq1 < seq2
These extra requirements are added for ingesting external files with user-defined timestamps:
1) A file with overlapping user key (without timestamp) range with the db cannot be ingested. This is because we cannot ensure above invariant is met without checking each overlapped key's timestamp and compare it with the timestamp from the db. This is an expensive step. This bulk loading feature will be used by MyRocks and currently their usage can guarantee ingested file's key range doesn't overlap with db.
https://github.com/facebook/mysql-5.6/blob/4f3a57a13fec9fa2cb6d8bef6d38adba209e1981/storage/rocksdb/ha_rocksdb.cc#L3312
We can consider loosening this requirement by doing this check in the future; this initial support simply disallows it.
2) Files with overlapping user key (without timestamp) range are not allowed to be ingested. For similar reasons, it's hard to ensure above invariant is met. For example, if we have two files where user keys are interleaved like this:
file1: [c10, c8, f10, f5]
file2: [b5, c11, f4]
Either file1 gets a bigger global seqno than file2, or the other way around, above invariant cannot be met.
So we disallow this.
3) When a column family enables user-defined timestamps, it doesn't support ingestion-behind mode. Ingestion behind currently simply puts the file at the bottommost level and assigns a global seqno of 0 to the file. We would need to do a similar search through the LSM tree for key range overlap checks to make sure the aforementioned invariant is met, so this initial support disallows this mode. We can consider adding it in the future.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12343
Test Plan: Add unit tests
Reviewed By: cbi42
Differential Revision: D53686182
Pulled By: jowlyzhang
fbshipit-source-id: f05e3fb27967f7974ed40179d78634c40ecfb136
2024-02-13 19:15:28 +00:00
|
|
|
class ExternalSSTFileWithTimestampTest : public ExternalSSTFileTest {
|
|
|
|
public:
|
|
|
|
ExternalSSTFileWithTimestampTest() = default;
|
|
|
|
|
|
|
|
static const std::string kValueNotFound;
|
|
|
|
static const std::string kTsNotFound;
|
|
|
|
|
|
|
|
std::string EncodeAsUint64(uint64_t v) {
|
|
|
|
std::string dst;
|
|
|
|
PutFixed64(&dst, v);
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status IngestExternalUDTFile(const std::vector<std::string>& files,
|
|
|
|
bool allow_global_seqno = true) {
|
|
|
|
IngestExternalFileOptions opts;
|
|
|
|
opts.snapshot_consistency = true;
|
|
|
|
opts.allow_global_seqno = allow_global_seqno;
|
|
|
|
return db_->IngestExternalFile(files, opts);
|
|
|
|
}
|
|
|
|
|
|
|
|
void VerifyValueAndTs(const std::string& key,
|
|
|
|
const std::string& read_timestamp,
|
|
|
|
const std::string& expected_value,
|
|
|
|
const std::string& expected_timestamp) {
|
|
|
|
Slice read_ts = read_timestamp;
|
|
|
|
ReadOptions read_options;
|
|
|
|
read_options.timestamp = &read_ts;
|
|
|
|
std::string value;
|
|
|
|
std::string timestamp;
|
|
|
|
Status s = db_->Get(read_options, key, &value, ×tamp);
|
|
|
|
if (s.ok()) {
|
|
|
|
ASSERT_EQ(value, expected_value);
|
|
|
|
ASSERT_EQ(timestamp, expected_timestamp);
|
|
|
|
} else if (s.IsNotFound()) {
|
|
|
|
ASSERT_EQ(kValueNotFound, expected_value);
|
|
|
|
ASSERT_EQ(kTsNotFound, expected_timestamp);
|
|
|
|
} else {
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Out-of-class definitions of the fixture's NotFound sentinels.
// VerifyValueAndTs() compares these against the expected value / timestamp
// when a lookup returns NotFound.
const std::string ExternalSSTFileWithTimestampTest::kValueNotFound(
    "NOT_FOUND");
const std::string ExternalSSTFileWithTimestampTest::kTsNotFound(
    "NOT_FOUND_TS");
|
|
|
|
|
|
|
|
// End-to-end check of writing SST files with user-defined timestamps and
// ingesting them, repeated for each option configuration that ChangeOptions()
// cycles through:
//   file1: keys [0, 50), three timestamped versions per key.
//   file2: keys [50, 200) plus one range deletion.
//   file3: keys [100, 200), overlaps the DB, so ingestion must be rejected.
//   file4: keys [200, 250), ingested after live writes have advanced the
//          sequence number.
TEST_F(ExternalSSTFileWithTimestampTest, Basic) {
  do {
    Options options = CurrentOptions();
    options.comparator = test::BytewiseComparatorWithU64TsWrapper();
    options.persist_user_defined_timestamps = true;

    DestroyAndReopen(options);

    SstFileWriter sst_file_writer(EnvOptions(), options);

    // Current file size should be 0 after sst_file_writer init and before
    // opening a file.
    ASSERT_EQ(sst_file_writer.FileSize(), 0);

    // file1.sst [0, 50)
    std::string file1 = sst_files_dir_ + "file1.sst";
    ASSERT_OK(sst_file_writer.Open(file1));
    for (int k = 0; k < 50; k++) {
      // Write 3 versions of values for each key. Newer versions are written
      // first because they are treated as logically smaller by the
      // comparator.
      for (int version = 3; version > 0; version--) {
        ASSERT_OK(
            sst_file_writer.Put(Key(k), EncodeAsUint64(k + version),
                                Key(k) + "_val" + std::to_string(version)));
      }
    }

    ExternalSstFileInfo file1_info;
    ASSERT_OK(sst_file_writer.Finish(&file1_info));
    // sst_file_writer already finished, cannot add this value
    ASSERT_NOK(sst_file_writer.Put(Key(100), EncodeAsUint64(1), "bad_val"));

    ASSERT_EQ(file1_info.file_path, file1);
    ASSERT_EQ(file1_info.num_entries, 150);
    ASSERT_EQ(file1_info.smallest_key, Key(0) + EncodeAsUint64(0 + 3));
    ASSERT_EQ(file1_info.largest_key, Key(49) + EncodeAsUint64(49 + 1));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    // Add file using file path
    ASSERT_OK(IngestExternalUDTFile({file1}));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);

    for (int k = 0; k < 50; k++) {
      for (int version = 3; version > 0; version--) {
        VerifyValueAndTs(Key(k), EncodeAsUint64(k + version),
                         Key(k) + "_val" + std::to_string(version),
                         EncodeAsUint64(k + version));
      }
    }

    // file2.sst [50, 200)
    // Put [key=k, ts=k, value=k_val] for k in [50, 200)
    // RangeDelete[start_key=75, end_key=125, ts=100]
    std::string file2 = sst_files_dir_ + "file2.sst";
    int range_del_begin = 75, range_del_end = 125, range_del_ts = 100;
    ASSERT_OK(sst_file_writer.Open(file2));
    for (int k = 50; k < 200; k++) {
      ASSERT_OK(
          sst_file_writer.Put(Key(k), EncodeAsUint64(k), Key(k) + "_val"));
      if (k == range_del_ts) {
        ASSERT_OK(sst_file_writer.DeleteRange(
            Key(range_del_begin), Key(range_del_end), EncodeAsUint64(k)));
      }
    }

    ExternalSstFileInfo file2_info;
    ASSERT_OK(sst_file_writer.Finish(&file2_info));

    // Current file size should be non-zero after a successful write.
    ASSERT_GT(sst_file_writer.FileSize(), 0);

    ASSERT_EQ(file2_info.file_path, file2);
    ASSERT_EQ(file2_info.num_entries, 150);
    ASSERT_EQ(file2_info.smallest_key, Key(50) + EncodeAsUint64(50));
    ASSERT_EQ(file2_info.largest_key, Key(199) + EncodeAsUint64(199));
    ASSERT_EQ(file2_info.num_range_del_entries, 1);
    ASSERT_EQ(file2_info.smallest_range_del_key,
              Key(range_del_begin) + EncodeAsUint64(range_del_ts));
    ASSERT_EQ(file2_info.largest_range_del_key,
              Key(range_del_end) + EncodeAsUint64(range_del_ts));
    // Add file using file path
    ASSERT_OK(IngestExternalUDTFile({file2}));
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);

    for (int k = 50; k < 200; k++) {
      if (k < range_del_begin || k >= range_del_end) {
        VerifyValueAndTs(Key(k), EncodeAsUint64(k), Key(k) + "_val",
                         EncodeAsUint64(k));
      }
      //      else {
      //        // FIXME(yuzhangyu): when range tombstone and point data has the
      //        // same seq, on read path, make range tombstone overrides point
      //        // data if it has a newer user-defined timestamp. This is how
      //        // we determine point data's overriding relationship, so we
      //        // should keep it consistent.
      //        VerifyValueAndTs(Key(k), EncodeAsUint64(k), Key(k) + "_val",
      //                         EncodeAsUint64(k));
      //        VerifyValueAndTs(Key(k), EncodeAsUint64(range_del_ts),
      //                         kValueNotFound,
      //                         kTsNotFound);
      //      }
    }

    // file3.sst [100, 200), key range overlap with db
    std::string file3 = sst_files_dir_ + "file3.sst";
    ASSERT_OK(sst_file_writer.Open(file3));
    for (int k = 100; k < 200; k++) {
      ASSERT_OK(
          sst_file_writer.Put(Key(k), EncodeAsUint64(k + 1), Key(k) + "_val1"));
    }
    ExternalSstFileInfo file3_info;
    ASSERT_OK(sst_file_writer.Finish(&file3_info));
    ASSERT_EQ(file3_info.file_path, file3);
    ASSERT_EQ(file3_info.num_entries, 100);
    ASSERT_EQ(file3_info.smallest_key, Key(100) + EncodeAsUint64(101));
    ASSERT_EQ(file3_info.largest_key, Key(199) + EncodeAsUint64(200));

    // Allowing ingesting a file containing overlap key range with the db is
    // not safe without verifying the overlapped key has a higher timestamp
    // than what the db contains, so we do not allow this regardless of
    // whether global sequence number is allowed.
    // Ingest file3 here: it is the file built above to overlap the db.
    ASSERT_NOK(IngestExternalUDTFile({file3}));
    ASSERT_NOK(IngestExternalUDTFile({file3}, /*allow_global_seqno*/ false));

    // Write [0, 50)
    // Write to DB newer versions to cover ingested data and move sequence
    // number forward.
    for (int k = 0; k < 50; k++) {
      ASSERT_OK(dbfull()->Put(WriteOptions(), Key(k), EncodeAsUint64(k + 4),
                              Key(k) + "_val" + std::to_string(4)));
    }

    // Read all 4 versions (3 from ingested, 1 from live writes).
    for (int k = 0; k < 50; k++) {
      for (int version = 4; version > 0; version--) {
        VerifyValueAndTs(Key(k), EncodeAsUint64(k + version),
                         Key(k) + "_val" + std::to_string(version),
                         EncodeAsUint64(k + version));
      }
    }
    SequenceNumber seq_num_before_ingestion = db_->GetLatestSequenceNumber();
    ASSERT_GT(seq_num_before_ingestion, 0U);

    // file4.sst [200, 250)
    std::string file4 = sst_files_dir_ + "file4.sst";
    ASSERT_OK(sst_file_writer.Open(file4));
    for (int k = 200; k < 250; k++) {
      ASSERT_OK(
          sst_file_writer.Put(Key(k), EncodeAsUint64(k), Key(k) + "_val"));
    }

    ExternalSstFileInfo file4_info;
    ASSERT_OK(sst_file_writer.Finish(&file4_info));

    // Current file size should be non-zero after a successful write.
    ASSERT_GT(sst_file_writer.FileSize(), 0);

    ASSERT_EQ(file4_info.file_path, file4);
    ASSERT_EQ(file4_info.num_entries, 50);
    ASSERT_EQ(file4_info.smallest_key, Key(200) + EncodeAsUint64(200));
    ASSERT_EQ(file4_info.largest_key, Key(249) + EncodeAsUint64(249));
    ASSERT_EQ(file4_info.num_range_del_entries, 0);
    ASSERT_EQ(file4_info.smallest_range_del_key, "");
    ASSERT_EQ(file4_info.largest_range_del_key, "");

    ASSERT_OK(IngestExternalUDTFile({file4}));

    // In UDT mode, any external file that can be successfully ingested also
    // should not overlap with the db. As a result, they can always get the
    // seq 0 assigned.
    ASSERT_EQ(db_->GetLatestSequenceNumber(), seq_num_before_ingestion);

    DestroyAndRecreateExternalSSTFilesDir();
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction |
                         kRangeDelSkipConfigs));
}
|
|
|
|
|
|
|
|
// Checks two ingestion modes that are rejected with NotSupported when
// user-defined timestamps are enabled:
//   1. ingesting several files whose user key ranges overlap each other;
//   2. ingest-behind mode.
TEST_F(ExternalSSTFileWithTimestampTest, SanityCheck) {
  Options options = CurrentOptions();
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.persist_user_defined_timestamps = true;
  DestroyAndReopen(options);

  SstFileWriter sst_file_writer(EnvOptions(), options);

  // file1.sst covers keys [0, 100), each written with ts == key index.
  std::string file1 = sst_files_dir_ + "file1.sst";
  ASSERT_OK(sst_file_writer.Open(file1));
  for (int i = 0; i < 100; ++i) {
    ASSERT_OK(sst_file_writer.Put(Key(i), EncodeAsUint64(i), Key(i) + "_val"));
  }
  ExternalSstFileInfo file1_info;
  ASSERT_OK(sst_file_writer.Finish(&file1_info));

  // file2.sst covers keys [50, 75), each written with ts == key index + 2.
  std::string file2 = sst_files_dir_ + "file2.sst";
  ASSERT_OK(sst_file_writer.Open(file2));
  for (int i = 50; i < 75; ++i) {
    ASSERT_OK(
        sst_file_writer.Put(Key(i), EncodeAsUint64(i + 2), Key(i) + "_val"));
  }
  ExternalSstFileInfo file2_info;
  ASSERT_OK(sst_file_writer.Finish(&file2_info));

  // Files whose user key ranges overlap cannot be ingested together. There is
  // no straightforward way to assign sequence numbers to them that upholds the
  // user-defined timestamps invariant: for the same user provided key, the
  // entry with a higher sequence number should not have a smaller timestamp.
  // Here file1 has (key=k, ts=k) and file2 has (key=k, ts=k+2) for k in
  // [50, 75), so the invariant only holds if file2 is ingested after file1.
  // When user key ranges are interleaved across files, no ingestion order can
  // guarantee it, so overlapping key ranges are disallowed altogether.
  ASSERT_TRUE(IngestExternalUDTFile({file1, file2}).IsNotSupported());

  options.allow_ingest_behind = true;
  DestroyAndReopen(options);

  // TODO(yuzhangyu): support ingestion behind for user-defined timestamps?
  // Ingesting external files with user-defined timestamps requires searching
  // the whole LSM tree to make sure there is no key range overlap with the db.
  // Ingestion behind currently just places the file at the bottom level
  // without such a search, so it is not allowed either.
  IngestExternalFileOptions ingest_behind_opts;
  ingest_behind_opts.ingest_behind = true;
  ASSERT_TRUE(
      db_->IngestExternalFile({file1}, ingest_behind_opts).IsNotSupported());

  DestroyAndRecreateExternalSSTFilesDir();
}
|
|
|
|
|
2020-06-03 22:53:09 +00:00
|
|
|
// Run the ExternalSSTFileTest parameterized tests over every combination of
// the two boolean parameters, in the order (false, false), (false, true),
// (true, false), (true, true). The meaning of each tuple field is defined by
// the fixture and is not visible here.
INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest,
                        testing::Combine(testing::Bool(), testing::Bool()));

// Run the ExternSSTFileLinkFailFallbackTest parameterized tests over three
// explicit combinations; (false, true) is not instantiated.
INSTANTIATE_TEST_CASE_P(ExternSSTFileLinkFailFallbackTest,
                        ExternSSTFileLinkFailFallbackTest,
                        testing::Values(std::make_tuple(true, false),
                                        std::make_tuple(true, true),
                                        std::make_tuple(false, false)));
|
2019-05-24 04:54:23 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2016-09-07 22:41:54 +00:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
2016-09-07 22:41:54 +00:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|