// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef OS_WIN
#include <unistd.h>
#endif  // ! OS_WIN

#include "benchmark/benchmark.h"
#include "db/db_impl/db_impl.h"
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "table/block_based/block.h"
#include "table/block_based/block_builder.h"
#include "util/random.h"
#include "utilities/merge_operators.h"

namespace ROCKSDB_NAMESPACE {

class KeyGenerator {
 public:
  // Generate the next key.
  // buff: the caller needs to make sure there's enough space for the
  //   generated key.
  // offset: controls the group of the key; 0 means a normal key, 1 means a
  //   non-existing key, and 2 is reserved.
  // prefix_only: only return the prefix.
  Slice Next(char* buff, int8_t offset = 0, bool prefix_only = false) {
    assert(max_key_ < std::numeric_limits<uint32_t>::max() /
                          MULTIPLIER);  // TODO: add large key support

    uint32_t k;
    if (is_sequential_) {
      assert(next_sequential_key_ < max_key_);
      k = (next_sequential_key_ % max_key_) * MULTIPLIER + offset;
      if (next_sequential_key_ + 1 == max_key_) {
        next_sequential_key_ = 0;
      } else {
        next_sequential_key_++;
      }
    } else {
      k = (rnd_->Next() % max_key_) * MULTIPLIER + offset;
    }
    // TODO: make sure the buff is large enough
    memset(buff, 0, key_size_);
    if (prefix_num_ > 0) {
      uint32_t prefix = (k % prefix_num_) * MULTIPLIER + offset;
      Encode(buff, prefix);
      if (prefix_only) {
        return {buff, prefix_size_};
      }
    }
    Encode(buff + prefix_size_, k);
    return {buff, key_size_};
  }

  // Use the internal buffer for the generated key; make sure there is only
  // one caller in a single thread.
  Slice Next() { return Next(buff_); }

  // Use the internal buffer for the generated prefix.
  Slice NextPrefix() {
    assert(prefix_num_ > 0);
    return Next(buff_, 0, true);
  }

  // Helper function to get a non-existing key.
  Slice NextNonExist() { return Next(buff_, 1); }

  Slice MaxKey(char* buff) const {
    memset(buff, 0xff, key_size_);
    return {buff, key_size_};
  }

  Slice MinKey(char* buff) const {
    memset(buff, 0, key_size_);
    return {buff, key_size_};
  }

  // max_key: the max key that it could generate
  // prefix_num: the max prefix number
  // key_size: in bytes
  explicit KeyGenerator(Random* rnd, uint64_t max_key = 100 * 1024 * 1024,
                        size_t prefix_num = 0, size_t key_size = 10) {
    prefix_num_ = prefix_num;
    key_size_ = key_size;
    max_key_ = max_key;
    rnd_ = rnd;
    if (prefix_num > 0) {
      prefix_size_ = 4;  // TODO: support different prefix_size
    }
  }

  // Generate sequential keys.
  explicit KeyGenerator(uint64_t max_key = 100 * 1024 * 1024,
                        size_t key_size = 10) {
    key_size_ = key_size;
    max_key_ = max_key;
    rnd_ = nullptr;
    is_sequential_ = true;
  }

 private:
  Random* rnd_;
  size_t prefix_num_ = 0;
  size_t prefix_size_ = 0;
  size_t key_size_;
  uint64_t max_key_;
  bool is_sequential_ = false;
  uint32_t next_sequential_key_ = 0;
  char buff_[256] = {0};
  const int MULTIPLIER = 3;

  // Write a 4-byte big-endian encoding of `value`.
  static void Encode(char* buf, uint32_t value) {
    if (port::kLittleEndian) {
      buf[0] = static_cast<char>((value >> 24) & 0xff);
      buf[1] = static_cast<char>((value >> 16) & 0xff);
      buf[2] = static_cast<char>((value >> 8) & 0xff);
      buf[3] = static_cast<char>(value & 0xff);
    } else {
      memcpy(buf, &value, sizeof(value));
    }
  }
};
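
// For illustration, the benchmarks below use this class roughly as follows
// (a minimal usage sketch, not an additional benchmark):
//
//   Random rnd(301);
//   KeyGenerator kg(&rnd, /*max_key=*/1000000);
//   Slice key = kg.Next();              // key from the "existing" group
//   Slice missing = kg.NextNonExist();  // key from the "non-existing" group
//
// Both calls share the internal buffer, so each returned Slice is only valid
// until the next call and only safe to use from a single thread.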

static void SetupDB(benchmark::State& state, Options& options,
                    std::unique_ptr<DB>* db,
                    const std::string& test_name = "") {
  options.create_if_missing = true;
  auto env = Env::Default();
  std::string db_path;
  Status s = env->GetTestDirectory(&db_path);
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
    return;
  }
  std::string db_name =
      db_path + kFilePathSeparator + test_name + std::to_string(getpid());
  DestroyDB(db_name, options);

  DB* db_ptr = nullptr;
  s = DB::Open(options, db_name, &db_ptr);
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
    return;
  }
  db->reset(db_ptr);
}

static void TeardownDB(benchmark::State& state, const std::unique_ptr<DB>& db,
                       const Options& options, KeyGenerator& kg) {
  char min_buff[256], max_buff[256];
  const Range r(kg.MinKey(min_buff), kg.MaxKey(max_buff));
  uint64_t size;
  Status s = db->GetApproximateSizes(&r, 1, &size);
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
  }
  state.counters["db_size"] = static_cast<double>(size);

  std::string db_name = db->GetName();
  s = db->Close();
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
  }
  DestroyDB(db_name, options);
}

static void DBOpen(benchmark::State& state) {
  // create DB
  std::unique_ptr<DB> db;
  Options options;
  SetupDB(state, options, &db, "DBOpen");

  std::string db_name = db->GetName();
  db->Close();

  options.create_if_missing = false;

  auto rnd = Random(123);

  for (auto _ : state) {
    {
      DB* db_ptr = nullptr;
      Status s = DB::Open(options, db_name, &db_ptr);
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
      db.reset(db_ptr);
    }
    state.PauseTiming();
    auto wo = WriteOptions();
    Status s;
    for (int i = 0; i < 2; i++) {
      for (int j = 0; j < 100; j++) {
        s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100));
        if (!s.ok()) {
          state.SkipWithError(s.ToString().c_str());
        }
      }
      s = db->Flush(FlushOptions());
    }
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
    s = db->Close();
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
    state.ResumeTiming();
  }
  DestroyDB(db_name, options);
}

// Fix the iteration count, because the DB size depends on how many
// iterations have run.
BENCHMARK(DBOpen)->Iterations(200);

static void DBClose(benchmark::State& state) {
  // create DB
  std::unique_ptr<DB> db;
  Options options;
  SetupDB(state, options, &db, "DBClose");

  std::string db_name = db->GetName();
  db->Close();

  options.create_if_missing = false;

  auto rnd = Random(12345);

  for (auto _ : state) {
    state.PauseTiming();
    {
      DB* db_ptr = nullptr;
      Status s = DB::Open(options, db_name, &db_ptr);
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
      db.reset(db_ptr);
    }
    auto wo = WriteOptions();
    Status s;
    for (int i = 0; i < 2; i++) {
      for (int j = 0; j < 100; j++) {
        s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100));
        if (!s.ok()) {
          state.SkipWithError(s.ToString().c_str());
        }
      }
      s = db->Flush(FlushOptions());
    }
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
    state.ResumeTiming();
    s = db->Close();
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }
  DestroyDB(db_name, options);
}

// Fix the iteration count, because the DB size depends on how many
// iterations have run.
BENCHMARK(DBClose)->Iterations(200);

static void DBPut(benchmark::State& state) {
  auto compaction_style = static_cast<CompactionStyle>(state.range(0));
  uint64_t max_data = state.range(1);
  uint64_t per_key_size = state.range(2);
  bool enable_statistics = state.range(3);
  bool enable_wal = state.range(4);
  uint64_t key_num = max_data / per_key_size;

  // setup DB
  static std::unique_ptr<DB> db = nullptr;
  Options options;
  if (enable_statistics) {
    options.statistics = CreateDBStatistics();
  }
  options.compaction_style = compaction_style;

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, key_num);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "DBPut");
  }

  auto wo = WriteOptions();
  wo.disableWAL = !enable_wal;

  for (auto _ : state) {
    state.PauseTiming();
    Slice key = kg.Next();
    std::string val = rnd.RandomString(static_cast<int>(per_key_size));
    state.ResumeTiming();
    Status s = db->Put(wo, key, val);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }

  if (state.thread_index() == 0) {
    auto db_full = static_cast_with_check<DBImpl>(db.get());
    Status s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
    if (enable_statistics) {
      HistogramData histogram_data;
      options.statistics->histogramData(DB_WRITE, &histogram_data);
      state.counters["put_mean"] = histogram_data.average * std::milli::den;
      state.counters["put_p95"] = histogram_data.percentile95 * std::milli::den;
      state.counters["put_p99"] = histogram_data.percentile99 * std::milli::den;
    }

    TeardownDB(state, db, options, kg);
  }
}

static void DBPutArguments(benchmark::internal::Benchmark* b) {
  for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
                         kCompactionStyleFIFO}) {
    for (int64_t max_data : {100l << 30}) {
      for (int64_t per_key_size : {256, 1024}) {
        for (bool enable_statistics : {false, true}) {
          for (bool wal : {false, true}) {
            b->Args(
                {comp_style, max_data, per_key_size, enable_statistics, wal});
          }
        }
      }
    }
  }
  b->ArgNames(
      {"comp_style", "max_data", "per_key_size", "enable_statistics", "wal"});
}

static const uint64_t DBPutNum = 409600l;
BENCHMARK(DBPut)->Threads(1)->Iterations(DBPutNum)->Apply(DBPutArguments);
BENCHMARK(DBPut)->Threads(8)->Iterations(DBPutNum / 8)->Apply(DBPutArguments);
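
// Note: the 8-thread registration divides the per-thread iteration count by
// 8, so the total amount of data written stays roughly DBPutNum keys
// regardless of the thread count. A single parameter combination can be
// selected at run time with google benchmark's filter flag, e.g. (binary
// name assumed):
//
//   ./db_basic_bench --benchmark_filter='DBPut/.*'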

static void ManualCompaction(benchmark::State& state) {
  auto compaction_style = static_cast<CompactionStyle>(state.range(0));
  uint64_t max_data = state.range(1);
  uint64_t per_key_size = state.range(2);
  bool enable_statistics = state.range(3);
  uint64_t key_num = max_data / per_key_size;

  // setup DB
  static std::unique_ptr<DB> db;
  Options options;
  if (enable_statistics) {
    options.statistics = CreateDBStatistics();
  }
  options.compaction_style = compaction_style;
  // No auto compaction
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = (1 << 30);
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.soft_pending_compaction_bytes_limit = 0;
  options.hard_pending_compaction_bytes_limit = 0;

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, key_num);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "ManualCompaction");
  }

  auto wo = WriteOptions();
  wo.disableWAL = true;
  uint64_t flush_mod = key_num / 4;  // generate at least 4 files for compaction
  for (uint64_t i = 0; i < key_num; i++) {
    Status s = db->Put(wo, kg.Next(),
                       rnd.RandomString(static_cast<int>(per_key_size)));
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
    if ((i + 1) % flush_mod == 0) {
      s = db->Flush(FlushOptions());
    }
  }
  FlushOptions fo;
  Status s = db->Flush(fo);
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
  }
  std::vector<LiveFileMetaData> files_meta;
  db->GetLiveFilesMetaData(&files_meta);
  std::vector<std::string> files_before_compact;
  files_before_compact.reserve(files_meta.size());
  for (const LiveFileMetaData& file : files_meta) {
    files_before_compact.emplace_back(file.name);
  }

  SetPerfLevel(kEnableTime);
  get_perf_context()->EnablePerLevelPerfContext();
  get_perf_context()->Reset();
  CompactionOptions co;
  for (auto _ : state) {
    s = db->CompactFiles(co, files_before_compact, 1);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }

  if (state.thread_index() == 0) {
    auto db_full = static_cast_with_check<DBImpl>(db.get());
    s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
    if (enable_statistics) {
      HistogramData histogram_data;
      options.statistics->histogramData(COMPACTION_TIME, &histogram_data);
      state.counters["comp_time"] = histogram_data.average;
      options.statistics->histogramData(COMPACTION_CPU_TIME, &histogram_data);
      state.counters["comp_cpu_time"] = histogram_data.average;
      options.statistics->histogramData(COMPACTION_OUTFILE_SYNC_MICROS,
                                        &histogram_data);
      state.counters["comp_outfile_sync"] = histogram_data.average;

      state.counters["comp_read"] = static_cast<double>(
          options.statistics->getTickerCount(COMPACT_READ_BYTES));
      state.counters["comp_write"] = static_cast<double>(
          options.statistics->getTickerCount(COMPACT_WRITE_BYTES));

      state.counters["user_key_comparison_count"] =
          static_cast<double>(get_perf_context()->user_key_comparison_count);
      state.counters["block_read_count"] =
          static_cast<double>(get_perf_context()->block_read_count);
      state.counters["block_read_time"] =
          static_cast<double>(get_perf_context()->block_read_time);
      state.counters["block_read_cpu_time"] =
          static_cast<double>(get_perf_context()->block_read_cpu_time);
      state.counters["block_checksum_time"] =
          static_cast<double>(get_perf_context()->block_checksum_time);
      state.counters["new_table_block_iter_nanos"] =
          static_cast<double>(get_perf_context()->new_table_block_iter_nanos);
      state.counters["new_table_iterator_nanos"] =
          static_cast<double>(get_perf_context()->new_table_iterator_nanos);
      state.counters["find_table_nanos"] =
          static_cast<double>(get_perf_context()->find_table_nanos);
    }

    TeardownDB(state, db, options, kg);
  }
}

static void ManualCompactionArguments(benchmark::internal::Benchmark* b) {
  for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal}) {
    for (int64_t max_data : {32l << 20, 128l << 20}) {
      for (int64_t per_key_size : {256, 1024}) {
        for (bool enable_statistics : {false, true}) {
          b->Args({comp_style, max_data, per_key_size, enable_statistics});
        }
      }
    }
  }
  b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics"});
}

BENCHMARK(ManualCompaction)->Iterations(1)->Apply(ManualCompactionArguments);

static void ManualFlush(benchmark::State& state) {
  uint64_t key_num = state.range(0);
  uint64_t per_key_size = state.range(1);
  bool enable_statistics = true;

  // setup DB
  static std::unique_ptr<DB> db;
  Options options;
  if (enable_statistics) {
    options.statistics = CreateDBStatistics();
  }
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = (1 << 30);
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.soft_pending_compaction_bytes_limit = 0;
  options.hard_pending_compaction_bytes_limit = 0;
  options.write_buffer_size = 2l << 30;  // 2G to avoid auto flush

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, key_num);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "ManualFlush");
  }

  auto wo = WriteOptions();
  for (auto _ : state) {
    state.PauseTiming();
    for (uint64_t i = 0; i < key_num; i++) {
      Status s = db->Put(wo, kg.Next(),
                         rnd.RandomString(static_cast<int>(per_key_size)));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }
    FlushOptions fo;
    state.ResumeTiming();
    Status s = db->Flush(fo);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }

  if (state.thread_index() == 0) {
    auto db_full = static_cast_with_check<DBImpl>(db.get());
    Status s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
    if (enable_statistics) {
      HistogramData histogram_data;
      options.statistics->histogramData(FLUSH_TIME, &histogram_data);
      state.counters["flush_time"] = histogram_data.average;
      state.counters["flush_write_bytes"] = static_cast<double>(
          options.statistics->getTickerCount(FLUSH_WRITE_BYTES));
    }

    TeardownDB(state, db, options, kg);
  }
}

static void ManualFlushArguments(benchmark::internal::Benchmark* b) {
  for (int64_t key_num : {1l << 10, 8l << 10, 64l << 10}) {
    for (int64_t per_key_size : {256, 1024}) {
      b->Args({key_num, per_key_size});
    }
  }
  b->ArgNames({"key_num", "per_key_size"});
}

BENCHMARK(ManualFlush)->Iterations(1)->Apply(ManualFlushArguments);

// Copied from test_util.cc to not depend on rocksdb_test_lib
// when building microbench binaries.
static Slice CompressibleString(Random* rnd, double compressed_fraction,
                                int len, std::string* dst) {
  int raw = static_cast<int>(len * compressed_fraction);
  if (raw < 1) {
    raw = 1;
  }
  std::string raw_data = rnd->RandomBinaryString(raw);

  // Duplicate the random data until we have filled "len" bytes
  dst->clear();
  while (dst->size() < (unsigned int)len) {
    dst->append(raw_data);
  }
  dst->resize(len);
  return Slice(*dst);
}
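
// For example, with compressed_fraction = 0.5 and len = 1024, the value is
// 512 random bytes repeated once, so a general-purpose compressor should be
// able to shrink it to roughly half of its original size.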

static void DBGet(benchmark::State& state) {
  auto compaction_style = static_cast<CompactionStyle>(state.range(0));
  uint64_t max_data = state.range(1);
  uint64_t per_key_size = state.range(2);
  bool enable_statistics = state.range(3);
  bool negative_query = state.range(4);
  bool enable_filter = state.range(5);
  bool mmap = state.range(6);
  auto compression_type = static_cast<CompressionType>(state.range(7));
  bool compression_checksum = static_cast<bool>(state.range(8));
  bool no_blockcache = state.range(9);
  uint64_t key_num = max_data / per_key_size;

  // setup DB
  static std::unique_ptr<DB> db;
  Options options;
  if (enable_statistics) {
    options.statistics = CreateDBStatistics();
  }
  if (mmap) {
    options.allow_mmap_reads = true;
    options.compression = kNoCompression;
  }
  options.compaction_style = compaction_style;

  BlockBasedTableOptions table_options;
  if (enable_filter) {
    table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  }
  if (mmap) {
    table_options.no_block_cache = true;
    table_options.block_restart_interval = 1;
  }
  options.compression = compression_type;
  options.compression_opts.checksum = compression_checksum;
  if (no_blockcache) {
    table_options.no_block_cache = true;
  } else {
    table_options.block_cache = NewLRUCache(100 << 20);
  }
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  auto rnd = Random(301 + state.thread_index());

  if (state.thread_index() == 0) {
    KeyGenerator kg_seq(key_num /* max_key */);
    SetupDB(state, options, &db, "DBGet");

    // Load all valid keys into DB. That way, iterations in `!negative_query`
    // runs can always find the key even though it is generated from a random
    // number.
    auto wo = WriteOptions();
    wo.disableWAL = true;
    std::string val;
    for (uint64_t i = 0; i < key_num; i++) {
      CompressibleString(&rnd, 0.5, static_cast<int>(per_key_size), &val);
      Status s = db->Put(wo, kg_seq.Next(), val);
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }

    // Compact whole DB into one level, so each iteration will consider the
    // same number of files (one).
    Status s = db->CompactRange(CompactRangeOptions(), nullptr /* begin */,
                                nullptr /* end */);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }

  KeyGenerator kg_rnd(&rnd, key_num /* max_key */);
  auto ro = ReadOptions();
  if (mmap) {
    ro.verify_checksums = false;
  }
  size_t not_found = 0;
  if (negative_query) {
    for (auto _ : state) {
      std::string val;
      Status s = db->Get(ro, kg_rnd.NextNonExist(), &val);
      if (s.IsNotFound()) {
        not_found++;
      }
    }
  } else {
    for (auto _ : state) {
      std::string val;
      Status s = db->Get(ro, kg_rnd.Next(), &val);
      if (s.IsNotFound()) {
        not_found++;
      }
    }
  }

  state.counters["neg_qu_pct"] = benchmark::Counter(
      static_cast<double>(not_found * 100), benchmark::Counter::kAvgIterations);

  if (state.thread_index() == 0) {
    if (enable_statistics) {
      HistogramData histogram_data;
      options.statistics->histogramData(DB_GET, &histogram_data);
      state.counters["get_mean"] = histogram_data.average * std::milli::den;
      state.counters["get_p95"] = histogram_data.percentile95 * std::milli::den;
      state.counters["get_p99"] = histogram_data.percentile99 * std::milli::den;
    }

    TeardownDB(state, db, options, kg_rnd);
  }
}

static void DBGetArguments(benchmark::internal::Benchmark* b) {
  for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
                         kCompactionStyleFIFO}) {
|
|
|
for (int64_t max_data : {1l << 20, 128l << 20, 512l << 20}) {
|
2022-02-01 17:00:46 +00:00
|
|
|
for (int64_t per_key_size : {256, 1024}) {
|
|
|
|
for (bool enable_statistics : {false, true}) {
|
|
|
|
for (bool negative_query : {false, true}) {
|
|
|
|
for (bool enable_filter : {false, true}) {
|
2022-04-26 23:46:39 +00:00
|
|
|
for (bool mmap : {false, true}) {
|
Add `CompressionOptions::checksum` for enabling ZSTD checksum (#11666)
Summary:
Optionally enable zstd checksum flag (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428) to detect corruption during decompression. Main changes are in compression.h:
* User can set CompressionOptions::checksum to true to enable this feature.
* We enable this feature in ZSTD by setting the checksum flag in ZSTD compression context: `ZSTD_CCtx`.
* Uses `ZSTD_compress2()` to do compression since it supports frame parameter like the checksum flag. Compression level is also set in compression context as a flag.
* Error handling during decompression to propagate error message from ZSTD.
* Updated microbench to test read performance impact.
About compatibility, the current compression decoders should continue to work with the data created by the new compression API `ZSTD_compress2()`: https://github.com/facebook/zstd/issues/3711.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11666
Test Plan:
* Existing unit tests for zstd compression
* Add unit test `DBTest2.ZSTDChecksum` to test the corruption case
* Manually tested that compression levels, parallel compression, dictionary compression, index compression all work with the new ZSTD_compress2() API.
* Manually tested with `sst_dump --command=recompress` that different compression levels and dictionary compression settings all work.
* Manually tested compiling with older versions of ZSTD: v1.3.8, v1.1.0, v0.6.2.
* Perf impact: from public benchmark data: http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html for checksum and https://github.com/facebook/zstd#benchmarks, if decompression is 1700MB/s and checksum computation is 70000MB/s, checksum computation is an additional ~2.4% time for decompression. Compression is slower and checksumming should be less noticeable.
* Microbench:
```
TEST_TMPDIR=/dev/shm ./branch_db_basic_bench --benchmark_filter=DBGet/comp_style:0/max_data:1048576/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:0/compression_type:7/compression_checksum:1/no_blockcache:1/iterations:10000/threads:1 --benchmark_repetitions=100
Min out of 100 runs:
Main:
10390 10436 10456 10484 10499 10535 10544 10545 10565 10568
After this PR, checksum=false
10285 10397 10503 10508 10515 10557 10562 10635 10640 10660
After this PR, checksum=true
10827 10876 10925 10949 10971 11052 11061 11063 11100 11109
```
* db_bench:
```
Write perf
TEST_TMPDIR=/dev/shm/ ./db_bench_ichecksum --benchmarks=fillseq[-X10] --compression_type=zstd --num=10000000 --compression_checksum=..
[FillSeq checksum=0]
fillseq [AVG 10 runs] : 281635 (± 31711) ops/sec; 31.2 (± 3.5) MB/sec
fillseq [MEDIAN 10 runs] : 294027 ops/sec; 32.5 MB/sec
[FillSeq checksum=1]
fillseq [AVG 10 runs] : 286961 (± 34700) ops/sec; 31.7 (± 3.8) MB/sec
fillseq [MEDIAN 10 runs] : 283278 ops/sec; 31.3 MB/sec
Read perf
TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=readrandom[-X20] --num=100000000 --reads=1000000 --use_existing_db=true --readonly=1
[Readrandom checksum=1]
readrandom [AVG 20 runs] : 360928 (± 3579) ops/sec; 4.0 (± 0.0) MB/sec
readrandom [MEDIAN 20 runs] : 362468 ops/sec; 4.0 MB/sec
[Readrandom checksum=0]
readrandom [AVG 20 runs] : 380365 (± 2384) ops/sec; 4.2 (± 0.0) MB/sec
readrandom [MEDIAN 20 runs] : 379800 ops/sec; 4.2 MB/sec
Compression
TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=compress[-X20] --compression_type=zstd --num=100000000 --compression_checksum=1
checksum=1
compress [AVG 20 runs] : 54074 (± 634) ops/sec; 211.2 (± 2.5) MB/sec
compress [MEDIAN 20 runs] : 54396 ops/sec; 212.5 MB/sec
checksum=0
compress [AVG 20 runs] : 54598 (± 393) ops/sec; 213.3 (± 1.5) MB/sec
compress [MEDIAN 20 runs] : 54592 ops/sec; 213.3 MB/sec
Decompression:
TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=uncompress[-X20] --compression_type=zstd --compression_checksum=1
checksum = 0
uncompress [AVG 20 runs] : 167499 (± 962) ops/sec; 654.3 (± 3.8) MB/sec
uncompress [MEDIAN 20 runs] : 167210 ops/sec; 653.2 MB/sec
checksum = 1
uncompress [AVG 20 runs] : 167980 (± 924) ops/sec; 656.2 (± 3.6) MB/sec
uncompress [MEDIAN 20 runs] : 168465 ops/sec; 658.1 MB/sec
```
Reviewed By: ajkr
Differential Revision: D48019378
Pulled By: cbi42
fbshipit-source-id: 674120c6e1853c2ced1436ac8138559d0204feba
2023-08-18 22:01:59 +00:00
|
|
|
for (int compression_type :
|
|
|
|
{kNoCompression /* 0x0 */, kZSTD /* 0x7 */}) {
|
|
|
|
for (bool compression_checksum : {false, true}) {
|
|
|
|
for (bool no_blockcache : {false, true}) {
|
|
|
|
b->Args({comp_style, max_data, per_key_size,
|
|
|
|
enable_statistics, negative_query, enable_filter,
|
|
|
|
mmap, compression_type, compression_checksum,
|
|
|
|
no_blockcache});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-04-26 23:46:39 +00:00
|
|
|
}
|
2022-02-01 17:00:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics",
|
Add `CompressionOptions::checksum` for enabling ZSTD checksum (#11666)
Summary:
Optionally enable zstd checksum flag (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428) to detect corruption during decompression. Main changes are in compression.h:
* User can set CompressionOptions::checksum to true to enable this feature.
* We enable this feature in ZSTD by setting the checksum flag in ZSTD compression context: `ZSTD_CCtx`.
* Uses `ZSTD_compress2()` to do compression since it supports frame parameter like the checksum flag. Compression level is also set in compression context as a flag.
* Error handling during decompression to propagate error message from ZSTD.
* Updated microbench to test read performance impact.
About compatibility, the current compression decoders should continue to work with the data created by the new compression API `ZSTD_compress2()`: https://github.com/facebook/zstd/issues/3711.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11666
Test Plan:
* Existing unit tests for zstd compression
* Add unit test `DBTest2.ZSTDChecksum` to test the corruption case
* Manually tested that compression levels, parallel compression, dictionary compression, index compression all work with the new ZSTD_compress2() API.
* Manually tested with `sst_dump --command=recompress` that different compression levels and dictionary compression settings all work.
* Manually tested compiling with older versions of ZSTD: v1.3.8, v1.1.0, v0.6.2.
* Perf impact: from public benchmark data: http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html for checksum and https://github.com/facebook/zstd#benchmarks, if decompression is 1700MB/s and checksum computation is 70000MB/s, checksum computation is an additional ~2.4% time for decompression. Compression is slower and checksumming should be less noticeable.
* Microbench:
```
TEST_TMPDIR=/dev/shm ./branch_db_basic_bench --benchmark_filter=DBGet/comp_style:0/max_data:1048576/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:0/compression_type:7/compression_checksum:1/no_blockcache:1/iterations:10000/threads:1 --benchmark_repetitions=100
Min out of 100 runs:
Main:
10390 10436 10456 10484 10499 10535 10544 10545 10565 10568
After this PR, checksum=false
10285 10397 10503 10508 10515 10557 10562 10635 10640 10660
After this PR, checksum=true
10827 10876 10925 10949 10971 11052 11061 11063 11100 11109
```
* db_bench:
```
Write perf
TEST_TMPDIR=/dev/shm/ ./db_bench_ichecksum --benchmarks=fillseq[-X10] --compression_type=zstd --num=10000000 --compression_checksum=..
[FillSeq checksum=0]
fillseq [AVG 10 runs] : 281635 (± 31711) ops/sec; 31.2 (± 3.5) MB/sec
fillseq [MEDIAN 10 runs] : 294027 ops/sec; 32.5 MB/sec
[FillSeq checksum=1]
fillseq [AVG 10 runs] : 286961 (± 34700) ops/sec; 31.7 (± 3.8) MB/sec
fillseq [MEDIAN 10 runs] : 283278 ops/sec; 31.3 MB/sec
Read perf
TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=readrandom[-X20] --num=100000000 --reads=1000000 --use_existing_db=true --readonly=1
[Readrandom checksum=1]
readrandom [AVG 20 runs] : 360928 (± 3579) ops/sec; 4.0 (± 0.0) MB/sec
readrandom [MEDIAN 20 runs] : 362468 ops/sec; 4.0 MB/sec
[Readrandom checksum=0]
readrandom [AVG 20 runs] : 380365 (± 2384) ops/sec; 4.2 (± 0.0) MB/sec
readrandom [MEDIAN 20 runs] : 379800 ops/sec; 4.2 MB/sec
Compression
TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=compress[-X20] --compression_type=zstd --num=100000000 --compression_checksum=1
checksum=1
compress [AVG 20 runs] : 54074 (± 634) ops/sec; 211.2 (± 2.5) MB/sec
compress [MEDIAN 20 runs] : 54396 ops/sec; 212.5 MB/sec
checksum=0
compress [AVG 20 runs] : 54598 (± 393) ops/sec; 213.3 (± 1.5) MB/sec
compress [MEDIAN 20 runs] : 54592 ops/sec; 213.3 MB/sec
Decompression:
TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=uncompress[-X20] --compression_type=zstd --compression_checksum=1
checksum = 0
uncompress [AVG 20 runs] : 167499 (± 962) ops/sec; 654.3 (± 3.8) MB/sec
uncompress [MEDIAN 20 runs] : 167210 ops/sec; 653.2 MB/sec
checksum = 1
uncompress [AVG 20 runs] : 167980 (± 924) ops/sec; 656.2 (± 3.6) MB/sec
uncompress [MEDIAN 20 runs] : 168465 ops/sec; 658.1 MB/sec
```
Reviewed By: ajkr
Differential Revision: D48019378
Pulled By: cbi42
fbshipit-source-id: 674120c6e1853c2ced1436ac8138559d0204feba
2023-08-18 22:01:59 +00:00
|
|
|
"negative_query", "enable_filter", "mmap", "compression_type",
|
|
|
|
"compression_checksum", "no_blockcache"});
|
2022-02-01 17:00:46 +00:00
|
|
|
}
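// A single DBGet configuration can be run in isolation with google benchmark's
// filter flag, e.g. (values are illustrative):
//   ./db_basic_bench --benchmark_filter='DBGet/comp_style:0/max_data:1048576/.*'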
static const uint64_t DBGetNum = 10000l;
BENCHMARK(DBGet)->Threads(1)->Iterations(DBGetNum)->Apply(DBGetArguments);
BENCHMARK(DBGet)->Threads(8)->Iterations(DBGetNum / 8)->Apply(DBGetArguments);

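// Measures Get() for keys that are known not to exist and reports the
// accumulated PerfContext timers (block read/checksum, snapshot, per-level
// get-from-table time, etc.) as per-iteration benchmark counters.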
static void SimpleGetWithPerfContext(benchmark::State& state) {
  // setup DB
  static std::unique_ptr<DB> db;
  std::string db_name;
  Options options;
  options.create_if_missing = true;
  options.arena_block_size = 8 << 20;

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, 1024);

  if (state.thread_index() == 0) {
    auto env = Env::Default();
    std::string db_path;
    Status s = env->GetTestDirectory(&db_path);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
    db_name = db_path + "/simple_get_" + std::to_string(getpid());
    DestroyDB(db_name, options);

    {
      DB* db_ptr = nullptr;
      s = DB::Open(options, db_name, &db_ptr);
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
        return;
      }
      db.reset(db_ptr);
    }
    // load db
    auto wo = WriteOptions();
    wo.disableWAL = true;
    for (uint64_t i = 0; i < 1024; i++) {
      s = db->Put(wo, kg.Next(), rnd.RandomString(1024));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }
    auto db_full = static_cast_with_check<DBImpl>(db.get());
    s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
    FlushOptions fo;
    s = db->Flush(fo);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }

  auto ro = ReadOptions();
  size_t not_found = 0;
  uint64_t user_key_comparison_count = 0;
  uint64_t block_read_time = 0;
  uint64_t block_read_cpu_time = 0;
  uint64_t block_checksum_time = 0;
  uint64_t get_snapshot_time = 0;
  uint64_t get_post_process_time = 0;
  uint64_t get_from_output_files_time = 0;
  uint64_t new_table_block_iter_nanos = 0;
  uint64_t block_seek_nanos = 0;
  uint64_t get_cpu_nanos = 0;
  uint64_t get_from_table_nanos = 0;
  SetPerfLevel(kEnableTime);
  get_perf_context()->EnablePerLevelPerfContext();
  for (auto _ : state) {
    std::string val;
    get_perf_context()->Reset();
    Status s = db->Get(ro, kg.NextNonExist(), &val);
    if (s.IsNotFound()) {
      not_found++;
    }
    user_key_comparison_count += get_perf_context()->user_key_comparison_count;
    block_read_time += get_perf_context()->block_read_time;
    block_read_cpu_time += get_perf_context()->block_read_cpu_time;
    block_checksum_time += get_perf_context()->block_checksum_time;
    get_snapshot_time += get_perf_context()->get_snapshot_time;
    get_post_process_time += get_perf_context()->get_post_process_time;
    get_from_output_files_time +=
        get_perf_context()->get_from_output_files_time;
    new_table_block_iter_nanos +=
        get_perf_context()->new_table_block_iter_nanos;
    block_seek_nanos += get_perf_context()->block_seek_nanos;
    get_cpu_nanos += get_perf_context()->get_cpu_nanos;
    get_from_table_nanos +=
        (*(get_perf_context()->level_to_perf_context))[0].get_from_table_nanos;
  }

  state.counters["neg_qu_pct"] = benchmark::Counter(
      static_cast<double>(not_found * 100), benchmark::Counter::kAvgIterations);
  state.counters["user_key_comparison_count"] =
      benchmark::Counter(static_cast<double>(user_key_comparison_count),
                         benchmark::Counter::kAvgIterations);
  state.counters["block_read_time"] = benchmark::Counter(
      static_cast<double>(block_read_time), benchmark::Counter::kAvgIterations);
  state.counters["block_read_cpu_time"] =
      benchmark::Counter(static_cast<double>(block_read_cpu_time),
                         benchmark::Counter::kAvgIterations);
  state.counters["block_checksum_time"] =
      benchmark::Counter(static_cast<double>(block_checksum_time),
                         benchmark::Counter::kAvgIterations);
  state.counters["get_snapshot_time"] =
      benchmark::Counter(static_cast<double>(get_snapshot_time),
                         benchmark::Counter::kAvgIterations);
  state.counters["get_post_process_time"] =
      benchmark::Counter(static_cast<double>(get_post_process_time),
                         benchmark::Counter::kAvgIterations);
  state.counters["get_from_output_files_time"] =
      benchmark::Counter(static_cast<double>(get_from_output_files_time),
                         benchmark::Counter::kAvgIterations);
  state.counters["new_table_block_iter_nanos"] =
      benchmark::Counter(static_cast<double>(new_table_block_iter_nanos),
                         benchmark::Counter::kAvgIterations);
  state.counters["block_seek_nanos"] =
      benchmark::Counter(static_cast<double>(block_seek_nanos),
                         benchmark::Counter::kAvgIterations);
  state.counters["get_cpu_nanos"] = benchmark::Counter(
      static_cast<double>(get_cpu_nanos), benchmark::Counter::kAvgIterations);
  state.counters["get_from_table_nanos"] =
      benchmark::Counter(static_cast<double>(get_from_table_nanos),
                         benchmark::Counter::kAvgIterations);

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, kg);
  }
}

BENCHMARK(SimpleGetWithPerfContext)->Iterations(1000000);

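// Measures GetMergeOperands() when every operand still lives in the memtable;
// the write buffer is sized so nothing is flushed before the read phase.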
static void DBGetMergeOperandsInMemtable(benchmark::State& state) {
  const uint64_t kDataLen = 16 << 20;  // 16MB
  const uint64_t kValueLen = 64;
  const uint64_t kNumEntries = kDataLen / kValueLen;
  const uint64_t kNumEntriesPerKey = state.range(0);
  const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey;

  // setup DB
  static std::unique_ptr<DB> db;

  Options options;
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  // Make memtable large enough that automatic flush will not be triggered.
  options.write_buffer_size = 2 * kDataLen;

  KeyGenerator sequential_key_gen(kNumKeys);
  auto rnd = Random(301 + state.thread_index());

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "DBGetMergeOperandsInMemtable");

    // load db
    auto write_opts = WriteOptions();
    write_opts.disableWAL = true;
    for (uint64_t i = 0; i < kNumEntries; i++) {
      Status s = db->Merge(write_opts, sequential_key_gen.Next(),
                           rnd.RandomString(static_cast<int>(kValueLen)));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }
  }

  KeyGenerator random_key_gen(kNumKeys);
  std::vector<PinnableSlice> value_operands;
  value_operands.resize(kNumEntriesPerKey);
  GetMergeOperandsOptions get_merge_ops_opts;
  get_merge_ops_opts.expected_max_number_of_operands =
      static_cast<int>(kNumEntriesPerKey);
  for (auto _ : state) {
    int num_value_operands = 0;
    Status s = db->GetMergeOperands(
        ReadOptions(), db->DefaultColumnFamily(), random_key_gen.Next(),
        value_operands.data(), &get_merge_ops_opts, &num_value_operands);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
    if (num_value_operands != static_cast<int>(kNumEntriesPerKey)) {
      state.SkipWithError("Unexpected number of merge operands found for key");
    }
    for (auto& value_operand : value_operands) {
      value_operand.Reset();
    }
  }

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, random_key_gen);
  }
}

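// Measures GetMergeOperands() after the operands have been flushed to an L0
// SST file; state.range(1) toggles between block-cache-backed reads and mmap
// reads with no block cache.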
static void DBGetMergeOperandsInSstFile(benchmark::State& state) {
  const uint64_t kDataLen = 16 << 20;  // 16MB
  const uint64_t kValueLen = 64;
  const uint64_t kNumEntries = kDataLen / kValueLen;
  const uint64_t kNumEntriesPerKey = state.range(0);
  const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey;
  const bool kMmap = state.range(1);

  // setup DB
  static std::unique_ptr<DB> db;

  BlockBasedTableOptions table_options;
  if (kMmap) {
    table_options.no_block_cache = true;
  } else {
    // Make block cache large enough that eviction will not be triggered.
    table_options.block_cache = NewLRUCache(2 * kDataLen);
  }

  Options options;
  if (kMmap) {
    options.allow_mmap_reads = true;
  }
  options.compression = kNoCompression;
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  // Make memtable large enough that automatic flush will not be triggered.
  options.write_buffer_size = 2 * kDataLen;

  KeyGenerator sequential_key_gen(kNumKeys);
  auto rnd = Random(301 + state.thread_index());

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "DBGetMergeOperandsInBlockCache");

    // load db
    //
    // Take a snapshot after each cycle of merges to ensure flush cannot
    // merge any entries.
    std::vector<const Snapshot*> snapshots;
    snapshots.resize(kNumEntriesPerKey);
    auto write_opts = WriteOptions();
    write_opts.disableWAL = true;
    for (uint64_t i = 0; i < kNumEntriesPerKey; i++) {
      for (uint64_t j = 0; j < kNumKeys; j++) {
        Status s = db->Merge(write_opts, sequential_key_gen.Next(),
                             rnd.RandomString(static_cast<int>(kValueLen)));
        if (!s.ok()) {
          state.SkipWithError(s.ToString().c_str());
        }
      }
      snapshots[i] = db->GetSnapshot();
    }

    // Flush to an L0 file; read back to prime the cache/mapped memory.
    db->Flush(FlushOptions());
    for (uint64_t i = 0; i < kNumKeys; ++i) {
      std::string value;
      Status s = db->Get(ReadOptions(), sequential_key_gen.Next(), &value);
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }

    if (state.thread_index() == 0) {
      for (uint64_t i = 0; i < kNumEntriesPerKey; ++i) {
        db->ReleaseSnapshot(snapshots[i]);
      }
    }
  }

  KeyGenerator random_key_gen(kNumKeys);
  std::vector<PinnableSlice> value_operands;
  value_operands.resize(kNumEntriesPerKey);
  GetMergeOperandsOptions get_merge_ops_opts;
  get_merge_ops_opts.expected_max_number_of_operands =
      static_cast<int>(kNumEntriesPerKey);
  for (auto _ : state) {
    int num_value_operands = 0;
    ReadOptions read_opts;
    read_opts.verify_checksums = false;
    Status s = db->GetMergeOperands(
        read_opts, db->DefaultColumnFamily(), random_key_gen.Next(),
        value_operands.data(), &get_merge_ops_opts, &num_value_operands);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
    if (num_value_operands != static_cast<int>(kNumEntriesPerKey)) {
      state.SkipWithError("Unexpected number of merge operands found for key");
    }
    for (auto& value_operand : value_operands) {
      value_operand.Reset();
    }
  }

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, random_key_gen);
  }
}

static void DBGetMergeOperandsInMemtableArguments(
    benchmark::internal::Benchmark* b) {
  for (int entries_per_key : {1, 32, 1024}) {
    b->Args({entries_per_key});
  }
  b->ArgNames({"entries_per_key"});
}

static void DBGetMergeOperandsInSstFileArguments(
    benchmark::internal::Benchmark* b) {
  for (int entries_per_key : {1, 32, 1024}) {
    for (bool mmap : {false, true}) {
      b->Args({entries_per_key, mmap});
    }
  }
  b->ArgNames({"entries_per_key", "mmap"});
}

BENCHMARK(DBGetMergeOperandsInMemtable)
    ->Threads(1)
    ->Apply(DBGetMergeOperandsInMemtableArguments);
BENCHMARK(DBGetMergeOperandsInMemtable)
    ->Threads(8)
    ->Apply(DBGetMergeOperandsInMemtableArguments);
BENCHMARK(DBGetMergeOperandsInSstFile)
    ->Threads(1)
    ->Apply(DBGetMergeOperandsInSstFileArguments);
BENCHMARK(DBGetMergeOperandsInSstFile)
    ->Threads(8)
    ->Apply(DBGetMergeOperandsInSstFileArguments);

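// Helpers for the block-level benchmarks below: keys are formatted as "%6d%4d"
// (primary then secondary key) plus optional random padding, and values are
// 100-byte random strings.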
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
                        Random* rnd) {
  char buf[50];
  char* p = &buf[0];
  snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
  std::string k(p);
  if (padding_size) {
    k += rnd->RandomString(padding_size);
  }

  return k;
}

void GenerateRandomKVs(std::vector<std::string>* keys,
                       std::vector<std::string>* values, const int from,
                       const int len, const int step = 1,
                       const int padding_size = 0,
                       const int keys_share_prefix = 1) {
  Random rnd(302);

  // generate different prefix
  for (int i = from; i < from + len; i += step) {
    // generating keys that share the prefix
    for (int j = 0; j < keys_share_prefix; ++j) {
      keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));

      // 100 bytes values
      values->emplace_back(rnd.RandomString(100));
    }
  }
}

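// Measures DataBlockIter::SeekForGet() against a single binary-searchable
// data block built directly in memory (no DB instance involved).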
// TODO: move this to a different file, as it tests an internal API
static void DataBlockSeek(benchmark::State& state) {
  Random rnd(301);
  Options options = Options();

  BlockBuilder builder(16, true, false,
                       BlockBasedTableOptions::kDataBlockBinarySearch);

  int num_records = 500;
  std::vector<std::string> keys;
  std::vector<std::string> values;

  GenerateRandomKVs(&keys, &values, 0, num_records);

  for (int i = 0; i < num_records; i++) {
    std::string ukey(keys[i] + "1");
    InternalKey ikey(ukey, 0, kTypeValue);
    builder.Add(ikey.Encode().ToString(), values[i]);
  }

  Slice rawblock = builder.Finish();

  BlockContents contents;
  contents.data = rawblock;
  Block reader(std::move(contents));

  SetPerfLevel(kEnableTime);
  uint64_t total = 0;
  for (auto _ : state) {
    DataBlockIter* iter = reader.NewDataIterator(options.comparator,
                                                 kDisableGlobalSequenceNumber);
    uint32_t index = rnd.Uniform(static_cast<int>(num_records));
    std::string ukey(keys[index] + "1");
    InternalKey ikey(ukey, 0, kTypeValue);
    get_perf_context()->Reset();
    bool may_exist = iter->SeekForGet(ikey.Encode().ToString());
    if (!may_exist) {
      state.SkipWithError("key not found");
    }
    total += get_perf_context()->block_seek_nanos;
    delete iter;
  }
  state.counters["seek_ns"] = benchmark::Counter(
      static_cast<double>(total), benchmark::Counter::kAvgIterations);
}

BENCHMARK(DataBlockSeek)->Iterations(1000000);

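// Measures Iterator::Seek(); iterator creation happens outside the timed
// region, and the target key is existing or non-existing depending on the
// negative_query argument.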
static void IteratorSeek(benchmark::State& state) {
  auto compaction_style = static_cast<CompactionStyle>(state.range(0));
  uint64_t max_data = state.range(1);
  uint64_t per_key_size = state.range(2);
  bool enable_statistics = state.range(3);
  bool negative_query = state.range(4);
  bool enable_filter = state.range(5);
  uint64_t key_num = max_data / per_key_size;

  // setup DB
  static std::unique_ptr<DB> db;
  Options options;
  if (enable_statistics) {
    options.statistics = CreateDBStatistics();
  }
  options.compaction_style = compaction_style;

  if (enable_filter) {
    BlockBasedTableOptions table_options;
    table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  }

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, key_num);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "IteratorSeek");

    // load db
    auto wo = WriteOptions();
    wo.disableWAL = true;
    for (uint64_t i = 0; i < key_num; i++) {
      Status s = db->Put(wo, kg.Next(),
                         rnd.RandomString(static_cast<int>(per_key_size)));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }

    FlushOptions fo;
    Status s = db->Flush(fo);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }

    auto db_full = static_cast_with_check<DBImpl>(db.get());
    s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
  }

  for (auto _ : state) {
    std::unique_ptr<Iterator> iter{nullptr};
    state.PauseTiming();
    if (!iter) {
      iter.reset(db->NewIterator(ReadOptions()));
    }
    Slice key = negative_query ? kg.NextNonExist() : kg.Next();
    if (!iter->status().ok()) {
      state.SkipWithError(iter->status().ToString().c_str());
      return;
    }
    state.ResumeTiming();
    iter->Seek(key);
  }

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, kg);
  }
}

static void IteratorSeekArguments(benchmark::internal::Benchmark* b) {
  for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
                         kCompactionStyleFIFO}) {
    for (int64_t max_data : {128l << 20, 512l << 20}) {
      for (int64_t per_key_size : {256, 1024}) {
        for (bool enable_statistics : {false, true}) {
          for (bool negative_query : {false, true}) {
            for (bool enable_filter : {false, true}) {
              b->Args({comp_style, max_data, per_key_size, enable_statistics,
                       negative_query, enable_filter});
            }
          }
        }
      }
    }
  }
  b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics",
               "negative_query", "enable_filter"});
}

static constexpr uint64_t kDBSeekNum = 10l << 10;
BENCHMARK(IteratorSeek)
    ->Threads(1)
    ->Iterations(kDBSeekNum)
    ->Apply(IteratorSeekArguments);
BENCHMARK(IteratorSeek)
    ->Threads(8)
    ->Iterations(kDBSeekNum / 8)
    ->Apply(IteratorSeekArguments);

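// Measures Iterator::Next(); the iterator is positioned with Seek() outside
// the timed region so only the Next() call itself is timed.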
static void IteratorNext(benchmark::State& state) {
  auto compaction_style = static_cast<CompactionStyle>(state.range(0));
  uint64_t max_data = state.range(1);
  uint64_t per_key_size = state.range(2);
  uint64_t key_num = max_data / per_key_size;

  // setup DB
  static std::unique_ptr<DB> db;
  Options options;
  options.compaction_style = compaction_style;

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, key_num);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "IteratorNext");
    // load db
    auto wo = WriteOptions();
    wo.disableWAL = true;
    for (uint64_t i = 0; i < key_num; i++) {
      Status s = db->Put(wo, kg.Next(),
                         rnd.RandomString(static_cast<int>(per_key_size)));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }

    FlushOptions fo;
    Status s = db->Flush(fo);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }

    auto db_full = static_cast_with_check<DBImpl>(db.get());
    s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
  }

  for (auto _ : state) {
    std::unique_ptr<Iterator> iter{nullptr};
    state.PauseTiming();
    if (!iter) {
      iter.reset(db->NewIterator(ReadOptions()));
    }
    while (!iter->Valid()) {
      iter->Seek(kg.Next());
      if (!iter->status().ok()) {
        state.SkipWithError(iter->status().ToString().c_str());
      }
    }
    state.ResumeTiming();
    iter->Next();
  }

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, kg);
  }
}

static void IteratorNextArguments(benchmark::internal::Benchmark* b) {
  for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
                         kCompactionStyleFIFO}) {
    for (int64_t max_data : {128l << 20, 512l << 20}) {
      for (int64_t per_key_size : {256, 1024}) {
        b->Args({comp_style, max_data, per_key_size});
      }
    }
  }
  b->ArgNames({"comp_style", "max_data", "per_key_size"});
}

static constexpr uint64_t kIteratorNextNum = 10l << 10;
BENCHMARK(IteratorNext)
    ->Iterations(kIteratorNextNum)
    ->Apply(IteratorNextArguments);

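// Like IteratorNext, but additionally reports PerfContext counters (user key
// comparisons, skipped internal keys, find-next-entry time, CPU nanos)
// averaged per iteration.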
static void IteratorNextWithPerfContext(benchmark::State& state) {
  // setup DB
  static std::unique_ptr<DB> db;
  Options options;

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, 1024);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "IteratorNextWithPerfContext");
    // load db
    auto wo = WriteOptions();
    wo.disableWAL = true;
    for (uint64_t i = 0; i < 1024; i++) {
      Status s = db->Put(wo, kg.Next(), rnd.RandomString(1024));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }
    auto db_full = static_cast_with_check<DBImpl>(db.get());
    Status s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
    FlushOptions fo;
    s = db->Flush(fo);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }

  uint64_t user_key_comparison_count = 0;
  uint64_t internal_key_skipped_count = 0;
  uint64_t find_next_user_entry_time = 0;
  uint64_t iter_next_cpu_nanos = 0;

  SetPerfLevel(kEnableTime);
  get_perf_context()->EnablePerLevelPerfContext();

  for (auto _ : state) {
    std::unique_ptr<Iterator> iter{nullptr};
    state.PauseTiming();
    if (!iter) {
      iter.reset(db->NewIterator(ReadOptions()));
    }
    while (!iter->Valid()) {
      iter->Seek(kg.Next());
      if (!iter->status().ok()) {
        state.SkipWithError(iter->status().ToString().c_str());
      }
    }
    get_perf_context()->Reset();
    state.ResumeTiming();

    iter->Next();
    user_key_comparison_count += get_perf_context()->user_key_comparison_count;
    internal_key_skipped_count +=
        get_perf_context()->internal_key_skipped_count;
    find_next_user_entry_time += get_perf_context()->find_next_user_entry_time;
    iter_next_cpu_nanos += get_perf_context()->iter_next_cpu_nanos;
  }

  state.counters["user_key_comparison_count"] =
      benchmark::Counter(static_cast<double>(user_key_comparison_count),
                         benchmark::Counter::kAvgIterations);
  state.counters["internal_key_skipped_count"] =
      benchmark::Counter(static_cast<double>(internal_key_skipped_count),
                         benchmark::Counter::kAvgIterations);
  state.counters["find_next_user_entry_time"] =
      benchmark::Counter(static_cast<double>(find_next_user_entry_time),
                         benchmark::Counter::kAvgIterations);
  state.counters["iter_next_cpu_nanos"] =
      benchmark::Counter(static_cast<double>(iter_next_cpu_nanos),
                         benchmark::Counter::kAvgIterations);

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, kg);
  }
}

BENCHMARK(IteratorNextWithPerfContext)->Iterations(100000);

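// Same shape as IteratorNext, but measures Iterator::Prev().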
static void IteratorPrev(benchmark::State& state) {
  auto compaction_style = static_cast<CompactionStyle>(state.range(0));
  uint64_t max_data = state.range(1);
  uint64_t per_key_size = state.range(2);
  uint64_t key_num = max_data / per_key_size;

  // setup DB
  static std::unique_ptr<DB> db;
  std::string db_name;
  Options options;
  options.compaction_style = compaction_style;

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, key_num);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "IteratorPrev");
    // load db
    auto wo = WriteOptions();
    wo.disableWAL = true;
    for (uint64_t i = 0; i < key_num; i++) {
      Status s = db->Put(wo, kg.Next(),
                         rnd.RandomString(static_cast<int>(per_key_size)));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }

    FlushOptions fo;
    Status s = db->Flush(fo);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }

    auto db_full = static_cast_with_check<DBImpl>(db.get());
    s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
  }

  for (auto _ : state) {
    std::unique_ptr<Iterator> iter{nullptr};
    state.PauseTiming();
    if (!iter) {
      iter.reset(db->NewIterator(ReadOptions()));
    }
    while (!iter->Valid()) {
      iter->Seek(kg.Next());
      if (!iter->status().ok()) {
        state.SkipWithError(iter->status().ToString().c_str());
      }
    }
    state.ResumeTiming();
    iter->Prev();
  }

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, kg);
  }
}

static void IteratorPrevArguments(benchmark::internal::Benchmark* b) {
  for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
                         kCompactionStyleFIFO}) {
    for (int64_t max_data : {128l << 20, 512l << 20}) {
      for (int64_t per_key_size : {256, 1024}) {
        b->Args({comp_style, max_data, per_key_size});
      }
    }
  }
  b->ArgNames({"comp_style", "max_data", "per_key_size"});
}

static constexpr uint64_t kIteratorPrevNum = 10l << 10;
BENCHMARK(IteratorPrev)
    ->Iterations(kIteratorPrevNum)
    ->Apply(IteratorPrevArguments);

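// Measures Iterator::Seek() on prefixes with a fixed 4-byte prefix extractor;
// the key generator maps about 100 keys to each prefix on average.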
static void PrefixSeek(benchmark::State& state) {
  auto compaction_style = static_cast<CompactionStyle>(state.range(0));
  uint64_t max_data = state.range(1);
  uint64_t per_key_size = state.range(2);
  bool enable_statistics = state.range(3);
  bool enable_filter = state.range(4);
  uint64_t key_num = max_data / per_key_size;

  // setup DB
  static std::unique_ptr<DB> db;
  Options options;
  if (enable_statistics) {
    options.statistics = CreateDBStatistics();
  }
  options.compaction_style = compaction_style;
  options.prefix_extractor.reset(NewFixedPrefixTransform(4));

  if (enable_filter) {
    BlockBasedTableOptions table_options;
    table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  }

  auto rnd = Random(301 + state.thread_index());
  KeyGenerator kg(&rnd, key_num, key_num / 100);

  if (state.thread_index() == 0) {
    SetupDB(state, options, &db, "PrefixSeek");

    // load db
    auto wo = WriteOptions();
    wo.disableWAL = true;
    for (uint64_t i = 0; i < key_num; i++) {
      Status s = db->Put(wo, kg.Next(),
                         rnd.RandomString(static_cast<int>(per_key_size)));
      if (!s.ok()) {
        state.SkipWithError(s.ToString().c_str());
      }
    }

    FlushOptions fo;
    Status s = db->Flush(fo);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }

    auto db_full = static_cast_with_check<DBImpl>(db.get());
    s = db_full->WaitForCompact(WaitForCompactOptions());
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
      return;
    }
  }

  for (auto _ : state) {
    std::unique_ptr<Iterator> iter{nullptr};
    state.PauseTiming();
    if (!iter) {
      iter.reset(db->NewIterator(ReadOptions()));
    }
    state.ResumeTiming();
    iter->Seek(kg.NextPrefix());
    if (!iter->status().ok()) {
      state.SkipWithError(iter->status().ToString().c_str());
      return;
    }
  }

  if (state.thread_index() == 0) {
    TeardownDB(state, db, options, kg);
  }
}

static void PrefixSeekArguments(benchmark::internal::Benchmark* b) {
  for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal,
                         kCompactionStyleFIFO}) {
    for (int64_t max_data : {128l << 20, 512l << 20}) {
      for (int64_t per_key_size : {256, 1024}) {
        for (bool enable_statistics : {false, true}) {
          for (bool enable_filter : {false, true}) {
            b->Args({comp_style, max_data, per_key_size, enable_statistics,
                     enable_filter});
          }
        }
      }
    }
  }
  b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics",
               "enable_filter"});
}

static constexpr uint64_t kPrefixSeekNum = 10l << 10;
BENCHMARK(PrefixSeek)->Iterations(kPrefixSeekNum)->Apply(PrefixSeekArguments);
BENCHMARK(PrefixSeek)
    ->Threads(8)
    ->Iterations(kPrefixSeekNum / 8)
    ->Apply(PrefixSeekArguments);

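// Measures RandomAccessFileReader::Read() over a pool of small files created
// with randomly chosen temperature settings; the enable_statistics argument
// toggles statistics collection on the readers.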
// TODO: move this to a different file, as it tests an internal API
static void RandomAccessFileReaderRead(benchmark::State& state) {
  bool enable_statistics = state.range(0);
  constexpr int kFileNum = 10;
  auto env = Env::Default();
  auto fs = env->GetFileSystem();
  std::string db_path;
  Status s = env->GetTestDirectory(&db_path);
  if (!s.ok()) {
    state.SkipWithError(s.ToString().c_str());
    return;
  }

  // Setup multiple `RandomAccessFileReader`s with different parameters to be
  // used for the test
  Random rand(301);
  std::string fname_base =
      db_path + kFilePathSeparator + "random-access-file-reader-read";
  std::vector<std::unique_ptr<RandomAccessFileReader>> readers;
  auto statistics_share = CreateDBStatistics();
  Statistics* statistics = enable_statistics ? statistics_share.get() : nullptr;
  for (int i = 0; i < kFileNum; i++) {
    std::string fname = fname_base + std::to_string(i);
    std::string content = rand.RandomString(kDefaultPageSize);
    std::unique_ptr<WritableFile> tgt_file;
    env->NewWritableFile(fname, &tgt_file, EnvOptions());
    tgt_file->Append(content);
    tgt_file->Close();

    std::unique_ptr<FSRandomAccessFile> f;
    fs->NewRandomAccessFile(fname, FileOptions(), &f, nullptr);
    int rand_num = rand.Next() % 3;
    auto temperature = rand_num == 0   ? Temperature::kUnknown
                       : rand_num == 1 ? Temperature::kWarm
                                       : Temperature::kCold;
    readers.emplace_back(new RandomAccessFileReader(
        std::move(f), fname, env->GetSystemClock().get(), nullptr, statistics,
        Histograms::HISTOGRAM_ENUM_MAX, nullptr, nullptr, {}, temperature,
        rand_num == 1));
  }

  IOOptions io_options;
  std::unique_ptr<char[]> scratch(new char[2048]);
  Slice result;
  uint64_t idx = 0;
  for (auto _ : state) {
    s = readers[idx++ % kFileNum]->Read(io_options, 0, kDefaultPageSize / 3,
                                        &result, scratch.get(), nullptr);
    if (!s.ok()) {
      state.SkipWithError(s.ToString().c_str());
    }
  }

  // clean up
  for (int i = 0; i < kFileNum; i++) {
    std::string fname = fname_base + std::to_string(i);
    env->DeleteFile(fname);  // ignore return, okay to fail cleanup
  }
}

BENCHMARK(RandomAccessFileReaderRead)
    ->Iterations(1000000)
    ->Arg(0)
    ->Arg(1)
    ->ArgName("enable_statistics");

}  // namespace ROCKSDB_NAMESPACE

BENCHMARK_MAIN();