// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/benchharness.h"
|
|
|
|
#include "db/version_set.h"
|
2014-09-11 01:46:09 +00:00
|
|
|
#include "db/write_controller.h"
|
2014-05-05 18:11:48 +00:00
|
|
|
#include "util/mutexlock.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
// Renders `num` in decimal, left-padded with '0' to a minimum width of 16
// characters, and returns it as a std::string (e.g. 42 -> "0000000000000042").
std::string MakeKey(unsigned int num) {
  char formatted[30];
  // snprintf reports how many characters it produced (excluding the NUL);
  // an unsigned int never needs more than the 29 characters available here.
  const int written = snprintf(formatted, sizeof(formatted), "%016u", num);
  std::string key(formatted, static_cast<size_t>(written));
  return key;
}
|
|
|
|
|
|
|
|
void BM_LogAndApply(int iters, int num_base_files) {
|
|
|
|
VersionSet* vset;
|
Push- instead of pull-model for managing Write stalls
Summary:
Introducing WriteController, which is a source of truth about per-DB write delays. Let's define an DB epoch as a period where there are no flushes and compactions (i.e. new epoch is started when flush or compaction finishes). Each epoch can either:
* proceed with all writes without delay
* delay all writes by fixed time
* stop all writes
The three modes are recomputed at each epoch change (flush, compaction), rather than on every write (which is currently the case).
When we have a lot of column families, our current pull behavior adds a big overhead, since we need to loop over every column family for every write. With new push model, overhead on Write code-path is minimal.
This is just the start. Next step is to also take care of stalls introduced by slow memtable flushes. The final goal is to eliminate function MakeRoomForWrite(), which currently needs to be called for every column family by every write.
Test Plan: make check for now. I'll add some unit tests later. Also, perf test.
Reviewers: dhruba, yhchiang, MarkCallaghan, sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22791
2014-09-08 18:20:25 +00:00
|
|
|
WriteController wc;
|
2014-05-05 18:11:48 +00:00
|
|
|
ColumnFamilyData* default_cfd;
|
|
|
|
uint64_t fnum = 1;
|
|
|
|
port::Mutex mu;
|
|
|
|
MutexLock l(&mu);
|
|
|
|
|
|
|
|
BENCHMARK_SUSPEND {
|
|
|
|
std::string dbname = test::TmpDir() + "/rocksdb_test_benchmark";
|
|
|
|
ASSERT_OK(DestroyDB(dbname, Options()));
|
|
|
|
|
|
|
|
DB* db = nullptr;
|
|
|
|
Options opts;
|
|
|
|
opts.create_if_missing = true;
|
|
|
|
Status s = DB::Open(opts, dbname, &db);
|
|
|
|
ASSERT_OK(s);
|
|
|
|
ASSERT_TRUE(db != nullptr);
|
|
|
|
|
|
|
|
delete db;
|
|
|
|
db = nullptr;
|
|
|
|
|
|
|
|
Options options;
|
|
|
|
EnvOptions sopt;
|
2014-07-30 01:37:00 +00:00
|
|
|
// Notice we are using the default options not through SanitizeOptions().
|
|
|
|
// We might want to initialize some options manually if needed.
|
|
|
|
options.db_paths.emplace_back(dbname, 0);
|
|
|
|
// The parameter of table cache is passed in as null, so any file I/O
|
|
|
|
// operation is likely to fail.
|
Push- instead of pull-model for managing Write stalls
Summary:
Introducing WriteController, which is a source of truth about per-DB write delays. Let's define an DB epoch as a period where there are no flushes and compactions (i.e. new epoch is started when flush or compaction finishes). Each epoch can either:
* proceed with all writes without delay
* delay all writes by fixed time
* stop all writes
The three modes are recomputed at each epoch change (flush, compaction), rather than on every write (which is currently the case).
When we have a lot of column families, our current pull behavior adds a big overhead, since we need to loop over every column family for every write. With new push model, overhead on Write code-path is minimal.
This is just the start. Next step is to also take care of stalls introduced by slow memtable flushes. The final goal is to eliminate function MakeRoomForWrite(), which currently needs to be called for every column family by every write.
Test Plan: make check for now. I'll add some unit tests later. Also, perf test.
Reviewers: dhruba, yhchiang, MarkCallaghan, sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22791
2014-09-08 18:20:25 +00:00
|
|
|
vset = new VersionSet(dbname, &options, sopt, nullptr, &wc);
|
2014-05-05 18:11:48 +00:00
|
|
|
std::vector<ColumnFamilyDescriptor> dummy;
|
|
|
|
dummy.push_back(ColumnFamilyDescriptor());
|
|
|
|
ASSERT_OK(vset->Recover(dummy));
|
|
|
|
default_cfd = vset->GetColumnFamilySet()->GetDefault();
|
|
|
|
VersionEdit vbase;
|
|
|
|
for (int i = 0; i < num_base_files; i++) {
|
|
|
|
InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
|
|
|
|
InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
|
2014-07-02 16:54:20 +00:00
|
|
|
vbase.AddFile(2, ++fnum, 0, 1 /* file size */, start, limit, 1, 1);
|
2014-05-05 18:11:48 +00:00
|
|
|
}
|
2014-10-01 23:19:16 +00:00
|
|
|
ASSERT_OK(vset->LogAndApply(default_cfd,
|
|
|
|
*default_cfd->GetLatestMutableCFOptions(), &vbase, &mu));
|
2014-05-05 18:11:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = 0; i < iters; i++) {
|
|
|
|
VersionEdit vedit;
|
|
|
|
vedit.DeleteFile(2, fnum);
|
|
|
|
InternalKey start(MakeKey(2 * fnum), 1, kTypeValue);
|
|
|
|
InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion);
|
2014-07-02 16:54:20 +00:00
|
|
|
vedit.AddFile(2, ++fnum, 0, 1 /* file size */, start, limit, 1, 1);
|
2014-10-01 23:19:16 +00:00
|
|
|
vset->LogAndApply(default_cfd, *default_cfd->GetLatestMutableCFOptions(),
|
|
|
|
&vedit, &mu);
|
2014-05-05 18:11:48 +00:00
|
|
|
}
|
Push- instead of pull-model for managing Write stalls
Summary:
Introducing WriteController, which is a source of truth about per-DB write delays. Let's define an DB epoch as a period where there are no flushes and compactions (i.e. new epoch is started when flush or compaction finishes). Each epoch can either:
* proceed with all writes without delay
* delay all writes by fixed time
* stop all writes
The three modes are recomputed at each epoch change (flush, compaction), rather than on every write (which is currently the case).
When we have a lot of column families, our current pull behavior adds a big overhead, since we need to loop over every column family for every write. With new push model, overhead on Write code-path is minimal.
This is just the start. Next step is to also take care of stalls introduced by slow memtable flushes. The final goal is to eliminate function MakeRoomForWrite(), which currently needs to be called for every column family by every write.
Test Plan: make check for now. I'll add some unit tests later. Also, perf test.
Reviewers: dhruba, yhchiang, MarkCallaghan, sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22791
2014-09-08 18:20:25 +00:00
|
|
|
delete vset;
|
2014-05-05 18:11:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Benchmark registrations for BM_LogAndApply. The trailing two arguments
// presumably map to (iters, num_base_files) -- confirm against
// BENCHMARK_NAMED_PARAM in util/benchharness.h.
BENCHMARK_NAMED_PARAM(BM_LogAndApply, 1000_iters_1_file, 1000, 1)
BENCHMARK_NAMED_PARAM(BM_LogAndApply, 1000_iters_100_files, 1000, 100)
BENCHMARK_NAMED_PARAM(BM_LogAndApply, 1000_iters_10000_files, 1000, 10000)
BENCHMARK_NAMED_PARAM(BM_LogAndApply, 100_iters_100000_files, 100, 100000)
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
// Program entry point: runs the benchmarks via
// rocksdb::benchmark::RunBenchmarks() and exits with status 0.
// Command-line arguments are not inspected.
int main(int /*argc*/, char** /*argv*/) {
  rocksdb::benchmark::RunBenchmarks();
  return 0;
}
|