2016-02-09 23:12:00 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2013-10-31 20:38:54 +00:00
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
2014-05-09 15:34:18 +00:00
|
|
|
#ifndef GFLAGS
|
|
|
|
#include <cstdio>
|
|
|
|
// Fallback entry point compiled when GFLAGS is not defined: the benchmark
// needs gflags for its command line, so just tell the user and fail.
int main() {
  fputs("Please install gflags to run rocksdb tools\n", stderr);
  return 1;
}
|
|
|
|
#else
|
|
|
|
|
2013-10-31 20:38:54 +00:00
|
|
|
#include <gflags/gflags.h>
|
|
|
|
|
|
|
|
#include "rocksdb/db.h"
|
2013-10-29 03:34:02 +00:00
|
|
|
#include "rocksdb/slice_transform.h"
|
2013-10-31 20:38:54 +00:00
|
|
|
#include "rocksdb/table.h"
|
|
|
|
#include "db/db_impl.h"
|
2013-11-16 06:23:12 +00:00
|
|
|
#include "db/dbformat.h"
|
2013-10-31 20:38:54 +00:00
|
|
|
#include "table/block_based_table_factory.h"
|
2015-10-12 22:06:38 +00:00
|
|
|
#include "table/internal_iterator.h"
|
2014-01-28 05:58:46 +00:00
|
|
|
#include "table/plain_table_factory.h"
|
2014-02-13 02:09:24 +00:00
|
|
|
#include "table/table_builder.h"
|
2014-09-29 18:09:09 +00:00
|
|
|
#include "table/get_context.h"
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a thin layer for better portability. Less platform-dependent code should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Envs in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
#include "util/file_reader_writer.h"
|
2013-10-31 20:38:54 +00:00
|
|
|
#include "util/histogram.h"
|
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
|
|
|
|
2014-05-09 15:34:18 +00:00
|
|
|
using GFLAGS::ParseCommandLineFlags;
|
|
|
|
using GFLAGS::SetUsageMessage;
|
|
|
|
|
2013-10-31 20:38:54 +00:00
|
|
|
namespace rocksdb {
|
2014-04-10 04:17:14 +00:00
|
|
|
|
|
|
|
namespace {
|
2013-10-31 20:38:54 +00:00
|
|
|
// Make a key that i determines the first 4 characters and j determines the
|
|
|
|
// last 4 characters.
|
2013-11-16 06:23:12 +00:00
|
|
|
static std::string MakeKey(int i, int j, bool through_db) {
|
2013-10-31 20:38:54 +00:00
|
|
|
char buf[100];
|
2013-11-16 06:23:12 +00:00
|
|
|
snprintf(buf, sizeof(buf), "%04d__key___%04d", i, j);
|
|
|
|
if (through_db) {
|
|
|
|
return std::string(buf);
|
|
|
|
}
|
|
|
|
// If we directly query table, which operates on internal keys
|
|
|
|
// instead of user keys, we need to add 8 bytes of internal
|
|
|
|
// information (row type etc) to user key to make an internal
|
|
|
|
// key.
|
|
|
|
InternalKey key(std::string(buf), 0, ValueType::kTypeValue);
|
|
|
|
return key.Encode().ToString();
|
2013-10-31 20:38:54 +00:00
|
|
|
}
|
|
|
|
|
Benchmark table reader with nanoseconds
Summary: Nanoseconds give us a better view of the performance, especially when some operations are so fast that microseconds may only reveal less informative results.
Test Plan:
sample output:
./table_reader_bench --plain_table --time_unit=nanosecond
=======================================================================================================
InMemoryTableSimpleBenchmark: PlainTable num_key1: 4096 num_key2: 512 non_empty
=======================================================================================================
Histogram (unit: nanosecond):
Count: 6291456 Average: 475.3867 StdDev: 556.05
Min: 135.0000 Median: 400.1817 Max: 33370.0000
Percentiles: P50: 400.18 P75: 530.02 P99: 887.73 P99.9: 8843.26 P99.99: 9941.21
------------------------------------------------------
[ 120, 140 ) 2 0.000% 0.000%
[ 140, 160 ) 452 0.007% 0.007%
[ 160, 180 ) 13683 0.217% 0.225%
[ 180, 200 ) 54353 0.864% 1.089%
[ 200, 250 ) 101004 1.605% 2.694%
[ 250, 300 ) 729791 11.600% 14.294% ##
[ 300, 350 ) 616070 9.792% 24.086% ##
[ 350, 400 ) 1628021 25.877% 49.963% #####
[ 400, 450 ) 647220 10.287% 60.250% ##
[ 450, 500 ) 577206 9.174% 69.424% ##
[ 500, 600 ) 1168585 18.574% 87.999% ####
[ 600, 700 ) 506875 8.057% 96.055% ##
[ 700, 800 ) 147878 2.350% 98.406%
[ 800, 900 ) 42633 0.678% 99.083%
[ 900, 1000 ) 16304 0.259% 99.342%
[ 1000, 1200 ) 7811 0.124% 99.466%
[ 1200, 1400 ) 1453 0.023% 99.490%
[ 1400, 1600 ) 307 0.005% 99.494%
[ 1600, 1800 ) 81 0.001% 99.496%
[ 1800, 2000 ) 18 0.000% 99.496%
[ 2000, 2500 ) 8 0.000% 99.496%
[ 2500, 3000 ) 6 0.000% 99.496%
[ 3500, 4000 ) 3 0.000% 99.496%
[ 4000, 4500 ) 116 0.002% 99.498%
[ 4500, 5000 ) 1144 0.018% 99.516%
[ 5000, 6000 ) 1087 0.017% 99.534%
[ 6000, 7000 ) 2403 0.038% 99.572%
[ 7000, 8000 ) 9840 0.156% 99.728%
[ 8000, 9000 ) 12820 0.204% 99.932%
[ 9000, 10000 ) 3881 0.062% 99.994%
[ 10000, 12000 ) 135 0.002% 99.996%
[ 12000, 14000 ) 159 0.003% 99.998%
[ 14000, 16000 ) 58 0.001% 99.999%
[ 16000, 18000 ) 30 0.000% 100.000%
[ 18000, 20000 ) 14 0.000% 100.000%
[ 20000, 25000 ) 2 0.000% 100.000%
[ 25000, 30000 ) 2 0.000% 100.000%
[ 30000, 35000 ) 1 0.000% 100.000%
Reviewers: haobo, dhruba, sdong
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16113
2014-02-13 21:55:04 +00:00
|
|
|
// Returns the current time reported by `env`: env->NowNanos() when
// measured_by_nanosecond is set, env->NowMicros() otherwise.
uint64_t Now(Env* env, bool measured_by_nanosecond) {
  if (measured_by_nanosecond) {
    return env->NowNanos();
  }
  return env->NowMicros();
}
|
2014-04-10 04:17:14 +00:00
|
|
|
} // namespace
|
Benchmark table reader with nanoseconds
Summary: Nanoseconds give us a better view of the performance, especially when some operations are so fast that microseconds may only reveal less informative results.
Test Plan:
sample output:
./table_reader_bench --plain_table --time_unit=nanosecond
=======================================================================================================
InMemoryTableSimpleBenchmark: PlainTable num_key1: 4096 num_key2: 512 non_empty
=======================================================================================================
Histogram (unit: nanosecond):
Count: 6291456 Average: 475.3867 StdDev: 556.05
Min: 135.0000 Median: 400.1817 Max: 33370.0000
Percentiles: P50: 400.18 P75: 530.02 P99: 887.73 P99.9: 8843.26 P99.99: 9941.21
------------------------------------------------------
[ 120, 140 ) 2 0.000% 0.000%
[ 140, 160 ) 452 0.007% 0.007%
[ 160, 180 ) 13683 0.217% 0.225%
[ 180, 200 ) 54353 0.864% 1.089%
[ 200, 250 ) 101004 1.605% 2.694%
[ 250, 300 ) 729791 11.600% 14.294% ##
[ 300, 350 ) 616070 9.792% 24.086% ##
[ 350, 400 ) 1628021 25.877% 49.963% #####
[ 400, 450 ) 647220 10.287% 60.250% ##
[ 450, 500 ) 577206 9.174% 69.424% ##
[ 500, 600 ) 1168585 18.574% 87.999% ####
[ 600, 700 ) 506875 8.057% 96.055% ##
[ 700, 800 ) 147878 2.350% 98.406%
[ 800, 900 ) 42633 0.678% 99.083%
[ 900, 1000 ) 16304 0.259% 99.342%
[ 1000, 1200 ) 7811 0.124% 99.466%
[ 1200, 1400 ) 1453 0.023% 99.490%
[ 1400, 1600 ) 307 0.005% 99.494%
[ 1600, 1800 ) 81 0.001% 99.496%
[ 1800, 2000 ) 18 0.000% 99.496%
[ 2000, 2500 ) 8 0.000% 99.496%
[ 2500, 3000 ) 6 0.000% 99.496%
[ 3500, 4000 ) 3 0.000% 99.496%
[ 4000, 4500 ) 116 0.002% 99.498%
[ 4500, 5000 ) 1144 0.018% 99.516%
[ 5000, 6000 ) 1087 0.017% 99.534%
[ 6000, 7000 ) 2403 0.038% 99.572%
[ 7000, 8000 ) 9840 0.156% 99.728%
[ 8000, 9000 ) 12820 0.204% 99.932%
[ 9000, 10000 ) 3881 0.062% 99.994%
[ 10000, 12000 ) 135 0.002% 99.996%
[ 12000, 14000 ) 159 0.003% 99.998%
[ 14000, 16000 ) 58 0.001% 99.999%
[ 16000, 18000 ) 30 0.000% 100.000%
[ 18000, 20000 ) 14 0.000% 100.000%
[ 20000, 25000 ) 2 0.000% 100.000%
[ 25000, 30000 ) 2 0.000% 100.000%
[ 30000, 35000 ) 1 0.000% 100.000%
Reviewers: haobo, dhruba, sdong
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16113
2014-02-13 21:55:04 +00:00
|
|
|
|
2013-10-31 20:38:54 +00:00
|
|
|
// A very simple benchmark.
|
|
|
|
// Create a table with roughly numKey1 * numKey2 keys,
|
|
|
|
// where there are numKey1 prefixes of the key, each has numKey2 number of
|
|
|
|
// distinguished key, differing in the suffix part.
|
|
|
|
// If if_query_empty_keys = false, query the existing keys numKey1 * numKey2
|
|
|
|
// times randomly.
|
|
|
|
// If if_query_empty_keys = true, query numKey1 * numKey2 random empty keys.
|
|
|
|
// Print out the total time.
|
2013-11-16 06:23:12 +00:00
|
|
|
// If through_db=true, a full DB will be created and queries will be against
|
|
|
|
// it. Otherwise, operations will be directly through table level.
|
2013-10-31 20:38:54 +00:00
|
|
|
//
|
|
|
|
// If for_iterator=true, instead of just querying one key each time, it queries
|
|
|
|
// a range sharing the same prefix.
|
2014-04-10 04:17:14 +00:00
|
|
|
namespace {
|
2013-10-31 20:38:54 +00:00
|
|
|
void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
|
2013-11-16 06:23:12 +00:00
|
|
|
ReadOptions& read_options, int num_keys1,
|
|
|
|
int num_keys2, int num_iter, int prefix_len,
|
|
|
|
bool if_query_empty_keys, bool for_iterator,
|
Benchmark table reader wiht nanoseconds
Summary: nanosecnods gave us better view of the performance, especially when some operations are fast so that micro seconds may only reveal less informative results.
Test Plan:
sample output:
./table_reader_bench --plain_table --time_unit=nanosecond
=======================================================================================================
InMemoryTableSimpleBenchmark: PlainTable num_key1: 4096 num_key2: 512 non_empty
=======================================================================================================
Histogram (unit: nanosecond):
Count: 6291456 Average: 475.3867 StdDev: 556.05
Min: 135.0000 Median: 400.1817 Max: 33370.0000
Percentiles: P50: 400.18 P75: 530.02 P99: 887.73 P99.9: 8843.26 P99.99: 9941.21
------------------------------------------------------
[ 120, 140 ) 2 0.000% 0.000%
[ 140, 160 ) 452 0.007% 0.007%
[ 160, 180 ) 13683 0.217% 0.225%
[ 180, 200 ) 54353 0.864% 1.089%
[ 200, 250 ) 101004 1.605% 2.694%
[ 250, 300 ) 729791 11.600% 14.294% ##
[ 300, 350 ) 616070 9.792% 24.086% ##
[ 350, 400 ) 1628021 25.877% 49.963% #####
[ 400, 450 ) 647220 10.287% 60.250% ##
[ 450, 500 ) 577206 9.174% 69.424% ##
[ 500, 600 ) 1168585 18.574% 87.999% ####
[ 600, 700 ) 506875 8.057% 96.055% ##
[ 700, 800 ) 147878 2.350% 98.406%
[ 800, 900 ) 42633 0.678% 99.083%
[ 900, 1000 ) 16304 0.259% 99.342%
[ 1000, 1200 ) 7811 0.124% 99.466%
[ 1200, 1400 ) 1453 0.023% 99.490%
[ 1400, 1600 ) 307 0.005% 99.494%
[ 1600, 1800 ) 81 0.001% 99.496%
[ 1800, 2000 ) 18 0.000% 99.496%
[ 2000, 2500 ) 8 0.000% 99.496%
[ 2500, 3000 ) 6 0.000% 99.496%
[ 3500, 4000 ) 3 0.000% 99.496%
[ 4000, 4500 ) 116 0.002% 99.498%
[ 4500, 5000 ) 1144 0.018% 99.516%
[ 5000, 6000 ) 1087 0.017% 99.534%
[ 6000, 7000 ) 2403 0.038% 99.572%
[ 7000, 8000 ) 9840 0.156% 99.728%
[ 8000, 9000 ) 12820 0.204% 99.932%
[ 9000, 10000 ) 3881 0.062% 99.994%
[ 10000, 12000 ) 135 0.002% 99.996%
[ 12000, 14000 ) 159 0.003% 99.998%
[ 14000, 16000 ) 58 0.001% 99.999%
[ 16000, 18000 ) 30 0.000% 100.000%
[ 18000, 20000 ) 14 0.000% 100.000%
[ 20000, 25000 ) 2 0.000% 100.000%
[ 25000, 30000 ) 2 0.000% 100.000%
[ 30000, 35000 ) 1 0.000% 100.000%
Reviewers: haobo, dhruba, sdong
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16113
2014-02-13 21:55:04 +00:00
|
|
|
bool through_db, bool measured_by_nanosecond) {
|
2014-02-13 02:09:24 +00:00
|
|
|
rocksdb::InternalKeyComparator ikc(opts.comparator);
|
|
|
|
|
2013-10-31 20:38:54 +00:00
|
|
|
std::string file_name = test::TmpDir()
|
|
|
|
+ "/rocksdb_table_reader_benchmark";
|
2013-11-16 06:23:12 +00:00
|
|
|
std::string dbname = test::TmpDir() + "/rocksdb_table_reader_bench_db";
|
|
|
|
WriteOptions wo;
|
2013-10-31 20:38:54 +00:00
|
|
|
Env* env = Env::Default();
|
2013-11-16 06:23:12 +00:00
|
|
|
TableBuilder* tb = nullptr;
|
|
|
|
DB* db = nullptr;
|
|
|
|
Status s;
|
2014-09-04 23:18:36 +00:00
|
|
|
const ImmutableCFOptions ioptions(opts);
|
2015-09-16 23:57:43 +00:00
|
|
|
unique_ptr<WritableFileWriter> file_writer;
|
2013-11-16 06:23:12 +00:00
|
|
|
if (!through_db) {
|
2015-09-16 23:57:43 +00:00
|
|
|
unique_ptr<WritableFile> file;
|
2013-11-16 06:23:12 +00:00
|
|
|
env->NewWritableFile(file_name, &file, env_options);
|
A new call back to TablePropertiesCollector to allow users know the entry is add, delete or merge
Summary:
Currently users have no idea a key is add, delete or merge from TablePropertiesCollector call back. Add a new function to add it.
Also refactor the codes so that
(1) make table property collector and internal table property collector two separate data structures with the later one now exposed
(2) table builders only receive internal table properties
Test Plan: Add cases in table_properties_collector_test to cover both of old and new ways of using TablePropertiesCollector.
Reviewers: yhchiang, igor.sugak, rven, igor
Reviewed By: rven, igor
Subscribers: meyering, yoshinorim, maykov, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D35373
2015-04-06 17:04:30 +00:00
|
|
|
|
|
|
|
std::vector<std::unique_ptr<IntTblPropCollectorFactory> >
|
|
|
|
int_tbl_prop_collector_factories;
|
|
|
|
|
2015-09-16 23:57:43 +00:00
|
|
|
file_writer.reset(new WritableFileWriter(std::move(file), env_options));
|
2016-09-18 05:30:43 +00:00
|
|
|
int unknown_level = -1;
|
A new call back to TablePropertiesCollector to allow users know the entry is add, delete or merge
Summary:
Currently users have no idea a key is add, delete or merge from TablePropertiesCollector call back. Add a new function to add it.
Also refactor the codes so that
(1) make table property collector and internal table property collector two separate data structures with the later one now exposed
(2) table builders only receive internal table properties
Test Plan: Add cases in table_properties_collector_test to cover both of old and new ways of using TablePropertiesCollector.
Reviewers: yhchiang, igor.sugak, rven, igor
Reviewed By: rven, igor
Subscribers: meyering, yoshinorim, maykov, leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D35373
2015-04-06 17:04:30 +00:00
|
|
|
tb = opts.table_factory->NewTableBuilder(
|
|
|
|
TableBuilderOptions(ioptions, ikc, &int_tbl_prop_collector_factories,
|
|
|
|
CompressionType::kNoCompression,
|
Shared dictionary compression using reference block
Summary:
This adds a new metablock containing a shared dictionary that is used
to compress all data blocks in the SST file. The size of the shared dictionary
is configurable in CompressionOptions and defaults to 0. It's currently only
used for zlib/lz4/lz4hc, but the block will be stored in the SST regardless of
the compression type if the user chooses a nonzero dictionary size.
During compaction, computes the dictionary by randomly sampling the first
output file in each subcompaction. It pre-computes the intervals to sample
by assuming the output file will have the maximum allowable length. In case
the file is smaller, some of the pre-computed sampling intervals can be beyond
end-of-file, in which case we skip over those samples and the dictionary will
be a bit smaller. After the dictionary is generated using the first file in a
subcompaction, it is loaded into the compression library before writing each
block in each subsequent file of that subcompaction.
On the read path, gets the dictionary from the metablock, if it exists. Then,
loads that dictionary into the compression library before reading each block.
Test Plan: new unit test
Reviewers: yhchiang, IslamAbdelRahman, cyan, sdong
Reviewed By: sdong
Subscribers: andrewkr, yoshinorim, kradhakrishnan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D52287
2016-04-28 00:36:03 +00:00
|
|
|
CompressionOptions(),
|
|
|
|
nullptr /* compression_dict */,
|
2016-09-18 05:30:43 +00:00
|
|
|
false /* skip_filters */, kDefaultColumnFamilyName,
|
|
|
|
unknown_level),
|
2016-04-07 06:10:32 +00:00
|
|
|
0 /* column_family_id */, file_writer.get());
|
2013-11-16 06:23:12 +00:00
|
|
|
} else {
|
|
|
|
s = DB::Open(opts, dbname, &db);
|
|
|
|
ASSERT_OK(s);
|
|
|
|
ASSERT_TRUE(db != nullptr);
|
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
// Populate slightly more than 1M keys
|
|
|
|
for (int i = 0; i < num_keys1; i++) {
|
|
|
|
for (int j = 0; j < num_keys2; j++) {
|
2013-11-16 06:23:12 +00:00
|
|
|
std::string key = MakeKey(i * 2, j, through_db);
|
|
|
|
if (!through_db) {
|
|
|
|
tb->Add(key, key);
|
|
|
|
} else {
|
|
|
|
db->Put(wo, key, key);
|
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
}
|
|
|
|
}
|
2013-11-16 06:23:12 +00:00
|
|
|
if (!through_db) {
|
|
|
|
tb->Finish();
|
2015-09-16 23:57:43 +00:00
|
|
|
file_writer->Close();
|
2013-11-16 06:23:12 +00:00
|
|
|
} else {
|
|
|
|
db->Flush(FlushOptions());
|
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
|
|
|
|
unique_ptr<TableReader> table_reader;
|
2013-11-16 06:23:12 +00:00
|
|
|
if (!through_db) {
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
unique_ptr<RandomAccessFile> raf;
|
2014-11-07 23:04:30 +00:00
|
|
|
s = env->NewRandomAccessFile(file_name, &raf, env_options);
|
2015-09-16 23:57:43 +00:00
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "Create File Error: %s\n", s.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
2013-11-16 06:23:12 +00:00
|
|
|
uint64_t file_size;
|
|
|
|
env->GetFileSize(file_name, &file_size);
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
unique_ptr<RandomAccessFileReader> file_reader(
|
|
|
|
new RandomAccessFileReader(std::move(raf)));
|
2015-09-11 18:36:33 +00:00
|
|
|
s = opts.table_factory->NewTableReader(
|
|
|
|
TableReaderOptions(ioptions, env_options, ikc), std::move(file_reader),
|
|
|
|
file_size, &table_reader);
|
2015-09-16 23:57:43 +00:00
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "Open Table Error: %s\n", s.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
2013-11-16 06:23:12 +00:00
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
|
|
|
|
Random rnd(301);
|
2013-11-16 06:23:12 +00:00
|
|
|
std::string result;
|
2013-10-31 20:38:54 +00:00
|
|
|
HistogramImpl hist;
|
|
|
|
|
|
|
|
for (int it = 0; it < num_iter; it++) {
|
|
|
|
for (int i = 0; i < num_keys1; i++) {
|
|
|
|
for (int j = 0; j < num_keys2; j++) {
|
|
|
|
int r1 = rnd.Uniform(num_keys1) * 2;
|
|
|
|
int r2 = rnd.Uniform(num_keys2);
|
2013-11-16 06:23:12 +00:00
|
|
|
if (if_query_empty_keys) {
|
|
|
|
r1++;
|
|
|
|
r2 = num_keys2 * 2 - r2;
|
|
|
|
}
|
|
|
|
|
2013-10-31 20:38:54 +00:00
|
|
|
if (!for_iterator) {
|
|
|
|
// Query one existing key;
|
2013-11-16 06:23:12 +00:00
|
|
|
std::string key = MakeKey(r1, r2, through_db);
|
2014-02-13 23:27:59 +00:00
|
|
|
uint64_t start_time = Now(env, measured_by_nanosecond);
|
2013-11-16 06:23:12 +00:00
|
|
|
if (!through_db) {
|
2017-01-08 22:08:51 +00:00
|
|
|
std::string value;
|
2014-09-29 18:09:09 +00:00
|
|
|
MergeContext merge_context;
|
2016-11-04 01:40:23 +00:00
|
|
|
RangeDelAggregator range_del_agg(ikc, {} /* snapshots */);
|
|
|
|
GetContext get_context(ioptions.user_comparator,
|
|
|
|
ioptions.merge_operator, ioptions.info_log,
|
|
|
|
ioptions.statistics, GetContext::kNotFound,
|
|
|
|
Slice(key), &value, nullptr, &merge_context,
|
|
|
|
&range_del_agg, env);
|
2014-09-29 18:09:09 +00:00
|
|
|
s = table_reader->Get(read_options, key, &get_context);
|
2013-11-16 06:23:12 +00:00
|
|
|
} else {
|
2014-02-13 02:09:24 +00:00
|
|
|
s = db->Get(read_options, key, &result);
|
2013-11-16 06:23:12 +00:00
|
|
|
}
|
2014-02-13 23:27:59 +00:00
|
|
|
hist.Add(Now(env, measured_by_nanosecond) - start_time);
|
2013-10-31 20:38:54 +00:00
|
|
|
} else {
|
2013-11-16 06:23:12 +00:00
|
|
|
int r2_len;
|
|
|
|
if (if_query_empty_keys) {
|
|
|
|
r2_len = 0;
|
|
|
|
} else {
|
|
|
|
r2_len = rnd.Uniform(num_keys2) + 1;
|
|
|
|
if (r2_len + r2 > num_keys2) {
|
|
|
|
r2_len = num_keys2 - r2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::string start_key = MakeKey(r1, r2, through_db);
|
|
|
|
std::string end_key = MakeKey(r1, r2 + r2_len, through_db);
|
2013-10-31 22:26:06 +00:00
|
|
|
uint64_t total_time = 0;
|
2014-02-13 23:27:59 +00:00
|
|
|
uint64_t start_time = Now(env, measured_by_nanosecond);
|
2015-10-12 22:06:38 +00:00
|
|
|
Iterator* iter = nullptr;
|
|
|
|
InternalIterator* iiter = nullptr;
|
2013-11-16 06:23:12 +00:00
|
|
|
if (!through_db) {
|
2015-10-12 22:06:38 +00:00
|
|
|
iiter = table_reader->NewIterator(read_options);
|
2013-11-16 06:23:12 +00:00
|
|
|
} else {
|
|
|
|
iter = db->NewIterator(read_options);
|
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
int count = 0;
|
2015-10-12 22:06:38 +00:00
|
|
|
for (through_db ? iter->Seek(start_key) : iiter->Seek(start_key);
|
|
|
|
through_db ? iter->Valid() : iiter->Valid();
|
|
|
|
through_db ? iter->Next() : iiter->Next()) {
|
2013-11-16 06:23:12 +00:00
|
|
|
if (if_query_empty_keys) {
|
|
|
|
break;
|
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
// verify key;
|
2014-02-13 23:27:59 +00:00
|
|
|
total_time += Now(env, measured_by_nanosecond) - start_time;
|
plain table reader: non-mmap mode to keep two recent buffers
Summary: In plain table reader's non-mmap mode, we only keep the most recent read buffer. However, for binary search, it is likely we come back to a location to read. To avoid one pread in such a case, we keep two read buffers. It should cover most of the cases.
Test Plan:
1. run tests
2. check the optimization works through strace when running
./table_reader_bench -mmap_read=false --num_keys2=1 -num_keys1=5000 -table_factory=plain_table --iterator --through_db
Reviewers: anthony, rven, kradhakrishnan, igor, yhchiang, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D51171
2015-12-24 01:30:10 +00:00
|
|
|
assert(Slice(MakeKey(r1, r2 + count, through_db)) ==
|
|
|
|
(through_db ? iter->key() : iiter->key()));
|
2014-02-13 23:27:59 +00:00
|
|
|
start_time = Now(env, measured_by_nanosecond);
|
2013-10-31 20:38:54 +00:00
|
|
|
if (++count >= r2_len) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (count != r2_len) {
|
|
|
|
fprintf(
|
|
|
|
stderr, "Iterator cannot iterate expected number of entries. "
|
|
|
|
"Expected %d but got %d\n", r2_len, count);
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
delete iter;
|
2014-02-13 23:27:59 +00:00
|
|
|
total_time += Now(env, measured_by_nanosecond) - start_time;
|
2013-10-31 22:26:06 +00:00
|
|
|
hist.Add(total_time);
|
2013-10-31 20:38:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(
|
|
|
|
stderr,
|
|
|
|
"==================================================="
|
|
|
|
"====================================================\n"
|
|
|
|
"InMemoryTableSimpleBenchmark: %20s num_key1: %5d "
|
|
|
|
"num_key2: %5d %10s\n"
|
|
|
|
"==================================================="
|
|
|
|
"===================================================="
|
Benchmark table reader wiht nanoseconds
Summary: nanosecnods gave us better view of the performance, especially when some operations are fast so that micro seconds may only reveal less informative results.
Test Plan:
sample output:
./table_reader_bench --plain_table --time_unit=nanosecond
=======================================================================================================
InMemoryTableSimpleBenchmark: PlainTable num_key1: 4096 num_key2: 512 non_empty
=======================================================================================================
Histogram (unit: nanosecond):
Count: 6291456 Average: 475.3867 StdDev: 556.05
Min: 135.0000 Median: 400.1817 Max: 33370.0000
Percentiles: P50: 400.18 P75: 530.02 P99: 887.73 P99.9: 8843.26 P99.99: 9941.21
------------------------------------------------------
[ 120, 140 ) 2 0.000% 0.000%
[ 140, 160 ) 452 0.007% 0.007%
[ 160, 180 ) 13683 0.217% 0.225%
[ 180, 200 ) 54353 0.864% 1.089%
[ 200, 250 ) 101004 1.605% 2.694%
[ 250, 300 ) 729791 11.600% 14.294% ##
[ 300, 350 ) 616070 9.792% 24.086% ##
[ 350, 400 ) 1628021 25.877% 49.963% #####
[ 400, 450 ) 647220 10.287% 60.250% ##
[ 450, 500 ) 577206 9.174% 69.424% ##
[ 500, 600 ) 1168585 18.574% 87.999% ####
[ 600, 700 ) 506875 8.057% 96.055% ##
[ 700, 800 ) 147878 2.350% 98.406%
[ 800, 900 ) 42633 0.678% 99.083%
[ 900, 1000 ) 16304 0.259% 99.342%
[ 1000, 1200 ) 7811 0.124% 99.466%
[ 1200, 1400 ) 1453 0.023% 99.490%
[ 1400, 1600 ) 307 0.005% 99.494%
[ 1600, 1800 ) 81 0.001% 99.496%
[ 1800, 2000 ) 18 0.000% 99.496%
[ 2000, 2500 ) 8 0.000% 99.496%
[ 2500, 3000 ) 6 0.000% 99.496%
[ 3500, 4000 ) 3 0.000% 99.496%
[ 4000, 4500 ) 116 0.002% 99.498%
[ 4500, 5000 ) 1144 0.018% 99.516%
[ 5000, 6000 ) 1087 0.017% 99.534%
[ 6000, 7000 ) 2403 0.038% 99.572%
[ 7000, 8000 ) 9840 0.156% 99.728%
[ 8000, 9000 ) 12820 0.204% 99.932%
[ 9000, 10000 ) 3881 0.062% 99.994%
[ 10000, 12000 ) 135 0.002% 99.996%
[ 12000, 14000 ) 159 0.003% 99.998%
[ 14000, 16000 ) 58 0.001% 99.999%
[ 16000, 18000 ) 30 0.000% 100.000%
[ 18000, 20000 ) 14 0.000% 100.000%
[ 20000, 25000 ) 2 0.000% 100.000%
[ 25000, 30000 ) 2 0.000% 100.000%
[ 30000, 35000 ) 1 0.000% 100.000%
Reviewers: haobo, dhruba, sdong
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16113
2014-02-13 21:55:04 +00:00
|
|
|
"\nHistogram (unit: %s): \n%s",
|
2013-11-16 06:23:12 +00:00
|
|
|
opts.table_factory->Name(), num_keys1, num_keys2,
|
Benchmark table reader wiht nanoseconds
Summary: nanosecnods gave us better view of the performance, especially when some operations are fast so that micro seconds may only reveal less informative results.
Test Plan:
sample output:
./table_reader_bench --plain_table --time_unit=nanosecond
=======================================================================================================
InMemoryTableSimpleBenchmark: PlainTable num_key1: 4096 num_key2: 512 non_empty
=======================================================================================================
Histogram (unit: nanosecond):
Count: 6291456 Average: 475.3867 StdDev: 556.05
Min: 135.0000 Median: 400.1817 Max: 33370.0000
Percentiles: P50: 400.18 P75: 530.02 P99: 887.73 P99.9: 8843.26 P99.99: 9941.21
------------------------------------------------------
[ 120, 140 ) 2 0.000% 0.000%
[ 140, 160 ) 452 0.007% 0.007%
[ 160, 180 ) 13683 0.217% 0.225%
[ 180, 200 ) 54353 0.864% 1.089%
[ 200, 250 ) 101004 1.605% 2.694%
[ 250, 300 ) 729791 11.600% 14.294% ##
[ 300, 350 ) 616070 9.792% 24.086% ##
[ 350, 400 ) 1628021 25.877% 49.963% #####
[ 400, 450 ) 647220 10.287% 60.250% ##
[ 450, 500 ) 577206 9.174% 69.424% ##
[ 500, 600 ) 1168585 18.574% 87.999% ####
[ 600, 700 ) 506875 8.057% 96.055% ##
[ 700, 800 ) 147878 2.350% 98.406%
[ 800, 900 ) 42633 0.678% 99.083%
[ 900, 1000 ) 16304 0.259% 99.342%
[ 1000, 1200 ) 7811 0.124% 99.466%
[ 1200, 1400 ) 1453 0.023% 99.490%
[ 1400, 1600 ) 307 0.005% 99.494%
[ 1600, 1800 ) 81 0.001% 99.496%
[ 1800, 2000 ) 18 0.000% 99.496%
[ 2000, 2500 ) 8 0.000% 99.496%
[ 2500, 3000 ) 6 0.000% 99.496%
[ 3500, 4000 ) 3 0.000% 99.496%
[ 4000, 4500 ) 116 0.002% 99.498%
[ 4500, 5000 ) 1144 0.018% 99.516%
[ 5000, 6000 ) 1087 0.017% 99.534%
[ 6000, 7000 ) 2403 0.038% 99.572%
[ 7000, 8000 ) 9840 0.156% 99.728%
[ 8000, 9000 ) 12820 0.204% 99.932%
[ 9000, 10000 ) 3881 0.062% 99.994%
[ 10000, 12000 ) 135 0.002% 99.996%
[ 12000, 14000 ) 159 0.003% 99.998%
[ 14000, 16000 ) 58 0.001% 99.999%
[ 16000, 18000 ) 30 0.000% 100.000%
[ 18000, 20000 ) 14 0.000% 100.000%
[ 20000, 25000 ) 2 0.000% 100.000%
[ 25000, 30000 ) 2 0.000% 100.000%
[ 30000, 35000 ) 1 0.000% 100.000%
Reviewers: haobo, dhruba, sdong
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16113
2014-02-13 21:55:04 +00:00
|
|
|
for_iterator ? "iterator" : (if_query_empty_keys ? "empty" : "non_empty"),
|
|
|
|
measured_by_nanosecond ? "nanosecond" : "microsecond",
|
2013-10-31 20:38:54 +00:00
|
|
|
hist.ToString().c_str());
|
2013-11-16 06:23:12 +00:00
|
|
|
if (!through_db) {
|
|
|
|
env->DeleteFile(file_name);
|
|
|
|
} else {
|
|
|
|
delete db;
|
|
|
|
db = nullptr;
|
|
|
|
DestroyDB(dbname, opts);
|
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
}
|
2014-04-10 04:17:14 +00:00
|
|
|
} // namespace
|
Benchmark table reader with nanoseconds
Summary: Nanoseconds give us a better view of the performance, especially when some operations are so fast that microseconds may only reveal less informative results.
Test Plan:
sample output:
./table_reader_bench --plain_table --time_unit=nanosecond
=======================================================================================================
InMemoryTableSimpleBenchmark: PlainTable num_key1: 4096 num_key2: 512 non_empty
=======================================================================================================
Histogram (unit: nanosecond):
Count: 6291456 Average: 475.3867 StdDev: 556.05
Min: 135.0000 Median: 400.1817 Max: 33370.0000
Percentiles: P50: 400.18 P75: 530.02 P99: 887.73 P99.9: 8843.26 P99.99: 9941.21
------------------------------------------------------
[ 120, 140 ) 2 0.000% 0.000%
[ 140, 160 ) 452 0.007% 0.007%
[ 160, 180 ) 13683 0.217% 0.225%
[ 180, 200 ) 54353 0.864% 1.089%
[ 200, 250 ) 101004 1.605% 2.694%
[ 250, 300 ) 729791 11.600% 14.294% ##
[ 300, 350 ) 616070 9.792% 24.086% ##
[ 350, 400 ) 1628021 25.877% 49.963% #####
[ 400, 450 ) 647220 10.287% 60.250% ##
[ 450, 500 ) 577206 9.174% 69.424% ##
[ 500, 600 ) 1168585 18.574% 87.999% ####
[ 600, 700 ) 506875 8.057% 96.055% ##
[ 700, 800 ) 147878 2.350% 98.406%
[ 800, 900 ) 42633 0.678% 99.083%
[ 900, 1000 ) 16304 0.259% 99.342%
[ 1000, 1200 ) 7811 0.124% 99.466%
[ 1200, 1400 ) 1453 0.023% 99.490%
[ 1400, 1600 ) 307 0.005% 99.494%
[ 1600, 1800 ) 81 0.001% 99.496%
[ 1800, 2000 ) 18 0.000% 99.496%
[ 2000, 2500 ) 8 0.000% 99.496%
[ 2500, 3000 ) 6 0.000% 99.496%
[ 3500, 4000 ) 3 0.000% 99.496%
[ 4000, 4500 ) 116 0.002% 99.498%
[ 4500, 5000 ) 1144 0.018% 99.516%
[ 5000, 6000 ) 1087 0.017% 99.534%
[ 6000, 7000 ) 2403 0.038% 99.572%
[ 7000, 8000 ) 9840 0.156% 99.728%
[ 8000, 9000 ) 12820 0.204% 99.932%
[ 9000, 10000 ) 3881 0.062% 99.994%
[ 10000, 12000 ) 135 0.002% 99.996%
[ 12000, 14000 ) 159 0.003% 99.998%
[ 14000, 16000 ) 58 0.001% 99.999%
[ 16000, 18000 ) 30 0.000% 100.000%
[ 18000, 20000 ) 14 0.000% 100.000%
[ 20000, 25000 ) 2 0.000% 100.000%
[ 25000, 30000 ) 2 0.000% 100.000%
[ 30000, 35000 ) 1 0.000% 100.000%
Reviewers: haobo, dhruba, sdong
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16113
2014-02-13 21:55:04 +00:00
|
|
|
} // namespace rocksdb
|
2013-10-31 20:38:54 +00:00
|
|
|
|
|
|
|
// Command-line flags of the benchmark.
DEFINE_bool(query_empty, false, "query non-existing keys instead of existing "
            "ones.");
DEFINE_int32(num_keys1, 4096, "number of distinguish prefix of keys");
DEFINE_int32(num_keys2, 512, "number of distinguish keys for each prefix");
// NOTE(review): the previous help text was a copy of the query_empty
// description. The new text assumes this flag is the repetition count handed
// to the benchmark -- confirm against its use in main().
DEFINE_int32(iter, 3, "number of times to repeat the query workload");
DEFINE_int32(prefix_len, 16, "Prefix length used for iterators and indexes");
DEFINE_bool(iterator, false, "For test iterator");
DEFINE_bool(through_db, false, "If enable, a DB instance will be created and "
            "the query will be against DB. Otherwise, will be directly against "
            "a table reader.");
DEFINE_bool(mmap_read, true, "Whether use mmap read");
DEFINE_string(table_factory, "block_based",
              "Table factory to use: `block_based` (default), `plain_table` or "
              "`cuckoo_hash`.");
DEFINE_string(time_unit, "microsecond",
              "The time unit used for measuring performance. User can specify "
              "`microsecond` (default) or `nanosecond`");
|
2013-10-31 20:38:54 +00:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2014-05-09 15:34:18 +00:00
|
|
|
SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
|
|
|
|
" [OPTIONS]...");
|
|
|
|
ParseCommandLineFlags(&argc, &argv, true);
|
2013-10-31 20:38:54 +00:00
|
|
|
|
2014-08-19 19:50:13 +00:00
|
|
|
std::shared_ptr<rocksdb::TableFactory> tf;
|
2013-10-31 20:38:54 +00:00
|
|
|
rocksdb::Options options;
|
2013-11-16 06:23:12 +00:00
|
|
|
if (FLAGS_prefix_len < 16) {
|
2014-03-10 19:56:46 +00:00
|
|
|
options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(
|
|
|
|
FLAGS_prefix_len));
|
2013-11-16 06:23:12 +00:00
|
|
|
}
|
2013-10-31 20:38:54 +00:00
|
|
|
rocksdb::ReadOptions ro;
|
|
|
|
rocksdb::EnvOptions env_options;
|
2013-11-16 06:23:12 +00:00
|
|
|
options.create_if_missing = true;
|
2013-10-29 03:34:02 +00:00
|
|
|
options.compression = rocksdb::CompressionType::kNoCompression;
|
|
|
|
|
2014-08-19 19:50:13 +00:00
|
|
|
if (FLAGS_table_factory == "cuckoo_hash") {
|
2014-11-12 21:05:12 +00:00
|
|
|
#ifndef ROCKSDB_LITE
|
2015-11-18 02:29:40 +00:00
|
|
|
options.allow_mmap_reads = FLAGS_mmap_read;
|
|
|
|
env_options.use_mmap_reads = FLAGS_mmap_read;
|
CuckooTable: add one option to allow identity function for the first hash function
Summary:
MurmurHash becomes expensive when we do millions Get() a second in one
thread. Add this option to allow the first hash function to use identity
function as hash function. It results in QPS increase from 3.7M/s to
~4.3M/s. I did not observe improvement for end to end RocksDB
performance. This may be caused by other bottlenecks that I will address
in a separate diff.
Test Plan:
```
[ljin@dev1964 rocksdb] ./cuckoo_table_reader_test --enable_perf --file_dir=/dev/shm --write --identity_as_first_hash=0
==== Test CuckooReaderTest.WhenKeyExists
==== Test CuckooReaderTest.WhenKeyExistsWithUint64Comparator
==== Test CuckooReaderTest.CheckIterator
==== Test CuckooReaderTest.CheckIteratorUint64
==== Test CuckooReaderTest.WhenKeyNotFound
==== Test CuckooReaderTest.TestReadPerformance
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.272us (3.7 Mqps) with batch size of 0, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.138us (7.2 Mqps) with batch size of 10, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.142us (7.1 Mqps) with batch size of 25, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.142us (7.0 Mqps) with batch size of 50, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.144us (6.9 Mqps) with batch size of 100, # of found keys 125829120
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.121us (8.3 Mqps) with batch size of 10, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.123us (8.1 Mqps) with batch size of 25, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.121us (8.3 Mqps) with batch size of 50, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.112us (8.9 Mqps) with batch size of 100, # of found keys 104857600
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.251us (4.0 Mqps) with batch size of 0, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.107us (9.4 Mqps) with batch size of 10, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.099us (10.1 Mqps) with batch size of 25, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.116us (8.6 Mqps) with batch size of 100, # of found keys 83886080
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.189us (5.3 Mqps) with batch size of 0, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.095us (10.5 Mqps) with batch size of 10, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.096us (10.4 Mqps) with batch size of 25, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.098us (10.2 Mqps) with batch size of 50, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.105us (9.5 Mqps) with batch size of 100, # of found keys 73400320
[ljin@dev1964 rocksdb] ./cuckoo_table_reader_test --enable_perf --file_dir=/dev/shm --write --identity_as_first_hash=1
==== Test CuckooReaderTest.WhenKeyExists
==== Test CuckooReaderTest.WhenKeyExistsWithUint64Comparator
==== Test CuckooReaderTest.CheckIterator
==== Test CuckooReaderTest.CheckIteratorUint64
==== Test CuckooReaderTest.WhenKeyNotFound
==== Test CuckooReaderTest.TestReadPerformance
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.230us (4.3 Mqps) with batch size of 0, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.086us (11.7 Mqps) with batch size of 10, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.088us (11.3 Mqps) with batch size of 25, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.083us (12.1 Mqps) with batch size of 50, # of found keys 125829120
With 125829120 items, utilization is 93.75%, number of hash functions: 2.
Time taken per op is 0.083us (12.1 Mqps) with batch size of 100, # of found keys 125829120
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.159us (6.3 Mqps) with batch size of 0, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.078us (12.8 Mqps) with batch size of 10, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.080us (12.6 Mqps) with batch size of 25, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 50, # of found keys 104857600
With 104857600 items, utilization is 78.12%, number of hash functions: 2.
Time taken per op is 0.082us (12.2 Mqps) with batch size of 100, # of found keys 104857600
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.077us (12.9 Mqps) with batch size of 25, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.078us (12.8 Mqps) with batch size of 50, # of found keys 83886080
With 83886080 items, utilization is 62.50%, number of hash functions: 2.
Time taken per op is 0.079us (12.6 Mqps) with batch size of 100, # of found keys 83886080
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.218us (4.6 Mqps) with batch size of 0, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.083us (12.0 Mqps) with batch size of 10, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.086us (11.6 Mqps) with batch size of 50, # of found keys 73400320
With 73400320 items, utilization is 54.69%, number of hash functions: 2.
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100, # of found keys 73400320
```
Reviewers: sdong, igor, yhchiang
Reviewed By: igor
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D23451
2014-09-18 18:00:48 +00:00
|
|
|
rocksdb::CuckooTableOptions table_options;
|
|
|
|
table_options.hash_table_ratio = 0.75;
|
|
|
|
tf.reset(rocksdb::NewCuckooTableFactory(table_options));
|
2014-11-12 21:05:12 +00:00
|
|
|
#else
|
|
|
|
fprintf(stderr, "Plain table is not supported in lite mode\n");
|
|
|
|
exit(1);
|
|
|
|
#endif // ROCKSDB_LITE
|
2014-08-19 19:50:13 +00:00
|
|
|
} else if (FLAGS_table_factory == "plain_table") {
|
2014-11-12 21:05:12 +00:00
|
|
|
#ifndef ROCKSDB_LITE
|
2015-11-18 02:29:40 +00:00
|
|
|
options.allow_mmap_reads = FLAGS_mmap_read;
|
|
|
|
env_options.use_mmap_reads = FLAGS_mmap_read;
|
2014-07-18 07:08:38 +00:00
|
|
|
|
|
|
|
rocksdb::PlainTableOptions plain_table_options;
|
|
|
|
plain_table_options.user_key_len = 16;
|
|
|
|
plain_table_options.bloom_bits_per_key = (FLAGS_prefix_len == 16) ? 0 : 8;
|
|
|
|
plain_table_options.hash_table_ratio = 0.75;
|
|
|
|
|
2014-08-19 19:50:13 +00:00
|
|
|
tf.reset(new rocksdb::PlainTableFactory(plain_table_options));
|
2014-03-10 19:56:46 +00:00
|
|
|
options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(
|
|
|
|
FLAGS_prefix_len));
|
2014-11-12 21:05:12 +00:00
|
|
|
#else
|
|
|
|
fprintf(stderr, "Cuckoo table is not supported in lite mode\n");
|
|
|
|
exit(1);
|
|
|
|
#endif // ROCKSDB_LITE
|
2014-08-19 19:50:13 +00:00
|
|
|
} else if (FLAGS_table_factory == "block_based") {
|
|
|
|
tf.reset(new rocksdb::BlockBasedTableFactory());
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "Invalid table type %s\n", FLAGS_table_factory.c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tf) {
|
|
|
|
// if user provides invalid options, just fall back to microsecond.
|
|
|
|
bool measured_by_nanosecond = FLAGS_time_unit == "nanosecond";
|
|
|
|
|
|
|
|
options.table_factory = tf;
|
|
|
|
rocksdb::TableReaderBenchmark(options, env_options, ro, FLAGS_num_keys1,
|
|
|
|
FLAGS_num_keys2, FLAGS_iter, FLAGS_prefix_len,
|
|
|
|
FLAGS_query_empty, FLAGS_iterator,
|
|
|
|
FLAGS_through_db, measured_by_nanosecond);
|
2013-10-29 03:34:02 +00:00
|
|
|
} else {
|
2014-08-19 19:50:13 +00:00
|
|
|
return 1;
|
2013-10-29 03:34:02 +00:00
|
|
|
}
|
Benchmark table reader wiht nanoseconds
Summary: nanosecnods gave us better view of the performance, especially when some operations are fast so that micro seconds may only reveal less informative results.
Test Plan:
sample output:
./table_reader_bench --plain_table --time_unit=nanosecond
=======================================================================================================
InMemoryTableSimpleBenchmark: PlainTable num_key1: 4096 num_key2: 512 non_empty
=======================================================================================================
Histogram (unit: nanosecond):
Count: 6291456 Average: 475.3867 StdDev: 556.05
Min: 135.0000 Median: 400.1817 Max: 33370.0000
Percentiles: P50: 400.18 P75: 530.02 P99: 887.73 P99.9: 8843.26 P99.99: 9941.21
------------------------------------------------------
[ 120, 140 ) 2 0.000% 0.000%
[ 140, 160 ) 452 0.007% 0.007%
[ 160, 180 ) 13683 0.217% 0.225%
[ 180, 200 ) 54353 0.864% 1.089%
[ 200, 250 ) 101004 1.605% 2.694%
[ 250, 300 ) 729791 11.600% 14.294% ##
[ 300, 350 ) 616070 9.792% 24.086% ##
[ 350, 400 ) 1628021 25.877% 49.963% #####
[ 400, 450 ) 647220 10.287% 60.250% ##
[ 450, 500 ) 577206 9.174% 69.424% ##
[ 500, 600 ) 1168585 18.574% 87.999% ####
[ 600, 700 ) 506875 8.057% 96.055% ##
[ 700, 800 ) 147878 2.350% 98.406%
[ 800, 900 ) 42633 0.678% 99.083%
[ 900, 1000 ) 16304 0.259% 99.342%
[ 1000, 1200 ) 7811 0.124% 99.466%
[ 1200, 1400 ) 1453 0.023% 99.490%
[ 1400, 1600 ) 307 0.005% 99.494%
[ 1600, 1800 ) 81 0.001% 99.496%
[ 1800, 2000 ) 18 0.000% 99.496%
[ 2000, 2500 ) 8 0.000% 99.496%
[ 2500, 3000 ) 6 0.000% 99.496%
[ 3500, 4000 ) 3 0.000% 99.496%
[ 4000, 4500 ) 116 0.002% 99.498%
[ 4500, 5000 ) 1144 0.018% 99.516%
[ 5000, 6000 ) 1087 0.017% 99.534%
[ 6000, 7000 ) 2403 0.038% 99.572%
[ 7000, 8000 ) 9840 0.156% 99.728%
[ 8000, 9000 ) 12820 0.204% 99.932%
[ 9000, 10000 ) 3881 0.062% 99.994%
[ 10000, 12000 ) 135 0.002% 99.996%
[ 12000, 14000 ) 159 0.003% 99.998%
[ 14000, 16000 ) 58 0.001% 99.999%
[ 16000, 18000 ) 30 0.000% 100.000%
[ 18000, 20000 ) 14 0.000% 100.000%
[ 20000, 25000 ) 2 0.000% 100.000%
[ 25000, 30000 ) 2 0.000% 100.000%
[ 30000, 35000 ) 1 0.000% 100.000%
Reviewers: haobo, dhruba, sdong
CC: leveldb
Differential Revision: https://reviews.facebook.net/D16113
2014-02-13 21:55:04 +00:00
|
|
|
|
2013-10-31 20:38:54 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2014-05-09 15:34:18 +00:00
|
|
|
|
|
|
|
#endif // GFLAGS
|