mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-29 09:36:17 +00:00
CuckooTable: add one option to allow identity function for the first hash function
Summary: MurmurHash becomes expensive when we do millions Get() a second in one thread. Add this option to allow the first hash function to use identity function as hash function. It results in QPS increase from 3.7M/s to ~4.3M/s. I did not observe improvement for end to end RocksDB performance. This may be caused by other bottlenecks that I will address in a separate diff. Test Plan: ``` [ljin@dev1964 rocksdb] ./cuckoo_table_reader_test --enable_perf --file_dir=/dev/shm --write --identity_as_first_hash=0 ==== Test CuckooReaderTest.WhenKeyExists ==== Test CuckooReaderTest.WhenKeyExistsWithUint64Comparator ==== Test CuckooReaderTest.CheckIterator ==== Test CuckooReaderTest.CheckIteratorUint64 ==== Test CuckooReaderTest.WhenKeyNotFound ==== Test CuckooReaderTest.TestReadPerformance With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.272us (3.7 Mqps) with batch size of 0, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.138us (7.2 Mqps) with batch size of 10, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.142us (7.1 Mqps) with batch size of 25, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.142us (7.0 Mqps) with batch size of 50, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.144us (6.9 Mqps) with batch size of 100, # of found keys 125829120 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.201us (5.0 Mqps) with batch size of 0, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. 
Time taken per op is 0.121us (8.3 Mqps) with batch size of 10, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.123us (8.1 Mqps) with batch size of 25, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.121us (8.3 Mqps) with batch size of 50, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.112us (8.9 Mqps) with batch size of 100, # of found keys 104857600 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.251us (4.0 Mqps) with batch size of 0, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.107us (9.4 Mqps) with batch size of 10, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.099us (10.1 Mqps) with batch size of 25, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.100us (10.0 Mqps) with batch size of 50, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.116us (8.6 Mqps) with batch size of 100, # of found keys 83886080 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.189us (5.3 Mqps) with batch size of 0, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.095us (10.5 Mqps) with batch size of 10, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.096us (10.4 Mqps) with batch size of 25, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. 
Time taken per op is 0.098us (10.2 Mqps) with batch size of 50, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.105us (9.5 Mqps) with batch size of 100, # of found keys 73400320 [ljin@dev1964 rocksdb] ./cuckoo_table_reader_test --enable_perf --file_dir=/dev/shm --write --identity_as_first_hash=1 ==== Test CuckooReaderTest.WhenKeyExists ==== Test CuckooReaderTest.WhenKeyExistsWithUint64Comparator ==== Test CuckooReaderTest.CheckIterator ==== Test CuckooReaderTest.CheckIteratorUint64 ==== Test CuckooReaderTest.WhenKeyNotFound ==== Test CuckooReaderTest.TestReadPerformance With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.230us (4.3 Mqps) with batch size of 0, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.086us (11.7 Mqps) with batch size of 10, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.088us (11.3 Mqps) with batch size of 25, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.083us (12.1 Mqps) with batch size of 50, # of found keys 125829120 With 125829120 items, utilization is 93.75%, number of hash functions: 2. Time taken per op is 0.083us (12.1 Mqps) with batch size of 100, # of found keys 125829120 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.159us (6.3 Mqps) with batch size of 0, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.078us (12.8 Mqps) with batch size of 10, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. 
Time taken per op is 0.080us (12.6 Mqps) with batch size of 25, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.080us (12.5 Mqps) with batch size of 50, # of found keys 104857600 With 104857600 items, utilization is 78.12%, number of hash functions: 2. Time taken per op is 0.082us (12.2 Mqps) with batch size of 100, # of found keys 104857600 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.154us (6.5 Mqps) with batch size of 0, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.077us (13.0 Mqps) with batch size of 10, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.077us (12.9 Mqps) with batch size of 25, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.078us (12.8 Mqps) with batch size of 50, # of found keys 83886080 With 83886080 items, utilization is 62.50%, number of hash functions: 2. Time taken per op is 0.079us (12.6 Mqps) with batch size of 100, # of found keys 83886080 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.218us (4.6 Mqps) with batch size of 0, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.083us (12.0 Mqps) with batch size of 10, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.085us (11.7 Mqps) with batch size of 25, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. Time taken per op is 0.086us (11.6 Mqps) with batch size of 50, # of found keys 73400320 With 73400320 items, utilization is 54.69%, number of hash functions: 2. 
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100, # of found keys 73400320 ``` Reviewers: sdong, igor, yhchiang Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D23451
This commit is contained in:
parent
035043559d
commit
51af7c326c
|
@ -7,6 +7,7 @@
|
|||
### Behavior changes
|
||||
* We have refactored our system of stalling writes. Any stall-related statistics' meanings are changed. Instead of per-write stall counts, we now count stalls per-epoch, where epochs are periods between flushes and compactions. You'll find more information in our Tuning Perf Guide once we release RocksDB 3.6.
|
||||
* When disableDataSync=true, we no longer sync the MANIFEST file.
|
||||
* Add identity_as_first_hash property to CuckooTable. Existing CuckooTable SST files need to be rebuilt to be opened properly by the reader.
|
||||
|
||||
----- Past Releases -----
|
||||
|
||||
|
|
|
@ -514,6 +514,9 @@ DEFINE_int64(keys_per_prefix, 0, "control average number of keys generated "
|
|||
"i.e. use the prefix comes with the generated random number.");
|
||||
DEFINE_bool(enable_io_prio, false, "Lower the background flush/compaction "
|
||||
"threads' IO priority");
|
||||
DEFINE_bool(identity_as_first_hash, false, "the first hash function of cuckoo "
|
||||
"table becomes an identity function. This is only valid when key "
|
||||
"is 8 bytes");
|
||||
|
||||
enum RepFactory {
|
||||
kSkipList,
|
||||
|
@ -1739,8 +1742,11 @@ class Benchmark {
|
|||
fprintf(stderr, "Invalid cuckoo_hash_ratio\n");
|
||||
exit(1);
|
||||
}
|
||||
rocksdb::CuckooTableOptions table_options;
|
||||
table_options.hash_table_ratio = FLAGS_cuckoo_hash_ratio;
|
||||
table_options.identity_as_first_hash = FLAGS_identity_as_first_hash;
|
||||
options.table_factory = std::shared_ptr<TableFactory>(
|
||||
NewCuckooTableFactory(FLAGS_cuckoo_hash_ratio));
|
||||
NewCuckooTableFactory(table_options));
|
||||
} else {
|
||||
BlockBasedTableOptions block_based_options;
|
||||
if (FLAGS_use_hash_search) {
|
||||
|
|
|
@ -251,23 +251,36 @@ struct CuckooTablePropertyNames {
|
|||
// Denotes if the key stored in the file is Internal Key (if false)
|
||||
// or User Key only (if true).
|
||||
static const std::string kIsLastLevel;
|
||||
// Indicate if using identity function for the first hash function.
|
||||
static const std::string kIdentityAsFirstHash;
|
||||
};
|
||||
|
||||
struct CuckooTableOptions {
|
||||
// Determines the utilization of hash tables. Smaller values
|
||||
// result in larger hash tables with fewer collisions.
|
||||
double hash_table_ratio = 0.9;
|
||||
// A property used by builder to determine the depth to go to
|
||||
// to search for a path to displace elements in case of
|
||||
// collision. See Builder.MakeSpaceForKey method. Higher
|
||||
// values result in more efficient hash tables with fewer
|
||||
// lookups but take more time to build.
|
||||
uint32_t max_search_depth = 100;
|
||||
// In case of collision while inserting, the builder
|
||||
// attempts to insert in the next cuckoo_block_size
|
||||
// locations before skipping over to the next Cuckoo hash
|
||||
// function. This makes lookups more cache friendly in case
|
||||
// of collisions.
|
||||
uint32_t cuckoo_block_size = 5;
|
||||
// If this option is enabled, user key is treated as uint64_t and its value
|
||||
// is used as hash value directly. This option changes builder's behavior.
|
||||
// Reader ignores this option and behaves according to what is specified in the table
|
||||
// property.
|
||||
bool identity_as_first_hash = false;
|
||||
};
|
||||
|
||||
// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
|
||||
// @hash_table_ratio: Determines the utilization of hash tables. Smaller values
|
||||
// result in larger hash tables with fewer collisions.
|
||||
// @max_search_depth: A property used by builder to determine the depth to go to
|
||||
// to search for a path to displace elements in case of
|
||||
// collision. See Builder.MakeSpaceForKey method. Higher
|
||||
// values result in more efficient hash tables with fewer
|
||||
// lookups but take more time to build.
|
||||
// @cuckoo_block_size: In case of collision while inserting, the builder
|
||||
// attempts to insert in the next cuckoo_block_size
|
||||
// locations before skipping over to the next Cuckoo hash
|
||||
// function. This makes lookups more cache friendly in case
|
||||
// of collisions.
|
||||
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9,
|
||||
uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5);
|
||||
extern TableFactory* NewCuckooTableFactory(
|
||||
const CuckooTableOptions& table_options = CuckooTableOptions());
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ const std::string CuckooTablePropertyNames::kIsLastLevel =
|
|||
"rocksdb.cuckoo.file.islastlevel";
|
||||
const std::string CuckooTablePropertyNames::kCuckooBlockSize =
|
||||
"rocksdb.cuckoo.hash.cuckooblocksize";
|
||||
const std::string CuckooTablePropertyNames::kIdentityAsFirstHash =
|
||||
"rocksdb.cuckoo.hash.identityfirst";
|
||||
|
||||
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
|
||||
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
|
||||
|
@ -43,6 +45,7 @@ CuckooTableBuilder::CuckooTableBuilder(
|
|||
WritableFile* file, double max_hash_table_ratio,
|
||||
uint32_t max_num_hash_table, uint32_t max_search_depth,
|
||||
const Comparator* user_comparator, uint32_t cuckoo_block_size,
|
||||
bool identity_as_first_hash,
|
||||
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
|
||||
: num_hash_func_(2),
|
||||
file_(file),
|
||||
|
@ -54,6 +57,7 @@ CuckooTableBuilder::CuckooTableBuilder(
|
|||
is_last_level_file_(false),
|
||||
has_seen_first_key_(false),
|
||||
ucomp_(user_comparator),
|
||||
identity_as_first_hash_(identity_as_first_hash),
|
||||
get_slice_hash_(get_slice_hash),
|
||||
closed_(false) {
|
||||
// Data is in a huge block.
|
||||
|
@ -119,7 +123,7 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
|
|||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
|
||||
++hash_cnt) {
|
||||
uint64_t hash_val = CuckooHash(user_key, hash_cnt,
|
||||
hash_table_size_minus_one, get_slice_hash_);
|
||||
hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_);
|
||||
// If there is a collision, check next cuckoo_block_size_ locations for
|
||||
// empty locations. While checking, if we reach end of the hash table,
|
||||
// stop searching and proceed for next hash function.
|
||||
|
@ -149,7 +153,7 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
|
|||
// We don't really need to rehash the entire table because old hashes are
|
||||
// still valid and we only increased the number of hash functions.
|
||||
uint64_t hash_val = CuckooHash(user_key, num_hash_func_,
|
||||
hash_table_size_minus_one, get_slice_hash_);
|
||||
hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_);
|
||||
++num_hash_func_;
|
||||
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
|
||||
++block_idx, ++hash_val) {
|
||||
|
@ -261,6 +265,10 @@ Status CuckooTableBuilder::Finish() {
|
|||
CuckooTablePropertyNames::kCuckooBlockSize].assign(
|
||||
reinterpret_cast<const char*>(&cuckoo_block_size_),
|
||||
sizeof(cuckoo_block_size_));
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kIdentityAsFirstHash].assign(
|
||||
reinterpret_cast<const char*>(&identity_as_first_hash_),
|
||||
sizeof(identity_as_first_hash_));
|
||||
|
||||
// Write meta blocks.
|
||||
MetaIndexBuilder meta_index_builder;
|
||||
|
@ -380,7 +388,8 @@ bool CuckooTableBuilder::MakeSpaceForKey(
|
|||
uint64_t child_bucket_id = CuckooHash(
|
||||
(is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first :
|
||||
ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first))),
|
||||
hash_cnt, hash_table_size_minus_one, get_slice_hash_);
|
||||
hash_cnt, hash_table_size_minus_one, identity_as_first_hash_,
|
||||
get_slice_hash_);
|
||||
// Iterate inside Cuckoo Block.
|
||||
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
|
||||
++block_idx, ++child_bucket_id) {
|
||||
|
|
|
@ -24,6 +24,7 @@ class CuckooTableBuilder: public TableBuilder {
|
|||
WritableFile* file, double max_hash_table_ratio,
|
||||
uint32_t max_num_hash_func, uint32_t max_search_depth,
|
||||
const Comparator* user_comparator, uint32_t cuckoo_block_size,
|
||||
bool identity_as_first_hash,
|
||||
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
|
||||
|
||||
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||
|
@ -87,6 +88,7 @@ class CuckooTableBuilder: public TableBuilder {
|
|||
TableProperties properties_;
|
||||
bool has_seen_first_key_;
|
||||
const Comparator* ucomp_;
|
||||
bool identity_as_first_hash_;
|
||||
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
|
||||
uint64_t max_num_buckets);
|
||||
std::string largest_user_key_ = "";
|
||||
|
|
|
@ -133,7 +133,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
|
|||
fname = test::TmpDir() + "/EmptyFile";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
4, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
4, 100, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
ASSERT_EQ(0UL, builder.FileSize());
|
||||
ASSERT_OK(builder.Finish());
|
||||
|
@ -162,7 +162,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
|
|||
fname = test::TmpDir() + "/NoCollisionFullKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
|
@ -202,7 +202,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
|
|||
fname = test::TmpDir() + "/WithCollisionFullKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
|
@ -243,7 +243,8 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
|
|||
fname = test::TmpDir() + "/WithCollisionFullKey2";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, false,
|
||||
GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
|
@ -288,7 +289,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
|
|||
fname = test::TmpDir() + "/WithCollisionPathFullKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
|
@ -330,7 +331,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
|
|||
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 2, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 2, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
|
@ -366,7 +367,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
|
|||
fname = test::TmpDir() + "/NoCollisionUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
||||
|
@ -402,7 +403,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
|
|||
fname = test::TmpDir() + "/WithCollisionUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
||||
|
@ -440,7 +441,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
|
|||
fname = test::TmpDir() + "/WithCollisionPathUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
||||
|
@ -478,7 +479,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
|||
fname = test::TmpDir() + "/WithCollisionPathUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
|
||||
|
@ -498,7 +499,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
|
|||
fname = test::TmpDir() + "/FailWhenSameKeyInserted";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
|
||||
builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
|
||||
|
|
|
@ -30,9 +30,10 @@ TableBuilder* CuckooTableFactory::NewTableBuilder(
|
|||
const InternalKeyComparator& internal_comparator,
|
||||
WritableFile* file, const CompressionType,
|
||||
const CompressionOptions&) const {
|
||||
return new CuckooTableBuilder(file, hash_table_ratio_, 64,
|
||||
max_search_depth_, internal_comparator.user_comparator(),
|
||||
cuckoo_block_size_, nullptr);
|
||||
return new CuckooTableBuilder(file, table_options_.hash_table_ratio, 64,
|
||||
table_options_.max_search_depth, internal_comparator.user_comparator(),
|
||||
table_options_.cuckoo_block_size, table_options_.identity_as_first_hash,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
std::string CuckooTableFactory::GetPrintableTableOptions() const {
|
||||
|
@ -42,21 +43,22 @@ std::string CuckooTableFactory::GetPrintableTableOptions() const {
|
|||
char buffer[kBufferSize];
|
||||
|
||||
snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n",
|
||||
hash_table_ratio_);
|
||||
table_options_.hash_table_ratio);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " max_search_depth: %u\n",
|
||||
max_search_depth_);
|
||||
table_options_.max_search_depth);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " cuckoo_block_size: %u\n",
|
||||
cuckoo_block_size_);
|
||||
table_options_.cuckoo_block_size);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " identity_as_first_hash: %d\n",
|
||||
table_options_.identity_as_first_hash);
|
||||
ret.append(buffer);
|
||||
return ret;
|
||||
}
|
||||
|
||||
TableFactory* NewCuckooTableFactory(double hash_table_ratio,
|
||||
uint32_t max_search_depth, uint32_t cuckoo_block_size) {
|
||||
return new CuckooTableFactory(
|
||||
hash_table_ratio, max_search_depth, cuckoo_block_size);
|
||||
TableFactory* NewCuckooTableFactory(const CuckooTableOptions& table_options) {
|
||||
return new CuckooTableFactory(table_options);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
|
|
@ -16,6 +16,7 @@ namespace rocksdb {
|
|||
const uint32_t kCuckooMurmurSeedMultiplier = 816922183;
|
||||
static inline uint64_t CuckooHash(
|
||||
const Slice& user_key, uint32_t hash_cnt, uint64_t table_size_minus_one,
|
||||
bool identity_as_first_hash,
|
||||
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) {
|
||||
#ifndef NDEBUG
|
||||
// This part is used only in unit tests.
|
||||
|
@ -23,6 +24,10 @@ static inline uint64_t CuckooHash(
|
|||
return get_slice_hash(user_key, hash_cnt, table_size_minus_one + 1);
|
||||
}
|
||||
#endif
|
||||
if (hash_cnt == 0 && identity_as_first_hash) {
|
||||
return (*reinterpret_cast<const int64_t*>(user_key.data())) &
|
||||
table_size_minus_one;
|
||||
}
|
||||
return MurmurHash(user_key.data(), user_key.size(),
|
||||
kCuckooMurmurSeedMultiplier * hash_cnt) & table_size_minus_one;
|
||||
}
|
||||
|
@ -36,11 +41,8 @@ static inline uint64_t CuckooHash(
|
|||
// - Does not support Merge operations.
|
||||
class CuckooTableFactory : public TableFactory {
|
||||
public:
|
||||
CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth,
|
||||
uint32_t cuckoo_block_size)
|
||||
: hash_table_ratio_(hash_table_ratio),
|
||||
max_search_depth_(max_search_depth),
|
||||
cuckoo_block_size_(cuckoo_block_size) {}
|
||||
explicit CuckooTableFactory(const CuckooTableOptions& table_options)
|
||||
: table_options_(table_options) {}
|
||||
~CuckooTableFactory() {}
|
||||
|
||||
const char* Name() const override { return "CuckooTable"; }
|
||||
|
@ -63,9 +65,7 @@ class CuckooTableFactory : public TableFactory {
|
|||
std::string GetPrintableTableOptions() const override;
|
||||
|
||||
private:
|
||||
const double hash_table_ratio_;
|
||||
const uint32_t max_search_depth_;
|
||||
const uint32_t cuckoo_block_size_;
|
||||
const CuckooTableOptions table_options_;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
|
|
@ -50,13 +50,13 @@ CuckooTableReader::CuckooTableReader(
|
|||
auto& user_props = props->user_collected_properties;
|
||||
auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc);
|
||||
if (hash_funs == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Number of hash functions not found");
|
||||
status_ = Status::Corruption("Number of hash functions not found");
|
||||
return;
|
||||
}
|
||||
num_hash_func_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
|
||||
auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey);
|
||||
if (unused_key == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Empty bucket value not found");
|
||||
status_ = Status::Corruption("Empty bucket value not found");
|
||||
return;
|
||||
}
|
||||
unused_key_ = unused_key->second;
|
||||
|
@ -64,7 +64,7 @@ CuckooTableReader::CuckooTableReader(
|
|||
key_length_ = props->fixed_key_len;
|
||||
auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength);
|
||||
if (value_length == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Value length not found");
|
||||
status_ = Status::Corruption("Value length not found");
|
||||
return;
|
||||
}
|
||||
value_length_ = *reinterpret_cast<const uint32_t*>(
|
||||
|
@ -74,21 +74,31 @@ CuckooTableReader::CuckooTableReader(
|
|||
auto hash_table_size = user_props.find(
|
||||
CuckooTablePropertyNames::kHashTableSize);
|
||||
if (hash_table_size == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Hash table size not found");
|
||||
status_ = Status::Corruption("Hash table size not found");
|
||||
return;
|
||||
}
|
||||
table_size_minus_one_ = *reinterpret_cast<const uint64_t*>(
|
||||
hash_table_size->second.data()) - 1;
|
||||
auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
|
||||
if (is_last_level == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Is last level not found");
|
||||
status_ = Status::Corruption("Is last level not found");
|
||||
return;
|
||||
}
|
||||
is_last_level_ = *reinterpret_cast<const bool*>(is_last_level->second.data());
|
||||
|
||||
auto identity_as_first_hash = user_props.find(
|
||||
CuckooTablePropertyNames::kIdentityAsFirstHash);
|
||||
if (identity_as_first_hash == user_props.end()) {
|
||||
status_ = Status::Corruption("identity as first hash not found");
|
||||
return;
|
||||
}
|
||||
identity_as_first_hash_ = *reinterpret_cast<const bool*>(
|
||||
identity_as_first_hash->second.data());
|
||||
|
||||
auto cuckoo_block_size = user_props.find(
|
||||
CuckooTablePropertyNames::kCuckooBlockSize);
|
||||
if (cuckoo_block_size == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Cuckoo block size not found");
|
||||
status_ = Status::Corruption("Cuckoo block size not found");
|
||||
return;
|
||||
}
|
||||
cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>(
|
||||
|
@ -106,7 +116,8 @@ Status CuckooTableReader::Get(
|
|||
Slice user_key = ExtractUserKey(key);
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
|
||||
uint64_t offset = bucket_length_ * CuckooHash(
|
||||
user_key, hash_cnt, table_size_minus_one_, get_slice_hash_);
|
||||
user_key, hash_cnt, table_size_minus_one_, identity_as_first_hash_,
|
||||
get_slice_hash_);
|
||||
const char* bucket = &file_data_.data()[offset];
|
||||
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
|
||||
++block_idx, bucket += bucket_length_) {
|
||||
|
@ -117,7 +128,7 @@ Status CuckooTableReader::Get(
|
|||
// Here, we compare only the user key part as we support only one entry
|
||||
// per user key and we don't support snapshots.
|
||||
if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) {
|
||||
Slice value = Slice(&bucket[key_length_], value_length_);
|
||||
Slice value(bucket + key_length_, value_length_);
|
||||
if (is_last_level_) {
|
||||
ParsedInternalKey found_ikey(
|
||||
Slice(bucket, key_length_), 0, kTypeValue);
|
||||
|
@ -140,7 +151,8 @@ void CuckooTableReader::Prepare(const Slice& key) {
|
|||
// Prefetch the first Cuckoo Block.
|
||||
Slice user_key = ExtractUserKey(key);
|
||||
uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) +
|
||||
bucket_length_ * CuckooHash(user_key, 0, table_size_minus_one_, nullptr);
|
||||
bucket_length_ * CuckooHash(user_key, 0, table_size_minus_one_,
|
||||
identity_as_first_hash_, nullptr);
|
||||
uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
|
||||
for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
|
||||
PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
|
||||
|
|
|
@ -64,6 +64,7 @@ class CuckooTableReader: public TableReader {
|
|||
std::unique_ptr<RandomAccessFile> file_;
|
||||
Slice file_data_;
|
||||
bool is_last_level_;
|
||||
bool identity_as_first_hash_;
|
||||
std::shared_ptr<const TableProperties> table_props_;
|
||||
Status status_;
|
||||
uint32_t num_hash_func_;
|
||||
|
|
|
@ -38,6 +38,7 @@ DEFINE_string(file_dir, "", "Directory where the files will be created"
|
|||
DEFINE_bool(enable_perf, false, "Run Benchmark Tests too.");
|
||||
DEFINE_bool(write, false,
|
||||
"Should write new values to file in performance tests?");
|
||||
DEFINE_bool(identity_as_first_hash, true, "use identity as first hash");
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
|
@ -109,7 +110,8 @@ class CuckooReaderTest {
|
|||
std::unique_ptr<WritableFile> writable_file;
|
||||
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
||||
CuckooTableBuilder builder(
|
||||
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, GetSliceHash);
|
||||
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, false,
|
||||
GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
|
||||
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
|
@ -375,8 +377,15 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
|
|||
|
||||
// Performance tests
|
||||
namespace {
|
||||
int64_t found_count = 0;
|
||||
std::string value;
|
||||
bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) {
|
||||
// Deliberately empty.
|
||||
if (*reinterpret_cast<const int32_t*>(k.user_key.data()) ==
|
||||
*reinterpret_cast<const int32_t*>(v.data())) {
|
||||
++found_count;
|
||||
value.assign(v.data(), v.size());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -389,12 +398,14 @@ bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) {
|
|||
}
|
||||
|
||||
void GetKeys(uint64_t num, std::vector<std::string>* keys) {
|
||||
keys->clear();
|
||||
IterKey k;
|
||||
k.SetInternalKey("", 0, kTypeValue);
|
||||
std::string internal_key_suffix = k.GetKey().ToString();
|
||||
ASSERT_EQ(static_cast<size_t>(8), internal_key_suffix.size());
|
||||
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
||||
std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
|
||||
uint64_t value = 2 * key_idx;
|
||||
std::string new_key(reinterpret_cast<char*>(&value), sizeof(value));
|
||||
new_key += internal_key_suffix;
|
||||
keys->push_back(new_key);
|
||||
}
|
||||
|
@ -422,7 +433,8 @@ void WriteFile(const std::vector<std::string>& keys,
|
|||
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
||||
CuckooTableBuilder builder(
|
||||
writable_file.get(), hash_ratio,
|
||||
64, 1000, test::Uint64Comparator(), 5, nullptr);
|
||||
64, 1000, test::Uint64Comparator(), 5,
|
||||
FLAGS_identity_as_first_hash, nullptr);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
||||
// Value is just a part of key.
|
||||
|
@ -482,27 +494,36 @@ void ReadKeys(uint64_t num, uint32_t batch_size) {
|
|||
" hash functions: %u.\n", num, num * 100.0 / (table_size), num_hash_fun);
|
||||
ReadOptions r_options;
|
||||
|
||||
std::vector<uint64_t> keys;
|
||||
keys.reserve(num);
|
||||
for (uint64_t i = 0; i < num; ++i) {
|
||||
keys.push_back(2 * i);
|
||||
}
|
||||
std::random_shuffle(keys.begin(), keys.end());
|
||||
|
||||
found_count = 0;
|
||||
uint64_t start_time = env->NowMicros();
|
||||
if (batch_size > 0) {
|
||||
for (uint64_t i = 0; i < num; i += batch_size) {
|
||||
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
||||
reader.Prepare(Slice(reinterpret_cast<char*>(&j), 16));
|
||||
reader.Prepare(Slice(reinterpret_cast<char*>(&keys[j]), 16));
|
||||
}
|
||||
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
||||
reader.Get(r_options, Slice(reinterpret_cast<char*>(&j), 16),
|
||||
nullptr, DoNothing, nullptr);
|
||||
reader.Get(r_options, Slice(reinterpret_cast<char*>(&keys[j]), 16),
|
||||
nullptr, DoNothing, nullptr);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (uint64_t i = 0; i < num; i++) {
|
||||
reader.Get(r_options, Slice(reinterpret_cast<char*>(&i), 16), nullptr,
|
||||
DoNothing, nullptr);
|
||||
reader.Get(r_options, Slice(reinterpret_cast<char*>(&keys[i]), 16),
|
||||
nullptr, DoNothing, nullptr);
|
||||
}
|
||||
}
|
||||
float time_per_op = (env->NowMicros() - start_time) * 1.0 / num;
|
||||
fprintf(stderr,
|
||||
"Time taken per op is %.3fus (%.1f Mqps) with batch size of %u\n",
|
||||
time_per_op, 1.0 / time_per_op, batch_size);
|
||||
"Time taken per op is %.3fus (%.1f Mqps) with batch size of %u, "
|
||||
"# of found keys %ld\n",
|
||||
time_per_op, 1.0 / time_per_op, batch_size, found_count);
|
||||
}
|
||||
} // namespace.
|
||||
|
||||
|
@ -514,16 +535,16 @@ TEST(CuckooReaderTest, TestReadPerformance) {
|
|||
// These numbers are chosen to have a hash utilization % close to
|
||||
// 0.9, 0.75, 0.6 and 0.5 respectively.
|
||||
// They all create 128 M buckets.
|
||||
std::vector<uint64_t> nums = {120*1000*1000, 100*1000*1000, 80*1000*1000,
|
||||
70*1000*1000};
|
||||
std::vector<uint64_t> nums = {120*1024*1024, 100*1024*1024, 80*1024*1024,
|
||||
70*1024*1024};
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout,
|
||||
"WARNING: Not compiled with DNDEBUG. Performance tests may be slow.\n");
|
||||
#endif
|
||||
std::vector<std::string> keys;
|
||||
GetKeys(*std::max_element(nums.begin(), nums.end()), &keys);
|
||||
for (uint64_t num : nums) {
|
||||
if (FLAGS_write || !Env::Default()->FileExists(GetFileName(num))) {
|
||||
GetKeys(num, &keys);
|
||||
WriteFile(keys, num, hash_ratio);
|
||||
}
|
||||
ReadKeys(num, 0);
|
||||
|
|
|
@ -260,8 +260,9 @@ int main(int argc, char** argv) {
|
|||
if (FLAGS_table_factory == "cuckoo_hash") {
|
||||
options.allow_mmap_reads = true;
|
||||
env_options.use_mmap_reads = true;
|
||||
|
||||
tf.reset(rocksdb::NewCuckooTableFactory(0.75));
|
||||
rocksdb::CuckooTableOptions table_options;
|
||||
table_options.hash_table_ratio = 0.75;
|
||||
tf.reset(rocksdb::NewCuckooTableFactory(table_options));
|
||||
} else if (FLAGS_table_factory == "plain_table") {
|
||||
options.allow_mmap_reads = true;
|
||||
env_options.use_mmap_reads = true;
|
||||
|
|
Loading…
Reference in a new issue