Mirror of https://github.com/facebook/rocksdb.git
Changing implementation of CuckooTableBuilder to not take file_size, key_length, value_length.
Summary:
- Maintain a list of key-value pairs as vectors during Add operation.
- Start building hash table only when Finish() is called.
- This approach takes more time and space but avoids taking file_size, key and value lengths.
- Rewrote cuckoo_table_builder_test.

I did not know about IterKey while writing this diff. I shall change places where IterKey could be used instead of std::string tomorrow. Please review the rest of the logic.

Test Plan:
cuckoo_table_reader_test --enable_perf
cuckoo_table_builder_test
valgrind_check
asan_check

Reviewers: sdong, igor, yhchiang, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20907
This commit is contained in:
parent
2124c85cc6
commit
606a126703
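The rewritten tests further down exercise the new interface end to end; as a quick orientation, here is a minimal sketch of the new calling convention. This is not code from the commit: ExampleSliceHash is a made-up stand-in with the uint64_t(const Slice&, uint32_t, uint64_t) signature the builder expects, and Env/WritableFile setup is elided.

// Minimal sketch of the new CuckooTableBuilder interface. The builder no
// longer takes file_size/key_length/value_length; it buffers key-value
// pairs in kvs_ and lays out the cuckoo hash table inside Finish().
#include <string>
#include <utility>

#include "db/dbformat.h"
#include "table/cuckoo_table_builder.h"

namespace rocksdb {

// Stand-in hash callback; any uint64_t(const Slice&, uint32_t, uint64_t)
// function works here. This is not a real hash.
uint64_t ExampleSliceHash(const Slice& s, uint32_t index,
                          uint64_t max_num_buckets) {
  uint64_t h = index + 1;
  for (size_t i = 0; i < s.size(); ++i) {
    h = h * 131 + static_cast<unsigned char>(s[i]);
  }
  return h % max_num_buckets;
}

Status BuildTwoKeyTable(WritableFile* file) {
  // Arguments: hash_table_ratio, max_num_hash_table, max_search_depth,
  // hash callback -- and nothing about file or entry sizes.
  CuckooTableBuilder builder(file, 0.9, 64, 100, ExampleSliceHash);
  for (const auto& kv : {std::make_pair("key01", "v01"),
                         std::make_pair("key02", "v02")}) {
    std::string ikey;
    // Zero sequence numbers mark a last-level file; the builder now
    // detects this itself from the first key it sees.
    AppendInternalKey(&ikey, ParsedInternalKey(kv.first, 0, kTypeValue));
    builder.Add(Slice(ikey), Slice(kv.second));
  }
  return builder.Finish();  // the table is sized and written here
}

}  // namespace rocksdb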
table/cuckoo_table_builder.cc

@@ -8,6 +8,7 @@
#include <assert.h>
#include <algorithm>
#include <limits>
#include <string>
#include <vector>

@@ -36,25 +37,18 @@ const std::string CuckooTablePropertyNames::kIsLastLevel =
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;

CuckooTableBuilder::CuckooTableBuilder(
    WritableFile* file, uint32_t fixed_key_length,
    uint32_t fixed_value_length, double hash_table_ratio,
    uint64_t file_size, uint32_t max_num_hash_table,
    uint32_t max_search_depth, bool is_last_level,
    uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
    WritableFile* file, double hash_table_ratio,
    uint32_t max_num_hash_table, uint32_t max_search_depth,
    uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
    : num_hash_table_(2),
      file_(file),
      value_length_(fixed_value_length),
      // 8 is the difference between sizes of user key and InternalKey.
      bucket_size_(fixed_key_length +
          fixed_value_length - (is_last_level ? 8 : 0)),
      hash_table_ratio_(hash_table_ratio),
      max_num_buckets_(file_size / bucket_size_),
      max_num_hash_table_(max_num_hash_table),
      max_search_depth_(max_search_depth),
      is_last_level_file_(is_last_level),
      buckets_(max_num_buckets_),
      make_space_for_key_call_id_(0),
      GetSliceHash(GetSliceHashPtr) {
      is_last_level_file_(false),
      has_seen_first_key_(false),
      get_slice_hash_(get_slice_hash),
      closed_(false) {
  properties_.num_entries = 0;
  // Data is in a huge block.
  properties_.num_data_blocks = 1;

@@ -62,105 +56,120 @@ CuckooTableBuilder::CuckooTableBuilder(
  properties_.filter_size = 0;
}

CuckooTableBuilder::~CuckooTableBuilder() {
}

void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
  if (NumEntries() == max_num_buckets_) {
    status_ = Status::Corruption("Hash Table is full.");
  if (properties_.num_entries >= kMaxVectorIdx - 1) {
    status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1");
    return;
  }
  uint64_t bucket_id;
  bool bucket_found = false;
  autovector<uint64_t> hash_vals;
  ParsedInternalKey ikey;
  if (!ParseInternalKey(key, &ikey)) {
    status_ = Status::Corruption("Unable to parse key into internal key.");
    return;
  }
  Slice user_key = ikey.user_key;
  for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
    uint64_t hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
    if (buckets_[hash_val].is_empty) {
      bucket_id = hash_val;
      bucket_found = true;
      break;
    } else {
      if (user_key.compare(
          is_last_level_file_ ? Slice(buckets_[hash_val].key)
          : ExtractUserKey(Slice(buckets_[hash_val].key))) == 0) {
        status_ = Status::Corruption("Same key is being inserted again.");
        return;
      }
      hash_vals.push_back(hash_val);
    }
  }
  while (!bucket_found && !MakeSpaceForKey(key, &bucket_id, hash_vals)) {
    // Rehash by increasing number of hash tables.
    if (num_hash_table_ >= max_num_hash_table_) {
      status_ = Status::Corruption("Too many collisions. Unable to hash.");
      return;
    }
    // We don't really need to rehash the entire table because old hashes are
    // still valid and we only increased the number of hash functions.
    uint64_t hash_val = GetSliceHash(user_key,
        num_hash_table_, max_num_buckets_);
    ++num_hash_table_;
    if (buckets_[hash_val].is_empty) {
      bucket_found = true;
      bucket_id = hash_val;
      break;
    } else {
      hash_vals.push_back(hash_val);
    }
  // Determine if we can ignore the sequence number and value type from
  // internal keys by looking at sequence number from first key. We assume
  // that if first key has a zero sequence number, then all the remaining
  // keys will have zero seq. no.
  if (!has_seen_first_key_) {
    is_last_level_file_ = ikey.sequence == 0;
    has_seen_first_key_ = true;
  }
  // Even if one sequence number is non-zero, then it is not last level.
  assert(!is_last_level_file_ || ikey.sequence == 0);
  if (is_last_level_file_) {
    buckets_[bucket_id].key.assign(user_key.data(), user_key.size());
    kvs_.emplace_back(std::make_pair(
        ikey.user_key.ToString(), value.ToString()));
  } else {
    buckets_[bucket_id].key.assign(key.data(), key.size());
    kvs_.emplace_back(std::make_pair(key.ToString(), value.ToString()));
  }
  buckets_[bucket_id].value.assign(value.data(), value.size());
  buckets_[bucket_id].is_empty = false;

  properties_.num_entries++;

  // We assume that the keys are inserted in sorted order. To identify an
  // unused key, which will be used in filling empty buckets in the table,
  // we try to find gaps between successive keys inserted. This is done by
  // maintaining the previous key and comparing it with next key.
  if (unused_user_key_.empty()) {
    if (prev_key_.empty()) {
      prev_key_ = user_key.ToString();
      return;
    }
    std::string new_user_key = prev_key_;
  // We assume that the keys are inserted in sorted order as determined by
  // Byte-wise comparator. To identify an unused key, which will be used in
  // filling empty buckets in the table, we try to find gaps between successive
  // keys inserted (ie, latest key and previous in kvs_).
  if (unused_user_key_.empty() && kvs_.size() > 1) {
    std::string prev_key = is_last_level_file_ ? kvs_[kvs_.size()-1].first
        : ExtractUserKey(kvs_[kvs_.size()-1].first).ToString();
    std::string new_user_key = prev_key;
    new_user_key.back()++;
    // We ignore carry-overs and check that it is larger than previous key.
    if ((new_user_key > prev_key_) &&
        (new_user_key < user_key.ToString())) {
    if (Slice(new_user_key).compare(Slice(prev_key)) > 0 &&
        Slice(new_user_key).compare(ikey.user_key) < 0) {
      unused_user_key_ = new_user_key;
    } else {
      prev_key_ = user_key.ToString();
    }
  }
}
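The gap-finding logic above can be seen on a concrete pair of neighboring keys. A tiny standalone illustration (plain C++, toy keys, no rocksdb types):

#include <cassert>
#include <string>

int main() {
  // Keys arrive in sorted order; "key02" was never inserted.
  std::string prev_key = "key01";
  std::string next_key = "key03";
  std::string candidate = prev_key;
  candidate.back()++;  // "key01" -> "key02", ignoring carry-over
  // The candidate sorts strictly between the two real keys, so it can
  // safely mark empty buckets without colliding with any inserted key.
  assert(candidate > prev_key && candidate < next_key);
  return 0;
}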

Status CuckooTableBuilder::status() const { return status_; }

Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
  uint64_t num_buckets = kvs_.size() / hash_table_ratio_;
  buckets->resize(num_buckets);
  uint64_t make_space_for_key_call_id = 0;
  for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
    uint64_t bucket_id;
    bool bucket_found = false;
    autovector<uint64_t> hash_vals;
    Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
        ExtractUserKey(kvs_[vector_idx].first);
    for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
      uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets);
      if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
        bucket_id = hash_val;
        bucket_found = true;
        break;
      } else {
        if (user_key.compare(is_last_level_file_
            ? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
            : ExtractUserKey(
                kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
          return Status::NotSupported("Same key is being inserted again.");
        }
        hash_vals.push_back(hash_val);
      }
    }
    while (!bucket_found && !MakeSpaceForKey(hash_vals,
        ++make_space_for_key_call_id, buckets, &bucket_id)) {
      // Rehash by increasing number of hash tables.
      if (num_hash_table_ >= max_num_hash_table_) {
        return Status::NotSupported("Too many collisions. Unable to hash.");
      }
      // We don't really need to rehash the entire table because old hashes are
      // still valid and we only increased the number of hash functions.
      uint64_t hash_val = get_slice_hash_(user_key,
          num_hash_table_, num_buckets);
      ++num_hash_table_;
      if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
        bucket_found = true;
        bucket_id = hash_val;
        break;
      } else {
        hash_vals.push_back(hash_val);
      }
    }
    (*buckets)[bucket_id].vector_idx = vector_idx;
  }
  return Status::OK();
}

Status CuckooTableBuilder::Finish() {
  assert(!closed_);
  closed_ = true;

  if (unused_user_key_.empty()) {
    if (prev_key_.empty()) {
      return Status::Corruption("Unable to find unused key");
    }
    // Try to find the key next to prev_key_ by handling carryovers.
    std::string new_user_key = prev_key_;
  std::vector<CuckooBucket> buckets;
  Status s = MakeHashTable(&buckets);
  if (!s.ok()) {
    return s;
  }
  if (unused_user_key_.empty() && !kvs_.empty()) {
    // Try to find the key next to last key by handling carryovers.
    std::string last_key =
        is_last_level_file_ ? kvs_[kvs_.size()-1].first
            : ExtractUserKey(kvs_[kvs_.size()-1].first).ToString();
    std::string new_user_key = last_key;
    int curr_pos = new_user_key.size() - 1;
    while (curr_pos >= 0) {
      ++new_user_key[curr_pos];
      if (new_user_key > prev_key_) {
      if (new_user_key > last_key) {
        unused_user_key_ = new_user_key;
        break;
      }

@@ -171,29 +180,32 @@ Status CuckooTableBuilder::Finish() {
    }
  }
  std::string unused_bucket;
  if (is_last_level_file_) {
    unused_bucket = unused_user_key_;
  } else {
    ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
    AppendInternalKey(&unused_bucket, ikey);
  if (!kvs_.empty()) {
    if (is_last_level_file_) {
      unused_bucket = unused_user_key_;
    } else {
      ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
      AppendInternalKey(&unused_bucket, ikey);
    }
  }
  properties_.fixed_key_len = unused_bucket.size();
  uint32_t value_length = kvs_.empty() ? 0 : kvs_[0].second.size();
  uint32_t bucket_size = value_length + properties_.fixed_key_len;
  properties_.user_collected_properties[
      CuckooTablePropertyNames::kValueLength].assign(
      reinterpret_cast<const char*>(&value_length_), sizeof(value_length_));
      reinterpret_cast<const char*>(&value_length), sizeof(value_length));

  unused_bucket.resize(bucket_size_, 'a');
  unused_bucket.resize(bucket_size, 'a');
  // Write the table.
  uint32_t num_added = 0;
  for (auto& bucket : buckets_) {
    Status s;
    if (bucket.is_empty) {
  for (auto& bucket : buckets) {
    if (bucket.vector_idx == kMaxVectorIdx) {
      s = file_->Append(Slice(unused_bucket));
    } else {
      ++num_added;
      s = file_->Append(Slice(bucket.key));
      s = file_->Append(kvs_[bucket.vector_idx].first);
      if (s.ok()) {
        s = file_->Append(Slice(bucket.value));
        s = file_->Append(kvs_[bucket.vector_idx].second);
      }
    }
    if (!s.ok()) {

@@ -202,17 +214,17 @@ Status CuckooTableBuilder::Finish() {
  }
  assert(num_added == NumEntries());

  uint64_t offset = buckets_.size() * bucket_size_;
  uint64_t offset = buckets.size() * bucket_size;
  unused_bucket.resize(properties_.fixed_key_len);
  properties_.user_collected_properties[
      CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
  properties_.user_collected_properties[
      CuckooTablePropertyNames::kNumHashTable].assign(
      reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
  uint64_t num_buckets = buckets.size();
  properties_.user_collected_properties[
      CuckooTablePropertyNames::kMaxNumBuckets].assign(
      reinterpret_cast<const char*>(&max_num_buckets_),
      sizeof(max_num_buckets_));
      reinterpret_cast<const char*>(&num_buckets), sizeof(num_buckets));
  properties_.user_collected_properties[
      CuckooTablePropertyNames::kIsLastLevel].assign(
      reinterpret_cast<const char*>(&is_last_level_file_),

@@ -228,7 +240,7 @@ Status CuckooTableBuilder::Finish() {
  BlockHandle property_block_handle;
  property_block_handle.set_offset(offset);
  property_block_handle.set_size(property_block.size());
  Status s = file_->Append(property_block);
  s = file_->Append(property_block);
  offset += property_block.size();
  if (!s.ok()) {
    return s;

@@ -266,12 +278,14 @@ uint64_t CuckooTableBuilder::NumEntries() const {
uint64_t CuckooTableBuilder::FileSize() const {
  if (closed_) {
    return file_->GetFileSize();
  } else {
    // This is not the actual size of the file as we need to account for
    // hash table ratio. This returns the size of filled buckets in the table
    // scaled up by a factor of 1/hash table ratio.
    return (properties_.num_entries * bucket_size_) / hash_table_ratio_;
  } else if (properties_.num_entries == 0) {
    return 0;
  }
  // This is not the actual size of the file as we need to account for
  // hash table ratio. This returns the size of filled buckets in the table
  // scaled up by a factor of 1/hash_table_ratio.
  return ((kvs_[0].first.size() + kvs_[0].second.size()) *
      properties_.num_entries) / hash_table_ratio_;
}
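As a worked example of the new estimate (sizes here are illustrative, not taken from the code):

#include <cstdint>
#include <iostream>

int main() {
  // Pre-Finish() estimate: filled-bucket bytes scaled by 1/hash_table_ratio.
  uint64_t key_size = 16;      // internal key bytes (example)
  uint64_t value_size = 8;     // value bytes (example)
  uint64_t num_entries = 1000;
  double hash_table_ratio = 0.9;
  uint64_t estimate = static_cast<uint64_t>(
      (key_size + value_size) * num_entries / hash_table_ratio);
  std::cout << estimate << "\n";  // 26666: roughly 10% of buckets stay empty
  return 0;
}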

// This method is invoked when there is no place to insert the target key.

@@ -284,8 +298,11 @@ uint64_t CuckooTableBuilder::FileSize() const {
// move all elements along the path from first level to this empty bucket, to
// make space for target key which is inserted at first level (*bucket_id).
// If tree depth exceeds max depth, we return false indicating failure.
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
    uint64_t *bucket_id, autovector<uint64_t> hash_vals) {
bool CuckooTableBuilder::MakeSpaceForKey(
    const autovector<uint64_t>& hash_vals,
    const uint64_t make_space_for_key_call_id,
    std::vector<CuckooBucket>* buckets,
    uint64_t* bucket_id) {
  struct CuckooNode {
    uint64_t bucket_id;
    uint32_t depth;

@@ -302,13 +319,11 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
  // unique id for this invocation of the method. We store this number into
  // the nodes that we explore in current method call.
  // It is unlikely for the increment operation to overflow because the maximum
  // number of times this will be called is <= max_num_hash_table_ +
  // max_num_buckets_.
  ++make_space_for_key_call_id_;
  // no. of times this will be called is <= max_num_hash_table_ + kvs_.size().
  for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
    uint64_t bucket_id = hash_vals[hash_cnt];
    buckets_[bucket_id].make_space_for_key_call_id =
        make_space_for_key_call_id_;
    (*buckets)[bucket_id].make_space_for_key_call_id =
        make_space_for_key_call_id;
    tree.push_back(CuckooNode(bucket_id, 0, 0));
  }
  bool null_found = false;

@@ -318,21 +333,21 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
    if (curr_node.depth >= max_search_depth_) {
      break;
    }
    CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
    CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id];
    for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
      uint64_t child_bucket_id = GetSliceHash(
          is_last_level_file_ ? curr_bucket.key
              : ExtractUserKey(Slice(curr_bucket.key)),
          hash_cnt, max_num_buckets_);
      if (buckets_[child_bucket_id].make_space_for_key_call_id ==
          make_space_for_key_call_id_) {
      uint64_t child_bucket_id = get_slice_hash_(
          is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first
              : ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)),
          hash_cnt, buckets->size());
      if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
          make_space_for_key_call_id) {
        continue;
      }
      buckets_[child_bucket_id].make_space_for_key_call_id =
          make_space_for_key_call_id_;
      (*buckets)[child_bucket_id].make_space_for_key_call_id =
          make_space_for_key_call_id;
      tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
          curr_pos));
      if (buckets_[child_bucket_id].is_empty) {
      if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
        null_found = true;
        break;
      }

@@ -349,8 +364,8 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
  uint32_t bucket_to_replace_pos = tree.size()-1;
  while (bucket_to_replace_pos >= num_hash_table_) {
    CuckooNode& curr_node = tree[bucket_to_replace_pos];
    buckets_[curr_node.bucket_id] =
        buckets_[tree[curr_node.parent_pos].bucket_id];
    (*buckets)[curr_node.bucket_id] =
        (*buckets)[tree[curr_node.parent_pos].bucket_id];
    bucket_to_replace_pos = curr_node.parent_pos;
  }
  *bucket_id = tree[bucket_to_replace_pos].bucket_id;
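The displacement search above is a breadth-first search over "who could move where". A compact standalone sketch of the same idea under toy types follows; this is not the rocksdb implementation (notably it keeps the call-id marking only implicitly, by checking for kEmpty, and precomputes every item's candidate buckets):

#include <cstdint>
#include <vector>

namespace {

// Sentinel marking an empty bucket, playing the role of kMaxVectorIdx.
const uint32_t kEmpty = UINT32_MAX;

struct Node {
  uint64_t bucket;
  uint32_t depth;
  int parent;  // index into the BFS tree, -1 for roots
};

// (*buckets)[b] holds the index of the item stored in bucket b, or kEmpty.
// hashes[item] lists that item's candidate buckets.
// start holds the (all occupied) candidate buckets of the new key.
bool MakeSpace(std::vector<uint32_t>* buckets,
               const std::vector<std::vector<uint64_t>>& hashes,
               const std::vector<uint64_t>& start,
               uint32_t max_depth, uint64_t* free_bucket) {
  std::vector<Node> tree;
  for (uint64_t b : start) {
    tree.push_back({b, 0, -1});
  }
  int found = -1;
  // BFS: expand each occupied bucket into the occupant's other candidates.
  for (size_t pos = 0; pos < tree.size() && found < 0; ++pos) {
    if (tree[pos].depth >= max_depth) {
      break;
    }
    uint32_t occupant = (*buckets)[tree[pos].bucket];
    for (uint64_t child : hashes[occupant]) {
      tree.push_back({child, tree[pos].depth + 1, static_cast<int>(pos)});
      if ((*buckets)[child] == kEmpty) {
        found = static_cast<int>(tree.size()) - 1;
        break;
      }
    }
  }
  if (found < 0) {
    return false;  // depth exhausted; caller adds another hash function
  }
  // Shift occupants down the path, freeing the bucket at the path's root.
  int pos = found;
  while (tree[pos].parent >= 0) {
    (*buckets)[tree[pos].bucket] = (*buckets)[tree[tree[pos].parent].bucket];
    pos = tree[pos].parent;
  }
  (*buckets)[tree[pos].bucket] = kEmpty;
  *free_bucket = tree[pos].bucket;  // the new key goes here
  return true;
}

}  // namespace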

table/cuckoo_table_builder.h

@@ -6,7 +6,9 @@
#pragma once
#ifndef ROCKSDB_LITE
#include <stdint.h>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "rocksdb/status.h"
#include "table/table_builder.h"

@@ -19,14 +21,12 @@ namespace rocksdb {
class CuckooTableBuilder: public TableBuilder {
 public:
  CuckooTableBuilder(
      WritableFile* file, uint32_t fixed_key_length,
      uint32_t fixed_value_length, double hash_table_ratio,
      uint64_t file_size, uint32_t max_num_hash_table,
      uint32_t max_search_depth, bool is_last_level,
      uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
      WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
      uint32_t max_search_depth,
      uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));

  // REQUIRES: Either Finish() or Abandon() has been called.
  ~CuckooTableBuilder();
  ~CuckooTableBuilder() {}

  // Add key,value to the table being constructed.
  // REQUIRES: key is after any previously added key according to comparator.

@@ -34,7 +34,7 @@ class CuckooTableBuilder: public TableBuilder {
  void Add(const Slice& key, const Slice& value) override;

  // Return non-ok iff some error has been detected.
  Status status() const override;
  Status status() const override { return status_; }

  // Finish building the table. Stops using the file passed to the
  // constructor after this function returns.

@@ -57,35 +57,37 @@ class CuckooTableBuilder: public TableBuilder {

 private:
  struct CuckooBucket {
    CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
    std::string key;
    std::string value;
    bool is_empty;
    uint64_t make_space_for_key_call_id;
    CuckooBucket()
        : vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
    uint32_t vector_idx;
    // This number will not exceed kvs_.size() + max_num_hash_table_.
    // We assume number of items is <= 2^32.
    uint32_t make_space_for_key_call_id;
  };
  static const uint32_t kMaxVectorIdx = std::numeric_limits<int32_t>::max();

  bool MakeSpaceForKey(const Slice& key, uint64_t* bucket_id,
      autovector<uint64_t> hash_vals);
  bool MakeSpaceForKey(
      const autovector<uint64_t>& hash_vals,
      const uint64_t call_id,
      std::vector<CuckooBucket>* buckets,
      uint64_t* bucket_id);
  Status MakeHashTable(std::vector<CuckooBucket>* buckets);

  uint32_t num_hash_table_;
  WritableFile* file_;
  const uint32_t value_length_;
  const uint32_t bucket_size_;
  const double hash_table_ratio_;
  const uint64_t max_num_buckets_;
  const uint32_t max_num_hash_table_;
  const uint32_t max_search_depth_;
  const bool is_last_level_file_;
  bool is_last_level_file_;
  Status status_;
  std::vector<CuckooBucket> buckets_;
  std::vector<std::pair<std::string, std::string>> kvs_;
  TableProperties properties_;
  uint64_t make_space_for_key_call_id_;
  uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
  bool has_seen_first_key_;
  uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
      uint64_t max_num_buckets);
  std::string unused_user_key_ = "";
  std::string prev_key_;

  bool closed_ = false;  // Either Finish() or Abandon() has been called.
  bool closed_;  // Either Finish() or Abandon() has been called.

  // No copying allowed
  CuckooTableBuilder(const CuckooTableBuilder&) = delete;

table/cuckoo_table_builder_test.cc

@@ -10,26 +10,15 @@
#include "table/meta_blocks.h"
#include "table/cuckoo_table_builder.h"
#include "util/random.h"
#include "util/testharness.h"
#include "util/testutil.h"

namespace rocksdb {

extern const uint64_t kCuckooTableMagicNumber;

namespace {
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;

void AddHashLookups(const std::string& s, uint64_t bucket_id,
    uint32_t num_hash_fun) {
  std::vector<uint64_t> v;
  for (uint32_t i = 0; i < num_hash_fun; i++) {
    v.push_back(bucket_id + i);
  }
  hash_map[s] = v;
}

uint64_t GetSliceHash(const Slice& s, uint32_t index,
    uint64_t max_num_buckets) {
  return hash_map[s.ToString()][index];

@@ -40,9 +29,14 @@ class CuckooBuilderTest {
 public:
  CuckooBuilderTest() {
    env_ = Env::Default();
    Options options;
    options.allow_mmap_reads = true;
    env_options_ = EnvOptions(options);
  }

  void CheckFileContents(const std::string& expected_data,
  void CheckFileContents(const std::vector<std::string>& keys,
      const std::vector<std::string>& values,
      const std::vector<uint64_t>& expected_locations,
      std::string expected_unused_bucket, uint64_t expected_max_buckets,
      uint32_t expected_num_hash_fun, bool expected_is_last_level) {
    // Read file

@@ -55,18 +49,19 @@ class CuckooBuilderTest {
    TableProperties* props = nullptr;
    ASSERT_OK(ReadTableProperties(read_file.get(), read_file_size,
        kCuckooTableMagicNumber, env_, nullptr, &props));
    ASSERT_EQ(props->num_entries, num_items);
    ASSERT_EQ(props->fixed_key_len, key_length);
    ASSERT_EQ(props->num_entries, keys.size());
    ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());

    // Check unused bucket.
    std::string unused_key = props->user_collected_properties[
        CuckooTablePropertyNames::kEmptyKey];
    ASSERT_EQ(expected_unused_bucket.substr(0, key_length), unused_key);
    ASSERT_EQ(expected_unused_bucket.substr(0,
        props->fixed_key_len), unused_key);

    uint32_t value_len_found =
        *reinterpret_cast<const uint32_t*>(props->user_collected_properties[
            CuckooTablePropertyNames::kValueLength].data());
    ASSERT_EQ(value_length, value_len_found);
    ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found);
    const uint64_t max_buckets =
        *reinterpret_cast<const uint64_t*>(props->user_collected_properties[
            CuckooTablePropertyNames::kMaxNumBuckets].data());

@@ -80,411 +75,322 @@ class CuckooBuilderTest {
        CuckooTablePropertyNames::kIsLastLevel].data());
    ASSERT_EQ(expected_is_last_level, is_last_level_found);
    delete props;

    // Check contents of the bucket.
    std::string read_data;
    read_data.resize(expected_data.size());
    Slice read_slice;
    ASSERT_OK(read_file->Read(0, expected_data.size(),
        &read_slice, &read_data[0]));
    ASSERT_EQ(expected_data, read_data);
    std::vector<bool> keys_found(keys.size(), false);
    uint32_t bucket_size = expected_unused_bucket.size();
    for (uint32_t i = 0; i < max_buckets; ++i) {
      Slice read_slice;
      ASSERT_OK(read_file->Read(i*bucket_size, bucket_size,
          &read_slice, nullptr));
      uint32_t key_idx = std::find(expected_locations.begin(),
          expected_locations.end(), i) - expected_locations.begin();
      if (key_idx == keys.size()) {
        // i is not one of the expected locations. Empty bucket.
        ASSERT_EQ(read_slice.compare(expected_unused_bucket), 0);
      } else {
        keys_found[key_idx] = true;
        ASSERT_EQ(read_slice.compare(keys[key_idx] + values[key_idx]), 0);
      }
    }
    for (auto key_found : keys_found) {
      // Check that all keys were found.
      ASSERT_TRUE(key_found);
    }
  }

  std::string GetInternalKey(Slice user_key, bool zero_seqno) {
    IterKey ikey;
    ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, kTypeValue);
    return ikey.GetKey().ToString();
  }

  Env* env_;
  const EnvOptions env_options_;
  EnvOptions env_options_;
  std::string fname;
  uint64_t file_size = 100000;
  uint32_t num_items = 20;
  uint32_t num_hash_fun = 64;
  double hash_table_ratio = 0.9;
  uint32_t ikey_length;
  uint32_t user_key_length;
  uint32_t key_length;
  uint32_t value_length;
  uint32_t bucket_length;
  const double kHashTableRatio = 0.9;
};

TEST(CuckooBuilderTest, NoCollision) {
  hash_map.clear();
  uint32_t expected_num_hash_fun = 2;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  uint64_t bucket_ids = 0;
  for (uint32_t i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
    bucket_ids += num_hash_fun;
  }

  ikey_length = keys[0].size();
  key_length = ikey_length;
  value_length = values[0].size();
  bucket_length = ikey_length + value_length;
  uint64_t expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
  std::string expected_unused_bucket;
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/BasicTest_writable_file";
  fname = test::TmpDir() + "/NoCollisionFullKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, false, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  uint32_t key_idx = 0;
  std::string expected_file_data = "";
  for (uint32_t i = 0; i < expected_max_buckets; i++) {
    if (key_idx * num_hash_fun == i && key_idx < num_items) {
      cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder.status());
      expected_file_data.append(keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  }
  ASSERT_OK(cuckoo_builder.Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data, expected_unused_bucket,
      expected_max_buckets, expected_num_hash_fun, false);
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      4, 100, GetSliceHash);
  ASSERT_OK(builder.status());
  ASSERT_OK(builder.Finish());
  ASSERT_OK(writable_file->Close());
  CheckFileContents({}, {}, {}, "", 0, 2, false);
}

TEST(CuckooBuilderTest, NoCollisionLastLevel) {
  hash_map.clear();
  uint32_t expected_num_hash_fun = 2;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  uint64_t bucket_ids = 0;
  for (uint32_t i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    // Set zero sequence number in all keys.
    ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
    bucket_ids += num_hash_fun;
TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
  uint32_t num_hash_fun = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
  hash_map = {
      {user_keys[0], {0, 1, 2, 3}},
      {user_keys[1], {1, 2, 3, 4}},
      {user_keys[2], {2, 3, 4, 5}},
      {user_keys[3], {3, 4, 5, 6}}
  };
  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
  std::vector<std::string> keys;
  for (auto& user_key : user_keys) {
    keys.push_back(GetInternalKey(user_key, false));
  }
  ikey_length = keys[0].size();
  user_key_length = user_keys[0].size();
  key_length = user_key_length;
  value_length = values[0].size();
  bucket_length = key_length + value_length;
  uint64_t expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_bucket = "keys10:";
  expected_unused_bucket.resize(bucket_length, 'a');

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
  fname = test::TmpDir() + "/NoCollisionFullKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, true, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  uint32_t key_idx = 0;
  std::string expected_file_data = "";
  for (uint32_t i = 0; i < expected_max_buckets; i++) {
    if (key_idx * num_hash_fun == i && key_idx < num_items) {
      cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder.status());
      expected_file_data.append(user_keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(builder.status());
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(keys[i]), Slice(values[i]));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  ASSERT_OK(cuckoo_builder.Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data, expected_unused_bucket,
      expected_max_buckets, expected_num_hash_fun, true);
  ASSERT_OK(builder.Finish());
  ASSERT_OK(writable_file->Close());

  uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
  std::string expected_unused_bucket = GetInternalKey("key05", true);
  expected_unused_bucket += std::string(values[0].size(), 'a');
  CheckFileContents(keys, values, expected_locations,
      expected_unused_bucket, expected_max_buckets, 2, false);
}

TEST(CuckooBuilderTest, WithCollision) {
  // Take keys with colliding hash function values.
  hash_map.clear();
  num_hash_fun = 20;
  num_items = num_hash_fun;
  uint32_t expected_num_hash_fun = num_hash_fun;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (uint32_t i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, num_hash_fun);
TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
  uint32_t num_hash_fun = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
  hash_map = {
      {user_keys[0], {0, 1, 2, 3}},
      {user_keys[1], {0, 1, 2, 3}},
      {user_keys[2], {0, 1, 2, 3}},
      {user_keys[3], {0, 1, 2, 3}},
  };
  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
  std::vector<std::string> keys;
  for (auto& user_key : user_keys) {
    keys.push_back(GetInternalKey(user_key, false));
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  key_length = ikey_length;
  bucket_length = key_length + value_length;
  uint64_t expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
  std::string expected_unused_bucket;
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollision_writable_file";
  fname = test::TmpDir() + "/WithCollisionFullKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), key_length, value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, false, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  uint32_t key_idx = 0;
  std::string expected_file_data = "";
  for (uint32_t i = 0; i < expected_max_buckets; i++) {
    if (key_idx == i && key_idx < num_items) {
      cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder.status());
      expected_file_data.append(keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(builder.status());
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(keys[i]), Slice(values[i]));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  ASSERT_OK(cuckoo_builder.Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data, expected_unused_bucket,
      expected_max_buckets, expected_num_hash_fun, false);
  ASSERT_OK(builder.Finish());
  ASSERT_OK(writable_file->Close());

  uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
  std::string expected_unused_bucket = GetInternalKey("key05", true);
  expected_unused_bucket += std::string(values[0].size(), 'a');
  CheckFileContents(keys, values, expected_locations,
      expected_unused_bucket, expected_max_buckets, 4, false);
}

TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
  // Take keys with colliding hash function values.
  // Take more keys than the number of hash functions.
  hash_map.clear();
  num_hash_fun = 20;
  num_items = num_hash_fun + 1;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (uint32_t i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWithTooManyCollisions_writable";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, false, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder.status());
    ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
  }
  cuckoo_builder.Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder.status().IsCorruption());
  cuckoo_builder.Abandon();
  writable_file->Close();
}

TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
  hash_map.clear();
  std::string user_key = "repeatedkey";
  AddHashLookups(user_key, 0, num_hash_fun);
  std::string key_to_reuse1, key_to_reuse2;
  ParsedInternalKey ikey1(user_key, 1000, kTypeValue);
  ParsedInternalKey ikey2(user_key, 1001, kTypeValue);
  AppendInternalKey(&key_to_reuse1, ikey1);
  AppendInternalKey(&key_to_reuse2, ikey2);
  std::string value = "value";
  ikey_length = key_to_reuse1.size();
  value_length = value.size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenSameKeyInserted_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, false, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  cuckoo_builder.Add(Slice(key_to_reuse1), Slice(value));
  ASSERT_OK(cuckoo_builder.status());
  ASSERT_EQ(cuckoo_builder.NumEntries(), 1U);
  cuckoo_builder.Add(Slice(key_to_reuse2), Slice(value));
  ASSERT_TRUE(cuckoo_builder.status().IsCorruption());
  cuckoo_builder.Abandon();
  writable_file->Close();
}

TEST(CuckooBuilderTest, WithACollisionPath) {
  hash_map.clear();
TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
  // Have two hash functions. Insert elements with overlapping hashes.
  // Finally insert an element with hash value somewhere in the middle
  // so that it displaces all the elements after that.
  num_hash_fun = 2;
  uint32_t expected_num_hash_fun = num_hash_fun;
  uint32_t max_search_depth = 100;
  num_items = 2*max_search_depth + 2;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  std::vector<uint64_t> expected_bucket_id(num_items);
  for (uint32_t i = 0; i < num_items - 1; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide with the next element.
    AddHashLookups(user_keys[i], i, num_hash_fun);
    if (i <= max_search_depth) {
      expected_bucket_id[i] = i;
    } else {
      expected_bucket_id[i] = i+1;
    }
  }
  user_keys.back() = "keys" + std::to_string(num_items + 99);
  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
  AppendInternalKey(&keys.back(), ikey);
  values.back() = "value" + std::to_string(num_items+100);
  // Make hash values collide with first and middle elements.
  // Inserting at 0 will fail after exceeding search depth limit.
  hash_map[user_keys.back()] = {0, max_search_depth + 1};
  expected_bucket_id.back() = max_search_depth + 1;

  ikey_length = keys[0].size();
  value_length = values[0].size();
  key_length = ikey_length;
  bucket_length = key_length + value_length;

  uint64_t expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
  std::string expected_unused_bucket;
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
  std::string expected_file_data = "";
  for (uint32_t i = 0; i < expected_max_buckets; i++) {
    expected_file_data += expected_unused_bucket;
  uint32_t num_hash_fun = 2;
  std::vector<std::string> user_keys = {"key01", "key02", "key03",
      "key04", "key05"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
  hash_map = {
      {user_keys[0], {0, 1}},
      {user_keys[1], {1, 2}},
      {user_keys[2], {2, 3}},
      {user_keys[3], {3, 4}},
      {user_keys[4], {0, 2}},
  };
  std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
  std::vector<std::string> keys;
  for (auto& user_key : user_keys) {
    keys.push_back(GetInternalKey(user_key, false));
  }

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollisionPath_writable_file";
  fname = test::TmpDir() + "/WithCollisionPathFullKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), key_length,
      value_length, hash_table_ratio, file_size,
      num_hash_fun, max_search_depth, false, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  for (uint32_t key_idx = 0; key_idx < num_items; key_idx++) {
    cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder.status());
    ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
    expected_file_data.replace(expected_bucket_id[key_idx]*bucket_length,
        bucket_length, keys[key_idx] + values[key_idx]);
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(builder.status());
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(keys[i]), Slice(values[i]));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  ASSERT_OK(cuckoo_builder.Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data, expected_unused_bucket,
      expected_max_buckets, expected_num_hash_fun, false);
  ASSERT_OK(builder.Finish());
  ASSERT_OK(writable_file->Close());

  uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
  std::string expected_unused_bucket = GetInternalKey("key06", true);
  expected_unused_bucket += std::string(values[0].size(), 'a');
  CheckFileContents(keys, values, expected_locations,
      expected_unused_bucket, expected_max_buckets, 2, false);
}

TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
  uint32_t num_hash_fun = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
  hash_map = {
      {user_keys[0], {0, 1, 2, 3}},
      {user_keys[1], {1, 2, 3, 4}},
      {user_keys[2], {2, 3, 4, 5}},
      {user_keys[3], {3, 4, 5, 6}}
  };
  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/NoCollisionUserKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(builder.status());
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  ASSERT_OK(builder.Finish());
  ASSERT_OK(writable_file->Close());

  uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
  std::string expected_unused_bucket = "key05";
  expected_unused_bucket += std::string(values[0].size(), 'a');
  CheckFileContents(user_keys, values, expected_locations,
      expected_unused_bucket, expected_max_buckets, 2, true);
}

TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
  uint32_t num_hash_fun = 4;
  std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
  hash_map = {
      {user_keys[0], {0, 1, 2, 3}},
      {user_keys[1], {0, 1, 2, 3}},
      {user_keys[2], {0, 1, 2, 3}},
      {user_keys[3], {0, 1, 2, 3}},
  };
  std::vector<uint64_t> expected_locations = {0, 1, 2, 3};

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollisionUserKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(builder.status());
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  ASSERT_OK(builder.Finish());
  ASSERT_OK(writable_file->Close());

  uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
  std::string expected_unused_bucket = "key05";
  expected_unused_bucket += std::string(values[0].size(), 'a');
  CheckFileContents(user_keys, values, expected_locations,
      expected_unused_bucket, expected_max_buckets, 4, true);
}

TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
  uint32_t num_hash_fun = 2;
  std::vector<std::string> user_keys = {"key01", "key02", "key03",
      "key04", "key05"};
  std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
  hash_map = {
      {user_keys[0], {0, 1}},
      {user_keys[1], {1, 2}},
      {user_keys[2], {2, 3}},
      {user_keys[3], {3, 4}},
      {user_keys[4], {0, 2}},
  };
  std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollisionPathUserKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 2, GetSliceHash);
  ASSERT_OK(builder.status());
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  ASSERT_OK(builder.Finish());
  ASSERT_OK(writable_file->Close());

  uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
  std::string expected_unused_bucket = "key06";
  expected_unused_bucket += std::string(values[0].size(), 'a');
  CheckFileContents(user_keys, values, expected_locations,
      expected_unused_bucket, expected_max_buckets, 2, true);
}

TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
  hash_map.clear();
  // Have two hash functions. Insert elements with overlapping hashes.
  // Finally insert an element which will displace all the current elements.
  num_hash_fun = 2;
  // Finally try inserting an element with hash value somewhere in the middle
  // and it should fail because the no. of elements to displace is too high.
  uint32_t num_hash_fun = 2;
  std::vector<std::string> user_keys = {"key01", "key02", "key03",
      "key04", "key05"};
  hash_map = {
      {user_keys[0], {0, 1}},
      {user_keys[1], {1, 2}},
      {user_keys[2], {2, 3}},
      {user_keys[3], {3, 4}},
      {user_keys[4], {0, 1}},
  };

  uint32_t max_search_depth = 100;
  num_items = 2*max_search_depth + 3;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (uint32_t i = 0; i < num_items - 1; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide with the next element.
    AddHashLookups(user_keys[i], i, num_hash_fun);
  }
  user_keys.back() = "keys" + std::to_string(num_items + 99);
  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
  AppendInternalKey(&keys.back(), ikey);
  values.back() = "value" + std::to_string(num_items+100);
  // Make hash values collide with middle element.
  hash_map[user_keys.back()] = {0, max_search_depth + 1};

  ikey_length = keys[0].size();
  value_length = values[0].size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenCollisionPathTooLong_writable";
  fname = test::TmpDir() + "/WithCollisionPathUserKey";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      max_search_depth, false, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder.status());
    ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 2, GetSliceHash);
  ASSERT_OK(builder.status());
  for (uint32_t i = 0; i < user_keys.size(); i++) {
    builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
    ASSERT_EQ(builder.NumEntries(), i + 1);
    ASSERT_OK(builder.status());
  }
  cuckoo_builder.Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder.status().IsCorruption());
  cuckoo_builder.Abandon();
  writable_file->Close();
  ASSERT_TRUE(builder.Finish().IsNotSupported());
  ASSERT_OK(writable_file->Close());
}

TEST(CuckooBuilderTest, FailWhenTableIsFull) {
  hash_map.clear();
  file_size = 160;

  num_items = 7;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (uint32_t i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+1000);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], i, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  bucket_length = ikey_length + value_length;
  // Check that number of items is tight.
  ASSERT_GT(bucket_length * num_items, file_size);
  ASSERT_LE(bucket_length * (num_items-1), file_size);
TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
  hash_map = {{"repeatedkey", {0, 1, 2, 3}}};
  uint32_t num_hash_fun = 4;
  std::string user_key = "repeatedkey";

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenTabelIsFull_writable";
  fname = test::TmpDir() + "/FailWhenSameKeyInserted";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder cuckoo_builder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, false, GetSliceHash);
  ASSERT_OK(cuckoo_builder.status());
  for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder.status());
    ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
  }
  cuckoo_builder.Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder.status().IsCorruption());
  cuckoo_builder.Abandon();
  writable_file->Close();
  CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
      num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(builder.status());

  builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
  ASSERT_EQ(builder.NumEntries(), 1);
  ASSERT_OK(builder.status());
  builder.Add(Slice(GetInternalKey(user_key, true)), Slice("value2"));
  ASSERT_EQ(builder.NumEntries(), 2);
  ASSERT_OK(builder.status());

  ASSERT_TRUE(builder.Finish().IsNotSupported());
  ASSERT_OK(writable_file->Close());
}
}  // namespace rocksdb

table/cuckoo_table_reader.cc

@@ -28,9 +28,9 @@ CuckooTableReader::CuckooTableReader(
    const Options& options,
    std::unique_ptr<RandomAccessFile>&& file,
    uint64_t file_size,
    uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
    uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
    : file_(std::move(file)),
      GetSliceHash(GetSliceHashPtr) {
      get_slice_hash_(get_slice_hash) {
  if (!options.allow_mmap_reads) {
    status_ = Status::InvalidArgument("File is not mmaped");
  }

@@ -90,7 +90,7 @@ Status CuckooTableReader::Get(
    return Status::Corruption("Unable to parse key into internal key.");
  }
  for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) {
    uint64_t hash_val = GetSliceHash(ikey.user_key, hash_cnt, num_buckets_);
    uint64_t hash_val = get_slice_hash_(ikey.user_key, hash_cnt, num_buckets_);
    assert(hash_val < num_buckets_);
    uint64_t offset = hash_val * bucket_length_;
    const char* bucket = &file_data_.data()[offset];
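Bucket lookup in the reader is pure offset arithmetic over the fixed-size buckets the builder wrote. A small self-contained illustration, with example sizes:

#include <cassert>
#include <cstdint>

int main() {
  // bucket_length_ = fixed key length + fixed value length.
  uint64_t bucket_length = 16 + 8;  // example sizes
  uint64_t hash_val = 7;            // candidate bucket from the hash fn
  uint64_t offset = hash_val * bucket_length;
  // The reader then compares the key bytes at file_data_[offset].
  assert(offset == 168);
  return 0;
}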

table/cuckoo_table_reader.h

@@ -29,7 +29,7 @@ class CuckooTableReader: public TableReader {
      const Options& options,
      std::unique_ptr<RandomAccessFile>&& file,
      uint64_t file_size,
      uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
      uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
  ~CuckooTableReader() {}

  std::shared_ptr<const TableProperties> GetTableProperties() const override {

@@ -67,7 +67,7 @@ class CuckooTableReader: public TableReader {
  uint32_t value_length_;
  uint32_t bucket_length_;
  uint64_t num_buckets_;
  uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
  uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
      uint64_t max_num_buckets);
};

table/cuckoo_table_reader_test.cc

@@ -101,12 +101,11 @@ class CuckooReaderTest {
    return std::string(reinterpret_cast<char*>(&i), sizeof(i));
  }

  void CreateCuckooFile(bool is_last_level) {
  void CreateCuckooFileAndCheckReader() {
    unique_ptr<WritableFile> writable_file;
    ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
    CuckooTableBuilder builder(
        writable_file.get(), keys[0].size(), values[0].size(), 0.9,
        10000, kNumHashFunc, 100, is_last_level, GetSliceHash);
        writable_file.get(), 0.9, kNumHashFunc, 100, GetSliceHash);
    ASSERT_OK(builder.status());
    for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
      builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));

@@ -117,9 +116,8 @@ class CuckooReaderTest {
    ASSERT_EQ(num_items, builder.NumEntries());
    file_size = builder.FileSize();
    ASSERT_OK(writable_file->Close());
  }

  void CheckReader() {
    // Check reader now.
    unique_ptr<RandomAccessFile> read_file;
    ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
    CuckooTableReader reader(

@@ -135,6 +133,14 @@ class CuckooReaderTest {
      ASSERT_EQ(1, v.call_count);
    }
  }
  void UpdateKeys(bool with_zero_seqno) {
    for (uint32_t i = 0; i < num_items; i++) {
      ParsedInternalKey ikey(user_keys[i],
          with_zero_seqno ? 0 : i + 1000, kTypeValue);
      keys[i].clear();
      AppendInternalKey(&keys[i], ikey);
    }
  }

  void CheckIterator() {
    unique_ptr<RandomAccessFile> read_file;

@@ -216,23 +222,22 @@ TEST(CuckooReaderTest, WhenKeyExists) {
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + NumToStr(i);
    // Give disjoint hash values.
    AddHashLookups(user_keys[i], i * kNumHashFunc, kNumHashFunc);
    AddHashLookups(user_keys[i], i, kNumHashFunc);
  }
  CreateCuckooFile(false);
  CheckReader();
  CreateCuckooFileAndCheckReader();
  // Last level file.
  CreateCuckooFile(true);
  CheckReader();
  UpdateKeys(true);
  CreateCuckooFileAndCheckReader();
  // Test with collision. Make all hash values collide.
  hash_map.clear();
  for (uint32_t i = 0; i < num_items; i++) {
    AddHashLookups(user_keys[i], 0, kNumHashFunc);
  }
  CreateCuckooFile(false);
  CheckReader();
  UpdateKeys(false);
  CreateCuckooFileAndCheckReader();
  // Last level file.
  CreateCuckooFile(true);
  CheckReader();
  UpdateKeys(true);
  CreateCuckooFileAndCheckReader();
}

TEST(CuckooReaderTest, CheckIterator) {

@@ -244,18 +249,19 @@ TEST(CuckooReaderTest, CheckIterator) {
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + NumToStr(i);
    // Give disjoint hash values, in reverse order.
    AddHashLookups(user_keys[i], (num_items-i-1)*kNumHashFunc, kNumHashFunc);
    AddHashLookups(user_keys[i], num_items-i-1, kNumHashFunc);
  }
  CreateCuckooFile(false);
  CreateCuckooFileAndCheckReader();
  CheckIterator();
  // Last level file.
  CreateCuckooFile(true);
  UpdateKeys(true);
  CreateCuckooFileAndCheckReader();
  CheckIterator();
}

TEST(CuckooReaderTest, WhenKeyNotFound) {
  // Add keys with colliding hash values.
  SetUp(kNumHashFunc / 2);
  SetUp(kNumHashFunc);
  fname = test::TmpDir() + "/CuckooReader_WhenKeyNotFound";
  for (uint64_t i = 0; i < num_items; i++) {
    user_keys[i] = "key" + NumToStr(i);

@@ -265,8 +271,7 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, kNumHashFunc);
  }
  CreateCuckooFile(false);
  CheckReader();
  CreateCuckooFileAndCheckReader();
  unique_ptr<RandomAccessFile> read_file;
  ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
  CuckooTableReader reader(

@@ -351,20 +356,17 @@ void BM_CuckooRead(uint64_t num, uint32_t key_length,
  }
  std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark";

  uint64_t predicted_file_size =
      num * (key_length + value_length) / hash_ratio + 1024;

  unique_ptr<WritableFile> writable_file;
  ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
  CuckooTableBuilder builder(
      writable_file.get(), key_length + 8, value_length, hash_ratio,
      predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash);
      writable_file.get(), hash_ratio,
      kMaxNumHashTable, 1000, GetSliceMurmurHash);
  ASSERT_OK(builder.status());
  for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
    // Value is just a part of key.
    std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
    new_key = std::string(key_length - new_key.size(), 'k') + new_key;
    ParsedInternalKey ikey(new_key, num, kTypeValue);
    ParsedInternalKey ikey(new_key, 0, kTypeValue);
    std::string full_key;
    AppendInternalKey(&full_key, ikey);
    builder.Add(Slice(full_key), Slice(&full_key[0], value_length));