Minor changes to CuckooTableBuilder

Summary:
- Copy the key and value to in-memory hash table during Add operation. Also modified cuckoo_table_reader_test to use this.
- Store only the user_key in in-memory hash table if it is last level file.
- Handle Carryover while chosing unused key in Finish() method in case unused key was never found before Finish() call.

Test Plan:
cuckoo_table_reader_test --enable_perf
cuckoo_table_builder_test
valgrind_check
asan_check

Reviewers: sdong, yhchiang, igor, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20715
This commit is contained in:
Radheshyam Balasundaram 2014-07-28 17:14:25 -07:00
parent f04356e660
commit 91c01485d1
3 changed files with 38 additions and 38 deletions

View file

@ -86,7 +86,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
bucket_found = true; bucket_found = true;
break; break;
} else { } else {
if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) { if (user_key.compare(
is_last_level_file_ ? Slice(buckets_[hash_val].key)
: ExtractUserKey(Slice(buckets_[hash_val].key))) == 0) {
status_ = Status::Corruption("Same key is being inserted again."); status_ = Status::Corruption("Same key is being inserted again.");
return; return;
} }
@ -112,8 +114,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
hash_vals.push_back(hash_val); hash_vals.push_back(hash_val);
} }
} }
buckets_[bucket_id].key = key; if (is_last_level_file_) {
buckets_[bucket_id].value = value; buckets_[bucket_id].key.assign(user_key.data(), user_key.size());
} else {
buckets_[bucket_id].key.assign(key.data(), key.size());
}
buckets_[bucket_id].value.assign(value.data(), value.size());
buckets_[bucket_id].is_empty = false; buckets_[bucket_id].is_empty = false;
properties_.num_entries++; properties_.num_entries++;
@ -149,12 +155,18 @@ Status CuckooTableBuilder::Finish() {
if (prev_key_.empty()) { if (prev_key_.empty()) {
return Status::Corruption("Unable to find unused key"); return Status::Corruption("Unable to find unused key");
} }
// Try to find the key next to prev_key_ by handling carryovers.
std::string new_user_key = prev_key_; std::string new_user_key = prev_key_;
new_user_key.back()++; int curr_pos = new_user_key.size() - 1;
// We ignore carry-overs and check that it is larger than previous key. while (curr_pos >= 0) {
if (new_user_key > prev_key_) { ++new_user_key[curr_pos];
unused_user_key_ = new_user_key; if (new_user_key > prev_key_) {
} else { unused_user_key_ = new_user_key;
break;
}
--curr_pos;
}
if (curr_pos < 0) {
return Status::Corruption("Unable to find unused key"); return Status::Corruption("Unable to find unused key");
} }
} }
@ -179,17 +191,9 @@ Status CuckooTableBuilder::Finish() {
s = file_->Append(Slice(unused_bucket)); s = file_->Append(Slice(unused_bucket));
} else { } else {
++num_added; ++num_added;
if (is_last_level_file_) { s = file_->Append(Slice(bucket.key));
Slice user_key = ExtractUserKey(bucket.key); if (s.ok()) {
s = file_->Append(user_key); s = file_->Append(Slice(bucket.value));
if (s.ok()) {
s = file_->Append(bucket.value);
}
} else {
s = file_->Append(bucket.key);
if (s.ok()) {
s = file_->Append(bucket.value);
}
} }
} }
if (!s.ok()) { if (!s.ok()) {
@ -307,7 +311,9 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id]; CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t child_bucket_id = GetSliceHash( uint64_t child_bucket_id = GetSliceHash(
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_); is_last_level_file_ ? curr_bucket.key
: ExtractUserKey(Slice(curr_bucket.key)),
hash_cnt, max_num_buckets_);
if (buckets_[child_bucket_id].make_space_for_key_call_id == if (buckets_[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id_) { make_space_for_key_call_id_) {
continue; continue;

View file

@ -58,8 +58,8 @@ class CuckooTableBuilder: public TableBuilder {
private: private:
struct CuckooBucket { struct CuckooBucket {
CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {} CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
Slice key; std::string key;
Slice value; std::string value;
bool is_empty; bool is_empty;
uint64_t make_space_for_key_call_id; uint64_t make_space_for_key_call_id;
}; };

View file

@ -236,18 +236,6 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
// Performance tests // Performance tests
namespace { namespace {
void GenerateKeys(uint64_t num, std::vector<std::string>* keys,
uint32_t user_key_length) {
for (uint64_t i = 0; i < num; ++i) {
std::string new_key(reinterpret_cast<char*>(&i), sizeof(i));
new_key = std::string(user_key_length - new_key.size(), 'k') + new_key;
ParsedInternalKey ikey(new_key, num, kTypeValue);
std::string full_key;
AppendInternalKey(&full_key, ikey);
keys->push_back(full_key);
}
}
bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) { bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) {
// Deliberately empty. // Deliberately empty.
return false; return false;
@ -266,6 +254,7 @@ bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) {
void BM_CuckooRead(uint64_t num, uint32_t key_length, void BM_CuckooRead(uint64_t num, uint32_t key_length,
uint32_t value_length, uint64_t num_reads, double hash_ratio) { uint32_t value_length, uint64_t num_reads, double hash_ratio) {
assert(value_length <= key_length); assert(value_length <= key_length);
assert(8 <= key_length);
std::vector<std::string> keys; std::vector<std::string> keys;
Options options; Options options;
options.allow_mmap_reads = true; options.allow_mmap_reads = true;
@ -277,21 +266,26 @@ void BM_CuckooRead(uint64_t num, uint32_t key_length,
} }
std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark"; std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark";
GenerateKeys(num, &keys, key_length);
uint64_t predicted_file_size = uint64_t predicted_file_size =
num * (key_length + value_length) / hash_ratio + 1024; num * (key_length + value_length) / hash_ratio + 1024;
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder( CuckooTableBuilder builder(
writable_file.get(), keys[0].size(), value_length, hash_ratio, writable_file.get(), key_length + 8, value_length, hash_ratio,
predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash); predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t key_idx = 0; key_idx < num; ++key_idx) { for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
// Value is just a part of key. // Value is just a part of key.
builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], value_length)); std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
new_key = std::string(key_length - new_key.size(), 'k') + new_key;
ParsedInternalKey ikey(new_key, num, kTypeValue);
std::string full_key;
AppendInternalKey(&full_key, ikey);
builder.Add(Slice(full_key), Slice(&full_key[0], value_length));
ASSERT_EQ(builder.NumEntries(), key_idx + 1); ASSERT_EQ(builder.NumEntries(), key_idx + 1);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
keys.push_back(full_key);
} }
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_EQ(num, builder.NumEntries()); ASSERT_EQ(num, builder.NumEntries());