Minor changes to CuckooTableBuilder

Summary:
- Copy the key and value to in-memory hash table during Add operation. Also modified cuckoo_table_reader_test to use this.
- Store only the user_key in in-memory hash table if it is last level file.
- Handle Carryover while chosing unused key in Finish() method in case unused key was never found before Finish() call.

Test Plan:
cuckoo_table_reader_test --enable_perf
cuckoo_table_builder_test
valgrind_check
asan_check

Reviewers: sdong, yhchiang, igor, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20715
This commit is contained in:
Radheshyam Balasundaram 2014-07-28 17:14:25 -07:00
parent f04356e660
commit 91c01485d1
3 changed files with 38 additions and 38 deletions

View file

@ -86,7 +86,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
bucket_found = true;
break;
} else {
if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) {
if (user_key.compare(
is_last_level_file_ ? Slice(buckets_[hash_val].key)
: ExtractUserKey(Slice(buckets_[hash_val].key))) == 0) {
status_ = Status::Corruption("Same key is being inserted again.");
return;
}
@ -112,8 +114,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
hash_vals.push_back(hash_val);
}
}
buckets_[bucket_id].key = key;
buckets_[bucket_id].value = value;
if (is_last_level_file_) {
buckets_[bucket_id].key.assign(user_key.data(), user_key.size());
} else {
buckets_[bucket_id].key.assign(key.data(), key.size());
}
buckets_[bucket_id].value.assign(value.data(), value.size());
buckets_[bucket_id].is_empty = false;
properties_.num_entries++;
@ -149,12 +155,18 @@ Status CuckooTableBuilder::Finish() {
if (prev_key_.empty()) {
return Status::Corruption("Unable to find unused key");
}
// Try to find the key next to prev_key_ by handling carryovers.
std::string new_user_key = prev_key_;
new_user_key.back()++;
// We ignore carry-overs and check that it is larger than previous key.
if (new_user_key > prev_key_) {
unused_user_key_ = new_user_key;
} else {
int curr_pos = new_user_key.size() - 1;
while (curr_pos >= 0) {
++new_user_key[curr_pos];
if (new_user_key > prev_key_) {
unused_user_key_ = new_user_key;
break;
}
--curr_pos;
}
if (curr_pos < 0) {
return Status::Corruption("Unable to find unused key");
}
}
@ -179,17 +191,9 @@ Status CuckooTableBuilder::Finish() {
s = file_->Append(Slice(unused_bucket));
} else {
++num_added;
if (is_last_level_file_) {
Slice user_key = ExtractUserKey(bucket.key);
s = file_->Append(user_key);
if (s.ok()) {
s = file_->Append(bucket.value);
}
} else {
s = file_->Append(bucket.key);
if (s.ok()) {
s = file_->Append(bucket.value);
}
s = file_->Append(Slice(bucket.key));
if (s.ok()) {
s = file_->Append(Slice(bucket.value));
}
}
if (!s.ok()) {
@ -307,7 +311,9 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t child_bucket_id = GetSliceHash(
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
is_last_level_file_ ? curr_bucket.key
: ExtractUserKey(Slice(curr_bucket.key)),
hash_cnt, max_num_buckets_);
if (buckets_[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id_) {
continue;

View file

@ -58,8 +58,8 @@ class CuckooTableBuilder: public TableBuilder {
private:
struct CuckooBucket {
CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
Slice key;
Slice value;
std::string key;
std::string value;
bool is_empty;
uint64_t make_space_for_key_call_id;
};

View file

@ -236,18 +236,6 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
// Performance tests
namespace {
void GenerateKeys(uint64_t num, std::vector<std::string>* keys,
uint32_t user_key_length) {
for (uint64_t i = 0; i < num; ++i) {
std::string new_key(reinterpret_cast<char*>(&i), sizeof(i));
new_key = std::string(user_key_length - new_key.size(), 'k') + new_key;
ParsedInternalKey ikey(new_key, num, kTypeValue);
std::string full_key;
AppendInternalKey(&full_key, ikey);
keys->push_back(full_key);
}
}
bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) {
// Deliberately empty.
return false;
@ -266,6 +254,7 @@ bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) {
void BM_CuckooRead(uint64_t num, uint32_t key_length,
uint32_t value_length, uint64_t num_reads, double hash_ratio) {
assert(value_length <= key_length);
assert(8 <= key_length);
std::vector<std::string> keys;
Options options;
options.allow_mmap_reads = true;
@ -277,21 +266,26 @@ void BM_CuckooRead(uint64_t num, uint32_t key_length,
}
std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark";
GenerateKeys(num, &keys, key_length);
uint64_t predicted_file_size =
num * (key_length + value_length) / hash_ratio + 1024;
unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder(
writable_file.get(), keys[0].size(), value_length, hash_ratio,
writable_file.get(), key_length + 8, value_length, hash_ratio,
predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash);
ASSERT_OK(builder.status());
for (uint32_t key_idx = 0; key_idx < num; ++key_idx) {
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
// Value is just a part of key.
builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], value_length));
std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
new_key = std::string(key_length - new_key.size(), 'k') + new_key;
ParsedInternalKey ikey(new_key, num, kTypeValue);
std::string full_key;
AppendInternalKey(&full_key, ikey);
builder.Add(Slice(full_key), Slice(&full_key[0], value_length));
ASSERT_EQ(builder.NumEntries(), key_idx + 1);
ASSERT_OK(builder.status());
keys.push_back(full_key);
}
ASSERT_OK(builder.Finish());
ASSERT_EQ(num, builder.NumEntries());