Minor changes to CuckooTableBuilder
Summary:
- Copy the key and value into the in-memory hash table during the Add operation. Also modified cuckoo_table_reader_test to use this.
- Store only the user_key in the in-memory hash table if it is a last-level file.
- Handle carryover while choosing an unused key in the Finish() method, in case an unused key was never found before the Finish() call.

Test Plan:
cuckoo_table_reader_test --enable_perf
cuckoo_table_builder_test
valgrind_check
asan_check

Reviewers: sdong, yhchiang, igor, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20715
This commit is contained in:
parent f04356e660
commit 91c01485d1
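To illustrate the carryover handling mentioned in the summary (and implemented in the Finish() hunk below): the new logic walks backward from the last byte of prev_key_, incrementing bytes until the candidate compares strictly greater than the previous key. The snippet below is a minimal standalone sketch of that idea; the helper name FindKeyNextTo and the sample keys are illustrative only and are not part of the RocksDB code.

```cpp
#include <cassert>
#include <string>

// Minimal sketch of the carryover handling described in the summary
// (illustrative helper, not the RocksDB implementation): starting from the
// last byte of prev_key, increment bytes toward the front until the
// candidate compares strictly greater than prev_key. A byte that wraps
// around (0xFF -> 0x00) is the "carryover" case, which moves the scan one
// position to the left.
bool FindKeyNextTo(const std::string& prev_key, std::string* next_key) {
  std::string candidate = prev_key;
  for (int pos = static_cast<int>(candidate.size()) - 1; pos >= 0; --pos) {
    ++candidate[pos];
    if (candidate > prev_key) {  // std::string compares bytes as unsigned
      *next_key = candidate;
      return true;
    }
    // Otherwise the byte wrapped around; carry over to the previous byte.
  }
  return false;  // carryover propagated past the first byte: no such key
}

int main() {
  std::string next;
  assert(FindKeyNextTo("abc", &next) && next == "abd");

  // Last byte is 0xFF, so the increment carries over to the byte before it.
  const std::string edge("ab\xff", 3);
  assert(FindKeyNextTo(edge, &next) && next > edge);
  return 0;
}
```

If the increment carries over past the first byte, every candidate has wrapped around and no strictly larger key of that length exists; that is the case where Finish() returns Status::Corruption("Unable to find unused key"), as in the hunk below.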
@@ -86,7 +86,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
       bucket_found = true;
       break;
     } else {
-      if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) {
+      if (user_key.compare(
+            is_last_level_file_ ? Slice(buckets_[hash_val].key)
+            : ExtractUserKey(Slice(buckets_[hash_val].key))) == 0) {
         status_ = Status::Corruption("Same key is being inserted again.");
         return;
       }
@@ -112,8 +114,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
       hash_vals.push_back(hash_val);
     }
   }
-  buckets_[bucket_id].key = key;
-  buckets_[bucket_id].value = value;
+  if (is_last_level_file_) {
+    buckets_[bucket_id].key.assign(user_key.data(), user_key.size());
+  } else {
+    buckets_[bucket_id].key.assign(key.data(), key.size());
+  }
+  buckets_[bucket_id].value.assign(value.data(), value.size());
   buckets_[bucket_id].is_empty = false;
 
   properties_.num_entries++;
@@ -149,12 +155,18 @@ Status CuckooTableBuilder::Finish() {
     if (prev_key_.empty()) {
       return Status::Corruption("Unable to find unused key");
     }
+    // Try to find the key next to prev_key_ by handling carryovers.
     std::string new_user_key = prev_key_;
-    new_user_key.back()++;
-    // We ignore carry-overs and check that it is larger than previous key.
+    int curr_pos = new_user_key.size() - 1;
+    while (curr_pos >= 0) {
+      ++new_user_key[curr_pos];
       if (new_user_key > prev_key_) {
         unused_user_key_ = new_user_key;
-      } else {
+        break;
+      }
+      --curr_pos;
+    }
+    if (curr_pos < 0) {
       return Status::Corruption("Unable to find unused key");
     }
   }
@@ -179,17 +191,9 @@ Status CuckooTableBuilder::Finish() {
       s = file_->Append(Slice(unused_bucket));
     } else {
       ++num_added;
-      if (is_last_level_file_) {
-        Slice user_key = ExtractUserKey(bucket.key);
-        s = file_->Append(user_key);
-        if (s.ok()) {
-          s = file_->Append(bucket.value);
-        }
-      } else {
-        s = file_->Append(bucket.key);
-        if (s.ok()) {
-          s = file_->Append(bucket.value);
-        }
+      s = file_->Append(Slice(bucket.key));
+      if (s.ok()) {
+        s = file_->Append(Slice(bucket.value));
       }
     }
     if (!s.ok()) {
@@ -307,7 +311,9 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
     CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
     for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
       uint64_t child_bucket_id = GetSliceHash(
-          ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
+          is_last_level_file_ ? curr_bucket.key
+                              : ExtractUserKey(Slice(curr_bucket.key)),
+          hash_cnt, max_num_buckets_);
       if (buckets_[child_bucket_id].make_space_for_key_call_id ==
           make_space_for_key_call_id_) {
         continue;
@@ -58,8 +58,8 @@ class CuckooTableBuilder: public TableBuilder {
  private:
   struct CuckooBucket {
     CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
-    Slice key;
-    Slice value;
+    std::string key;
+    std::string value;
     bool is_empty;
     uint64_t make_space_for_key_call_id;
   };
@@ -236,18 +236,6 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
 
 // Performance tests
 namespace {
-void GenerateKeys(uint64_t num, std::vector<std::string>* keys,
-    uint32_t user_key_length) {
-  for (uint64_t i = 0; i < num; ++i) {
-    std::string new_key(reinterpret_cast<char*>(&i), sizeof(i));
-    new_key = std::string(user_key_length - new_key.size(), 'k') + new_key;
-    ParsedInternalKey ikey(new_key, num, kTypeValue);
-    std::string full_key;
-    AppendInternalKey(&full_key, ikey);
-    keys->push_back(full_key);
-  }
-}
-
 bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) {
   // Deliberately empty.
   return false;
@@ -266,6 +254,7 @@ bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) {
 void BM_CuckooRead(uint64_t num, uint32_t key_length,
     uint32_t value_length, uint64_t num_reads, double hash_ratio) {
   assert(value_length <= key_length);
+  assert(8 <= key_length);
   std::vector<std::string> keys;
   Options options;
   options.allow_mmap_reads = true;
@@ -277,21 +266,26 @@ void BM_CuckooRead(uint64_t num, uint32_t key_length,
   }
   std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark";
 
-  GenerateKeys(num, &keys, key_length);
   uint64_t predicted_file_size =
     num * (key_length + value_length) / hash_ratio + 1024;
 
   unique_ptr<WritableFile> writable_file;
   ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
   CuckooTableBuilder builder(
-      writable_file.get(), keys[0].size(), value_length, hash_ratio,
+      writable_file.get(), key_length + 8, value_length, hash_ratio,
       predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash);
   ASSERT_OK(builder.status());
-  for (uint32_t key_idx = 0; key_idx < num; ++key_idx) {
+  for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
     // Value is just a part of key.
-    builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], value_length));
+    std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
+    new_key = std::string(key_length - new_key.size(), 'k') + new_key;
+    ParsedInternalKey ikey(new_key, num, kTypeValue);
+    std::string full_key;
+    AppendInternalKey(&full_key, ikey);
+    builder.Add(Slice(full_key), Slice(&full_key[0], value_length));
     ASSERT_EQ(builder.NumEntries(), key_idx + 1);
     ASSERT_OK(builder.status());
+    keys.push_back(full_key);
   }
   ASSERT_OK(builder.Finish());
   ASSERT_EQ(num, builder.NumEntries());