handle kDelete type in cuckoo builder

Summary:
when I changed std::vector<std::string, std::string> to std::string to
store key/value pairs in builder, I missed the handling for kDeletion
type. As a result, value_size_ can be wrong if the first add key is for
deletion.
The is captured by ./cuckoo_table_db_test

Test Plan:
./cuckoo_table_db_test
./cuckoo_table_reader_test
./cuckoo_table_builder_test

Reviewers: sdong, yhchiang, igor

Reviewed By: igor

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D24045
This commit is contained in:
Lei Jin 2014-09-29 10:25:21 -07:00
parent 389edb6b1b
commit 2dc6f62bb9
2 changed files with 59 additions and 7 deletions

View File

@ -60,9 +60,11 @@ CuckooTableBuilder::CuckooTableBuilder(
hash_table_size_(use_module_hash ? 0 : 2),
is_last_level_file_(false),
has_seen_first_key_(false),
has_seen_first_value_(false),
key_size_(0),
value_size_(0),
num_entries_(0),
num_values_(0),
ucomp_(user_comparator),
use_module_hash_(use_module_hash),
identity_as_first_hash_(identity_as_first_hash),
@ -84,6 +86,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
status_ = Status::Corruption("Unable to parse key into inernal key.");
return;
}
if (ikey.type != kTypeDeletion && ikey.type != kTypeValue) {
status_ = Status::NotSupported("Unsupported key type " +
std::to_string(ikey.type));
return;
}
// Determine if we can ignore the sequence number and value type from
// internal keys by looking at sequence number from first key. We assume
// that if first key has a zero sequence number, then all the remaining
@ -94,16 +102,38 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size();
value_size_ = value.size();
}
if (key_size_ != (is_last_level_file_ ? ikey.user_key.size() : key.size())) {
status_ = Status::NotSupported("all keys have to be the same size");
return;
}
// Even if one sequence number is non-zero, then it is not last level.
assert(!is_last_level_file_ || ikey.sequence == 0);
if (is_last_level_file_) {
kvs_.append(ikey.user_key.data(), ikey.user_key.size());
if (ikey.type == kTypeValue) {
if (!has_seen_first_value_) {
has_seen_first_value_ = true;
value_size_ = value.size();
}
if (value_size_ != value.size()) {
status_ = Status::NotSupported("all values have to be the same size");
return;
}
if (is_last_level_file_) {
kvs_.append(ikey.user_key.data(), ikey.user_key.size());
} else {
kvs_.append(key.data(), key.size());
}
kvs_.append(value.data(), value.size());
++num_values_;
} else {
kvs_.append(key.data(), key.size());
if (is_last_level_file_) {
deleted_keys_.append(ikey.user_key.data(), ikey.user_key.size());
} else {
deleted_keys_.append(key.data(), key.size());
}
}
kvs_.append(value.data(), value.size());
++num_entries_;
// In order to fill the empty buckets in the hash table, we identify a
@ -123,15 +153,30 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
}
}
bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const {
assert(closed_);
return idx >= num_values_;
}
Slice CuckooTableBuilder::GetKey(uint64_t idx) const {
assert(closed_);
if (IsDeletedKey(idx)) {
return Slice(&deleted_keys_[(idx - num_values_) * key_size_], key_size_);
}
return Slice(&kvs_[idx * (key_size_ + value_size_)], key_size_);
}
Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const {
assert(closed_);
return is_last_level_file_ ? GetKey(idx) : ExtractUserKey(GetKey(idx));
}
Slice CuckooTableBuilder::GetValue(uint64_t idx) const {
assert(closed_);
if (IsDeletedKey(idx)) {
static std::string empty_value(value_size_, 'a');
return Slice(empty_value);
}
return Slice(&kvs_[idx * (key_size_ + value_size_) + key_size_], value_size_);
}
@ -256,7 +301,9 @@ Status CuckooTableBuilder::Finish() {
++num_added;
s = file_->Append(GetKey(bucket.vector_idx));
if (s.ok()) {
s = file_->Append(GetValue(bucket.vector_idx));
if (value_size_ > 0) {
s = file_->Append(GetValue(bucket.vector_idx));
}
}
}
if (!s.ok()) {

View File

@ -75,6 +75,7 @@ class CuckooTableBuilder: public TableBuilder {
uint64_t* bucket_id);
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
inline bool IsDeletedKey(uint64_t idx) const;
inline Slice GetKey(uint64_t idx) const;
inline Slice GetUserKey(uint64_t idx) const;
inline Slice GetValue(uint64_t idx) const;
@ -88,14 +89,18 @@ class CuckooTableBuilder: public TableBuilder {
uint64_t hash_table_size_;
bool is_last_level_file_;
bool has_seen_first_key_;
bool has_seen_first_value_;
uint64_t key_size_;
uint64_t value_size_;
// A list of fixed-size key-value pairs concatenating into a string.
// Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific
// key / value given an index
std::string kvs_;
// Number of key-value pairs stored in kvs_
std::string deleted_keys_;
// Number of key-value pairs stored in kvs_ + number of deleted keys
uint64_t num_entries_;
// Number of keys that contain value (non-deletion op)
uint64_t num_values_;
Status status_;
TableProperties properties_;
const Comparator* ucomp_;