From b2e7ee8b414822371f930bdf26c2f5b1a0ed5fda Mon Sep 17 00:00:00 2001 From: Kai Liu Date: Thu, 13 Feb 2014 15:27:59 -0800 Subject: [PATCH] Followup code refactor on plain table Summary: Fixed most comments in https://reviews.facebook.net/D15429. Still have some remaining comments left. Test Plan: make all check Reviewers: sdong, haobo Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D15885 --- db/plain_table_db_test.cc | 8 +- table/plain_table_reader.cc | 181 ++++++++++++++++++------------------ table/plain_table_reader.h | 69 ++++++++------ table/table_reader_bench.cc | 12 +-- 4 files changed, 139 insertions(+), 131 deletions(-) diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index c195253b01..3ad7ce8d7e 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -190,12 +190,10 @@ class TestPlainTableReader : public PlainTableReader { const TableProperties* table_properties, unique_ptr&& file, const Options& options, bool* expect_bloom_not_match) - : PlainTableReader(storage_options, icomparator, file_size, - bloom_bits_per_key, hash_table_ratio, index_sparseness, - table_properties), + : PlainTableReader(options, std::move(file), storage_options, icomparator, + file_size, bloom_bits_per_key, hash_table_ratio, + index_sparseness, table_properties), expect_bloom_not_match_(expect_bloom_not_match) { - file_ = std::move(file); - options_ = options; Status s = PopulateIndex(); ASSERT_TRUE(s.ok()); } diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index 3595cbe662..593530c87f 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -5,6 +5,7 @@ #include "table/plain_table_reader.h" #include +#include #include "db/dbformat.h" @@ -35,7 +36,7 @@ namespace rocksdb { namespace { -inline uint32_t GetSliceHash(Slice const& s) { +inline uint32_t GetSliceHash(const Slice& s) { return Hash(s.data(), s.size(), 397) ; } @@ -43,6 +44,12 @@ inline uint32_t 
GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) { return hash % num_buckets; } +// Safely gets a uint32_t element from a char array where, starting from +// `base`, every 4 bytes are treated as a fixed 32-bit integer. +inline uint32_t GetFixed32Element(const char* base, size_t offset) { + return DecodeFixed32(base + offset * sizeof(uint32_t)); +} + } // namespace // Iterator to iterate IndexedTable @@ -84,13 +91,14 @@ class PlainTableIterator : public Iterator { }; extern const uint64_t kPlainTableMagicNumber; -PlainTableReader::PlainTableReader(const EnvOptions& storage_options, - const InternalKeyComparator& icomparator, - uint64_t file_size, int bloom_bits_per_key, - double hash_table_ratio, - size_t index_sparseness, - const TableProperties* table_properties) - : soptions_(storage_options), +PlainTableReader::PlainTableReader( + const Options& options, unique_ptr&& file, + const EnvOptions& storage_options, const InternalKeyComparator& icomparator, + uint64_t file_size, int bloom_bits_per_key, double hash_table_ratio, + size_t index_sparseness, const TableProperties* table_properties) + : options_(options), + soptions_(storage_options), + file_(std::move(file)), internal_comparator_(icomparator), file_size_(file_size), kHashTableRatio(hash_table_ratio), @@ -98,12 +106,11 @@ PlainTableReader::PlainTableReader(const EnvOptions& storage_options, kIndexIntervalForSamePrefixKeys(index_sparseness), table_properties_(table_properties), data_end_offset_(table_properties_->data_size), - user_key_len_(table_properties->fixed_key_len) {} + user_key_len_(table_properties->fixed_key_len) { + assert(kHashTableRatio >= 0.0); +} PlainTableReader::~PlainTableReader() { - delete[] hash_table_; - delete[] sub_index_; - delete bloom_; } Status PlainTableReader::Open( @@ -126,10 +133,8 @@ Status PlainTableReader::Open( } std::unique_ptr new_reader(new PlainTableReader( - soptions, internal_comparator, file_size, bloom_bits_per_key, - hash_table_ratio, 
index_sparseness, props)); - new_reader->file_ = std::move(file); - new_reader->options_ = options; + options, std::move(file), soptions, internal_comparator, file_size, + bloom_bits_per_key, hash_table_ratio, index_sparseness, props)); // -- Populate Index s = new_reader->PopulateIndex(); @@ -198,6 +203,9 @@ class PlainTableReader::IndexRecordList { return result; } + // Each group in `groups_` contains a fixed number of records (determined by + // kNumRecordsPerGroup), which helps minimize the cost if resizing + // occurs. const size_t kNumRecordsPerGroup; IndexRecord* current_group_; // List of arrays allocated @@ -206,12 +214,11 @@ class PlainTableReader::IndexRecordList { }; Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, - int* num_prefixes, - DynamicBloom* bloom_) const { + int* num_prefixes) const { Slice prev_key_prefix_slice; uint32_t prev_key_prefix_hash = 0; uint32_t pos = data_start_offset_; - int key_index_within_prefix = 0; + int num_keys_per_prefix = 0; bool is_first_record = true; HistogramImpl keys_per_prefix_hist; // Need map to be ordered to make sure sub indexes generated @@ -222,7 +229,7 @@ Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, uint32_t key_offset = pos; ParsedInternalKey key; Slice value_slice; - Status s = Next(pos, &key, &value_slice, &pos); + Status s = Next(&pos, &key, &value_slice); if (!s.ok()) { return s; } @@ -235,22 +242,22 @@ Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, if (is_first_record || prev_key_prefix_slice != key_prefix_slice) { ++(*num_prefixes); if (!is_first_record) { - keys_per_prefix_hist.Add(key_index_within_prefix); + keys_per_prefix_hist.Add(num_keys_per_prefix); } - key_index_within_prefix = 0; + num_keys_per_prefix = 0; prev_key_prefix_slice = key_prefix_slice; prev_key_prefix_hash = GetSliceHash(key_prefix_slice); } if (kIndexIntervalForSamePrefixKeys == 0 || - key_index_within_prefix++ % 
kIndexIntervalForSamePrefixKeys == 0) { + num_keys_per_prefix++ % kIndexIntervalForSamePrefixKeys == 0) { // Add an index key for every kIndexIntervalForSamePrefixKeys keys record_list->AddRecord(prev_key_prefix_hash, key_offset); } is_first_record = false; } - keys_per_prefix_hist.Add(key_index_within_prefix); + keys_per_prefix_hist.Add(num_keys_per_prefix); Log(options_.info_log, "Number of Keys per prefix Histogram: %s", keys_per_prefix_hist.ToString().c_str()); @@ -258,36 +265,35 @@ Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, } void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) { - delete[] hash_table_; + index_.reset(); if (options_.prefix_extractor != nullptr) { uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey; if (bloom_total_bits > 0) { - bloom_ = new DynamicBloom(bloom_total_bits); + bloom_.reset(new DynamicBloom(bloom_total_bits)); } } if (options_.prefix_extractor == nullptr || kHashTableRatio <= 0) { // Fall back to pure binary search if the user fails to specify a prefix // extractor. 
- hash_table_size_ = 1; + index_size_ = 1; } else { double hash_table_size_multipier = 1.0 / kHashTableRatio; - hash_table_size_ = num_prefixes * hash_table_size_multipier + 1; + index_size_ = num_prefixes * hash_table_size_multipier + 1; } - hash_table_ = new uint32_t[hash_table_size_]; + index_.reset(new uint32_t[index_size_]); } size_t PlainTableReader::BucketizeIndexesAndFillBloom( - IndexRecordList& record_list, int num_prefixes, - std::vector* hash_to_offsets, + IndexRecordList* record_list, std::vector* hash_to_offsets, std::vector* bucket_count) { size_t sub_index_size_needed = 0; bool first = true; uint32_t prev_hash = 0; - size_t num_records = record_list.GetNumRecords(); + size_t num_records = record_list->GetNumRecords(); for (size_t i = 0; i < num_records; i++) { - IndexRecord* index_record = record_list.At(i); + IndexRecord* index_record = record_list->At(i); uint32_t cur_hash = index_record->hash; if (first || prev_hash != cur_hash) { prev_hash = cur_hash; @@ -296,7 +302,7 @@ size_t PlainTableReader::BucketizeIndexesAndFillBloom( bloom_->AddHash(cur_hash); } } - uint32_t bucket = GetBucketIdFromHash(cur_hash, hash_table_size_); + uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_); IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket]; index_record->next = prev_bucket_head; (*hash_to_offsets)[bucket] = index_record; @@ -326,27 +332,24 @@ void PlainTableReader::FillIndexes( size_t buffer_size = 8 * 8; size_t buffer_used = 0; sub_index_size_needed += buffer_size; - sub_index_ = new char[sub_index_size_needed]; + sub_index_.reset(new char[sub_index_size_needed]); size_t sub_index_offset = 0; - char* prev_ptr; - char* cur_ptr; - uint32_t* sub_index_ptr; - for (int i = 0; i < hash_table_size_; i++) { + for (int i = 0; i < index_size_; i++) { uint32_t num_keys_for_bucket = bucket_count[i]; switch (num_keys_for_bucket) { case 0: // No key for bucket - hash_table_[i] = data_end_offset_; + index_[i] = data_end_offset_; break; case 1: // point 
directly to the file offset - hash_table_[i] = hash_to_offsets[i]->offset; + index_[i] = hash_to_offsets[i]->offset; break; default: // point to second level indexes. - hash_table_[i] = sub_index_offset | kSubIndexMask; - prev_ptr = sub_index_ + sub_index_offset; - cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket); + index_[i] = sub_index_offset | kSubIndexMask; + char* prev_ptr = &sub_index_[sub_index_offset]; + char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket); sub_index_offset += (cur_ptr - prev_ptr); if (cur_ptr - prev_ptr > 2 || (cur_ptr - prev_ptr == 2 && num_keys_for_bucket <= 127)) { @@ -359,17 +362,16 @@ void PlainTableReader::FillIndexes( sub_index_size_needed += buffer_size; buffer_size *= 2; char* new_sub_index = new char[sub_index_size_needed]; - memcpy(new_sub_index, sub_index_, sub_index_offset); - delete[] sub_index_; - sub_index_ = new_sub_index; + memcpy(new_sub_index, sub_index_.get(), sub_index_offset); + sub_index_.reset(new_sub_index); } } - sub_index_ptr = (uint32_t*) (sub_index_ + sub_index_offset); + char* sub_index_pos = &sub_index_[sub_index_offset]; IndexRecord* record = hash_to_offsets[i]; int j; for (j = num_keys_for_bucket - 1; j >= 0 && record; j--, record = record->next) { - sub_index_ptr[j] = record->offset; + EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset); } assert(j == -1 && record == nullptr); sub_index_offset += kOffsetLen * num_keys_for_bucket; @@ -378,7 +380,7 @@ void PlainTableReader::FillIndexes( } Log(options_.info_log, "hash table size: %d, suffix_map length %zu", - hash_table_size_, sub_index_size_needed); + index_size_, sub_index_size_needed); } Status PlainTableReader::PopulateIndex() { @@ -405,11 +407,11 @@ Status PlainTableReader::PopulateIndex() { if (IsTotalOrderMode()) { uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey; if (num_bloom_bits > 0) { - bloom_ = new DynamicBloom(num_bloom_bits); + bloom_.reset(new DynamicBloom(num_bloom_bits)); } } - s = 
PopulateIndexRecordList(&record_list, &num_prefixes, bloom_); + s = PopulateIndexRecordList(&record_list, &num_prefixes); if (!s.ok()) { return s; } @@ -419,10 +421,10 @@ Status PlainTableReader::PopulateIndex() { // Bucketize all the index records to a temp data structure, in which for // each bucket, we generate a linked list of IndexRecord, in reversed order. - std::vector hash_to_offsets(hash_table_size_, nullptr); - std::vector bucket_count(hash_table_size_, 0); + std::vector hash_to_offsets(index_size_, nullptr); + std::vector bucket_count(index_size_, 0); size_t sub_index_size_needed = BucketizeIndexesAndFillBloom( - record_list, num_prefixes, &hash_to_offsets, &bucket_count); + &record_list, &hash_to_offsets, &bucket_count); // From the temp data structure, populate indexes. FillIndexes(sub_index_size_needed, hash_to_offsets, bucket_count); @@ -431,16 +433,16 @@ Status PlainTableReader::PopulateIndex() { Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, uint32_t prefix_hash, bool& prefix_matched, - uint32_t* ret_offset) const { + uint32_t* offset) const { prefix_matched = false; - int bucket = GetBucketIdFromHash(prefix_hash, hash_table_size_); - uint32_t bucket_value = hash_table_[bucket]; + int bucket = GetBucketIdFromHash(prefix_hash, index_size_); + uint32_t bucket_value = index_[bucket]; if (bucket_value == data_end_offset_) { - *ret_offset = data_end_offset_; + *offset = data_end_offset_; return Status::OK(); } else if ((bucket_value & kSubIndexMask) == 0) { // point directly to the file - *ret_offset = bucket_value; + *offset = bucket_value; return Status::OK(); } @@ -448,11 +450,9 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, uint32_t low = 0; uint64_t prefix_index_offset = bucket_value ^ kSubIndexMask; - const char* index_ptr = sub_index_ + prefix_index_offset; + const char* index_ptr = &sub_index_[prefix_index_offset]; uint32_t upper_bound = 0; - const uint32_t* base_ptr = (const 
uint32_t*) GetVarint32Ptr(index_ptr, - index_ptr + 4, - &upper_bound); + const char* base_ptr = GetVarint32Ptr(index_ptr, index_ptr + 4, &upper_bound); uint32_t high = upper_bound; ParsedInternalKey mid_key; ParsedInternalKey parsed_target; @@ -463,7 +463,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, // The key is between [low, high). Do a binary search between it. while (high - low > 1) { uint32_t mid = (high + low) / 2; - uint32_t file_offset = base_ptr[mid]; + uint32_t file_offset = GetFixed32Element(base_ptr, mid); size_t tmp; Status s = ReadKey(file_data_.data() + file_offset, &mid_key, &tmp); if (!s.ok()) { @@ -477,7 +477,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, // Happen to have found the exact key or target is smaller than the // first key after base_offset. prefix_matched = true; - *ret_offset = file_offset; + *offset = file_offset; return Status::OK(); } else { high = mid; @@ -489,19 +489,19 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, // to the wrong prefix. ParsedInternalKey low_key; size_t tmp; - uint32_t low_key_offset = base_ptr[low]; + uint32_t low_key_offset = GetFixed32Element(base_ptr, low); Status s = ReadKey(file_data_.data() + low_key_offset, &low_key, &tmp); if (GetPrefix(low_key) == prefix) { prefix_matched = true; - *ret_offset = low_key_offset; + *offset = low_key_offset; } else if (low + 1 < upper_bound) { // There is possible a next prefix, return it prefix_matched = false; - *ret_offset = base_ptr[low + 1]; + *offset = GetFixed32Element(base_ptr, low + 1); } else { // target is larger than a key of the last prefix in this bucket // but with a different prefix. Key does not exist. 
- *ret_offset = data_end_offset_; + *offset = data_end_offset_; } return Status::OK(); } @@ -514,23 +514,23 @@ Slice PlainTableReader::GetPrefix(const ParsedInternalKey& target) const { return GetPrefixFromUserKey(target.user_key); } -Status PlainTableReader::ReadKey(const char* row_ptr, ParsedInternalKey* key, +Status PlainTableReader::ReadKey(const char* start, ParsedInternalKey* key, size_t* bytes_read) const { const char* key_ptr = nullptr; *bytes_read = 0; size_t user_key_size = 0; if (IsFixedLength()) { user_key_size = user_key_len_; - key_ptr = row_ptr; + key_ptr = start; } else { uint32_t tmp_size = 0; - key_ptr = GetVarint32Ptr(row_ptr, file_data_.data() + data_end_offset_, - &tmp_size); + key_ptr = + GetVarint32Ptr(start, file_data_.data() + data_end_offset_, &tmp_size); if (key_ptr == nullptr) { return Status::Corruption("Unable to read the next key"); } user_key_size = (size_t)tmp_size; - *bytes_read = key_ptr - row_ptr; + *bytes_read = key_ptr - start; } if (key_ptr + user_key_size + 1 >= file_data_.data() + data_end_offset_) { return Status::Corruption("Unable to read the next key"); @@ -543,7 +543,7 @@ Status PlainTableReader::ReadKey(const char* row_ptr, ParsedInternalKey* key, key->type = kTypeValue; *bytes_read += user_key_size + 1; } else { - if (row_ptr + user_key_size + 8 >= file_data_.data() + data_end_offset_) { + if (start + user_key_size + 8 >= file_data_.data() + data_end_offset_) { return Status::Corruption("Unable to read the next key"); } if (!ParseInternalKey(Slice(key_ptr, user_key_size + 8), key)) { @@ -555,29 +555,28 @@ Status PlainTableReader::ReadKey(const char* row_ptr, ParsedInternalKey* key, return Status::OK(); } -Status PlainTableReader::Next(uint32_t offset, ParsedInternalKey* key, - Slice* value, uint32_t* next_offset) const { - if (offset == data_end_offset_) { - *next_offset = data_end_offset_; +Status PlainTableReader::Next(uint32_t* offset, ParsedInternalKey* key, + Slice* value) const { + if (*offset == 
data_end_offset_) { + *offset = data_end_offset_; return Status::OK(); } - if (offset > data_end_offset_) { + if (*offset > data_end_offset_) { return Status::Corruption("Offset is out of file size"); } - const char* row_ptr = file_data_.data() + offset; + const char* start = file_data_.data() + *offset; size_t bytes_for_key; - Status s = ReadKey(row_ptr, key, &bytes_for_key); + Status s = ReadKey(start, key, &bytes_for_key); uint32_t value_size; - const char* value_ptr = GetVarint32Ptr(row_ptr + bytes_for_key, - file_data_.data() + data_end_offset_, - &value_size); + const char* value_ptr = GetVarint32Ptr( + start + bytes_for_key, file_data_.data() + data_end_offset_, &value_size); if (value_ptr == nullptr) { return Status::Corruption("Error reading value length."); } - *next_offset = offset + (value_ptr - row_ptr) + value_size; - if (*next_offset > data_end_offset_) { + *offset = *offset + (value_ptr - start) + value_size; + if (*offset > data_end_offset_) { return Status::Corruption("Reach end of file when reading value"); } *value = Slice(value_ptr, value_size); @@ -624,7 +623,7 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target, Slice found_value; while (offset < data_end_offset_) { - Status s = Next(offset, &found_key, &found_value, &offset); + Status s = Next(&offset, &found_key, &found_value); if (!s.ok()) { return s; } @@ -680,7 +679,7 @@ void PlainTableIterator::SeekToLast() { void PlainTableIterator::Seek(const Slice& target) { // If the user doesn't set prefix seek option and we are not able to do a // total Seek(). assert failure. 
- if (!use_prefix_seek_ && table_->hash_table_size_ > 1) { + if (!use_prefix_seek_ && table_->index_size_ > 1) { assert(false); status_ = Status::NotSupported( "PlainTable cannot issue non-prefix seek unless in total order mode."); @@ -736,7 +735,7 @@ void PlainTableIterator::Next() { if (offset_ < table_->data_end_offset_) { Slice tmp_slice; ParsedInternalKey parsed_key; - status_ = table_->Next(next_offset_, &parsed_key, &value_, &next_offset_); + status_ = table_->Next(&next_offset_, &parsed_key, &value_); if (status_.ok()) { // Make a copy in this case. TODO optimize. tmp_str_.clear(); diff --git a/table/plain_table_reader.h b/table/plain_table_reader.h index 03bf11a4e6..16bbc8ba5c 100644 --- a/table/plain_table_reader.h +++ b/table/plain_table_reader.h @@ -69,7 +69,8 @@ class PlainTableReader: public TableReader { return table_properties_; } - PlainTableReader(const EnvOptions& storage_options, + PlainTableReader(const Options& options, unique_ptr&& file, + const EnvOptions& storage_options, const InternalKeyComparator& internal_comparator, uint64_t file_size, int bloom_num_bits, double hash_table_ratio, size_t index_sparseness, @@ -85,9 +86,9 @@ class PlainTableReader: public TableReader { // PopulateIndex() builds index of keys. It must be called before any query // to the table. // - // hash_table_ contains buckets size of hash_table_size_, each is a 32-bit - // integer. The lower 31 bits contain an offset value (explained below) and - // the first bit of the integer indicates type of the offset. + // index_ contains buckets size of index_size_, each is a + // 32-bit integer. The lower 31 bits contain an offset value (explained below) + // and the first bit of the integer indicates type of the offset. 
// // +--------------+------------------------------------------------------+ // | Flag (1 bit) | Offset to binary search buffer or file (31 bits) + @@ -122,23 +123,29 @@ class PlainTableReader: public TableReader { // Status PopulateIndex(); - Options options_; - unique_ptr file_; - private: struct IndexRecord; class IndexRecordList; - uint32_t* hash_table_ = nullptr; - int hash_table_size_ = 0; - char* sub_index_ = nullptr; + // Plain table maintains an index and a sub index. + // index is implemented by a hash table. + // sub index is one big in-memory array. + // For more details about the in-memory index, please refer to: + // https://github.com/facebook/rocksdb/wiki/PlainTable-Format + // #wiki-in-memory-index-format + std::unique_ptr index_; + int index_size_ = 0; + std::unique_ptr sub_index_; + Options options_; const EnvOptions& soptions_; + unique_ptr file_; + const InternalKeyComparator internal_comparator_; + // represents plain table's current status. Status status_; Slice file_data_; - uint32_t version_; uint32_t file_size_; const double kHashTableRatio; @@ -147,9 +154,12 @@ class PlainTableReader: public TableReader { // every N keys, where the "N" is determined by // kIndexIntervalForSamePrefixKeys const size_t kIndexIntervalForSamePrefixKeys = 16; - DynamicBloom* bloom_ = nullptr; + // Bloom filter is used to rule out non-existent keys + unique_ptr bloom_; std::shared_ptr table_properties_; + // data_start_offset_ and data_end_offset_ define the range of the + // sst file that stores data. const uint32_t data_start_offset_ = 0; const uint32_t data_end_offset_; const size_t user_key_len_; @@ -176,42 +186,43 @@ class PlainTableReader: public TableReader { // If bloom_ is not null, all the keys' full-key hash will be added to the // bloom filter. 
Status PopulateIndexRecordList(IndexRecordList* record_list, - int* num_prefixes, DynamicBloom* bloom_) const; + int* num_prefixes) const; // Internal helper function to allocate memory for indexes and bloom filters void AllocateIndexAndBloom(int num_prefixes); // Internal helper function to bucket index record list to hash buckets. - // hash_to_offsets is sized of of hash_table_size_, each contains a linked - // list + // bucket_headers is a vector of size index_size_, with each entry + // containing a linked list of IndexRecords hashed to the same bucket, in reverse + // order. // of offsets for the hash, in reversed order. - // bucket_count is sized of hash_table_size_. The value is how many index - // records are there in hash_to_offsets for the same bucket. - size_t BucketizeIndexesAndFillBloom( - IndexRecordList& record_list, int num_prefixes, - std::vector* hash_to_offsets, - std::vector* bucket_count); + // bucket_count is of size index_size_. Each value is how many index + // records there are in bucket_headers for the same bucket. + size_t BucketizeIndexesAndFillBloom(IndexRecordList* record_list, + std::vector* bucket_headers, + std::vector* bucket_count); // Internal helper class to fill the indexes and bloom filters to internal - // data structures. hash_to_offsets and bucket_count are bucketized indexes + // data structures. bucket_headers and bucket_count are bucketized indexes // and counts generated by BucketizeIndexesAndFillBloom(). void FillIndexes(size_t sub_index_size_needed, - const std::vector& hash_to_offsets, + const std::vector& bucket_headers, const std::vector& bucket_count); + // Read a plain table key from the position `start`. The read content + // will be written to `key` and the size of read bytes will be populated + // in `bytes_read`. Status ReadKey(const char* row_ptr, ParsedInternalKey* key, size_t* bytes_read) const; - // Read the key and value at offset to key and value. - // tmp_slice is a tmp slice. 
- // return next_offset as the offset for the next key. - Status Next(uint32_t offset, ParsedInternalKey* key, Slice* value, - uint32_t* next_offset) const; + // Read the key and value at `offset` to parameters `key` and `value`. + // On success, `offset` will be updated as the offset for the next key. + Status Next(uint32_t* offset, ParsedInternalKey* key, Slice* value) const; // Get file offset for key target. // return value prefix_matched is set to true if the offset is confirmed // for a key with the same prefix as target. Status GetOffset(const Slice& target, const Slice& prefix, uint32_t prefix_hash, bool& prefix_matched, - uint32_t* ret_offset) const; + uint32_t* offset) const; Slice GetUserKey(const Slice& key) const { return Slice(key.data(), key.size() - 8); diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index 684ac732eb..0d070a14e3 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -130,7 +130,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, if (!for_iterator) { // Query one existing key; std::string key = MakeKey(r1, r2, through_db); - uint64_t start_micros = Now(env, measured_by_nanosecond); + uint64_t start_time = Now(env, measured_by_nanosecond); port::MemoryBarrier(); if (!through_db) { s = table_reader->Get(read_options, key, arg, DummySaveValue, @@ -139,7 +139,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, s = db->Get(read_options, key, &result); } port::MemoryBarrier(); - hist.Add(Now(env, measured_by_nanosecond) - start_micros); + hist.Add(Now(env, measured_by_nanosecond) - start_time); } else { int r2_len; if (if_query_empty_keys) { @@ -157,7 +157,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, read_options.prefix = &prefix; } uint64_t total_time = 0; - uint64_t start_micros = Now(env, measured_by_nanosecond); + uint64_t start_time = Now(env, measured_by_nanosecond); port::MemoryBarrier(); Iterator* iter; if (!through_db) { 
@@ -172,9 +172,9 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, } // verify key; port::MemoryBarrier(); - total_time += Now(env, measured_by_nanosecond) - start_micros; + total_time += Now(env, measured_by_nanosecond) - start_time; assert(Slice(MakeKey(r1, r2 + count, through_db)) == iter->key()); - start_micros = Now(env, measured_by_nanosecond); + start_time = Now(env, measured_by_nanosecond); if (++count >= r2_len) { break; } @@ -187,7 +187,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, } delete iter; port::MemoryBarrier(); - total_time += Now(env, measured_by_nanosecond) - start_micros; + total_time += Now(env, measured_by_nanosecond) - start_time; hist.Add(total_time); } }