mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 07:30:54 +00:00
Enable hash index for block-based table
Summary: Based on previous patches, this diff eventually provides the end-to-end mechanism for users to specify the hash-index. Test Plan: Wrote several new unit tests. Reviewers: sdong, haobo, dhruba Reviewed By: sdong CC: leveldb Differential Revision: https://reviews.facebook.net/D16539
This commit is contained in:
parent
7a92537fc4
commit
75b59d5146
|
@ -266,6 +266,8 @@ class DBTest {
|
|||
// Sequence of option configurations to try
|
||||
enum OptionConfig {
|
||||
kDefault,
|
||||
kBlockBasedTableWithPrefixHashIndex,
|
||||
kBlockBasedTableWithWholeKeyHashIndex,
|
||||
kPlainTableFirstBytePrefix,
|
||||
kPlainTableAllBytesPrefix,
|
||||
kVectorRep,
|
||||
|
@ -303,7 +305,8 @@ class DBTest {
|
|||
kSkipDeletesFilterFirst = 1,
|
||||
kSkipUniversalCompaction = 2,
|
||||
kSkipMergePut = 4,
|
||||
kSkipPlainTable = 8
|
||||
kSkipPlainTable = 8,
|
||||
kSkipHashIndex = 16
|
||||
};
|
||||
|
||||
DBTest() : option_config_(kDefault),
|
||||
|
@ -343,6 +346,12 @@ class DBTest {
|
|||
|| option_config_ == kPlainTableFirstBytePrefix)) {
|
||||
continue;
|
||||
}
|
||||
// Skip the two hash-index configurations only when the caller asked for it
// through kSkipHashIndex. The previous check tested kSkipPlainTable here,
// which left the kSkipHashIndex bit without any effect.
if ((skip_mask & kSkipHashIndex) &&
    (option_config_ == kBlockBasedTableWithPrefixHashIndex ||
     option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) {
  continue;
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -439,6 +448,20 @@ class DBTest {
|
|||
case kInfiniteMaxOpenFiles:
|
||||
options.max_open_files = -1;
|
||||
break;
|
||||
case kBlockBasedTableWithPrefixHashIndex: {
|
||||
BlockBasedTableOptions table_options;
|
||||
table_options.index_type = BlockBasedTableOptions::kHashSearch;
|
||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
|
||||
break;
|
||||
}
|
||||
case kBlockBasedTableWithWholeKeyHashIndex: {
|
||||
BlockBasedTableOptions table_options;
|
||||
table_options.index_type = BlockBasedTableOptions::kHashSearch;
|
||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||
options.prefix_extractor.reset(NewNoopTransform());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -1363,7 +1386,7 @@ TEST(DBTest, KeyMayExist) {
|
|||
|
||||
// KeyMayExist function only checks data in block caches, which is not used
|
||||
// by plain table format.
|
||||
} while (ChangeOptions(kSkipPlainTable));
|
||||
} while (ChangeOptions(kSkipPlainTable | kSkipHashIndex));
|
||||
}
|
||||
|
||||
TEST(DBTest, NonBlockingIteration) {
|
||||
|
@ -6184,7 +6207,9 @@ TEST(DBTest, Randomized) {
|
|||
int minimum = 0;
|
||||
if (option_config_ == kHashSkipList ||
|
||||
option_config_ == kHashLinkList ||
|
||||
option_config_ == kPlainTableFirstBytePrefix) {
|
||||
option_config_ == kPlainTableFirstBytePrefix ||
|
||||
option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
|
||||
option_config_ == kBlockBasedTableWithPrefixHashIndex) {
|
||||
minimum = 1;
|
||||
}
|
||||
if (p < 45) { // Put
|
||||
|
@ -6224,8 +6249,15 @@ TEST(DBTest, Randomized) {
|
|||
}
|
||||
|
||||
if ((step % 100) == 0) {
|
||||
// For DB instances that use the hash index + block-based table, the
|
||||
// iterator will be invalid right when seeking a non-existent key, rather
|
||||
// than return a key that is close to it.
|
||||
if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
|
||||
option_config_ != kBlockBasedTableWithPrefixHashIndex) {
|
||||
ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
|
||||
ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
|
||||
}
|
||||
|
||||
// Save a snapshot from each DB this time that we'll use next
|
||||
// time we compare things, to make sure the current state is
|
||||
// preserved with the snapshot
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "rocksdb/db.h"
|
||||
#include "rocksdb/filter_policy.h"
|
||||
#include "rocksdb/slice.h"
|
||||
#include "rocksdb/slice_transform.h"
|
||||
#include "rocksdb/table.h"
|
||||
#include "rocksdb/types.h"
|
||||
#include "util/coding.h"
|
||||
|
@ -304,4 +305,34 @@ class IterKey {
|
|||
void operator=(const IterKey&) = delete;
|
||||
};
|
||||
|
||||
class InternalKeySliceTransform : public SliceTransform {
|
||||
public:
|
||||
explicit InternalKeySliceTransform(const SliceTransform* transform)
|
||||
: transform_(transform) {}
|
||||
|
||||
virtual const char* Name() const { return transform_->Name(); }
|
||||
|
||||
virtual Slice Transform(const Slice& src) const {
|
||||
auto user_key = ExtractUserKey(src);
|
||||
return transform_->Transform(user_key);
|
||||
}
|
||||
|
||||
virtual bool InDomain(const Slice& src) const {
|
||||
auto user_key = ExtractUserKey(src);
|
||||
return transform_->InDomain(user_key);
|
||||
}
|
||||
|
||||
virtual bool InRange(const Slice& dst) const {
|
||||
auto user_key = ExtractUserKey(dst);
|
||||
return transform_->InRange(user_key);
|
||||
}
|
||||
|
||||
const SliceTransform* user_prefix_extractor() const { return transform_; }
|
||||
|
||||
private:
|
||||
// Like comparator, InternalKeySliceTransform will not take care of the
|
||||
// deletion of transform_
|
||||
const SliceTransform* const transform_;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
|
|
@ -60,6 +60,12 @@ struct BlockBasedTableOptions {
|
|||
// A space efficient index block that is optimized for
|
||||
// binary-search-based index.
|
||||
kBinarySearch,
|
||||
|
||||
// The hash index, if enabled, will do the hash lookup when
|
||||
// `ReadOptions.prefix_seek == true`. User should also specify
|
||||
// `Options.prefix_extractor` to allow the index block to correctly
|
||||
// extract the prefix of the given key and perform hash table lookup.
|
||||
kHashSearch,
|
||||
};
|
||||
|
||||
IndexType index_type = kBinarySearch;
|
||||
|
|
119
table/block.cc
119
table/block.cc
|
@ -11,16 +11,20 @@
|
|||
|
||||
#include "table/block.h"
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "rocksdb/comparator.h"
|
||||
#include "table/block_hash_index.h"
|
||||
#include "table/format.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/logging.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
inline uint32_t Block::NumRestarts() const {
|
||||
uint32_t Block::NumRestarts() const {
|
||||
assert(size_ >= 2*sizeof(uint32_t));
|
||||
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
||||
}
|
||||
|
@ -92,6 +96,7 @@ class Block::Iter : public Iterator {
|
|||
std::string key_;
|
||||
Slice value_;
|
||||
Status status_;
|
||||
BlockHashIndex* hash_index_;
|
||||
|
||||
inline int Compare(const Slice& a, const Slice& b) const {
|
||||
return comparator_->Compare(a, b);
|
||||
|
@ -118,16 +123,15 @@ class Block::Iter : public Iterator {
|
|||
}
|
||||
|
||||
public:
|
||||
Iter(const Comparator* comparator,
|
||||
const char* data,
|
||||
uint32_t restarts,
|
||||
uint32_t num_restarts)
|
||||
Iter(const Comparator* comparator, const char* data, uint32_t restarts,
|
||||
uint32_t num_restarts, BlockHashIndex* hash_index)
|
||||
: comparator_(comparator),
|
||||
data_(data),
|
||||
restarts_(restarts),
|
||||
num_restarts_(num_restarts),
|
||||
current_(restarts_),
|
||||
restart_index_(num_restarts_) {
|
||||
restart_index_(num_restarts_),
|
||||
hash_index_(hash_index) {
|
||||
assert(num_restarts_ > 0);
|
||||
}
|
||||
|
||||
|
@ -169,45 +173,22 @@ class Block::Iter : public Iterator {
|
|||
}
|
||||
|
||||
virtual void Seek(const Slice& target) {
|
||||
// Binary search in restart array to find the first restart point
|
||||
// with a key >= target
|
||||
uint32_t left = 0;
|
||||
uint32_t right = num_restarts_ - 1;
|
||||
while (left < right) {
|
||||
uint32_t mid = (left + right + 1) / 2;
|
||||
uint32_t region_offset = GetRestartPoint(mid);
|
||||
uint32_t shared, non_shared, value_length;
|
||||
const char* key_ptr = DecodeEntry(data_ + region_offset,
|
||||
data_ + restarts_,
|
||||
&shared, &non_shared, &value_length);
|
||||
if (key_ptr == nullptr || (shared != 0)) {
|
||||
CorruptionError();
|
||||
uint32_t index = 0;
|
||||
bool ok = hash_index_ ? HashSeek(target, &index)
|
||||
: BinarySeek(target, 0, num_restarts_ - 1, &index);
|
||||
|
||||
if (!ok) {
|
||||
return;
|
||||
}
|
||||
Slice mid_key(key_ptr, non_shared);
|
||||
if (Compare(mid_key, target) < 0) {
|
||||
// Key at "mid" is smaller than "target". Therefore all
|
||||
// blocks before "mid" are uninteresting.
|
||||
left = mid;
|
||||
} else {
|
||||
// Key at "mid" is >= "target". Therefore all blocks at or
|
||||
// after "mid" are uninteresting.
|
||||
right = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
SeekToRestartPoint(index);
|
||||
// Linear search (within restart block) for first key >= target
|
||||
SeekToRestartPoint(left);
|
||||
while (true) {
|
||||
if (!ParseNextKey()) {
|
||||
return;
|
||||
}
|
||||
if (Compare(key_, target) >= 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (true) {
|
||||
if (!ParseNextKey() || Compare(key_, target) >= 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
virtual void SeekToFirst() {
|
||||
SeekToRestartPoint(0);
|
||||
ParseNextKey();
|
||||
|
@ -257,6 +238,53 @@ class Block::Iter : public Iterator {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
// Binary search in restart array to find the first restart point
|
||||
// with a key >= target
|
||||
bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
|
||||
uint32_t* index) {
|
||||
assert(left <= right);
|
||||
|
||||
while (left < right) {
|
||||
uint32_t mid = (left + right + 1) / 2;
|
||||
uint32_t region_offset = GetRestartPoint(mid);
|
||||
uint32_t shared, non_shared, value_length;
|
||||
const char* key_ptr =
|
||||
DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
|
||||
&non_shared, &value_length);
|
||||
if (key_ptr == nullptr || (shared != 0)) {
|
||||
CorruptionError();
|
||||
return false;
|
||||
}
|
||||
Slice mid_key(key_ptr, non_shared);
|
||||
if (Compare(mid_key, target) < 0) {
|
||||
// Key at "mid" is smaller than "target". Therefore all
|
||||
// blocks before "mid" are uninteresting.
|
||||
left = mid;
|
||||
} else {
|
||||
// Key at "mid" is >= "target". Therefore all blocks at or
|
||||
// after "mid" are uninteresting.
|
||||
right = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
*index = left;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HashSeek(const Slice& target, uint32_t* index) {
|
||||
assert(hash_index_);
|
||||
auto restart_index = hash_index_->GetRestartIndex(target);
|
||||
if (restart_index == nullptr) {
|
||||
current_ = restarts_;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// the elements in restart_array[index : index + num_blocks]
|
||||
// are all with same prefix. We'll do binary search in that small range.
|
||||
auto left = restart_index->first_index;
|
||||
auto right = restart_index->first_index + restart_index->num_blocks - 1;
|
||||
return BinarySeek(target, left, right, index);
|
||||
}
|
||||
};
|
||||
|
||||
Iterator* Block::NewIterator(const Comparator* cmp) {
|
||||
|
@ -267,8 +295,13 @@ Iterator* Block::NewIterator(const Comparator* cmp) {
|
|||
if (num_restarts == 0) {
|
||||
return NewEmptyIterator();
|
||||
} else {
|
||||
return new Iter(cmp, data_, restart_offset_, num_restarts);
|
||||
return new Iter(cmp, data_, restart_offset_, num_restarts,
|
||||
hash_index_.get());
|
||||
}
|
||||
}
|
||||
|
||||
// Installs `hash_index` as the hash index handed to iterators created by
// NewIterator(). The pointer is stored via hash_index_.reset(), so a
// previously installed index is replaced.
void Block::SetBlockHashIndex(BlockHashIndex* hash_index) {
  hash_index_.reset(hash_index);
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#pragma once
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "rocksdb/options.h"
|
||||
|
||||
|
@ -17,6 +18,7 @@ namespace rocksdb {
|
|||
|
||||
struct BlockContents;
|
||||
class Comparator;
|
||||
class BlockHashIndex;
|
||||
|
||||
class Block {
|
||||
public:
|
||||
|
@ -26,20 +28,28 @@ class Block {
|
|||
~Block();
|
||||
|
||||
size_t size() const { return size_; }
|
||||
const char* data() const { return data_; }
|
||||
bool cachable() const { return cachable_; }
|
||||
uint32_t NumRestarts() const;
|
||||
CompressionType compression_type() const { return compression_type_; }
|
||||
|
||||
// If hash index lookup is enabled and `use_hash_index` is true, this block
|
||||
// will do hash lookup for the key prefix.
|
||||
//
|
||||
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
|
||||
// the iterator will simply be set as "invalid", rather than returning
|
||||
// the key that is just past the target key.
|
||||
Iterator* NewIterator(const Comparator* comparator);
|
||||
const char* data() { return data_; }
|
||||
void SetBlockHashIndex(BlockHashIndex* hash_index);
|
||||
|
||||
private:
|
||||
uint32_t NumRestarts() const;
|
||||
|
||||
const char* data_;
|
||||
size_t size_;
|
||||
uint32_t restart_offset_; // Offset in data_ of restart array
|
||||
bool owned_; // Block owns data_[]
|
||||
bool cachable_;
|
||||
CompressionType compression_type_;
|
||||
std::unique_ptr<BlockHashIndex> hash_index_;
|
||||
|
||||
// No copying allowed
|
||||
Block(const Block&);
|
||||
|
|
|
@ -97,9 +97,9 @@ class IndexBuilder {
|
|||
// 2. Shorten the key length for index block. Other than honestly using the
|
||||
// last key in the data block as the index key, we instead find a shortest
|
||||
// substitute key that serves the same function.
|
||||
class BinarySearchIndexBuilder : public IndexBuilder {
|
||||
class ShortenedIndexBuilder : public IndexBuilder {
|
||||
public:
|
||||
explicit BinarySearchIndexBuilder(const Comparator* comparator)
|
||||
explicit ShortenedIndexBuilder(const Comparator* comparator)
|
||||
: IndexBuilder(comparator),
|
||||
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
|
||||
|
||||
|
@ -128,11 +128,41 @@ class BinarySearchIndexBuilder : public IndexBuilder {
|
|||
BlockBuilder index_block_builder_;
|
||||
};
|
||||
|
||||
// FullKeyIndexBuilder is also based on BlockBuilder. It works pretty much like
|
||||
// ShortenedIndexBuilder, but preserves the full key instead the substitude key.
|
||||
// with the reason being that hash index is based on "prefix".
|
||||
class FullKeyIndexBuilder : public IndexBuilder {
|
||||
public:
|
||||
explicit FullKeyIndexBuilder(const Comparator* comparator)
|
||||
: IndexBuilder(comparator),
|
||||
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
|
||||
|
||||
virtual void AddEntry(std::string* last_key_in_current_block,
|
||||
const Slice* first_key_in_next_block,
|
||||
const BlockHandle& block_handle) override {
|
||||
std::string handle_encoding;
|
||||
block_handle.EncodeTo(&handle_encoding);
|
||||
index_block_builder_.Add(*last_key_in_current_block, handle_encoding);
|
||||
}
|
||||
|
||||
virtual Slice Finish() override { return index_block_builder_.Finish(); }
|
||||
|
||||
virtual size_t EstimatedSize() const {
|
||||
return index_block_builder_.CurrentSizeEstimate();
|
||||
}
|
||||
|
||||
private:
|
||||
BlockBuilder index_block_builder_;
|
||||
};
|
||||
|
||||
// Create a index builder based on its type.
|
||||
IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) {
|
||||
switch (type) {
|
||||
case BlockBasedTableOptions::kBinarySearch: {
|
||||
return new BinarySearchIndexBuilder(comparator);
|
||||
return new ShortenedIndexBuilder(comparator);
|
||||
}
|
||||
case BlockBasedTableOptions::kHashSearch: {
|
||||
return new FullKeyIndexBuilder(comparator);
|
||||
}
|
||||
default: {
|
||||
assert(!"Do not recognize the index type ");
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "table/block.h"
|
||||
#include "table/filter_block.h"
|
||||
#include "table/block_hash_index.h"
|
||||
#include "table/format.h"
|
||||
#include "table/meta_blocks.h"
|
||||
#include "table/two_level_iterator.h"
|
||||
|
@ -180,19 +181,51 @@ class BinarySearchIndexReader : public IndexReader {
|
|||
std::unique_ptr<Block> index_block_;
|
||||
};
|
||||
|
||||
// TODO(kailiu) This class is only a stub for now. And the comment below is also
|
||||
// not completed.
|
||||
// Index that leverages an internal hash table to quicken the lookup for a given
|
||||
// key.
|
||||
// @param data_iter_gen, equivalent to BlockBasedTable::NewIterator(). But that
|
||||
// function requires the index to be initialized. To avoid this problem the external
|
||||
// caller will pass a function that can create the iterator over the entries
|
||||
// without the table to be fully initialized.
|
||||
class HashIndexReader : public IndexReader {
|
||||
public:
|
||||
static Status Create(RandomAccessFile* file, const BlockHandle& index_handle,
|
||||
Env* env, const Comparator* comparator,
|
||||
BlockBasedTable* table,
|
||||
std::function<Iterator*(Iterator*)> data_iter_gen,
|
||||
const SliceTransform* prefix_extractor,
|
||||
IndexReader** index_reader) {
|
||||
return Status::NotSupported("not implemented yet!");
|
||||
assert(prefix_extractor);
|
||||
Block* index_block = nullptr;
|
||||
auto s =
|
||||
ReadBlockFromFile(file, ReadOptions(), index_handle, &index_block, env);
|
||||
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
*index_reader = new HashIndexReader(comparator, index_block);
|
||||
std::unique_ptr<Iterator> index_iter(index_block->NewIterator(nullptr));
|
||||
std::unique_ptr<Iterator> data_iter(
|
||||
data_iter_gen(index_block->NewIterator(nullptr)));
|
||||
auto hash_index = CreateBlockHashIndex(index_iter.get(), data_iter.get(),
|
||||
index_block->NumRestarts(),
|
||||
comparator, prefix_extractor);
|
||||
index_block->SetBlockHashIndex(hash_index);
|
||||
return s;
|
||||
}
|
||||
|
||||
virtual Iterator* NewIterator() override {
|
||||
return index_block_->NewIterator(comparator_);
|
||||
}
|
||||
|
||||
virtual size_t size() const override { return index_block_->size(); }
|
||||
|
||||
private:
|
||||
HashIndexReader(const Comparator* comparator, Block* index_block)
|
||||
: IndexReader(comparator), index_block_(index_block) {
|
||||
assert(index_block_ != nullptr);
|
||||
}
|
||||
std::unique_ptr<Block> index_block_;
|
||||
};
|
||||
|
||||
|
||||
|
@ -223,6 +256,11 @@ struct BlockBasedTable::Rep {
|
|||
|
||||
std::shared_ptr<const TableProperties> table_properties;
|
||||
BlockBasedTableOptions::IndexType index_type;
|
||||
// TODO(kailiu) It is very ugly to use internal key in table, since table
|
||||
// module should not be relying on db module. However to make things easier
|
||||
// and compatible with existing code, we introduce a wrapper that allows
|
||||
// block to extract prefix without knowing if a key is internal or not.
|
||||
unique_ptr<SliceTransform> internal_prefix_transform;
|
||||
};
|
||||
|
||||
BlockBasedTable::~BlockBasedTable() {
|
||||
|
@ -747,8 +785,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||
return { filter, cache_handle };
|
||||
}
|
||||
|
||||
Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options)
|
||||
const {
|
||||
Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
||||
// index reader has already been pre-populated.
|
||||
if (rep_->index_reader) {
|
||||
return rep_->index_reader->NewIterator();
|
||||
|
@ -978,7 +1015,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
|
|||
// 3. options
|
||||
// 4. internal_comparator
|
||||
// 5. index_type
|
||||
Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const {
|
||||
Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
|
||||
// Some old version of block-based tables don't have index type present in
|
||||
// table properties. If that's the case we can safely use the kBinarySearch.
|
||||
auto index_type = BlockBasedTableOptions::kBinarySearch;
|
||||
|
@ -989,11 +1026,30 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const {
|
|||
DecodeFixed32(pos->second.c_str()));
|
||||
}
|
||||
|
||||
auto file = rep_->file.get();
|
||||
const auto& index_handle = rep_->index_handle;
|
||||
auto env = rep_->options.env;
|
||||
auto comparator = &rep_->internal_comparator;
|
||||
|
||||
switch (index_type) {
|
||||
case BlockBasedTableOptions::kBinarySearch: {
|
||||
return BinarySearchIndexReader::Create(
|
||||
rep_->file.get(), rep_->index_handle, rep_->options.env,
|
||||
&rep_->internal_comparator, index_reader);
|
||||
return BinarySearchIndexReader::Create(file, index_handle, env,
|
||||
comparator, index_reader);
|
||||
}
|
||||
case BlockBasedTableOptions::kHashSearch: {
|
||||
// We need to wrap data with internal_prefix_transform to make sure it can
|
||||
// handle prefix correctly.
|
||||
rep_->internal_prefix_transform.reset(
|
||||
new InternalKeySliceTransform(rep_->options.prefix_extractor.get()));
|
||||
return HashIndexReader::Create(
|
||||
file, index_handle, env, comparator,
|
||||
[&](Iterator* index_iter) {
|
||||
return NewTwoLevelIterator(
|
||||
index_iter, &BlockBasedTable::DataBlockReader,
|
||||
const_cast<BlockBasedTable*>(this), ReadOptions(),
|
||||
rep_->soptions, rep_->internal_comparator);
|
||||
},
|
||||
rep_->internal_prefix_transform.get(), index_reader);
|
||||
}
|
||||
default: {
|
||||
std::string error_message =
|
||||
|
|
|
@ -131,7 +131,7 @@ class BlockBasedTable : public TableReader {
|
|||
// 2. index is not present in block cache.
|
||||
// 3. We disallowed any io to be performed, that is, read_options ==
|
||||
// kBlockCacheTier
|
||||
Iterator* NewIndexIterator(const ReadOptions& read_options) const;
|
||||
Iterator* NewIndexIterator(const ReadOptions& read_options);
|
||||
|
||||
// Read block cache from block caches (if set): block_cache and
|
||||
// block_cache_compressed.
|
||||
|
@ -164,7 +164,7 @@ class BlockBasedTable : public TableReader {
|
|||
|
||||
void ReadMeta(const Footer& footer);
|
||||
void ReadFilter(const Slice& filter_handle_value);
|
||||
Status CreateIndexReader(IndexReader** index_reader) const;
|
||||
Status CreateIndexReader(IndexReader** index_reader);
|
||||
|
||||
// Read the meta block from sst.
|
||||
static Status ReadMetaBlock(
|
||||
|
|
|
@ -3,7 +3,10 @@
|
|||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "db/memtable.h"
|
||||
#include "db/write_batch_internal.h"
|
||||
|
@ -11,9 +14,11 @@
|
|||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "rocksdb/table.h"
|
||||
#include "rocksdb/slice_transform.h"
|
||||
#include "table/block.h"
|
||||
#include "table/block_builder.h"
|
||||
#include "table/format.h"
|
||||
#include "table/block_hash_index.h"
|
||||
#include "util/random.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
@ -25,6 +30,40 @@ static std::string RandomString(Random* rnd, int len) {
|
|||
test::RandomString(rnd, len, &r);
|
||||
return r;
|
||||
}
|
||||
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
|
||||
Random *rnd) {
|
||||
char buf[50];
|
||||
char *p = &buf[0];
|
||||
snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
|
||||
std::string k(p);
|
||||
if (padding_size) {
|
||||
k += RandomString(rnd, padding_size);
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
// Generate random key value pairs.
|
||||
// The generated key will be sorted. You can tune the parameters to generated
|
||||
// different kinds of test key/value pairs for different scenario.
|
||||
void GenerateRandomKVs(std::vector<std::string> *keys,
|
||||
std::vector<std::string> *values, const int from,
|
||||
const int len, const int step = 1,
|
||||
const int padding_size = 0,
|
||||
const int keys_share_prefix = 1) {
|
||||
Random rnd(302);
|
||||
|
||||
// generate different prefix
|
||||
for (int i = from; i < from + len; i += step) {
|
||||
// generating keys that shares the prefix
|
||||
for (int j = 0; j < keys_share_prefix; ++j) {
|
||||
keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
|
||||
|
||||
// 100 bytes values
|
||||
values->emplace_back(RandomString(&rnd, 100));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class BlockTest {};
|
||||
|
||||
|
@ -39,24 +78,11 @@ TEST(BlockTest, SimpleTest) {
|
|||
std::vector<std::string> values;
|
||||
BlockBuilder builder(options, ic.get());
|
||||
int num_records = 100000;
|
||||
char buf[10];
|
||||
char* p = &buf[0];
|
||||
|
||||
GenerateRandomKVs(&keys, &values, 0, num_records);
|
||||
// add a bunch of records to a block
|
||||
for (int i = 0; i < num_records; i++) {
|
||||
// generate random kvs
|
||||
sprintf(p, "%6d", i);
|
||||
std::string k(p);
|
||||
std::string v = RandomString(&rnd, 100); // 100 byte values
|
||||
|
||||
// write kvs to the block
|
||||
Slice key(k);
|
||||
Slice value(v);
|
||||
builder.Add(key, value);
|
||||
|
||||
// remember kvs in a lookaside array
|
||||
keys.push_back(k);
|
||||
values.push_back(v);
|
||||
builder.Add(keys[i], values[i]);
|
||||
}
|
||||
|
||||
// read serialized contents of the block
|
||||
|
@ -101,6 +127,114 @@ TEST(BlockTest, SimpleTest) {
|
|||
delete iter;
|
||||
}
|
||||
|
||||
// return the block contents
|
||||
BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder,
|
||||
const std::vector<std::string> &keys,
|
||||
const std::vector<std::string> &values,
|
||||
const int prefix_group_size = 1) {
|
||||
builder->reset(
|
||||
new BlockBuilder(1 /* restart interval */, BytewiseComparator()));
|
||||
|
||||
// Add only half of the keys
|
||||
for (size_t i = 0; i < keys.size(); ++i) {
|
||||
(*builder)->Add(keys[i], values[i]);
|
||||
}
|
||||
Slice rawblock = (*builder)->Finish();
|
||||
|
||||
BlockContents contents;
|
||||
contents.data = rawblock;
|
||||
contents.cachable = false;
|
||||
contents.heap_allocated = false;
|
||||
|
||||
return contents;
|
||||
}
|
||||
|
||||
// Verifies hash-index lookups on a block built from `keys`/`values`.
// Two readers are created over the same contents: reader1 gets a hash index
// (fixed 6-byte prefix), reader2 stays a plain binary-search reader.
//  - Every key in `keys` must be found through the hash iterator with the
//    matching value.
//  - For the odd ids in [1, max_key - 1), which the callers in this file never
//    generate (they use step 2 starting at 0), the hash iterator must end up
//    invalid while the regular iterator must still be valid.
void CheckBlockContents(BlockContents contents, const int max_key,
                        const std::vector<std::string> &keys,
                        const std::vector<std::string> &values) {
  const size_t prefix_size = 6;
  // create block reader
  Block reader1(contents);
  Block reader2(contents);

  std::unique_ptr<const SliceTransform> prefix_extractor(
      NewFixedPrefixTransform(prefix_size));

  {
    // The iterators are only needed while the hash index is being built;
    // unique_ptr releases them at the end of this scope instead of relying on
    // manual delete.
    std::unique_ptr<Iterator> iter1(reader1.NewIterator(nullptr));
    std::unique_ptr<Iterator> iter2(reader1.NewIterator(nullptr));
    reader1.SetBlockHashIndex(
        CreateBlockHashIndex(iter1.get(), iter2.get(), keys.size(),
                             BytewiseComparator(), prefix_extractor.get()));
  }

  std::unique_ptr<Iterator> hash_iter(
      reader1.NewIterator(BytewiseComparator()));

  std::unique_ptr<Iterator> regular_iter(
      reader2.NewIterator(BytewiseComparator()));

  // Seek existent keys
  for (size_t i = 0; i < keys.size(); i++) {
    hash_iter->Seek(keys[i]);
    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());

    Slice v = hash_iter->value();
    ASSERT_EQ(v.ToString().compare(values[i]), 0);
  }

  // Seek non-existent keys.
  // For the hash index, if no key with the given prefix is found, the iterator
  // is simply set as invalid; whereas the binary search based iterator will
  // return the one that is closest.
  for (int i = 1; i < max_key - 1; i += 2) {
    auto key = GenerateKey(i, 0, 0, nullptr);
    hash_iter->Seek(key);
    ASSERT_TRUE(!hash_iter->Valid());

    regular_iter->Seek(key);
    ASSERT_TRUE(regular_iter->Valid());
  }
}
|
||||
|
||||
// In this test case, no two key share same prefix.
|
||||
TEST(BlockTest, SimpleIndexHash) {
|
||||
const int kMaxKey = 100000;
|
||||
std::vector<std::string> keys;
|
||||
std::vector<std::string> values;
|
||||
GenerateRandomKVs(&keys, &values, 0 /* first key id */,
|
||||
kMaxKey /* last key id */, 2 /* step */,
|
||||
8 /* padding size (8 bytes randomly generated suffix) */);
|
||||
|
||||
std::unique_ptr<BlockBuilder> builder;
|
||||
auto contents = GetBlockContents(&builder, keys, values);
|
||||
|
||||
CheckBlockContents(contents, kMaxKey, keys, values);
|
||||
}
|
||||
|
||||
// Hash-index check in which several keys share each prefix.
TEST(BlockTest, IndexHashWithSharedPrefix) {
  const int kKeyIdLimit = 100000;
  // Number of keys generated for every prefix.
  const int kKeysPerPrefix = 5;
  std::vector<std::string> keys;
  std::vector<std::string> values;
  GenerateRandomKVs(&keys, &values,
                    0,                // first key id
                    kKeyIdLimit,      // number of ids covered, starting at 0
                    2,                // step
                    10,               // padding size
                    kKeysPerPrefix);  // keys sharing one prefix

  // The builder is kept alive here for as long as the contents are checked.
  std::unique_ptr<BlockBuilder> builder;
  CheckBlockContents(GetBlockContents(&builder, keys, values, kKeysPerPrefix),
                     kKeyIdLimit, keys, values);
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
|
|
@ -1055,6 +1055,116 @@ static std::string RandomString(Random* rnd, int len) {
|
|||
return r;
|
||||
}
|
||||
|
||||
// Adds one entry with value "v" to `c`. The key is an InternalKey built from
// `prefix` followed by `suffix_len` random characters, together with 0 and
// kTypeValue. All calls share a single function-local random generator
// (seeded with 1023), so the generated suffixes depend on call order.
void AddInternalKey(TableConstructor* c, const std::string prefix,
                    int suffix_len = 800) {
  static Random rnd(1023);
  // Honor suffix_len; the length used to be hard-coded to 800 (the default),
  // which silently ignored the parameter.
  InternalKey k(prefix + RandomString(&rnd, suffix_len), 0, kTypeValue);
  c->Add(k.Encode().ToString(), "v");
}
|
||||
|
||||
// End-to-end check of a block-based table built with the kHashSearch index
// type and a 3-byte fixed prefix extractor.
TEST(TableTest, HashIndexTest) {
  TableConstructor constructor(BytewiseComparator());

  // Ten entries whose user keys start with one of the 3-character prefixes
  // 001, 003, 005, 007, 009. The entries are large (see AddInternalKey); with
  // the block size configured below the test expects them to be spread over
  // 5 data blocks.
  for (const char* user_key_start :
       {"0015", "0035", "0054", "0055", "0056", "0057", "0058", "0075", "0076",
        "0095"}) {
    AddInternalKey(&constructor, user_key_start);
  }

  BlockBasedTableOptions table_options;
  table_options.index_type = BlockBasedTableOptions::kHashSearch;

  Options options;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  options.block_cache = NewLRUCache(1024);
  options.block_size = 1700;

  std::vector<std::string> keys;
  KVMap kvmap;
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  constructor.Finish(options, *comparator, &keys, &kvmap);
  auto reader = constructor.table_reader();

  auto props = constructor.table_reader()->GetTableProperties();
  ASSERT_EQ(5u, props->num_data_blocks);

  std::unique_ptr<Iterator> hash_iter(reader->NewIterator(ReadOptions()));

  // 1) Seek to keys that do not exist themselves but whose prefix does: the
  //    iterator must land on the first stored key carrying that prefix.
  const std::vector<std::string> prefixes = {"001", "003", "005", "007",
                                             "009"};
  const std::vector<std::string> lower_bound = {keys[0], keys[1], keys[2],
                                                keys[7], keys[9]};
  for (size_t i = 0; i < prefixes.size(); ++i) {
    hash_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());

    ASSERT_EQ(lower_bound[i], hash_iter->key().ToString());
    ASSERT_EQ("v", hash_iter->value().ToString());
  }

  // Expected landing keys for step 3 below (one entry fewer than `prefixes`:
  // the last prefix has no successor).
  const std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7],
                                                keys[9]};

  // 2) Seek to every stored key; the seek target is the result of
  //    ExtractUserKey() on the stored key.
  for (const auto& entry : kvmap) {
    const std::string ukey = ExtractUserKey(entry.first).ToString();
    hash_iter->Seek(ukey);

    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());

    ASSERT_EQ(entry.first, hash_iter->key().ToString());
    ASSERT_EQ(entry.second, hash_iter->value().ToString());
  }

  // 3) Seek to prefix + "9", which sorts after every stored key with that
  //    prefix: the iterator must move on to the first key of the next prefix,
  //    or become invalid for the last prefix.
  for (size_t i = 0; i < prefixes.size(); ++i) {
    const std::string key = prefixes[i] + "9";
    hash_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());

    ASSERT_OK(hash_iter->status());
    const bool is_last_prefix = (i == prefixes.size() - 1);
    if (!is_last_prefix) {
      ASSERT_TRUE(hash_iter->Valid());
      ASSERT_EQ(upper_bound[i], hash_iter->key().ToString());
      ASSERT_EQ("v", hash_iter->value().ToString());
    } else {
      ASSERT_TRUE(!hash_iter->Valid());
    }
  }

  // 4) Seek with prefixes that match no stored key at all: the iterator must
  //    be invalid, with an OK status.
  const std::vector<std::string> non_exist_prefixes = {"002", "004", "006",
                                                       "008"};
  for (const auto& missing_prefix : non_exist_prefixes) {
    hash_iter->Seek(InternalKey(missing_prefix, 0, kTypeValue).Encode());

    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(!hash_iter->Valid());
  }
}
|
||||
|
||||
// It's very hard to figure out the index block size of a block accurately.
|
||||
// To make sure we get the index size, we just make sure as key number
|
||||
// grows, the filter block size also grows.
|
||||
|
|
Loading…
Reference in a new issue