rocksdb/db/plain_table_db_test.cc

1297 lines
44 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef ROCKSDB_LITE
#include <algorithm>
#include <set>
#include "db/db_impl/db_impl.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "file/filename.h"
#include "logging/logging.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
#include "table/meta_blocks.h"
#include "table/plain/plain_table_bloom.h"
#include "table/plain/plain_table_factory.h"
#include "table/plain/plain_table_key_coding.h"
#include "table/plain/plain_table_reader.h"
#include "table/table_builder.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/hash.h"
#include "util/mutexlock.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"
using std::unique_ptr;
namespace rocksdb {
class PlainTableKeyDecoderTest : public testing::Test {};
TEST_F(PlainTableKeyDecoderTest, ReadNonMmap) {
std::string tmp;
Random rnd(301);
const uint32_t kLength = 2222;
Slice contents = test::RandomString(&rnd, kLength, &tmp);
test::StringSource* string_source =
new test::StringSource(contents, 0, false);
std::unique_ptr<RandomAccessFileReader> file_reader(
test::GetRandomAccessFileReader(string_source));
std::unique_ptr<PlainTableReaderFileInfo> file_info(
new PlainTableReaderFileInfo(std::move(file_reader), EnvOptions(),
kLength));
{
PlainTableFileReader reader(file_info.get());
const uint32_t kReadSize = 77;
for (uint32_t pos = 0; pos < kLength; pos += kReadSize) {
uint32_t read_size = std::min(kLength - pos, kReadSize);
Slice out;
ASSERT_TRUE(reader.Read(pos, read_size, &out));
ASSERT_EQ(0, out.compare(tmp.substr(pos, read_size)));
}
ASSERT_LT(uint32_t(string_source->total_reads()), kLength / kReadSize / 2);
}
std::vector<std::vector<std::pair<uint32_t, uint32_t>>> reads = {
{{600, 30}, {590, 30}, {600, 20}, {600, 40}},
{{800, 20}, {100, 20}, {500, 20}, {1500, 20}, {100, 20}, {80, 20}},
{{1000, 20}, {500, 20}, {1000, 50}},
{{1000, 20}, {500, 20}, {500, 20}},
{{1000, 20}, {500, 20}, {200, 20}, {500, 20}},
{{1000, 20}, {500, 20}, {200, 20}, {1000, 50}},
{{600, 500}, {610, 20}, {100, 20}},
{{500, 100}, {490, 100}, {550, 50}},
};
std::vector<int> num_file_reads = {2, 6, 2, 2, 4, 3, 2, 2};
for (size_t i = 0; i < reads.size(); i++) {
string_source->set_total_reads(0);
PlainTableFileReader reader(file_info.get());
for (auto p : reads[i]) {
Slice out;
ASSERT_TRUE(reader.Read(p.first, p.second, &out));
ASSERT_EQ(0, out.compare(tmp.substr(p.first, p.second)));
}
ASSERT_EQ(num_file_reads[i], string_source->total_reads());
}
}
class PlainTableDBTest : public testing::Test,
public testing::WithParamInterface<bool> {
protected:
private:
std::string dbname_;
Env* env_;
DB* db_;
bool mmap_mode_;
Options last_options_;
public:
PlainTableDBTest() : env_(Env::Default()) {}
~PlainTableDBTest() override {
delete db_;
EXPECT_OK(DestroyDB(dbname_, Options()));
}
void SetUp() override {
mmap_mode_ = GetParam();
dbname_ = test::PerThreadDBPath("plain_table_db_test");
EXPECT_OK(DestroyDB(dbname_, Options()));
db_ = nullptr;
Reopen();
}
// Return the current option configuration.
Options CurrentOptions() {
Options options;
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = 2;
plain_table_options.hash_table_ratio = 0.8;
plain_table_options.index_sparseness = 3;
plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPrefix;
plain_table_options.full_scan_mode = false;
plain_table_options.store_index_in_file = false;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true));
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
options.allow_mmap_reads = mmap_mode_;
options.allow_concurrent_memtable_write = false;
options.unordered_write = false;
return options;
}
DBImpl* dbfull() {
return reinterpret_cast<DBImpl*>(db_);
}
void Reopen(Options* options = nullptr) {
ASSERT_OK(TryReopen(options));
}
void Close() {
delete db_;
db_ = nullptr;
}
bool mmap_mode() const { return mmap_mode_; }
void DestroyAndReopen(Options* options = nullptr) {
//Destroy using last options
Destroy(&last_options_);
ASSERT_OK(TryReopen(options));
}
void Destroy(Options* options) {
delete db_;
db_ = nullptr;
ASSERT_OK(DestroyDB(dbname_, *options));
}
Status PureReopen(Options* options, DB** db) {
return DB::Open(*options, dbname_, db);
}
Status ReopenForReadOnly(Options* options) {
delete db_;
db_ = nullptr;
return DB::OpenForReadOnly(*options, dbname_, &db_);
}
Status TryReopen(Options* options = nullptr) {
delete db_;
db_ = nullptr;
Options opts;
if (options != nullptr) {
opts = *options;
} else {
opts = CurrentOptions();
opts.create_if_missing = true;
}
last_options_ = opts;
return DB::Open(opts, dbname_, &db_);
}
Status Put(const Slice& k, const Slice& v) {
return db_->Put(WriteOptions(), k, v);
}
Status Delete(const std::string& k) {
return db_->Delete(WriteOptions(), k);
}
std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
ReadOptions options;
options.snapshot = snapshot;
std::string result;
Status s = db_->Get(options, k, &result);
if (s.IsNotFound()) {
result = "NOT_FOUND";
} else if (!s.ok()) {
result = s.ToString();
}
return result;
}
int NumTableFilesAtLevel(int level) {
std::string property;
EXPECT_TRUE(db_->GetProperty(
"rocksdb.num-files-at-level" + NumberToString(level), &property));
return atoi(property.c_str());
}
// Return spread of files per level
std::string FilesPerLevel() {
std::string result;
size_t last_non_zero_offset = 0;
for (int level = 0; level < db_->NumberLevels(); level++) {
int f = NumTableFilesAtLevel(level);
char buf[100];
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
result += buf;
if (f > 0) {
last_non_zero_offset = result.size();
}
}
result.resize(last_non_zero_offset);
return result;
}
std::string IterStatus(Iterator* iter) {
std::string result;
if (iter->Valid()) {
result = iter->key().ToString() + "->" + iter->value().ToString();
} else {
result = "(invalid)";
}
return result;
}
};
TEST_P(PlainTableDBTest, Empty) {
ASSERT_TRUE(dbfull() != nullptr);
ASSERT_EQ("NOT_FOUND", Get("0000000000000foo"));
}
extern const uint64_t kPlainTableMagicNumber;
class TestPlainTableReader : public PlainTableReader {
public:
TestPlainTableReader(const EnvOptions& env_options,
const InternalKeyComparator& icomparator,
EncodingType encoding_type, uint64_t file_size,
int bloom_bits_per_key, double hash_table_ratio,
size_t index_sparseness,
const TableProperties* table_properties,
std::unique_ptr<RandomAccessFileReader>&& file,
const ImmutableCFOptions& ioptions,
const SliceTransform* prefix_extractor,
bool* expect_bloom_not_match, bool store_index_in_file,
uint32_t column_family_id,
const std::string& column_family_name)
: PlainTableReader(ioptions, std::move(file), env_options, icomparator,
encoding_type, file_size, table_properties,
prefix_extractor),
expect_bloom_not_match_(expect_bloom_not_match) {
Status s = MmapDataIfNeeded();
EXPECT_TRUE(s.ok());
s = PopulateIndex(const_cast<TableProperties*>(table_properties),
bloom_bits_per_key, hash_table_ratio, index_sparseness,
2 * 1024 * 1024);
EXPECT_TRUE(s.ok());
TableProperties* props = const_cast<TableProperties*>(table_properties);
EXPECT_EQ(column_family_id, static_cast<uint32_t>(props->column_family_id));
EXPECT_EQ(column_family_name, props->column_family_name);
if (store_index_in_file) {
auto bloom_version_ptr = props->user_collected_properties.find(
PlainTablePropertyNames::kBloomVersion);
EXPECT_TRUE(bloom_version_ptr != props->user_collected_properties.end());
EXPECT_EQ(bloom_version_ptr->second, std::string("1"));
if (ioptions.bloom_locality > 0) {
auto num_blocks_ptr = props->user_collected_properties.find(
PlainTablePropertyNames::kNumBloomBlocks);
EXPECT_TRUE(num_blocks_ptr != props->user_collected_properties.end());
}
}
}
~TestPlainTableReader() override {}
private:
bool MatchBloom(uint32_t hash) const override {
bool ret = PlainTableReader::MatchBloom(hash);
if (*expect_bloom_not_match_) {
EXPECT_TRUE(!ret);
} else {
EXPECT_TRUE(ret);
}
return ret;
}
bool* expect_bloom_not_match_;
};
extern const uint64_t kPlainTableMagicNumber;
class TestPlainTableFactory : public PlainTableFactory {
public:
explicit TestPlainTableFactory(bool* expect_bloom_not_match,
const PlainTableOptions& options,
uint32_t column_family_id,
std::string column_family_name)
: PlainTableFactory(options),
bloom_bits_per_key_(options.bloom_bits_per_key),
hash_table_ratio_(options.hash_table_ratio),
index_sparseness_(options.index_sparseness),
store_index_in_file_(options.store_index_in_file),
expect_bloom_not_match_(expect_bloom_not_match),
column_family_id_(column_family_id),
column_family_name_(std::move(column_family_name)) {}
Status NewTableReader(
const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table,
bool /*prefetch_index_and_filter_in_cache*/) const override {
TableProperties* props = nullptr;
auto s =
ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
table_reader_options.ioptions, &props,
true /* compression_type_missing */);
EXPECT_TRUE(s.ok());
if (store_index_in_file_) {
BlockHandle bloom_block_handle;
s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber,
table_reader_options.ioptions,
BloomBlockBuilder::kBloomBlock, &bloom_block_handle,
/* compression_type_missing */ true);
EXPECT_TRUE(s.ok());
BlockHandle index_block_handle;
s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber,
table_reader_options.ioptions,
PlainTableIndexBuilder::kPlainTableIndexBlock,
&index_block_handle, /* compression_type_missing */ true);
EXPECT_TRUE(s.ok());
}
auto& user_props = props->user_collected_properties;
auto encoding_type_prop =
user_props.find(PlainTablePropertyNames::kEncodingType);
assert(encoding_type_prop != user_props.end());
EncodingType encoding_type = static_cast<EncodingType>(
DecodeFixed32(encoding_type_prop->second.c_str()));
std::unique_ptr<PlainTableReader> new_reader(new TestPlainTableReader(
table_reader_options.env_options,
table_reader_options.internal_comparator, encoding_type, file_size,
bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, props,
std::move(file), table_reader_options.ioptions,
table_reader_options.prefix_extractor, expect_bloom_not_match_,
store_index_in_file_, column_family_id_, column_family_name_));
*table = std::move(new_reader);
return s;
}
private:
int bloom_bits_per_key_;
double hash_table_ratio_;
size_t index_sparseness_;
bool store_index_in_file_;
bool* expect_bloom_not_match_;
const uint32_t column_family_id_;
const std::string column_family_name_;
};
TEST_P(PlainTableDBTest, Flush) {
for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
huge_page_tlb_size += 2 * 1024 * 1024) {
for (EncodingType encoding_type : {kPlain, kPrefix}) {
for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
for (int total_order = 0; total_order <= 1; total_order++) {
for (int store_index_in_file = 0; store_index_in_file <= 1;
++store_index_in_file) {
Options options = CurrentOptions();
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
if (total_order) {
options.prefix_extractor.reset();
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
plain_table_options.full_scan_mode = false;
plain_table_options.store_index_in_file = store_index_in_file;
options.table_factory.reset(
NewPlainTableFactory(plain_table_options));
} else {
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
plain_table_options.full_scan_mode = false;
plain_table_options.store_index_in_file = store_index_in_file;
options.table_factory.reset(
NewPlainTableFactory(plain_table_options));
}
DestroyAndReopen(&options);
uint64_t int_num;
ASSERT_TRUE(dbfull()->GetIntProperty(
"rocksdb.estimate-table-readers-mem", &int_num));
ASSERT_EQ(int_num, 0U);
ASSERT_OK(Put("1000000000000foo", "v1"));
ASSERT_OK(Put("0000000000000bar", "v2"));
ASSERT_OK(Put("1000000000000foo", "v3"));
dbfull()->TEST_FlushMemTable();
ASSERT_TRUE(dbfull()->GetIntProperty(
"rocksdb.estimate-table-readers-mem", &int_num));
ASSERT_GT(int_num, 0U);
TablePropertiesCollection ptc;
reinterpret_cast<DB*>(dbfull())->GetPropertiesOfAllTables(&ptc);
ASSERT_EQ(1U, ptc.size());
auto row = ptc.begin();
auto tp = row->second;
if (!store_index_in_file) {
ASSERT_EQ(total_order ? "4" : "12",
(tp->user_collected_properties)
.at("plain_table_hash_table_size"));
ASSERT_EQ("0", (tp->user_collected_properties)
.at("plain_table_sub_index_size"));
} else {
ASSERT_EQ("0", (tp->user_collected_properties)
.at("plain_table_hash_table_size"));
ASSERT_EQ("0", (tp->user_collected_properties)
.at("plain_table_sub_index_size"));
}
ASSERT_EQ("v3", Get("1000000000000foo"));
ASSERT_EQ("v2", Get("0000000000000bar"));
}
}
}
}
}
}
TEST_P(PlainTableDBTest, Flush2) {
for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
huge_page_tlb_size += 2 * 1024 * 1024) {
for (EncodingType encoding_type : {kPlain, kPrefix}) {
for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
for (int total_order = 0; total_order <= 1; total_order++) {
for (int store_index_in_file = 0; store_index_in_file <= 1;
++store_index_in_file) {
if (encoding_type == kPrefix && total_order) {
continue;
}
if (!bloom_bits && store_index_in_file) {
continue;
}
if (total_order && store_index_in_file) {
continue;
}
bool expect_bloom_not_match = false;
Options options = CurrentOptions();
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
PlainTableOptions plain_table_options;
if (total_order) {
options.prefix_extractor = nullptr;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2;
} else {
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
}
plain_table_options.user_key_len = kPlainTableVariableLength;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
plain_table_options.store_index_in_file = store_index_in_file;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, plain_table_options,
0 /* column_family_id */, kDefaultColumnFamilyName));
DestroyAndReopen(&options);
ASSERT_OK(Put("0000000000000bar", "b"));
ASSERT_OK(Put("1000000000000foo", "v1"));
dbfull()->TEST_FlushMemTable();
ASSERT_OK(Put("1000000000000foo", "v2"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v2", Get("1000000000000foo"));
ASSERT_OK(Put("0000000000000eee", "v3"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v3", Get("0000000000000eee"));
ASSERT_OK(Delete("0000000000000bar"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("NOT_FOUND", Get("0000000000000bar"));
ASSERT_OK(Put("0000000000000eee", "v5"));
ASSERT_OK(Put("9000000000000eee", "v5"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v5", Get("0000000000000eee"));
// Test Bloom Filter
if (bloom_bits > 0) {
// Neither key nor value should exist.
expect_bloom_not_match = true;
ASSERT_EQ("NOT_FOUND", Get("5_not00000000bar"));
// Key doesn't exist any more but prefix exists.
if (total_order) {
ASSERT_EQ("NOT_FOUND", Get("1000000000000not"));
ASSERT_EQ("NOT_FOUND", Get("0000000000000not"));
}
expect_bloom_not_match = false;
}
}
}
}
}
}
}
TEST_P(PlainTableDBTest, Immortal) {
for (EncodingType encoding_type : {kPlain, kPrefix}) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.max_open_files = -1;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
PlainTableOptions plain_table_options;
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.user_key_len = kPlainTableVariableLength;
plain_table_options.bloom_bits_per_key = 10;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("0000000000000bar", "b"));
ASSERT_OK(Put("1000000000000foo", "v1"));
dbfull()->TEST_FlushMemTable();
int copied = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"GetContext::SaveValue::PinSelf", [&](void* /*arg*/) { copied++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
ASSERT_EQ("b", Get("0000000000000bar"));
ASSERT_EQ("v1", Get("1000000000000foo"));
ASSERT_EQ(2, copied);
copied = 0;
Close();
ASSERT_OK(ReopenForReadOnly(&options));
ASSERT_EQ("b", Get("0000000000000bar"));
ASSERT_EQ("v1", Get("1000000000000foo"));
ASSERT_EQ("NOT_FOUND", Get("1000000000000bar"));
if (mmap_mode()) {
ASSERT_EQ(0, copied);
} else {
ASSERT_EQ(2, copied);
}
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
}
TEST_P(PlainTableDBTest, Iterator) {
for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
huge_page_tlb_size += 2 * 1024 * 1024) {
for (EncodingType encoding_type : {kPlain, kPrefix}) {
for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
for (int total_order = 0; total_order <= 1; total_order++) {
if (encoding_type == kPrefix && total_order == 1) {
continue;
}
bool expect_bloom_not_match = false;
Options options = CurrentOptions();
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
if (total_order) {
options.prefix_extractor = nullptr;
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, plain_table_options,
0 /* column_family_id */, kDefaultColumnFamilyName));
} else {
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = bloom_bits;
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
plain_table_options.encoding_type = encoding_type;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, plain_table_options,
0 /* column_family_id */, kDefaultColumnFamilyName));
}
DestroyAndReopen(&options);
ASSERT_OK(Put("1000000000foo002", "v_2"));
ASSERT_OK(Put("0000000000000bar", "random"));
ASSERT_OK(Put("1000000000foo001", "v1"));
ASSERT_OK(Put("3000000000000bar", "bar_v"));
ASSERT_OK(Put("1000000000foo003", "v__3"));
ASSERT_OK(Put("1000000000foo004", "v__4"));
ASSERT_OK(Put("1000000000foo005", "v__5"));
ASSERT_OK(Put("1000000000foo007", "v__7"));
ASSERT_OK(Put("1000000000foo008", "v__8"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get("1000000000foo001"));
ASSERT_EQ("v__3", Get("1000000000foo003"));
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek("1000000000foo000");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo001", iter->key().ToString());
ASSERT_EQ("v1", iter->value().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo002", iter->key().ToString());
ASSERT_EQ("v_2", iter->value().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo003", iter->key().ToString());
ASSERT_EQ("v__3", iter->value().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo004", iter->key().ToString());
ASSERT_EQ("v__4", iter->value().ToString());
iter->Seek("3000000000000bar");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("3000000000000bar", iter->key().ToString());
ASSERT_EQ("bar_v", iter->value().ToString());
iter->Seek("1000000000foo000");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo001", iter->key().ToString());
ASSERT_EQ("v1", iter->value().ToString());
iter->Seek("1000000000foo005");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo005", iter->key().ToString());
ASSERT_EQ("v__5", iter->value().ToString());
iter->Seek("1000000000foo006");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo007", iter->key().ToString());
ASSERT_EQ("v__7", iter->value().ToString());
iter->Seek("1000000000foo008");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo008", iter->key().ToString());
ASSERT_EQ("v__8", iter->value().ToString());
if (total_order == 0) {
iter->Seek("1000000000foo009");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("3000000000000bar", iter->key().ToString());
}
// Test Bloom Filter
if (bloom_bits > 0) {
if (!total_order) {
// Neither key nor value should exist.
expect_bloom_not_match = true;
iter->Seek("2not000000000bar");
ASSERT_TRUE(!iter->Valid());
ASSERT_EQ("NOT_FOUND", Get("2not000000000bar"));
expect_bloom_not_match = false;
} else {
expect_bloom_not_match = true;
ASSERT_EQ("NOT_FOUND", Get("2not000000000bar"));
expect_bloom_not_match = false;
}
}
delete iter;
}
}
}
}
}
namespace {
std::string NthKey(size_t n, char filler) {
std::string rv(16, filler);
rv[0] = n % 10;
rv[1] = (n / 10) % 10;
rv[2] = (n / 100) % 10;
rv[3] = (n / 1000) % 10;
return rv;
}
} // anonymous namespace
TEST_P(PlainTableDBTest, BloomSchema) {
Options options = CurrentOptions();
options.create_if_missing = true;
for (int bloom_locality = 0; bloom_locality <= 1; bloom_locality++) {
options.bloom_locality = bloom_locality;
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 3; // high FP rate for test
plain_table_options.hash_table_ratio = 0.75;
plain_table_options.index_sparseness = 16;
plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPlain;
bool expect_bloom_not_match = false;
options.table_factory.reset(new TestPlainTableFactory(
&expect_bloom_not_match, plain_table_options,
0 /* column_family_id */, kDefaultColumnFamilyName));
DestroyAndReopen(&options);
for (unsigned i = 0; i < 2345; ++i) {
ASSERT_OK(Put(NthKey(i, 'y'), "added"));
}
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("added", Get(NthKey(42, 'y')));
for (unsigned i = 0; i < 32; ++i) {
// Known pattern of Bloom filter false positives can detect schema change
// with high probability. Known FPs stuffed into bits:
uint32_t pattern;
if (!bloom_locality) {
pattern = 1785868347UL;
} else if (CACHE_LINE_SIZE == 64U) {
pattern = 2421694657UL;
} else if (CACHE_LINE_SIZE == 128U) {
pattern = 788710956UL;
} else {
ASSERT_EQ(CACHE_LINE_SIZE, 256U);
pattern = 163905UL;
}
bool expect_fp = pattern & (1UL << i);
//fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp);
expect_bloom_not_match = !expect_fp;
ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n')));
}
}
}
namespace {
std::string MakeLongKey(size_t length, char c) {
return std::string(length, c);
}
} // namespace
TEST_P(PlainTableDBTest, IteratorLargeKeys) {
Options options = CurrentOptions();
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 0;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
options.create_if_missing = true;
options.prefix_extractor.reset();
DestroyAndReopen(&options);
std::string key_list[] = {
MakeLongKey(30, '0'),
MakeLongKey(16, '1'),
MakeLongKey(32, '2'),
MakeLongKey(60, '3'),
MakeLongKey(90, '4'),
MakeLongKey(50, '5'),
MakeLongKey(26, '6')
};
for (size_t i = 0; i < 7; i++) {
ASSERT_OK(Put(key_list[i], ToString(i)));
}
dbfull()->TEST_FlushMemTable();
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek(key_list[0]);
for (size_t i = 0; i < 7; i++) {
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(key_list[i], iter->key().ToString());
ASSERT_EQ(ToString(i), iter->value().ToString());
iter->Next();
}
ASSERT_TRUE(!iter->Valid());
delete iter;
}
namespace {
std::string MakeLongKeyWithPrefix(size_t length, char c) {
return "00000000" + std::string(length - 8, c);
}
} // namespace
TEST_P(PlainTableDBTest, IteratorLargeKeysWithPrefix) {
Options options = CurrentOptions();
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0.8;
plain_table_options.index_sparseness = 3;
plain_table_options.huge_page_tlb_size = 0;
plain_table_options.encoding_type = kPrefix;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
options.create_if_missing = true;
DestroyAndReopen(&options);
std::string key_list[] = {
MakeLongKeyWithPrefix(30, '0'), MakeLongKeyWithPrefix(16, '1'),
MakeLongKeyWithPrefix(32, '2'), MakeLongKeyWithPrefix(60, '3'),
MakeLongKeyWithPrefix(90, '4'), MakeLongKeyWithPrefix(50, '5'),
MakeLongKeyWithPrefix(26, '6')};
for (size_t i = 0; i < 7; i++) {
ASSERT_OK(Put(key_list[i], ToString(i)));
}
dbfull()->TEST_FlushMemTable();
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek(key_list[0]);
for (size_t i = 0; i < 7; i++) {
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(key_list[i], iter->key().ToString());
ASSERT_EQ(ToString(i), iter->value().ToString());
iter->Next();
}
ASSERT_TRUE(!iter->Valid());
delete iter;
}
TEST_P(PlainTableDBTest, IteratorReverseSuffixComparator) {
Options options = CurrentOptions();
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
test::SimpleSuffixReverseComparator comp;
options.comparator = &comp;
DestroyAndReopen(&options);
ASSERT_OK(Put("1000000000foo002", "v_2"));
ASSERT_OK(Put("0000000000000bar", "random"));
ASSERT_OK(Put("1000000000foo001", "v1"));
ASSERT_OK(Put("3000000000000bar", "bar_v"));
ASSERT_OK(Put("1000000000foo003", "v__3"));
ASSERT_OK(Put("1000000000foo004", "v__4"));
ASSERT_OK(Put("1000000000foo005", "v__5"));
ASSERT_OK(Put("1000000000foo007", "v__7"));
ASSERT_OK(Put("1000000000foo008", "v__8"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get("1000000000foo001"));
ASSERT_EQ("v__3", Get("1000000000foo003"));
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek("1000000000foo009");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo008", iter->key().ToString());
ASSERT_EQ("v__8", iter->value().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo007", iter->key().ToString());
ASSERT_EQ("v__7", iter->value().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo005", iter->key().ToString());
ASSERT_EQ("v__5", iter->value().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo004", iter->key().ToString());
ASSERT_EQ("v__4", iter->value().ToString());
iter->Seek("3000000000000bar");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("3000000000000bar", iter->key().ToString());
ASSERT_EQ("bar_v", iter->value().ToString());
iter->Seek("1000000000foo005");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo005", iter->key().ToString());
ASSERT_EQ("v__5", iter->value().ToString());
iter->Seek("1000000000foo006");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo005", iter->key().ToString());
ASSERT_EQ("v__5", iter->value().ToString());
iter->Seek("1000000000foo008");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("1000000000foo008", iter->key().ToString());
ASSERT_EQ("v__8", iter->value().ToString());
iter->Seek("1000000000foo000");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("3000000000000bar", iter->key().ToString());
delete iter;
}
TEST_P(PlainTableDBTest, HashBucketConflict) {
for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
huge_page_tlb_size += 2 * 1024 * 1024) {
for (unsigned char i = 1; i <= 3; i++) {
Options options = CurrentOptions();
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2 ^ i;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("5000000000000fo0", "v1"));
ASSERT_OK(Put("5000000000000fo1", "v2"));
ASSERT_OK(Put("5000000000000fo2", "v"));
ASSERT_OK(Put("2000000000000fo0", "v3"));
ASSERT_OK(Put("2000000000000fo1", "v4"));
ASSERT_OK(Put("2000000000000fo2", "v"));
ASSERT_OK(Put("2000000000000fo3", "v"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get("5000000000000fo0"));
ASSERT_EQ("v2", Get("5000000000000fo1"));
ASSERT_EQ("v3", Get("2000000000000fo0"));
ASSERT_EQ("v4", Get("2000000000000fo1"));
ASSERT_EQ("NOT_FOUND", Get("5000000000000bar"));
ASSERT_EQ("NOT_FOUND", Get("2000000000000bar"));
ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8"));
ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8"));
ReadOptions ro;
Iterator* iter = dbfull()->NewIterator(ro);
iter->Seek("5000000000000fo0");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo0", iter->key().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo1", iter->key().ToString());
iter->Seek("5000000000000fo1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo1", iter->key().ToString());
iter->Seek("2000000000000fo0");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo0", iter->key().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo1", iter->key().ToString());
iter->Seek("2000000000000fo1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo1", iter->key().ToString());
iter->Seek("2000000000000bar");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo0", iter->key().ToString());
iter->Seek("5000000000000bar");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo0", iter->key().ToString());
iter->Seek("2000000000000fo8");
ASSERT_TRUE(!iter->Valid() ||
options.comparator->Compare(iter->key(), "20000001") > 0);
iter->Seek("5000000000000fo8");
ASSERT_TRUE(!iter->Valid());
iter->Seek("1000000000000fo2");
ASSERT_TRUE(!iter->Valid());
iter->Seek("3000000000000fo2");
ASSERT_TRUE(!iter->Valid());
iter->Seek("8000000000000fo2");
ASSERT_TRUE(!iter->Valid());
delete iter;
}
}
}
TEST_P(PlainTableDBTest, HashBucketConflictReverseSuffixComparator) {
for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
huge_page_tlb_size += 2 * 1024 * 1024) {
for (unsigned char i = 1; i <= 3; i++) {
Options options = CurrentOptions();
options.create_if_missing = true;
test::SimpleSuffixReverseComparator comp;
options.comparator = &comp;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 2 ^ i;
plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("5000000000000fo0", "v1"));
ASSERT_OK(Put("5000000000000fo1", "v2"));
ASSERT_OK(Put("5000000000000fo2", "v"));
ASSERT_OK(Put("2000000000000fo0", "v3"));
ASSERT_OK(Put("2000000000000fo1", "v4"));
ASSERT_OK(Put("2000000000000fo2", "v"));
ASSERT_OK(Put("2000000000000fo3", "v"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get("5000000000000fo0"));
ASSERT_EQ("v2", Get("5000000000000fo1"));
ASSERT_EQ("v3", Get("2000000000000fo0"));
ASSERT_EQ("v4", Get("2000000000000fo1"));
ASSERT_EQ("NOT_FOUND", Get("5000000000000bar"));
ASSERT_EQ("NOT_FOUND", Get("2000000000000bar"));
ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8"));
ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8"));
ReadOptions ro;
Iterator* iter = dbfull()->NewIterator(ro);
iter->Seek("5000000000000fo1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo1", iter->key().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo0", iter->key().ToString());
iter->Seek("5000000000000fo1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo1", iter->key().ToString());
iter->Seek("2000000000000fo1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo1", iter->key().ToString());
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo0", iter->key().ToString());
iter->Seek("2000000000000fo1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo1", iter->key().ToString());
iter->Seek("2000000000000var");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("2000000000000fo3", iter->key().ToString());
iter->Seek("5000000000000var");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo2", iter->key().ToString());
std::string seek_key = "2000000000000bar";
iter->Seek(seek_key);
ASSERT_TRUE(!iter->Valid() ||
options.prefix_extractor->Transform(iter->key()) !=
options.prefix_extractor->Transform(seek_key));
iter->Seek("1000000000000fo2");
ASSERT_TRUE(!iter->Valid());
iter->Seek("3000000000000fo2");
ASSERT_TRUE(!iter->Valid());
iter->Seek("8000000000000fo2");
ASSERT_TRUE(!iter->Valid());
delete iter;
}
}
}
TEST_P(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) {
Options options = CurrentOptions();
options.create_if_missing = true;
// Set only one bucket to force bucket conflict.
// Test index interval for the same prefix to be 1, 2 and 4
PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;
plain_table_options.bloom_bits_per_key = 0;
plain_table_options.hash_table_ratio = 0;
plain_table_options.index_sparseness = 5;
options.table_factory.reset(NewPlainTableFactory(plain_table_options));
DestroyAndReopen(&options);
ASSERT_OK(Put("5000000000000fo0", "v1"));
ASSERT_OK(Put("5000000000000fo1", "v2"));
ASSERT_OK(Put("5000000000000fo2", "v3"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v1", Get("5000000000000fo0"));
ASSERT_EQ("v2", Get("5000000000000fo1"));
ASSERT_EQ("v3", Get("5000000000000fo2"));
ASSERT_EQ("NOT_FOUND", Get("8000000000000bar"));
ASSERT_EQ("NOT_FOUND", Get("1000000000000bar"));
Iterator* iter = dbfull()->NewIterator(ReadOptions());
iter->Seek("5000000000000bar");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("5000000000000fo0", iter->key().ToString());
iter->Seek("5000000000000fo8");
ASSERT_TRUE(!iter->Valid());
iter->Seek("1000000000000fo2");
ASSERT_TRUE(!iter->Valid());
iter->Seek("8000000000000fo2");
ASSERT_TRUE(!iter->Valid());
delete iter;
}
static std::string Key(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "key_______%06d", i);
return std::string(buf);
}
static std::string RandomString(Random* rnd, int len) {
std::string r;
test::RandomString(rnd, len, &r);
return r;
}
TEST_P(PlainTableDBTest, CompactionTrigger) {
Options options = CurrentOptions();
options.write_buffer_size = 120 << 10; // 100KB
options.num_levels = 3;
options.level0_file_num_compaction_trigger = 3;
Reopen(&options);
Random rnd(301);
for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
num++) {
std::vector<std::string> values;
// Write 120KB (10 values, each 12K)
for (int i = 0; i < 10; i++) {
values.push_back(RandomString(&rnd, 12000));
ASSERT_OK(Put(Key(i), values[i]));
}
ASSERT_OK(Put(Key(999), ""));
dbfull()->TEST_WaitForFlushMemTable();
ASSERT_EQ(NumTableFilesAtLevel(0), num + 1);
}
//generate one more file in level-0, and should trigger level-0 compaction
std::vector<std::string> values;
for (int i = 0; i < 12; i++) {
values.push_back(RandomString(&rnd, 10000));
ASSERT_OK(Put(Key(i), values[i]));
}
ASSERT_OK(Put(Key(999), ""));
dbfull()->TEST_WaitForCompact();
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_EQ(NumTableFilesAtLevel(1), 1);
}
TEST_P(PlainTableDBTest, AdaptiveTable) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.table_factory.reset(NewPlainTableFactory());
DestroyAndReopen(&options);
ASSERT_OK(Put("1000000000000foo", "v1"));
ASSERT_OK(Put("0000000000000bar", "v2"));
ASSERT_OK(Put("1000000000000foo", "v3"));
dbfull()->TEST_FlushMemTable();
options.create_if_missing = false;
std::shared_ptr<TableFactory> dummy_factory;
std::shared_ptr<TableFactory> block_based_factory(
NewBlockBasedTableFactory());
options.table_factory.reset(NewAdaptiveTableFactory(
block_based_factory, dummy_factory, dummy_factory));
Reopen(&options);
ASSERT_EQ("v3", Get("1000000000000foo"));
ASSERT_EQ("v2", Get("0000000000000bar"));
ASSERT_OK(Put("2000000000000foo", "v4"));
ASSERT_OK(Put("3000000000000bar", "v5"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v4", Get("2000000000000foo"));
ASSERT_EQ("v5", Get("3000000000000bar"));
Reopen(&options);
ASSERT_EQ("v3", Get("1000000000000foo"));
ASSERT_EQ("v2", Get("0000000000000bar"));
ASSERT_EQ("v4", Get("2000000000000foo"));
ASSERT_EQ("v5", Get("3000000000000bar"));
options.table_factory.reset(NewBlockBasedTableFactory());
Reopen(&options);
ASSERT_NE("v3", Get("1000000000000foo"));
options.table_factory.reset(NewPlainTableFactory());
Reopen(&options);
ASSERT_NE("v5", Get("3000000000000bar"));
}
INSTANTIATE_TEST_CASE_P(PlainTableDBTest, PlainTableDBTest, ::testing::Bool());
} // namespace rocksdb
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#else
#include <stdio.h>
int main(int /*argc*/, char** /*argv*/) {
fprintf(stderr, "SKIPPED as plain table is not supported in ROCKSDB_LITE\n");
return 0;
}
#endif // !ROCKSDB_LITE