2020-04-24 22:30:12 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
|
|
|
#include "table/block_fetcher.h"
|
2020-07-09 21:33:42 +00:00
|
|
|
|
2020-04-24 22:30:12 +00:00
|
|
|
#include "db/table_properties_collector.h"
|
2020-07-09 21:33:42 +00:00
|
|
|
#include "file/file_util.h"
|
2020-04-24 22:30:12 +00:00
|
|
|
#include "options/options_helper.h"
|
|
|
|
#include "port/port.h"
|
|
|
|
#include "port/stack_trace.h"
|
2021-09-29 11:01:57 +00:00
|
|
|
#include "rocksdb/db.h"
|
2021-01-29 06:08:46 +00:00
|
|
|
#include "rocksdb/file_system.h"
|
2020-04-24 22:30:12 +00:00
|
|
|
#include "table/block_based/binary_search_index_reader.h"
|
|
|
|
#include "table/block_based/block_based_table_builder.h"
|
|
|
|
#include "table/block_based/block_based_table_factory.h"
|
|
|
|
#include "table/block_based/block_based_table_reader.h"
|
|
|
|
#include "table/format.h"
|
|
|
|
#include "test_util/testharness.h"
|
2021-12-17 12:19:34 +00:00
|
|
|
#include "utilities/memory_allocators.h"
|
2020-04-24 22:30:12 +00:00
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
namespace {
|
|
|
|
// Number of memcpy calls a BlockFetcher reported, split by the kind of buffer
// the data was copied into.
//
// Members are zero-initialized so that an instance which is never filled in
// (for example because a helper returned early on a fatal assertion) does not
// expose indeterminate values to later comparisons.
struct MemcpyStats {
  int num_stack_buf_memcpy = 0;
  int num_heap_buf_memcpy = 0;
  int num_compressed_buf_memcpy = 0;
};
|
|
|
|
|
|
|
|
// Number of buffer allocations expected from the heap-buffer allocator and
// from the compressed-buffer allocator during one block fetch.
//
// Members are zero-initialized so a default-constructed instance is always
// safe to compare against.
struct BufAllocationStats {
  int num_heap_buf_allocations = 0;
  int num_compressed_buf_allocations = 0;
};
|
|
|
|
|
|
|
|
struct TestStats {
|
|
|
|
MemcpyStats memcpy_stats;
|
|
|
|
BufAllocationStats buf_allocation_stats;
|
|
|
|
};
|
|
|
|
|
|
|
|
class BlockFetcherTest : public testing::Test {
|
2020-06-30 22:38:59 +00:00
|
|
|
public:
|
|
|
|
// The file read configurations every test is repeated under.
enum class Mode {
  kBufferedRead = 0,
  kBufferedMmap,
  kDirectRead,
  // Sentinel that counts the real modes above; not a mode itself.
  kNumModes,
};
// Plain-int copy of Mode::kNumModes. It is used as the array size because the
// scoped enumerator itself triggers "size of array '...' has non-integral
// type" errors.
static const int NumModes = static_cast<int>(Mode::kNumModes);
|
|
|
|
|
2020-04-24 22:30:12 +00:00
|
|
|
protected:
|
|
|
|
void SetUp() override {
|
2020-07-09 21:33:42 +00:00
|
|
|
SetupSyncPointsToMockDirectIO();
|
2020-04-24 22:30:12 +00:00
|
|
|
test_dir_ = test::PerThreadDBPath("block_fetcher_test");
|
|
|
|
env_ = Env::Default();
|
|
|
|
fs_ = FileSystem::Default();
|
|
|
|
ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr));
|
|
|
|
}
|
|
|
|
|
2020-07-09 21:33:42 +00:00
|
|
|
// Per-test cleanup: removes the scratch directory. A failure is reported with
// a non-fatal expectation.
void TearDown() override {
  EXPECT_OK(DestroyDir(env_, test_dir_));
}
|
2020-04-24 22:30:12 +00:00
|
|
|
|
2020-06-30 22:38:59 +00:00
|
|
|
// Fatally asserts that the two block payloads, given as strings, are equal.
void AssertSameBlock(const std::string& block1, const std::string& block2) {
  ASSERT_EQ(block1, block2);
}
|
|
|
|
|
|
|
|
// Creates a table with kv pairs (i, i) where i ranges from 0 to 9, inclusive.
|
|
|
|
void CreateTable(const std::string& table_name,
|
|
|
|
const CompressionType& compression_type) {
|
|
|
|
std::unique_ptr<WritableFileWriter> writer;
|
|
|
|
NewFileWriter(table_name, &writer);
|
|
|
|
|
|
|
|
// Create table builder.
|
2021-05-05 20:59:21 +00:00
|
|
|
ImmutableOptions ioptions(options_);
|
2020-06-30 22:38:59 +00:00
|
|
|
InternalKeyComparator comparator(options_.comparator);
|
|
|
|
ColumnFamilyOptions cf_options(options_);
|
2020-04-24 22:30:12 +00:00
|
|
|
MutableCFOptions moptions(cf_options);
|
2021-05-18 01:27:42 +00:00
|
|
|
IntTblPropCollectorFactories factories;
|
2020-04-24 22:30:12 +00:00
|
|
|
std::unique_ptr<TableBuilder> table_builder(table_factory_.NewTableBuilder(
|
|
|
|
TableBuilderOptions(ioptions, moptions, comparator, &factories,
|
2021-03-25 21:58:23 +00:00
|
|
|
compression_type, CompressionOptions(),
|
Add more LSM info to FilterBuildingContext (#8246)
Summary:
Add `num_levels`, `is_bottommost`, and table file creation
`reason` to `FilterBuildingContext`, in anticipation of more powerful
Bloom-like filter support.
To support this, added `is_bottommost` and `reason` to
`TableBuilderOptions`, which allowed removing `reason` parameter from
`rocksdb::BuildTable`.
I attempted to remove `skip_filters` from `TableBuilderOptions`, because
filter construction decisions should arise from options, not one-off
parameters. I could not completely remove it because the public API for
SstFileWriter takes a `skip_filters` parameter, and translating this
into an option change would mean awkwardly replacing the table_factory
if it is BlockBasedTableFactory with new filter_policy=nullptr option.
I marked this public skip_filters option as deprecated because of this
oddity. (skip_filters on the read side probably makes sense.)
At least `skip_filters` is now largely hidden for users of
`TableBuilderOptions` and is no longer used for implementing the
optimize_filters_for_hits option. Bringing the logic for that option
closer to handling of FilterBuildingContext makes it more obvious that
hese two are using the same notion of "bottommost." (Planned:
configuration options for Bloom-like filters that generalize
`optimize_filters_for_hits`)
Recommended follow-up: Try to get away from "bottommost level" naming of
things, which is inaccurate (see
VersionStorageInfo::RangeMightExistAfterSortedRun), and move to
"bottommost run" or just "bottommost."
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8246
Test Plan:
extended an existing unit test to exercise and check various
filter building contexts. Also, existing tests for
optimize_filters_for_hits validate some of the "bottommost" handling,
which is now closely connected to FilterBuildingContext::is_bottommost
through TableBuilderOptions::is_bottommost
Reviewed By: mrambacher
Differential Revision: D28099346
Pulled By: pdillinger
fbshipit-source-id: 2c1072e29c24d4ac404c761a7b7663292372600a
2021-04-30 20:49:24 +00:00
|
|
|
0 /* column_family_id */, kDefaultColumnFamilyName,
|
|
|
|
-1 /* level */),
|
2021-04-29 13:59:53 +00:00
|
|
|
writer.get()));
|
2020-04-24 22:30:12 +00:00
|
|
|
|
|
|
|
// Build table.
|
|
|
|
for (int i = 0; i < 9; i++) {
|
|
|
|
std::string key = ToInternalKey(std::to_string(i));
|
Fix testcase failures on windows (#7992)
Summary:
Fixed 5 test case failures found on Windows 10/Windows Server 2016
1. In `flush_job_test`, the DestroyDir function fails in deconstructor because some file handles are still being held by VersionSet. This happens on Windows Server 2016, so need to manually reset versions_ pointer to release all file handles.
2. In `StatsHistoryTest.InMemoryStatsHistoryPurging` test, the capping memory cost of stats_history_size on Windows becomes 14000 bytes with latest changes, not just 13000 bytes.
3. In `SSTDumpToolTest.RawOutput` test, the output file handle is not closed at the end.
4. In `FullBloomTest.OptimizeForMemory` test, ROCKSDB_MALLOC_USABLE_SIZE is undefined on windows so `total_mem` is always equal to `total_size`. The internal memory fragmentation assertion does not apply in this case.
5. In `BlockFetcherTest.FetchAndUncompressCompressedDataBlock` test, XPRESS cannot reach 87.5% compression ratio with original CreateTable method, so I append extra zeros to the string value to enhance compression ratio. Beside, since XPRESS allocates memory internally, thus does not support for custom allocator verification, we will skip the allocator verification for XPRESS
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7992
Reviewed By: jay-zhuang
Differential Revision: D26615283
Pulled By: ajkr
fbshipit-source-id: 3632612f84b99e2b9c77c403b112b6bedf3b125d
2021-02-23 22:31:50 +00:00
|
|
|
// Append "00000000" to string value to enhance compression ratio
|
|
|
|
std::string value = "00000000" + std::to_string(i);
|
2020-04-24 22:30:12 +00:00
|
|
|
table_builder->Add(key, value);
|
|
|
|
}
|
|
|
|
ASSERT_OK(table_builder->Finish());
|
|
|
|
}
|
|
|
|
|
2020-06-30 22:38:59 +00:00
|
|
|
void FetchIndexBlock(const std::string& table_name,
|
2020-04-24 22:30:12 +00:00
|
|
|
CountedMemoryAllocator* heap_buf_allocator,
|
|
|
|
CountedMemoryAllocator* compressed_buf_allocator,
|
2020-06-30 22:38:59 +00:00
|
|
|
MemcpyStats* memcpy_stats, BlockContents* index_block,
|
|
|
|
std::string* result) {
|
|
|
|
FileOptions fopt(options_);
|
2020-04-24 22:30:12 +00:00
|
|
|
std::unique_ptr<RandomAccessFileReader> file;
|
|
|
|
NewFileReader(table_name, fopt, &file);
|
|
|
|
|
|
|
|
// Get handle of the index block.
|
|
|
|
Footer footer;
|
|
|
|
ReadFooter(file.get(), &footer);
|
|
|
|
const BlockHandle& index_handle = footer.index_handle();
|
|
|
|
|
|
|
|
CompressionType compression_type;
|
|
|
|
FetchBlock(file.get(), index_handle, BlockType::kIndex,
|
|
|
|
false /* compressed */, false /* do_uncompress */,
|
|
|
|
heap_buf_allocator, compressed_buf_allocator, index_block,
|
|
|
|
memcpy_stats, &compression_type);
|
|
|
|
ASSERT_EQ(compression_type, CompressionType::kNoCompression);
|
2020-06-30 22:38:59 +00:00
|
|
|
result->assign(index_block->data.ToString());
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Fetches the first data block in both direct IO and non-direct IO mode.
|
|
|
|
//
|
|
|
|
// compressed: whether the data blocks are compressed;
|
|
|
|
// do_uncompress: whether the data blocks should be uncompressed on fetching.
|
|
|
|
// compression_type: the expected compression type.
|
|
|
|
//
|
|
|
|
// Expects:
|
|
|
|
// Block contents are the same.
|
|
|
|
// Bufferr allocation and memory copy statistics are expected.
|
2020-06-30 22:38:59 +00:00
|
|
|
void TestFetchDataBlock(
|
|
|
|
const std::string& table_name_prefix, bool compressed, bool do_uncompress,
|
|
|
|
std::array<TestStats, NumModes> expected_stats_by_mode) {
|
2020-04-24 22:30:12 +00:00
|
|
|
for (CompressionType compression_type : GetSupportedCompressions()) {
|
|
|
|
bool do_compress = compression_type != kNoCompression;
|
|
|
|
if (compressed != do_compress) continue;
|
|
|
|
std::string compression_type_str =
|
|
|
|
CompressionTypeToString(compression_type);
|
|
|
|
|
|
|
|
std::string table_name = table_name_prefix + compression_type_str;
|
|
|
|
CreateTable(table_name, compression_type);
|
|
|
|
|
|
|
|
CompressionType expected_compression_type_after_fetch =
|
|
|
|
(compressed && !do_uncompress) ? compression_type : kNoCompression;
|
|
|
|
|
2020-06-30 22:38:59 +00:00
|
|
|
BlockContents blocks[NumModes];
|
|
|
|
std::string block_datas[NumModes];
|
|
|
|
MemcpyStats memcpy_stats[NumModes];
|
|
|
|
CountedMemoryAllocator heap_buf_allocators[NumModes];
|
|
|
|
CountedMemoryAllocator compressed_buf_allocators[NumModes];
|
|
|
|
for (int i = 0; i < NumModes; ++i) {
|
|
|
|
SetMode(static_cast<Mode>(i));
|
|
|
|
FetchFirstDataBlock(table_name, compressed, do_uncompress,
|
|
|
|
expected_compression_type_after_fetch,
|
|
|
|
&heap_buf_allocators[i],
|
|
|
|
&compressed_buf_allocators[i], &blocks[i],
|
|
|
|
&block_datas[i], &memcpy_stats[i]);
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
|
2020-06-30 22:38:59 +00:00
|
|
|
for (int i = 0; i < NumModes - 1; ++i) {
|
|
|
|
AssertSameBlock(block_datas[i], block_datas[i + 1]);
|
|
|
|
}
|
2020-04-24 22:30:12 +00:00
|
|
|
|
|
|
|
// Check memcpy and buffer allocation statistics.
|
2020-06-30 22:38:59 +00:00
|
|
|
for (int i = 0; i < NumModes; ++i) {
|
|
|
|
const TestStats& expected_stats = expected_stats_by_mode[i];
|
2020-04-24 22:30:12 +00:00
|
|
|
|
2020-06-30 22:38:59 +00:00
|
|
|
ASSERT_EQ(memcpy_stats[i].num_stack_buf_memcpy,
|
2020-04-24 22:30:12 +00:00
|
|
|
expected_stats.memcpy_stats.num_stack_buf_memcpy);
|
2020-06-30 22:38:59 +00:00
|
|
|
ASSERT_EQ(memcpy_stats[i].num_heap_buf_memcpy,
|
2020-04-24 22:30:12 +00:00
|
|
|
expected_stats.memcpy_stats.num_heap_buf_memcpy);
|
2020-06-30 22:38:59 +00:00
|
|
|
ASSERT_EQ(memcpy_stats[i].num_compressed_buf_memcpy,
|
2020-04-24 22:30:12 +00:00
|
|
|
expected_stats.memcpy_stats.num_compressed_buf_memcpy);
|
|
|
|
|
Fix testcase failures on windows (#7992)
Summary:
Fixed 5 test case failures found on Windows 10/Windows Server 2016
1. In `flush_job_test`, the DestroyDir function fails in deconstructor because some file handles are still being held by VersionSet. This happens on Windows Server 2016, so need to manually reset versions_ pointer to release all file handles.
2. In `StatsHistoryTest.InMemoryStatsHistoryPurging` test, the capping memory cost of stats_history_size on Windows becomes 14000 bytes with latest changes, not just 13000 bytes.
3. In `SSTDumpToolTest.RawOutput` test, the output file handle is not closed at the end.
4. In `FullBloomTest.OptimizeForMemory` test, ROCKSDB_MALLOC_USABLE_SIZE is undefined on windows so `total_mem` is always equal to `total_size`. The internal memory fragmentation assertion does not apply in this case.
5. In `BlockFetcherTest.FetchAndUncompressCompressedDataBlock` test, XPRESS cannot reach 87.5% compression ratio with original CreateTable method, so I append extra zeros to the string value to enhance compression ratio. Beside, since XPRESS allocates memory internally, thus does not support for custom allocator verification, we will skip the allocator verification for XPRESS
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7992
Reviewed By: jay-zhuang
Differential Revision: D26615283
Pulled By: ajkr
fbshipit-source-id: 3632612f84b99e2b9c77c403b112b6bedf3b125d
2021-02-23 22:31:50 +00:00
|
|
|
if (kXpressCompression == compression_type) {
|
|
|
|
// XPRESS allocates memory internally, thus does not support for
|
|
|
|
// custom allocator verification
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
ASSERT_EQ(
|
|
|
|
heap_buf_allocators[i].GetNumAllocations(),
|
|
|
|
expected_stats.buf_allocation_stats.num_heap_buf_allocations);
|
|
|
|
ASSERT_EQ(compressed_buf_allocators[i].GetNumAllocations(),
|
|
|
|
expected_stats.buf_allocation_stats
|
|
|
|
.num_compressed_buf_allocations);
|
|
|
|
|
|
|
|
// The allocated buffers are not deallocated until
|
|
|
|
// the block content is deleted.
|
|
|
|
ASSERT_EQ(heap_buf_allocators[i].GetNumDeallocations(), 0);
|
|
|
|
ASSERT_EQ(compressed_buf_allocators[i].GetNumDeallocations(), 0);
|
|
|
|
blocks[i].allocation.reset();
|
|
|
|
ASSERT_EQ(
|
|
|
|
heap_buf_allocators[i].GetNumDeallocations(),
|
|
|
|
expected_stats.buf_allocation_stats.num_heap_buf_allocations);
|
|
|
|
ASSERT_EQ(compressed_buf_allocators[i].GetNumDeallocations(),
|
|
|
|
expected_stats.buf_allocation_stats
|
|
|
|
.num_compressed_buf_allocations);
|
|
|
|
}
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-30 22:38:59 +00:00
|
|
|
// Configures options_ for the requested read mode by setting
// use_direct_reads and allow_mmap_reads. Mode::kNumModes is only a count and
// trips an assert.
void SetMode(Mode mode) {
  if (mode == Mode::kBufferedRead) {
    options_.use_direct_reads = false;
    options_.allow_mmap_reads = false;
  } else if (mode == Mode::kBufferedMmap) {
    options_.use_direct_reads = false;
    options_.allow_mmap_reads = true;
  } else if (mode == Mode::kDirectRead) {
    options_.use_direct_reads = true;
    options_.allow_mmap_reads = false;
  } else if (mode == Mode::kNumModes) {
    assert(false);
  }
}
|
|
|
|
|
2020-04-24 22:30:12 +00:00
|
|
|
private:
|
|
|
|
std::string test_dir_;
|
|
|
|
Env* env_;
|
|
|
|
std::shared_ptr<FileSystem> fs_;
|
|
|
|
BlockBasedTableFactory table_factory_;
|
2020-06-30 22:38:59 +00:00
|
|
|
Options options_;
|
2020-04-24 22:30:12 +00:00
|
|
|
|
|
|
|
// Returns the path of `fname` inside the test directory.
std::string Path(const std::string& fname) {
  std::string full_path(test_dir_);
  full_path += "/";
  full_path += fname;
  return full_path;
}
|
|
|
|
|
|
|
|
void WriteToFile(const std::string& content, const std::string& filename) {
|
|
|
|
std::unique_ptr<FSWritableFile> f;
|
|
|
|
ASSERT_OK(fs_->NewWritableFile(Path(filename), FileOptions(), &f, nullptr));
|
|
|
|
ASSERT_OK(f->Append(content, IOOptions(), nullptr));
|
|
|
|
ASSERT_OK(f->Close(IOOptions(), nullptr));
|
|
|
|
}
|
|
|
|
|
|
|
|
void NewFileWriter(const std::string& filename,
|
|
|
|
std::unique_ptr<WritableFileWriter>* writer) {
|
|
|
|
std::string path = Path(filename);
|
2021-01-29 06:08:46 +00:00
|
|
|
FileOptions file_options;
|
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), path,
|
|
|
|
file_options, writer, nullptr));
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void NewFileReader(const std::string& filename, const FileOptions& opt,
|
|
|
|
std::unique_ptr<RandomAccessFileReader>* reader) {
|
|
|
|
std::string path = Path(filename);
|
|
|
|
std::unique_ptr<FSRandomAccessFile> f;
|
|
|
|
ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr));
|
2021-03-15 11:32:24 +00:00
|
|
|
reader->reset(new RandomAccessFileReader(std::move(f), path,
|
|
|
|
env_->GetSystemClock().get()));
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
|
2021-05-05 20:59:21 +00:00
|
|
|
void NewTableReader(const ImmutableOptions& ioptions,
|
2020-04-24 22:30:12 +00:00
|
|
|
const FileOptions& foptions,
|
|
|
|
const InternalKeyComparator& comparator,
|
|
|
|
const std::string& table_name,
|
|
|
|
std::unique_ptr<BlockBasedTable>* table) {
|
|
|
|
std::unique_ptr<RandomAccessFileReader> file;
|
|
|
|
NewFileReader(table_name, foptions, &file);
|
|
|
|
|
|
|
|
uint64_t file_size = 0;
|
|
|
|
ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size));
|
|
|
|
|
|
|
|
std::unique_ptr<TableReader> table_reader;
|
2020-06-29 21:51:57 +00:00
|
|
|
ReadOptions ro;
|
2020-09-14 23:59:00 +00:00
|
|
|
const auto* table_options =
|
|
|
|
table_factory_.GetOptions<BlockBasedTableOptions>();
|
|
|
|
ASSERT_NE(table_options, nullptr);
|
Record and use the tail size to prefetch table tail (#11406)
Summary:
**Context:**
We prefetch the tail part of a SST file (i.e, the blocks after data blocks till the end of the file) during each SST file open in hope to prefetch all the stuff at once ahead of time for later read e.g, footer, meta index, filter/index etc. The existing approach to estimate the tail size to prefetch is through `TailPrefetchStats` heuristics introduced in https://github.com/facebook/rocksdb/pull/4156, which has caused small reads in unlucky case (e.g, small read into the tail buffer during table open in thread 1 under the same BlockBasedTableFactory object can make thread 2's tail prefetching use a small size that it shouldn't) and is hard to debug. Therefore we decide to record the exact tail size and use it directly to prefetch tail of the SST instead of relying heuristics.
**Summary:**
- Obtain and record in manifest the tail size in `BlockBasedTableBuilder::Finish()`
- For backward compatibility, we fall back to TailPrefetchStats and last to simple heuristics that the tail size is a linear portion of the file size - see PR conversation for more.
- Make`tail_start_offset` part of the table properties and deduct tail size to record in manifest for external files (e.g, file ingestion, import CF) and db repair (with no access to manifest).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11406
Test Plan:
1. New UT
2. db bench
Note: db bench on /tmp/ where direct read is supported is too slow to finish and the default pinning setting in db bench is not helpful to profile # sst read of Get. Therefore I hacked the following to obtain the following comparison.
```
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index bd5669f0f..791484c1f 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -838,7 +838,7 @@ Status BlockBasedTable::PrefetchTail(
&tail_prefetch_size);
// Try file system prefetch
- if (!file->use_direct_io() && !force_direct_prefetch) {
+ if (false && !file->use_direct_io() && !force_direct_prefetch) {
if (!file->Prefetch(prefetch_off, prefetch_len, ro.rate_limiter_priority)
.IsNotSupported()) {
prefetch_buffer->reset(new FilePrefetchBuffer(
diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc
index ea40f5fa0..39a0ac385 100644
--- a/tools/db_bench_tool.cc
+++ b/tools/db_bench_tool.cc
@@ -4191,6 +4191,8 @@ class Benchmark {
std::shared_ptr<TableFactory>(NewCuckooTableFactory(table_options));
} else {
BlockBasedTableOptions block_based_options;
+ block_based_options.metadata_cache_options.partition_pinning =
+ PinningTier::kAll;
block_based_options.checksum =
static_cast<ChecksumType>(FLAGS_checksum_type);
if (FLAGS_use_hash_search) {
```
Create DB
```
./db_bench --bloom_bits=3 --use_existing_db=1 --seed=1682546046158958 --partition_index_and_filters=1 --statistics=1 -db=/dev/shm/testdb/ -benchmarks=readrandom -key_size=3200 -value_size=512 -num=1000000 -write_buffer_size=6550000 -disable_auto_compactions=false -target_file_size_base=6550000 -compression_type=none
```
ReadRandom
```
./db_bench --bloom_bits=3 --use_existing_db=1 --seed=1682546046158958 --partition_index_and_filters=1 --statistics=1 -db=/dev/shm/testdb/ -benchmarks=readrandom -key_size=3200 -value_size=512 -num=1000000 -write_buffer_size=6550000 -disable_auto_compactions=false -target_file_size_base=6550000 -compression_type=none
```
(a) Existing (Use TailPrefetchStats for tail size + use seperate prefetch buffer in PartitionedFilter/IndexReader::CacheDependencies())
```
rocksdb.table.open.prefetch.tail.hit COUNT : 3395
rocksdb.sst.read.micros P50 : 5.655570 P95 : 9.931396 P99 : 14.845454 P100 : 585.000000 COUNT : 999905 SUM : 6590614
```
(b) This PR (Record tail size + use the same tail buffer in PartitionedFilter/IndexReader::CacheDependencies())
```
rocksdb.table.open.prefetch.tail.hit COUNT : 14257
rocksdb.sst.read.micros P50 : 5.173347 P95 : 9.015017 P99 : 12.912610 P100 : 228.000000 COUNT : 998547 SUM : 5976540
```
As we can see, we increase the prefetch tail hit count and decrease SST read count with this PR
3. Test backward compatibility by stepping through reading with post-PR code on a db generated pre-PR.
Reviewed By: pdillinger
Differential Revision: D45413346
Pulled By: hx235
fbshipit-source-id: 7d5e36a60a72477218f79905168d688452a4c064
2023-05-08 20:14:28 +00:00
|
|
|
ASSERT_OK(BlockBasedTable::Open(ro, ioptions, EnvOptions(), *table_options,
|
|
|
|
comparator, std::move(file), file_size,
|
|
|
|
0 /* block_protection_bytes_per_key */,
|
|
|
|
&table_reader, 0 /* tail_size */));
|
2020-04-24 22:30:12 +00:00
|
|
|
|
|
|
|
table->reset(reinterpret_cast<BlockBasedTable*>(table_reader.release()));
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string ToInternalKey(const std::string& key) {
|
|
|
|
InternalKey internal_key(key, 0, ValueType::kTypeValue);
|
|
|
|
return internal_key.Encode().ToString();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reads the footer of the table behind `file` into `footer`.
// The file size is looked up through env_ using the reader's file name, and
// the footer is read expecting kBlockBasedTableMagicNumber.
void ReadFooter(RandomAccessFileReader* file, Footer* footer) {
  // Size of the file, as needed by ReadFooterFromFile.
  uint64_t file_size = 0;
  ASSERT_OK(env_->GetFileSize(file->file_name(), &file_size));

  IOOptions opts;
  ASSERT_OK(ReadFooterFromFile(opts, file, *fs_,
                               nullptr /* prefetch_buffer */, file_size,
                               footer, kBlockBasedTableMagicNumber));
}
|
|
|
|
|
|
|
|
// NOTE: compression_type returns the compression type of the fetched block
|
|
|
|
// contents, so if the block is fetched and uncompressed, then it's
|
|
|
|
// kNoCompression.
|
|
|
|
void FetchBlock(RandomAccessFileReader* file, const BlockHandle& block,
|
|
|
|
BlockType block_type, bool compressed, bool do_uncompress,
|
|
|
|
MemoryAllocator* heap_buf_allocator,
|
|
|
|
MemoryAllocator* compressed_buf_allocator,
|
|
|
|
BlockContents* contents, MemcpyStats* stats,
|
|
|
|
CompressionType* compresstion_type) {
|
2021-05-05 20:59:21 +00:00
|
|
|
ImmutableOptions ioptions(options_);
|
2020-04-24 22:30:12 +00:00
|
|
|
ReadOptions roptions;
|
|
|
|
PersistentCacheOptions persistent_cache_options;
|
|
|
|
Footer footer;
|
|
|
|
ReadFooter(file, &footer);
|
|
|
|
std::unique_ptr<BlockFetcher> fetcher(new BlockFetcher(
|
|
|
|
file, nullptr /* prefetch_buffer */, footer, roptions, block, contents,
|
|
|
|
ioptions, do_uncompress, compressed, block_type,
|
|
|
|
UncompressionDict::GetEmptyDict(), persistent_cache_options,
|
|
|
|
heap_buf_allocator, compressed_buf_allocator));
|
|
|
|
|
|
|
|
ASSERT_OK(fetcher->ReadBlockContents());
|
|
|
|
|
|
|
|
stats->num_stack_buf_memcpy = fetcher->TEST_GetNumStackBufMemcpy();
|
|
|
|
stats->num_heap_buf_memcpy = fetcher->TEST_GetNumHeapBufMemcpy();
|
|
|
|
stats->num_compressed_buf_memcpy =
|
|
|
|
fetcher->TEST_GetNumCompressedBufMemcpy();
|
|
|
|
|
|
|
|
*compresstion_type = fetcher->get_compression_type();
|
|
|
|
}
|
|
|
|
|
|
|
|
// NOTE: expected_compression_type is the expected compression
|
|
|
|
// type of the fetched block content, if the block is uncompressed,
|
|
|
|
// then the expected compression type is kNoCompression.
|
2020-06-30 22:38:59 +00:00
|
|
|
void FetchFirstDataBlock(const std::string& table_name, bool compressed,
|
|
|
|
bool do_uncompress,
|
2020-04-24 22:30:12 +00:00
|
|
|
CompressionType expected_compression_type,
|
|
|
|
MemoryAllocator* heap_buf_allocator,
|
|
|
|
MemoryAllocator* compressed_buf_allocator,
|
2020-06-30 22:38:59 +00:00
|
|
|
BlockContents* block, std::string* result,
|
|
|
|
MemcpyStats* memcpy_stats) {
|
2021-05-05 20:59:21 +00:00
|
|
|
ImmutableOptions ioptions(options_);
|
2020-06-30 22:38:59 +00:00
|
|
|
InternalKeyComparator comparator(options_.comparator);
|
|
|
|
FileOptions foptions(options_);
|
2020-04-24 22:30:12 +00:00
|
|
|
|
|
|
|
// Get block handle for the first data block.
|
|
|
|
std::unique_ptr<BlockBasedTable> table;
|
|
|
|
NewTableReader(ioptions, foptions, comparator, table_name, &table);
|
|
|
|
|
|
|
|
std::unique_ptr<BlockBasedTable::IndexReader> index_reader;
|
2020-06-29 21:51:57 +00:00
|
|
|
ReadOptions ro;
|
2020-04-24 22:30:12 +00:00
|
|
|
ASSERT_OK(BinarySearchIndexReader::Create(
|
2020-06-29 21:51:57 +00:00
|
|
|
table.get(), ro, nullptr /* prefetch_buffer */, false /* use_cache */,
|
2020-04-24 22:30:12 +00:00
|
|
|
false /* prefetch */, false /* pin */, nullptr /* lookup_context */,
|
|
|
|
&index_reader));
|
|
|
|
|
|
|
|
std::unique_ptr<InternalIteratorBase<IndexValue>> iter(
|
|
|
|
index_reader->NewIterator(
|
|
|
|
ReadOptions(), false /* disable_prefix_seek */, nullptr /* iter */,
|
|
|
|
nullptr /* get_context */, nullptr /* lookup_context */));
|
|
|
|
ASSERT_OK(iter->status());
|
|
|
|
iter->SeekToFirst();
|
|
|
|
BlockHandle first_block_handle = iter->value().handle;
|
|
|
|
|
|
|
|
// Fetch first data block.
|
|
|
|
std::unique_ptr<RandomAccessFileReader> file;
|
|
|
|
NewFileReader(table_name, foptions, &file);
|
|
|
|
CompressionType compression_type;
|
|
|
|
FetchBlock(file.get(), first_block_handle, BlockType::kData, compressed,
|
|
|
|
do_uncompress, heap_buf_allocator, compressed_buf_allocator,
|
|
|
|
block, memcpy_stats, &compression_type);
|
|
|
|
ASSERT_EQ(compression_type, expected_compression_type);
|
2020-06-30 22:38:59 +00:00
|
|
|
result->assign(block->data.ToString());
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-05-21 20:55:18 +00:00
|
|
|
// Skip the following tests in lite mode since direct I/O is unsupported.
|
|
|
|
|
2020-04-24 22:30:12 +00:00
|
|
|
// Fetch index block under both direct IO and non-direct IO.
|
|
|
|
// Expects:
|
|
|
|
// the index block contents are the same for both read modes.
|
|
|
|
TEST_F(BlockFetcherTest, FetchIndexBlock) {
|
|
|
|
for (CompressionType compression : GetSupportedCompressions()) {
|
|
|
|
std::string table_name =
|
|
|
|
"FetchIndexBlock" + CompressionTypeToString(compression);
|
|
|
|
CreateTable(table_name, compression);
|
|
|
|
|
|
|
|
CountedMemoryAllocator allocator;
|
|
|
|
MemcpyStats memcpy_stats;
|
2020-06-30 22:38:59 +00:00
|
|
|
BlockContents indexes[NumModes];
|
|
|
|
std::string index_datas[NumModes];
|
|
|
|
for (int i = 0; i < NumModes; ++i) {
|
|
|
|
SetMode(static_cast<Mode>(i));
|
|
|
|
FetchIndexBlock(table_name, &allocator, &allocator, &memcpy_stats,
|
|
|
|
&indexes[i], &index_datas[i]);
|
|
|
|
}
|
|
|
|
for (int i = 0; i < NumModes - 1; ++i) {
|
|
|
|
AssertSameBlock(index_datas[i], index_datas[i + 1]);
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Data blocks are not compressed,
|
2020-06-30 22:38:59 +00:00
|
|
|
// fetch data block under direct IO, mmap IO,and non-direct IO.
|
2020-04-24 22:30:12 +00:00
|
|
|
// Expects:
|
|
|
|
// 1. in non-direct IO mode, allocate a heap buffer and memcpy the block
|
|
|
|
// into the buffer;
|
|
|
|
// 2. in direct IO mode, allocate a heap buffer and memcpy from the
|
|
|
|
// direct IO buffer to the heap buffer.
|
|
|
|
TEST_F(BlockFetcherTest, FetchUncompressedDataBlock) {
|
2020-06-30 22:38:59 +00:00
|
|
|
TestStats expected_non_mmap_stats = {
|
|
|
|
{
|
|
|
|
0 /* num_stack_buf_memcpy */,
|
|
|
|
1 /* num_heap_buf_memcpy */,
|
|
|
|
0 /* num_compressed_buf_memcpy */,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
1 /* num_heap_buf_allocations */,
|
|
|
|
0 /* num_compressed_buf_allocations */,
|
|
|
|
}};
|
|
|
|
TestStats expected_mmap_stats = {{
|
|
|
|
0 /* num_stack_buf_memcpy */,
|
|
|
|
0 /* num_heap_buf_memcpy */,
|
|
|
|
0 /* num_compressed_buf_memcpy */,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
0 /* num_heap_buf_allocations */,
|
|
|
|
0 /* num_compressed_buf_allocations */,
|
|
|
|
}};
|
|
|
|
std::array<TestStats, NumModes> expected_stats_by_mode{{
|
|
|
|
expected_non_mmap_stats /* kBufferedRead */,
|
|
|
|
expected_mmap_stats /* kBufferedMmap */,
|
|
|
|
expected_non_mmap_stats /* kDirectRead */,
|
|
|
|
}};
|
|
|
|
TestFetchDataBlock("FetchUncompressedDataBlock", false, false,
|
|
|
|
expected_stats_by_mode);
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Data blocks are compressed,
|
|
|
|
// fetch data block under both direct IO and non-direct IO,
|
|
|
|
// but do not uncompress.
|
|
|
|
// Expects:
|
|
|
|
// 1. in non-direct IO mode, allocate a compressed buffer and memcpy the block
|
|
|
|
// into the buffer;
|
|
|
|
// 2. in direct IO mode, allocate a compressed buffer and memcpy from the
|
|
|
|
// direct IO buffer to the compressed buffer.
|
|
|
|
TEST_F(BlockFetcherTest, FetchCompressedDataBlock) {
|
2020-06-30 22:38:59 +00:00
|
|
|
TestStats expected_non_mmap_stats = {
|
|
|
|
{
|
|
|
|
0 /* num_stack_buf_memcpy */,
|
|
|
|
0 /* num_heap_buf_memcpy */,
|
|
|
|
1 /* num_compressed_buf_memcpy */,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
0 /* num_heap_buf_allocations */,
|
|
|
|
1 /* num_compressed_buf_allocations */,
|
|
|
|
}};
|
|
|
|
TestStats expected_mmap_stats = {{
|
|
|
|
0 /* num_stack_buf_memcpy */,
|
|
|
|
0 /* num_heap_buf_memcpy */,
|
|
|
|
0 /* num_compressed_buf_memcpy */,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
0 /* num_heap_buf_allocations */,
|
|
|
|
0 /* num_compressed_buf_allocations */,
|
|
|
|
}};
|
|
|
|
std::array<TestStats, NumModes> expected_stats_by_mode{{
|
|
|
|
expected_non_mmap_stats /* kBufferedRead */,
|
|
|
|
expected_mmap_stats /* kBufferedMmap */,
|
|
|
|
expected_non_mmap_stats /* kDirectRead */,
|
|
|
|
}};
|
|
|
|
TestFetchDataBlock("FetchCompressedDataBlock", true, false,
|
|
|
|
expected_stats_by_mode);
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Data blocks are compressed,
|
|
|
|
// fetch and uncompress data block under both direct IO and non-direct IO.
|
|
|
|
// Expects:
|
|
|
|
// 1. in non-direct IO mode, since the block is small, so it's first memcpyed
|
|
|
|
// to the stack buffer, then a heap buffer is allocated and the block is
|
|
|
|
// uncompressed into the heap.
|
|
|
|
// 2. in direct IO mode mode, allocate a heap buffer, then directly uncompress
|
|
|
|
// and memcpy from the direct IO buffer to the heap buffer.
|
|
|
|
TEST_F(BlockFetcherTest, FetchAndUncompressCompressedDataBlock) {
|
2020-06-30 22:38:59 +00:00
|
|
|
TestStats expected_buffered_read_stats = {
|
|
|
|
{
|
|
|
|
1 /* num_stack_buf_memcpy */,
|
|
|
|
1 /* num_heap_buf_memcpy */,
|
|
|
|
0 /* num_compressed_buf_memcpy */,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
1 /* num_heap_buf_allocations */,
|
|
|
|
0 /* num_compressed_buf_allocations */,
|
|
|
|
}};
|
|
|
|
TestStats expected_mmap_stats = {{
|
|
|
|
0 /* num_stack_buf_memcpy */,
|
|
|
|
1 /* num_heap_buf_memcpy */,
|
|
|
|
0 /* num_compressed_buf_memcpy */,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
1 /* num_heap_buf_allocations */,
|
|
|
|
0 /* num_compressed_buf_allocations */,
|
|
|
|
}};
|
|
|
|
TestStats expected_direct_read_stats = {
|
|
|
|
{
|
|
|
|
0 /* num_stack_buf_memcpy */,
|
|
|
|
1 /* num_heap_buf_memcpy */,
|
|
|
|
0 /* num_compressed_buf_memcpy */,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
1 /* num_heap_buf_allocations */,
|
|
|
|
0 /* num_compressed_buf_allocations */,
|
|
|
|
}};
|
|
|
|
std::array<TestStats, NumModes> expected_stats_by_mode{{
|
|
|
|
expected_buffered_read_stats,
|
|
|
|
expected_mmap_stats,
|
|
|
|
expected_direct_read_stats,
|
|
|
|
}};
|
2020-04-24 22:30:12 +00:00
|
|
|
TestFetchDataBlock("FetchAndUncompressCompressedDataBlock", true, true,
|
2020-06-30 22:38:59 +00:00
|
|
|
expected_stats_by_mode);
|
2020-04-24 22:30:12 +00:00
|
|
|
}
|
|
|
|
|
2020-05-21 20:55:18 +00:00
|
|
|
|
2020-04-24 22:30:12 +00:00
|
|
|
} // namespace
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|