More refactoring ahead of footer & meta changes (#9240)

Summary:
I'm working on a new format_version=6 to support context
checksums (https://github.com/facebook/rocksdb/issues/9058), and this PR includes much of the
refactoring and test updates needed to support that change.

Test coverage data and manual inspection agree that some code in
block_based_table_reader.cc was dead; it has been removed.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9240

Test Plan:
Tests enhanced to cover more cases, e.g. block checksum tests are now parameterized over footer format versions.

Extreme-case performance testing indicates a small percentage regression in fillseq (w/ compaction), though the CPU profile etc. doesn't suggest any explanation. There is enhanced correctness checking in Footer::DecodeFrom, but its cost should be negligible.

TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num=30000000 -checksum_type=1 --disable_wal={false,true}

(Each is ops/s averaged over 50 runs, run simultaneously with competing configuration for load fairness)
Before w/ wal: 454512
After w/ wal: 444820 (-2.1%)
Before w/o wal: 1004560
After w/o wal: 998897 (-0.6%)

Since this doesn't modify WAL code, one would expect real effects to be larger in the w/o wal case; the measurements above show the opposite.

This regression will be corrected in a follow-up PR.

Reviewed By: ajkr

Differential Revision: D32813769

Pulled By: pdillinger

fbshipit-source-id: 444a244eabf3825cd329b7d1b150cddce320862f
This commit is contained in:
Peter Dillinger 2021-12-10 08:12:09 -08:00 committed by Facebook GitHub Bot
parent f57745814f
commit 653c392e47
32 changed files with 449 additions and 354 deletions

View file

@ -554,7 +554,7 @@ class ColumnFamilyTest
INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest, INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest,
testing::Values(test::kDefaultFormatVersion)); testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest, INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest,
testing::Values(test::kLatestFormatVersion)); testing::Values(kLatestFormatVersion));
TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) { TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) {
for (int iter = 0; iter < 3; ++iter) { for (int iter = 0; iter < 3; ++iter) {
@ -746,8 +746,8 @@ INSTANTIATE_TEST_CASE_P(
std::make_tuple(test::kDefaultFormatVersion, false))); std::make_tuple(test::kDefaultFormatVersion, false)));
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatLatest, FlushEmptyCFTestWithParam, FormatLatest, FlushEmptyCFTestWithParam,
testing::Values(std::make_tuple(test::kLatestFormatVersion, true), testing::Values(std::make_tuple(kLatestFormatVersion, true),
std::make_tuple(test::kLatestFormatVersion, false))); std::make_tuple(kLatestFormatVersion, false)));
TEST_P(ColumnFamilyTest, AddDrop) { TEST_P(ColumnFamilyTest, AddDrop) {
Open(); Open();

View file

@ -317,7 +317,7 @@ class ComparatorDBTest
INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest, INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest,
testing::Values(test::kDefaultFormatVersion)); testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest, INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest,
testing::Values(test::kLatestFormatVersion)); testing::Values(kLatestFormatVersion));
TEST_P(ComparatorDBTest, Bytewise) { TEST_P(ComparatorDBTest, Bytewise) {
for (int rand_seed = 301; rand_seed < 306; rand_seed++) { for (int rand_seed = 301; rand_seed < 306; rand_seed++) {

View file

@ -546,7 +546,7 @@ TEST_F(CorruptionTest, RangeDeletionCorrupted) {
BlockHandle range_del_handle; BlockHandle range_del_handle;
ASSERT_OK(FindMetaBlockInFile( ASSERT_OK(FindMetaBlockInFile(
file_reader.get(), file_size, kBlockBasedTableMagicNumber, file_reader.get(), file_size, kBlockBasedTableMagicNumber,
ImmutableOptions(options_), kRangeDelBlock, &range_del_handle)); ImmutableOptions(options_), kRangeDelBlockName, &range_del_handle));
ASSERT_OK(TryReopen()); ASSERT_OK(TryReopen());
ASSERT_OK(test::CorruptFile(env_, filename, ASSERT_OK(test::CorruptFile(env_, filename,

View file

@ -15,6 +15,7 @@
#include "rocksdb/flush_block_policy.h" #include "rocksdb/flush_block_policy.h"
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
#include "rocksdb/perf_context.h" #include "rocksdb/perf_context.h"
#include "rocksdb/table.h"
#include "rocksdb/utilities/debug.h" #include "rocksdb/utilities/debug.h"
#include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader.h"
#include "table/block_based/block_builder.h" #include "table/block_based/block_builder.h"
@ -972,8 +973,15 @@ TEST_F(DBBasicTest, MultiGetEmpty) {
} while (ChangeCompactOptions()); } while (ChangeCompactOptions());
} }
TEST_F(DBBasicTest, ChecksumTest) { class DBBlockChecksumTest : public DBBasicTest,
public testing::WithParamInterface<uint32_t> {};
INSTANTIATE_TEST_CASE_P(FormatVersions, DBBlockChecksumTest,
testing::ValuesIn(test::kFooterFormatVersionsToTest));
TEST_P(DBBlockChecksumTest, BlockChecksumTest) {
BlockBasedTableOptions table_options; BlockBasedTableOptions table_options;
table_options.format_version = GetParam();
Options options = CurrentOptions(); Options options = CurrentOptions();
const int kNumPerFile = 2; const int kNumPerFile = 2;

View file

@ -15,6 +15,7 @@
#include "db/column_family.h" #include "db/column_family.h"
#include "db/db_test_util.h" #include "db/db_test_util.h"
#include "port/stack_trace.h" #include "port/stack_trace.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/statistics.h" #include "rocksdb/statistics.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "util/compression.h" #include "util/compression.h"

View file

@ -551,10 +551,9 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
FormatLatest, DBBloomFilterTestWithParam, FormatLatest, DBBloomFilterTestWithParam,
::testing::Values( ::testing::Values(
std::make_tuple(BFP::kDeprecatedBlock, false, std::make_tuple(BFP::kDeprecatedBlock, false, kLatestFormatVersion),
test::kLatestFormatVersion), std::make_tuple(BFP::kAutoBloom, true, kLatestFormatVersion),
std::make_tuple(BFP::kAutoBloom, true, test::kLatestFormatVersion), std::make_tuple(BFP::kAutoBloom, false, kLatestFormatVersion)));
std::make_tuple(BFP::kAutoBloom, false, test::kLatestFormatVersion)));
#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
TEST_F(DBBloomFilterTest, BloomFilterRate) { TEST_F(DBBloomFilterTest, BloomFilterRate) {

View file

@ -15,6 +15,7 @@
#include "rocksdb/env_encryption.h" #include "rocksdb/env_encryption.h"
#include "rocksdb/unique_id.h" #include "rocksdb/unique_id.h"
#include "rocksdb/utilities/object_registry.h" #include "rocksdb/utilities/object_registry.h"
#include "table/format.h"
#include "util/random.h" #include "util/random.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -516,6 +517,11 @@ Options DBTestBase::GetOptions(
table_options.index_block_restart_interval = 8; table_options.index_block_restart_interval = 8;
break; break;
} }
case kBlockBasedTableWithLatestFormat: {
// In case different from default
table_options.format_version = kLatestFormatVersion;
break;
}
case kOptimizeFiltersForHits: { case kOptimizeFiltersForHits: {
options.optimize_filters_for_hits = true; options.optimize_filters_for_hits = true;
set_block_based_table_factory = true; set_block_based_table_factory = true;

View file

@ -867,6 +867,7 @@ class DBTestBase : public testing::Test {
kBlockBasedTableWithIndexRestartInterval, kBlockBasedTableWithIndexRestartInterval,
kBlockBasedTableWithPartitionedIndex, kBlockBasedTableWithPartitionedIndex,
kBlockBasedTableWithPartitionedIndexFormat4, kBlockBasedTableWithPartitionedIndexFormat4,
kBlockBasedTableWithLatestFormat,
kPartitionedFilterWithNewTableReaderForCompactions, kPartitionedFilterWithNewTableReaderForCompactions,
kUniversalSubcompactions, kUniversalSubcompactions,
kUnorderedWrite, kUnorderedWrite,

View file

@ -41,16 +41,33 @@ class ExternalSSTTestEnv : public EnvWrapper {
bool fail_link_; bool fail_link_;
}; };
// Common base for the external SST file test fixtures in this file. It owns
// the scratch directory (`sst_files_dir_`, located under the test DB's
// `dbname_`) where tests write SST files, creating it on construction and
// removing it on destruction.
class ExternalSSTFileTestBase : public DBTestBase {
public:
// Sets up the DBTestBase under the name "external_sst_file_test" with
// env_do_fsync enabled, then starts from an empty SST files directory.
ExternalSSTFileTestBase()
: DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) {
sst_files_dir_ = dbname_ + "/sst_files/";
DestroyAndRecreateExternalSSTFilesDir();
}
// Removes `sst_files_dir_` and creates it again; each step is checked with
// ASSERT_OK.
void DestroyAndRecreateExternalSSTFilesDir() {
ASSERT_OK(DestroyDir(env_, sst_files_dir_));
ASSERT_OK(env_->CreateDir(sst_files_dir_));
}
// Best-effort cleanup: the status of removing the directory is explicitly
// permitted to go unchecked.
~ExternalSSTFileTestBase() override {
DestroyDir(env_, sst_files_dir_).PermitUncheckedError();
}
protected:
// Directory holding the external SST files used by the tests
// (dbname_ + "/sst_files/").
std::string sst_files_dir_;
};
class ExternSSTFileLinkFailFallbackTest class ExternSSTFileLinkFailFallbackTest
: public DBTestBase, : public ExternalSSTFileTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> { public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public: public:
ExternSSTFileLinkFailFallbackTest() ExternSSTFileLinkFailFallbackTest()
: DBTestBase("external_sst_file_test", /*env_do_fsync=*/true), : test_env_(new ExternalSSTTestEnv(env_, true)) {
test_env_(new ExternalSSTTestEnv(env_, true)) {
sst_files_dir_ = dbname_ + "/sst_files/";
EXPECT_EQ(DestroyDir(env_, sst_files_dir_), Status::OK());
EXPECT_EQ(env_->CreateDir(sst_files_dir_), Status::OK());
options_ = CurrentOptions(); options_ = CurrentOptions();
options_.disable_auto_compactions = true; options_.disable_auto_compactions = true;
options_.env = test_env_; options_.env = test_env_;
@ -65,25 +82,15 @@ class ExternSSTFileLinkFailFallbackTest
} }
protected: protected:
std::string sst_files_dir_;
Options options_; Options options_;
ExternalSSTTestEnv* test_env_; ExternalSSTTestEnv* test_env_;
}; };
class ExternalSSTFileTest class ExternalSSTFileTest
: public DBTestBase, : public ExternalSSTFileTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> { public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public: public:
ExternalSSTFileTest() ExternalSSTFileTest() {}
: DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) {
sst_files_dir_ = dbname_ + "/sst_files/";
DestroyAndRecreateExternalSSTFilesDir();
}
void DestroyAndRecreateExternalSSTFilesDir() {
ASSERT_OK(DestroyDir(env_, sst_files_dir_));
ASSERT_OK(env_->CreateDir(sst_files_dir_));
}
Status GenerateOneExternalFile( Status GenerateOneExternalFile(
const Options& options, ColumnFamilyHandle* cfh, const Options& options, ColumnFamilyHandle* cfh,
@ -282,13 +289,8 @@ class ExternalSSTFileTest
return db_->IngestExternalFile(files, opts); return db_->IngestExternalFile(files, opts);
} }
~ExternalSSTFileTest() override {
DestroyDir(env_, sst_files_dir_).PermitUncheckedError();
}
protected: protected:
int last_file_id_ = 0; int last_file_id_ = 0;
std::string sst_files_dir_;
}; };
TEST_F(ExternalSSTFileTest, Basic) { TEST_F(ExternalSSTFileTest, Basic) {
@ -2382,10 +2384,18 @@ TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) {
ASSERT_EQ(1, num_compression_dicts); ASSERT_EQ(1, num_compression_dicts);
} }
// Value-parameterized fixture built on ExternalSSTFileTestBase. The uint32_t
// parameter is read via GetParam() and used by the test below as
// BlockBasedTableOptions::format_version.
class ExternalSSTBlockChecksumTest
: public ExternalSSTFileTestBase,
public testing::WithParamInterface<uint32_t> {};
INSTANTIATE_TEST_CASE_P(FormatVersions, ExternalSSTBlockChecksumTest,
testing::ValuesIn(test::kFooterFormatVersionsToTest));
// Very slow, not worth the cost to run regularly // Very slow, not worth the cost to run regularly
TEST_F(ExternalSSTFileTest, DISABLED_HugeBlockChecksum) { TEST_P(ExternalSSTBlockChecksumTest, DISABLED_HugeBlockChecksum) {
BlockBasedTableOptions table_options;
table_options.format_version = GetParam();
for (auto t : GetSupportedChecksums()) { for (auto t : GetSupportedChecksums()) {
BlockBasedTableOptions table_options;
table_options.checksum = t; table_options.checksum = t;
Options options = CurrentOptions(); Options options = CurrentOptions();
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.table_factory.reset(NewBlockBasedTableFactory(table_options));

View file

@ -1271,8 +1271,8 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
return s; return s;
} }
// By setting the magic number to kInvalidTableMagicNumber, we can by // By setting the magic number to kNullTableMagicNumber, we can bypass
// pass the magic number check in the footer. // the magic number check in the footer.
std::unique_ptr<RandomAccessFileReader> file_reader( std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader( new RandomAccessFileReader(
std::move(file), file_name, nullptr /* env */, io_tracer_, std::move(file), file_name, nullptr /* env */, io_tracer_,
@ -1281,7 +1281,7 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
std::unique_ptr<TableProperties> props; std::unique_ptr<TableProperties> props;
s = ReadTableProperties( s = ReadTableProperties(
file_reader.get(), file_meta->fd.GetFileSize(), file_reader.get(), file_meta->fd.GetFileSize(),
Footer::kInvalidTableMagicNumber /* table's magic number */, *ioptions, Footer::kNullTableMagicNumber /* table's magic number */, *ioptions,
&props); &props);
if (!s.ok()) { if (!s.ok()) {
return s; return s;

View file

@ -53,6 +53,7 @@ class StatsHistoryIterator {
// REQUIRES: Valid() // REQUIRES: Valid()
virtual uint64_t GetStatsTime() const = 0; virtual uint64_t GetStatsTime() const = 0;
// DEPRECATED (was never used)
virtual int GetFormatVersion() const { return -1; } virtual int GetFormatVersion() const { return -1; }
// Return the current stats history as an std::map which specifies the // Return the current stats history as an std::map which specifies the

View file

@ -44,6 +44,9 @@ class WritableFileWriter;
struct ConfigOptions; struct ConfigOptions;
struct EnvOptions; struct EnvOptions;
// Types of checksums to use for checking integrity of logical blocks within
// files. All checksums currently use 32 bits of checking power (1 in 4B
// chance of failing to detect random corruption).
enum ChecksumType : char { enum ChecksumType : char {
kNoChecksum = 0x0, kNoChecksum = 0x0,
kCRC32c = 0x1, kCRC32c = 0x1,
@ -390,10 +393,9 @@ struct BlockBasedTableOptions {
// Default: 0 (disabled) // Default: 0 (disabled)
uint32_t read_amp_bytes_per_bit = 0; uint32_t read_amp_bytes_per_bit = 0;
// We currently have five versions: // We currently have these versions:
// 0 -- This version is currently written out by all RocksDB's versions by // 0 -- This version can be read by really old RocksDB's. Doesn't support
// default. Can be read by really old RocksDB's. Doesn't support changing // changing checksum type (default is CRC32).
// checksum (default is CRC32).
// 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default // 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default
// checksum, like xxHash. It is written by RocksDB when // checksum, like xxHash. It is written by RocksDB when
// BlockBasedTableOptions::checksum is something other than kCRC32c. (version // BlockBasedTableOptions::checksum is something other than kCRC32c. (version

View file

@ -58,7 +58,7 @@ Status AdaptiveTableFactory::NewTableReader(
return plain_table_factory_->NewTableReader( return plain_table_factory_->NewTableReader(
table_reader_options, std::move(file), file_size, table); table_reader_options, std::move(file), file_size, table);
} else if (footer.table_magic_number() == kBlockBasedTableMagicNumber || } else if (footer.table_magic_number() == kBlockBasedTableMagicNumber ||
footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) { footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) {
return block_based_table_factory_->NewTableReader( return block_based_table_factory_->NewTableReader(
ro, table_reader_options, std::move(file), file_size, table, ro, table_reader_options, std::move(file), file_size, table,
prefetch_index_and_filter_in_cache); prefetch_index_and_filter_in_cache);

View file

@ -1744,7 +1744,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
} }
#endif // !NDEBUG #endif // !NDEBUG
const std::string* properties_block_meta = &kPropertiesBlock; const std::string* properties_block_meta = &kPropertiesBlockName;
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK(
"BlockBasedTableBuilder::WritePropertiesBlock:Meta", "BlockBasedTableBuilder::WritePropertiesBlock:Meta",
&properties_block_meta); &properties_block_meta);
@ -1769,7 +1769,7 @@ void BlockBasedTableBuilder::WriteCompressionDictBlock(
#endif // NDEBUG #endif // NDEBUG
} }
if (ok()) { if (ok()) {
meta_index_builder->Add(kCompressionDictBlock, meta_index_builder->Add(kCompressionDictBlockName,
compression_dict_block_handle); compression_dict_block_handle);
} }
} }
@ -1781,7 +1781,7 @@ void BlockBasedTableBuilder::WriteRangeDelBlock(
BlockHandle range_del_block_handle; BlockHandle range_del_block_handle;
WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression, WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression,
&range_del_block_handle, BlockType::kRangeDeletion); &range_del_block_handle, BlockType::kRangeDeletion);
meta_index_builder->Add(kRangeDelBlock, range_del_block_handle); meta_index_builder->Add(kRangeDelBlockName, range_del_block_handle);
} }
} }
@ -1799,14 +1799,16 @@ void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle,
// this is guaranteed by BlockBasedTableBuilder's constructor // this is guaranteed by BlockBasedTableBuilder's constructor
assert(r->table_options.checksum == kCRC32c || assert(r->table_options.checksum == kCRC32c ||
r->table_options.format_version != 0); r->table_options.format_version != 0);
Footer footer( Footer footer;
legacy ? kLegacyBlockBasedTableMagicNumber : kBlockBasedTableMagicNumber, footer
r->table_options.format_version); .set_table_magic_number(legacy ? kLegacyBlockBasedTableMagicNumber
footer.set_metaindex_handle(metaindex_block_handle); : kBlockBasedTableMagicNumber)
footer.set_index_handle(index_block_handle); .set_format_version(r->table_options.format_version)
footer.set_checksum(r->table_options.checksum); .set_metaindex_handle(metaindex_block_handle)
.set_index_handle(index_block_handle)
.set_checksum_type(r->table_options.checksum);
std::string footer_encoding; std::string footer_encoding;
footer.EncodeTo(&footer_encoding); footer.EncodeTo(&footer_encoding, r->get_offset());
assert(ok()); assert(ok());
IOStatus ios = r->file->Append(footer_encoding); IOStatus ios = r->file->Append(footer_encoding);
if (ios.ok()) { if (ios.ok()) {

View file

@ -650,7 +650,7 @@ Status BlockBasedTableFactory::ValidateOptions(
"Enable pin_l0_filter_and_index_blocks_in_cache, " "Enable pin_l0_filter_and_index_blocks_in_cache, "
", but block cache is disabled"); ", but block cache is disabled");
} }
if (!BlockBasedTableSupportedVersion(table_options_.format_version)) { if (!IsSupportedFormatVersion(table_options_.format_version)) {
return Status::InvalidArgument( return Status::InvalidArgument(
"Unsupported BlockBasedTable format_version. Please check " "Unsupported BlockBasedTable format_version. Please check "
"include/rocksdb/table.h for more info"); "include/rocksdb/table.h for more info");

View file

@ -600,7 +600,7 @@ Status BlockBasedTable::Open(
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
if (!BlockBasedTableSupportedVersion(footer.version())) { if (!IsSupportedFormatVersion(footer.format_version())) {
return Status::Corruption( return Status::Corruption(
"Unknown Footer version. Maybe this file was created with newer " "Unknown Footer version. Maybe this file was created with newer "
"version of RocksDB?"); "version of RocksDB?");
@ -757,7 +757,7 @@ Status BlockBasedTable::ReadPropertiesBlock(
InternalIterator* meta_iter, const SequenceNumber largest_seqno) { InternalIterator* meta_iter, const SequenceNumber largest_seqno) {
Status s; Status s;
BlockHandle handle; BlockHandle handle;
s = FindOptionalMetaBlock(meta_iter, kPropertiesBlock, &handle); s = FindOptionalMetaBlock(meta_iter, kPropertiesBlockName, &handle);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN(rep_->ioptions.logger, ROCKS_LOG_WARN(rep_->ioptions.logger,
@ -856,7 +856,7 @@ Status BlockBasedTable::ReadRangeDelBlock(
BlockCacheLookupContext* lookup_context) { BlockCacheLookupContext* lookup_context) {
Status s; Status s;
BlockHandle range_del_handle; BlockHandle range_del_handle;
s = FindOptionalMetaBlock(meta_iter, kRangeDelBlock, &range_del_handle); s = FindOptionalMetaBlock(meta_iter, kRangeDelBlockName, &range_del_handle);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN( ROCKS_LOG_WARN(
rep_->ioptions.logger, rep_->ioptions.logger,
@ -925,7 +925,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch); rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch);
// Find compression dictionary handle // Find compression dictionary handle
s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlock, s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName,
&rep_->compression_dict_handle); &rep_->compression_dict_handle);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -1808,7 +1808,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
// begin address of each read request, we need to add the offset // begin address of each read request, we need to add the offset
// in each read request. Checksum is stored in the block trailer, // in each read request. Checksum is stored in the block trailer,
// beyond the payload size. // beyond the payload size.
s = VerifyBlockChecksum(footer.checksum(), data + req_offset, s = VerifyBlockChecksum(footer.checksum_type(), data + req_offset,
handle.size(), rep_->file->file_name(), handle.size(), rep_->file->file_name(),
handle.offset()); handle.offset());
TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s); TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
@ -1875,9 +1875,9 @@ void BlockBasedTable::RetrieveMultipleBlocks(
if (compression_type != kNoCompression) { if (compression_type != kNoCompression) {
UncompressionContext context(compression_type); UncompressionContext context(compression_type);
UncompressionInfo info(context, uncompression_dict, compression_type); UncompressionInfo info(context, uncompression_dict, compression_type);
s = UncompressBlockContents(info, req.result.data() + req_offset, s = UncompressBlockContents(
handle.size(), &contents, footer.version(), info, req.result.data() + req_offset, handle.size(), &contents,
rep_->ioptions, memory_allocator); footer.format_version(), rep_->ioptions, memory_allocator);
} else { } else {
// There are two cases here: // There are two cases here:
// 1) caller uses the shared buffer (scratch or direct io buffer); // 1) caller uses the shared buffer (scratch or direct io buffer);
@ -3008,15 +3008,15 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
return BlockType::kFilter; return BlockType::kFilter;
} }
if (meta_block_name == kPropertiesBlock) { if (meta_block_name == kPropertiesBlockName) {
return BlockType::kProperties; return BlockType::kProperties;
} }
if (meta_block_name == kCompressionDictBlock) { if (meta_block_name == kCompressionDictBlockName) {
return BlockType::kCompressionDictionary; return BlockType::kCompressionDictionary;
} }
if (meta_block_name == kRangeDelBlock) { if (meta_block_name == kRangeDelBlockName) {
return BlockType::kRangeDeletion; return BlockType::kRangeDeletion;
} }
@ -3045,7 +3045,7 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
s = handle.DecodeFrom(&input); s = handle.DecodeFrom(&input);
BlockContents contents; BlockContents contents;
const Slice meta_block_name = index_iter->key(); const Slice meta_block_name = index_iter->key();
if (meta_block_name == kPropertiesBlock) { if (meta_block_name == kPropertiesBlockName) {
// Unfortunate special handling for properties block checksum w/ // Unfortunate special handling for properties block checksum w/
// global seqno // global seqno
std::unique_ptr<TableProperties> table_properties; std::unique_ptr<TableProperties> table_properties;
@ -3111,8 +3111,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
// 5. index_type // 5. index_type
Status BlockBasedTable::CreateIndexReader( Status BlockBasedTable::CreateIndexReader(
const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch, InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin,
bool pin, BlockCacheLookupContext* lookup_context, BlockCacheLookupContext* lookup_context,
std::unique_ptr<IndexReader>* index_reader) { std::unique_ptr<IndexReader>* index_reader) {
// kHashSearch requires non-empty prefix_extractor but bypass checking // kHashSearch requires non-empty prefix_extractor but bypass checking
// prefix_extractor here since we have no access to MutableCFOptions. // prefix_extractor here since we have no access to MutableCFOptions.
@ -3136,25 +3136,12 @@ Status BlockBasedTable::CreateIndexReader(
case BlockBasedTableOptions::kHashSearch: { case BlockBasedTableOptions::kHashSearch: {
std::unique_ptr<Block> metaindex_guard; std::unique_ptr<Block> metaindex_guard;
std::unique_ptr<InternalIterator> metaindex_iter_guard; std::unique_ptr<InternalIterator> metaindex_iter_guard;
auto meta_index_iter = preloaded_meta_index_iter;
bool should_fallback = false; bool should_fallback = false;
if (rep_->internal_prefix_transform.get() == nullptr) { if (rep_->internal_prefix_transform.get() == nullptr) {
ROCKS_LOG_WARN(rep_->ioptions.logger, ROCKS_LOG_WARN(rep_->ioptions.logger,
"No prefix extractor passed in. Fall back to binary" "No prefix extractor passed in. Fall back to binary"
" search index."); " search index.");
should_fallback = true; should_fallback = true;
} else if (meta_index_iter == nullptr) {
auto s = ReadMetaIndexBlock(ro, prefetch_buffer, &metaindex_guard,
&metaindex_iter_guard);
if (!s.ok()) {
// we simply fall back to binary search in case there is any
// problem with prefix hash index loading.
ROCKS_LOG_WARN(rep_->ioptions.logger,
"Unable to read the metaindex block."
" Fall back to binary search index.");
should_fallback = true;
}
meta_index_iter = metaindex_iter_guard.get();
} }
if (should_fallback) { if (should_fallback) {
@ -3162,9 +3149,9 @@ Status BlockBasedTable::CreateIndexReader(
use_cache, prefetch, pin, use_cache, prefetch, pin,
lookup_context, index_reader); lookup_context, index_reader);
} else { } else {
return HashIndexReader::Create(this, ro, prefetch_buffer, return HashIndexReader::Create(this, ro, prefetch_buffer, meta_iter,
meta_index_iter, use_cache, prefetch, use_cache, prefetch, pin, lookup_context,
pin, lookup_context, index_reader); index_reader);
} }
} }
default: { default: {
@ -3357,17 +3344,17 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
if (metaindex_iter->key() == kPropertiesBlock) { if (metaindex_iter->key() == kPropertiesBlockName) {
out_stream << " Properties block handle: " out_stream << " Properties block handle: "
<< metaindex_iter->value().ToString(true) << "\n"; << metaindex_iter->value().ToString(true) << "\n";
} else if (metaindex_iter->key() == kCompressionDictBlock) { } else if (metaindex_iter->key() == kCompressionDictBlockName) {
out_stream << " Compression dictionary block handle: " out_stream << " Compression dictionary block handle: "
<< metaindex_iter->value().ToString(true) << "\n"; << metaindex_iter->value().ToString(true) << "\n";
} else if (strstr(metaindex_iter->key().ToString().c_str(), } else if (strstr(metaindex_iter->key().ToString().c_str(),
"filter.rocksdb.") != nullptr) { "filter.rocksdb.") != nullptr) {
out_stream << " Filter block handle: " out_stream << " Filter block handle: "
<< metaindex_iter->value().ToString(true) << "\n"; << metaindex_iter->value().ToString(true) << "\n";
} else if (metaindex_iter->key() == kRangeDelBlock) { } else if (metaindex_iter->key() == kRangeDelBlockName) {
out_stream << " Range deletion block handle: " out_stream << " Range deletion block handle: "
<< metaindex_iter->value().ToString(true) << "\n"; << metaindex_iter->value().ToString(true) << "\n";
} }

View file

@ -20,6 +20,7 @@
#include "table/block_based/filter_block.h" #include "table/block_based/filter_block.h"
#include "table/block_based/uncompression_dict_reader.h" #include "table/block_based/uncompression_dict_reader.h"
#include "table/format.h" #include "table/format.h"
#include "table/persistent_cache_options.h"
#include "table/table_properties_internal.h" #include "table/table_properties_internal.h"
#include "table/table_reader.h" #include "table/table_reader.h"
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"

View file

@ -3,15 +3,15 @@
// COPYING file in the root directory) and Apache 2.0 License // COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#include "table/block_based/partitioned_filter_block.h"
#include <map> #include <map>
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/partitioned_filter_block.h"
#include "table/block_based/filter_policy_internal.h"
#include "index_builder.h" #include "index_builder.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/format.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "test_util/testutil.h" #include "test_util/testutil.h"
#include "util/coding.h" #include "util/coding.h"
@ -292,10 +292,11 @@ class PartitionedFilterBlockTest
} }
}; };
INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest, // Format versions potentially interesting to partitioning
testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatVersions, PartitionedFilterBlockTest,
INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest, testing::ValuesIn(std::set<uint32_t>{
testing::Values(test::kLatestFormatVersion)); 2, 3, 4, test::kDefaultFormatVersion,
kLatestFormatVersion}));
TEST_P(PartitionedFilterBlockTest, EmptyBuilder) { TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder()); std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());

View file

@ -32,9 +32,9 @@ inline void BlockFetcher::ProcessTrailerIfPresent() {
if (footer_.GetBlockTrailerSize() > 0) { if (footer_.GetBlockTrailerSize() > 0) {
assert(footer_.GetBlockTrailerSize() == BlockBasedTable::kBlockTrailerSize); assert(footer_.GetBlockTrailerSize() == BlockBasedTable::kBlockTrailerSize);
if (read_options_.verify_checksums) { if (read_options_.verify_checksums) {
io_status_ = status_to_io_status( io_status_ = status_to_io_status(VerifyBlockChecksum(
VerifyBlockChecksum(footer_.checksum(), slice_.data(), block_size_, footer_.checksum_type(), slice_.data(), block_size_,
file_->file_name(), handle_.offset())); file_->file_name(), handle_.offset()));
} }
compression_type_ = compression_type_ =
BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_); BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_);
@ -315,7 +315,7 @@ IOStatus BlockFetcher::ReadBlockContents() {
UncompressionContext context(compression_type_); UncompressionContext context(compression_type_);
UncompressionInfo info(context, uncompression_dict_, compression_type_); UncompressionInfo info(context, uncompression_dict_, compression_type_);
io_status_ = status_to_io_status(UncompressBlockContents( io_status_ = status_to_io_status(UncompressBlockContents(
info, slice_.data(), block_size_, contents_, footer_.version(), info, slice_.data(), block_size_, contents_, footer_.format_version(),
ioptions_, memory_allocator_)); ioptions_, memory_allocator_));
#ifndef NDEBUG #ifndef NDEBUG
num_heap_buf_memcpy_++; num_heap_buf_memcpy_++;

View file

@ -12,6 +12,7 @@
#include "table/block_based/block.h" #include "table/block_based/block.h"
#include "table/block_based/block_type.h" #include "table/block_based/block_type.h"
#include "table/format.h" #include "table/format.h"
#include "table/persistent_cache_options.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {

View file

@ -381,7 +381,7 @@ Status CuckooTableBuilder::Finish() {
return status_; return status_;
} }
meta_index_builder.Add(kPropertiesBlock, property_block_handle); meta_index_builder.Add(kPropertiesBlockName, property_block_handle);
Slice meta_index_block = meta_index_builder.Finish(); Slice meta_index_block = meta_index_builder.Finish();
BlockHandle meta_index_block_handle; BlockHandle meta_index_block_handle;
@ -393,11 +393,14 @@ Status CuckooTableBuilder::Finish() {
return status_; return status_;
} }
Footer footer(kCuckooTableMagicNumber, 1); Footer footer;
footer.set_metaindex_handle(meta_index_block_handle); footer.set_table_magic_number(kCuckooTableMagicNumber)
footer.set_index_handle(BlockHandle::NullBlockHandle()); .set_format_version(1)
.set_metaindex_handle(meta_index_block_handle)
.set_index_handle(BlockHandle::NullBlockHandle())
.set_checksum_type(kNoChecksum);
std::string footer_encoding; std::string footer_encoding;
footer.EncodeTo(&footer_encoding); footer.EncodeTo(&footer_encoding, offset);
io_status_ = file_->Append(footer_encoding); io_status_ = file_->Append(footer_encoding);
status_ = io_status_; status_ = io_status_;
return status_; return status_;

View file

@ -20,9 +20,11 @@
#include "options/options_helper.h" #include "options/options_helper.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "table/block_based/block.h" #include "table/block_based/block.h"
#include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader.h"
#include "table/persistent_cache_helper.h" #include "table/persistent_cache_helper.h"
#include "util/cast_util.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/compression.h" #include "util/compression.h"
#include "util/crc32c.h" #include "util/crc32c.h"
@ -58,6 +60,15 @@ void BlockHandle::EncodeTo(std::string* dst) const {
PutVarint64Varint64(dst, offset_, size_); PutVarint64Varint64(dst, offset_, size_);
} }
// Raw-buffer variant of EncodeTo: writes offset_ and then size_ at `dst`,
// each via EncodeVarint64, and returns the pointer handed back by the second
// call so the caller can keep writing from there. No buffer length is passed
// in, so the caller is responsible for providing enough room at `dst`.
char* BlockHandle::EncodeTo(char* dst) const {
  // An all-ones field is treated as "never set" and rejected in debug builds.
  assert(offset_ != ~uint64_t{0});
  assert(size_ != ~uint64_t{0});
  char* const after_offset = EncodeVarint64(dst, offset_);
  return EncodeVarint64(after_offset, size_);
}
Status BlockHandle::DecodeFrom(Slice* input) { Status BlockHandle::DecodeFrom(Slice* input) {
if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) { if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) {
return Status::OK(); return Status::OK();
@ -166,8 +177,8 @@ inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) {
} }
} // namespace } // namespace
void Footer::set_table_magic_number(uint64_t magic_number) { Footer& Footer::set_table_magic_number(uint64_t magic_number) {
assert(!HasInitializedTableMagicNumber()); assert(table_magic_number_ == kNullTableMagicNumber);
table_magic_number_ = magic_number; table_magic_number_ = magic_number;
if (magic_number == kBlockBasedTableMagicNumber || if (magic_number == kBlockBasedTableMagicNumber ||
magic_number == kLegacyBlockBasedTableMagicNumber) { magic_number == kLegacyBlockBasedTableMagicNumber) {
@ -176,64 +187,80 @@ void Footer::set_table_magic_number(uint64_t magic_number) {
} else { } else {
block_trailer_size_ = 0; block_trailer_size_ = 0;
} }
return *this;
} }
// legacy footer format: // Footer format, in three parts:
// metaindex handle (varint64 offset, varint64 size) // * Part1
// index handle (varint64 offset, varint64 size) // -> format_version == 0 (inferred from legacy magic number)
// <padding> to make the total size 2 * BlockHandle::kMaxEncodedLength // <empty> (0 bytes)
// table_magic_number (8 bytes) // -> format_version >= 1
// new footer format: // checksum type (char, 1 byte)
// checksum type (char, 1 byte) // * Part2
// metaindex handle (varint64 offset, varint64 size) // metaindex handle (varint64 offset, varint64 size)
// index handle (varint64 offset, varint64 size) // index handle (varint64 offset, varint64 size)
// <padding> to make the total size 2 * BlockHandle::kMaxEncodedLength + 1 // <zero padding> for part2 size = 2 * BlockHandle::kMaxEncodedLength = 40
// footer version (4 bytes) // * Part3
// table_magic_number (8 bytes) // -> format_version == 0 (inferred from legacy magic number)
void Footer::EncodeTo(std::string* dst) const { // legacy magic number (8 bytes)
assert(HasInitializedTableMagicNumber()); // -> format_version >= 1 (inferred from NOT legacy magic number)
if (IsLegacyFooterFormat(table_magic_number())) { // format_version (uint32LE, 4 bytes), also called "footer version"
// has to be default checksum with legacy footer // newer magic number (8 bytes)
assert(checksum_ == kCRC32c); void Footer::EncodeTo(std::string* dst, uint64_t footer_offset) const {
const size_t original_size = dst->size(); (void)footer_offset; // Future use
metaindex_handle_.EncodeTo(dst);
index_handle_.EncodeTo(dst); // Sanitize magic numbers & format versions
dst->resize(original_size + 2 * BlockHandle::kMaxEncodedLength); // Padding assert(table_magic_number_ != kNullTableMagicNumber);
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() & 0xffffffffu)); uint64_t magic = table_magic_number_;
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() >> 32)); uint32_t fv = format_version_;
assert(dst->size() == original_size + kVersion0EncodedLength); assert(fv != kInvalidFormatVersion);
assert(IsLegacyFooterFormat(magic) == (fv == 0));
ChecksumType ct = checksum_type();
// Allocate destination data and generate parts 1 and 3
const size_t original_size = dst->size();
char* part2;
if (fv > 0) {
dst->resize(original_size + kNewVersionsEncodedLength);
char* part1 = &(*dst)[original_size];
part2 = part1 + 1;
char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength;
assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 12);
// Generate parts 1 and 3
part1[0] = ct;
EncodeFixed32(part3, fv);
EncodeFixed64(part3 + 4, magic);
} else { } else {
const size_t original_size = dst->size(); dst->resize(original_size + kVersion0EncodedLength);
dst->push_back(static_cast<char>(checksum_)); part2 = &(*dst)[original_size];
metaindex_handle_.EncodeTo(dst); char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength;
index_handle_.EncodeTo(dst); assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 8);
dst->resize(original_size + kNewVersionsEncodedLength - 12); // Padding // Legacy SST files use kCRC32c checksum but it's not stored in footer.
PutFixed32(dst, version()); assert(ct == kNoChecksum || ct == kCRC32c);
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() & 0xffffffffu)); // Generate part 3 (part 1 empty)
PutFixed32(dst, static_cast<uint32_t>(table_magic_number() >> 32)); EncodeFixed64(part3, magic);
assert(dst->size() == original_size + kNewVersionsEncodedLength);
} }
// Generate Part2
// Variable size encode handles (sigh)
part2 = metaindex_handle_.EncodeTo(part2);
/*part2 = */ index_handle_.EncodeTo(part2);
// remainder of part2 is already zero padded
} }
Footer::Footer(uint64_t _table_magic_number, uint32_t _version) Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) {
: version_(_version), (void)input_offset; // Future use
checksum_(kCRC32c),
table_magic_number_(_table_magic_number) {
// This should be guaranteed by constructor callers
assert(!IsLegacyFooterFormat(_table_magic_number) || version_ == 0);
}
Status Footer::DecodeFrom(Slice* input) { // Only decode to unused Footer
assert(!HasInitializedTableMagicNumber()); assert(table_magic_number_ == kNullTableMagicNumber);
assert(input != nullptr); assert(input != nullptr);
assert(input->size() >= kMinEncodedLength); assert(input->size() >= kMinEncodedLength);
const char* magic_ptr = const char* magic_ptr =
input->data() + input->size() - kMagicNumberLengthByte; input->data() + input->size() - kMagicNumberLengthByte;
const uint32_t magic_lo = DecodeFixed32(magic_ptr); uint64_t magic = DecodeFixed64(magic_ptr);
const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4);
uint64_t magic = ((static_cast<uint64_t>(magic_hi) << 32) |
(static_cast<uint64_t>(magic_lo)));
// We check for legacy formats here and silently upconvert them // We check for legacy formats here and silently upconvert them
bool legacy = IsLegacyFooterFormat(magic); bool legacy = IsLegacyFooterFormat(magic);
@ -242,44 +269,51 @@ Status Footer::DecodeFrom(Slice* input) {
} }
set_table_magic_number(magic); set_table_magic_number(magic);
// Parse Part3
if (legacy) { if (legacy) {
// The size is already asserted to be at least kMinEncodedLength // The size is already asserted to be at least kMinEncodedLength
// at the beginning of the function // at the beginning of the function
input->remove_prefix(input->size() - kVersion0EncodedLength); input->remove_prefix(input->size() - kVersion0EncodedLength);
version_ = 0 /* legacy */; format_version_ = 0 /* legacy */;
checksum_ = kCRC32c; checksum_type_ = kCRC32c;
} else { } else {
version_ = DecodeFixed32(magic_ptr - 4); const char* part3_ptr = magic_ptr - 4;
// Footer version 1 and higher will always occupy exactly this many bytes. format_version_ = DecodeFixed32(part3_ptr);
// It consists of the checksum type, two block handles, padding, if (!IsSupportedFormatVersion(format_version_)) {
// a version number, and a magic number return Status::Corruption("Corrupt or unsupported format_version: " +
ROCKSDB_NAMESPACE::ToString(format_version_));
}
// All known format versions >= 1 occupy exactly this many bytes.
if (input->size() < kNewVersionsEncodedLength) { if (input->size() < kNewVersionsEncodedLength) {
return Status::Corruption("input is too short to be an sstable"); return Status::Corruption("Input is too short to be an SST file");
} else {
input->remove_prefix(input->size() - kNewVersionsEncodedLength);
} }
uint32_t chksum; uint64_t adjustment = input->size() - kNewVersionsEncodedLength;
if (!GetVarint32(input, &chksum)) { input->remove_prefix(adjustment);
return Status::Corruption("bad checksum type");
} // Parse Part1
checksum_ = static_cast<ChecksumType>(chksum); char chksum = input->data()[0];
if (chksum != static_cast<uint32_t>(checksum_) || checksum_type_ = lossless_cast<ChecksumType>(chksum);
!IsSupportedChecksumType(checksum_)) { if (!IsSupportedChecksumType(checksum_type())) {
return Status::Corruption("unknown checksum type " + return Status::Corruption(
ROCKSDB_NAMESPACE::ToString(chksum)); "Corrupt or unsupported checksum type: " +
ROCKSDB_NAMESPACE::ToString(lossless_cast<uint8_t>(chksum)));
} }
// Consume checksum type field
input->remove_prefix(1);
} }
// Parse Part2
Status result = metaindex_handle_.DecodeFrom(input); Status result = metaindex_handle_.DecodeFrom(input);
if (result.ok()) { if (result.ok()) {
result = index_handle_.DecodeFrom(input); result = index_handle_.DecodeFrom(input);
} }
if (result.ok()) { if (!result.ok()) {
// We skip over any leftover data (just padding for now) in "input" return result;
const char* end = magic_ptr + kMagicNumberLengthByte;
*input = Slice(end, input->data() + input->size() - end);
} }
return result;
// Mark all input consumed (skip padding & part3)
*input = Slice(input->data() + input->size(), 0U);
return Status::OK();
} }
std::string Footer::ToString() const { std::string Footer::ToString() const {
@ -293,14 +327,12 @@ std::string Footer::ToString() const {
result.append("table_magic_number: " + result.append("table_magic_number: " +
ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n "); ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n ");
} else { } else {
result.append("checksum: " + ROCKSDB_NAMESPACE::ToString(checksum_) +
"\n ");
result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n ");
result.append("index handle: " + index_handle_.ToString() + "\n "); result.append("index handle: " + index_handle_.ToString() + "\n ");
result.append("footer version: " + ROCKSDB_NAMESPACE::ToString(version_) +
"\n ");
result.append("table_magic_number: " + result.append("table_magic_number: " +
ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n "); ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n ");
result.append("format version: " +
ROCKSDB_NAMESPACE::ToString(format_version_) + "\n ");
} }
return result; return result;
} }
@ -319,10 +351,9 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
std::string footer_buf; std::string footer_buf;
AlignedBuf internal_buf; AlignedBuf internal_buf;
Slice footer_input; Slice footer_input;
size_t read_offset = uint64_t read_offset = (file_size > Footer::kMaxEncodedLength)
(file_size > Footer::kMaxEncodedLength) ? file_size - Footer::kMaxEncodedLength
? static_cast<size_t>(file_size - Footer::kMaxEncodedLength) : 0;
: 0;
Status s; Status s;
// TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now, // TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now,
// there is no readahead for point lookups, so TryReadFromCache will fail if // there is no readahead for point lookups, so TryReadFromCache will fail if
@ -353,7 +384,7 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file,
file->file_name()); file->file_name());
} }
s = footer->DecodeFrom(&footer_input); s = footer->DecodeFrom(&footer_input, read_offset);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -376,7 +407,7 @@ inline uint32_t ModifyChecksumForLastByte(uint32_t checksum, char last_byte) {
// more byte, except we don't need to re-mix the input checksum as long as // more byte, except we don't need to re-mix the input checksum as long as
// we do this step only once (per checksum). // we do this step only once (per checksum).
const uint32_t kRandomPrime = 0x6b9083d9; const uint32_t kRandomPrime = 0x6b9083d9;
return checksum ^ static_cast<uint8_t>(last_byte) * kRandomPrime; return checksum ^ lossless_cast<uint8_t>(last_byte) * kRandomPrime;
} }
} // namespace } // namespace

View file

@ -8,21 +8,20 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #pragma once
#include <stdint.h>
#include <cstdint>
#include <string> #include <string>
#include "file/file_prefetch_buffer.h" #include "file/file_prefetch_buffer.h"
#include "file/random_access_file_reader.h" #include "file/random_access_file_reader.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "memory/memory_allocator.h" #include "memory/memory_allocator.h"
#include "options/cf_options.h" #include "options/cf_options.h"
#include "port/malloc.h" #include "port/malloc.h"
#include "port/port.h" // noexcept #include "port/port.h" // noexcept
#include "table/persistent_cache_options.h" #include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "util/hash.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -32,7 +31,7 @@ struct ReadOptions;
extern bool ShouldReportDetailedTime(Env* env, Statistics* stats); extern bool ShouldReportDetailedTime(Env* env, Statistics* stats);
// the length of the magic number in bytes. // the length of the magic number in bytes.
const int kMagicNumberLengthByte = 8; constexpr uint32_t kMagicNumberLengthByte = 8;
// BlockHandle is a pointer to the extent of a file that stores a data // BlockHandle is a pointer to the extent of a file that stores a data
// block or a meta block. // block or a meta block.
@ -52,6 +51,7 @@ class BlockHandle {
void set_size(uint64_t _size) { size_ = _size; } void set_size(uint64_t _size) { size_ = _size; }
void EncodeTo(std::string* dst) const; void EncodeTo(std::string* dst) const;
char* EncodeTo(char* dst) const;
Status DecodeFrom(Slice* input); Status DecodeFrom(Slice* input);
Status DecodeSizeFrom(uint64_t offset, Slice* input); Status DecodeSizeFrom(uint64_t offset, Slice* input);
@ -65,7 +65,7 @@ class BlockHandle {
static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; } static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; }
// Maximum encoding length of a BlockHandle // Maximum encoding length of a BlockHandle
enum { kMaxEncodedLength = 10 + 10 }; static constexpr uint32_t kMaxEncodedLength = 2 * kMaxVarint64Length;
inline bool operator==(const BlockHandle& rhs) const { inline bool operator==(const BlockHandle& rhs) const {
return offset_ == rhs.offset_ && size_ == rhs.size_; return offset_ == rhs.offset_ && size_ == rhs.size_;
@ -117,94 +117,107 @@ inline uint32_t GetCompressFormatForVersion(uint32_t format_version) {
return format_version >= 2 ? 2 : 1; return format_version >= 2 ? 2 : 1;
} }
inline bool BlockBasedTableSupportedVersion(uint32_t version) { constexpr uint32_t kLatestFormatVersion = 5;
return version <= 5;
inline bool IsSupportedFormatVersion(uint32_t version) {
return version <= kLatestFormatVersion;
} }
// Footer encapsulates the fixed information stored at the tail // Footer encapsulates the fixed information stored at the tail end of every
// end of every table file. // SST file. In general, it should only include things that cannot go
// elsewhere under the metaindex block. For example, checksum_type is
// required for verifying metaindex block checksum (when applicable), but
// index block handle can easily go in metaindex block (possible future).
class Footer { class Footer {
public: public:
// Constructs a footer without specifying its table magic number. Footer() {}
// In such case, the table magic number of such footer should be
// initialized via @ReadFooterFromFile().
// Use this when you plan to load Footer with DecodeFrom(). Never use this
// when you plan to EncodeTo.
Footer() : Footer(kInvalidTableMagicNumber, 0) {}
// Use this constructor when you plan to write out the footer using // Uses builder pattern rather than distinctive ctors
// EncodeTo(). Never use this constructor with DecodeFrom().
// `version` is same as `format_version` for block-based table.
Footer(uint64_t table_magic_number, uint32_t version);
// The version of the footer in this file
uint32_t version() const { return version_; }
// The checksum type used in this file
ChecksumType checksum() const { return checksum_; }
void set_checksum(const ChecksumType c) { checksum_ = c; }
// The block handle for the metaindex block of the table
const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; }
// The block handle for the index block of the table
const BlockHandle& index_handle() const { return index_handle_; }
void set_index_handle(const BlockHandle& h) { index_handle_ = h; }
// Table magic number identifies file as RocksDB SST file and which kind of
// SST format is in use.
Footer& set_table_magic_number(uint64_t tmn);
uint64_t table_magic_number() const { return table_magic_number_; } uint64_t table_magic_number() const { return table_magic_number_; }
void EncodeTo(std::string* dst) const; // A version (footer and more) within a kind of SST. (It would add more
// unnecessary complexity to separate footer versions and
// BBTO::format_version.)
// Builder-style setter: stores `fv` exactly as given (no validation is done
// here) and returns *this so that setter calls can be chained.
Footer& set_format_version(uint32_t fv) {
format_version_ = fv;
return *this;
}
// Returns the stored format version unchanged.
uint32_t format_version() const { return format_version_; }
// Set the current footer based on the input slice. // Block handle for metaindex block.
// Footer& set_metaindex_handle(const BlockHandle& h) {
// REQUIRES: table_magic_number_ is not set (i.e., metaindex_handle_ = h;
// HasInitializedTableMagicNumber() is true). The function will initialize the return *this;
// magic number }
Status DecodeFrom(Slice* input); const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
// Encoded length of a Footer. Note that the serialization of a Footer will // Block handle for (top-level) index block.
// always occupy at least kMinEncodedLength bytes. If fields are changed Footer& set_index_handle(const BlockHandle& h) {
// the version number should be incremented and kMaxEncodedLength should be index_handle_ = h;
// increased accordingly. return *this;
enum { }
// Footer version 0 (legacy) will always occupy exactly this many bytes. const BlockHandle& index_handle() const { return index_handle_; }
// It consists of two block handles, padding, and a magic number.
kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8,
// Footer of versions 1 and higher will always occupy exactly this many
// bytes. It consists of the checksum type, two block handles, padding,
// a version number (bigger than 1), and a magic number
kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8,
kMinEncodedLength = kVersion0EncodedLength,
kMaxEncodedLength = kNewVersionsEncodedLength,
};
static const uint64_t kInvalidTableMagicNumber = 0; // Checksum type used in the file.
// Builder-style setter: stores `ct` (no validation is done here) and returns
// *this so that setter calls can be chained.
Footer& set_checksum_type(ChecksumType ct) {
checksum_type_ = ct;
return *this;
}
// Returns the stored checksum type. The member is converted back to
// ChecksumType with a static_cast on every read.
// NOTE(review): if the member is wider than ChecksumType, the cast narrows
// whatever value it holds — confirm what an unset value maps to.
ChecksumType checksum_type() const {
return static_cast<ChecksumType>(checksum_type_);
}
// convert this object to a human readable form // Appends serialized footer to `dst`. The starting offset of the footer
// within the file is required for future work.
void EncodeTo(std::string* dst, uint64_t footer_offset) const;
// Deserialize a footer (populate fields) from `input` and check for various
// corruptions. On success (and some error cases) `input` is advanced past
// the footer. Like EncodeTo, the offset within the file will be needed for
// future work
Status DecodeFrom(Slice* input, uint64_t input_offset);
// Convert this object to a human readable form
std::string ToString() const; std::string ToString() const;
// Block trailer size used by file with this footer (e.g. 5 for block-based // Block trailer size used by file with this footer (e.g. 5 for block-based
// table and 0 for plain table) // table and 0 for plain table)
inline size_t GetBlockTrailerSize() const { return block_trailer_size_; } inline size_t GetBlockTrailerSize() const { return block_trailer_size_; }
// Encoded lengths of Footers. Bytes for serialized Footer will always be
// >= kMinEncodedLength and <= kMaxEncodedLength.
//
// Footer version 0 (legacy) will always occupy exactly this many bytes.
// It consists of two block handles, padding, and a magic number.
static constexpr uint32_t kVersion0EncodedLength =
2 * BlockHandle::kMaxEncodedLength + kMagicNumberLengthByte;
static constexpr uint32_t kMinEncodedLength = kVersion0EncodedLength;
// Footer of versions 1 and higher will always occupy exactly this many
// bytes. It originally consisted of the checksum type, two block handles,
// padding (to maximum handle encoding size), a format version number, and a
// magic number.
static constexpr uint32_t kNewVersionsEncodedLength =
1 + 2 * BlockHandle::kMaxEncodedLength + 4 + kMagicNumberLengthByte;
static constexpr uint32_t kMaxEncodedLength = kNewVersionsEncodedLength;
static constexpr uint64_t kNullTableMagicNumber = 0;
private: private:
// REQUIRES: magic number wasn't initialized. static constexpr uint32_t kInvalidFormatVersion = 0xffffffffU;
void set_table_magic_number(uint64_t magic_number); static constexpr int kInvalidChecksumType =
(1 << (sizeof(ChecksumType) * 8)) | kNoChecksum;
// return true if @table_magic_number_ is set to a value different uint64_t table_magic_number_ = kNullTableMagicNumber;
// from @kInvalidTableMagicNumber. uint32_t format_version_ = kInvalidFormatVersion;
bool HasInitializedTableMagicNumber() const {
return (table_magic_number_ != kInvalidTableMagicNumber);
}
uint32_t version_;
ChecksumType checksum_;
uint8_t block_trailer_size_ = 0; // set based on magic number
BlockHandle metaindex_handle_; BlockHandle metaindex_handle_;
BlockHandle index_handle_; BlockHandle index_handle_;
uint64_t table_magic_number_ = 0; int checksum_type_ = kInvalidChecksumType;
uint8_t block_trailer_size_ = 0; // set based on magic number
}; };
// Read the footer from file // Read the footer from file

View file

@ -26,11 +26,11 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
const std::string kPropertiesBlock = "rocksdb.properties"; const std::string kPropertiesBlockName = "rocksdb.properties";
// Old property block name for backward compatibility // Old property block name for backward compatibility
const std::string kPropertiesBlockOldName = "rocksdb.stats"; const std::string kPropertiesBlockOldName = "rocksdb.stats";
const std::string kCompressionDictBlock = "rocksdb.compression_dict"; const std::string kCompressionDictBlockName = "rocksdb.compression_dict";
const std::string kRangeDelBlock = "rocksdb.range_del"; const std::string kRangeDelBlockName = "rocksdb.range_del";
MetaIndexBuilder::MetaIndexBuilder() MetaIndexBuilder::MetaIndexBuilder()
: meta_index_block_(new BlockBuilder(1 /* restart interval */)) {} : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
@ -381,7 +381,7 @@ Status ReadTablePropertiesHelper(
// Modified version of BlockFetcher checksum verification // Modified version of BlockFetcher checksum verification
// (See write_global_seqno comment above) // (See write_global_seqno comment above)
if (s.ok() && footer.GetBlockTrailerSize() > 0) { if (s.ok() && footer.GetBlockTrailerSize() > 0) {
s = VerifyBlockChecksum(footer.checksum(), properties_block.data(), s = VerifyBlockChecksum(footer.checksum_type(), properties_block.data(),
block_size, file->file_name(), handle.offset()); block_size, file->file_name(), handle.offset());
if (s.IsCorruption()) { if (s.IsCorruption()) {
if (new_table_properties->external_sst_file_global_seqno_offset != 0) { if (new_table_properties->external_sst_file_global_seqno_offset != 0) {
@ -391,8 +391,8 @@ Status ReadTablePropertiesHelper(
new_table_properties->external_sst_file_global_seqno_offset - new_table_properties->external_sst_file_global_seqno_offset -
handle.offset(); handle.offset();
EncodeFixed64(&tmp_buf[static_cast<size_t>(global_seqno_offset)], 0); EncodeFixed64(&tmp_buf[static_cast<size_t>(global_seqno_offset)], 0);
s = VerifyBlockChecksum(footer.checksum(), tmp_buf.data(), block_size, s = VerifyBlockChecksum(footer.checksum_type(), tmp_buf.data(),
file->file_name(), handle.offset()); block_size, file->file_name(), handle.offset());
} }
} }
} }
@ -413,7 +413,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
BlockHandle block_handle; BlockHandle block_handle;
Footer footer; Footer footer;
Status s = FindMetaBlockInFile(file, file_size, table_magic_number, ioptions, Status s = FindMetaBlockInFile(file, file_size, table_magic_number, ioptions,
kPropertiesBlock, &block_handle, kPropertiesBlockName, &block_handle,
memory_allocator, prefetch_buffer, &footer); memory_allocator, prefetch_buffer, &footer);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -438,7 +438,7 @@ Status FindOptionalMetaBlock(InternalIterator* meta_index_iter,
if (meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) { if (meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) {
Slice v = meta_index_iter->value(); Slice v = meta_index_iter->value();
return block_handle->DecodeFrom(&v); return block_handle->DecodeFrom(&v);
} else if (meta_block_name == kPropertiesBlock) { } else if (meta_block_name == kPropertiesBlockName) {
// Have to try old name for compatibility // Have to try old name for compatibility
meta_index_iter->Seek(kPropertiesBlockOldName); meta_index_iter->Seek(kPropertiesBlockOldName);
if (meta_index_iter->status().ok() && meta_index_iter->Valid() && if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&

View file

@ -31,10 +31,10 @@ class RandomAccessFile;
struct TableProperties; struct TableProperties;
// Meta block names for metaindex // Meta block names for metaindex
extern const std::string kPropertiesBlock; extern const std::string kPropertiesBlockName;
extern const std::string kPropertiesBlockOldName; extern const std::string kPropertiesBlockOldName;
extern const std::string kCompressionDictBlock; extern const std::string kCompressionDictBlockName;
extern const std::string kRangeDelBlock; extern const std::string kRangeDelBlockName;
class MetaIndexBuilder { class MetaIndexBuilder {
public: public:

View file

@ -279,7 +279,7 @@ Status PlainTableBuilder::Finish() {
if (!s.ok()) { if (!s.ok()) {
return std::move(s); return std::move(s);
} }
meta_index_builer.Add(kPropertiesBlock, property_block_handle); meta_index_builer.Add(kPropertiesBlockName, property_block_handle);
// -- write metaindex block // -- write metaindex block
BlockHandle metaindex_block_handle; BlockHandle metaindex_block_handle;
@ -292,11 +292,13 @@ Status PlainTableBuilder::Finish() {
// Write Footer // Write Footer
// no need to write out new footer if we're using default checksum // no need to write out new footer if we're using default checksum
Footer footer(kLegacyPlainTableMagicNumber, 0); Footer footer;
footer.set_metaindex_handle(metaindex_block_handle); footer.set_table_magic_number(kLegacyPlainTableMagicNumber)
footer.set_index_handle(BlockHandle::NullBlockHandle()); .set_format_version(0)
.set_metaindex_handle(metaindex_block_handle)
.set_index_handle(BlockHandle::NullBlockHandle());
std::string footer_encoding; std::string footer_encoding;
footer.EncodeTo(&footer_encoding); footer.EncodeTo(&footer_encoding, offset_);
io_status_ = file_->Append(footer_encoding); io_status_ = file_->Append(footer_encoding);
if (io_status_.ok()) { if (io_status_.ok()) {
offset_ += footer_encoding.size(); offset_ += footer_encoding.size();

View file

@ -74,7 +74,7 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
// Warning about 'magic_number' being uninitialized shows up only in UBsan // Warning about 'magic_number' being uninitialized shows up only in UBsan
// builds. Though access is guarded by 's.ok()' checks, fix the issue to // builds. Though access is guarded by 's.ok()' checks, fix the issue to
// avoid any warnings. // avoid any warnings.
uint64_t magic_number = Footer::kInvalidTableMagicNumber; uint64_t magic_number = Footer::kNullTableMagicNumber;
// read table magic number // read table magic number
Footer footer; Footer footer;

View file

@ -21,16 +21,15 @@
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "block_fetcher.h"
#include "cache/lru_cache.h" #include "cache/lru_cache.h"
#include "db/dbformat.h" #include "db/dbformat.h"
#include "db/memtable.h" #include "db/memtable.h"
#include "db/write_batch_internal.h" #include "db/write_batch_internal.h"
#include "memtable/stl_wrappers.h" #include "memtable/stl_wrappers.h"
#include "meta_blocks.h"
#include "monitoring/statistics.h" #include "monitoring/statistics.h"
#include "options/options_helper.h" #include "options/options_helper.h"
#include "port/port.h" #include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
#include "rocksdb/compression_type.h" #include "rocksdb/compression_type.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
@ -53,9 +52,11 @@
#include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader.h"
#include "table/block_based/block_builder.h" #include "table/block_based/block_builder.h"
#include "table/block_based/flush_block_policy.h" #include "table/block_based/flush_block_policy.h"
#include "table/block_fetcher.h"
#include "table/format.h" #include "table/format.h"
#include "table/get_context.h" #include "table/get_context.h"
#include "table/internal_iterator.h" #include "table/internal_iterator.h"
#include "table/meta_blocks.h"
#include "table/plain/plain_table_factory.h" #include "table/plain/plain_table_factory.h"
#include "table/scoped_arena_iterator.h" #include "table/scoped_arena_iterator.h"
#include "table/sst_file_writer_collectors.h" #include "table/sst_file_writer_collectors.h"
@ -1356,10 +1357,8 @@ class FileChecksumTestHelper {
uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1; uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1;
INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest, INSTANTIATE_TEST_CASE_P(FormatVersions, BlockBasedTableTest,
testing::Values(test::kDefaultFormatVersion)); testing::ValuesIn(test::kFooterFormatVersionsToTest));
INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
testing::Values(test::kLatestFormatVersion));
// This test serves as the living tutorial for the prefix scan of user collected // This test serves as the living tutorial for the prefix scan of user collected
// properties. // properties.
@ -2228,7 +2227,8 @@ TEST_P(BlockBasedTableTest, BadChecksumType) {
const MutableCFOptions new_moptions(options); const MutableCFOptions new_moptions(options);
Status s = c.Reopen(new_ioptions, new_moptions); Status s = c.Reopen(new_ioptions, new_moptions);
ASSERT_NOK(s); ASSERT_NOK(s);
ASSERT_MATCHES_REGEX(s.ToString(), "Corruption: unknown checksum type 123.*"); ASSERT_EQ(s.ToString(),
"Corruption: Corrupt or unsupported checksum type: 123");
} }
namespace { namespace {
@ -4166,106 +4166,107 @@ TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) {
} }
TEST(TableTest, FooterTests) { TEST(TableTest, FooterTests) {
Random* r = Random::GetTLSInstance();
uint64_t data_size = (uint64_t{1} << r->Uniform(40)) + r->Uniform(100);
uint64_t index_size = r->Uniform(1000000000);
uint64_t metaindex_size = r->Uniform(1000000);
// 5 == block trailer size
BlockHandle index(data_size + 5, index_size);
BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size);
uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5;
{ {
// upconvert legacy block based // upconvert legacy block based
std::string encoded; std::string encoded;
Footer footer(kLegacyBlockBasedTableMagicNumber, 0); Footer footer;
BlockHandle meta_index(10, 5), index(20, 15); footer.set_table_magic_number(kLegacyBlockBasedTableMagicNumber)
footer.set_metaindex_handle(meta_index); .set_format_version(0)
footer.set_index_handle(index); .set_metaindex_handle(meta_index)
footer.EncodeTo(&encoded); .set_index_handle(index);
footer.EncodeTo(&encoded, footer_offset);
Footer decoded_footer; Footer decoded_footer;
Slice encoded_slice(encoded); Slice encoded_slice(encoded);
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
ASSERT_EQ(decoded_footer.checksum(), kCRC32c); ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
ASSERT_EQ(decoded_footer.version(), 0U); ASSERT_EQ(decoded_footer.format_version(), 0U);
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U);
} }
// block based, various checksums, various versions
for (auto t : GetSupportedChecksums()) { for (auto t : GetSupportedChecksums()) {
// block based, various checksums for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) {
std::string encoded; std::string encoded;
Footer footer(kBlockBasedTableMagicNumber, 1); Footer footer;
BlockHandle meta_index(10, 5), index(20, 15); footer.set_table_magic_number(kBlockBasedTableMagicNumber)
footer.set_metaindex_handle(meta_index); .set_format_version(fv)
footer.set_index_handle(index); .set_metaindex_handle(meta_index)
footer.set_checksum(t); .set_index_handle(index)
footer.EncodeTo(&encoded); .set_checksum_type(t);
Footer decoded_footer; footer.EncodeTo(&encoded, footer_offset);
Slice encoded_slice(encoded); Footer decoded_footer;
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); Slice encoded_slice(encoded);
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
ASSERT_EQ(decoded_footer.checksum(), t); ASSERT_EQ(decoded_footer.table_magic_number(),
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); kBlockBasedTableMagicNumber);
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.checksum_type(), t);
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().offset(),
ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); meta_index.offset());
ASSERT_EQ(decoded_footer.version(), 1U); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
ASSERT_EQ(decoded_footer.format_version(), fv);
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U);
}
} }
// Plain table is not supported in ROCKSDB_LITE // Plain table is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
{ {
// upconvert legacy plain table // upconvert legacy plain table
std::string encoded; std::string encoded;
Footer footer(kLegacyPlainTableMagicNumber, 0); Footer footer;
BlockHandle meta_index(10, 5), index(20, 15); footer.set_table_magic_number(kLegacyPlainTableMagicNumber)
footer.set_metaindex_handle(meta_index); .set_format_version(0)
footer.set_index_handle(index); .set_metaindex_handle(meta_index)
footer.EncodeTo(&encoded); .set_index_handle(index);
footer.EncodeTo(&encoded, footer_offset);
Footer decoded_footer; Footer decoded_footer;
Slice encoded_slice(encoded); Slice encoded_slice(encoded);
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
ASSERT_EQ(decoded_footer.checksum(), kCRC32c); ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c);
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
ASSERT_EQ(decoded_footer.version(), 0U); ASSERT_EQ(decoded_footer.format_version(), 0U);
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
} }
{ {
// xxhash plain table (not currently used) // xxhash plain table (not currently used)
std::string encoded; std::string encoded;
Footer footer(kPlainTableMagicNumber, 1); Footer footer;
BlockHandle meta_index(10, 5), index(20, 15); footer.set_table_magic_number(kPlainTableMagicNumber)
footer.set_metaindex_handle(meta_index); .set_format_version(1)
footer.set_index_handle(index); .set_metaindex_handle(meta_index)
footer.set_checksum(kxxHash); .set_index_handle(index)
footer.EncodeTo(&encoded); .set_checksum_type(kxxHash);
footer.EncodeTo(&encoded, footer_offset);
Footer decoded_footer; Footer decoded_footer;
Slice encoded_slice(encoded); Slice encoded_slice(encoded);
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset));
ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
ASSERT_EQ(decoded_footer.checksum(), kxxHash); ASSERT_EQ(decoded_footer.checksum_type(), kxxHash);
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
ASSERT_EQ(decoded_footer.version(), 1U); ASSERT_EQ(decoded_footer.format_version(), 1U);
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U);
} }
#endif // !ROCKSDB_LITE #endif // !ROCKSDB_LITE
{
// version == 2
std::string encoded;
Footer footer(kBlockBasedTableMagicNumber, 2);
BlockHandle meta_index(10, 5), index(20, 15);
footer.set_metaindex_handle(meta_index);
footer.set_index_handle(index);
footer.EncodeTo(&encoded);
Footer decoded_footer;
Slice encoded_slice(encoded);
ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice));
ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
ASSERT_EQ(decoded_footer.version(), 2U);
}
} }
class IndexBlockRestartIntervalTest class IndexBlockRestartIntervalTest
@ -4786,7 +4787,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
// -- Read properties block // -- Read properties block
BlockHandle properties_handle; BlockHandle properties_handle;
ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlock, ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlockName,
&properties_handle)); &properties_handle));
ASSERT_FALSE(properties_handle.IsNull()); ASSERT_FALSE(properties_handle.IsNull());
BlockContents properties_contents; BlockContents properties_contents;
@ -4873,7 +4874,7 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
key_at_max_offset = metaindex_iter->key().ToString(); key_at_max_offset = metaindex_iter->key().ToString();
} }
} }
ASSERT_EQ(kPropertiesBlock, key_at_max_offset); ASSERT_EQ(kPropertiesBlockName, key_at_max_offset);
// index handle is stored in footer rather than metaindex block, so need // index handle is stored in footer rather than metaindex block, so need
// separate logic to verify it comes before properties block. // separate logic to verify it comes before properties block.
ASSERT_GT(max_offset, footer.index_handle().offset()); ASSERT_GT(max_offset, footer.index_handle().offset());
@ -5369,6 +5370,7 @@ TEST_P(
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) { int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }

View file

@ -38,7 +38,12 @@ namespace ROCKSDB_NAMESPACE {
namespace test { namespace test {
const uint32_t kDefaultFormatVersion = BlockBasedTableOptions().format_version; const uint32_t kDefaultFormatVersion = BlockBasedTableOptions().format_version;
const uint32_t kLatestFormatVersion = 5u; const std::set<uint32_t> kFooterFormatVersionsToTest{
5U,
// In case any interesting future changes
kDefaultFormatVersion,
kLatestFormatVersion,
};
std::string RandomKey(Random* rnd, int len, RandomKeyType type) { std::string RandomKey(Random* rnd, int len, RandomKeyType type) {
// Make sure to generate a wide variety of characters so we // Make sure to generate a wide variety of characters so we

View file

@ -44,7 +44,7 @@ class SequentialFileReader;
namespace test { namespace test {
extern const uint32_t kDefaultFormatVersion; extern const uint32_t kDefaultFormatVersion;
extern const uint32_t kLatestFormatVersion; extern const std::set<uint32_t> kFooterFormatVersionsToTest;
// Return a random key with the specified length that may contain interesting // Return a random key with the specified length that may contain interesting
// characters (e.g. \x00, \xff, etc.). // characters (e.g. \x00, \xff, etc.).

View file

@ -5,6 +5,8 @@
#pragma once #pragma once
#include <type_traits>
#include "rocksdb/rocksdb_namespace.h" #include "rocksdb/rocksdb_namespace.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -20,4 +22,21 @@ inline DestClass* static_cast_with_check(SrcClass* x) {
#endif #endif
return ret; return ret;
} }
// Compile-time-checked static_cast between integral-like types (integers and
// enums).
//
// A call only compiles when
//   * From (with any reference stripped) is an integral or enum type,
//   * To is an integral or enum type, and
//   * sizeof(To) >= sizeof(From), so no bytes of the value can be dropped,
//     either today or after one of the types is changed later.
//
// Only the size is compared; signedness is not checked, which is what permits
// signed <-> unsigned conversions and enum <-> underlying-type conversions.
template <typename To, typename From>
inline To lossless_cast(From x) {
  using Source = typename std::remove_reference<From>::type;

  constexpr bool kSourceIsIntegerLike =
      std::is_integral<Source>::value || std::is_enum<Source>::value;
  constexpr bool kTargetIsIntegerLike =
      std::is_integral<To>::value || std::is_enum<To>::value;

  static_assert(kSourceIsIntegerLike, "Only works on integral types");
  static_assert(kTargetIsIntegerLike, "Only works on integral types");
  // The destination must be at least as wide as the source.
  static_assert(sizeof(Source) <= sizeof(To), "Must be lossless");

  return static_cast<To>(x);
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

View file

@ -31,7 +31,7 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
// The maximum length of a varint in bytes for 64-bit. // The maximum length of a varint in bytes for 64-bit.
const unsigned int kMaxVarint64Length = 10; const uint32_t kMaxVarint64Length = 10;
// Standard Put... routines append to a string // Standard Put... routines append to a string
extern void PutFixed16(std::string* dst, uint16_t value); extern void PutFixed16(std::string* dst, uint16_t value);