mirror of https://github.com/facebook/rocksdb.git
Block per key-value checksum (#11287)
Summary: add option `block_protection_bytes_per_key` and implementation for block per key-value checksum. The main changes are 1. checksum construction and verification in block.cc/h 2. pass the option `block_protection_bytes_per_key` around (mainly for methods defined in table_cache.h) 3. unit tests/crash test updates Tests: * Added unit tests * Crash test: `python3 tools/db_crashtest.py blackbox --simple --block_protection_bytes_per_key=1 --write_buffer_size=1048576` Follow up (maybe as a separate PR): make sure corruption status returned from BlockIters are correctly handled. Performance: Turning on block per KV protection has a non-trivial negative impact on read performance and costs additional memory. For memory, each block includes additional 24 bytes for checksum-related states beside checksum itself. For CPU, I set up a DB of size ~1.2GB with 5M keys (32 bytes key and 200 bytes value) which compacts to ~5 SST files (target file size 256 MB) in L6 without compression. I tested readrandom performance with various block cache size (to mimic various cache hit rates): ``` SETUP make OPTIMIZE_LEVEL="-O3" USE_LTO=1 DEBUG_LEVEL=0 -j32 db_bench ./db_bench -benchmarks=fillseq,compact0,waitforcompaction,compact,waitforcompaction -write_buffer_size=33554432 -level_compaction_dynamic_level_bytes=true -max_background_jobs=8 -target_file_size_base=268435456 --num=5000000 --key_size=32 --value_size=200 --compression_type=none BENCHMARK ./db_bench --use_existing_db -benchmarks=readtocache,readrandom[-X10] --num=5000000 --key_size=32 --disable_auto_compactions --reads=1000000 --block_protection_bytes_per_key=[0|1] --cache_size=$CACHESIZE The readrandom ops/sec looks like the following: Block cache size: 2GB 1.2GB * 0.9 1.2GB * 0.8 1.2GB * 0.5 8MB Main 240805 223604 198176 161653 139040 PR prot_bytes=0 238691 226693 200127 161082 141153 PR prot_bytes=1 214983 193199 178532 137013 108211 prot_bytes=1 vs -10% -15% -10.8% -15% -23% prot_bytes=0 ``` The benchmark has a lot of variance, but there was a 5% to 25% regression in this benchmark with different cache hit rates. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11287 Reviewed By: ajkr Differential Revision: D43970708 Pulled By: cbi42 fbshipit-source-id: ef98d898b71779846fa74212b9ec9e08b7183940
This commit is contained in:
parent
40d69b59ad
commit
62fc15f009
|
@ -1,6 +1,7 @@
|
|||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
### New Features
|
||||
* Introduced a new option `block_protection_bytes_per_key`, which can be used to enable per key-value integrity protection for in-memory blocks in block cache (#11287).
|
||||
|
||||
## 8.2.0 (04/24/2023)
|
||||
### Public API Changes
|
||||
|
|
|
@ -380,7 +380,8 @@ Status BuildTable(
|
|||
MaxFileSizeForL0MetaPin(mutable_cf_options),
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key*/ nullptr,
|
||||
/*allow_unprepared_value*/ false));
|
||||
/*allow_unprepared_value*/ false,
|
||||
mutable_cf_options.block_protection_bytes_per_key));
|
||||
s = it->status();
|
||||
if (s.ok() && paranoid_file_checks) {
|
||||
OutputValidator file_validator(tboptions.internal_comparator,
|
||||
|
|
|
@ -1428,6 +1428,12 @@ Status ColumnFamilyData::ValidateOptions(
|
|||
"Memtable per key-value checksum protection only supports 0, 1, 2, 4 "
|
||||
"or 8 bytes per key.");
|
||||
}
|
||||
if (std::find(supported.begin(), supported.end(),
|
||||
cf_options.block_protection_bytes_per_key) == supported.end()) {
|
||||
return Status::NotSupported(
|
||||
"Block per key-value checksum protection only supports 0, 1, 2, 4 "
|
||||
"or 8 bytes per key.");
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
|
|
@ -504,7 +504,9 @@ void CompactionJob::GenSubcompactionBoundaries() {
|
|||
FileMetaData* f = flevel->files[i].file_metadata;
|
||||
std::vector<TableReader::Anchor> my_anchors;
|
||||
Status s = cfd->table_cache()->ApproximateKeyAnchors(
|
||||
read_options, icomp, *f, my_anchors);
|
||||
read_options, icomp, *f,
|
||||
c->mutable_cf_options()->block_protection_bytes_per_key,
|
||||
my_anchors);
|
||||
if (!s.ok() || my_anchors.empty()) {
|
||||
my_anchors.emplace_back(f->largest.user_key(), f->fd.GetFileSize());
|
||||
}
|
||||
|
@ -735,7 +737,9 @@ Status CompactionJob::Run() {
|
|||
*compact_->compaction->mutable_cf_options()),
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr,
|
||||
/*allow_unprepared_value=*/false);
|
||||
/*allow_unprepared_value=*/false,
|
||||
compact_->compaction->mutable_cf_options()
|
||||
->block_protection_bytes_per_key);
|
||||
auto s = iter->status();
|
||||
|
||||
if (s.ok() && paranoid_file_checks_) {
|
||||
|
|
|
@ -454,7 +454,8 @@ class CompactionJobTestBase : public testing::Test {
|
|||
Status s = cf_options_.table_factory->NewTableReader(
|
||||
read_opts,
|
||||
TableReaderOptions(*cfd->ioptions(), nullptr, FileOptions(),
|
||||
cfd_->internal_comparator()),
|
||||
cfd_->internal_comparator(),
|
||||
0 /* block_protection_bytes_per_key */),
|
||||
std::move(freader), file_size, &table_reader, false);
|
||||
ASSERT_OK(s);
|
||||
assert(table_reader);
|
||||
|
|
|
@ -64,8 +64,8 @@ Status VerifySstFileChecksum(const Options& options,
|
|||
const bool kImmortal = true;
|
||||
auto reader_options = TableReaderOptions(
|
||||
ioptions, options.prefix_extractor, env_options, internal_comparator,
|
||||
false /* skip_filters */, !kImmortal, false /* force_direct_prefetch */,
|
||||
-1 /* level */);
|
||||
options.block_protection_bytes_per_key, false /* skip_filters */,
|
||||
!kImmortal, false /* force_direct_prefetch */, -1 /* level */);
|
||||
reader_options.largest_seqno = largest_seqno;
|
||||
s = ioptions.table_factory->NewTableReader(
|
||||
reader_options, std::move(file_reader), file_size, &table_reader,
|
||||
|
|
|
@ -678,6 +678,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|||
TableReaderOptions(
|
||||
*cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor,
|
||||
env_options_, cfd_->internal_comparator(),
|
||||
sv->mutable_cf_options.block_protection_bytes_per_key,
|
||||
/*skip_filters*/ false, /*immortal*/ false,
|
||||
/*force_direct_prefetch*/ false, /*level*/ -1,
|
||||
/*block_cache_tracer*/ nullptr,
|
||||
|
|
|
@ -36,7 +36,7 @@ class ForwardLevelIterator : public InternalIterator {
|
|||
const ColumnFamilyData* const cfd, const ReadOptions& read_options,
|
||||
const std::vector<FileMetaData*>& files,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
bool allow_unprepared_value)
|
||||
bool allow_unprepared_value, uint8_t block_protection_bytes_per_key)
|
||||
: cfd_(cfd),
|
||||
read_options_(read_options),
|
||||
files_(files),
|
||||
|
@ -45,7 +45,8 @@ class ForwardLevelIterator : public InternalIterator {
|
|||
file_iter_(nullptr),
|
||||
pinned_iters_mgr_(nullptr),
|
||||
prefix_extractor_(prefix_extractor),
|
||||
allow_unprepared_value_(allow_unprepared_value) {
|
||||
allow_unprepared_value_(allow_unprepared_value),
|
||||
block_protection_bytes_per_key_(block_protection_bytes_per_key) {
|
||||
status_.PermitUncheckedError(); // Allow uninitialized status through
|
||||
}
|
||||
|
||||
|
@ -87,7 +88,8 @@ class ForwardLevelIterator : public InternalIterator {
|
|||
/*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1,
|
||||
/*max_file_size_for_l0_meta_pin=*/0,
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_);
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_,
|
||||
block_protection_bytes_per_key_);
|
||||
file_iter_->SetPinnedItersMgr(pinned_iters_mgr_);
|
||||
valid_ = false;
|
||||
if (!range_del_agg.IsEmpty()) {
|
||||
|
@ -211,6 +213,7 @@ class ForwardLevelIterator : public InternalIterator {
|
|||
// Kept alive by ForwardIterator::sv_->mutable_cf_options
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor_;
|
||||
const bool allow_unprepared_value_;
|
||||
const uint8_t block_protection_bytes_per_key_;
|
||||
};
|
||||
|
||||
ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
|
||||
|
@ -738,7 +741,8 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
|
|||
/*skip_filters=*/false, /*level=*/-1,
|
||||
MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_));
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_,
|
||||
sv_->mutable_cf_options.block_protection_bytes_per_key));
|
||||
}
|
||||
BuildLevelIterators(vstorage, sv_);
|
||||
current_ = nullptr;
|
||||
|
@ -819,7 +823,8 @@ void ForwardIterator::RenewIterators() {
|
|||
/*skip_filters=*/false, /*level=*/-1,
|
||||
MaxFileSizeForL0MetaPin(svnew->mutable_cf_options),
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_));
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_,
|
||||
svnew->mutable_cf_options.block_protection_bytes_per_key));
|
||||
}
|
||||
|
||||
for (auto* f : l0_iters_) {
|
||||
|
@ -863,7 +868,8 @@ void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage,
|
|||
} else {
|
||||
level_iters_.push_back(new ForwardLevelIterator(
|
||||
cfd_, read_options_, level_files,
|
||||
sv->mutable_cf_options.prefix_extractor, allow_unprepared_value_));
|
||||
sv->mutable_cf_options.prefix_extractor, allow_unprepared_value_,
|
||||
sv->mutable_cf_options.block_protection_bytes_per_key));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -885,7 +891,8 @@ void ForwardIterator::ResetIncompleteIterators() {
|
|||
/*skip_filters=*/false, /*level=*/-1,
|
||||
MaxFileSizeForL0MetaPin(sv_->mutable_cf_options),
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_);
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value_,
|
||||
sv_->mutable_cf_options.block_protection_bytes_per_key);
|
||||
l0_iters_[i]->SetPinnedItersMgr(pinned_iters_mgr_);
|
||||
}
|
||||
|
||||
|
|
|
@ -250,6 +250,7 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
|
|||
TableReaderOptions(
|
||||
*cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor,
|
||||
env_options_, cfd_->internal_comparator(),
|
||||
sv->mutable_cf_options.block_protection_bytes_per_key,
|
||||
/*skip_filters*/ false, /*immortal*/ false,
|
||||
/*force_direct_prefetch*/ false, /*level*/ -1,
|
||||
/*block_cache_tracer*/ nullptr,
|
||||
|
|
|
@ -46,6 +46,8 @@ template <typename T>
|
|||
class ProtectionInfoKVOC;
|
||||
template <typename T>
|
||||
class ProtectionInfoKVOS;
|
||||
template <typename T>
|
||||
class ProtectionInfoKV;
|
||||
|
||||
// Aliases for 64-bit protection infos.
|
||||
using ProtectionInfo64 = ProtectionInfo<uint64_t>;
|
||||
|
@ -64,13 +66,13 @@ class ProtectionInfo {
|
|||
ProtectionInfoKVO<T> ProtectKVO(const SliceParts& key,
|
||||
const SliceParts& value,
|
||||
ValueType op_type) const;
|
||||
|
||||
T GetVal() const { return val_; }
|
||||
ProtectionInfoKV<T> ProtectKV(const Slice& key, const Slice& value) const;
|
||||
|
||||
private:
|
||||
friend class ProtectionInfoKVO<T>;
|
||||
friend class ProtectionInfoKVOS<T>;
|
||||
friend class ProtectionInfoKVOC<T>;
|
||||
friend class ProtectionInfoKV<T>;
|
||||
|
||||
// Each field is hashed with an independent value so we can catch fields being
|
||||
// swapped. Per the `NPHash64()` docs, using consecutive seeds is a pitfall,
|
||||
|
@ -89,8 +91,47 @@ class ProtectionInfo {
|
|||
static_assert(sizeof(ProtectionInfo<T>) == sizeof(T), "");
|
||||
}
|
||||
|
||||
T GetVal() const { return val_; }
|
||||
void SetVal(T val) { val_ = val; }
|
||||
|
||||
void Encode(uint8_t len, char* dst) const {
|
||||
assert(sizeof(val_) >= len);
|
||||
switch (len) {
|
||||
case 1:
|
||||
dst[0] = static_cast<uint8_t>(val_);
|
||||
break;
|
||||
case 2:
|
||||
EncodeFixed16(dst, static_cast<uint16_t>(val_));
|
||||
break;
|
||||
case 4:
|
||||
EncodeFixed32(dst, static_cast<uint32_t>(val_));
|
||||
break;
|
||||
case 8:
|
||||
EncodeFixed64(dst, static_cast<uint64_t>(val_));
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
bool Verify(uint8_t len, const char* checksum_ptr) const {
|
||||
assert(sizeof(val_) >= len);
|
||||
switch (len) {
|
||||
case 1:
|
||||
return static_cast<uint8_t>(checksum_ptr[0]) ==
|
||||
static_cast<uint8_t>(val_);
|
||||
case 2:
|
||||
return DecodeFixed16(checksum_ptr) == static_cast<uint16_t>(val_);
|
||||
case 4:
|
||||
return DecodeFixed32(checksum_ptr) == static_cast<uint32_t>(val_);
|
||||
case 8:
|
||||
return DecodeFixed64(checksum_ptr) == static_cast<uint64_t>(val_);
|
||||
default:
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
T val_ = 0;
|
||||
};
|
||||
|
||||
|
@ -113,7 +154,14 @@ class ProtectionInfoKVO {
|
|||
void UpdateV(const SliceParts& old_value, const SliceParts& new_value);
|
||||
void UpdateO(ValueType old_op_type, ValueType new_op_type);
|
||||
|
||||
T GetVal() const { return info_.GetVal(); }
|
||||
// Encode this protection info into `len` bytes and stores them in `dst`.
|
||||
void Encode(uint8_t len, char* dst) const { info_.Encode(len, dst); }
|
||||
// Verify this protection info against the protection info encoded by Encode()
|
||||
// at the first `len` bytes of `checksum_ptr`.
|
||||
// Returns true iff the verification is successful.
|
||||
bool Verify(uint8_t len, const char* checksum_ptr) const {
|
||||
return info_.Verify(len, checksum_ptr);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ProtectionInfo<T>;
|
||||
|
@ -124,6 +172,7 @@ class ProtectionInfoKVO {
|
|||
static_assert(sizeof(ProtectionInfoKVO<T>) == sizeof(T), "");
|
||||
}
|
||||
|
||||
T GetVal() const { return info_.GetVal(); }
|
||||
void SetVal(T val) { info_.SetVal(val); }
|
||||
|
||||
ProtectionInfo<T> info_;
|
||||
|
@ -154,7 +203,10 @@ class ProtectionInfoKVOC {
|
|||
void UpdateC(ColumnFamilyId old_column_family_id,
|
||||
ColumnFamilyId new_column_family_id);
|
||||
|
||||
T GetVal() const { return kvo_.GetVal(); }
|
||||
void Encode(uint8_t len, char* dst) const { kvo_.Encode(len, dst); }
|
||||
bool Verify(uint8_t len, const char* checksum_ptr) const {
|
||||
return kvo_.Verify(len, checksum_ptr);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ProtectionInfoKVO<T>;
|
||||
|
@ -163,6 +215,7 @@ class ProtectionInfoKVOC {
|
|||
static_assert(sizeof(ProtectionInfoKVOC<T>) == sizeof(T), "");
|
||||
}
|
||||
|
||||
T GetVal() const { return kvo_.GetVal(); }
|
||||
void SetVal(T val) { kvo_.SetVal(val); }
|
||||
|
||||
ProtectionInfoKVO<T> kvo_;
|
||||
|
@ -193,7 +246,10 @@ class ProtectionInfoKVOS {
|
|||
void UpdateS(SequenceNumber old_sequence_number,
|
||||
SequenceNumber new_sequence_number);
|
||||
|
||||
T GetVal() const { return kvo_.GetVal(); }
|
||||
void Encode(uint8_t len, char* dst) const { kvo_.Encode(len, dst); }
|
||||
bool Verify(uint8_t len, const char* checksum_ptr) const {
|
||||
return kvo_.Verify(len, checksum_ptr);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ProtectionInfoKVO<T>;
|
||||
|
@ -202,11 +258,32 @@ class ProtectionInfoKVOS {
|
|||
static_assert(sizeof(ProtectionInfoKVOS<T>) == sizeof(T), "");
|
||||
}
|
||||
|
||||
T GetVal() const { return kvo_.GetVal(); }
|
||||
void SetVal(T val) { kvo_.SetVal(val); }
|
||||
|
||||
ProtectionInfoKVO<T> kvo_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class ProtectionInfoKV {
|
||||
public:
|
||||
ProtectionInfoKV() = default;
|
||||
|
||||
void Encode(uint8_t len, char* dst) const { info_.Encode(len, dst); }
|
||||
bool Verify(uint8_t len, const char* checksum_ptr) const {
|
||||
return info_.Verify(len, checksum_ptr);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ProtectionInfo<T>;
|
||||
|
||||
explicit ProtectionInfoKV(T val) : info_(val) {
|
||||
static_assert(sizeof(ProtectionInfoKV<T>) == sizeof(T));
|
||||
}
|
||||
|
||||
ProtectionInfo<T> info_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
Status ProtectionInfo<T>::GetStatus() const {
|
||||
if (val_ != 0) {
|
||||
|
@ -244,6 +321,16 @@ ProtectionInfoKVO<T> ProtectionInfo<T>::ProtectKVO(const SliceParts& key,
|
|||
return ProtectionInfoKVO<T>(val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ProtectionInfoKV<T> ProtectionInfo<T>::ProtectKV(const Slice& key,
|
||||
const Slice& value) const {
|
||||
T val = GetVal();
|
||||
val = val ^ static_cast<T>(GetSliceNPHash64(key, ProtectionInfo<T>::kSeedK));
|
||||
val =
|
||||
val ^ static_cast<T>(GetSliceNPHash64(value, ProtectionInfo<T>::kSeedV));
|
||||
return ProtectionInfoKV<T>(val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ProtectionInfoKVO<T>::UpdateK(const Slice& old_key, const Slice& new_key) {
|
||||
T val = GetVal();
|
||||
|
@ -394,5 +481,4 @@ void ProtectionInfoKVOS<T>::UpdateS(SequenceNumber old_sequence_number,
|
|||
sizeof(new_sequence_number), ProtectionInfo<T>::kSeedS));
|
||||
SetVal(val);
|
||||
}
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
|
|
@ -256,7 +256,7 @@ void MemTable::UpdateOldestKeyTime() {
|
|||
}
|
||||
|
||||
Status MemTable::VerifyEntryChecksum(const char* entry,
|
||||
size_t protection_bytes_per_key,
|
||||
uint32_t protection_bytes_per_key,
|
||||
bool allow_data_in_errors) {
|
||||
if (protection_bytes_per_key == 0) {
|
||||
return Status::OK();
|
||||
|
@ -285,28 +285,11 @@ Status MemTable::VerifyEntryChecksum(const char* entry,
|
|||
Slice value = Slice(value_ptr, value_length);
|
||||
|
||||
const char* checksum_ptr = value_ptr + value_length;
|
||||
uint64_t expected = ProtectionInfo64()
|
||||
.ProtectKVO(user_key, value, type)
|
||||
.ProtectS(seq)
|
||||
.GetVal();
|
||||
bool match = true;
|
||||
switch (protection_bytes_per_key) {
|
||||
case 1:
|
||||
match = static_cast<uint8_t>(checksum_ptr[0]) ==
|
||||
static_cast<uint8_t>(expected);
|
||||
break;
|
||||
case 2:
|
||||
match = DecodeFixed16(checksum_ptr) == static_cast<uint16_t>(expected);
|
||||
break;
|
||||
case 4:
|
||||
match = DecodeFixed32(checksum_ptr) == static_cast<uint32_t>(expected);
|
||||
break;
|
||||
case 8:
|
||||
match = DecodeFixed64(checksum_ptr) == expected;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
bool match =
|
||||
ProtectionInfo64()
|
||||
.ProtectKVO(user_key, value, type)
|
||||
.ProtectS(seq)
|
||||
.Verify(static_cast<uint8_t>(protection_bytes_per_key), checksum_ptr);
|
||||
if (!match) {
|
||||
std::string msg(
|
||||
"Corrupted memtable entry, per key-value checksum verification "
|
||||
|
@ -526,7 +509,7 @@ class MemTableIterator : public InternalIterator {
|
|||
bool valid_;
|
||||
bool arena_mode_;
|
||||
bool value_pinned_;
|
||||
size_t protection_bytes_per_key_;
|
||||
uint32_t protection_bytes_per_key_;
|
||||
Status status_;
|
||||
Logger* logger_;
|
||||
|
||||
|
@ -684,28 +667,15 @@ void MemTable::UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info,
|
|||
return;
|
||||
}
|
||||
|
||||
uint64_t checksum = 0;
|
||||
if (kv_prot_info == nullptr) {
|
||||
checksum =
|
||||
ProtectionInfo64().ProtectKVO(key, value, type).ProtectS(s).GetVal();
|
||||
ProtectionInfo64()
|
||||
.ProtectKVO(key, value, type)
|
||||
.ProtectS(s)
|
||||
.Encode(static_cast<uint8_t>(moptions_.protection_bytes_per_key),
|
||||
checksum_ptr);
|
||||
} else {
|
||||
checksum = kv_prot_info->GetVal();
|
||||
}
|
||||
switch (moptions_.protection_bytes_per_key) {
|
||||
case 1:
|
||||
checksum_ptr[0] = static_cast<uint8_t>(checksum);
|
||||
break;
|
||||
case 2:
|
||||
EncodeFixed16(checksum_ptr, static_cast<uint16_t>(checksum));
|
||||
break;
|
||||
case 4:
|
||||
EncodeFixed32(checksum_ptr, static_cast<uint32_t>(checksum));
|
||||
break;
|
||||
case 8:
|
||||
EncodeFixed64(checksum_ptr, checksum);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
kv_prot_info->Encode(
|
||||
static_cast<uint8_t>(moptions_.protection_bytes_per_key), checksum_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -902,7 +872,7 @@ struct Saver {
|
|||
ReadCallback* callback_;
|
||||
bool* is_blob_index;
|
||||
bool allow_data_in_errors;
|
||||
size_t protection_bytes_per_key;
|
||||
uint32_t protection_bytes_per_key;
|
||||
bool CheckCallback(SequenceNumber _seq) {
|
||||
if (callback_) {
|
||||
return callback_->IsVisible(_seq);
|
||||
|
|
|
@ -529,7 +529,7 @@ class MemTable {
|
|||
|
||||
// Returns Corruption status if verification fails.
|
||||
static Status VerifyEntryChecksum(const char* entry,
|
||||
size_t protection_bytes_per_key,
|
||||
uint32_t protection_bytes_per_key,
|
||||
bool allow_data_in_errors = false);
|
||||
|
||||
private:
|
||||
|
|
12
db/repair.cc
12
db/repair.cc
|
@ -518,8 +518,9 @@ class Repairer {
|
|||
if (status.ok()) {
|
||||
// TODO: plumb Env::IOActivity
|
||||
const ReadOptions read_options;
|
||||
status = table_cache_->GetTableProperties(file_options_, read_options,
|
||||
icmp_, t->meta, &props);
|
||||
status = table_cache_->GetTableProperties(
|
||||
file_options_, read_options, icmp_, t->meta, &props,
|
||||
0 /* block_protection_bytes_per_key */);
|
||||
}
|
||||
if (status.ok()) {
|
||||
auto s =
|
||||
|
@ -577,7 +578,8 @@ class Repairer {
|
|||
/*level=*/-1, /*max_file_size_for_l0_meta_pin=*/0,
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr,
|
||||
/*allow_unprepared_value=*/false);
|
||||
/*allow_unprepared_value=*/false,
|
||||
cfd->GetLatestMutableCFOptions()->block_protection_bytes_per_key);
|
||||
ParsedInternalKey parsed;
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
Slice key = iter->key();
|
||||
|
@ -617,7 +619,9 @@ class Repairer {
|
|||
ReadOptions ropts;
|
||||
std::unique_ptr<FragmentedRangeTombstoneIterator> r_iter;
|
||||
status = table_cache_->GetRangeTombstoneIterator(
|
||||
ropts, cfd->internal_comparator(), t->meta, &r_iter);
|
||||
ropts, cfd->internal_comparator(), t->meta,
|
||||
cfd->GetLatestMutableCFOptions()->block_protection_bytes_per_key,
|
||||
&r_iter);
|
||||
|
||||
if (r_iter) {
|
||||
r_iter->SeekToFirst();
|
||||
|
|
|
@ -91,7 +91,8 @@ Status TableCache::GetTableReader(
|
|||
const ReadOptions& ro, const FileOptions& file_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, bool sequential_mode, bool record_read_stats,
|
||||
HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
|
||||
uint8_t block_protection_bytes_per_key, HistogramImpl* file_read_hist,
|
||||
std::unique_ptr<TableReader>* table_reader,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
|
||||
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
|
||||
|
@ -140,7 +141,8 @@ Status TableCache::GetTableReader(
|
|||
s = ioptions_.table_factory->NewTableReader(
|
||||
ro,
|
||||
TableReaderOptions(ioptions_, prefix_extractor, file_options,
|
||||
internal_comparator, skip_filters, immortal_tables_,
|
||||
internal_comparator, block_protection_bytes_per_key,
|
||||
skip_filters, immortal_tables_,
|
||||
false /* force_direct_prefetch */, level,
|
||||
block_cache_tracer_, max_file_size_for_l0_meta_pin,
|
||||
db_session_id_, file_meta.fd.GetNumber(),
|
||||
|
@ -156,6 +158,7 @@ Status TableCache::FindTable(
|
|||
const ReadOptions& ro, const FileOptions& file_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, TypedHandle** handle,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
|
||||
bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
|
||||
|
@ -179,12 +182,12 @@ Status TableCache::FindTable(
|
|||
}
|
||||
|
||||
std::unique_ptr<TableReader> table_reader;
|
||||
Status s =
|
||||
GetTableReader(ro, file_options, internal_comparator, file_meta,
|
||||
false /* sequential mode */, record_read_stats,
|
||||
file_read_hist, &table_reader, prefix_extractor,
|
||||
skip_filters, level, prefetch_index_and_filter_in_cache,
|
||||
max_file_size_for_l0_meta_pin, file_temperature);
|
||||
Status s = GetTableReader(ro, file_options, internal_comparator, file_meta,
|
||||
false /* sequential mode */, record_read_stats,
|
||||
block_protection_bytes_per_key, file_read_hist,
|
||||
&table_reader, prefix_extractor, skip_filters,
|
||||
level, prefetch_index_and_filter_in_cache,
|
||||
max_file_size_for_l0_meta_pin, file_temperature);
|
||||
if (!s.ok()) {
|
||||
assert(table_reader == nullptr);
|
||||
RecordTick(ioptions_.stats, NO_FILE_ERRORS);
|
||||
|
@ -212,6 +215,7 @@ InternalIterator* TableCache::NewIterator(
|
|||
size_t max_file_size_for_l0_meta_pin,
|
||||
const InternalKey* smallest_compaction_key,
|
||||
const InternalKey* largest_compaction_key, bool allow_unprepared_value,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
TruncatedRangeDelIterator** range_del_iter) {
|
||||
PERF_TIMER_GUARD(new_table_iterator_nanos);
|
||||
|
||||
|
@ -225,12 +229,13 @@ InternalIterator* TableCache::NewIterator(
|
|||
auto& fd = file_meta.fd;
|
||||
table_reader = fd.table_reader;
|
||||
if (table_reader == nullptr) {
|
||||
s = FindTable(
|
||||
options, file_options, icomparator, file_meta, &handle,
|
||||
prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
|
||||
!for_compaction /* record_read_stats */, file_read_hist, skip_filters,
|
||||
level, true /* prefetch_index_and_filter_in_cache */,
|
||||
max_file_size_for_l0_meta_pin, file_meta.temperature);
|
||||
s = FindTable(options, file_options, icomparator, file_meta, &handle,
|
||||
block_protection_bytes_per_key, prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
!for_compaction /* record_read_stats */, file_read_hist,
|
||||
skip_filters, level,
|
||||
true /* prefetch_index_and_filter_in_cache */,
|
||||
max_file_size_for_l0_meta_pin, file_meta.temperature);
|
||||
if (s.ok()) {
|
||||
table_reader = cache_.Value(handle);
|
||||
}
|
||||
|
@ -308,7 +313,7 @@ InternalIterator* TableCache::NewIterator(
|
|||
Status TableCache::GetRangeTombstoneIterator(
|
||||
const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta,
|
||||
const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key,
|
||||
std::unique_ptr<FragmentedRangeTombstoneIterator>* out_iter) {
|
||||
assert(out_iter);
|
||||
const FileDescriptor& fd = file_meta.fd;
|
||||
|
@ -317,7 +322,7 @@ Status TableCache::GetRangeTombstoneIterator(
|
|||
TypedHandle* handle = nullptr;
|
||||
if (t == nullptr) {
|
||||
s = FindTable(options, file_options_, internal_comparator, file_meta,
|
||||
&handle);
|
||||
&handle, block_protection_bytes_per_key);
|
||||
if (s.ok()) {
|
||||
t = cache_.Value(handle);
|
||||
}
|
||||
|
@ -403,6 +408,7 @@ Status TableCache::Get(
|
|||
const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, const Slice& k, GetContext* get_context,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
HistogramImpl* file_read_hist, bool skip_filters, int level,
|
||||
size_t max_file_size_for_l0_meta_pin) {
|
||||
|
@ -430,7 +436,7 @@ Status TableCache::Get(
|
|||
assert(s.ok());
|
||||
if (t == nullptr) {
|
||||
s = FindTable(options, file_options_, internal_comparator, file_meta,
|
||||
&handle, prefix_extractor,
|
||||
&handle, block_protection_bytes_per_key, prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record_read_stats */, file_read_hist, skip_filters,
|
||||
level, true /* prefetch_index_and_filter_in_cache */,
|
||||
|
@ -513,7 +519,8 @@ Status TableCache::MultiGetFilter(
|
|||
const FileMetaData& file_meta,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
HistogramImpl* file_read_hist, int level,
|
||||
MultiGetContext::Range* mget_range, TypedHandle** table_handle) {
|
||||
MultiGetContext::Range* mget_range, TypedHandle** table_handle,
|
||||
uint8_t block_protection_bytes_per_key) {
|
||||
auto& fd = file_meta.fd;
|
||||
IterKey row_cache_key;
|
||||
std::string row_cache_entry_buffer;
|
||||
|
@ -531,12 +538,13 @@ Status TableCache::MultiGetFilter(
|
|||
MultiGetContext::Range tombstone_range(*mget_range, mget_range->begin(),
|
||||
mget_range->end());
|
||||
if (t == nullptr) {
|
||||
s = FindTable(
|
||||
options, file_options_, internal_comparator, file_meta, &handle,
|
||||
prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record_read_stats */, file_read_hist, /*skip_filters=*/false,
|
||||
level, true /* prefetch_index_and_filter_in_cache */,
|
||||
/*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
|
||||
s = FindTable(options, file_options_, internal_comparator, file_meta,
|
||||
&handle, block_protection_bytes_per_key, prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record_read_stats */, file_read_hist,
|
||||
/*skip_filters=*/false, level,
|
||||
true /* prefetch_index_and_filter_in_cache */,
|
||||
/*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
|
||||
if (s.ok()) {
|
||||
t = cache_.Value(handle);
|
||||
}
|
||||
|
@ -564,6 +572,7 @@ Status TableCache::GetTableProperties(
|
|||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta,
|
||||
std::shared_ptr<const TableProperties>* properties,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor, bool no_io) {
|
||||
auto table_reader = file_meta.fd.table_reader;
|
||||
// table already been pre-loaded?
|
||||
|
@ -575,7 +584,8 @@ Status TableCache::GetTableProperties(
|
|||
|
||||
TypedHandle* table_handle = nullptr;
|
||||
Status s = FindTable(read_options, file_options, internal_comparator,
|
||||
file_meta, &table_handle, prefix_extractor, no_io);
|
||||
file_meta, &table_handle, block_protection_bytes_per_key,
|
||||
prefix_extractor, no_io);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
@ -588,12 +598,14 @@ Status TableCache::GetTableProperties(
|
|||
|
||||
Status TableCache::ApproximateKeyAnchors(
|
||||
const ReadOptions& ro, const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, std::vector<TableReader::Anchor>& anchors) {
|
||||
const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key,
|
||||
std::vector<TableReader::Anchor>& anchors) {
|
||||
Status s;
|
||||
TableReader* t = file_meta.fd.table_reader;
|
||||
TypedHandle* handle = nullptr;
|
||||
if (t == nullptr) {
|
||||
s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle);
|
||||
s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle,
|
||||
block_protection_bytes_per_key);
|
||||
if (s.ok()) {
|
||||
t = cache_.Value(handle);
|
||||
}
|
||||
|
@ -610,7 +622,7 @@ Status TableCache::ApproximateKeyAnchors(
|
|||
size_t TableCache::GetMemoryUsageByTableReader(
|
||||
const FileOptions& file_options, const ReadOptions& read_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta,
|
||||
const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor) {
|
||||
auto table_reader = file_meta.fd.table_reader;
|
||||
// table already been pre-loaded?
|
||||
|
@ -620,7 +632,8 @@ size_t TableCache::GetMemoryUsageByTableReader(
|
|||
|
||||
TypedHandle* table_handle = nullptr;
|
||||
Status s = FindTable(read_options, file_options, internal_comparator,
|
||||
file_meta, &table_handle, prefix_extractor, true);
|
||||
file_meta, &table_handle, block_protection_bytes_per_key,
|
||||
prefix_extractor, true /* no_io */);
|
||||
if (!s.ok()) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -639,16 +652,17 @@ uint64_t TableCache::ApproximateOffsetOf(
|
|||
const ReadOptions& read_options, const Slice& key,
|
||||
const FileMetaData& file_meta, TableReaderCaller caller,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor) {
|
||||
uint64_t result = 0;
|
||||
TableReader* table_reader = file_meta.fd.table_reader;
|
||||
TypedHandle* table_handle = nullptr;
|
||||
if (table_reader == nullptr) {
|
||||
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
|
||||
Status s =
|
||||
FindTable(read_options, file_options_, internal_comparator, file_meta,
|
||||
&table_handle, prefix_extractor, false /* no_io */,
|
||||
!for_compaction /* record_read_stats */);
|
||||
Status s = FindTable(
|
||||
read_options, file_options_, internal_comparator, file_meta,
|
||||
&table_handle, block_protection_bytes_per_key, prefix_extractor,
|
||||
false /* no_io */, !for_compaction /* record_read_stats */);
|
||||
if (s.ok()) {
|
||||
table_reader = cache_.Value(table_handle);
|
||||
}
|
||||
|
@ -668,16 +682,17 @@ uint64_t TableCache::ApproximateSize(
|
|||
const ReadOptions& read_options, const Slice& start, const Slice& end,
|
||||
const FileMetaData& file_meta, TableReaderCaller caller,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor) {
|
||||
uint64_t result = 0;
|
||||
TableReader* table_reader = file_meta.fd.table_reader;
|
||||
TypedHandle* table_handle = nullptr;
|
||||
if (table_reader == nullptr) {
|
||||
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
|
||||
Status s =
|
||||
FindTable(read_options, file_options_, internal_comparator, file_meta,
|
||||
&table_handle, prefix_extractor, false /* no_io */,
|
||||
!for_compaction /* record_read_stats */);
|
||||
Status s = FindTable(
|
||||
read_options, file_options_, internal_comparator, file_meta,
|
||||
&table_handle, block_protection_bytes_per_key, prefix_extractor,
|
||||
false /* no_io */, !for_compaction /* record_read_stats */);
|
||||
if (s.ok()) {
|
||||
table_reader = cache_.Value(table_handle);
|
||||
}
|
||||
|
|
|
@ -96,6 +96,7 @@ class TableCache {
|
|||
size_t max_file_size_for_l0_meta_pin,
|
||||
const InternalKey* smallest_compaction_key,
|
||||
const InternalKey* largest_compaction_key, bool allow_unprepared_value,
|
||||
uint8_t protection_bytes_per_key,
|
||||
TruncatedRangeDelIterator** range_del_iter = nullptr);
|
||||
|
||||
// If a seek to internal key "k" in specified file finds an entry,
|
||||
|
@ -112,6 +113,7 @@ class TableCache {
|
|||
const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, const Slice& k, GetContext* get_context,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
|
||||
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
|
||||
int level = -1, size_t max_file_size_for_l0_meta_pin = 0);
|
||||
|
@ -121,7 +123,7 @@ class TableCache {
|
|||
Status GetRangeTombstoneIterator(
|
||||
const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta,
|
||||
const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key,
|
||||
std::unique_ptr<FragmentedRangeTombstoneIterator>* out_iter);
|
||||
|
||||
// Call table reader's MultiGetFilter to use the bloom filter to filter out
|
||||
|
@ -135,7 +137,8 @@ class TableCache {
|
|||
const FileMetaData& file_meta,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
HistogramImpl* file_read_hist, int level,
|
||||
MultiGetContext::Range* mget_range, TypedHandle** table_handle);
|
||||
MultiGetContext::Range* mget_range, TypedHandle** table_handle,
|
||||
uint8_t block_protection_bytes_per_key);
|
||||
|
||||
// If a seek to internal key "k" in specified file finds an entry,
|
||||
// call get_context->SaveValue() repeatedly until
|
||||
|
@ -150,6 +153,7 @@ class TableCache {
|
|||
Status, MultiGet, const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, const MultiGetContext::Range* mget_range,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
|
||||
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
|
||||
bool skip_range_deletions = false, int level = -1,
|
||||
|
@ -165,6 +169,7 @@ class TableCache {
|
|||
const ReadOptions& ro, const FileOptions& toptions,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, TypedHandle**,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
|
||||
const bool no_io = false, bool record_read_stats = true,
|
||||
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
|
||||
|
@ -183,12 +188,14 @@ class TableCache {
|
|||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta,
|
||||
std::shared_ptr<const TableProperties>* properties,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
|
||||
bool no_io = false);
|
||||
|
||||
Status ApproximateKeyAnchors(const ReadOptions& ro,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
std::vector<TableReader::Anchor>& anchors);
|
||||
|
||||
// Return total memory usage of the table reader of the file.
|
||||
|
@ -196,7 +203,7 @@ class TableCache {
|
|||
size_t GetMemoryUsageByTableReader(
|
||||
const FileOptions& toptions, const ReadOptions& read_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta,
|
||||
const FileMetaData& file_meta, uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr);
|
||||
|
||||
// Returns approximated offset of a key in a file represented by fd.
|
||||
|
@ -204,6 +211,7 @@ class TableCache {
|
|||
const ReadOptions& read_options, const Slice& key,
|
||||
const FileMetaData& file_meta, TableReaderCaller caller,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr);
|
||||
|
||||
// Returns approximated data size between start and end keys in a file
|
||||
|
@ -212,6 +220,7 @@ class TableCache {
|
|||
const ReadOptions& read_options, const Slice& start, const Slice& end,
|
||||
const FileMetaData& file_meta, TableReaderCaller caller,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr);
|
||||
|
||||
CacheInterface& get_cache() { return cache_; }
|
||||
|
@ -234,8 +243,8 @@ class TableCache {
|
|||
const ReadOptions& ro, const FileOptions& file_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, bool sequential_mode,
|
||||
bool record_read_stats, HistogramImpl* file_read_hist,
|
||||
std::unique_ptr<TableReader>* table_reader,
|
||||
bool record_read_stats, uint8_t block_protection_bytes_per_key,
|
||||
HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
|
||||
bool skip_filters = false, int level = -1,
|
||||
bool prefetch_index_and_filter_in_cache = true,
|
||||
|
|
|
@ -17,6 +17,7 @@ namespace ROCKSDB_NAMESPACE {
|
|||
DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
|
||||
(const ReadOptions& options, const InternalKeyComparator& internal_comparator,
|
||||
const FileMetaData& file_meta, const MultiGetContext::Range* mget_range,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
HistogramImpl* file_read_hist, bool skip_filters, bool skip_range_deletions,
|
||||
int level, TypedHandle* handle) {
|
||||
|
@ -65,7 +66,7 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
|
|||
if (t == nullptr) {
|
||||
assert(handle == nullptr);
|
||||
s = FindTable(options, file_options_, internal_comparator, file_meta,
|
||||
&handle, prefix_extractor,
|
||||
&handle, block_protection_bytes_per_key, prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
true /* record_read_stats */, file_read_hist, skip_filters,
|
||||
level, true /* prefetch_index_and_filter_in_cache */,
|
||||
|
|
|
@ -1257,7 +1257,8 @@ class VersionBuilder::Rep {
|
|||
InternalStats* internal_stats, int max_threads,
|
||||
bool prefetch_index_and_filter_in_cache, bool is_initial_load,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options) {
|
||||
size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
|
||||
uint8_t block_protection_bytes_per_key) {
|
||||
assert(table_cache_ != nullptr);
|
||||
|
||||
size_t table_cache_capacity =
|
||||
|
@ -1326,7 +1327,8 @@ class VersionBuilder::Rep {
|
|||
statuses[file_idx] = table_cache_->FindTable(
|
||||
read_options, file_options_,
|
||||
*(base_vstorage_->InternalComparator()), *file_meta, &handle,
|
||||
prefix_extractor, false /*no_io */, true /* record_read_stats */,
|
||||
block_protection_bytes_per_key, prefix_extractor, false /*no_io */,
|
||||
true /* record_read_stats */,
|
||||
internal_stats->GetFileReadHist(level), false, level,
|
||||
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin,
|
||||
file_meta->temperature);
|
||||
|
@ -1384,11 +1386,12 @@ Status VersionBuilder::LoadTableHandlers(
|
|||
InternalStats* internal_stats, int max_threads,
|
||||
bool prefetch_index_and_filter_in_cache, bool is_initial_load,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options) {
|
||||
return rep_->LoadTableHandlers(internal_stats, max_threads,
|
||||
prefetch_index_and_filter_in_cache,
|
||||
is_initial_load, prefix_extractor,
|
||||
max_file_size_for_l0_meta_pin, read_options);
|
||||
size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
|
||||
uint8_t block_protection_bytes_per_key) {
|
||||
return rep_->LoadTableHandlers(
|
||||
internal_stats, max_threads, prefetch_index_and_filter_in_cache,
|
||||
is_initial_load, prefix_extractor, max_file_size_for_l0_meta_pin,
|
||||
read_options, block_protection_bytes_per_key);
|
||||
}
|
||||
|
||||
uint64_t VersionBuilder::GetMinOldestBlobFileNumber() const {
|
||||
|
|
|
@ -48,7 +48,8 @@ class VersionBuilder {
|
|||
InternalStats* internal_stats, int max_threads,
|
||||
bool prefetch_index_and_filter_in_cache, bool is_initial_load,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options);
|
||||
size_t max_file_size_for_l0_meta_pin, const ReadOptions& read_options,
|
||||
uint8_t block_protection_bytes_per_key);
|
||||
uint64_t GetMinOldestBlobFileNumber() const;
|
||||
|
||||
private:
|
||||
|
|
|
@ -566,13 +566,13 @@ Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd,
|
|||
assert(builder_iter->second != nullptr);
|
||||
VersionBuilder* builder = builder_iter->second->version_builder();
|
||||
assert(builder);
|
||||
const MutableCFOptions* moptions = cfd->GetLatestMutableCFOptions();
|
||||
Status s = builder->LoadTableHandlers(
|
||||
cfd->internal_stats(),
|
||||
version_set_->db_options_->max_file_opening_threads,
|
||||
prefetch_index_and_filter_in_cache, is_initial_load,
|
||||
cfd->GetLatestMutableCFOptions()->prefix_extractor,
|
||||
MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions()),
|
||||
read_options_);
|
||||
moptions->prefix_extractor, MaxFileSizeForL0MetaPin(*moptions),
|
||||
read_options_, moptions->block_protection_bytes_per_key);
|
||||
if ((s.IsPathNotFound() || s.IsCorruption()) && no_error_if_files_missing_) {
|
||||
s = Status::OK();
|
||||
}
|
||||
|
@ -812,16 +812,16 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
|
|||
assert(builder);
|
||||
}
|
||||
|
||||
const MutableCFOptions* cf_opts_ptr = cfd->GetLatestMutableCFOptions();
|
||||
auto* version = new Version(cfd, version_set_, version_set_->file_options_,
|
||||
*cfd->GetLatestMutableCFOptions(), io_tracer_,
|
||||
*cf_opts_ptr, io_tracer_,
|
||||
version_set_->current_version_number_++,
|
||||
epoch_number_requirement_);
|
||||
s = builder->LoadTableHandlers(
|
||||
cfd->internal_stats(),
|
||||
version_set_->db_options_->max_file_opening_threads, false, true,
|
||||
cfd->GetLatestMutableCFOptions()->prefix_extractor,
|
||||
MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions()),
|
||||
read_options_);
|
||||
cf_opts_ptr->prefix_extractor, MaxFileSizeForL0MetaPin(*cf_opts_ptr),
|
||||
read_options_, cf_opts_ptr->block_protection_bytes_per_key);
|
||||
if (!s.ok()) {
|
||||
delete version;
|
||||
if (s.IsCorruption()) {
|
||||
|
|
|
@ -941,7 +941,7 @@ class LevelIterator final : public InternalIterator {
|
|||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
bool should_sample, HistogramImpl* file_read_hist,
|
||||
TableReaderCaller caller, bool skip_filters, int level,
|
||||
RangeDelAggregator* range_del_agg,
|
||||
uint8_t block_protection_bytes_per_key, RangeDelAggregator* range_del_agg,
|
||||
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
|
||||
nullptr,
|
||||
bool allow_unprepared_value = false,
|
||||
|
@ -964,6 +964,7 @@ class LevelIterator final : public InternalIterator {
|
|||
pinned_iters_mgr_(nullptr),
|
||||
compaction_boundaries_(compaction_boundaries),
|
||||
is_next_read_sequential_(false),
|
||||
block_protection_bytes_per_key_(block_protection_bytes_per_key),
|
||||
range_tombstone_iter_(nullptr),
|
||||
to_return_sentinel_(false) {
|
||||
// Empty level is not supported.
|
||||
|
@ -1107,7 +1108,8 @@ class LevelIterator final : public InternalIterator {
|
|||
nullptr /* don't need reference to table */, file_read_hist_, caller_,
|
||||
/*arena=*/nullptr, skip_filters_, level_,
|
||||
/*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key,
|
||||
largest_compaction_key, allow_unprepared_value_, range_tombstone_iter_);
|
||||
largest_compaction_key, allow_unprepared_value_,
|
||||
block_protection_bytes_per_key_, range_tombstone_iter_);
|
||||
}
|
||||
|
||||
// Check if current file being fully within iterate_lower_bound.
|
||||
|
@ -1154,6 +1156,8 @@ class LevelIterator final : public InternalIterator {
|
|||
|
||||
bool is_next_read_sequential_;
|
||||
|
||||
uint8_t block_protection_bytes_per_key_;
|
||||
|
||||
// This is set when this level iterator is used under a merging iterator
|
||||
// that processes range tombstones. range_tombstone_iter_ points to where the
|
||||
// merging iterator stores the range tombstones iterator for this level. When
|
||||
|
@ -1535,6 +1539,7 @@ Status Version::GetTableProperties(const ReadOptions& read_options,
|
|||
auto ioptions = cfd_->ioptions();
|
||||
Status s = table_cache->GetTableProperties(
|
||||
file_options_, read_options, cfd_->internal_comparator(), *file_meta, tp,
|
||||
mutable_cf_options_.block_protection_bytes_per_key,
|
||||
mutable_cf_options_.prefix_extractor, true /* no io */);
|
||||
if (s.ok()) {
|
||||
return s;
|
||||
|
@ -1621,6 +1626,7 @@ Status Version::TablesRangeTombstoneSummary(int max_entries_to_print,
|
|||
|
||||
Status s = table_cache->GetRangeTombstoneIterator(
|
||||
read_options, cfd_->internal_comparator(), *file_meta,
|
||||
cfd_->GetLatestMutableCFOptions()->block_protection_bytes_per_key,
|
||||
&tombstone_iter);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
|
@ -1739,6 +1745,7 @@ size_t Version::GetMemoryUsageByTableReaders(const ReadOptions& read_options) {
|
|||
total_usage += cfd_->table_cache()->GetMemoryUsageByTableReader(
|
||||
file_options_, read_options, cfd_->internal_comparator(),
|
||||
*file_level.files[i].file_metadata,
|
||||
mutable_cf_options_.block_protection_bytes_per_key,
|
||||
mutable_cf_options_.prefix_extractor);
|
||||
}
|
||||
}
|
||||
|
@ -1848,6 +1855,7 @@ InternalIterator* Version::TEST_GetLevelIterator(
|
|||
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
||||
cfd_->internal_stats()->GetFileReadHist(level),
|
||||
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
||||
mutable_cf_options_.block_protection_bytes_per_key,
|
||||
nullptr /* range_del_agg */, nullptr /* compaction_boundaries */,
|
||||
allow_unprepared_value, &tombstone_iter_ptr);
|
||||
if (read_options.ignore_range_deletions) {
|
||||
|
@ -1946,7 +1954,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|||
/*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr, allow_unprepared_value,
|
||||
&tombstone_iter);
|
||||
mutable_cf_options_.block_protection_bytes_per_key, &tombstone_iter);
|
||||
if (read_options.ignore_range_deletions) {
|
||||
merge_iter_builder->AddIterator(table_iter);
|
||||
} else {
|
||||
|
@ -1975,8 +1983,10 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|||
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
||||
cfd_->internal_stats()->GetFileReadHist(level),
|
||||
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
||||
/*range_del_agg=*/nullptr, /*compaction_boundaries=*/nullptr,
|
||||
allow_unprepared_value, &tombstone_iter_ptr);
|
||||
mutable_cf_options_.block_protection_bytes_per_key,
|
||||
/*range_del_agg=*/nullptr,
|
||||
/*compaction_boundaries=*/nullptr, allow_unprepared_value,
|
||||
&tombstone_iter_ptr);
|
||||
if (read_options.ignore_range_deletions) {
|
||||
merge_iter_builder->AddIterator(level_iter);
|
||||
} else {
|
||||
|
@ -2019,7 +2029,8 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
|
|||
/*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_,
|
||||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr,
|
||||
/*allow_unprepared_value=*/false));
|
||||
/*allow_unprepared_value=*/false,
|
||||
mutable_cf_options_.block_protection_bytes_per_key));
|
||||
status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
|
||||
iter.get(), overlap);
|
||||
if (!status.ok() || *overlap) {
|
||||
|
@ -2034,7 +2045,8 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
|
|||
mutable_cf_options_.prefix_extractor, should_sample_file_read(),
|
||||
cfd_->internal_stats()->GetFileReadHist(level),
|
||||
TableReaderCaller::kUserIterator, IsFilterSkipped(level), level,
|
||||
&range_del_agg));
|
||||
mutable_cf_options_.block_protection_bytes_per_key, &range_del_agg,
|
||||
nullptr, false));
|
||||
status = OverlapWithIterator(ucmp, smallest_user_key, largest_user_key,
|
||||
iter.get(), overlap);
|
||||
}
|
||||
|
@ -2333,7 +2345,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
|||
StopWatchNano timer(clock_, timer_enabled /* auto_start */);
|
||||
*status = table_cache_->Get(
|
||||
read_options, *internal_comparator(), *f->file_metadata, ikey,
|
||||
&get_context, mutable_cf_options_.prefix_extractor,
|
||||
&get_context, mutable_cf_options_.block_protection_bytes_per_key,
|
||||
mutable_cf_options_.prefix_extractor,
|
||||
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
||||
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
|
||||
fp.IsHitFileLastInLevel()),
|
||||
|
@ -2578,7 +2591,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|||
read_options, *internal_comparator(), *f->file_metadata,
|
||||
mutable_cf_options_.prefix_extractor,
|
||||
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
||||
fp.GetHitFileLevel(), &file_range, &table_handle);
|
||||
fp.GetHitFileLevel(), &file_range, &table_handle,
|
||||
mutable_cf_options_.block_protection_bytes_per_key);
|
||||
skip_range_deletions = true;
|
||||
if (status.ok()) {
|
||||
skip_filters = true;
|
||||
|
@ -2768,7 +2782,8 @@ Status Version::ProcessBatch(
|
|||
read_options, *internal_comparator(), *f->file_metadata,
|
||||
mutable_cf_options_.prefix_extractor,
|
||||
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
||||
fp.GetHitFileLevel(), &file_range, &table_handle);
|
||||
fp.GetHitFileLevel(), &file_range, &table_handle,
|
||||
mutable_cf_options_.block_protection_bytes_per_key);
|
||||
if (status.ok()) {
|
||||
skip_filters = true;
|
||||
skip_range_deletions = true;
|
||||
|
@ -5217,7 +5232,8 @@ Status VersionSet::ProcessManifestWrites(
|
|||
true /* prefetch_index_and_filter_in_cache */,
|
||||
false /* is_initial_load */,
|
||||
mutable_cf_options_ptrs[i]->prefix_extractor,
|
||||
MaxFileSizeForL0MetaPin(*mutable_cf_options_ptrs[i]), read_options);
|
||||
MaxFileSizeForL0MetaPin(*mutable_cf_options_ptrs[i]), read_options,
|
||||
mutable_cf_options_ptrs[i]->block_protection_bytes_per_key);
|
||||
if (!s.ok()) {
|
||||
if (db_options_->paranoid_checks) {
|
||||
break;
|
||||
|
@ -6553,10 +6569,11 @@ uint64_t VersionSet::ApproximateOffsetOf(const ReadOptions& read_options,
|
|||
// "key" falls in the range for this table. Add the
|
||||
// approximate offset of "key" within the table.
|
||||
TableCache* table_cache = v->cfd_->table_cache();
|
||||
const MutableCFOptions& cf_opts = v->GetMutableCFOptions();
|
||||
if (table_cache != nullptr) {
|
||||
result = table_cache->ApproximateOffsetOf(
|
||||
read_options, key, *f.file_metadata, caller, icmp,
|
||||
v->GetMutableCFOptions().prefix_extractor);
|
||||
cf_opts.block_protection_bytes_per_key, cf_opts.prefix_extractor);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
@ -6596,9 +6613,10 @@ uint64_t VersionSet::ApproximateSize(const ReadOptions& read_options,
|
|||
if (table_cache == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
const MutableCFOptions& cf_opts = v->GetMutableCFOptions();
|
||||
return table_cache->ApproximateSize(
|
||||
read_options, start, end, *f.file_metadata, caller, icmp,
|
||||
v->GetMutableCFOptions().prefix_extractor);
|
||||
cf_opts.block_protection_bytes_per_key, cf_opts.prefix_extractor);
|
||||
}
|
||||
|
||||
void VersionSet::RemoveLiveFiles(
|
||||
|
@ -6757,6 +6775,7 @@ InternalIterator* VersionSet::MakeInputIterator(
|
|||
/*smallest_compaction_key=*/nullptr,
|
||||
/*largest_compaction_key=*/nullptr,
|
||||
/*allow_unprepared_value=*/false,
|
||||
c->mutable_cf_options()->block_protection_bytes_per_key,
|
||||
/*range_del_iter=*/&range_tombstone_iter);
|
||||
range_tombstones.emplace_back(range_tombstone_iter, nullptr);
|
||||
}
|
||||
|
@ -6770,8 +6789,9 @@ InternalIterator* VersionSet::MakeInputIterator(
|
|||
/*should_sample=*/false,
|
||||
/*no per level latency histogram=*/nullptr,
|
||||
TableReaderCaller::kCompaction, /*skip_filters=*/false,
|
||||
/*level=*/static_cast<int>(c->level(which)), range_del_agg,
|
||||
c->boundaries(which), false, &tombstone_iter_ptr);
|
||||
/*level=*/static_cast<int>(c->level(which)),
|
||||
c->mutable_cf_options()->block_protection_bytes_per_key,
|
||||
range_del_agg, c->boundaries(which), false, &tombstone_iter_ptr);
|
||||
range_tombstones.emplace_back(nullptr, tombstone_iter_ptr);
|
||||
}
|
||||
}
|
||||
|
@ -7008,7 +7028,8 @@ Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options,
|
|||
TableCache::TypedHandle* handle = nullptr;
|
||||
FileMetaData meta_copy = meta;
|
||||
status = table_cache->FindTable(
|
||||
read_options, file_opts, *icmp, meta_copy, &handle, pe,
|
||||
read_options, file_opts, *icmp, meta_copy, &handle,
|
||||
cf_opts->block_protection_bytes_per_key, pe,
|
||||
/*no_io=*/false, /*record_read_stats=*/true,
|
||||
internal_stats->GetFileReadHist(level), false, level,
|
||||
/*prefetch_index_and_filter_in_cache*/ false, max_sz_for_l0_meta_pin,
|
||||
|
|
|
@ -25,6 +25,7 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
|
|||
StopWatchNano timer(clock_, timer_enabled /* auto_start */);
|
||||
s = CO_AWAIT(table_cache_->MultiGet)(
|
||||
read_options, *internal_comparator(), *f->file_metadata, &file_range,
|
||||
mutable_cf_options_.block_protection_bytes_per_key,
|
||||
mutable_cf_options_.prefix_extractor,
|
||||
cfd_->internal_stats()->GetFileReadHist(hit_file_level), skip_filters,
|
||||
skip_range_deletions, hit_file_level, table_handle);
|
||||
|
|
|
@ -290,6 +290,7 @@ DECLARE_bool(paranoid_file_checks);
|
|||
DECLARE_bool(fail_if_options_file_error);
|
||||
DECLARE_uint64(batch_protection_bytes_per_key);
|
||||
DECLARE_uint32(memtable_protection_bytes_per_key);
|
||||
DECLARE_uint32(block_protection_bytes_per_key);
|
||||
|
||||
DECLARE_uint64(user_timestamp_size);
|
||||
DECLARE_string(secondary_cache_uri);
|
||||
|
|
|
@ -975,6 +975,11 @@ DEFINE_uint32(
|
|||
"specified number of bytes per key. Currently the supported "
|
||||
"nonzero values are 1, 2, 4 and 8.");
|
||||
|
||||
DEFINE_uint32(block_protection_bytes_per_key, 0,
|
||||
"If nonzero, enables integrity protection in blocks at the "
|
||||
"specified number of bytes per key. Currently the supported "
|
||||
"nonzero values are 1, 2, 4 and 8.");
|
||||
|
||||
DEFINE_string(file_checksum_impl, "none",
|
||||
"Name of an implementation for file_checksum_gen_factory, or "
|
||||
"\"none\" for null.");
|
||||
|
|
|
@ -3122,6 +3122,7 @@ void InitializeOptionsFromFlags(
|
|||
FLAGS_verify_sst_unique_id_in_manifest;
|
||||
options.memtable_protection_bytes_per_key =
|
||||
FLAGS_memtable_protection_bytes_per_key;
|
||||
options.block_protection_bytes_per_key = FLAGS_block_protection_bytes_per_key;
|
||||
|
||||
// Integrated BlobDB
|
||||
options.enable_blob_files = FLAGS_enable_blob_files;
|
||||
|
|
|
@ -92,7 +92,8 @@ TableReader* NewTableReader(const std::string& sst_file_path,
|
|||
if (s.ok()) {
|
||||
ImmutableOptions iopts(options, cf_ioptions);
|
||||
TableReaderOptions t_opt(iopts, /*prefix_extractor=*/nullptr, env_options,
|
||||
cf_ioptions.internal_comparator);
|
||||
cf_ioptions.internal_comparator,
|
||||
0 /* block_protection_bytes_per_key */);
|
||||
t_opt.largest_seqno = kMaxSequenceNumber;
|
||||
s = options.table_factory->NewTableReader(t_opt, std::move(file_reader),
|
||||
file_size, &table_reader,
|
||||
|
|
|
@ -1122,6 +1122,20 @@ struct AdvancedColumnFamilyOptions {
|
|||
// only compatible changes are allowed.
|
||||
bool persist_user_defined_timestamps = true;
|
||||
|
||||
// Enable/disable per key-value checksum protection for in memory blocks.
|
||||
//
|
||||
// Checksum is constructed when a block is loaded into memory and verification
|
||||
// is done for each key read from the block. This is useful for detecting
|
||||
// in-memory data corruption. Note that this feature has a non-trivial
|
||||
// negative impact on read performance. Different values of the
|
||||
// option have similar performance impact, but different memory cost and
|
||||
// corruption detection probability (e.g. 1 byte gives 255/256 chance for
|
||||
// detecting a corruption).
|
||||
//
|
||||
// Default: 0 (no protection)
|
||||
// Supported values: 0, 1, 2, 4, 8.
|
||||
uint8_t block_protection_bytes_per_key = 0;
|
||||
|
||||
// Create ColumnFamilyOptions with default values for all fields
|
||||
AdvancedColumnFamilyOptions();
|
||||
// Create ColumnFamilyOptions from Options
|
||||
|
|
|
@ -488,6 +488,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
|
|||
{offsetof(struct MutableCFOptions, memtable_protection_bytes_per_key),
|
||||
OptionType::kUInt32T, OptionVerificationType::kNormal,
|
||||
OptionTypeFlags::kMutable}},
|
||||
{"block_protection_bytes_per_key",
|
||||
{offsetof(struct MutableCFOptions, block_protection_bytes_per_key),
|
||||
OptionType::kUInt8T, OptionVerificationType::kNormal,
|
||||
OptionTypeFlags::kMutable}},
|
||||
{kOptNameCompOpts,
|
||||
OptionTypeInfo::Struct(
|
||||
kOptNameCompOpts, &compression_options_type_info,
|
||||
|
|
|
@ -172,6 +172,7 @@ struct MutableCFOptions {
|
|||
: options.last_level_temperature),
|
||||
memtable_protection_bytes_per_key(
|
||||
options.memtable_protection_bytes_per_key),
|
||||
block_protection_bytes_per_key(options.block_protection_bytes_per_key),
|
||||
sample_for_compression(
|
||||
options.sample_for_compression), // TODO: is 0 fine here?
|
||||
compression_per_level(options.compression_per_level) {
|
||||
|
@ -222,6 +223,7 @@ struct MutableCFOptions {
|
|||
bottommost_compression(kDisableCompressionOption),
|
||||
last_level_temperature(Temperature::kUnknown),
|
||||
memtable_protection_bytes_per_key(0),
|
||||
block_protection_bytes_per_key(0),
|
||||
sample_for_compression(0) {}
|
||||
|
||||
explicit MutableCFOptions(const Options& options);
|
||||
|
@ -312,6 +314,7 @@ struct MutableCFOptions {
|
|||
CompressionOptions bottommost_compression_opts;
|
||||
Temperature last_level_temperature;
|
||||
uint32_t memtable_protection_bytes_per_key;
|
||||
uint8_t block_protection_bytes_per_key;
|
||||
|
||||
uint64_t sample_for_compression;
|
||||
std::vector<CompressionType> compression_per_level;
|
||||
|
|
|
@ -206,6 +206,8 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
|
|||
moptions.experimental_mempurge_threshold;
|
||||
cf_opts->memtable_protection_bytes_per_key =
|
||||
moptions.memtable_protection_bytes_per_key;
|
||||
cf_opts->block_protection_bytes_per_key =
|
||||
moptions.block_protection_bytes_per_key;
|
||||
|
||||
// Compaction related options
|
||||
cf_opts->disable_auto_compactions = moptions.disable_auto_compactions;
|
||||
|
|
|
@ -552,7 +552,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
|||
"compaction=false;age_for_warm=1;};"
|
||||
"blob_cache=1M;"
|
||||
"memtable_protection_bytes_per_key=2;"
|
||||
"persist_user_defined_timestamps=true;",
|
||||
"persist_user_defined_timestamps=true;"
|
||||
"block_protection_bytes_per_key=1;",
|
||||
new_options));
|
||||
|
||||
ASSERT_NE(new_options->blob_cache.get(), nullptr);
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace ROCKSDB_NAMESPACE {
|
|||
// Helper routine: decode the next block entry starting at "p",
|
||||
// storing the number of shared key bytes, non_shared key bytes,
|
||||
// and the length of the value in "*shared", "*non_shared", and
|
||||
// "*value_length", respectively. Will not derefence past "limit".
|
||||
// "*value_length", respectively. Will not dereference past "limit".
|
||||
//
|
||||
// If any errors are detected, returns nullptr. Otherwise, returns a
|
||||
// pointer to the key delta (just past the three decoded values).
|
||||
|
@ -137,17 +137,26 @@ struct DecodeEntryV4 {
|
|||
return DecodeKeyV4()(p, limit, shared, non_shared);
|
||||
}
|
||||
};
|
||||
|
||||
void DataBlockIter::NextImpl() {
|
||||
#ifndef NDEBUG
|
||||
if (TEST_Corrupt_Callback("DataBlockIter::NextImpl")) return;
|
||||
#endif
|
||||
bool is_shared = false;
|
||||
ParseNextDataKey(&is_shared);
|
||||
++cur_entry_idx_;
|
||||
}
|
||||
|
||||
void MetaBlockIter::NextImpl() {
|
||||
bool is_shared = false;
|
||||
ParseNextKey<CheckAndDecodeEntry>(&is_shared);
|
||||
++cur_entry_idx_;
|
||||
}
|
||||
|
||||
void IndexBlockIter::NextImpl() { ParseNextIndexKey(); }
|
||||
void IndexBlockIter::NextImpl() {
|
||||
ParseNextIndexKey();
|
||||
++cur_entry_idx_;
|
||||
}
|
||||
|
||||
void IndexBlockIter::PrevImpl() {
|
||||
assert(Valid());
|
||||
|
@ -166,6 +175,7 @@ void IndexBlockIter::PrevImpl() {
|
|||
// Loop until end of current entry hits the start of original entry
|
||||
while (ParseNextIndexKey() && NextEntryOffset() < original) {
|
||||
}
|
||||
--cur_entry_idx_;
|
||||
}
|
||||
|
||||
void MetaBlockIter::PrevImpl() {
|
||||
|
@ -187,6 +197,7 @@ void MetaBlockIter::PrevImpl() {
|
|||
while (ParseNextKey<CheckAndDecodeEntry>(&is_shared) &&
|
||||
NextEntryOffset() < original) {
|
||||
}
|
||||
--cur_entry_idx_;
|
||||
}
|
||||
|
||||
// Similar to IndexBlockIter::PrevImpl but also caches the prev entries
|
||||
|
@ -195,6 +206,7 @@ void DataBlockIter::PrevImpl() {
|
|||
|
||||
assert(prev_entries_idx_ == -1 ||
|
||||
static_cast<size_t>(prev_entries_idx_) < prev_entries_.size());
|
||||
--cur_entry_idx_;
|
||||
// Check if we can use cached prev_entries_
|
||||
if (prev_entries_idx_ > 0 &&
|
||||
prev_entries_[prev_entries_idx_].offset == current_) {
|
||||
|
@ -319,10 +331,10 @@ void MetaBlockIter::SeekImpl(const Slice& target) {
|
|||
// inclusive; AND
|
||||
// 2) the last key of this block has a greater user_key from seek_user_key
|
||||
//
|
||||
// If the return value is TRUE, iter location has two possibilies:
|
||||
// 1) If iter is valid, it is set to a location as if set by BinarySeek. In
|
||||
// this case, it points to the first key with a larger user_key or a matching
|
||||
// user_key with a seqno no greater than the seeking seqno.
|
||||
// If the return value is TRUE, iter location has two possibilities:
|
||||
// 1) If iter is valid, it is set to a location as if set by SeekImpl(target).
|
||||
// In this case, it points to the first key with a larger user_key or a
|
||||
// matching user_key with a seqno no greater than the seeking seqno.
|
||||
// 2) If the iter is invalid, it means that either all the user_key is less
|
||||
// than the seek_user_key, or the block ends with a matching user_key but
|
||||
// with a smaller [ type | seqno ] (i.e. a larger seqno, or the same seqno
|
||||
|
@ -347,11 +359,11 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
|
|||
// boundary key: axy@50 (we make minimal assumption about a boundary key)
|
||||
// Block N+1: [axy@10, ... ]
|
||||
//
|
||||
// If seek_key = axy@60, the search will starts from Block N.
|
||||
// If seek_key = axy@60, the search will start from Block N.
|
||||
// Even if the user_key is not found in the hash map, the caller still
|
||||
// have to continue searching the next block.
|
||||
//
|
||||
// In this case, we pretend the key is the the last restart interval.
|
||||
// In this case, we pretend the key is in the last restart interval.
|
||||
// The while-loop below will search the last restart interval for the
|
||||
// key. It will stop at the first key that is larger than the seek_key,
|
||||
// or to the end of the block if no one is larger.
|
||||
|
@ -364,12 +376,15 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
|
|||
assert(restart_index < num_restarts_);
|
||||
SeekToRestartPoint(restart_index);
|
||||
current_ = GetRestartPoint(restart_index);
|
||||
cur_entry_idx_ =
|
||||
static_cast<int32_t>(restart_index * block_restart_interval_) - 1;
|
||||
|
||||
uint32_t limit = restarts_;
|
||||
if (restart_index + 1 < num_restarts_) {
|
||||
limit = GetRestartPoint(restart_index + 1);
|
||||
}
|
||||
while (current_ < limit) {
|
||||
++cur_entry_idx_;
|
||||
bool shared;
|
||||
// Here we only linear seek the target key inside the restart interval.
|
||||
// If a key does not exist inside a restart interval, we avoid
|
||||
|
@ -381,14 +396,20 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
|
|||
// we stop at the first potential matching user key.
|
||||
break;
|
||||
}
|
||||
// If the loop exits due to CompareCurrentKey(target) >= 0, then current key
|
||||
// exists, and its checksum verification will be done in UpdateKey() called
|
||||
// in SeekForGet().
|
||||
// TODO(cbi): If this loop exits with current_ == restart_, per key-value
|
||||
// checksum will not be verified in UpdateKey() since Valid()
|
||||
// will return false.
|
||||
}
|
||||
|
||||
if (current_ == restarts_) {
|
||||
// Search reaches to the end of the block. There are three possibilites:
|
||||
// 1) there is only one user_key match in the block (otherwise collsion).
|
||||
// Search reaches to the end of the block. There are three possibilities:
|
||||
// 1) there is only one user_key match in the block (otherwise collision).
|
||||
// the matching user_key resides in the last restart interval, and it
|
||||
// is the last key of the restart interval and of the block as well.
|
||||
// ParseNextKey() skiped it as its [ type | seqno ] is smaller.
|
||||
// ParseNextKey() skipped it as its [ type | seqno ] is smaller.
|
||||
//
|
||||
// 2) The seek_key is not found in the HashIndex Lookup(), i.e. kNoEntry,
|
||||
// AND all existing user_keys in the restart interval are smaller than
|
||||
|
@ -424,6 +445,9 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
|
|||
}
|
||||
|
||||
void IndexBlockIter::SeekImpl(const Slice& target) {
|
||||
#ifndef NDEBUG
|
||||
if (TEST_Corrupt_Callback("IndexBlockIter::SeekImpl")) return;
|
||||
#endif
|
||||
TEST_SYNC_POINT("IndexBlockIter::Seek:0");
|
||||
PERF_TIMER_GUARD(block_seek_nanos);
|
||||
if (data_ == nullptr) { // Not init yet
|
||||
|
@ -478,7 +502,9 @@ void DataBlockIter::SeekForPrevImpl(const Slice& target) {
|
|||
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
|
||||
|
||||
if (!Valid()) {
|
||||
SeekToLastImpl();
|
||||
if (status_.ok()) {
|
||||
SeekToLastImpl();
|
||||
}
|
||||
} else {
|
||||
while (Valid() && CompareCurrentKey(seek_key) > 0) {
|
||||
PrevImpl();
|
||||
|
@ -502,7 +528,9 @@ void MetaBlockIter::SeekForPrevImpl(const Slice& target) {
|
|||
FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan);
|
||||
|
||||
if (!Valid()) {
|
||||
SeekToLastImpl();
|
||||
if (status_.ok()) {
|
||||
SeekToLastImpl();
|
||||
}
|
||||
} else {
|
||||
while (Valid() && CompareCurrentKey(seek_key) > 0) {
|
||||
PrevImpl();
|
||||
|
@ -517,6 +545,7 @@ void DataBlockIter::SeekToFirstImpl() {
|
|||
SeekToRestartPoint(0);
|
||||
bool is_shared = false;
|
||||
ParseNextDataKey(&is_shared);
|
||||
cur_entry_idx_ = 0;
|
||||
}
|
||||
|
||||
void MetaBlockIter::SeekToFirstImpl() {
|
||||
|
@ -526,15 +555,20 @@ void MetaBlockIter::SeekToFirstImpl() {
|
|||
SeekToRestartPoint(0);
|
||||
bool is_shared = false;
|
||||
ParseNextKey<CheckAndDecodeEntry>(&is_shared);
|
||||
cur_entry_idx_ = 0;
|
||||
}
|
||||
|
||||
void IndexBlockIter::SeekToFirstImpl() {
|
||||
#ifndef NDEBUG
|
||||
if (TEST_Corrupt_Callback("IndexBlockIter::SeekToFirstImpl")) return;
|
||||
#endif
|
||||
if (data_ == nullptr) { // Not init yet
|
||||
return;
|
||||
}
|
||||
status_ = Status::OK();
|
||||
SeekToRestartPoint(0);
|
||||
ParseNextIndexKey();
|
||||
cur_entry_idx_ = 0;
|
||||
}
|
||||
|
||||
void DataBlockIter::SeekToLastImpl() {
|
||||
|
@ -543,8 +577,10 @@ void DataBlockIter::SeekToLastImpl() {
|
|||
}
|
||||
SeekToRestartPoint(num_restarts_ - 1);
|
||||
bool is_shared = false;
|
||||
cur_entry_idx_ = (num_restarts_ - 1) * block_restart_interval_;
|
||||
while (ParseNextDataKey(&is_shared) && NextEntryOffset() < restarts_) {
|
||||
// Keep skipping
|
||||
++cur_entry_idx_;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -554,9 +590,13 @@ void MetaBlockIter::SeekToLastImpl() {
|
|||
}
|
||||
SeekToRestartPoint(num_restarts_ - 1);
|
||||
bool is_shared = false;
|
||||
assert(num_restarts_ >= 1);
|
||||
cur_entry_idx_ =
|
||||
static_cast<int32_t>((num_restarts_ - 1) * block_restart_interval_);
|
||||
while (ParseNextKey<CheckAndDecodeEntry>(&is_shared) &&
|
||||
NextEntryOffset() < restarts_) {
|
||||
// Keep skipping
|
||||
// Will probably never reach here since restart_interval is always 1
|
||||
++cur_entry_idx_;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -566,20 +606,12 @@ void IndexBlockIter::SeekToLastImpl() {
|
|||
}
|
||||
status_ = Status::OK();
|
||||
SeekToRestartPoint(num_restarts_ - 1);
|
||||
cur_entry_idx_ = (num_restarts_ - 1) * block_restart_interval_;
|
||||
while (ParseNextIndexKey() && NextEntryOffset() < restarts_) {
|
||||
// Keep skipping
|
||||
++cur_entry_idx_;
|
||||
}
|
||||
}
|
||||
|
||||
template <class TValue>
|
||||
void BlockIter<TValue>::CorruptionError() {
|
||||
current_ = restarts_;
|
||||
restart_index_ = num_restarts_;
|
||||
status_ = Status::Corruption("bad entry in block");
|
||||
raw_key_.Clear();
|
||||
value_.clear();
|
||||
}
|
||||
|
||||
template <class TValue>
|
||||
template <typename DecodeEntryFunc>
|
||||
bool BlockIter<TValue>::ParseNextKey(bool* is_shared) {
|
||||
|
@ -666,12 +698,12 @@ bool IndexBlockIter::ParseNextIndexKey() {
|
|||
// restart_point 1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz)
|
||||
// ...
|
||||
// restart_point n-1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz)
|
||||
// where, k is key, v is value, and its encoding is in parenthesis.
|
||||
// where, k is key, v is value, and its encoding is in parentheses.
|
||||
// The format of each key is (shared_size, non_shared_size, shared, non_shared)
|
||||
// The format of each value, i.e., block handle, is (offset, size) whenever the
|
||||
// is_shared is false, which included the first entry in each restart point.
|
||||
// Otherwise the format is delta-size = block handle size - size of last block
|
||||
// handle.
|
||||
// Otherwise, the format is delta-size = the size of current block - the size o
|
||||
// last block.
|
||||
void IndexBlockIter::DecodeCurrentValue(bool is_shared) {
|
||||
Slice v(value_.data(), data_ + restarts_ - value_.data());
|
||||
// Delta encoding is used if `shared` != 0.
|
||||
|
@ -710,6 +742,7 @@ void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
|
|||
// to follow it up with NextImpl() to position the iterator at the restart
|
||||
// key.
|
||||
SeekToRestartPoint(index);
|
||||
cur_entry_idx_ = static_cast<int32_t>(index * block_restart_interval_) - 1;
|
||||
NextImpl();
|
||||
|
||||
if (!skip_linear_scan) {
|
||||
|
@ -728,6 +761,8 @@ void BlockIter<TValue>::FindKeyAfterBinarySeek(const Slice& target,
|
|||
while (true) {
|
||||
NextImpl();
|
||||
if (!Valid()) {
|
||||
// TODO(cbi): per key-value checksum will not be verified in UpdateKey()
|
||||
// since Valid() will returns false.
|
||||
break;
|
||||
}
|
||||
if (current_ == max_offset) {
|
||||
|
@ -976,6 +1011,7 @@ Block::~Block() {
|
|||
// This sync point can be re-enabled if RocksDB can control the
|
||||
// initialization order of any/all static options created by the user.
|
||||
// TEST_SYNC_POINT("Block::~Block");
|
||||
delete[] kv_checksum_;
|
||||
}
|
||||
|
||||
Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit,
|
||||
|
@ -1035,6 +1071,126 @@ Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit,
|
|||
}
|
||||
}
|
||||
|
||||
void Block::InitializeDataBlockProtectionInfo(uint8_t protection_bytes_per_key,
|
||||
const Comparator* raw_ucmp) {
|
||||
protection_bytes_per_key_ = 0;
|
||||
if (protection_bytes_per_key > 0 && num_restarts_ > 0) {
|
||||
// NewDataIterator() is called with protection_bytes_per_key_ = 0.
|
||||
// This is intended since checksum is not constructed yet.
|
||||
//
|
||||
// We do not know global_seqno yet, so checksum computation and
|
||||
// verification all assume global_seqno = 0.
|
||||
std::unique_ptr<DataBlockIter> iter{NewDataIterator(
|
||||
raw_ucmp, kDisableGlobalSequenceNumber, nullptr /* iter */,
|
||||
nullptr /* stats */, true /* block_contents_pinned */)};
|
||||
if (iter->status().ok()) {
|
||||
block_restart_interval_ = iter->GetRestartInterval();
|
||||
}
|
||||
uint32_t num_keys = 0;
|
||||
if (iter->status().ok()) {
|
||||
num_keys = iter->NumberOfKeys(block_restart_interval_);
|
||||
}
|
||||
if (iter->status().ok()) {
|
||||
checksum_size_ = num_keys * protection_bytes_per_key;
|
||||
kv_checksum_ = new char[(size_t)checksum_size_];
|
||||
size_t i = 0;
|
||||
iter->SeekToFirst();
|
||||
while (iter->Valid()) {
|
||||
GenerateKVChecksum(kv_checksum_ + i, protection_bytes_per_key,
|
||||
iter->key(), iter->value());
|
||||
iter->Next();
|
||||
i += protection_bytes_per_key;
|
||||
}
|
||||
assert(!iter->status().ok() || i == num_keys * protection_bytes_per_key);
|
||||
}
|
||||
if (!iter->status().ok()) {
|
||||
size_ = 0; // Error marker
|
||||
return;
|
||||
}
|
||||
protection_bytes_per_key_ = protection_bytes_per_key;
|
||||
}
|
||||
}
|
||||
|
||||
void Block::InitializeIndexBlockProtectionInfo(uint8_t protection_bytes_per_key,
|
||||
const Comparator* raw_ucmp,
|
||||
bool value_is_full,
|
||||
bool index_has_first_key) {
|
||||
protection_bytes_per_key_ = 0;
|
||||
if (num_restarts_ > 0 && protection_bytes_per_key > 0) {
|
||||
// Note that `global_seqno` and `key_includes_seq` are hardcoded here. They
|
||||
// do not impact how the index block is parsed. During checksum
|
||||
// construction/verification, we use the entire key buffer from
|
||||
// raw_key_.GetKey() returned by iter->key() as the `key` part of key-value
|
||||
// checksum, and the content of this buffer do not change for different
|
||||
// values of `global_seqno` or `key_includes_seq`.
|
||||
std::unique_ptr<IndexBlockIter> iter{NewIndexIterator(
|
||||
raw_ucmp, kDisableGlobalSequenceNumber /* global_seqno */, nullptr,
|
||||
nullptr /* Statistics */, true /* total_order_seek */,
|
||||
index_has_first_key /* have_first_key */, false /* key_includes_seq */,
|
||||
value_is_full, true /* block_contents_pinned */,
|
||||
nullptr /* prefix_index */)};
|
||||
if (iter->status().ok()) {
|
||||
block_restart_interval_ = iter->GetRestartInterval();
|
||||
}
|
||||
uint32_t num_keys = 0;
|
||||
if (iter->status().ok()) {
|
||||
num_keys = iter->NumberOfKeys(block_restart_interval_);
|
||||
}
|
||||
if (iter->status().ok()) {
|
||||
checksum_size_ = num_keys * protection_bytes_per_key;
|
||||
kv_checksum_ = new char[(size_t)checksum_size_];
|
||||
iter->SeekToFirst();
|
||||
size_t i = 0;
|
||||
while (iter->Valid()) {
|
||||
GenerateKVChecksum(kv_checksum_ + i, protection_bytes_per_key,
|
||||
iter->key(), iter->raw_value());
|
||||
iter->Next();
|
||||
i += protection_bytes_per_key;
|
||||
}
|
||||
assert(!iter->status().ok() || i == num_keys * protection_bytes_per_key);
|
||||
}
|
||||
if (!iter->status().ok()) {
|
||||
size_ = 0; // Error marker
|
||||
return;
|
||||
}
|
||||
protection_bytes_per_key_ = protection_bytes_per_key;
|
||||
}
|
||||
}
|
||||
|
||||
void Block::InitializeMetaIndexBlockProtectionInfo(
|
||||
uint8_t protection_bytes_per_key) {
|
||||
protection_bytes_per_key_ = 0;
|
||||
if (num_restarts_ > 0 && protection_bytes_per_key > 0) {
|
||||
std::unique_ptr<MetaBlockIter> iter{
|
||||
NewMetaIterator(true /* block_contents_pinned */)};
|
||||
if (iter->status().ok()) {
|
||||
block_restart_interval_ = iter->GetRestartInterval();
|
||||
}
|
||||
uint32_t num_keys = 0;
|
||||
if (iter->status().ok()) {
|
||||
num_keys = iter->NumberOfKeys(block_restart_interval_);
|
||||
}
|
||||
if (iter->status().ok()) {
|
||||
checksum_size_ = num_keys * protection_bytes_per_key;
|
||||
kv_checksum_ = new char[(size_t)checksum_size_];
|
||||
iter->SeekToFirst();
|
||||
size_t i = 0;
|
||||
while (iter->Valid()) {
|
||||
GenerateKVChecksum(kv_checksum_ + i, protection_bytes_per_key,
|
||||
iter->key(), iter->value());
|
||||
iter->Next();
|
||||
i += protection_bytes_per_key;
|
||||
}
|
||||
assert(!iter->status().ok() || i == num_keys * protection_bytes_per_key);
|
||||
}
|
||||
if (!iter->status().ok()) {
|
||||
size_ = 0; // Error marker
|
||||
return;
|
||||
}
|
||||
protection_bytes_per_key_ = protection_bytes_per_key;
|
||||
}
|
||||
}
|
||||
|
||||
MetaBlockIter* Block::NewMetaIterator(bool block_contents_pinned) {
|
||||
MetaBlockIter* iter = new MetaBlockIter();
|
||||
if (size_ < 2 * sizeof(uint32_t)) {
|
||||
|
@ -1045,7 +1201,8 @@ MetaBlockIter* Block::NewMetaIterator(bool block_contents_pinned) {
|
|||
iter->Invalidate(Status::OK());
|
||||
} else {
|
||||
iter->Initialize(data_, restart_offset_, num_restarts_,
|
||||
block_contents_pinned);
|
||||
block_contents_pinned, protection_bytes_per_key_,
|
||||
kv_checksum_, block_restart_interval_);
|
||||
}
|
||||
return iter;
|
||||
}
|
||||
|
@ -1072,7 +1229,8 @@ DataBlockIter* Block::NewDataIterator(const Comparator* raw_ucmp,
|
|||
ret_iter->Initialize(
|
||||
raw_ucmp, data_, restart_offset_, num_restarts_, global_seqno,
|
||||
read_amp_bitmap_.get(), block_contents_pinned,
|
||||
data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr);
|
||||
data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr,
|
||||
protection_bytes_per_key_, kv_checksum_, block_restart_interval_);
|
||||
if (read_amp_bitmap_) {
|
||||
if (read_amp_bitmap_->GetStatistics() != stats) {
|
||||
// DB changed the Statistics pointer, we need to notify read_amp_bitmap_
|
||||
|
@ -1108,8 +1266,9 @@ IndexBlockIter* Block::NewIndexIterator(
|
|||
total_order_seek ? nullptr : prefix_index;
|
||||
ret_iter->Initialize(raw_ucmp, data_, restart_offset_, num_restarts_,
|
||||
global_seqno, prefix_index_ptr, have_first_key,
|
||||
key_includes_seq, value_is_full,
|
||||
block_contents_pinned);
|
||||
key_includes_seq, value_is_full, block_contents_pinned,
|
||||
protection_bytes_per_key_, kv_checksum_,
|
||||
block_restart_interval_);
|
||||
}
|
||||
|
||||
return ret_iter;
|
||||
|
@ -1125,6 +1284,7 @@ size_t Block::ApproximateMemoryUsage() const {
|
|||
if (read_amp_bitmap_) {
|
||||
usage += read_amp_bitmap_->ApproximateMemoryUsage();
|
||||
}
|
||||
usage += checksum_size_;
|
||||
return usage;
|
||||
}
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "db/kv_checksum.h"
|
||||
#include "db/pinned_iterators_manager.h"
|
||||
#include "port/malloc.h"
|
||||
#include "rocksdb/advanced_cache.h"
|
||||
|
@ -240,6 +241,34 @@ class Block {
|
|||
// For TypedCacheInterface
|
||||
const Slice& ContentSlice() const { return contents_.data; }
|
||||
|
||||
// Initializes per key-value checksum protection.
|
||||
// After this method is called, each DataBlockIterator returned
|
||||
// by NewDataIterator will verify per key-value checksum for any key it read.
|
||||
void InitializeDataBlockProtectionInfo(uint8_t protection_bytes_per_key,
|
||||
const Comparator* raw_ucmp);
|
||||
|
||||
// Initializes per key-value checksum protection.
|
||||
// After this method is called, each IndexBlockIterator returned
|
||||
// by NewIndexIterator will verify per key-value checksum for any key it read.
|
||||
// value_is_full and index_has_first_key are needed to be able to parse
|
||||
// the index block content and construct checksums.
|
||||
void InitializeIndexBlockProtectionInfo(uint8_t protection_bytes_per_key,
|
||||
const Comparator* raw_ucmp,
|
||||
bool value_is_full,
|
||||
bool index_has_first_key);
|
||||
|
||||
// Initializes per key-value checksum protection.
|
||||
// After this method is called, each MetaBlockIter returned
|
||||
// by NewMetaIterator will verify per key-value checksum for any key it read.
|
||||
void InitializeMetaIndexBlockProtectionInfo(uint8_t protection_bytes_per_key);
|
||||
|
||||
static void GenerateKVChecksum(char* checksum_ptr, uint8_t checksum_len,
|
||||
const Slice& key, const Slice& value) {
|
||||
ProtectionInfo64().ProtectKV(key, value).Encode(checksum_len, checksum_ptr);
|
||||
}
|
||||
|
||||
const char* TEST_GetKVChecksum() const { return kv_checksum_; }
|
||||
|
||||
private:
|
||||
BlockContents contents_;
|
||||
const char* data_; // contents_.data.data()
|
||||
|
@ -247,6 +276,11 @@ class Block {
|
|||
uint32_t restart_offset_; // Offset in data_ of restart array
|
||||
uint32_t num_restarts_;
|
||||
std::unique_ptr<BlockReadAmpBitmap> read_amp_bitmap_;
|
||||
char* kv_checksum_{nullptr};
|
||||
uint32_t checksum_size_{0};
|
||||
// Used by block iterators to calculate current key index within a block
|
||||
uint32_t block_restart_interval_{0};
|
||||
uint8_t protection_bytes_per_key_{0};
|
||||
DataBlockHashIndex data_block_hash_index_;
|
||||
};
|
||||
|
||||
|
@ -269,6 +303,14 @@ class Block {
|
|||
// `Seek()` logic would be implemented by subclasses in `SeekImpl()`. These
|
||||
// "Impl" functions are responsible for positioning `raw_key_` but not
|
||||
// invoking `UpdateKey()`.
|
||||
//
|
||||
// Per key-value checksum is enabled if relevant states are passed in during
|
||||
// `InitializeBase()`. The checksum verification is done in each call to
|
||||
// UpdateKey() for the current key. Each subclass is responsible for keeping
|
||||
// track of cur_entry_idx_, the index of the current key within the block.
|
||||
// BlockIter uses this index to get the corresponding checksum for current key.
|
||||
// Additional checksum verification may be done in subclasses if they read keys
|
||||
// other than the key being processed in UpdateKey().
|
||||
template <class TValue>
|
||||
class BlockIter : public InternalIteratorBase<TValue> {
|
||||
public:
|
||||
|
@ -286,9 +328,16 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
Cleanable::Reset();
|
||||
}
|
||||
|
||||
bool Valid() const override { return current_ < restarts_; }
|
||||
bool Valid() const override {
|
||||
// When status_ is not ok, iter should be invalid.
|
||||
assert(status_.ok() || current_ >= restarts_);
|
||||
return current_ < restarts_;
|
||||
}
|
||||
|
||||
virtual void SeekToFirst() override final {
|
||||
#ifndef NDEBUG
|
||||
if (TEST_Corrupt_Callback("BlockIter::SeekToFirst")) return;
|
||||
#endif
|
||||
SeekToFirstImpl();
|
||||
UpdateKey();
|
||||
}
|
||||
|
@ -325,6 +374,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
}
|
||||
|
||||
Status status() const override { return status_; }
|
||||
|
||||
Slice key() const override {
|
||||
assert(Valid());
|
||||
return key_;
|
||||
|
@ -337,10 +387,22 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
(pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled()));
|
||||
status_.PermitUncheckedError();
|
||||
}
|
||||
|
||||
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
|
||||
pinned_iters_mgr_ = pinned_iters_mgr;
|
||||
}
|
||||
|
||||
PinnedIteratorsManager* pinned_iters_mgr_ = nullptr;
|
||||
|
||||
bool TEST_Corrupt_Callback(const std::string& sync_point) {
|
||||
bool corrupt = false;
|
||||
TEST_SYNC_POINT_CALLBACK(sync_point, static_cast<void*>(&corrupt));
|
||||
|
||||
if (corrupt) {
|
||||
CorruptionError();
|
||||
}
|
||||
return corrupt;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool IsKeyPinned() const override {
|
||||
|
@ -377,27 +439,74 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
Status status_;
|
||||
// Key to be exposed to users.
|
||||
Slice key_;
|
||||
SequenceNumber global_seqno_;
|
||||
|
||||
// Per key-value checksum related states
|
||||
const char* kv_checksum_;
|
||||
int32_t cur_entry_idx_;
|
||||
uint32_t block_restart_interval_;
|
||||
uint8_t protection_bytes_per_key_;
|
||||
|
||||
bool key_pinned_;
|
||||
// Whether the block data is guaranteed to outlive this iterator, and
|
||||
// as long as the cleanup functions are transferred to another class,
|
||||
// e.g. PinnableSlice, the pointer to the bytes will still be valid.
|
||||
bool block_contents_pinned_;
|
||||
SequenceNumber global_seqno_;
|
||||
|
||||
virtual void SeekToFirstImpl() = 0;
|
||||
virtual void SeekToLastImpl() = 0;
|
||||
virtual void SeekImpl(const Slice& target) = 0;
|
||||
virtual void SeekForPrevImpl(const Slice& target) = 0;
|
||||
virtual void NextImpl() = 0;
|
||||
|
||||
virtual void PrevImpl() = 0;
|
||||
|
||||
// Returns the restart interval of this block.
|
||||
// Returns 0 if num_restarts_ <= 1 or if the BlockIter is not initialized.
|
||||
virtual uint32_t GetRestartInterval() {
|
||||
if (num_restarts_ <= 1 || data_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
SeekToFirstImpl();
|
||||
uint32_t end_index = GetRestartPoint(1);
|
||||
uint32_t count = 1;
|
||||
while (NextEntryOffset() < end_index && status_.ok()) {
|
||||
assert(Valid());
|
||||
NextImpl();
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// Returns the number of keys in this block.
|
||||
virtual uint32_t NumberOfKeys(uint32_t block_restart_interval) {
|
||||
if (num_restarts_ == 0 || data_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t count = (num_restarts_ - 1) * block_restart_interval;
|
||||
// Add number of keys from the last restart interval
|
||||
SeekToRestartPoint(num_restarts_ - 1);
|
||||
while (NextEntryOffset() < restarts_ && status_.ok()) {
|
||||
NextImpl();
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// Stores whether the current key has a shared bytes with prev key in
|
||||
// *is_shared.
|
||||
// Sets raw_key_, value_ to the current parsed key and value.
|
||||
// Sets restart_index_ to point to the restart interval that contains
|
||||
// the current key.
|
||||
template <typename DecodeEntryFunc>
|
||||
inline bool ParseNextKey(bool* is_shared);
|
||||
|
||||
// protection_bytes_per_key, kv_checksum, and block_restart_interval
|
||||
// are needed only for per kv checksum verification.
|
||||
void InitializeBase(const Comparator* raw_ucmp, const char* data,
|
||||
uint32_t restarts, uint32_t num_restarts,
|
||||
SequenceNumber global_seqno, bool block_contents_pinned) {
|
||||
SequenceNumber global_seqno, bool block_contents_pinned,
|
||||
uint8_t protection_bytes_per_key, const char* kv_checksum,
|
||||
uint32_t block_restart_interval) {
|
||||
assert(data_ == nullptr); // Ensure it is called only once
|
||||
assert(num_restarts > 0); // Ensure the param is valid
|
||||
|
||||
|
@ -410,11 +519,41 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
global_seqno_ = global_seqno;
|
||||
block_contents_pinned_ = block_contents_pinned;
|
||||
cache_handle_ = nullptr;
|
||||
cur_entry_idx_ = -1;
|
||||
protection_bytes_per_key_ = protection_bytes_per_key;
|
||||
kv_checksum_ = kv_checksum;
|
||||
block_restart_interval_ = block_restart_interval;
|
||||
// Checksum related states are either all 0/nullptr or all non-zero.
|
||||
// One exception is when num_restarts == 0, block_restart_interval can be 0
|
||||
// since we are not able to compute it.
|
||||
assert((protection_bytes_per_key == 0 && kv_checksum == nullptr) ||
|
||||
(protection_bytes_per_key > 0 && kv_checksum != nullptr &&
|
||||
(block_restart_interval > 0 || num_restarts == 1)));
|
||||
}
|
||||
|
||||
void CorruptionError(const std::string& error_msg = "bad entry in block") {
|
||||
current_ = restarts_;
|
||||
restart_index_ = num_restarts_;
|
||||
status_ = Status::Corruption(error_msg);
|
||||
raw_key_.Clear();
|
||||
value_.clear();
|
||||
}
|
||||
|
||||
void PerKVChecksumCorruptionError() {
|
||||
std::string error_msg{
|
||||
"Corrupted block entry: per key-value checksum verification "
|
||||
"failed."};
|
||||
error_msg.append(" Offset: " + std::to_string(current_) + ".");
|
||||
error_msg.append(" Entry index: " + std::to_string(cur_entry_idx_) + ".");
|
||||
CorruptionError(error_msg);
|
||||
}
|
||||
|
||||
// Must be called every time a key is found that needs to be returned to user,
|
||||
// and may be called when no key is found (as a no-op). Updates `key_`,
|
||||
// `key_buf_`, and `key_pinned_` with info about the found key.
|
||||
// Per key-value checksum verification is done if available for the key to be
|
||||
// returned. Iterator is invalidated with corruption status if checksum
|
||||
// verification fails.
|
||||
void UpdateKey() {
|
||||
key_buf_.Clear();
|
||||
if (!Valid()) {
|
||||
|
@ -433,6 +572,19 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
key_ = key_buf_.GetInternalKey();
|
||||
key_pinned_ = false;
|
||||
}
|
||||
TEST_SYNC_POINT_CALLBACK("BlockIter::UpdateKey::value",
|
||||
(void*)value_.data());
|
||||
TEST_SYNC_POINT_CALLBACK("Block::VerifyChecksum::checksum_len",
|
||||
&protection_bytes_per_key_);
|
||||
if (protection_bytes_per_key_ > 0) {
|
||||
if (!ProtectionInfo64()
|
||||
.ProtectKV(raw_key_.GetKey(), value_)
|
||||
.Verify(
|
||||
protection_bytes_per_key_,
|
||||
kv_checksum_ + protection_bytes_per_key_ * cur_entry_idx_)) {
|
||||
PerKVChecksumCorruptionError();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the result of `Comparator::Compare()`, where the appropriate
|
||||
|
@ -464,7 +616,7 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
return static_cast<uint32_t>((value_.data() + value_.size()) - data_);
|
||||
}
|
||||
|
||||
uint32_t GetRestartPoint(uint32_t index) {
|
||||
uint32_t GetRestartPoint(uint32_t index) const {
|
||||
assert(index < num_restarts_);
|
||||
return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
|
||||
}
|
||||
|
@ -479,13 +631,20 @@ class BlockIter : public InternalIteratorBase<TValue> {
|
|||
value_ = Slice(data_ + offset, 0);
|
||||
}
|
||||
|
||||
void CorruptionError();
|
||||
|
||||
protected:
|
||||
template <typename DecodeKeyFunc>
|
||||
inline bool BinarySeek(const Slice& target, uint32_t* index,
|
||||
bool* is_index_key_result);
|
||||
|
||||
// Find the first key in restart interval `index` that is >= `target`.
|
||||
// If there is no such key, iterator is positioned at the first key in
|
||||
// restart interval `index + 1`.
|
||||
// If is_index_key_result is true, it positions the iterator at the first key
|
||||
// in this restart interval.
|
||||
// Per key-value checksum verification is done for all keys scanned
|
||||
// up to but not including the last key (the key that current_ points to
|
||||
// when this function returns). This key's checksum is verified in
|
||||
// UpdateKey().
|
||||
void FindKeyAfterBinarySeek(const Slice& target, uint32_t index,
|
||||
bool is_index_key_result);
|
||||
};
|
||||
|
@ -494,22 +653,17 @@ class DataBlockIter final : public BlockIter<Slice> {
|
|||
public:
|
||||
DataBlockIter()
|
||||
: BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {}
|
||||
DataBlockIter(const Comparator* raw_ucmp, const char* data, uint32_t restarts,
|
||||
uint32_t num_restarts, SequenceNumber global_seqno,
|
||||
BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned,
|
||||
DataBlockHashIndex* data_block_hash_index)
|
||||
: DataBlockIter() {
|
||||
Initialize(raw_ucmp, data, restarts, num_restarts, global_seqno,
|
||||
read_amp_bitmap, block_contents_pinned, data_block_hash_index);
|
||||
}
|
||||
void Initialize(const Comparator* raw_ucmp, const char* data,
|
||||
uint32_t restarts, uint32_t num_restarts,
|
||||
SequenceNumber global_seqno,
|
||||
BlockReadAmpBitmap* read_amp_bitmap,
|
||||
bool block_contents_pinned,
|
||||
DataBlockHashIndex* data_block_hash_index) {
|
||||
DataBlockHashIndex* data_block_hash_index,
|
||||
uint8_t protection_bytes_per_key, const char* kv_checksum,
|
||||
uint32_t block_restart_interval) {
|
||||
InitializeBase(raw_ucmp, data, restarts, num_restarts, global_seqno,
|
||||
block_contents_pinned);
|
||||
block_contents_pinned, protection_bytes_per_key, kv_checksum,
|
||||
block_restart_interval);
|
||||
raw_key_.SetIsUserKey(false);
|
||||
read_amp_bitmap_ = read_amp_bitmap;
|
||||
last_bitmap_offset_ = current_ + 1;
|
||||
|
@ -527,7 +681,11 @@ class DataBlockIter final : public BlockIter<Slice> {
|
|||
return value_;
|
||||
}
|
||||
|
||||
// Returns if `target` may exist.
|
||||
inline bool SeekForGet(const Slice& target) {
|
||||
#ifndef NDEBUG
|
||||
if (TEST_Corrupt_Callback("DataBlockIter::SeekForGet")) return true;
|
||||
#endif
|
||||
if (!data_block_hash_index_) {
|
||||
SeekImpl(target);
|
||||
UpdateKey();
|
||||
|
@ -599,11 +757,14 @@ class MetaBlockIter final : public BlockIter<Slice> {
|
|||
public:
|
||||
MetaBlockIter() : BlockIter() { raw_key_.SetIsUserKey(true); }
|
||||
void Initialize(const char* data, uint32_t restarts, uint32_t num_restarts,
|
||||
bool block_contents_pinned) {
|
||||
bool block_contents_pinned, uint8_t protection_bytes_per_key,
|
||||
const char* kv_checksum, uint32_t block_restart_interval) {
|
||||
// Initializes the iterator with a BytewiseComparator and
|
||||
// the raw key being a user key.
|
||||
InitializeBase(BytewiseComparator(), data, restarts, num_restarts,
|
||||
kDisableGlobalSequenceNumber, block_contents_pinned);
|
||||
kDisableGlobalSequenceNumber, block_contents_pinned,
|
||||
protection_bytes_per_key, kv_checksum,
|
||||
block_restart_interval);
|
||||
raw_key_.SetIsUserKey(true);
|
||||
}
|
||||
|
||||
|
@ -613,12 +774,17 @@ class MetaBlockIter final : public BlockIter<Slice> {
|
|||
}
|
||||
|
||||
protected:
|
||||
friend Block;
|
||||
void SeekToFirstImpl() override;
|
||||
void SeekToLastImpl() override;
|
||||
void SeekImpl(const Slice& target) override;
|
||||
void SeekForPrevImpl(const Slice& target) override;
|
||||
void NextImpl() override;
|
||||
void PrevImpl() override;
|
||||
// Meta index block's restart interval is always 1. See
|
||||
// MetaIndexBuilder::MetaIndexBuilder() for hard-coded restart interval.
|
||||
uint32_t GetRestartInterval() override { return 1; }
|
||||
uint32_t NumberOfKeys(uint32_t) override { return num_restarts_; }
|
||||
};
|
||||
|
||||
class IndexBlockIter final : public BlockIter<IndexValue> {
|
||||
|
@ -633,9 +799,13 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
|
|||
uint32_t restarts, uint32_t num_restarts,
|
||||
SequenceNumber global_seqno, BlockPrefixIndex* prefix_index,
|
||||
bool have_first_key, bool key_includes_seq,
|
||||
bool value_is_full, bool block_contents_pinned) {
|
||||
bool value_is_full, bool block_contents_pinned,
|
||||
uint8_t protection_bytes_per_key, const char* kv_checksum,
|
||||
uint32_t block_restart_interval) {
|
||||
InitializeBase(raw_ucmp, data, restarts, num_restarts,
|
||||
kDisableGlobalSequenceNumber, block_contents_pinned);
|
||||
kDisableGlobalSequenceNumber, block_contents_pinned,
|
||||
protection_bytes_per_key, kv_checksum,
|
||||
block_restart_interval);
|
||||
raw_key_.SetIsUserKey(!key_includes_seq);
|
||||
prefix_index_ = prefix_index;
|
||||
value_delta_encoded_ = !value_is_full;
|
||||
|
@ -666,11 +836,17 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
|
|||
}
|
||||
}
|
||||
|
||||
Slice raw_value() const {
|
||||
assert(Valid());
|
||||
return value_;
|
||||
}
|
||||
|
||||
bool IsValuePinned() const override {
|
||||
return global_seqno_state_ != nullptr ? false : BlockIter::IsValuePinned();
|
||||
}
|
||||
|
||||
protected:
|
||||
friend Block;
|
||||
// IndexBlockIter follows a different contract for prefix iterator
|
||||
// from data iterators.
|
||||
// If prefix of the seek key `target` exists in the file, it must
|
||||
|
@ -692,11 +868,8 @@ class IndexBlockIter final : public BlockIter<IndexValue> {
|
|||
}
|
||||
|
||||
void PrevImpl() override;
|
||||
|
||||
void NextImpl() override;
|
||||
|
||||
void SeekToFirstImpl() override;
|
||||
|
||||
void SeekToLastImpl() override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -450,7 +450,12 @@ struct BlockBasedTableBuilder::Rep {
|
|||
table_options, data_block)),
|
||||
create_context(&table_options, ioptions.stats,
|
||||
compression_type == kZSTD ||
|
||||
compression_type == kZSTDNotFinalCompression),
|
||||
compression_type == kZSTDNotFinalCompression,
|
||||
tbo.moptions.block_protection_bytes_per_key,
|
||||
tbo.internal_comparator.user_comparator(),
|
||||
!use_delta_encoding_for_index_values,
|
||||
table_opt.index_type ==
|
||||
BlockBasedTableOptions::kBinarySearchWithFirstKey),
|
||||
status_ok(true),
|
||||
io_status_ok(true) {
|
||||
if (tbo.target_file_size == 0) {
|
||||
|
|
|
@ -567,7 +567,8 @@ Status BlockBasedTableFactory::NewTableReader(
|
|||
return BlockBasedTable::Open(
|
||||
ro, table_reader_options.ioptions, table_reader_options.env_options,
|
||||
table_options_, table_reader_options.internal_comparator, std::move(file),
|
||||
file_size, table_reader, table_reader_cache_res_mgr_,
|
||||
file_size, table_reader_options.block_protection_bytes_per_key,
|
||||
table_reader, table_reader_cache_res_mgr_,
|
||||
table_reader_options.prefix_extractor, prefetch_index_and_filter_in_cache,
|
||||
table_reader_options.skip_filters, table_reader_options.level,
|
||||
table_reader_options.immortal, table_reader_options.largest_seqno,
|
||||
|
|
|
@ -560,6 +560,7 @@ Status BlockBasedTable::Open(
|
|||
const EnvOptions& env_options, const BlockBasedTableOptions& table_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
std::unique_ptr<TableReader>* table_reader,
|
||||
std::shared_ptr<CacheReservationManager> table_reader_cache_res_mgr,
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor,
|
||||
|
@ -645,6 +646,7 @@ Status BlockBasedTable::Open(
|
|||
// meta-block reads.
|
||||
rep->compression_dict_handle = BlockHandle::NullBlockHandle();
|
||||
|
||||
rep->create_context.protection_bytes_per_key = block_protection_bytes_per_key;
|
||||
// Read metaindex
|
||||
std::unique_ptr<BlockBasedTable> new_table(
|
||||
new BlockBasedTable(rep, block_cache_tracer));
|
||||
|
@ -671,9 +673,11 @@ Status BlockBasedTable::Open(
|
|||
CompressionTypeToString(kZSTD) ||
|
||||
rep->table_properties->compression_name ==
|
||||
CompressionTypeToString(kZSTDNotFinalCompression));
|
||||
rep->create_context =
|
||||
BlockCreateContext(&rep->table_options, rep->ioptions.stats,
|
||||
blocks_definitely_zstd_compressed);
|
||||
rep->create_context = BlockCreateContext(
|
||||
&rep->table_options, rep->ioptions.stats,
|
||||
blocks_definitely_zstd_compressed, block_protection_bytes_per_key,
|
||||
rep->internal_comparator.user_comparator(), rep->index_value_is_full,
|
||||
rep->index_has_first_key);
|
||||
|
||||
// Check expected unique id if provided
|
||||
if (expected_unique_id != kNullUniqueId64x2) {
|
||||
|
@ -2168,6 +2172,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|||
}
|
||||
}
|
||||
s = biter.status();
|
||||
if (!s.ok()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Write the block cache access record.
|
||||
if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
|
||||
|
|
|
@ -98,6 +98,7 @@ class BlockBasedTable : public TableReader {
|
|||
const BlockBasedTableOptions& table_options,
|
||||
const InternalKeyComparator& internal_key_comparator,
|
||||
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
|
||||
uint8_t block_protection_bytes_per_key,
|
||||
std::unique_ptr<TableReader>* table_reader,
|
||||
std::shared_ptr<CacheReservationManager> table_reader_cache_res_mgr =
|
||||
nullptr,
|
||||
|
|
|
@ -116,8 +116,9 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
|
|||
bool prefetch_index_and_filter_in_cache = true,
|
||||
Status* status = nullptr) {
|
||||
const MutableCFOptions moptions(options_);
|
||||
TableReaderOptions table_reader_options = TableReaderOptions(
|
||||
ioptions, moptions.prefix_extractor, EnvOptions(), comparator);
|
||||
TableReaderOptions table_reader_options =
|
||||
TableReaderOptions(ioptions, moptions.prefix_extractor, EnvOptions(),
|
||||
comparator, 0 /* block_protection_bytes_per_key */);
|
||||
|
||||
std::unique_ptr<RandomAccessFileReader> file;
|
||||
NewFileReader(table_name, foptions, &file);
|
||||
|
|
|
@ -11,17 +11,25 @@ void BlockCreateContext::Create(std::unique_ptr<Block_kData>* parsed_out,
|
|||
BlockContents&& block) {
|
||||
parsed_out->reset(new Block_kData(
|
||||
std::move(block), table_options->read_amp_bytes_per_bit, statistics));
|
||||
parsed_out->get()->InitializeDataBlockProtectionInfo(protection_bytes_per_key,
|
||||
raw_ucmp);
|
||||
}
|
||||
void BlockCreateContext::Create(std::unique_ptr<Block_kIndex>* parsed_out,
|
||||
BlockContents&& block) {
|
||||
parsed_out->reset(new Block_kIndex(std::move(block),
|
||||
/*read_amp_bytes_per_bit*/ 0, statistics));
|
||||
parsed_out->get()->InitializeIndexBlockProtectionInfo(
|
||||
protection_bytes_per_key, raw_ucmp, index_value_is_full,
|
||||
index_has_first_key);
|
||||
}
|
||||
void BlockCreateContext::Create(
|
||||
std::unique_ptr<Block_kFilterPartitionIndex>* parsed_out,
|
||||
BlockContents&& block) {
|
||||
parsed_out->reset(new Block_kFilterPartitionIndex(
|
||||
std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
|
||||
parsed_out->get()->InitializeIndexBlockProtectionInfo(
|
||||
protection_bytes_per_key, raw_ucmp, index_value_is_full,
|
||||
index_has_first_key);
|
||||
}
|
||||
void BlockCreateContext::Create(
|
||||
std::unique_ptr<Block_kRangeDeletion>* parsed_out, BlockContents&& block) {
|
||||
|
@ -32,6 +40,8 @@ void BlockCreateContext::Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
|
|||
BlockContents&& block) {
|
||||
parsed_out->reset(new Block_kMetaIndex(
|
||||
std::move(block), /*read_amp_bytes_per_bit*/ 0, statistics));
|
||||
parsed_out->get()->InitializeMetaIndexBlockProtectionInfo(
|
||||
protection_bytes_per_key);
|
||||
}
|
||||
|
||||
void BlockCreateContext::Create(
|
||||
|
|
|
@ -70,14 +70,26 @@ class Block_kMetaIndex : public Block {
|
|||
struct BlockCreateContext : public Cache::CreateContext {
|
||||
BlockCreateContext() {}
|
||||
BlockCreateContext(const BlockBasedTableOptions* _table_options,
|
||||
Statistics* _statistics, bool _using_zstd)
|
||||
Statistics* _statistics, bool _using_zstd,
|
||||
uint8_t _protection_bytes_per_key,
|
||||
const Comparator* _raw_ucmp,
|
||||
bool _index_value_is_full = false,
|
||||
bool _index_has_first_key = false)
|
||||
: table_options(_table_options),
|
||||
statistics(_statistics),
|
||||
using_zstd(_using_zstd) {}
|
||||
using_zstd(_using_zstd),
|
||||
protection_bytes_per_key(_protection_bytes_per_key),
|
||||
raw_ucmp(_raw_ucmp),
|
||||
index_value_is_full(_index_value_is_full),
|
||||
index_has_first_key(_index_has_first_key) {}
|
||||
|
||||
const BlockBasedTableOptions* table_options = nullptr;
|
||||
Statistics* statistics = nullptr;
|
||||
bool using_zstd = false;
|
||||
uint8_t protection_bytes_per_key = 0;
|
||||
const Comparator* raw_ucmp = nullptr;
|
||||
bool index_value_is_full;
|
||||
bool index_has_first_key;
|
||||
|
||||
// For TypedCacheInterface
|
||||
template <typename TBlocklike>
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "db/db_test_util.h"
|
||||
#include "db/dbformat.h"
|
||||
#include "db/memtable.h"
|
||||
#include "db/write_batch_internal.h"
|
||||
|
@ -506,7 +507,7 @@ class IndexBlockTest
|
|||
void GenerateRandomIndexEntries(std::vector<std::string> *separators,
|
||||
std::vector<BlockHandle> *block_handles,
|
||||
std::vector<std::string> *first_keys,
|
||||
const int len) {
|
||||
const int len, bool zero_seqno = false) {
|
||||
Random rnd(42);
|
||||
|
||||
// For each of `len` blocks, we need to generate a first and last key.
|
||||
|
@ -514,7 +515,11 @@ void GenerateRandomIndexEntries(std::vector<std::string> *separators,
|
|||
std::set<std::string> keys;
|
||||
while ((int)keys.size() < len * 2) {
|
||||
// Keys need to be at least 8 bytes long to look like internal keys.
|
||||
keys.insert(test::RandomKey(&rnd, 12));
|
||||
std::string new_key = test::RandomKey(&rnd, 12);
|
||||
if (zero_seqno) {
|
||||
AppendInternalKeyFooter(&new_key, 0 /* seqno */, kTypeValue);
|
||||
}
|
||||
keys.insert(std::move(new_key));
|
||||
}
|
||||
|
||||
uint64_t offset = 0;
|
||||
|
@ -618,6 +623,917 @@ INSTANTIATE_TEST_CASE_P(P, IndexBlockTest,
|
|||
std::make_tuple(true, false),
|
||||
std::make_tuple(true, true)));
|
||||
|
||||
class BlockPerKVChecksumTest : public DBTestBase {
|
||||
public:
|
||||
BlockPerKVChecksumTest()
|
||||
: DBTestBase("block_per_kv_checksum", /*env_do_fsync=*/false) {}
|
||||
|
||||
template <typename TBlockIter>
|
||||
void TestIterateForward(std::unique_ptr<TBlockIter> &biter,
|
||||
size_t &verification_count) {
|
||||
while (biter->Valid()) {
|
||||
verification_count = 0;
|
||||
biter->Next();
|
||||
if (biter->Valid()) {
|
||||
ASSERT_GE(verification_count, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TBlockIter>
|
||||
void TestIterateBackward(std::unique_ptr<TBlockIter> &biter,
|
||||
size_t &verification_count) {
|
||||
while (biter->Valid()) {
|
||||
verification_count = 0;
|
||||
biter->Prev();
|
||||
if (biter->Valid()) {
|
||||
ASSERT_GE(verification_count, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TBlockIter>
|
||||
void TestSeekToFirst(std::unique_ptr<TBlockIter> &biter,
|
||||
size_t &verification_count) {
|
||||
verification_count = 0;
|
||||
biter->SeekToFirst();
|
||||
ASSERT_GE(verification_count, 1);
|
||||
TestIterateForward(biter, verification_count);
|
||||
}
|
||||
|
||||
template <typename TBlockIter>
|
||||
void TestSeekToLast(std::unique_ptr<TBlockIter> &biter,
|
||||
size_t &verification_count) {
|
||||
verification_count = 0;
|
||||
biter->SeekToLast();
|
||||
ASSERT_GE(verification_count, 1);
|
||||
TestIterateBackward(biter, verification_count);
|
||||
}
|
||||
|
||||
template <typename TBlockIter>
|
||||
void TestSeekForPrev(std::unique_ptr<TBlockIter> &biter,
|
||||
size_t &verification_count, std::string k) {
|
||||
verification_count = 0;
|
||||
biter->SeekForPrev(k);
|
||||
ASSERT_GE(verification_count, 1);
|
||||
TestIterateBackward(biter, verification_count);
|
||||
}
|
||||
|
||||
template <typename TBlockIter>
|
||||
void TestSeek(std::unique_ptr<TBlockIter> &biter, size_t &verification_count,
|
||||
std::string k) {
|
||||
verification_count = 0;
|
||||
biter->Seek(k);
|
||||
ASSERT_GE(verification_count, 1);
|
||||
TestIterateForward(biter, verification_count);
|
||||
}
|
||||
|
||||
bool VerifyChecksum(uint32_t checksum_len, const char *checksum_ptr,
|
||||
const Slice &key, const Slice &val) {
|
||||
if (!checksum_len) {
|
||||
return checksum_ptr == nullptr;
|
||||
}
|
||||
return ProtectionInfo64().ProtectKV(key, val).Verify(
|
||||
static_cast<uint8_t>(checksum_len), checksum_ptr);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(BlockPerKVChecksumTest, EmptyBlock) {
|
||||
// Tests that empty block code path is not broken by per kv checksum.
|
||||
BlockBuilder builder(
|
||||
16 /* block_restart_interval */, true /* use_delta_encoding */,
|
||||
false /* use_value_delta_encoding */,
|
||||
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch);
|
||||
Slice raw_block = builder.Finish();
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
|
||||
std::unique_ptr<Block_kData> data_block;
|
||||
Options options = Options();
|
||||
BlockBasedTableOptions tbo;
|
||||
uint8_t protection_bytes_per_key = 8;
|
||||
BlockCreateContext create_context{
|
||||
&tbo, nullptr /* statistics */, false /* using_zstd */,
|
||||
protection_bytes_per_key, options.comparator};
|
||||
create_context.Create(&data_block, std::move(contents));
|
||||
std::unique_ptr<DataBlockIter> biter{data_block->NewDataIterator(
|
||||
options.comparator, kDisableGlobalSequenceNumber)};
|
||||
biter->SeekToFirst();
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
Random rnd(33);
|
||||
biter->SeekForGet(GenerateInternalKey(1, 1, 10, &rnd));
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
biter->SeekToLast();
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
biter->Seek(GenerateInternalKey(1, 1, 10, &rnd));
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
biter->SeekForPrev(GenerateInternalKey(1, 1, 10, &rnd));
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
}
|
||||
|
||||
TEST_F(BlockPerKVChecksumTest, UnsupportedOptionValue) {
|
||||
Options options = Options();
|
||||
options.block_protection_bytes_per_key = 128;
|
||||
Destroy(options);
|
||||
ASSERT_TRUE(TryReopen(options).IsNotSupported());
|
||||
}
|
||||
|
||||
TEST_F(BlockPerKVChecksumTest, InitializeProtectionInfo) {
|
||||
// Make sure that the checksum construction code path does not break
|
||||
// when the block is itself already corrupted.
|
||||
Options options = Options();
|
||||
BlockBasedTableOptions tbo;
|
||||
uint8_t protection_bytes_per_key = 8;
|
||||
BlockCreateContext create_context{
|
||||
&tbo, nullptr /* statistics */, false /* using_zstd */,
|
||||
protection_bytes_per_key, options.comparator};
|
||||
|
||||
{
|
||||
std::string invalid_content = "1";
|
||||
Slice raw_block = invalid_content;
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
std::unique_ptr<Block_kData> data_block;
|
||||
create_context.Create(&data_block, std::move(contents));
|
||||
std::unique_ptr<DataBlockIter> iter{data_block->NewDataIterator(
|
||||
options.comparator, kDisableGlobalSequenceNumber)};
|
||||
ASSERT_TRUE(iter->status().IsCorruption());
|
||||
}
|
||||
{
|
||||
std::string invalid_content = "1";
|
||||
Slice raw_block = invalid_content;
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
std::unique_ptr<Block_kIndex> index_block;
|
||||
create_context.Create(&index_block, std::move(contents));
|
||||
std::unique_ptr<IndexBlockIter> iter{index_block->NewIndexIterator(
|
||||
options.comparator, kDisableGlobalSequenceNumber, nullptr, nullptr,
|
||||
true, false, true, true)};
|
||||
ASSERT_TRUE(iter->status().IsCorruption());
|
||||
}
|
||||
{
|
||||
std::string invalid_content = "1";
|
||||
Slice raw_block = invalid_content;
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
std::unique_ptr<Block_kMetaIndex> meta_block;
|
||||
create_context.Create(&meta_block, std::move(contents));
|
||||
std::unique_ptr<MetaBlockIter> iter{meta_block->NewMetaIterator(true)};
|
||||
ASSERT_TRUE(iter->status().IsCorruption());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(BlockPerKVChecksumTest, ApproximateMemory) {
|
||||
// Tests that ApproximateMemoryUsage() includes memory used by block kv
|
||||
// checksum.
|
||||
const int kNumRecords = 20;
|
||||
std::vector<std::string> keys;
|
||||
std::vector<std::string> values;
|
||||
GenerateRandomKVs(&keys, &values, 0, kNumRecords, 1 /* step */,
|
||||
24 /* padding_size */);
|
||||
std::unique_ptr<BlockBuilder> builder;
|
||||
auto generate_block_content = [&]() {
|
||||
builder = std::make_unique<BlockBuilder>(16 /* restart_interval */);
|
||||
for (int i = 0; i < kNumRecords; ++i) {
|
||||
builder->Add(keys[i], values[i]);
|
||||
}
|
||||
Slice raw_block = builder->Finish();
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
return contents;
|
||||
};
|
||||
|
||||
Options options = Options();
|
||||
BlockBasedTableOptions tbo;
|
||||
uint8_t protection_bytes_per_key = 8;
|
||||
BlockCreateContext with_checksum_create_context{
|
||||
&tbo,
|
||||
nullptr /* statistics */,
|
||||
false /* using_zstd */,
|
||||
protection_bytes_per_key,
|
||||
options.comparator,
|
||||
true /* index_value_is_full */};
|
||||
BlockCreateContext create_context{
|
||||
&tbo, nullptr /* statistics */, false /* using_zstd */,
|
||||
0, options.comparator, true /* index_value_is_full */};
|
||||
|
||||
{
|
||||
std::unique_ptr<Block_kData> data_block;
|
||||
create_context.Create(&data_block, generate_block_content());
|
||||
size_t block_memory = data_block->ApproximateMemoryUsage();
|
||||
std::unique_ptr<Block_kData> with_checksum_data_block;
|
||||
with_checksum_create_context.Create(&with_checksum_data_block,
|
||||
generate_block_content());
|
||||
ASSERT_GT(with_checksum_data_block->ApproximateMemoryUsage() - block_memory,
|
||||
100);
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<Block_kData> meta_block;
|
||||
create_context.Create(&meta_block, generate_block_content());
|
||||
size_t block_memory = meta_block->ApproximateMemoryUsage();
|
||||
std::unique_ptr<Block_kData> with_checksum_meta_block;
|
||||
with_checksum_create_context.Create(&with_checksum_meta_block,
|
||||
generate_block_content());
|
||||
// Rough comparison to avoid flaky test due to memory allocation alignment.
|
||||
ASSERT_GT(with_checksum_meta_block->ApproximateMemoryUsage() - block_memory,
|
||||
100);
|
||||
}
|
||||
|
||||
{
|
||||
// Index block has different contents.
|
||||
std::vector<std::string> separators;
|
||||
std::vector<BlockHandle> block_handles;
|
||||
std::vector<std::string> first_keys;
|
||||
GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
|
||||
kNumRecords);
|
||||
auto generate_index_content = [&]() {
|
||||
builder = std::make_unique<BlockBuilder>(16 /* restart_interval */);
|
||||
BlockHandle last_encoded_handle;
|
||||
for (int i = 0; i < kNumRecords; ++i) {
|
||||
IndexValue entry(block_handles[i], first_keys[i]);
|
||||
std::string encoded_entry;
|
||||
std::string delta_encoded_entry;
|
||||
entry.EncodeTo(&encoded_entry, false, nullptr);
|
||||
last_encoded_handle = entry.handle;
|
||||
const Slice delta_encoded_entry_slice(delta_encoded_entry);
|
||||
builder->Add(separators[i], encoded_entry, &delta_encoded_entry_slice);
|
||||
}
|
||||
Slice raw_block = builder->Finish();
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
return contents;
|
||||
};
|
||||
|
||||
std::unique_ptr<Block_kIndex> index_block;
|
||||
create_context.Create(&index_block, generate_index_content());
|
||||
size_t block_memory = index_block->ApproximateMemoryUsage();
|
||||
std::unique_ptr<Block_kIndex> with_checksum_index_block;
|
||||
with_checksum_create_context.Create(&with_checksum_index_block,
|
||||
generate_index_content());
|
||||
ASSERT_GT(
|
||||
with_checksum_index_block->ApproximateMemoryUsage() - block_memory,
|
||||
100);
|
||||
}
|
||||
}
|
||||
|
||||
std::string GetDataBlockIndexTypeStr(
|
||||
BlockBasedTableOptions::DataBlockIndexType t) {
|
||||
return t == BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch
|
||||
? "BinarySearch"
|
||||
: "BinaryAndHash";
|
||||
}
|
||||
|
||||
class DataBlockKVChecksumTest
|
||||
: public BlockPerKVChecksumTest,
|
||||
public testing::WithParamInterface<std::tuple<
|
||||
BlockBasedTableOptions::DataBlockIndexType,
|
||||
uint8_t /* block_protection_bytes_per_key */,
|
||||
uint32_t /* restart_interval*/, bool /* use_delta_encoding */>> {
|
||||
public:
|
||||
DataBlockKVChecksumTest() = default;
|
||||
|
||||
BlockBasedTableOptions::DataBlockIndexType GetDataBlockIndexType() const {
|
||||
return std::get<0>(GetParam());
|
||||
}
|
||||
uint8_t GetChecksumLen() const { return std::get<1>(GetParam()); }
|
||||
uint32_t GetRestartInterval() const { return std::get<2>(GetParam()); }
|
||||
bool GetUseDeltaEncoding() const { return std::get<3>(GetParam()); }
|
||||
|
||||
std::unique_ptr<Block_kData> GenerateDataBlock(
|
||||
std::vector<std::string> &keys, std::vector<std::string> &values,
|
||||
int num_record) {
|
||||
BlockBasedTableOptions tbo;
|
||||
BlockCreateContext create_context{&tbo, nullptr /* statistics */,
|
||||
false /* using_zstd */, GetChecksumLen(),
|
||||
Options().comparator};
|
||||
builder_ = std::make_unique<BlockBuilder>(
|
||||
static_cast<int>(GetRestartInterval()),
|
||||
GetUseDeltaEncoding() /* use_delta_encoding */,
|
||||
false /* use_value_delta_encoding */, GetDataBlockIndexType());
|
||||
for (int i = 0; i < num_record; i++) {
|
||||
builder_->Add(keys[i], values[i]);
|
||||
}
|
||||
Slice raw_block = builder_->Finish();
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
std::unique_ptr<Block_kData> data_block;
|
||||
create_context.Create(&data_block, std::move(contents));
|
||||
return data_block;
|
||||
}
|
||||
|
||||
std::unique_ptr<BlockBuilder> builder_;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
P, DataBlockKVChecksumTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(
|
||||
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
|
||||
BlockBasedTableOptions::DataBlockIndexType::
|
||||
kDataBlockBinaryAndHash),
|
||||
::testing::Values(0, 1, 2, 4, 8) /* protection_bytes_per_key */,
|
||||
::testing::Values(1, 2, 3, 8, 16) /* restart_interval */,
|
||||
::testing::Values(false, true)) /* delta_encoding */,
|
||||
[](const testing::TestParamInfo<std::tuple<
|
||||
BlockBasedTableOptions::DataBlockIndexType, uint8_t, uint32_t, bool>>
|
||||
&args) {
|
||||
std::ostringstream oss;
|
||||
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param))
|
||||
<< "ProtectionPerKey" << std::to_string(std::get<1>(args.param))
|
||||
<< "RestartInterval" << std::to_string(std::get<2>(args.param))
|
||||
<< "DeltaEncode" << std::to_string(std::get<3>(args.param));
|
||||
return oss.str();
|
||||
});
|
||||
|
||||
TEST_P(DataBlockKVChecksumTest, ChecksumConstructionAndVerification) {
|
||||
uint8_t protection_bytes_per_key = GetChecksumLen();
|
||||
std::vector<int> num_restart_intervals = {1, 16};
|
||||
for (const auto num_restart_interval : num_restart_intervals) {
|
||||
const int kNumRecords =
|
||||
num_restart_interval * static_cast<int>(GetRestartInterval());
|
||||
std::vector<std::string> keys;
|
||||
std::vector<std::string> values;
|
||||
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 /* step */,
|
||||
24 /* padding_size */);
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
std::unique_ptr<Block_kData> data_block =
|
||||
GenerateDataBlock(keys, values, kNumRecords);
|
||||
|
||||
const char *checksum_ptr = data_block->TEST_GetKVChecksum();
|
||||
// Check checksum of correct length is generated
|
||||
for (int i = 0; i < kNumRecords; i++) {
|
||||
ASSERT_TRUE(VerifyChecksum(protection_bytes_per_key,
|
||||
checksum_ptr + i * protection_bytes_per_key,
|
||||
keys[i], values[i]));
|
||||
}
|
||||
std::vector<SequenceNumber> seqnos{kDisableGlobalSequenceNumber, 0};
|
||||
|
||||
// Could just use a boolean flag. Use a counter here just to keep open the
|
||||
// possibility of checking the exact number of verifications in the future.
|
||||
size_t verification_count = 0;
|
||||
// The SyncPoint is placed before checking checksum_len == 0 in
|
||||
// Block::VerifyChecksum(). So verification count is incremented even with
|
||||
// protection_bytes_per_key = 0. No actual checksum computation is done in
|
||||
// that case (see Block::VerifyChecksum()).
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"Block::VerifyChecksum::checksum_len",
|
||||
[&verification_count, protection_bytes_per_key](void *checksum_len) {
|
||||
ASSERT_EQ((*static_cast<uint8_t *>(checksum_len)),
|
||||
protection_bytes_per_key);
|
||||
++verification_count;
|
||||
});
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
for (const auto seqno : seqnos) {
|
||||
std::unique_ptr<DataBlockIter> biter{
|
||||
data_block->NewDataIterator(Options().comparator, seqno)};
|
||||
|
||||
// SeekForGet() some key that does not exist
|
||||
biter->SeekForGet(keys[kNumRecords]);
|
||||
TestIterateForward(biter, verification_count);
|
||||
|
||||
verification_count = 0;
|
||||
biter->SeekForGet(keys[kNumRecords / 2]);
|
||||
ASSERT_GE(verification_count, 1);
|
||||
TestIterateForward(biter, verification_count);
|
||||
|
||||
TestSeekToFirst(biter, verification_count);
|
||||
TestSeekToLast(biter, verification_count);
|
||||
TestSeekForPrev(biter, verification_count, keys[kNumRecords / 2]);
|
||||
TestSeek(biter, verification_count, keys[kNumRecords / 2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class IndexBlockKVChecksumTest
|
||||
: public BlockPerKVChecksumTest,
|
||||
public testing::WithParamInterface<
|
||||
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
|
||||
uint32_t, bool, bool>> {
|
||||
public:
|
||||
IndexBlockKVChecksumTest() = default;
|
||||
|
||||
BlockBasedTableOptions::DataBlockIndexType GetDataBlockIndexType() const {
|
||||
return std::get<0>(GetParam());
|
||||
}
|
||||
uint8_t GetChecksumLen() const { return std::get<1>(GetParam()); }
|
||||
uint32_t GetRestartInterval() const { return std::get<2>(GetParam()); }
|
||||
bool UseValueDeltaEncoding() const { return std::get<3>(GetParam()); }
|
||||
bool IncludeFirstKey() const { return std::get<4>(GetParam()); }
|
||||
|
||||
std::unique_ptr<Block_kIndex> GenerateIndexBlock(
|
||||
std::vector<std::string> &separators,
|
||||
std::vector<BlockHandle> &block_handles,
|
||||
std::vector<std::string> &first_keys, int num_record) {
|
||||
Options options = Options();
|
||||
BlockBasedTableOptions tbo;
|
||||
uint8_t protection_bytes_per_key = GetChecksumLen();
|
||||
BlockCreateContext create_context{
|
||||
&tbo,
|
||||
nullptr /* statistics */,
|
||||
false /* _using_zstd */,
|
||||
protection_bytes_per_key,
|
||||
options.comparator,
|
||||
!UseValueDeltaEncoding() /* value_is_full */,
|
||||
IncludeFirstKey()};
|
||||
builder_ = std::make_unique<BlockBuilder>(
|
||||
static_cast<int>(GetRestartInterval()), true /* use_delta_encoding */,
|
||||
UseValueDeltaEncoding() /* use_value_delta_encoding */,
|
||||
GetDataBlockIndexType());
|
||||
BlockHandle last_encoded_handle;
|
||||
for (int i = 0; i < num_record; i++) {
|
||||
IndexValue entry(block_handles[i], first_keys[i]);
|
||||
std::string encoded_entry;
|
||||
std::string delta_encoded_entry;
|
||||
entry.EncodeTo(&encoded_entry, IncludeFirstKey(), nullptr);
|
||||
if (UseValueDeltaEncoding() && i > 0) {
|
||||
entry.EncodeTo(&delta_encoded_entry, IncludeFirstKey(),
|
||||
&last_encoded_handle);
|
||||
}
|
||||
|
||||
last_encoded_handle = entry.handle;
|
||||
const Slice delta_encoded_entry_slice(delta_encoded_entry);
|
||||
builder_->Add(separators[i], encoded_entry, &delta_encoded_entry_slice);
|
||||
}
|
||||
// read serialized contents of the block
|
||||
Slice raw_block = builder_->Finish();
|
||||
// create block reader
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
std::unique_ptr<Block_kIndex> index_block;
|
||||
|
||||
create_context.Create(&index_block, std::move(contents));
|
||||
return index_block;
|
||||
}
|
||||
|
||||
std::unique_ptr<BlockBuilder> builder_;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
P, IndexBlockKVChecksumTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(
|
||||
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
|
||||
BlockBasedTableOptions::DataBlockIndexType::
|
||||
kDataBlockBinaryAndHash),
|
||||
::testing::Values(0, 1, 2, 4, 8), ::testing::Values(1, 3, 8, 16),
|
||||
::testing::Values(true, false), ::testing::Values(true, false)),
|
||||
[](const testing::TestParamInfo<
|
||||
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
|
||||
uint32_t, bool, bool>> &args) {
|
||||
std::ostringstream oss;
|
||||
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param)) << "ProtBytes"
|
||||
<< std::to_string(std::get<1>(args.param)) << "RestartInterval"
|
||||
<< std::to_string(std::get<2>(args.param)) << "ValueDeltaEncode"
|
||||
<< std::to_string(std::get<3>(args.param)) << "IncludeFirstKey"
|
||||
<< std::to_string(std::get<4>(args.param));
|
||||
return oss.str();
|
||||
});
|
||||
|
||||
TEST_P(IndexBlockKVChecksumTest, ChecksumConstructionAndVerification) {
|
||||
Options options = Options();
|
||||
uint8_t protection_bytes_per_key = GetChecksumLen();
|
||||
std::vector<int> num_restart_intervals = {1, 16};
|
||||
std::vector<SequenceNumber> seqnos{kDisableGlobalSequenceNumber, 10001};
|
||||
|
||||
for (const auto num_restart_interval : num_restart_intervals) {
|
||||
const int kNumRecords =
|
||||
num_restart_interval * static_cast<int>(GetRestartInterval());
|
||||
for (const auto seqno : seqnos) {
|
||||
std::vector<std::string> separators;
|
||||
std::vector<BlockHandle> block_handles;
|
||||
std::vector<std::string> first_keys;
|
||||
GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
|
||||
kNumRecords,
|
||||
seqno != kDisableGlobalSequenceNumber);
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
std::unique_ptr<Block_kIndex> index_block = GenerateIndexBlock(
|
||||
separators, block_handles, first_keys, kNumRecords);
|
||||
IndexBlockIter *kNullIter = nullptr;
|
||||
Statistics *kNullStats = nullptr;
|
||||
// read contents of block sequentially
|
||||
std::unique_ptr<IndexBlockIter> biter{index_block->NewIndexIterator(
|
||||
options.comparator, seqno, kNullIter, kNullStats,
|
||||
true /* total_order_seek */, IncludeFirstKey() /* have_first_key */,
|
||||
true /* key_includes_seq */,
|
||||
!UseValueDeltaEncoding() /* value_is_full */,
|
||||
true /* block_contents_pinned */, nullptr /* prefix_index */)};
|
||||
biter->SeekToFirst();
|
||||
const char *checksum_ptr = index_block->TEST_GetKVChecksum();
|
||||
// Check checksum of correct length is generated
|
||||
for (int i = 0; i < kNumRecords; i++) {
|
||||
// Obtaining the actual content written as value to index block is not
|
||||
// trivial: delta-encoded value is only persisted when not at block
|
||||
// restart point and that keys share some byte (see more in
|
||||
// BlockBuilder::AddWithLastKeyImpl()). So here we just do verification
|
||||
// using value from iterator unlike tests for DataBlockIter or
|
||||
// MetaBlockIter.
|
||||
ASSERT_TRUE(VerifyChecksum(protection_bytes_per_key, checksum_ptr,
|
||||
biter->key(), biter->raw_value()));
|
||||
}
|
||||
|
||||
size_t verification_count = 0;
|
||||
// The SyncPoint is placed before checking checksum_len == 0 in
|
||||
// Block::VerifyChecksum(). To make the testing code below simpler and not
|
||||
// having to differentiate 0 vs non-0 checksum_len, we do an explicit
|
||||
// assert checking on checksum_len here.
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"Block::VerifyChecksum::checksum_len",
|
||||
[&verification_count, protection_bytes_per_key](void *checksum_len) {
|
||||
ASSERT_EQ((*static_cast<uint8_t *>(checksum_len)),
|
||||
protection_bytes_per_key);
|
||||
++verification_count;
|
||||
});
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
TestSeekToFirst(biter, verification_count);
|
||||
TestSeekToLast(biter, verification_count);
|
||||
TestSeek(biter, verification_count, first_keys[kNumRecords / 2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class MetaIndexBlockKVChecksumTest
|
||||
: public BlockPerKVChecksumTest,
|
||||
public testing::WithParamInterface<
|
||||
uint8_t /* block_protection_bytes_per_key */> {
|
||||
public:
|
||||
MetaIndexBlockKVChecksumTest() = default;
|
||||
uint8_t GetChecksumLen() const { return GetParam(); }
|
||||
uint32_t GetRestartInterval() const { return 1; }
|
||||
|
||||
std::unique_ptr<Block_kMetaIndex> GenerateMetaIndexBlock(
|
||||
std::vector<std::string> &keys, std::vector<std::string> &values,
|
||||
int num_record) {
|
||||
Options options = Options();
|
||||
BlockBasedTableOptions tbo;
|
||||
uint8_t protection_bytes_per_key = GetChecksumLen();
|
||||
BlockCreateContext create_context{
|
||||
&tbo, nullptr /* statistics */, false /* using_zstd */,
|
||||
protection_bytes_per_key, options.comparator};
|
||||
builder_ =
|
||||
std::make_unique<BlockBuilder>(static_cast<int>(GetRestartInterval()));
|
||||
// add a bunch of records to a block
|
||||
for (int i = 0; i < num_record; i++) {
|
||||
builder_->Add(keys[i], values[i]);
|
||||
}
|
||||
Slice raw_block = builder_->Finish();
|
||||
BlockContents contents;
|
||||
contents.data = raw_block;
|
||||
std::unique_ptr<Block_kMetaIndex> meta_block;
|
||||
create_context.Create(&meta_block, std::move(contents));
|
||||
return meta_block;
|
||||
}
|
||||
|
||||
std::unique_ptr<BlockBuilder> builder_;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(P, MetaIndexBlockKVChecksumTest,
|
||||
::testing::Values(0, 1, 2, 4, 8),
|
||||
[](const testing::TestParamInfo<uint8_t> &args) {
|
||||
std::ostringstream oss;
|
||||
oss << "ProtBytes" << std::to_string(args.param);
|
||||
return oss.str();
|
||||
});
|
||||
|
||||
TEST_P(MetaIndexBlockKVChecksumTest, ChecksumConstructionAndVerification) {
|
||||
Options options = Options();
|
||||
BlockBasedTableOptions tbo;
|
||||
uint8_t protection_bytes_per_key = GetChecksumLen();
|
||||
BlockCreateContext create_context{
|
||||
&tbo, nullptr /* statistics */, false /* using_zstd */,
|
||||
protection_bytes_per_key, options.comparator};
|
||||
std::vector<int> num_restart_intervals = {1, 16};
|
||||
for (const auto num_restart_interval : num_restart_intervals) {
|
||||
const int kNumRecords = num_restart_interval * GetRestartInterval();
|
||||
std::vector<std::string> keys;
|
||||
std::vector<std::string> values;
|
||||
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 /* step */,
|
||||
24 /* padding_size */);
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
std::unique_ptr<Block_kMetaIndex> meta_block =
|
||||
GenerateMetaIndexBlock(keys, values, kNumRecords);
|
||||
const char *checksum_ptr = meta_block->TEST_GetKVChecksum();
|
||||
// Check checksum of correct length is generated
|
||||
for (int i = 0; i < kNumRecords; i++) {
|
||||
ASSERT_TRUE(VerifyChecksum(protection_bytes_per_key,
|
||||
checksum_ptr + i * protection_bytes_per_key,
|
||||
keys[i], values[i]));
|
||||
}
|
||||
|
||||
size_t verification_count = 0;
|
||||
// The SyncPoint is placed before checking checksum_len == 0 in
|
||||
// Block::VerifyChecksum(). To make the testing code below simpler and not
|
||||
// having to differentiate 0 vs non-0 checksum_len, we do an explicit assert
|
||||
// checking on checksum_len here.
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"Block::VerifyChecksum::checksum_len",
|
||||
[&verification_count, protection_bytes_per_key](void *checksum_len) {
|
||||
ASSERT_EQ((*static_cast<uint8_t *>(checksum_len)),
|
||||
protection_bytes_per_key);
|
||||
++verification_count;
|
||||
});
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
// Check that block iterator does checksum verification
|
||||
std::unique_ptr<MetaBlockIter> biter{
|
||||
meta_block->NewMetaIterator(true /* block_contents_pinned */)};
|
||||
TestSeekToFirst(biter, verification_count);
|
||||
TestSeekToLast(biter, verification_count);
|
||||
TestSeek(biter, verification_count, keys[kNumRecords / 2]);
|
||||
TestSeekForPrev(biter, verification_count, keys[kNumRecords / 2]);
|
||||
}
|
||||
}
|
||||
|
||||
class DataBlockKVChecksumCorruptionTest : public DataBlockKVChecksumTest {
|
||||
public:
|
||||
DataBlockKVChecksumCorruptionTest() = default;
|
||||
|
||||
std::unique_ptr<DataBlockIter> GenerateDataBlockIter(
|
||||
std::vector<std::string> &keys, std::vector<std::string> &values,
|
||||
int num_record) {
|
||||
// During Block construction, we may create block iter to initialize per kv
|
||||
// checksum. Disable syncpoint that may be created for block iter methods.
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
block_ = GenerateDataBlock(keys, values, num_record);
|
||||
std::unique_ptr<DataBlockIter> biter{block_->NewDataIterator(
|
||||
Options().comparator, kDisableGlobalSequenceNumber)};
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
return biter;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Block_kData> block_;
|
||||
};
|
||||
|
||||
TEST_P(DataBlockKVChecksumCorruptionTest, CorruptEntry) {
|
||||
std::vector<int> num_restart_intervals = {1, 3};
|
||||
for (const auto num_restart_interval : num_restart_intervals) {
|
||||
const int kNumRecords =
|
||||
num_restart_interval * static_cast<int>(GetRestartInterval());
|
||||
std::vector<std::string> keys;
|
||||
std::vector<std::string> values;
|
||||
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 /* step */,
|
||||
24 /* padding_size */);
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"BlockIter::UpdateKey::value", [](void *arg) {
|
||||
char *value = static_cast<char *>(arg);
|
||||
// values generated by GenerateRandomKVs are of length 100
|
||||
++value[10];
|
||||
});
|
||||
|
||||
// Purely for reducing the number of lines of code.
|
||||
typedef std::unique_ptr<DataBlockIter> IterPtr;
|
||||
typedef void(IterAPI)(IterPtr & iter, std::string &);
|
||||
|
||||
std::string seek_key = keys[kNumRecords / 2];
|
||||
auto test_seek = [&](IterAPI iter_api) {
|
||||
IterPtr biter = GenerateDataBlockIter(keys, values, kNumRecords);
|
||||
ASSERT_OK(biter->status());
|
||||
iter_api(biter, seek_key);
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_TRUE(biter->status().IsCorruption());
|
||||
};
|
||||
|
||||
test_seek([](IterPtr &iter, std::string &) { iter->SeekToFirst(); });
|
||||
test_seek([](IterPtr &iter, std::string &) { iter->SeekToLast(); });
|
||||
test_seek([](IterPtr &iter, std::string &k) { iter->Seek(k); });
|
||||
test_seek([](IterPtr &iter, std::string &k) { iter->SeekForPrev(k); });
|
||||
test_seek([](IterPtr &iter, std::string &k) { iter->SeekForGet(k); });
|
||||
|
||||
typedef void (DataBlockIter::*IterStepAPI)();
|
||||
auto test_step = [&](IterStepAPI iter_api, std::string &k) {
|
||||
IterPtr biter = GenerateDataBlockIter(keys, values, kNumRecords);
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
biter->Seek(k);
|
||||
ASSERT_TRUE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
std::invoke(iter_api, biter);
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_TRUE(biter->status().IsCorruption());
|
||||
};
|
||||
|
||||
if (kNumRecords > 1) {
|
||||
test_step(&DataBlockIter::Prev, seek_key);
|
||||
test_step(&DataBlockIter::Next, seek_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
P, DataBlockKVChecksumCorruptionTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(
|
||||
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
|
||||
BlockBasedTableOptions::DataBlockIndexType::
|
||||
kDataBlockBinaryAndHash),
|
||||
::testing::Values(4, 8) /* block_protection_bytes_per_key */,
|
||||
::testing::Values(1, 3, 8, 16) /* restart_interval */,
|
||||
::testing::Values(false, true)),
|
||||
[](const testing::TestParamInfo<std::tuple<
|
||||
BlockBasedTableOptions::DataBlockIndexType, uint8_t, uint32_t, bool>>
|
||||
&args) {
|
||||
std::ostringstream oss;
|
||||
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param)) << "ProtBytes"
|
||||
<< std::to_string(std::get<1>(args.param)) << "RestartInterval"
|
||||
<< std::to_string(std::get<2>(args.param)) << "DeltaEncode"
|
||||
<< std::to_string(std::get<3>(args.param));
|
||||
return oss.str();
|
||||
});
|
||||
|
||||
class IndexBlockKVChecksumCorruptionTest : public IndexBlockKVChecksumTest {
|
||||
public:
|
||||
IndexBlockKVChecksumCorruptionTest() = default;
|
||||
|
||||
std::unique_ptr<IndexBlockIter> GenerateIndexBlockIter(
|
||||
std::vector<std::string> &separators,
|
||||
std::vector<BlockHandle> &block_handles,
|
||||
std::vector<std::string> &first_keys, int num_record,
|
||||
SequenceNumber seqno) {
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
block_ =
|
||||
GenerateIndexBlock(separators, block_handles, first_keys, num_record);
|
||||
std::unique_ptr<IndexBlockIter> biter{block_->NewIndexIterator(
|
||||
Options().comparator, seqno, nullptr, nullptr,
|
||||
true /* total_order_seek */, IncludeFirstKey() /* have_first_key */,
|
||||
true /* key_includes_seq */,
|
||||
!UseValueDeltaEncoding() /* value_is_full */,
|
||||
true /* block_contents_pinned */, nullptr /* prefix_index */)};
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
return biter;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Block_kIndex> block_;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
P, IndexBlockKVChecksumCorruptionTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(
|
||||
BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch,
|
||||
BlockBasedTableOptions::DataBlockIndexType::
|
||||
kDataBlockBinaryAndHash),
|
||||
::testing::Values(4, 8) /* block_protection_bytes_per_key */,
|
||||
::testing::Values(1, 3, 8, 16) /* restart_interval */,
|
||||
::testing::Values(true, false), ::testing::Values(true, false)),
|
||||
[](const testing::TestParamInfo<
|
||||
std::tuple<BlockBasedTableOptions::DataBlockIndexType, uint8_t,
|
||||
uint32_t, bool, bool>> &args) {
|
||||
std::ostringstream oss;
|
||||
oss << GetDataBlockIndexTypeStr(std::get<0>(args.param)) << "ProtBytes"
|
||||
<< std::to_string(std::get<1>(args.param)) << "RestartInterval"
|
||||
<< std::to_string(std::get<2>(args.param)) << "ValueDeltaEncode"
|
||||
<< std::to_string(std::get<3>(args.param)) << "IncludeFirstKey"
|
||||
<< std::to_string(std::get<4>(args.param));
|
||||
return oss.str();
|
||||
});
|
||||
|
||||
TEST_P(IndexBlockKVChecksumCorruptionTest, CorruptEntry) {
|
||||
std::vector<int> num_restart_intervals = {1, 3};
|
||||
std::vector<SequenceNumber> seqnos{kDisableGlobalSequenceNumber, 10001};
|
||||
|
||||
for (const auto num_restart_interval : num_restart_intervals) {
|
||||
const int kNumRecords =
|
||||
num_restart_interval * static_cast<int>(GetRestartInterval());
|
||||
for (const auto seqno : seqnos) {
|
||||
std::vector<std::string> separators;
|
||||
std::vector<BlockHandle> block_handles;
|
||||
std::vector<std::string> first_keys;
|
||||
GenerateRandomIndexEntries(&separators, &block_handles, &first_keys,
|
||||
kNumRecords,
|
||||
seqno != kDisableGlobalSequenceNumber);
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"BlockIter::UpdateKey::value", [](void *arg) {
|
||||
char *value = static_cast<char *>(arg);
|
||||
// value can be delta-encoded with different lengths, so we corrupt
|
||||
// first bytes here to be safe
|
||||
++value[0];
|
||||
});
|
||||
|
||||
typedef std::unique_ptr<IndexBlockIter> IterPtr;
|
||||
typedef void(IterAPI)(IterPtr & iter, std::string &);
|
||||
std::string seek_key = first_keys[kNumRecords / 2];
|
||||
auto test_seek = [&](IterAPI iter_api) {
|
||||
std::unique_ptr<IndexBlockIter> biter = GenerateIndexBlockIter(
|
||||
separators, block_handles, first_keys, kNumRecords, seqno);
|
||||
ASSERT_OK(biter->status());
|
||||
iter_api(biter, seek_key);
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_TRUE(biter->status().IsCorruption());
|
||||
};
|
||||
test_seek([](IterPtr &iter, std::string &) { iter->SeekToFirst(); });
|
||||
test_seek([](IterPtr &iter, std::string &) { iter->SeekToLast(); });
|
||||
test_seek([](IterPtr &iter, std::string &k) { iter->Seek(k); });
|
||||
|
||||
typedef void (IndexBlockIter::*IterStepAPI)();
|
||||
auto test_step = [&](IterStepAPI iter_api, std::string &k) {
|
||||
std::unique_ptr<IndexBlockIter> biter = GenerateIndexBlockIter(
|
||||
separators, block_handles, first_keys, kNumRecords, seqno);
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
biter->Seek(k);
|
||||
ASSERT_TRUE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
std::invoke(iter_api, biter);
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_TRUE(biter->status().IsCorruption());
|
||||
};
|
||||
if (kNumRecords > 1) {
|
||||
test_step(&IndexBlockIter::Prev, seek_key);
|
||||
test_step(&IndexBlockIter::Next, seek_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class MetaIndexBlockKVChecksumCorruptionTest
|
||||
: public MetaIndexBlockKVChecksumTest {
|
||||
public:
|
||||
MetaIndexBlockKVChecksumCorruptionTest() = default;
|
||||
|
||||
std::unique_ptr<MetaBlockIter> GenerateMetaIndexBlockIter(
|
||||
std::vector<std::string> &keys, std::vector<std::string> &values,
|
||||
int num_record) {
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
block_ = GenerateMetaIndexBlock(keys, values, num_record);
|
||||
std::unique_ptr<MetaBlockIter> biter{
|
||||
block_->NewMetaIterator(true /* block_contents_pinned */)};
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
return biter;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Block_kMetaIndex> block_;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
P, MetaIndexBlockKVChecksumCorruptionTest,
|
||||
::testing::Values(4, 8) /* block_protection_bytes_per_key */,
|
||||
[](const testing::TestParamInfo<uint8_t> &args) {
|
||||
std::ostringstream oss;
|
||||
oss << "ProtBytes" << std::to_string(args.param);
|
||||
return oss.str();
|
||||
});
|
||||
|
||||
TEST_P(MetaIndexBlockKVChecksumCorruptionTest, CorruptEntry) {
|
||||
Options options = Options();
|
||||
std::vector<int> num_restart_intervals = {1, 3};
|
||||
for (const auto num_restart_interval : num_restart_intervals) {
|
||||
const int kNumRecords =
|
||||
num_restart_interval * static_cast<int>(GetRestartInterval());
|
||||
std::vector<std::string> keys;
|
||||
std::vector<std::string> values;
|
||||
GenerateRandomKVs(&keys, &values, 0, kNumRecords + 1, 1 /* step */,
|
||||
24 /* padding_size */);
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"BlockIter::UpdateKey::value", [](void *arg) {
|
||||
char *value = static_cast<char *>(arg);
|
||||
// values generated by GenerateRandomKVs are of length 100
|
||||
++value[10];
|
||||
});
|
||||
|
||||
typedef std::unique_ptr<MetaBlockIter> IterPtr;
|
||||
typedef void(IterAPI)(IterPtr & iter, std::string &);
|
||||
typedef void (MetaBlockIter::*IterStepAPI)();
|
||||
std::string seek_key = keys[kNumRecords / 2];
|
||||
auto test_seek = [&](IterAPI iter_api) {
|
||||
IterPtr biter = GenerateMetaIndexBlockIter(keys, values, kNumRecords);
|
||||
ASSERT_OK(biter->status());
|
||||
iter_api(biter, seek_key);
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_TRUE(biter->status().IsCorruption());
|
||||
};
|
||||
|
||||
test_seek([](IterPtr &iter, std::string &) { iter->SeekToFirst(); });
|
||||
test_seek([](IterPtr &iter, std::string &) { iter->SeekToLast(); });
|
||||
test_seek([](IterPtr &iter, std::string &k) { iter->Seek(k); });
|
||||
test_seek([](IterPtr &iter, std::string &k) { iter->SeekForPrev(k); });
|
||||
|
||||
auto test_step = [&](IterStepAPI iter_api, const std::string &k) {
|
||||
IterPtr biter = GenerateMetaIndexBlockIter(keys, values, kNumRecords);
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
biter->Seek(k);
|
||||
ASSERT_TRUE(biter->Valid());
|
||||
ASSERT_OK(biter->status());
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
std::invoke(iter_api, biter);
|
||||
ASSERT_FALSE(biter->Valid());
|
||||
ASSERT_TRUE(biter->status().IsCorruption());
|
||||
};
|
||||
|
||||
if (kNumRecords > 1) {
|
||||
test_step(&MetaBlockIter::Prev, seek_key);
|
||||
test_step(&MetaBlockIter::Next, seek_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
|
|
@ -581,8 +581,9 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2,
|
|||
const bool kImmortal = true;
|
||||
ASSERT_OK(ioptions.table_factory->NewTableReader(
|
||||
TableReaderOptions(ioptions, moptions.prefix_extractor, soptions,
|
||||
internal_comparator, !kSkipFilters, !kImmortal,
|
||||
level_),
|
||||
internal_comparator,
|
||||
0 /* block_protection_bytes_per_key */, !kSkipFilters,
|
||||
!kImmortal, level_),
|
||||
std::move(file_reader), sink->contents().size(), &table_reader));
|
||||
// Search using Get()
|
||||
ReadOptions ro;
|
||||
|
|
|
@ -266,9 +266,9 @@ class BlockFetcherTest : public testing::Test {
|
|||
const auto* table_options =
|
||||
table_factory_.GetOptions<BlockBasedTableOptions>();
|
||||
ASSERT_NE(table_options, nullptr);
|
||||
ASSERT_OK(BlockBasedTable::Open(ro, ioptions, EnvOptions(), *table_options,
|
||||
comparator, std::move(file), file_size,
|
||||
&table_reader));
|
||||
ASSERT_OK(BlockBasedTable::Open(
|
||||
ro, ioptions, EnvOptions(), *table_options, comparator, std::move(file),
|
||||
file_size, 0 /* block_protection_bytes_per_key */, &table_reader));
|
||||
|
||||
table->reset(reinterpret_cast<BlockBasedTable*>(table_reader.release()));
|
||||
}
|
||||
|
|
|
@ -165,10 +165,10 @@ Status SstFileDumper::NewTableReader(
|
|||
const ImmutableOptions& /*ioptions*/, const EnvOptions& /*soptions*/,
|
||||
const InternalKeyComparator& /*internal_comparator*/, uint64_t file_size,
|
||||
std::unique_ptr<TableReader>* /*table_reader*/) {
|
||||
auto t_opt =
|
||||
TableReaderOptions(ioptions_, moptions_.prefix_extractor, soptions_,
|
||||
internal_comparator_, false /* skip_filters */,
|
||||
false /* imortal */, true /* force_direct_prefetch */);
|
||||
auto t_opt = TableReaderOptions(
|
||||
ioptions_, moptions_.prefix_extractor, soptions_, internal_comparator_,
|
||||
0 /* block_protection_bytes_per_key */, false /* skip_filters */,
|
||||
false /* immortal */, true /* force_direct_prefetch */);
|
||||
// Allow open file with global sequence number for backward compatibility.
|
||||
t_opt.largest_seqno = kMaxSequenceNumber;
|
||||
|
||||
|
|
|
@ -56,7 +56,8 @@ Status SstFileReader::Open(const std::string& file_path) {
|
|||
}
|
||||
if (s.ok()) {
|
||||
TableReaderOptions t_opt(r->ioptions, r->moptions.prefix_extractor,
|
||||
r->soptions, r->ioptions.internal_comparator);
|
||||
r->soptions, r->ioptions.internal_comparator,
|
||||
r->moptions.block_protection_bytes_per_key);
|
||||
// Allow open file with global sequence number for backward compatibility.
|
||||
t_opt.largest_seqno = kMaxSequenceNumber;
|
||||
s = r->options.table_factory->NewTableReader(t_opt, std::move(file_reader),
|
||||
|
|
|
@ -37,9 +37,9 @@ struct TableReaderOptions {
|
|||
const std::shared_ptr<const SliceTransform>& _prefix_extractor,
|
||||
const EnvOptions& _env_options,
|
||||
const InternalKeyComparator& _internal_comparator,
|
||||
bool _skip_filters = false, bool _immortal = false,
|
||||
bool _force_direct_prefetch = false, int _level = -1,
|
||||
BlockCacheTracer* const _block_cache_tracer = nullptr,
|
||||
uint8_t _block_protection_bytes_per_key, bool _skip_filters = false,
|
||||
bool _immortal = false, bool _force_direct_prefetch = false,
|
||||
int _level = -1, BlockCacheTracer* const _block_cache_tracer = nullptr,
|
||||
size_t _max_file_size_for_l0_meta_pin = 0,
|
||||
const std::string& _cur_db_session_id = "", uint64_t _cur_file_num = 0,
|
||||
UniqueId64x2 _unique_id = {}, SequenceNumber _largest_seqno = 0)
|
||||
|
@ -56,7 +56,8 @@ struct TableReaderOptions {
|
|||
max_file_size_for_l0_meta_pin(_max_file_size_for_l0_meta_pin),
|
||||
cur_db_session_id(_cur_db_session_id),
|
||||
cur_file_num(_cur_file_num),
|
||||
unique_id(_unique_id) {}
|
||||
unique_id(_unique_id),
|
||||
block_protection_bytes_per_key(_block_protection_bytes_per_key) {}
|
||||
|
||||
const ImmutableOptions& ioptions;
|
||||
const std::shared_ptr<const SliceTransform>& prefix_extractor;
|
||||
|
@ -86,6 +87,8 @@ struct TableReaderOptions {
|
|||
|
||||
// Known unique_id or {}, kNullUniqueId64x2 means unknown
|
||||
UniqueId64x2 unique_id;
|
||||
|
||||
uint8_t block_protection_bytes_per_key;
|
||||
};
|
||||
|
||||
struct TableBuilderOptions {
|
||||
|
|
|
@ -144,7 +144,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
|
|||
new RandomAccessFileReader(std::move(raf), file_name));
|
||||
s = opts.table_factory->NewTableReader(
|
||||
TableReaderOptions(ioptions, moptions.prefix_extractor, env_options,
|
||||
ikc),
|
||||
ikc, 0 /* block_protection_bytes_per_key */),
|
||||
std::move(file_reader), file_size, &table_reader);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "Open Table Error: %s\n", s.ToString().c_str());
|
||||
|
|
|
@ -444,7 +444,9 @@ class TableConstructor : public Constructor {
|
|||
file_reader_.reset(new RandomAccessFileReader(std::move(source), "test"));
|
||||
return ioptions.table_factory->NewTableReader(
|
||||
TableReaderOptions(ioptions, moptions.prefix_extractor, soptions,
|
||||
*last_internal_comparator_, /*skip_filters*/ false,
|
||||
*last_internal_comparator_,
|
||||
0 /* block_protection_bytes_per_key */,
|
||||
/*skip_filters*/ false,
|
||||
/*immortal*/ false, false, level_,
|
||||
&block_cache_tracer_, moptions.write_buffer_size, "",
|
||||
file_num_, kNullUniqueId64x2, largest_seqno_),
|
||||
|
@ -4795,7 +4797,7 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
|
|||
|
||||
options.table_factory->NewTableReader(
|
||||
TableReaderOptions(ioptions, moptions.prefix_extractor, EnvOptions(),
|
||||
ikc),
|
||||
ikc, 0 /* block_protection_bytes_per_key */),
|
||||
std::move(file_reader), ss_rw.contents().size(), &table_reader);
|
||||
|
||||
return table_reader->NewIterator(
|
||||
|
@ -4964,7 +4966,8 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) {
|
|||
|
||||
ASSERT_OK(ioptions.table_factory->NewTableReader(
|
||||
TableReaderOptions(ioptions2, moptions2.prefix_extractor, EnvOptions(),
|
||||
GetPlainInternalComparator(options2.comparator)),
|
||||
GetPlainInternalComparator(options2.comparator),
|
||||
0 /* block_protection_bytes_per_key */),
|
||||
std::move(file_reader), sink->contents().size(), &table_reader));
|
||||
|
||||
ReadOptions read_options;
|
||||
|
|
|
@ -1725,6 +1725,10 @@ DEFINE_uint32(
|
|||
"This options determines the size of such checksums. "
|
||||
"Supported values: 0, 1, 2, 4, 8.");
|
||||
|
||||
DEFINE_uint32(block_protection_bytes_per_key, 0,
|
||||
"Enable block per key-value checksum protection. "
|
||||
"Supported values: 0, 1, 2, 4, 8.");
|
||||
|
||||
DEFINE_bool(build_info, false,
|
||||
"Print the build info via GetRocksBuildInfoAsString");
|
||||
|
||||
|
@ -4565,6 +4569,8 @@ class Benchmark {
|
|||
}
|
||||
options.memtable_protection_bytes_per_key =
|
||||
FLAGS_memtable_protection_bytes_per_key;
|
||||
options.block_protection_bytes_per_key =
|
||||
FLAGS_block_protection_bytes_per_key;
|
||||
}
|
||||
|
||||
void InitializeOptionsGeneral(Options* opts) {
|
||||
|
|
|
@ -37,6 +37,7 @@ default_params = {
|
|||
"backup_one_in": 100000,
|
||||
"batch_protection_bytes_per_key": lambda: random.choice([0, 8]),
|
||||
"memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
|
||||
"block_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
|
||||
"block_size": 16384,
|
||||
"bloom_bits": lambda: random.choice(
|
||||
[random.randint(0, 19), random.lognormvariate(2.3, 1.3)]
|
||||
|
|
Loading…
Reference in New Issue