mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-30 04:41:49 +00:00
fd165c869d
Summary: Append per key-value checksum to internal key. These checksums are verified on read paths including Get, Iterator and during Flush. Get and Iterator will return `Corruption` status if there is a checksum verification failure. Flush will make DB become read-only upon memtable entry checksum verification failure. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10281 Test Plan: - Added new unit test cases: `make check` - Benchmark on memtable insert ``` TEST_TMPDIR=/dev/shm/memtable_write ./db_bench -benchmarks=fillseq -disable_wal=true -max_write_buffer_number=100 -num=10000000 -min_write_buffer_number_to_merge=100 # avg over 10 runs Baseline: 1166936 ops/sec memtable 2 bytes kv checksum : 1.11674e+06 ops/sec (-4%) memtable 2 bytes kv checksum + write batch 8 bytes kv checksum: 1.08579e+06 ops/sec (-6.95%) write batch 8 bytes kv checksum: 1.17979e+06 ops/sec (+1.1%) ``` - Benchmark on only memtable read: ops/sec dropped 31% for `readseq` due to time spend on verifying checksum. ops/sec for `readrandom` dropped ~6.8%. ``` # Readseq sudo TEST_TMPDIR=/dev/shm/memtable_read ./db_bench -benchmarks=fillseq,readseq"[-X20]" -disable_wal=true -max_write_buffer_number=100 -num=10000000 -min_write_buffer_number_to_merge=100 readseq [AVG 20 runs] : 7432840 (± 212005) ops/sec; 822.3 (± 23.5) MB/sec readseq [MEDIAN 20 runs] : 7573878 ops/sec; 837.9 MB/sec With -memtable_protection_bytes_per_key=2: readseq [AVG 20 runs] : 5134607 (± 119596) ops/sec; 568.0 (± 13.2) MB/sec readseq [MEDIAN 20 runs] : 5232946 ops/sec; 578.9 MB/sec # Readrandom sudo TEST_TMPDIR=/dev/shm/memtable_read ./db_bench -benchmarks=fillrandom,readrandom"[-X10]" -disable_wal=true -max_write_buffer_number=100 -num=1000000 -min_write_buffer_number_to_merge=100 readrandom [AVG 10 runs] : 140236 (± 3938) ops/sec; 9.8 (± 0.3) MB/sec readrandom [MEDIAN 10 runs] : 140545 ops/sec; 9.8 MB/sec With -memtable_protection_bytes_per_key=2: readrandom [AVG 10 runs] : 130632 (± 2738) ops/sec; 9.1 (± 0.2) MB/sec readrandom [MEDIAN 10 runs] : 130341 ops/sec; 9.1 MB/sec ``` - Stress test: `python3 -u tools/db_crashtest.py whitebox --duration=1800` Reviewed By: ajkr Differential Revision: D37607896 Pulled By: cbi42 fbshipit-source-id: fdaefb475629d2471780d4a5f5bf81b44ee56113
399 lines
14 KiB
C++
399 lines
14 KiB
C++
// Copyright (c) 2020-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// This file contains classes containing fields to protect individual entries.
|
|
// The classes are named "ProtectionInfo<suffix>", where <suffix> indicates the
|
|
// combination of fields that are covered. Each field has a single letter
|
|
// abbreviation as follows.
|
|
//
|
|
// K = key
|
|
// V = value
|
|
// O = optype aka value type
|
|
// S = seqno
|
|
// C = CF ID
|
|
//
|
|
// Then, for example, a class that protects an entry consisting of key, value,
|
|
// optype, and CF ID (i.e., a `WriteBatch` entry) would be named
|
|
// `ProtectionInfoKVOC`.
|
|
//
|
|
// The `ProtectionInfo.*` classes are templated on the integer type used to hold
|
|
// the XOR of hashes for each field. Only unsigned integer types are supported,
|
|
// and the maximum supported integer width is 64 bits. When the integer type is
|
|
// narrower than the hash values, we lop off the most significant bits to make
|
|
// them fit.
|
|
//
|
|
// The `ProtectionInfo.*` classes are all intended to be non-persistent. We do
|
|
// not currently make the byte order consistent for integer fields before
|
|
// hashing them, so the resulting values are endianness-dependent.
|
|
|
|
#pragma once
|
|
|
|
#include <type_traits>
|
|
|
|
#include "db/dbformat.h"
|
|
#include "rocksdb/types.h"
|
|
#include "util/hash.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
template <typename T>
|
|
class ProtectionInfo;
|
|
template <typename T>
|
|
class ProtectionInfoKVO;
|
|
template <typename T>
|
|
class ProtectionInfoKVOC;
|
|
template <typename T>
|
|
class ProtectionInfoKVOS;
|
|
|
|
// Aliases for 64-bit protection infos.
|
|
using ProtectionInfo64 = ProtectionInfo<uint64_t>;
|
|
using ProtectionInfoKVO64 = ProtectionInfoKVO<uint64_t>;
|
|
using ProtectionInfoKVOC64 = ProtectionInfoKVOC<uint64_t>;
|
|
using ProtectionInfoKVOS64 = ProtectionInfoKVOS<uint64_t>;
|
|
|
|
template <typename T>
|
|
class ProtectionInfo {
|
|
public:
|
|
ProtectionInfo() = default;
|
|
|
|
Status GetStatus() const;
|
|
ProtectionInfoKVO<T> ProtectKVO(const Slice& key, const Slice& value,
|
|
ValueType op_type) const;
|
|
ProtectionInfoKVO<T> ProtectKVO(const SliceParts& key,
|
|
const SliceParts& value,
|
|
ValueType op_type) const;
|
|
|
|
T GetVal() const { return val_; }
|
|
|
|
private:
|
|
friend class ProtectionInfoKVO<T>;
|
|
friend class ProtectionInfoKVOS<T>;
|
|
friend class ProtectionInfoKVOC<T>;
|
|
|
|
// Each field is hashed with an independent value so we can catch fields being
|
|
// swapped. Per the `NPHash64()` docs, using consecutive seeds is a pitfall,
|
|
// and we should instead vary our seeds by a large odd number. This value by
|
|
// which we increment (0xD28AAD72F49BD50B) was taken from
|
|
// `head -c8 /dev/urandom | hexdump`, run repeatedly until it yielded an odd
|
|
// number. The values are computed manually since the Windows C++ compiler
|
|
// complains about the overflow when adding constants.
|
|
static const uint64_t kSeedK = 0;
|
|
static const uint64_t kSeedV = 0xD28AAD72F49BD50B;
|
|
static const uint64_t kSeedO = 0xA5155AE5E937AA16;
|
|
static const uint64_t kSeedS = 0x77A00858DDD37F21;
|
|
static const uint64_t kSeedC = 0x4A2AB5CBD26F542C;
|
|
|
|
ProtectionInfo(T val) : val_(val) {
|
|
static_assert(sizeof(ProtectionInfo<T>) == sizeof(T), "");
|
|
}
|
|
|
|
void SetVal(T val) { val_ = val; }
|
|
|
|
T val_ = 0;
|
|
};
|
|
|
|
template <typename T>
|
|
class ProtectionInfoKVO {
|
|
public:
|
|
ProtectionInfoKVO() = default;
|
|
|
|
ProtectionInfo<T> StripKVO(const Slice& key, const Slice& value,
|
|
ValueType op_type) const;
|
|
ProtectionInfo<T> StripKVO(const SliceParts& key, const SliceParts& value,
|
|
ValueType op_type) const;
|
|
|
|
ProtectionInfoKVOC<T> ProtectC(ColumnFamilyId column_family_id) const;
|
|
ProtectionInfoKVOS<T> ProtectS(SequenceNumber sequence_number) const;
|
|
|
|
void UpdateK(const Slice& old_key, const Slice& new_key);
|
|
void UpdateK(const SliceParts& old_key, const SliceParts& new_key);
|
|
void UpdateV(const Slice& old_value, const Slice& new_value);
|
|
void UpdateV(const SliceParts& old_value, const SliceParts& new_value);
|
|
void UpdateO(ValueType old_op_type, ValueType new_op_type);
|
|
|
|
T GetVal() const { return info_.GetVal(); }
|
|
|
|
private:
|
|
friend class ProtectionInfo<T>;
|
|
friend class ProtectionInfoKVOS<T>;
|
|
friend class ProtectionInfoKVOC<T>;
|
|
|
|
explicit ProtectionInfoKVO(T val) : info_(val) {
|
|
static_assert(sizeof(ProtectionInfoKVO<T>) == sizeof(T), "");
|
|
}
|
|
|
|
void SetVal(T val) { info_.SetVal(val); }
|
|
|
|
ProtectionInfo<T> info_;
|
|
};
|
|
|
|
template <typename T>
|
|
class ProtectionInfoKVOC {
|
|
public:
|
|
ProtectionInfoKVOC() = default;
|
|
|
|
ProtectionInfoKVO<T> StripC(ColumnFamilyId column_family_id) const;
|
|
|
|
void UpdateK(const Slice& old_key, const Slice& new_key) {
|
|
kvo_.UpdateK(old_key, new_key);
|
|
}
|
|
void UpdateK(const SliceParts& old_key, const SliceParts& new_key) {
|
|
kvo_.UpdateK(old_key, new_key);
|
|
}
|
|
void UpdateV(const Slice& old_value, const Slice& new_value) {
|
|
kvo_.UpdateV(old_value, new_value);
|
|
}
|
|
void UpdateV(const SliceParts& old_value, const SliceParts& new_value) {
|
|
kvo_.UpdateV(old_value, new_value);
|
|
}
|
|
void UpdateO(ValueType old_op_type, ValueType new_op_type) {
|
|
kvo_.UpdateO(old_op_type, new_op_type);
|
|
}
|
|
void UpdateC(ColumnFamilyId old_column_family_id,
|
|
ColumnFamilyId new_column_family_id);
|
|
|
|
T GetVal() const { return kvo_.GetVal(); }
|
|
|
|
private:
|
|
friend class ProtectionInfoKVO<T>;
|
|
|
|
explicit ProtectionInfoKVOC(T val) : kvo_(val) {
|
|
static_assert(sizeof(ProtectionInfoKVOC<T>) == sizeof(T), "");
|
|
}
|
|
|
|
void SetVal(T val) { kvo_.SetVal(val); }
|
|
|
|
ProtectionInfoKVO<T> kvo_;
|
|
};
|
|
|
|
template <typename T>
|
|
class ProtectionInfoKVOS {
|
|
public:
|
|
ProtectionInfoKVOS() = default;
|
|
|
|
ProtectionInfoKVO<T> StripS(SequenceNumber sequence_number) const;
|
|
|
|
void UpdateK(const Slice& old_key, const Slice& new_key) {
|
|
kvo_.UpdateK(old_key, new_key);
|
|
}
|
|
void UpdateK(const SliceParts& old_key, const SliceParts& new_key) {
|
|
kvo_.UpdateK(old_key, new_key);
|
|
}
|
|
void UpdateV(const Slice& old_value, const Slice& new_value) {
|
|
kvo_.UpdateV(old_value, new_value);
|
|
}
|
|
void UpdateV(const SliceParts& old_value, const SliceParts& new_value) {
|
|
kvo_.UpdateV(old_value, new_value);
|
|
}
|
|
void UpdateO(ValueType old_op_type, ValueType new_op_type) {
|
|
kvo_.UpdateO(old_op_type, new_op_type);
|
|
}
|
|
void UpdateS(SequenceNumber old_sequence_number,
|
|
SequenceNumber new_sequence_number);
|
|
|
|
T GetVal() const { return kvo_.GetVal(); }
|
|
|
|
private:
|
|
friend class ProtectionInfoKVO<T>;
|
|
|
|
explicit ProtectionInfoKVOS(T val) : kvo_(val) {
|
|
static_assert(sizeof(ProtectionInfoKVOS<T>) == sizeof(T), "");
|
|
}
|
|
|
|
void SetVal(T val) { kvo_.SetVal(val); }
|
|
|
|
ProtectionInfoKVO<T> kvo_;
|
|
};
|
|
|
|
template <typename T>
|
|
Status ProtectionInfo<T>::GetStatus() const {
|
|
if (val_ != 0) {
|
|
return Status::Corruption("ProtectionInfo mismatch");
|
|
}
|
|
return Status::OK();
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfoKVO<T> ProtectionInfo<T>::ProtectKVO(const Slice& key,
|
|
const Slice& value,
|
|
ValueType op_type) const {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(GetSliceNPHash64(key, ProtectionInfo<T>::kSeedK));
|
|
val =
|
|
val ^ static_cast<T>(GetSliceNPHash64(value, ProtectionInfo<T>::kSeedV));
|
|
val = val ^
|
|
static_cast<T>(NPHash64(reinterpret_cast<char*>(&op_type),
|
|
sizeof(op_type), ProtectionInfo<T>::kSeedO));
|
|
return ProtectionInfoKVO<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfoKVO<T> ProtectionInfo<T>::ProtectKVO(const SliceParts& key,
|
|
const SliceParts& value,
|
|
ValueType op_type) const {
|
|
T val = GetVal();
|
|
val = val ^
|
|
static_cast<T>(GetSlicePartsNPHash64(key, ProtectionInfo<T>::kSeedK));
|
|
val = val ^
|
|
static_cast<T>(GetSlicePartsNPHash64(value, ProtectionInfo<T>::kSeedV));
|
|
val = val ^
|
|
static_cast<T>(NPHash64(reinterpret_cast<char*>(&op_type),
|
|
sizeof(op_type), ProtectionInfo<T>::kSeedO));
|
|
return ProtectionInfoKVO<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
void ProtectionInfoKVO<T>::UpdateK(const Slice& old_key, const Slice& new_key) {
|
|
T val = GetVal();
|
|
val = val ^
|
|
static_cast<T>(GetSliceNPHash64(old_key, ProtectionInfo<T>::kSeedK));
|
|
val = val ^
|
|
static_cast<T>(GetSliceNPHash64(new_key, ProtectionInfo<T>::kSeedK));
|
|
SetVal(val);
|
|
}
|
|
|
|
template <typename T>
|
|
void ProtectionInfoKVO<T>::UpdateK(const SliceParts& old_key,
|
|
const SliceParts& new_key) {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(
|
|
GetSlicePartsNPHash64(old_key, ProtectionInfo<T>::kSeedK));
|
|
val = val ^ static_cast<T>(
|
|
GetSlicePartsNPHash64(new_key, ProtectionInfo<T>::kSeedK));
|
|
SetVal(val);
|
|
}
|
|
|
|
template <typename T>
|
|
void ProtectionInfoKVO<T>::UpdateV(const Slice& old_value,
|
|
const Slice& new_value) {
|
|
T val = GetVal();
|
|
val = val ^
|
|
static_cast<T>(GetSliceNPHash64(old_value, ProtectionInfo<T>::kSeedV));
|
|
val = val ^
|
|
static_cast<T>(GetSliceNPHash64(new_value, ProtectionInfo<T>::kSeedV));
|
|
SetVal(val);
|
|
}
|
|
|
|
template <typename T>
|
|
void ProtectionInfoKVO<T>::UpdateV(const SliceParts& old_value,
|
|
const SliceParts& new_value) {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(
|
|
GetSlicePartsNPHash64(old_value, ProtectionInfo<T>::kSeedV));
|
|
val = val ^ static_cast<T>(
|
|
GetSlicePartsNPHash64(new_value, ProtectionInfo<T>::kSeedV));
|
|
SetVal(val);
|
|
}
|
|
|
|
template <typename T>
|
|
void ProtectionInfoKVO<T>::UpdateO(ValueType old_op_type,
|
|
ValueType new_op_type) {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(NPHash64(reinterpret_cast<char*>(&old_op_type),
|
|
sizeof(old_op_type),
|
|
ProtectionInfo<T>::kSeedO));
|
|
val = val ^ static_cast<T>(NPHash64(reinterpret_cast<char*>(&new_op_type),
|
|
sizeof(new_op_type),
|
|
ProtectionInfo<T>::kSeedO));
|
|
SetVal(val);
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfo<T> ProtectionInfoKVO<T>::StripKVO(const Slice& key,
|
|
const Slice& value,
|
|
ValueType op_type) const {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(GetSliceNPHash64(key, ProtectionInfo<T>::kSeedK));
|
|
val =
|
|
val ^ static_cast<T>(GetSliceNPHash64(value, ProtectionInfo<T>::kSeedV));
|
|
val = val ^
|
|
static_cast<T>(NPHash64(reinterpret_cast<char*>(&op_type),
|
|
sizeof(op_type), ProtectionInfo<T>::kSeedO));
|
|
return ProtectionInfo<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfo<T> ProtectionInfoKVO<T>::StripKVO(const SliceParts& key,
|
|
const SliceParts& value,
|
|
ValueType op_type) const {
|
|
T val = GetVal();
|
|
val = val ^
|
|
static_cast<T>(GetSlicePartsNPHash64(key, ProtectionInfo<T>::kSeedK));
|
|
val = val ^
|
|
static_cast<T>(GetSlicePartsNPHash64(value, ProtectionInfo<T>::kSeedV));
|
|
val = val ^
|
|
static_cast<T>(NPHash64(reinterpret_cast<char*>(&op_type),
|
|
sizeof(op_type), ProtectionInfo<T>::kSeedO));
|
|
return ProtectionInfo<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfoKVOC<T> ProtectionInfoKVO<T>::ProtectC(
|
|
ColumnFamilyId column_family_id) const {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(NPHash64(
|
|
reinterpret_cast<char*>(&column_family_id),
|
|
sizeof(column_family_id), ProtectionInfo<T>::kSeedC));
|
|
return ProtectionInfoKVOC<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfoKVO<T> ProtectionInfoKVOC<T>::StripC(
|
|
ColumnFamilyId column_family_id) const {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(NPHash64(
|
|
reinterpret_cast<char*>(&column_family_id),
|
|
sizeof(column_family_id), ProtectionInfo<T>::kSeedC));
|
|
return ProtectionInfoKVO<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
void ProtectionInfoKVOC<T>::UpdateC(ColumnFamilyId old_column_family_id,
|
|
ColumnFamilyId new_column_family_id) {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(NPHash64(
|
|
reinterpret_cast<char*>(&old_column_family_id),
|
|
sizeof(old_column_family_id), ProtectionInfo<T>::kSeedC));
|
|
val = val ^ static_cast<T>(NPHash64(
|
|
reinterpret_cast<char*>(&new_column_family_id),
|
|
sizeof(new_column_family_id), ProtectionInfo<T>::kSeedC));
|
|
SetVal(val);
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfoKVOS<T> ProtectionInfoKVO<T>::ProtectS(
|
|
SequenceNumber sequence_number) const {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(NPHash64(reinterpret_cast<char*>(&sequence_number),
|
|
sizeof(sequence_number),
|
|
ProtectionInfo<T>::kSeedS));
|
|
return ProtectionInfoKVOS<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
ProtectionInfoKVO<T> ProtectionInfoKVOS<T>::StripS(
|
|
SequenceNumber sequence_number) const {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(NPHash64(reinterpret_cast<char*>(&sequence_number),
|
|
sizeof(sequence_number),
|
|
ProtectionInfo<T>::kSeedS));
|
|
return ProtectionInfoKVO<T>(val);
|
|
}
|
|
|
|
template <typename T>
|
|
void ProtectionInfoKVOS<T>::UpdateS(SequenceNumber old_sequence_number,
|
|
SequenceNumber new_sequence_number) {
|
|
T val = GetVal();
|
|
val = val ^ static_cast<T>(NPHash64(
|
|
reinterpret_cast<char*>(&old_sequence_number),
|
|
sizeof(old_sequence_number), ProtectionInfo<T>::kSeedS));
|
|
val = val ^ static_cast<T>(NPHash64(
|
|
reinterpret_cast<char*>(&new_sequence_number),
|
|
sizeof(new_sequence_number), ProtectionInfo<T>::kSeedS));
|
|
SetVal(val);
|
|
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|