rocksdb/fuzz/sst_file_writer_fuzzer.cc
Changyu Bi 62fc15f009 Block per key-value checksum (#11287)
Summary:
Add the option `block_protection_bytes_per_key` and an implementation of block per key-value checksums (a usage sketch follows the list below). The main changes are:
1. checksum construction and verification in block.cc/h
2. passing the option `block_protection_bytes_per_key` around (mainly to methods defined in table_cache.h)
3. unit test and crash test updates
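
For illustration, a minimal sketch of enabling the feature through the options API is shown below. `block_protection_bytes_per_key` is the per-column-family option this PR introduces; the DB path is hypothetical and the snippet is not taken from this change.

```
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  // Request 1 byte of checksum protection per key-value pair in data blocks.
  // 0 (the default) disables the feature.
  options.block_protection_bytes_per_key = 1;

  ROCKSDB_NAMESPACE::DB* db = nullptr;
  // Hypothetical path, for illustration only.
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/block_protection_demo", &db);
  assert(s.ok());
  // Reads through block-based tables now verify the per key-value checksums
  // maintained for blocks in the block cache.
  delete db;
  return 0;
}
```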

Tests:
* Added unit tests
* Crash test: `python3 tools/db_crashtest.py blackbox --simple --block_protection_bytes_per_key=1 --write_buffer_size=1048576`

Follow-up (maybe as a separate PR): make sure corruption statuses returned from BlockIters are correctly handled.

Performance:
Turning on block per-KV protection has a non-trivial negative impact on read performance and costs additional memory.
For memory, each block carries an additional 24 bytes of checksum-related state besides the checksums themselves (a back-of-envelope estimate appears after the benchmark results below). For CPU, I set up a DB of size ~1.2GB with 5M keys (32-byte keys and 200-byte values), which compacts to ~5 SST files (target file size 256 MB) in L6 without compression. I tested readrandom performance with various block cache sizes (to mimic various cache hit rates):

```
SETUP
make OPTIMIZE_LEVEL="-O3" USE_LTO=1 DEBUG_LEVEL=0 -j32 db_bench
./db_bench -benchmarks=fillseq,compact0,waitforcompaction,compact,waitforcompaction -write_buffer_size=33554432 -level_compaction_dynamic_level_bytes=true -max_background_jobs=8 -target_file_size_base=268435456 --num=5000000 --key_size=32 --value_size=200 --compression_type=none

BENCHMARK
./db_bench --use_existing_db -benchmarks=readtocache,readrandom[-X10] --num=5000000 --key_size=32 --disable_auto_compactions --reads=1000000 --block_protection_bytes_per_key=[0|1] --cache_size=$CACHESIZE

```

The readrandom ops/sec looks like the following:

```
Block cache size:              2GB       1.2GB * 0.9   1.2GB * 0.8   1.2GB * 0.5   8MB
Main                           240805    223604        198176        161653        139040
PR prot_bytes=0                238691    226693        200127        161082        141153
PR prot_bytes=1                214983    193199        178532        137013        108211
prot_bytes=1 vs prot_bytes=0   -10%      -15%          -10.8%        -15%          -23%
```

The benchmark has a lot of variance, but across the different cache hit rates there was roughly a 5% to 25% regression.
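
For the memory side, here is a back-of-envelope estimate for the benchmark DB above. It is illustrative only: it assumes the default ~4 KB uncompressed data block size and 1 byte of protection per key, and the totals are estimates, not measurements.

```
#include <cstdint>
#include <iostream>

int main() {
  // Illustrative estimate only; assumes the default ~4 KB data block size.
  constexpr uint64_t kNumKeys = 5'000'000;        // 5M keys in the test DB
  constexpr uint64_t kDataBytes = 1'200'000'000;  // ~1.2 GB of data
  constexpr uint64_t kBlockSize = 4 * 1024;       // assumed block size
  constexpr uint64_t kNumBlocks = kDataBytes / kBlockSize;   // ~293K blocks
  const uint64_t per_block_state = 24 * kNumBlocks;  // ~7 MB of block states
  const uint64_t checksums = 1 * kNumKeys;           // ~5 MB at 1 byte/key
  std::cout << "approx. extra bytes if all blocks are cached: "
            << (per_block_state + checksums) << std::endl;  // ~12 MB
  return 0;
}
```

The estimate only covers state attached to blocks resident in the block cache; actual overhead depends on block size and cache configuration.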

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11287

Reviewed By: ajkr

Differential Revision: D43970708

Pulled By: cbi42

fbshipit-source-id: ef98d898b71779846fa74212b9ec9e08b7183940
2023-04-25 12:08:23 -07:00


// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include <algorithm>
#include <iostream>
#include <memory>
#include <string>

#include "proto/gen/db_operation.pb.h"
#include "rocksdb/file_system.h"
#include "rocksdb/sst_file_writer.h"
#include "src/libfuzzer/libfuzzer_macro.h"
#include "table/table_builder.h"
#include "table/table_reader.h"
#include "util.h"

using ROCKSDB_NAMESPACE::BytewiseComparator;
using ROCKSDB_NAMESPACE::Comparator;
using ROCKSDB_NAMESPACE::EnvOptions;
using ROCKSDB_NAMESPACE::ExternalSstFileInfo;
using ROCKSDB_NAMESPACE::FileOptions;
using ROCKSDB_NAMESPACE::FileSystem;
using ROCKSDB_NAMESPACE::ImmutableCFOptions;
using ROCKSDB_NAMESPACE::ImmutableOptions;
using ROCKSDB_NAMESPACE::InternalIterator;
using ROCKSDB_NAMESPACE::IOOptions;
using ROCKSDB_NAMESPACE::kMaxSequenceNumber;
using ROCKSDB_NAMESPACE::Options;
using ROCKSDB_NAMESPACE::ParsedInternalKey;
using ROCKSDB_NAMESPACE::ParseInternalKey;
using ROCKSDB_NAMESPACE::RandomAccessFileReader;
using ROCKSDB_NAMESPACE::ReadOptions;
using ROCKSDB_NAMESPACE::SstFileWriter;
using ROCKSDB_NAMESPACE::Status;
using ROCKSDB_NAMESPACE::TableReader;
using ROCKSDB_NAMESPACE::TableReaderCaller;
using ROCKSDB_NAMESPACE::TableReaderOptions;
using ROCKSDB_NAMESPACE::ValueType;

// Keys in SST file writer operations must be unique and in ascending order.
// For each DBOperation generated by the fuzzer, this function is called on
// it to deduplicate and sort the keys in the DBOperations.
protobuf_mutator::libfuzzer::PostProcessorRegistration<DBOperations> reg = {
    [](DBOperations* input, unsigned int /* seed */) {
      const Comparator* comparator = BytewiseComparator();
      auto ops = input->mutable_operations();

      // Make sure begin <= end for DELETE_RANGE.
      for (DBOperation& op : *ops) {
        if (op.type() == OpType::DELETE_RANGE) {
          auto begin = op.key();
          auto end = op.value();
          if (comparator->Compare(begin, end) > 0) {
            std::swap(begin, end);
            op.set_key(begin);
            op.set_value(end);
          }
        }
      }

      std::sort(ops->begin(), ops->end(),
                [&comparator](const DBOperation& a, const DBOperation& b) {
                  return comparator->Compare(a.key(), b.key()) < 0;
                });
      auto last = std::unique(
          ops->begin(), ops->end(),
          [&comparator](const DBOperation& a, const DBOperation& b) {
            return comparator->Compare(a.key(), b.key()) == 0;
          });
      ops->erase(last, ops->end());
    }};

TableReader* NewTableReader(const std::string& sst_file_path,
                            const Options& options,
                            const EnvOptions& env_options,
                            const ImmutableCFOptions& cf_ioptions) {
  // This code block is similar to SstFileReader::Open.

  uint64_t file_size = 0;
  std::unique_ptr<RandomAccessFileReader> file_reader;
  std::unique_ptr<TableReader> table_reader;
  const auto& fs = options.env->GetFileSystem();
  FileOptions fopts(env_options);
  Status s = options.env->GetFileSize(sst_file_path, &file_size);
  if (s.ok()) {
    s = RandomAccessFileReader::Create(fs, sst_file_path, fopts, &file_reader,
                                       nullptr);
  }
  if (s.ok()) {
    ImmutableOptions iopts(options, cf_ioptions);
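    // block_protection_bytes_per_key is passed as 0 (the default) below, so
    // per key-value checksum verification is not exercised by this reader.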
    TableReaderOptions t_opt(iopts, /*prefix_extractor=*/nullptr, env_options,
                             cf_ioptions.internal_comparator,
                             0 /* block_protection_bytes_per_key */);
    t_opt.largest_seqno = kMaxSequenceNumber;
    s = options.table_factory->NewTableReader(t_opt, std::move(file_reader),
                                              file_size, &table_reader,
                                              /*prefetch=*/false);
  }
  if (!s.ok()) {
    std::cerr << "Failed to create TableReader for " << sst_file_path << ": "
              << s.ToString() << std::endl;
    abort();
  }
  return table_reader.release();
}

ValueType ToValueType(OpType op_type) {
  switch (op_type) {
    case OpType::PUT:
      return ValueType::kTypeValue;
    case OpType::MERGE:
      return ValueType::kTypeMerge;
    case OpType::DELETE:
      return ValueType::kTypeDeletion;
    case OpType::DELETE_RANGE:
      return ValueType::kTypeRangeDeletion;
    default:
      std::cerr << "Unknown operation type " << static_cast<int>(op_type)
                << std::endl;
      abort();
  }
}

// Fuzzes DB operations as input, lets SstFileWriter generate an SST file
// according to the operations, then lets TableReader read and check all the
// key-value pairs from the generated SST file.
DEFINE_PROTO_FUZZER(DBOperations& input) {
  if (input.operations().empty()) {
    return;
  }

  std::string sstfile;
  {
    auto fs = FileSystem::Default();
    std::string dir;
    IOOptions opt;
    CHECK_OK(fs->GetTestDirectory(opt, &dir, nullptr));
    sstfile = dir + "/SstFileWriterFuzzer.sst";
  }

  Options options;
  EnvOptions env_options(options);
  ImmutableCFOptions cf_ioptions(options);

  // Generate sst file.
  SstFileWriter writer(env_options, options);
  CHECK_OK(writer.Open(sstfile));
  for (const DBOperation& op : input.operations()) {
    switch (op.type()) {
      case OpType::PUT: {
        CHECK_OK(writer.Put(op.key(), op.value()));
        break;
      }
      case OpType::MERGE: {
        CHECK_OK(writer.Merge(op.key(), op.value()));
        break;
      }
      case OpType::DELETE: {
        CHECK_OK(writer.Delete(op.key()));
        break;
      }
      case OpType::DELETE_RANGE: {
        CHECK_OK(writer.DeleteRange(op.key(), op.value()));
        break;
      }
      default: {
        std::cerr << "Unsupported operation " << static_cast<int>(op.type())
                  << std::endl;
        abort();
      }
    }
  }
  ExternalSstFileInfo info;
  CHECK_OK(writer.Finish(&info));

  // Iterate and verify key-value pairs.
  std::unique_ptr<TableReader> table_reader(
      ::NewTableReader(sstfile, options, env_options, cf_ioptions));
  ReadOptions roptions;
  CHECK_OK(table_reader->VerifyChecksum(roptions,
                                        TableReaderCaller::kUncategorized));
  std::unique_ptr<InternalIterator> it(
      table_reader->NewIterator(roptions, /*prefix_extractor=*/nullptr,
                                /*arena=*/nullptr, /*skip_filters=*/true,
                                TableReaderCaller::kUncategorized));
  it->SeekToFirst();
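  // The post-processor above sorted the operations and removed duplicate user
  // keys, so the iterator is expected to return entries in the same order.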
  for (const DBOperation& op : input.operations()) {
    if (op.type() == OpType::DELETE_RANGE) {
      // InternalIterator cannot iterate over DELETE_RANGE entries.
      continue;
    }
    CHECK_TRUE(it->Valid());
    ParsedInternalKey ikey;
    CHECK_OK(ParseInternalKey(it->key(), &ikey, /*log_err_key=*/true));
    CHECK_EQ(ikey.user_key.ToString(), op.key());
    CHECK_EQ(ikey.sequence, 0);
    CHECK_EQ(ikey.type, ToValueType(op.type()));
    if (op.type() != OpType::DELETE) {
      CHECK_EQ(op.value(), it->value().ToString());
    }
    it->Next();
  }
  CHECK_TRUE(!it->Valid());

  // Delete sst file.
  remove(sstfile.c_str());
}