mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-27 11:43:49 +00:00
babe56ddba
Summary: Users can set the priority for file reads associated with their operation by setting `ReadOptions::rate_limiter_priority` to something other than `Env::IO_TOTAL`. Rate limiting `VerifyChecksum()` and `VerifyFileChecksums()` is the motivation for this PR, so it also includes benchmarks and minor bug fixes to get that working.

`RandomAccessFileReader::Read()` already had support for rate limiting compaction reads. I changed that rate limiting so that it is no longer specific to compaction but is instead performed according to the passed-in `Env::IOPriority`. Now the compaction read rate limiting is supported by setting `rate_limiter_priority = Env::IO_LOW` on its `ReadOptions`.

There is no default value for the new `Env::IOPriority` parameter to `RandomAccessFileReader::Read()`. That means this PR goes through all callers (in some cases multiple layers up the call stack) to find a `ReadOptions` to provide the priority. There are TODOs for cases where I believe it would be good to let the user control the priority some day (e.g., file footer reads), and no TODO in cases where I believe it doesn't matter (e.g., trace file reads).

The API doc only lists the missing cases where a file read associated with a provided `ReadOptions` cannot be rate limited. For cases like file ingestion checksum calculation, there is no API to provide `ReadOptions` or `Env::IOPriority`, so I didn't count that as missing.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9424

Test Plan:
- new unit tests
- new benchmarks on ~50MB database with 1MB/s read rate limit and 100ms refill interval; verified with strace that reads are chunked (at 0.1MB per chunk) and spaced roughly 100ms apart.
  - setup command: `./db_bench -benchmarks=fillrandom,compact -db=/tmp/testdb -target_file_size_base=1048576 -disable_auto_compactions=true -file_checksum=true`
  - benchmarks command: `strace -ttfe pread64 ./db_bench -benchmarks=verifychecksum,verifyfilechecksums -use_existing_db=true -db=/tmp/testdb -rate_limiter_bytes_per_sec=1048576 -rate_limit_bg_reads=1 -rate_limit_user_ops=true -file_checksum=true`
- crash test using IO_USER priority on non-validation reads with https://github.com/facebook/rocksdb/issues/9567 reverted: `python3 tools/db_crashtest.py blackbox --max_key=1000000 --write_buffer_size=524288 --target_file_size_base=524288 --level_compaction_dynamic_level_bytes=true --duration=3600 --rate_limit_bg_reads=true --rate_limit_user_ops=true --rate_limiter_bytes_per_sec=10485760 --interval=10`

Reviewed By: hx235
Differential Revision: D33747386
Pulled By: ajkr
fbshipit-source-id: a2d985e97912fba8c54763798e04f006ccc56e0c
134 lines
4.1 KiB
C++
134 lines
4.1 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#include "utilities/trace/file_trace_reader_writer.h"
|
|
|
|
#include "env/composite_env_wrapper.h"
|
|
#include "file/random_access_file_reader.h"
|
|
#include "file/writable_file_writer.h"
|
|
#include "trace_replay/trace_replay.h"
|
|
#include "util/coding.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// Size, in bytes, of the scratch buffer that the FileTraceReader constructor
// allocates. Read() never requests more than this many payload bytes in a
// single file read.
const unsigned int FileTraceReader::kBufferSize = 1024; // 1KB
|
|
|
|
FileTraceReader::FileTraceReader(
|
|
std::unique_ptr<RandomAccessFileReader>&& reader)
|
|
: file_reader_(std::move(reader)),
|
|
offset_(0),
|
|
buffer_(new char[kBufferSize]) {}
|
|
|
|
// Closes the reader and then frees the scratch buffer allocated by the
// constructor.
FileTraceReader::~FileTraceReader() {
  // The status returned by Close() is deliberately not inspected here.
  auto close_status = Close();
  close_status.PermitUncheckedError();

  delete[] buffer_;
}
|
|
|
|
// Drops the underlying file reader. Always returns OK.
Status FileTraceReader::Close() {
  // Assigning nullptr destroys the owned reader, if any.
  file_reader_ = nullptr;
  return Status::OK();
}
|
|
|
|
// Rewinds the read position to the beginning of the trace file.
// Returns IOError if the reader has already been closed, OK otherwise.
Status FileTraceReader::Reset() {
  const bool is_open = (file_reader_ != nullptr);
  if (is_open) {
    offset_ = 0;
    return Status::OK();
  }
  return Status::IOError("TraceReader is closed.");
}
|
|
|
|
// Reads the next trace record from the file into *data.
//
// A record consists of kTraceMetadataSize bytes of metadata followed by a
// variable-length payload. The payload length is the fixed32 value stored in
// the metadata at byte offset kTraceTimestampSize + kTraceTypeSize. On success
// *data holds the metadata bytes followed by the payload bytes and offset_ has
// been advanced past the record.
//
// Returns:
//   - IOError    if the reader has been closed (same condition and message as
//                Reset()).
//   - Incomplete if no bytes are available at the current offset, i.e. there
//                is no more data to read.
//   - Corruption if fewer metadata or payload bytes are available than the
//                record requires.
//   - Any non-OK status reported by the underlying file read.
//
// When an error occurs while reading the payload, *data and offset_ may have
// already been partially updated.
Status FileTraceReader::Read(std::string* data) {
  // Report a closed reader as an error instead of only asserting, so that
  // release builds do not dereference a null file reader.
  if (file_reader_ == nullptr) {
    return Status::IOError("TraceReader is closed.");
  }

  Status s = file_reader_->Read(IOOptions(), offset_, kTraceMetadataSize,
                                &result_, buffer_, nullptr,
                                Env::IO_TOTAL /* rate_limiter_priority */);
  if (!s.ok()) {
    return s;
  }
  if (result_.size() == 0) {
    // No more data to read.
    // TODO: Come up with a better way to indicate end of data. Maybe this
    // could be avoided once a footer is introduced.
    return Status::Incomplete();
  }
  if (result_.size() < kTraceMetadataSize) {
    return Status::Corruption("Corrupted trace file.");
  }
  *data = result_.ToString();
  offset_ += kTraceMetadataSize;

  // Decode the payload length from result_, not from buffer_. The file read
  // only guarantees that result_ refers to the bytes that were read; it is
  // allowed, but not required, to place them in the scratch buffer, so
  // buffer_ may not contain the metadata at all.
  const uint32_t payload_len =
      DecodeFixed32(result_.data() + kTraceTimestampSize + kTraceTypeSize);

  // Read the payload in chunks of at most kBufferSize bytes, since that is
  // the capacity of the scratch buffer.
  unsigned int bytes_to_read = payload_len;
  unsigned int to_read =
      bytes_to_read > kBufferSize ? kBufferSize : bytes_to_read;
  while (to_read > 0) {
    s = file_reader_->Read(IOOptions(), offset_, to_read, &result_, buffer_,
                           nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
    if (!s.ok()) {
      return s;
    }
    if (result_.size() < to_read) {
      // The file ended before the advertised payload length was reached.
      return Status::Corruption("Corrupted trace file.");
    }
    data->append(result_.data(), result_.size());

    offset_ += to_read;
    bytes_to_read -= to_read;
    to_read = bytes_to_read > kBufferSize ? kBufferSize : bytes_to_read;
  }

  return s;
}
|
|
|
|
FileTraceWriter::FileTraceWriter(
|
|
std::unique_ptr<WritableFileWriter>&& file_writer)
|
|
: file_writer_(std::move(file_writer)) {}
|
|
|
|
// Closes the writer on destruction; the status returned by Close() is
// deliberately not inspected.
FileTraceWriter::~FileTraceWriter() {
  auto close_status = Close();
  close_status.PermitUncheckedError();
}
|
|
|
|
// Drops the underlying file writer. Always returns OK.
Status FileTraceWriter::Close() {
  // Assigning nullptr destroys the owned writer, if any.
  file_writer_ = nullptr;
  return Status::OK();
}
|
|
|
|
// Appends `data` to the trace file and returns the status of that append.
Status FileTraceWriter::Write(const Slice& data) {
  Status append_status = file_writer_->Append(data);
  return append_status;
}
|
|
|
|
// Returns the file size as reported by the underlying file writer.
uint64_t FileTraceWriter::GetFileSize() {
  const uint64_t current_size = file_writer_->GetFileSize();
  return current_size;
}
|
|
|
|
// Opens `trace_filename` for random-access reads through the file system of
// `env`, using file options derived from `env_options`. On success,
// *trace_reader is set to a new FileTraceReader that owns the opened file.
// If opening fails, that status is returned and *trace_reader is untouched.
Status NewFileTraceReader(Env* env, const EnvOptions& env_options,
                          const std::string& trace_filename,
                          std::unique_ptr<TraceReader>* trace_reader) {
  const FileOptions file_options(env_options);
  std::unique_ptr<RandomAccessFileReader> reader;

  Status status = RandomAccessFileReader::Create(
      env->GetFileSystem(), trace_filename, file_options, &reader, nullptr);
  if (status.ok()) {
    trace_reader->reset(new FileTraceReader(std::move(reader)));
  }
  return status;
}
|
|
|
|
// Creates a writable file named `trace_filename` through the file system of
// `env`, using file options derived from `env_options`. On success,
// *trace_writer is set to a new FileTraceWriter that owns the created file.
// If creation fails, that status is returned and *trace_writer is untouched.
Status NewFileTraceWriter(Env* env, const EnvOptions& env_options,
                          const std::string& trace_filename,
                          std::unique_ptr<TraceWriter>* trace_writer) {
  const FileOptions file_options(env_options);
  std::unique_ptr<WritableFileWriter> writer;

  Status status = WritableFileWriter::Create(
      env->GetFileSystem(), trace_filename, file_options, &writer, nullptr);
  if (status.ok()) {
    trace_writer->reset(new FileTraceWriter(std::move(writer)));
  }
  return status;
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|