mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-27 02:44:18 +00:00
06e593376c
Summary: ## Context/Summary Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity. For that, this PR does the following: - Tag different write IOs by passing down and converting WriteOptions to IOOptions - Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and break down FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS Some related code refactoring to make the implementation cleaner: - Blob stats - Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info. - Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include files that failed to sync and bytes that failed to write. - Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority - Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification - Build table - TableBuilderOptions now includes Read/WriteOptions so BuildTable() does not need to take these two variables - Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOptions::rate_limiter_priority. Similar for BlobFileBuilder. 
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more - Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority ## Test ### db bench Flush ``` ./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100 rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377 rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377 rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0 rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0 ``` compaction, db oopen ``` Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1 rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279 rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0 rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213 rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66 ``` blob stats - just to make sure they aren't broken by this PR ``` Integrated Blob DB Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1 pre-PR: 
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600 rocksdb.blobdb.blob.file.synced COUNT : 1 rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 post-PR: rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614 - COUNT is higher and values are smaller as it includes header and footer write - COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164 rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same) rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same) ``` ``` Stacked Blob DB Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench pre-PR: rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876 rocksdb.blobdb.blob.file.synced COUNT : 8 rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 post-PR: rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924 - COUNT is higher and values are smaller as it includes header and footer write - COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. 
See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164 rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same) rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same) ``` ### Rehearsal CI stress test Trigger 3 full runs of all our CI stress tests ### Performance Flush ``` TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000 -- default: 1 thread is used to run benchmark; enable_statistics = true Pre-pr: avg 507515519.3 ns 497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908, Post-pr: avg 511971266.5 ns, regressed 0.88% 502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408, ``` Compaction ``` TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000 -- default: 1 thread is used to run benchmark Pre-pr: avg 495346098.30 ns 492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846 Post-pr: avg 504528077.20, regressed 1.85%. 
"ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97% 502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007 ``` Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats) ``` TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000 -- default: 1 thread is used to run benchmark Pre-pr: avg 3848.10 ns 3814,3838,3839,3848,3854,3854,3854,3860,3860,3860 Post-pr: avg 3874.20 ns, regressed 0.68% 3863,3867,3871,3874,3875,3877,3877,3877,3880,3881 ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910 Reviewed By: ajkr Differential Revision: D49788060 Pulled By: hx235 fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
746 lines
27 KiB
C++
746 lines
27 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
|
#include "options/options_parser.h"
|
|
|
|
#include <cmath>
|
|
#include <map>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "file/line_file_reader.h"
|
|
#include "file/writable_file_writer.h"
|
|
#include "options/cf_options.h"
|
|
#include "options/db_options.h"
|
|
#include "options/options_helper.h"
|
|
#include "port/port.h"
|
|
#include "rocksdb/convenience.h"
|
|
#include "rocksdb/db.h"
|
|
#include "rocksdb/utilities/options_type.h"
|
|
#include "test_util/sync_point.h"
|
|
#include "util/cast_util.h"
|
|
#include "util/string_util.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// Comment banner that PersistRocksDBOptions() writes at the very top of an
// options file. Every non-empty line starts with '#', which the parser in
// this file treats as a comment.
static const std::string option_file_header(
    "# This is a RocksDB option file.\n"
    "#\n"
    "# For detailed file format spec, please refer to the example file\n"
    "# in examples/rocksdb_option_file_example.ini\n"
    "#\n"
    "\n");
|
|
|
|
// Convenience overload: derives the ConfigOptions used for serialization from
// `db_opt` and forwards to the ConfigOptions-taking overload.
Status PersistRocksDBOptions(const WriteOptions& write_options,
                             const DBOptions& db_opt,
                             const std::vector<std::string>& cf_names,
                             const std::vector<ColumnFamilyOptions>& cf_opts,
                             const std::string& file_name, FileSystem* fs) {
  // Defaults are kept for escaped (true) and the sanity check level (exact).
  ConfigOptions serialize_cfg;
  // PrepareOptions must not be invoked while we are doing validation.
  serialize_cfg.invoke_prepare_options = false;
  serialize_cfg.delimiter = "\n ";
  // Honor a readahead size that was set in the input options.
  const auto readahead = db_opt.log_readahead_size;
  if (readahead > 0) {
    serialize_cfg.file_readahead_size = readahead;
  }
  return PersistRocksDBOptions(write_options, serialize_cfg, db_opt, cf_names,
                               cf_opts, file_name, fs);
}
|
|
|
|
// Serializes `db_opt` and every column family in `cf_names`/`cf_opts` into the
// options file `file_name`, syncs and closes it, and finally re-parses the
// file to verify that it matches the in-memory options.
//
// File layout, in write order: header + [Version] section, [DBOptions]
// section, then for each column family a [CFOptions "<name>"] section followed
// (when a table factory is set) by a [TableOptions/<factory> "<name>"] section.
//
// Returns InvalidArgument when `cf_names` and `cf_opts` differ in size;
// otherwise the first failing status of the create/write/sync/close/verify
// chain, or OK.
Status PersistRocksDBOptions(const WriteOptions& write_options,
                             const ConfigOptions& config_options_in,
                             const DBOptions& db_opt,
                             const std::vector<std::string>& cf_names,
                             const std::vector<ColumnFamilyOptions>& cf_opts,
                             const std::string& file_name, FileSystem* fs) {
  ConfigOptions config_options = config_options_in;
  config_options.delimiter = "\n ";  // Always separate options by newline

  TEST_SYNC_POINT("PersistRocksDBOptions:start");
  if (cf_names.size() != cf_opts.size()) {
    return Status::InvalidArgument(
        "cf_names.size() and cf_opts.size() must be the same");
  }

  std::unique_ptr<FSWritableFile> raw_file;
  Status s = fs->NewWritableFile(file_name, FileOptions(), &raw_file, nullptr);
  if (!s.ok()) {
    return s;
  }
  std::unique_ptr<WritableFileWriter> writer(new WritableFileWriter(
      std::move(raw_file), file_name, EnvOptions(), nullptr /* statistics */));
  TEST_SYNC_POINT("PersistRocksDBOptions:create");

  // Scratch buffer that receives each serialized section body.
  std::string section_body;

  IOOptions io_opts;
  s = WritableFileWriter::PrepareIOOptions(write_options, io_opts);

  // Header comment plus the [Version] section.
  if (s.ok()) {
    const std::string version_text =
        option_file_header + "[" + opt_section_titles[kOptionSectionVersion] +
        "]\n rocksdb_version=" + std::to_string(ROCKSDB_MAJOR) + "." +
        std::to_string(ROCKSDB_MINOR) + "." + std::to_string(ROCKSDB_PATCH) +
        "\n";
    s = writer->Append(io_opts, version_text);
  }
  if (s.ok()) {
    const std::string file_version_text =
        " options_file_version=" + std::to_string(ROCKSDB_OPTION_FILE_MAJOR) +
        "." + std::to_string(ROCKSDB_OPTION_FILE_MINOR) + "\n";
    s = writer->Append(io_opts, file_version_text);
  }

  // [DBOptions] section.
  if (s.ok()) {
    s = writer->Append(
        io_opts, "\n[" + opt_section_titles[kOptionSectionDBOptions] + "]\n ");
  }
  if (s.ok()) {
    s = GetStringFromDBOptions(config_options, db_opt, &section_body);
  }
  if (s.ok()) {
    s = writer->Append(io_opts, section_body + "\n");
  }

  // One [CFOptions] (+ optional [TableOptions]) pair per column family; the
  // loop stops at the first failure.
  for (size_t idx = 0; s.ok() && idx < cf_opts.size(); ++idx) {
    // CFOptions section
    s = writer->Append(
        io_opts, "\n[" + opt_section_titles[kOptionSectionCFOptions] + " \"" +
                     EscapeOptionString(cf_names[idx]) + "\"]\n ");
    if (s.ok()) {
      s = GetStringFromColumnFamilyOptions(config_options, cf_opts[idx],
                                           &section_body);
    }
    if (s.ok()) {
      s = writer->Append(io_opts, section_body + "\n");
    }

    // TableOptions section, only emitted when a table factory is configured.
    auto* factory = cf_opts[idx].table_factory.get();
    if (factory == nullptr) {
      continue;
    }
    if (s.ok()) {
      s = writer->Append(
          io_opts, "[" + opt_section_titles[kOptionSectionTableOptions] +
                       factory->Name() + " \"" +
                       EscapeOptionString(cf_names[idx]) + "\"]\n ");
    }
    if (s.ok()) {
      section_body.clear();
      s = factory->GetOptionString(config_options, &section_body);
    }
    if (s.ok()) {
      s = writer->Append(io_opts, section_body + "\n");
    }
  }

  if (s.ok()) {
    s = writer->Sync(io_opts, true /* use_fsync */);
  }
  if (s.ok()) {
    s = writer->Close(io_opts);
  }
  TEST_SYNC_POINT("PersistRocksDBOptions:written");
  if (!s.ok()) {
    return s;
  }
  // Read the file back and make sure it round-trips to the same options.
  return RocksDBOptionsParser::VerifyRocksDBOptionsFromFile(
      config_options, db_opt, cf_names, cf_opts, file_name, fs);
}
|
|
|
|
// Constructs a parser with all parsed state cleared (see Reset()).
RocksDBOptionsParser::RocksDBOptionsParser() {
  Reset();
}
|
|
|
|
void RocksDBOptionsParser::Reset() {
|
|
db_opt_ = DBOptions();
|
|
db_opt_map_.clear();
|
|
cf_names_.clear();
|
|
cf_opts_.clear();
|
|
cf_opt_maps_.clear();
|
|
has_version_section_ = false;
|
|
has_db_options_ = false;
|
|
has_default_cf_options_ = false;
|
|
for (int i = 0; i < 3; ++i) {
|
|
db_version[i] = 0;
|
|
opt_file_version[i] = 0;
|
|
}
|
|
}
|
|
|
|
// Returns true when `line` looks like a section header: at least two
// characters long, starting with '[' and ending with ']'.
bool RocksDBOptionsParser::IsSection(const std::string& line) {
  return line.size() >= 2 && line.front() == '[' && line.back() == ']';
}
|
|
|
|
// Splits a section header `line` into its title and optional quoted argument
// and maps the title to an OptionSection.
//
// A section has the form [<SectionName> "<SectionArg>"], where "<SectionArg>"
// is optional. `*title` and `*argument` are always written; `*section` is
// reset to kOptionSectionUnknown and only changed when the title is
// recognized, in which case the result of CheckSection() is returned.
// Unrecognized titles yield InvalidArgument.
Status RocksDBOptionsParser::ParseSection(OptionSection* section,
                                          std::string* title,
                                          std::string* argument,
                                          const std::string& line,
                                          const int line_num) {
  *section = kOptionSectionUnknown;

  const size_t first_quote = line.find('"');
  const size_t last_quote = line.rfind('"');
  // Two distinct quote characters mean the optional argument is present.
  const bool has_argument =
      first_quote != std::string::npos && first_quote != last_quote;
  if (has_argument) {
    *title = TrimAndRemoveComment(line.substr(1, first_quote - 1), true);
    *argument = UnescapeOptionString(
        line.substr(first_quote + 1, last_quote - first_quote - 1));
  } else {
    *title = TrimAndRemoveComment(line.substr(1, line.size() - 2), true);
    *argument = "";
  }

  for (int i = 0; i < kOptionSectionUnknown; ++i) {
    const auto& known_title = opt_section_titles[i];
    // The parsed title must start with the known title.
    if (title->compare(0, known_title.size(), known_title) != 0) {
      continue;
    }
    bool matched = false;
    if (i == kOptionSectionVersion || i == kOptionSectionDBOptions ||
        i == kOptionSectionCFOptions) {
      // These titles must match exactly (prefix match + same length).
      matched = (title->size() == known_title.size());
    } else if (i == kOptionSectionTableOptions) {
      // This kind of section carries a suffix after the known title.
      matched = (title->size() > known_title.size());
    }
    if (matched) {
      *section = static_cast<OptionSection>(i);
      return CheckSection(*section, *argument, line_num);
    }
  }
  return Status::InvalidArgument(std::string("Unknown section ") + line);
}
|
|
|
|
// Builds an InvalidArgument status tagged with the parser prefix and with
// " (at line N)" appended to `message`.
Status RocksDBOptionsParser::InvalidArgument(const int line_num,
                                             const std::string& message) {
  const std::string detail =
      message + " (at line " + std::to_string(line_num) + ")";
  return Status::InvalidArgument("[RocksDBOptionsParser Error] ", detail);
}
|
|
|
|
// Splits a `name = value` statement at the first '='.
//
// `*name` receives the trimmed left-hand side (comments are not stripped) and
// `*value` the trimmed, comment-stripped right-hand side. Returns
// InvalidArgument when there is no '=' or when the name is empty.
Status RocksDBOptionsParser::ParseStatement(std::string* name,
                                            std::string* value,
                                            const std::string& line,
                                            const int line_num) {
  const size_t separator = line.find('=');
  if (separator == std::string::npos) {
    return InvalidArgument(line_num, "A valid statement must have a '='.");
  }

  *name = TrimAndRemoveComment(line.substr(0, separator), true);
  // Everything after the '=' up to the end of the line is the value.
  *value = TrimAndRemoveComment(line.substr(separator + 1));
  if (!name->empty()) {
    return Status::OK();
  }
  return InvalidArgument(line_num,
                         "A valid statement must have a variable name.");
}
|
|
|
|
// Convenience overload: wraps the two flags in a default ConfigOptions and
// forwards to the ConfigOptions-taking Parse().
Status RocksDBOptionsParser::Parse(const std::string& file_name, FileSystem* fs,
                                   bool ignore_unknown_options,
                                   size_t file_readahead_size) {
  // Defaults are kept for escaped (true) and the sanity check level (exact).
  ConfigOptions parse_cfg;
  if (file_readahead_size > 0) {
    // Only override the default readahead when the caller supplied one.
    parse_cfg.file_readahead_size = file_readahead_size;
  }
  parse_cfg.ignore_unknown_options = ignore_unknown_options;
  return Parse(parse_cfg, file_name, fs);
}
|
|
|
|
// Parses the options file `file_name` line by line and stores the result in
// this parser (any previously parsed state is discarded first).
//
// Each non-empty line, after comment removal, is either a section header or a
// single-line `name=value` statement. Statements are accumulated in a map
// that is handed to EndSection() when the next header (or end of file) is
// reached.
//
// Returns the first error reported by file creation, the line reader,
// ParseSection(), ParseStatement() or EndSection(); otherwise the result of
// ValidityCheck().
Status RocksDBOptionsParser::Parse(const ConfigOptions& config_options_in,
                                   const std::string& file_name,
                                   FileSystem* fs) {
  Reset();
  // Local copy: ignore_unknown_options may be switched off below.
  ConfigOptions config_options = config_options_in;

  std::unique_ptr<FSSequentialFile> seq_file;
  Status s = fs->NewSequentialFile(file_name, FileOptions(), &seq_file,
                                   nullptr);
  if (!s.ok()) {
    return s;
  }
  LineFileReader lf_reader(std::move(seq_file), file_name,
                           config_options.file_readahead_size);

  OptionSection section = kOptionSectionUnknown;
  std::string title;
  std::string argument;
  std::unordered_map<std::string, std::string> opt_map;
  std::string line;
  // we only support single-lined statement.
  while (lf_reader.ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) {
    int line_num = static_cast<int>(lf_reader.GetLineNumber());
    line = TrimAndRemoveComment(line);
    if (line.empty()) {
      continue;
    }
    if (IsSection(line)) {
      // A new header closes the section that was being collected.
      s = EndSection(config_options, section, title, argument, opt_map);
      opt_map.clear();
      if (!s.ok()) {
        return s;
      }

      // If the option file is not generated by a higher minor version,
      // there shouldn't be any unknown option. `section` still refers to the
      // section that just ended, so db_version is populated at this point.
      if (config_options.ignore_unknown_options &&
          section == kOptionSectionVersion) {
        if (db_version[0] < ROCKSDB_MAJOR || (db_version[0] == ROCKSDB_MAJOR &&
                                              db_version[1] <= ROCKSDB_MINOR)) {
          config_options.ignore_unknown_options = false;
        }
      }

      s = ParseSection(&section, &title, &argument, line, line_num);
      if (!s.ok()) {
        return s;
      }
    } else {
      std::string name;
      std::string value;
      s = ParseStatement(&name, &value, line, line_num);
      if (!s.ok()) {
        return s;
      }
      // insert() keeps the first value if a name is repeated in a section.
      opt_map.insert({name, value});
    }
  }
  // ReadLine() returning false can mean EOF or an I/O error; tell them apart.
  s = lf_reader.GetStatus();
  if (!s.ok()) {
    return s;
  }

  // Close the last section of the file.
  s = EndSection(config_options, section, title, argument, opt_map);
  opt_map.clear();
  if (!s.ok()) {
    return s;
  }
  return ValidityCheck();
}
|
|
|
|
// Validates that a newly seen section header is allowed at this point of the
// file and records that it has been seen.
//
// Rules enforced:
//  - at most one DBOptions section and at most one Version section;
//  - the first CFOptions section must be the default column family, the
//    default column family may not appear later, and no column family name
//    may repeat;
//  - a TableOptions section must name a column family that has already been
//    parsed.
//
// Returns InvalidArgument (annotated with `line_num`) on violation, OK
// otherwise.
Status RocksDBOptionsParser::CheckSection(const OptionSection section,
                                          const std::string& section_arg,
                                          const int line_num) {
  if (section == kOptionSectionDBOptions) {
    if (has_db_options_) {
      return InvalidArgument(
          line_num,
          "More than one DBOption section found in the option config file");
    }
    has_db_options_ = true;
  } else if (section == kOptionSectionCFOptions) {
    bool is_default_cf = (section_arg == kDefaultColumnFamilyName);
    if (cf_opts_.size() == 0 && !is_default_cf) {
      return InvalidArgument(
          line_num,
          "Default column family must be the first CFOptions section "
          "in the option config file");
    } else if (cf_opts_.size() != 0 && is_default_cf) {
      return InvalidArgument(
          line_num,
          "Default column family must be the first CFOptions section "
          "in the option config file");
    } else if (GetCFOptions(section_arg) != nullptr) {
      return InvalidArgument(
          line_num,
          "Two identical column families found in option config file");
    }
    has_default_cf_options_ |= is_default_cf;
  } else if (section == kOptionSectionTableOptions) {
    if (GetCFOptions(section_arg) == nullptr) {
      return InvalidArgument(
          line_num, std::string(
                        "Does not find a matched column family name in "
                        "TableOptions section.  Column Family Name:") +
                        section_arg);
    }
  } else if (section == kOptionSectionVersion) {
    if (has_version_section_) {
      return InvalidArgument(
          line_num,
          "More than one Version section found in the option config file.");
    }
    has_version_section_ = true;
  }
  return Status::OK();
}
|
|
|
|
// Parses a dotted, digits-only version string (e.g. "1.2.3") into `version`.
//
// `version[0 .. max_count)` is zeroed first, then filled left to right with
// the numeric components found; missing trailing components stay 0.
// `ver_name` is only used in error messages.
//
// Returns InvalidArgument when the string has more than `max_count - 1` dots,
// has a dot without a digit directly before or after it, or contains any
// character other than digits and dots.
Status RocksDBOptionsParser::ParseVersionNumber(const std::string& ver_name,
                                                const std::string& ver_string,
                                                const int max_count,
                                                int* version) {
  int version_index = 0;
  int current_number = 0;
  int current_digit_count = 0;
  bool has_dot = false;
  for (int i = 0; i < max_count; ++i) {
    version[i] = 0;
  }
  constexpr int kBufferSize = 200;
  char buffer[kBufferSize];
  for (const char ch : ver_string) {
    if (ch == '.') {
      if (version_index >= max_count - 1) {
        snprintf(buffer, sizeof(buffer) - 1,
                 "A valid %s can only contains at most %d dots.",
                 ver_name.c_str(), max_count - 1);
        return Status::InvalidArgument(buffer);
      }
      if (current_digit_count == 0) {
        snprintf(buffer, sizeof(buffer) - 1,
                 "A valid %s must have at least one digit before each dot.",
                 ver_name.c_str());
        return Status::InvalidArgument(buffer);
      }
      // A dot finishes the current component; start collecting the next one.
      version[version_index++] = current_number;
      current_number = 0;
      current_digit_count = 0;
      has_dot = true;
    } else if (isdigit(static_cast<unsigned char>(ch)) != 0) {
      // The cast matters: passing a negative char to isdigit() is undefined.
      current_number = current_number * 10 + (ch - '0');
      current_digit_count++;
    } else {
      snprintf(buffer, sizeof(buffer) - 1,
               "A valid %s can only contains dots and numbers.",
               ver_name.c_str());
      return Status::InvalidArgument(buffer);
    }
  }
  // Store the component that follows the last dot (or the only component).
  version[version_index] = current_number;
  if (has_dot && current_digit_count == 0) {
    snprintf(buffer, sizeof(buffer) - 1,
             "A valid %s must have at least one digit after each dot.",
             ver_name.c_str());
    return Status::InvalidArgument(buffer);
  }
  return Status::OK();
}
|
|
|
|
// Consumes the `name=value` pairs collected for the section that just ended
// and folds them into the parser state, depending on the section kind:
//  - DBOptions:    parsed into db_opt_; the raw map is kept in db_opt_map_.
//  - CFOptions:    a new column family entry is appended and parsed; the raw
//                  map is kept in cf_opt_maps_ on success.
//  - TableOptions: a table factory named by the title suffix is created for
//                  the (already parsed) column family and configured.
//  - Version:      rocksdb_version / options_file_version are parsed.
// Any other section kind is a no-op that returns OK.
Status RocksDBOptionsParser::EndSection(
    const ConfigOptions& config_options, const OptionSection section,
    const std::string& section_title, const std::string& section_arg,
    const std::unordered_map<std::string, std::string>& opt_map) {
  switch (section) {
    case kOptionSectionDBOptions: {
      Status s =
          GetDBOptionsFromMap(config_options, DBOptions(), opt_map, &db_opt_);
      if (s.ok()) {
        db_opt_map_ = opt_map;
      }
      return s;
    }
    case kOptionSectionCFOptions: {
      // Duplicates are expected to be rejected when the header is parsed, so
      // this is only asserted here.
      assert(GetCFOptions(section_arg) == nullptr);
      cf_names_.emplace_back(section_arg);
      cf_opts_.emplace_back();
      Status s = GetColumnFamilyOptionsFromMap(
          config_options, ColumnFamilyOptions(), opt_map, &cf_opts_.back());
      if (s.ok()) {
        // keep the parsed string.
        cf_opt_maps_.emplace_back(opt_map);
      }
      return s;
    }
    case kOptionSectionTableOptions: {
      assert(GetCFOptions(section_arg) != nullptr);
      auto* target_cf = GetCFOptionsImpl(section_arg);
      if (target_cf == nullptr) {
        return Status::InvalidArgument(
            "The specified column family must be defined before the "
            "TableOptions section:",
            section_arg);
      }
      // Ignore error as table factory deserialization is optional
      target_cf->table_factory.reset();
      // The factory name is whatever follows the known TableOptions title.
      const std::string factory_name = section_title.substr(
          opt_section_titles[kOptionSectionTableOptions].size());
      Status s = TableFactory::CreateFromString(config_options, factory_name,
                                                &(target_cf->table_factory));
      if (!s.ok() || target_cf->table_factory == nullptr) {
        // Return OK for not supported table factories as TableFactory
        // Deserialization is optional.
        target_cf->table_factory.reset();
        return Status::OK();
      }
      s = target_cf->table_factory->ConfigureFromMap(config_options, opt_map);
      // Translate any other error (NotFound, NotSupported, ...) to
      // InvalidArgument.
      if (s.ok() || s.IsInvalidArgument()) {
        return s;
      }
      return Status::InvalidArgument(s.getState());
    }
    case kOptionSectionVersion: {
      for (const auto& entry : opt_map) {
        if (entry.first == "rocksdb_version") {
          Status s = ParseVersionNumber(entry.first, entry.second, 3,
                                        db_version);
          if (!s.ok()) {
            return s;
          }
        } else if (entry.first == "options_file_version") {
          Status s = ParseVersionNumber(entry.first, entry.second, 2,
                                        opt_file_version);
          if (!s.ok()) {
            return s;
          }
          if (opt_file_version[0] < 1) {
            return Status::InvalidArgument(
                "A valid options_file_version must be at least 1.");
          }
        }
      }
      return Status::OK();
    }
    default:
      return Status::OK();
  }
}
|
|
|
|
// Final check after a whole file has been parsed: both a DBOptions section
// and a CFOptions section for the default column family must have been seen.
// Returns Corruption for the first one that is missing, OK otherwise.
Status RocksDBOptionsParser::ValidityCheck() {
  if (has_db_options_ && has_default_cf_options_) {
    return Status::OK();
  }
  if (!has_db_options_) {
    return Status::Corruption(
        "A RocksDB Option file must have a single DBOptions section");
  }
  return Status::Corruption(
      "A RocksDB Option file must have a single CFOptions:default section");
}
|
|
|
|
// Returns `line` with leading and trailing whitespace removed.
//
// Unless `trim_only` is true, everything from the first unescaped '#' onward
// is dropped before trimming. A '#' directly preceded by a backslash is
// treated as escaped and does not start a comment. Returns an empty string
// when nothing but whitespace (or a comment) remains.
std::string RocksDBOptionsParser::TrimAndRemoveComment(const std::string& line,
                                                       bool trim_only) {
  size_t start = 0;
  size_t end = line.size();

  // we only support "#" style comment
  if (!trim_only) {
    size_t search_pos = 0;
    while (search_pos < line.size()) {
      size_t comment_pos = line.find('#', search_pos);
      if (comment_pos == std::string::npos) {
        break;
      }
      if (comment_pos == 0 || line[comment_pos - 1] != '\\') {
        end = comment_pos;
        break;
      }
      // Escaped '#': keep looking after it.
      search_pos = comment_pos + 1;
    }
  }

  // The unsigned char casts matter: passing a negative char (any non-ASCII
  // byte where char is signed) to isspace() is undefined behavior.
  while (start < end &&
         isspace(static_cast<unsigned char>(line[start])) != 0) {
    ++start;
  }

  // start < end implies end > 0.
  while (start < end &&
         isspace(static_cast<unsigned char>(line[end - 1])) != 0) {
    --end;
  }

  if (start < end) {
    return line.substr(start, end - start);
  }

  return "";
}
|
|
|
|
// Parses the options file `file_name` and checks that it agrees with the
// supplied in-memory options: DB options, column family names (by position),
// column family options and table factories.
//
// How strict the size comparisons are depends on
// `config_options_in.sanity_level`: at kSanityLevelLooselyCompatible or above
// the counts must match exactly; below that the file may contain more column
// families than the caller supplied, but never fewer.
//
// Returns the first parse or verification failure, OK otherwise.
Status RocksDBOptionsParser::VerifyRocksDBOptionsFromFile(
    const ConfigOptions& config_options_in, const DBOptions& db_opt,
    const std::vector<std::string>& cf_names,
    const std::vector<ColumnFamilyOptions>& cf_opts,
    const std::string& file_name, FileSystem* fs) {
  RocksDBOptionsParser parser;
  ConfigOptions config_options = config_options_in;
  config_options.invoke_prepare_options =
      false;  // No need to do a prepare for verify
  if (config_options.sanity_level < ConfigOptions::kSanityLevelExactMatch) {
    // If we are not doing an exact comparison, we should ignore
    // unsupported options, as they may cause the Parse to fail
    // (if the ObjectRegistry is not initialized)
    config_options.ignore_unsupported_options = true;
  }
  Status s = parser.Parse(config_options, file_name, fs);
  if (!s.ok()) {
    return s;
  }

  // Verify DBOptions
  s = VerifyDBOptions(config_options, db_opt, *parser.db_opt(),
                      parser.db_opt_map());
  if (!s.ok()) {
    return s;
  }

  // Verify ColumnFamily Name
  if (cf_names.size() != parser.cf_names()->size()) {
    if (config_options.sanity_level >=
        ConfigOptions::kSanityLevelLooselyCompatible) {
      return Status::InvalidArgument(
          "[RocksDBOptionParser Error] The persisted options does not have "
          "the same number of column family names as the db instance.");
    } else if (cf_names.size() > parser.cf_names()->size()) {
      // Compare the name counts here (not the option counts): the loop below
      // indexes the parsed names with at(i) for every supplied name.
      return Status::InvalidArgument(
          "[RocksDBOptionsParser Error]",
          "The persisted options file has less number of column family "
          "names than that of the specified one.");
    }
  }
  for (size_t i = 0; i < cf_names.size(); ++i) {
    if (cf_names[i] != parser.cf_names()->at(i)) {
      return Status::InvalidArgument(
          "[RocksDBOptionParser Error] The persisted options and the db "
          "instance does not have the same name for column family ",
          std::to_string(i));
    }
  }

  // Verify Column Family Options
  if (cf_opts.size() != parser.cf_opts()->size()) {
    if (config_options.sanity_level >=
        ConfigOptions::kSanityLevelLooselyCompatible) {
      return Status::InvalidArgument(
          "[RocksDBOptionsParser Error]",
          "The persisted options does not have the same number of "
          "column families as the db instance.");
    } else if (cf_opts.size() > parser.cf_opts()->size()) {
      return Status::InvalidArgument(
          "[RocksDBOptionsParser Error]",
          "The persisted options file has less number of column families "
          "than that of the specified number.");
    }
  }
  for (size_t i = 0; i < cf_opts.size(); ++i) {
    s = VerifyCFOptions(config_options, cf_opts[i], parser.cf_opts()->at(i),
                        &(parser.cf_opt_maps()->at(i)));
    if (!s.ok()) {
      return s;
    }
    s = VerifyTableFactory(config_options, cf_opts[i].table_factory.get(),
                           parser.cf_opts()->at(i).table_factory.get());
    if (!s.ok()) {
      return s;
    }
  }

  return Status::OK();
}
|
|
|
|
// Compares `base_opt` (the in-memory options) against `file_opt` (the options
// parsed from a file). On the first mismatching option reported by
// AreEquivalent(), returns InvalidArgument with a message naming the option
// and, when both values can be re-serialized, showing both values.
Status RocksDBOptionsParser::VerifyDBOptions(
    const ConfigOptions& config_options, const DBOptions& base_opt,
    const DBOptions& file_opt,
    const std::unordered_map<std::string, std::string>* opt_map) {
  auto base_config = DBOptionsAsConfigurable(base_opt, opt_map);
  auto file_config = DBOptionsAsConfigurable(file_opt, opt_map);
  std::string mismatch;
  if (base_config->AreEquivalent(config_options, file_config.get(),
                                 &mismatch)) {
    return Status::OK();
  }

  // The options differ: assemble the error message in a fixed-size buffer.
  const size_t kBufferSize = 2048;
  char buffer[kBufferSize];
  const int offset = snprintf(buffer, sizeof(buffer),
                              "[RocksDBOptionsParser]: "
                              "failed the verification on DBOptions::%s -- ",
                              mismatch.c_str());

  // Fetch both serialized values; stop at the first lookup that fails.
  std::string base_value;
  std::string file_value;
  Status s = base_config->GetOption(config_options, mismatch, &base_value);
  if (s.ok()) {
    s = file_config->GetOption(config_options, mismatch, &file_value);
  }

  assert(offset >= 0);
  assert(static_cast<size_t>(offset) < sizeof(buffer));
  // Append the detail part right after the prefix written above.
  char* const tail = buffer + offset;
  const size_t tail_size = sizeof(buffer) - static_cast<size_t>(offset);
  if (s.ok()) {
    snprintf(tail, tail_size,
             "-- The specified one is %s while the persisted one is %s.\n",
             base_value.c_str(), file_value.c_str());
  } else {
    snprintf(tail, tail_size, "-- Unable to re-serialize an option: %s.\n",
             s.ToString().c_str());
  }
  return Status::InvalidArgument(Slice(buffer, strlen(buffer)));
}
|
|
|
|
// Compares `base_opt` (the in-memory column family options) against
// `file_opt` (the options parsed from a file). On the first mismatching
// option reported by AreEquivalent(), returns InvalidArgument with a message
// naming the option and, when both values can be re-serialized, showing both
// values. `opt_map` may be null; when present it supplies the raw text of the
// persisted value if the parsed one serializes as kNullptrString.
Status RocksDBOptionsParser::VerifyCFOptions(
    const ConfigOptions& config_options, const ColumnFamilyOptions& base_opt,
    const ColumnFamilyOptions& file_opt,
    const std::unordered_map<std::string, std::string>* opt_map) {
  auto base_config = CFOptionsAsConfigurable(base_opt, opt_map);
  auto file_config = CFOptionsAsConfigurable(file_opt, opt_map);
  std::string mismatch;
  if (!base_config->AreEquivalent(config_options, file_config.get(),
                                  &mismatch)) {
    std::string base_value;
    std::string file_value;
    // The options do not match
    const size_t kBufferSize = 2048;
    char buffer[kBufferSize];
    Status s = base_config->GetOption(config_options, mismatch, &base_value);
    if (s.ok()) {
      s = file_config->GetOption(config_options, mismatch, &file_value);
      // In file_opt, certain options like MergeOperator may be nullptr due to
      // factor methods not available. So we use opt_map to get
      // option value to use in the error message below.
      if (s.ok() && file_value == kNullptrString && opt_map) {
        auto const& opt_val_str = (opt_map->find(mismatch));
        if (opt_val_str != opt_map->end()) {
          file_value = opt_val_str->second;
        }
      }
    }
    int offset = snprintf(buffer, sizeof(buffer),
                          "[RocksDBOptionsParser]: "
                          "failed the verification on ColumnFamilyOptions::%s",
                          mismatch.c_str());
    assert(offset >= 0);
    assert(static_cast<size_t>(offset) < sizeof(buffer));
    if (s.ok()) {
      snprintf(buffer + offset, sizeof(buffer) - static_cast<size_t>(offset),
               "--- The specified one is %s while the persisted one is %s.\n",
               base_value.c_str(), file_value.c_str());
    } else {
      snprintf(buffer + offset, sizeof(buffer) - static_cast<size_t>(offset),
               "--- Unable to re-serialize an option: %s.\n",
               s.ToString().c_str());
    }
    // Only hand over the formatted text. Passing sizeof(buffer) would also
    // copy the uninitialized bytes that follow the terminating NUL.
    return Status::InvalidArgument(Slice(buffer, strlen(buffer)));
  }  // For each option
  return Status::OK();
}
|
|
|
|
// Checks that the table factory parsed from a file (`file_tf`) is compatible
// with the in-memory one (`base_tf`). Nothing is verified when either pointer
// is null. Otherwise a name mismatch (checked only above kSanityLevelNone) or
// a failing AreEquivalent() yields Corruption.
Status RocksDBOptionsParser::VerifyTableFactory(
    const ConfigOptions& config_options, const TableFactory* base_tf,
    const TableFactory* file_tf) {
  if (base_tf == nullptr || file_tf == nullptr) {
    // TODO(yhchiang): further support sanity check here
    return Status::OK();
  }

  const bool check_names =
      config_options.sanity_level > ConfigOptions::kSanityLevelNone;
  if (check_names &&
      std::string(base_tf->Name()) != std::string(file_tf->Name())) {
    return Status::Corruption(
        "[RocksDBOptionsParser]: "
        "failed the verification on TableFactory->Name()");
  }

  std::string mismatch;
  if (!base_tf->AreEquivalent(config_options, file_tf, &mismatch)) {
    const std::string prefix =
        std::string("[RocksDBOptionsParser]:failed the verification on ") +
        base_tf->Name() + "::";
    return Status::Corruption(prefix, mismatch);
  }
  return Status::OK();
}
|
|
} // namespace ROCKSDB_NAMESPACE
|