From 5576ded7625419fa43f5126d5679825e029e78ed Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 18 Jan 2022 17:25:33 -0800 Subject: [PATCH] Add Options::DisableExtraChecks, clarify force_consistency_checks (#9363) Summary: In response to https://github.com/facebook/rocksdb/issues/9354, this PR adds a way for users to "opt out" of extra checks that can impact peak write performance, which currently only includes force_consistency_checks. I considered including some other options but did not see a db_bench performance difference. Also clarify in comment for force_consistency_checks that it can "slow down saturated writing." Pull Request resolved: https://github.com/facebook/rocksdb/pull/9363 Test Plan: basic coverage in unit tests Using my perf test in https://github.com/facebook/rocksdb/issues/9354 comment, I see force_consistency_checks=true -> 725360 ops/s force_consistency_checks=false -> 783072 ops/s Reviewed By: mrambacher Differential Revision: D33636559 Pulled By: pdillinger fbshipit-source-id: 25bfd006f4844675e7669b342817dd4c6a641e84 --- DEFAULT_OPTIONS_HISTORY.md | 2 +- HISTORY.md | 4 ++++ db/db_test_util.cc | 2 ++ include/rocksdb/advanced_options.h | 4 +++- include/rocksdb/options.h | 12 +++++++++++- options/options.cc | 13 +++++++++++++ tools/db_bench_tool.cc | 20 +++++++++++++++++++- 7 files changed, 53 insertions(+), 4 deletions(-) diff --git a/DEFAULT_OPTIONS_HISTORY.md b/DEFAULT_OPTIONS_HISTORY.md index 26280ee34d..82c64d5235 100644 --- a/DEFAULT_OPTIONS_HISTORY.md +++ b/DEFAULT_OPTIONS_HISTORY.md @@ -1,4 +1,4 @@ -# RocksDB default options change log +# RocksDB default options change log (NO LONGER MAINTAINED) ## Unreleased * delayed_write_rate takes the rate given by rate_limiter if not specified. diff --git a/HISTORY.md b/HISTORY.md index 7bcad2b84a..7ae9bbbafc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,11 +4,15 @@ * Added values to `TraceFilterType`: `kTraceFilterIteratorSeek`, `kTraceFilterIteratorSeekForPrev`, and `kTraceFilterMultiGet`. They can be set in `TraceOptions` to filter out the operation types after which they are named. * Added `TraceOptions::preserve_write_order`. When enabled it guarantees write records are traced in the same order they are logged to WAL and applied to the DB. By default it is disabled (false) to match the legacy behavior and prevent regression. * Made the Env class extend the Customizable class. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. +* `Options::OldDefaults` is marked deprecated, as it is no longer maintained. * Add ObjectLibrary::AddFactory and ObjectLibrary::PatternEntry classes. This method and associated class are the preferred mechanism for registering factories with the ObjectLibrary going forward. The ObjectLibrary::Register method, which uses regular expressions and may be problematic, is deprecated and will be in a future release. ### Behavior Changes * `DB::DestroyColumnFamilyHandle()` will return Status::InvalidArgument() if called with `DB::DefaultColumnFamily()`. +### New Features +* Added `Options::DisableExtraChecks()` that can be used to improve peak write performance by disabling checks that should not be necessary in the absence of software logic errors or CPU+memory hardware errors. (Default options are slowly moving toward some performance overheads for extra correctness checking.) + ### Bug Fixes * Fix a bug that FlushMemTable may return ok even flush not succeed. diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 130b16fda2..26d02b0f12 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -478,6 +478,8 @@ Options DBTestBase::GetOptions( break; case kXXH3Checksum: { table_options.checksum = kXXH3; + // Thrown in here for basic coverage: + options.DisableExtraChecks(); break; } case kFIFOCompaction: { diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 1ee09bb4b9..fbabcd771a 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -736,7 +736,9 @@ struct AdvancedColumnFamilyOptions { // LSM changes (Flush, Compaction, AddFile). When this option is true, these // checks are also enabled in release mode. These checks were historically // disabled in release mode, but are now enabled by default for proactive - // corruption detection, at almost no cost in extra CPU. + // corruption detection. The CPU overhead is negligible for normal mixed + // operations but can slow down saturated writing. See + // Options::DisableExtraChecks(). // Default: true bool force_consistency_checks = true; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 0b406db3db..e3c7d8ead9 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1365,7 +1365,11 @@ struct Options : public DBOptions, public ColumnFamilyOptions { const ColumnFamilyOptions& column_family_options) : DBOptions(db_options), ColumnFamilyOptions(column_family_options) {} - // The function recovers options to the option as in version 4.6. + // Change to some default settings from an older version. + // NOT MAINTAINED: This function has not been and is not maintained. + // DEPRECATED: This function might be removed in a future release. + // In general, defaults are changed to suit broad interests. Opting + // out of a change on upgrade should be deliberate and considered. Options* OldDefaults(int rocksdb_major_version = 4, int rocksdb_minor_version = 6); @@ -1388,6 +1392,12 @@ struct Options : public DBOptions, public ColumnFamilyOptions { // Use this if your DB is very small (like under 1GB) and you don't want to // spend lots of memory for memtables. Options* OptimizeForSmallDb(); + + // Disable some checks that should not be necessary in the absence of + // software logic errors or CPU+memory hardware errors. This can improve + // write speeds but is only recommended for temporary use. Does not + // change protection against corrupt storage (e.g. verify_checksums). + Options* DisableExtraChecks(); }; // diff --git a/options/options.cc b/options/options.cc index 969bc31a85..a64e1e7b9c 100644 --- a/options/options.cc +++ b/options/options.cc @@ -474,6 +474,19 @@ Options* Options::OptimizeForSmallDb() { return this; } +Options* Options::DisableExtraChecks() { + // See https://github.com/facebook/rocksdb/issues/9354 + force_consistency_checks = false; + // Considered but no clear performance impact seen: + // * check_flush_compaction_key_order + // * paranoid_checks + // * flush_verify_memtable_count + // By current API contract, not including + // * verify_checksums + // because checking storage data integrity is a more standard practice. + return this; +} + Options* Options::OldDefaults(int rocksdb_major_version, int rocksdb_minor_version) { ColumnFamilyOptions::OldDefaults(rocksdb_major_version, diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 4909c52a57..aa4469c18b 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -837,11 +837,25 @@ DEFINE_int32(deletepercent, 2, "Percentage of deletes out of reads/writes/" "deletepercent), so deletepercent must be smaller than (100 - " "FLAGS_readwritepercent)"); -DEFINE_bool(optimize_filters_for_hits, false, +DEFINE_bool(optimize_filters_for_hits, + ROCKSDB_NAMESPACE::Options().optimize_filters_for_hits, "Optimizes bloom filters for workloads for most lookups return " "a value. For now this doesn't create bloom filters for the max " "level of the LSM to reduce metadata that should fit in RAM. "); +DEFINE_bool(paranoid_checks, ROCKSDB_NAMESPACE::Options().paranoid_checks, + "RocksDB will aggressively check consistency of the data."); + +DEFINE_bool(force_consistency_checks, + ROCKSDB_NAMESPACE::Options().force_consistency_checks, + "Runs consistency checks on the LSM every time a change is " + "applied."); + +DEFINE_bool(check_flush_compaction_key_order, + ROCKSDB_NAMESPACE::Options().check_flush_compaction_key_order, + "During flush or compaction, check whether keys inserted to " + "output files are in order."); + DEFINE_uint64(delete_obsolete_files_period_micros, 0, "Ignored. Left here for backward compatibility"); @@ -4304,6 +4318,10 @@ class Benchmark { options.max_compaction_bytes = FLAGS_max_compaction_bytes; options.disable_auto_compactions = FLAGS_disable_auto_compactions; options.optimize_filters_for_hits = FLAGS_optimize_filters_for_hits; + options.paranoid_checks = FLAGS_paranoid_checks; + options.force_consistency_checks = FLAGS_force_consistency_checks; + options.check_flush_compaction_key_order = + FLAGS_check_flush_compaction_key_order; options.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds; options.ttl = FLAGS_ttl_seconds; // fill storage options