diff --git a/HISTORY.md b/HISTORY.md index 3b959335cc..f34e669f88 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,8 @@ * Options::max_bytes_for_level_multiplier is now a double along with all getters and setters. * Support dynamically change `delayed_write_rate` and `max_total_wal_size` options via SetDBOptions(). * Introduce DB::DeleteRange for optimized deletion of large ranges of contiguous keys. +* Support dynamically change `delayed_write_rate` option via SetDBOptions(). +* Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default. ### New Features * Add avoid_flush_during_shutdown option, which speeds up DB shutdown by not flushing unpersisted data (i.e. with disableWAL = true). Unpersisted data will be lost. The options is dynamically changeable via SetDBOptions(). diff --git a/db/c.cc b/db/c.cc index b19588b272..45e8aefe5d 100644 --- a/db/c.cc +++ b/db/c.cc @@ -1703,6 +1703,16 @@ void rocksdb_options_set_bytes_per_sync( opt->rep.bytes_per_sync = v; } +void rocksdb_options_set_allow_concurrent_memtable_write(rocksdb_options_t* opt, + unsigned char v) { + opt->rep.allow_concurrent_memtable_write = v; +} + +void rocksdb_options_set_enable_write_thread_adaptive_yield( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.enable_write_thread_adaptive_yield = v; +} + void rocksdb_options_set_verify_checksums_in_compaction( rocksdb_options_t* opt, unsigned char v) { opt->rep.verify_checksums_in_compaction = v; diff --git a/db/c_test.c b/db/c_test.c index 9b7f16441b..6b52f0aee2 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -895,6 +895,7 @@ int main(int argc, char** argv) { rocksdb_options_set_prefix_extractor(options, rocksdb_slicetransform_create_fixed_prefix(3)); rocksdb_options_set_hash_skip_list_rep(options, 5000, 4, 4); rocksdb_options_set_plain_table_factory(options, 4, 10, 0.75, 16); + rocksdb_options_set_allow_concurrent_memtable_write(options, 0); db = rocksdb_open(options, dbname, &err); CheckNoError(err); diff --git a/db/column_family_test.cc b/db/column_family_test.cc index aaf511b6eb..2d3f2cc0d5 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -1056,6 +1056,7 @@ TEST_F(ColumnFamilyTest, DifferentWriteBufferSizes) { #ifndef ROCKSDB_LITE // Cuckoo is not supported in lite TEST_F(ColumnFamilyTest, MemtableNotSupportSnapshot) { + db_options_.allow_concurrent_memtable_write = false; Open(); auto* s1 = dbfull()->GetSnapshot(); ASSERT_TRUE(s1 != nullptr); diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc index f48b5b436c..1a75de6dd7 100644 --- a/db/cuckoo_table_db_test.cc +++ b/db/cuckoo_table_db_test.cc @@ -41,6 +41,7 @@ class CuckooTableDBTest : public testing::Test { options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true)); options.allow_mmap_reads = true; options.create_if_missing = true; + options.allow_concurrent_memtable_write = false; return options; } diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 07f2409ab3..8e16a8c819 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -810,6 +810,7 @@ TEST_F(DBBloomFilterTest, PrefixScan) { options.max_background_compactions = 2; options.create_if_missing = true; options.memtable_factory.reset(NewHashSkipListRepFactory(16)); + options.allow_concurrent_memtable_write = false; BlockBasedTableOptions table_options; table_options.no_block_cache = true; diff --git a/db/db_inplace_update_test.cc b/db/db_inplace_update_test.cc index 2acc25700d..7f892db823 100644 --- a/db/db_inplace_update_test.cc +++ b/db/db_inplace_update_test.cc @@ -23,6 +23,8 @@ TEST_F(DBTestInPlaceUpdate, InPlaceUpdate) { options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; + options.allow_concurrent_memtable_write = false; + Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of smaller size @@ -45,6 +47,8 @@ TEST_F(DBTestInPlaceUpdate, InPlaceUpdateLargeNewValue) { options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; + options.allow_concurrent_memtable_write = false; + Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of larger size @@ -70,6 +74,8 @@ TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerSize) { options.write_buffer_size = 100000; options.inplace_callback = rocksdb::DBTestInPlaceUpdate::updateInPlaceSmallerSize; + options.allow_concurrent_memtable_write = false; + Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of smaller size @@ -97,6 +103,8 @@ TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerVarintSize) { options.write_buffer_size = 100000; options.inplace_callback = rocksdb::DBTestInPlaceUpdate::updateInPlaceSmallerVarintSize; + options.allow_concurrent_memtable_write = false; + Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of smaller varint size @@ -124,6 +132,8 @@ TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackLargeNewValue) { options.write_buffer_size = 100000; options.inplace_callback = rocksdb::DBTestInPlaceUpdate::updateInPlaceLargerSize; + options.allow_concurrent_memtable_write = false; + Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of larger size @@ -148,7 +158,9 @@ TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackNoAction) { options.env = env_; options.write_buffer_size = 100000; options.inplace_callback = - rocksdb::DBTestInPlaceUpdate::updateInPlaceNoAction; + rocksdb::DBTestInPlaceUpdate::updateInPlaceNoAction; + options.allow_concurrent_memtable_write = false; + Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Callback function requests no actions from db diff --git a/db/db_memtable_test.cc b/db/db_memtable_test.cc index 19b6a63bc5..fcf43f197e 100644 --- a/db/db_memtable_test.cc +++ b/db/db_memtable_test.cc @@ -86,6 +86,8 @@ class MockMemTableRepFactory : public MemTableRepFactory { MockMemTableRep* rep() { return mock_rep_; } + bool IsInsertConcurrentlySupported() const override { return false; } + private: MockMemTableRep* mock_rep_; }; @@ -116,6 +118,7 @@ class TestPrefixExtractor : public SliceTransform { TEST_F(DBMemTableTest, InsertWithHint) { Options options; + options.allow_concurrent_memtable_write = false; options.create_if_missing = true; options.memtable_factory.reset(new MockMemTableRepFactory()); options.memtable_insert_with_hint_prefix_extractor.reset( diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 31afc2cfda..ff6b8c3203 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -34,6 +34,7 @@ TEST_F(DBPropertiesTest, Empty) { Options options; options.env = env_; options.write_buffer_size = 100000; // Small write buffer + options.allow_concurrent_memtable_write = false; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); diff --git a/db/db_tailing_iter_test.cc b/db/db_tailing_iter_test.cc index d199121004..d5a20c7631 100644 --- a/db/db_tailing_iter_test.cc +++ b/db/db_tailing_iter_test.cc @@ -305,6 +305,7 @@ TEST_F(DBTestTailingIterator, TailingIteratorPrefixSeek) { options.disable_auto_compactions = true; options.prefix_extractor.reset(NewFixedPrefixTransform(2)); options.memtable_factory.reset(NewHashSkipListRepFactory(16)); + options.allow_concurrent_memtable_write = false; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -625,6 +626,7 @@ TEST_F(DBTestTailingIterator, ManagedTailingIteratorPrefixSeek) { options.disable_auto_compactions = true; options.prefix_extractor.reset(NewFixedPrefixTransform(2)); options.memtable_factory.reset(NewHashSkipListRepFactory(16)); + options.allow_concurrent_memtable_write = false; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); diff --git a/db/db_test.cc b/db/db_test.cc index f82dff5015..0e6d5eee43 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -2469,11 +2469,13 @@ class MultiThreadedDBTest : public DBTest, TEST_P(MultiThreadedDBTest, MultiThreaded) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; + Options options = CurrentOptions(options_override); std::vector cfs; for (int i = 1; i < kColumnFamilies; ++i) { cfs.push_back(ToString(i)); } - CreateAndReopenWithCF(cfs, CurrentOptions(options_override)); + Reopen(options); + CreateAndReopenWithCF(cfs, options); // Initialize state MTState mt; mt.test = this; diff --git a/db/db_test2.cc b/db/db_test2.cc index 85e6057910..50a1e62120 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -956,6 +956,7 @@ TEST_F(DBTest2, PresetCompressionDict) { const int kNumL0Files = 5; Options options; + options.allow_concurrent_memtable_write = false; options.arena_block_size = kBlockSizeBytes; options.compaction_style = kCompactionStyleUniversal; options.create_if_missing = true; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index dc5ad992e5..91fc87d1a4 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -233,6 +233,7 @@ Options DBTestBase::CurrentOptions( case kHashSkipList: options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.memtable_factory.reset(NewHashSkipListRepFactory(16)); + options.allow_concurrent_memtable_write = false; break; case kPlainTableFirstBytePrefix: options.table_factory.reset(new PlainTableFactory()); @@ -264,15 +265,18 @@ Options DBTestBase::CurrentOptions( break; case kVectorRep: options.memtable_factory.reset(new VectorRepFactory(100)); + options.allow_concurrent_memtable_write = false; break; case kHashLinkList: options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.memtable_factory.reset( NewHashLinkListRepFactory(4, 0, 3, true, 4)); + options.allow_concurrent_memtable_write = false; break; case kHashCuckoo: options.memtable_factory.reset( NewHashCuckooRepFactory(options.write_buffer_size)); + options.allow_concurrent_memtable_write = false; break; #endif // ROCKSDB_LITE case kMergePut: diff --git a/db/db_test_util.h b/db/db_test_util.h index 8166c28734..9aa86e4c77 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -193,6 +193,10 @@ class SpecialSkipListFactory : public MemTableRepFactory { } virtual const char* Name() const override { return "SkipListFactory"; } + bool IsInsertConcurrentlySupported() const override { + return factory_.IsInsertConcurrentlySupported(); + } + private: SkipListFactory factory_; int num_entries_flush_; diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index fdf98b0bf9..b71da7f1af 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -142,6 +142,7 @@ class PlainTableDBTest : public testing::Test, options.prefix_extractor.reset(NewFixedPrefixTransform(8)); options.allow_mmap_reads = mmap_mode_; + options.allow_concurrent_memtable_write = false; return options; } diff --git a/db/prefix_test.cc b/db/prefix_test.cc index 797f8d76db..b1e43dd8e0 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -222,6 +222,7 @@ class PrefixTest : public testing::Test { bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + options.allow_concurrent_memtable_write = false; Status s = DB::Open(options, kDbName, &db); EXPECT_OK(s); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index e5b859ce6c..3f0aca7983 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -647,6 +647,12 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_adaptive_mutex( extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bytes_per_sync( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_allow_concurrent_memtable_write(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API void +rocksdb_options_set_enable_write_thread_adaptive_yield(rocksdb_options_t*, + unsigned char); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_verify_checksums_in_compaction(rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index ce76f41934..fe8b7128a3 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1298,7 +1298,7 @@ struct DBOptions { // It is strongly recommended to set enable_write_thread_adaptive_yield // if you are going to use this feature. // - // Default: false + // Default: true bool allow_concurrent_memtable_write; // If true, threads synchronizing with the write batch group leader will @@ -1306,7 +1306,7 @@ struct DBOptions { // This can substantially improve throughput for concurrent workloads, // regardless of whether allow_concurrent_memtable_write is enabled. // - // Default: false + // Default: true bool enable_write_thread_adaptive_yield; // The maximum number of microseconds that a write operation will use diff --git a/util/options.cc b/util/options.cc index b7b47c6bdb..05ea42a3a5 100644 --- a/util/options.cc +++ b/util/options.cc @@ -219,8 +219,8 @@ DBOptions::DBOptions() listeners(), enable_thread_tracking(false), delayed_write_rate(2 * 1024U * 1024U), - allow_concurrent_memtable_write(false), - enable_write_thread_adaptive_yield(false), + allow_concurrent_memtable_write(true), + enable_write_thread_adaptive_yield(true), write_thread_max_yield_usec(100), write_thread_slow_yield_usec(3), skip_stats_update_on_db_open(false),