diff --git a/db/c_test.c b/db/c_test.c index 656e2fa5f7..b55b2cc96f 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -1318,6 +1318,8 @@ int main(int argc, char** argv) { policy = rocksdb_filterpolicy_create_ribbon_hybrid(8.0, 1); } rocksdb_block_based_options_set_filter_policy(table_options, policy); + rocksdb_block_based_options_set_optimize_filters_for_memory(table_options, + 0); // Create new database rocksdb_close(db); diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index fa2d45d250..a13820535d 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -529,6 +529,7 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) { BlockBasedTableOptions table_options; table_options.no_block_cache = true; table_options.filter_policy = Create(10, bfp_impl_); + table_options.optimize_filters_for_memory = false; table_options.partition_filters = partition_filters_; if (partition_filters_) { table_options.index_type = @@ -1695,6 +1696,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { BlockBasedTableOptions table_options; table_options.filter_policy = policy; table_options.format_version = 5; + table_options.optimize_filters_for_memory = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ASSERT_OK(TryReopen(options)); diff --git a/env/env.cc b/env/env.cc index 7405f66e08..77e242a42e 100644 --- a/env/env.cc +++ b/env/env.cc @@ -825,6 +825,15 @@ WritableFile::~WritableFile() = default; MemoryMappedFileBuffer::~MemoryMappedFileBuffer() = default; +// This const variable can be used in public headers without introducing the +// possibility of ODR violations due to varying macro definitions. +const InfoLogLevel Logger::kDefaultLogLevel = +#ifdef NDEBUG + INFO_LEVEL; +#else + DEBUG_LEVEL; +#endif // NDEBUG + Logger::~Logger() = default; Status Logger::Close() { diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index d81960c437..0be90ca2a3 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1209,6 +1209,10 @@ class Logger { public: static constexpr size_t kDoNotSupportGetLogFileSize = SIZE_MAX; + // Set to INFO_LEVEL when RocksDB is compiled in release mode, and + // DEBUG_LEVEL when compiled in debug mode. See DBOptions::info_log_level. + static const InfoLogLevel kDefaultLogLevel; + explicit Logger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL) : closed_(false), log_level_(log_level) {} // No copying allowed diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 584fd4b046..b83fce7fef 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -643,11 +643,10 @@ struct DBOptions { // Default: nullptr std::shared_ptr info_log = nullptr; -#ifdef NDEBUG - InfoLogLevel info_log_level = INFO_LEVEL; -#else - InfoLogLevel info_log_level = DEBUG_LEVEL; -#endif // NDEBUG + // Minimum level for sending log messages to info_log. The default is + // INFO_LEVEL when RocksDB is compiled in release mode, and DEBUG_LEVEL + // when it is compiled in debug mode. + InfoLogLevel info_log_level = Logger::kDefaultLogLevel; // Number of open files that can be used by the DB. You may need to // increase this if your database has a large working set. Value -1 means diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index b0b276a639..a801a3349a 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -426,12 +426,12 @@ struct BlockBasedTableOptions { // the block cache better at using space it is allowed. (These issues // should not arise with partitioned filters.) // - // NOTE: Do not set to true if you do not trust malloc_usable_size. With - // this option, RocksDB might access an allocated memory object beyond its - // original size if malloc_usable_size says it is safe to do so. While this - // can be considered bad practice, it should not produce undefined behavior - // unless malloc_usable_size is buggy or broken. - bool optimize_filters_for_memory = false; + // NOTE: Set to false if you do not trust malloc_usable_size. When set to + // true, RocksDB might access an allocated memory object beyond its original + // size if malloc_usable_size says it is safe to do so. While this can be + // considered bad practice, it should not produce undefined behavior unless + // malloc_usable_size is buggy or broken. + bool optimize_filters_for_memory = true; // Use delta encoding to compress keys in blocks. // ReadOptions::pin_data requires this option to be disabled. diff --git a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java index d066131458..c8159db2dd 100644 --- a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java +++ b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java @@ -31,7 +31,7 @@ public class BlockBasedTableConfig extends TableFormatConfig { indexBlockRestartInterval = 1; metadataBlockSize = 4096; partitionFilters = false; - optimizeFiltersForMemory = false; + optimizeFiltersForMemory = true; useDeltaEncoding = true; filterPolicy = null; wholeKeyFiltering = true; diff --git a/java/src/test/java/org/rocksdb/OptionsUtilTest.java b/java/src/test/java/org/rocksdb/OptionsUtilTest.java index e14fb58fa6..0998ae83fa 100644 --- a/java/src/test/java/org/rocksdb/OptionsUtilTest.java +++ b/java/src/test/java/org/rocksdb/OptionsUtilTest.java @@ -289,7 +289,7 @@ public class OptionsUtilTest { altCFTableConfig.setIndexBlockRestartInterval(6); altCFTableConfig.setMetadataBlockSize(12 * 1024); altCFTableConfig.setPartitionFilters(true); - altCFTableConfig.setOptimizeFiltersForMemory(true); + altCFTableConfig.setOptimizeFiltersForMemory(false); altCFTableConfig.setUseDeltaEncoding(false); altCFTableConfig.setFilterPolicy(new BloomFilter(7.5)); altCFTableConfig.setWholeKeyFiltering(false); diff --git a/memory/memory_allocator.cc b/memory/memory_allocator.cc index d0de26b94d..fe183e403f 100644 --- a/memory/memory_allocator.cc +++ b/memory/memory_allocator.cc @@ -77,4 +77,5 @@ Status MemoryAllocator::CreateFromString( copy.invoke_prepare_options = true; return LoadManagedObject(copy, value, result); } + } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index f3c3fb256b..3cd63ffad8 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -425,6 +425,9 @@ class FastLocalBloomBitsBuilder : public XXPH3FilterBitsBuilder { } double EstimatedFpRate(size_t keys, size_t len_with_metadata) override { + if (len_with_metadata <= kMetadataLen) { + return keys > 0 ? 1.0 : 0.0; + } int num_probes = GetNumProbes(keys, len_with_metadata); return FastLocalBloomImpl::EstimatedFpRate( keys, len_with_metadata - kMetadataLen, num_probes, /*hash bits*/ 64); @@ -891,6 +894,9 @@ class Standard128RibbonBitsBuilder : public XXPH3FilterBitsBuilder { double EstimatedFpRate(size_t num_entries, size_t len_with_metadata) override { + if (len_with_metadata <= kMetadataLen) { + return num_entries > 0 ? 1.0 : 0.0; + } if (num_entries > kMaxRibbonEntries) { // More entries than supported by this Ribbon return bloom_fallback_.EstimatedFpRate(num_entries, len_with_metadata); @@ -1030,9 +1036,12 @@ class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder { return CalculateSpace(num_entries, &dont_care1, &dont_care2); } - double EstimatedFpRate(size_t keys, size_t bytes) override { - return LegacyBloomImpl::EstimatedFpRate(keys, bytes - kMetadataLen, - num_probes_); + double EstimatedFpRate(size_t keys, size_t len_with_metadata) override { + if (len_with_metadata <= kMetadataLen) { + return keys > 0 ? 1.0 : 0.0; + } + return LegacyBloomImpl::EstimatedFpRate( + keys, len_with_metadata - kMetadataLen, num_probes_); } size_t ApproximateNumEntries(size_t bytes) override; diff --git a/unreleased_history/behavior_changes/default_optimize_filters_for_memory.md b/unreleased_history/behavior_changes/default_optimize_filters_for_memory.md new file mode 100644 index 0000000000..0cd00120ae --- /dev/null +++ b/unreleased_history/behavior_changes/default_optimize_filters_for_memory.md @@ -0,0 +1 @@ +`BlockBasedTableOptions::optimize_filters_for_memory` is now set to true by default. When `partition_filters=false`, this could lead to somewhat increased average RSS memory usage by the block cache, but this "extra" usage is within the allowed memory budget and should make memory usage more consistent (by minimizing internal fragmentation for more kinds of blocks). diff --git a/util/bloom_test.cc b/util/bloom_test.cc index b0a5cae566..f3dbe63735 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -290,6 +290,9 @@ TEST_P(FullBloomTest, FullSmall) { } TEST_P(FullBloomTest, FullVaryingLengths) { + // Match how this test was originally built + table_options_.optimize_filters_for_memory = false; + char buffer[sizeof(int)]; // Count number of filters that significantly exceed the false positive rate @@ -335,6 +338,9 @@ TEST_P(FullBloomTest, FullVaryingLengths) { } TEST_P(FullBloomTest, OptimizeForMemory) { + // Verify default option + EXPECT_EQ(BlockBasedTableOptions().optimize_filters_for_memory, true); + char buffer[sizeof(int)]; for (bool offm : {true, false}) { table_options_.optimize_filters_for_memory = offm; @@ -354,8 +360,9 @@ TEST_P(FullBloomTest, OptimizeForMemory) { Build(); size_t size = FilterData().size(); total_size += size; - // optimize_filters_for_memory currently depends on malloc_usable_size - // but we run the rest of the test to ensure no bad behavior without it. + // optimize_filters_for_memory currently only has an effect with + // malloc_usable_size support, but we run the rest of the test to ensure + // no bad behavior without it. #ifdef ROCKSDB_MALLOC_USABLE_SIZE size = malloc_usable_size(const_cast(FilterData().data())); #endif // ROCKSDB_MALLOC_USABLE_SIZE @@ -508,6 +515,9 @@ inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128, // ability to read filters generated using other cache line sizes. // See RawSchema. TEST_P(FullBloomTest, Schema) { + // Match how this test was originally built + table_options_.optimize_filters_for_memory = false; + #define EXPECT_EQ_Bloom(a, b) \ { \ if (GetParam() != kStandard128Ribbon) { \