Set optimize_filters_for_memory by default (#12377)

Summary:
This feature has been around for a couple of years and users haven't reported any problems with it.

Not quite related: also fixed a technical ODR violation in a public header, where the default for info_log_level depended on the NDEBUG macro and could therefore differ between translation units built with different DEBUG settings.
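For background, the hazard is roughly the following pattern, sketched with plain ints standing in for the real InfoLogLevel enum (illustrative only, not the exact RocksDB declarations):

// Hazardous pattern: the struct's definition depends on a macro, so a
// translation unit built with -DNDEBUG and one built without it see two
// different definitions of the same class -- an ODR violation.
struct OptionsBefore {
#ifdef NDEBUG
  int info_log_level = 2;  // "INFO_LEVEL" in release builds
#else
  int info_log_level = 0;  // "DEBUG_LEVEL" in debug builds
#endif
};

// Fixed pattern (what this commit does): the public header is
// macro-independent and the #ifdef lives in exactly one .cc file.
struct LoggerAfter {
  static const int kDefaultLogLevel;  // declared once, identical in every TU
};
struct OptionsAfter {
  int info_log_level = LoggerAfter::kDefaultLogLevel;
};
// Normally placed in a single .cc file:
const int LoggerAfter::kDefaultLogLevel =
#ifdef NDEBUG
    2;
#else
    0;
#endif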

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12377

Test Plan: unit tests updated; the option is already exercised by the crash test. Some unit tests expect the specific behavior of optimize_filters_for_memory=false, so they now set that explicitly.

Reviewed By: jowlyzhang

Differential Revision: D54129517

Pulled By: pdillinger

fbshipit-source-id: a64b614840eadd18b892624187b3e122bab6719c
Author: Peter Dillinger, 2024-04-30 08:33:31 -07:00 (committed by Facebook GitHub Bot)
Parent: 5c1334f763
Commit: 45c105104b
12 changed files with 55 additions and 18 deletions


@@ -1318,6 +1318,8 @@ int main(int argc, char** argv) {
       policy = rocksdb_filterpolicy_create_ribbon_hybrid(8.0, 1);
     }
     rocksdb_block_based_options_set_filter_policy(table_options, policy);
+    rocksdb_block_based_options_set_optimize_filters_for_memory(table_options,
+                                                                0);
 
     // Create new database
     rocksdb_close(db);


@@ -529,6 +529,7 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
       BlockBasedTableOptions table_options;
       table_options.no_block_cache = true;
       table_options.filter_policy = Create(10, bfp_impl_);
+      table_options.optimize_filters_for_memory = false;
       table_options.partition_filters = partition_filters_;
       if (partition_filters_) {
         table_options.index_type =
@@ -1695,6 +1696,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) {
   BlockBasedTableOptions table_options;
   table_options.filter_policy = policy;
   table_options.format_version = 5;
+  table_options.optimize_filters_for_memory = false;
   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
 
   ASSERT_OK(TryReopen(options));

env/env.cc

@@ -825,6 +825,15 @@ WritableFile::~WritableFile() = default;
 
 MemoryMappedFileBuffer::~MemoryMappedFileBuffer() = default;
 
+// This const variable can be used in public headers without introducing the
+// possibility of ODR violations due to varying macro definitions.
+const InfoLogLevel Logger::kDefaultLogLevel =
+#ifdef NDEBUG
+    INFO_LEVEL;
+#else
+    DEBUG_LEVEL;
+#endif  // NDEBUG
+
 Logger::~Logger() = default;
 
 Status Logger::Close() {


@@ -1209,6 +1209,10 @@ class Logger {
  public:
   static constexpr size_t kDoNotSupportGetLogFileSize = SIZE_MAX;
 
+  // Set to INFO_LEVEL when RocksDB is compiled in release mode, and
+  // DEBUG_LEVEL when compiled in debug mode. See DBOptions::info_log_level.
+  static const InfoLogLevel kDefaultLogLevel;
+
   explicit Logger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL)
       : closed_(false), log_level_(log_level) {}
   // No copying allowed


@@ -643,11 +643,10 @@ struct DBOptions {
   // Default: nullptr
   std::shared_ptr<Logger> info_log = nullptr;
 
-#ifdef NDEBUG
-  InfoLogLevel info_log_level = INFO_LEVEL;
-#else
-  InfoLogLevel info_log_level = DEBUG_LEVEL;
-#endif  // NDEBUG
+  // Minimum level for sending log messages to info_log. The default is
+  // INFO_LEVEL when RocksDB is compiled in release mode, and DEBUG_LEVEL
+  // when it is compiled in debug mode.
+  InfoLogLevel info_log_level = Logger::kDefaultLogLevel;
 
   // Number of open files that can be used by the DB. You may need to
   // increase this if your database has a large working set. Value -1 means


@@ -426,12 +426,12 @@ struct BlockBasedTableOptions {
   // the block cache better at using space it is allowed. (These issues
   // should not arise with partitioned filters.)
   //
-  // NOTE: Do not set to true if you do not trust malloc_usable_size. With
-  // this option, RocksDB might access an allocated memory object beyond its
-  // original size if malloc_usable_size says it is safe to do so. While this
-  // can be considered bad practice, it should not produce undefined behavior
-  // unless malloc_usable_size is buggy or broken.
-  bool optimize_filters_for_memory = false;
+  // NOTE: Set to false if you do not trust malloc_usable_size. When set to
+  // true, RocksDB might access an allocated memory object beyond its original
+  // size if malloc_usable_size says it is safe to do so. While this can be
+  // considered bad practice, it should not produce undefined behavior unless
+  // malloc_usable_size is buggy or broken.
+  bool optimize_filters_for_memory = true;
 
   // Use delta encoding to compress keys in blocks.
   // ReadOptions::pin_data requires this option to be disabled.


@@ -31,7 +31,7 @@ public class BlockBasedTableConfig extends TableFormatConfig {
     indexBlockRestartInterval = 1;
     metadataBlockSize = 4096;
     partitionFilters = false;
-    optimizeFiltersForMemory = false;
+    optimizeFiltersForMemory = true;
     useDeltaEncoding = true;
     filterPolicy = null;
     wholeKeyFiltering = true;


@@ -289,7 +289,7 @@ public class OptionsUtilTest {
     altCFTableConfig.setIndexBlockRestartInterval(6);
     altCFTableConfig.setMetadataBlockSize(12 * 1024);
     altCFTableConfig.setPartitionFilters(true);
-    altCFTableConfig.setOptimizeFiltersForMemory(true);
+    altCFTableConfig.setOptimizeFiltersForMemory(false);
     altCFTableConfig.setUseDeltaEncoding(false);
     altCFTableConfig.setFilterPolicy(new BloomFilter(7.5));
     altCFTableConfig.setWholeKeyFiltering(false);


@@ -77,4 +77,5 @@ Status MemoryAllocator::CreateFromString(
   copy.invoke_prepare_options = true;
   return LoadManagedObject<MemoryAllocator>(copy, value, result);
 }
 }  // namespace ROCKSDB_NAMESPACE


@@ -425,6 +425,9 @@ class FastLocalBloomBitsBuilder : public XXPH3FilterBitsBuilder {
   }
 
   double EstimatedFpRate(size_t keys, size_t len_with_metadata) override {
+    if (len_with_metadata <= kMetadataLen) {
+      return keys > 0 ? 1.0 : 0.0;
+    }
     int num_probes = GetNumProbes(keys, len_with_metadata);
     return FastLocalBloomImpl::EstimatedFpRate(
         keys, len_with_metadata - kMetadataLen, num_probes, /*hash bits*/ 64);
@@ -891,6 +894,9 @@ class Standard128RibbonBitsBuilder : public XXPH3FilterBitsBuilder {
 
   double EstimatedFpRate(size_t num_entries,
                          size_t len_with_metadata) override {
+    if (len_with_metadata <= kMetadataLen) {
+      return num_entries > 0 ? 1.0 : 0.0;
+    }
     if (num_entries > kMaxRibbonEntries) {
       // More entries than supported by this Ribbon
       return bloom_fallback_.EstimatedFpRate(num_entries, len_with_metadata);
@@ -1030,9 +1036,12 @@ class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
     return CalculateSpace(num_entries, &dont_care1, &dont_care2);
   }
 
-  double EstimatedFpRate(size_t keys, size_t bytes) override {
-    return LegacyBloomImpl::EstimatedFpRate(keys, bytes - kMetadataLen,
-                                            num_probes_);
+  double EstimatedFpRate(size_t keys, size_t len_with_metadata) override {
+    if (len_with_metadata <= kMetadataLen) {
+      return keys > 0 ? 1.0 : 0.0;
+    }
+    return LegacyBloomImpl::EstimatedFpRate(
+        keys, len_with_metadata - kMetadataLen, num_probes_);
   }
 
   size_t ApproximateNumEntries(size_t bytes) override;
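The guards added above handle the degenerate case where the allocated length is no larger than the filter metadata. Without them, the unsigned subtraction len_with_metadata - kMetadataLen would wrap around to a huge value; a minimal standalone sketch of that hazard (illustrative only, not RocksDB code):

#include <cstddef>
#include <cstdio>

int main() {
  const size_t kMetadataLen = 5;   // stand-in for the real metadata size
  size_t len_with_metadata = 3;    // degenerate: smaller than the metadata
  // Unsigned subtraction does not go negative; it wraps to a huge value,
  // which would wildly distort any estimate computed from it.
  size_t wrapped = len_with_metadata - kMetadataLen;
  std::printf("wrapped = %zu\n", wrapped);  // prints SIZE_MAX - 1
  return 0;
}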


@@ -0,0 +1 @@
+`BlockBasedTableOptions::optimize_filters_for_memory` is now set to true by default. When `partition_filters=false`, this could lead to somewhat increased average RSS memory usage by the block cache, but this "extra" usage is within the allowed memory budget and should make memory usage more consistent (by minimizing internal fragmentation for more kinds of blocks).
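For users who want to keep the previous sizing behavior, opting out looks roughly like the following (a minimal sketch mirroring what the updated unit tests do; the helper name is illustrative and surrounding setup will vary):

#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Build Options that restore the pre-change default explicitly.
rocksdb::Options MakeOptionsWithOldFilterSizing() {
  rocksdb::BlockBasedTableOptions table_options;
  table_options.optimize_filters_for_memory = false;  // opt out of new default
  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}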


@@ -290,6 +290,9 @@ TEST_P(FullBloomTest, FullSmall) {
 }
 
 TEST_P(FullBloomTest, FullVaryingLengths) {
+  // Match how this test was originally built
+  table_options_.optimize_filters_for_memory = false;
+
   char buffer[sizeof(int)];
 
   // Count number of filters that significantly exceed the false positive rate
@@ -335,6 +338,9 @@ TEST_P(FullBloomTest, FullVaryingLengths) {
 }
 
 TEST_P(FullBloomTest, OptimizeForMemory) {
+  // Verify default option
+  EXPECT_EQ(BlockBasedTableOptions().optimize_filters_for_memory, true);
+
   char buffer[sizeof(int)];
   for (bool offm : {true, false}) {
     table_options_.optimize_filters_for_memory = offm;
@@ -354,8 +360,9 @@ TEST_P(FullBloomTest, OptimizeForMemory) {
       Build();
       size_t size = FilterData().size();
       total_size += size;
-      // optimize_filters_for_memory currently depends on malloc_usable_size
-      // but we run the rest of the test to ensure no bad behavior without it.
+      // optimize_filters_for_memory currently only has an effect with
+      // malloc_usable_size support, but we run the rest of the test to ensure
+      // no bad behavior without it.
 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
       size = malloc_usable_size(const_cast<char*>(FilterData().data()));
 #endif  // ROCKSDB_MALLOC_USABLE_SIZE
@@ -508,6 +515,9 @@ inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
 // ability to read filters generated using other cache line sizes.
 // See RawSchema.
 TEST_P(FullBloomTest, Schema) {
+  // Match how this test was originally built
+  table_options_.optimize_filters_for_memory = false;
+
 #define EXPECT_EQ_Bloom(a, b)                       \
   {                                                 \
     if (GetParam() != kStandard128Ribbon) {         \