diff --git a/CMakeLists.txt b/CMakeLists.txt index 07f89a9cbe..91fbade1e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -738,7 +738,6 @@ set(SOURCES table/adaptive/adaptive_table_factory.cc table/block_based/binary_search_index_reader.cc table/block_based/block.cc - table/block_based/block_based_filter_block.cc table/block_based/block_based_table_builder.cc table/block_based/block_based_table_factory.cc table/block_based/block_based_table_iterator.cc @@ -1321,7 +1320,6 @@ if(WITH_TESTS) options/customizable_test.cc options/options_settable_test.cc options/options_test.cc - table/block_based/block_based_filter_block_test.cc table/block_based/block_based_table_reader_test.cc table/block_based/block_test.cc table/block_based/data_block_hash_index_test.cc diff --git a/HISTORY.md b/HISTORY.md index 4b80bc66b9..fadf57eb91 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -37,6 +37,7 @@ ### Behavior changes * DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984) +* Removed support for reading Bloom filters using obsolete block-based filter format. (Support for writing such filters was dropped in 7.0.) For good read performance on old DBs using these filters, a full compaction is required. ## 7.3.0 (05/20/2022) ### Bug Fixes diff --git a/Makefile b/Makefile index 7494987c50..187abe6a1e 100644 --- a/Makefile +++ b/Makefile @@ -1594,9 +1594,6 @@ random_access_file_reader_test: $(OBJ_DIR)/file/random_access_file_reader_test.o file_reader_writer_test: $(OBJ_DIR)/util/file_reader_writer_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -block_based_filter_block_test: $(OBJ_DIR)/table/block_based/block_based_filter_block_test.o $(TEST_LIBRARY) $(LIBRARY) - $(AM_LINK) - block_based_table_reader_test: table/block_based/block_based_table_reader_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) diff --git a/TARGETS b/TARGETS index 6f311616cf..d19f5eb6d4 100644 --- a/TARGETS +++ b/TARGETS @@ -165,7 +165,6 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "table/adaptive/adaptive_table_factory.cc", "table/block_based/binary_search_index_reader.cc", "table/block_based/block.cc", - "table/block_based/block_based_filter_block.cc", "table/block_based/block_based_table_builder.cc", "table/block_based/block_based_table_factory.cc", "table/block_based/block_based_table_iterator.cc", @@ -494,7 +493,6 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[ "table/adaptive/adaptive_table_factory.cc", "table/block_based/binary_search_index_reader.cc", "table/block_based/block.cc", - "table/block_based/block_based_filter_block.cc", "table/block_based/block_based_table_builder.cc", "table/block_based/block_based_table_factory.cc", "table/block_based/block_based_table_iterator.cc", @@ -4800,12 +4798,6 @@ cpp_unittest_wrapper(name="blob_garbage_meter_test", extra_compiler_flags=[]) -cpp_unittest_wrapper(name="block_based_filter_block_test", - srcs=["table/block_based/block_based_filter_block_test.cc"], - deps=[":rocksdb_test_lib"], - extra_compiler_flags=[]) - - cpp_unittest_wrapper(name="block_based_table_reader_test", srcs=["table/block_based/block_based_table_reader_test.cc"], deps=[":rocksdb_test_lib"], diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 9ab2d3db6d..353db60dd9 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -670,7 +670,7 @@ class DBBlockCacheTest1 : public DBTestBase, }; INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1, - ::testing::Values(1, 2, 3)); + ::testing::Values(1, 2)); TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { Options options = CurrentOptions(); @@ -686,13 +686,10 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { table_options.partition_filters = true; table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); break; - case 2: // block-based filter - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - break; - case 3: // full filter - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + case 2: // full filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); break; default: assert(false); diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index db93596a40..a2a2a7a21c 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -37,8 +37,6 @@ std::shared_ptr Create(double bits_per_key, return BloomLikeFilterPolicy::Create(name, bits_per_key); } const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName(); -const std::string kDeprecatedBlock = - DeprecatedBlockBasedBloomFilterPolicy::kClassName(); const std::string kFastLocalBloom = test::FastLocalBloomFilterPolicy::kClassName(); const std::string kStandard128Ribbon = @@ -189,7 +187,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); if (partition_filters) { bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -267,7 +265,7 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); if (partition_filters) { bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -331,7 +329,7 @@ TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { get_perf_context()->EnablePerLevelPerfContext(); BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); bbto.whole_key_filtering = false; if (partition_filters) { bbto.partition_filters = true; @@ -747,7 +745,6 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestDefFormatVersion, ::testing::Values( - std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); @@ -755,7 +752,6 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestWithParam, ::testing::Values( - std::make_tuple(kDeprecatedBlock, false, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); @@ -763,7 +759,6 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( FormatLatest, DBBloomFilterTestWithParam, ::testing::Values( - std::make_tuple(kDeprecatedBlock, false, kLatestFormatVersion), std::make_tuple(kAutoBloom, true, kLatestFormatVersion), std::make_tuple(kAutoBloom, false, kLatestFormatVersion), std::make_tuple(kAutoRibbon, false, kLatestFormatVersion))); @@ -829,8 +824,6 @@ std::shared_ptr kCompatibilityRibbonPolicy{ NewRibbonFilterPolicy(20, -1)}; std::vector kCompatibilityConfigs = { - {Create(20, kDeprecatedBlock), false, - BlockBasedTableOptions().format_version}, {kCompatibilityBloomPolicy, false, BlockBasedTableOptions().format_version}, {kCompatibilityBloomPolicy, true, BlockBasedTableOptions().format_version}, {kCompatibilityBloomPolicy, false, /* legacy Bloom */ 4U}, @@ -878,9 +871,8 @@ TEST_F(DBBloomFilterTest, BloomFilterCompatibility) { ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive // Filter negative, with high probability ASSERT_EQ("NOT_FOUND", Get(prefix + "Q")); - // FULL_POSITIVE does not include block-based filter case (j == 0) EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - j == 0 ? 0 : 2); + 2); EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), 1); } } @@ -904,7 +896,7 @@ class ChargeFilterConstructionTestWithParam detect_filter_construct_corruption_(std::get<3>(GetParam())) { if (charge_filter_construction_ == CacheEntryRoleOptions::Decision::kDisabled || - policy_ == kDeprecatedBlock || policy_ == kLegacyBloom) { + policy_ == kLegacyBloom) { // For these cases, we only interested in whether filter construction // cache charging happens instead of its accuracy. Therefore we don't // need many keys. @@ -1031,8 +1023,6 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kStandard128Ribbon, true, true), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kDeprecatedBlock, false, false), std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kLegacyBloom, false, false))); @@ -1109,11 +1099,10 @@ TEST_P(ChargeFilterConstructionTestWithParam, Basic) { return; } - if (policy == kDeprecatedBlock || policy == kLegacyBloom) { + if (policy == kLegacyBloom) { EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0) << "There shouldn't be filter construction cache charging as this " - "feature does not support kDeprecatedBlock " - "nor kLegacyBloom"; + "feature does not support kLegacyBloom"; return; } @@ -1798,10 +1787,9 @@ class SliceTransformLimitedDomain : public SliceTransform { } }; -TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) { +TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter1) { BlockBasedTableOptions bbto; - // Full Filter Block - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); + bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); bbto.whole_key_filtering = false; Options options = CurrentOptions(); @@ -1827,10 +1815,9 @@ TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) { ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); } -TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) { +TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter2) { BlockBasedTableOptions bbto; - // Block Filter Block - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, true)); + bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); Options options = CurrentOptions(); options.prefix_extractor = std::make_shared(); @@ -2221,7 +2208,6 @@ class BloomStatsTestWithParam } else { BlockBasedTableOptions table_options; if (partition_filters_) { - assert(bfp_impl_ != kDeprecatedBlock); table_options.partition_filters = partition_filters_; table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; @@ -2346,9 +2332,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(value3, iter->value().ToString()); - // The seek doesn't check block-based bloom filter because last index key - // starts with the same prefix we're seeking to. - uint64_t expected_hits = bfp_impl_ == kDeprecatedBlock ? 1 : 2; + uint64_t expected_hits = 2; ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); iter->Seek(key2); @@ -2360,8 +2344,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { INSTANTIATE_TEST_CASE_P( BloomStatsTestWithParam, BloomStatsTestWithParam, - ::testing::Values(std::make_tuple(kDeprecatedBlock, false), - std::make_tuple(kLegacyBloom, false), + ::testing::Values(std::make_tuple(kLegacyBloom, false), std::make_tuple(kLegacyBloom, true), std::make_tuple(kFastLocalBloom, false), std::make_tuple(kFastLocalBloom, true), @@ -2486,7 +2469,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { options.level_compaction_dynamic_level_bytes = true; BlockBasedTableOptions bbto; bbto.cache_index_and_filter_blocks = true; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); + bbto.filter_policy.reset(NewBloomFilterPolicy(10)); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.optimize_filters_for_hits = true; @@ -2674,7 +2657,6 @@ int CountIter(std::unique_ptr& iter, const Slice& key) { // as the upper bound and two keys are adjacent according to the comparator. TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { - int using_full_builder = bfp_impl != kDeprecatedBlock; Options options; options.create_if_missing = true; options.env = CurrentOptions().env; @@ -2726,8 +2708,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcdxx00"), 4); // should check bloom filter since upper bound meets requirement - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } { @@ -2740,8 +2721,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abcdxx01"), 4); // should skip bloom filter since upper bound is too long - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } { @@ -2754,8 +2734,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { ASSERT_EQ(CountIter(iter, "abcdxx02"), 4); // should check bloom filter since upper bound matches transformed seek // key - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } { @@ -2768,8 +2747,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0); // should skip bloom filter since mismatch is found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}})); @@ -2782,8 +2760,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}})); @@ -2795,8 +2772,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 3 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); } } @@ -2806,7 +2782,6 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { // verify iterators can read all SST files using the latest config. TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { - int using_full_builder = bfp_impl != kDeprecatedBlock; Options options; options.env = CurrentOptions().env; options.create_if_missing = true; @@ -2840,11 +2815,9 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { read_options.iterate_upper_bound = &upper_bound; std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 1 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); ASSERT_EQ(CountIter(iter, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 1 + using_full_builder); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); // second SST with capped:3 BF @@ -2857,14 +2830,12 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { // BF is cappped:3 now std::unique_ptr iter_tmp(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_tmp, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 2 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); // both counters are incremented because BF is "not changed" for 1 of the // 2 SST files, so filter is checked once and found no match. - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 3 + using_full_builder * 2); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); } @@ -2882,25 +2853,21 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { std::unique_ptr iter_tmp(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_tmp, "foo"), 9); // the first and last BF are checked - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 4 + using_full_builder * 3); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 7); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); // only last BF is checked and not found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 5 + using_full_builder * 3); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 8); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); } // iter_old can only see the first SST, so checked plus 1 ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 6 + using_full_builder * 3); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); // iter was created after the first setoptions call so only full filter // will check the filter ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 6 + using_full_builder * 4); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 10); { // keys in all three SSTs are visible to iterator @@ -2908,12 +2875,10 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { // so +2 for checked counter std::unique_ptr iter_all(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_all, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 7 + using_full_builder * 5); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 8 + using_full_builder * 5); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 13); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); @@ -2924,12 +2889,10 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { ASSERT_EQ(CountIter(iter_all, "foo"), 6); // all three SST are checked because the current options has the same as // the remaining SST (capped:3) - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 9 + using_full_builder * 7); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 16); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), - 10 + using_full_builder * 7); + ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 17); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4); } // TODO(Zhongyi): Maybe also need to add Get calls to test point look up? diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 64b39b741f..1dd99884fc 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -152,7 +152,6 @@ DECLARE_double(experimental_mempurge_threshold); DECLARE_bool(enable_write_thread_adaptive_yield); DECLARE_int32(reopen); DECLARE_double(bloom_bits); -DECLARE_bool(use_block_based_filter); DECLARE_int32(ribbon_starting_level); DECLARE_bool(partition_filters); DECLARE_bool(optimize_filters_for_memory); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index c2594beb6a..b689028120 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -469,10 +469,6 @@ DEFINE_double(bloom_bits, 10, "Bloom filter bits per key. " "Negative means use default settings."); -DEFINE_bool(use_block_based_filter, false, - "use block based filter" - "instead of full filter for block based table"); - DEFINE_int32( ribbon_starting_level, 999, "Use Bloom filter on levels below specified and Ribbon beginning on level " diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index b9ac4734b0..466cef209f 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -36,16 +36,7 @@ std::shared_ptr CreateFilterPolicy() { return BlockBasedTableOptions().filter_policy; } const FilterPolicy* new_policy; - if (FLAGS_use_block_based_filter) { - if (FLAGS_ribbon_starting_level < 999) { - fprintf( - stderr, - "Cannot combine use_block_based_filter and ribbon_starting_level\n"); - exit(1); - } else { - new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, true); - } - } else if (FLAGS_ribbon_starting_level >= 999) { + if (FLAGS_ribbon_starting_level >= 999) { // Use Bloom API new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false); } else { diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 593a53eb47..6ec2e33320 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -553,7 +553,7 @@ enum class CacheEntryRole { kFilterBlock, // Block-based table metadata block for partitioned filter kFilterMetaBlock, - // Block-based table deprecated filter block (old "block-based" filter) + // OBSOLETE / DEPRECATED: old/removed block-based filter kDeprecatedFilterBlock, // Block-based table index block kIndexBlock, diff --git a/options/options_test.cc b/options/options_test.cc index 7c688f290d..2645007589 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -999,21 +999,11 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - // Back door way of enabling deprecated block-based Bloom - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4", - &new_opt)); - auto builtin = - dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(builtin->GetId(), - "rocksdb.internal.DeprecatedBlockBasedBloomFilter:4"); - // Test configuring using other internal names ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt)); - builtin = + auto builtin = dynamic_cast(new_opt.filter_policy.get()); EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3"); diff --git a/src.mk b/src.mk index dbd9fa15aa..882d6fa48f 100644 --- a/src.mk +++ b/src.mk @@ -156,7 +156,6 @@ LIB_SOURCES = \ table/adaptive/adaptive_table_factory.cc \ table/block_based/binary_search_index_reader.cc \ table/block_based/block.cc \ - table/block_based/block_based_filter_block.cc \ table/block_based/block_based_table_builder.cc \ table/block_based/block_based_table_factory.cc \ table/block_based/block_based_table_iterator.cc \ @@ -525,7 +524,6 @@ TEST_MAIN_SOURCES = \ options/customizable_test.cc \ options/options_settable_test.cc \ options/options_test.cc \ - table/block_based/block_based_filter_block_test.cc \ table/block_based/block_based_table_reader_test.cc \ table/block_based/block_test.cc \ table/block_based/data_block_hash_index_test.cc \ diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc deleted file mode 100644 index 3650621b0b..0000000000 --- a/table/block_based/block_based_filter_block.cc +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/block_based/block_based_filter_block.h" - -#include - -#include "db/dbformat.h" -#include "monitoring/perf_context_imp.h" -#include "rocksdb/filter_policy.h" -#include "table/block_based/block_based_table_reader.h" -#include "util/cast_util.h" -#include "util/coding.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { - -void AppendItem(std::string* props, const std::string& key, - const std::string& value) { - char cspace = ' '; - std::string value_str(""); - size_t i = 0; - const size_t dataLength = 64; - const size_t tabLength = 2; - const size_t offLength = 16; - - value_str.append(&value[i], std::min(size_t(dataLength), value.size())); - i += dataLength; - while (i < value.size()) { - value_str.append("\n"); - value_str.append(offLength, cspace); - value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i)); - i += dataLength; - } - - std::string result(""); - if (key.size() < (offLength - tabLength)) - result.append(size_t((offLength - tabLength)) - key.size(), cspace); - result.append(key); - - props->append(result + ": " + value_str + "\n"); -} - -template -void AppendItem(std::string* props, const TKey& key, const std::string& value) { - std::string key_str = std::to_string(key); - AppendItem(props, key_str, value); -} -} // namespace - -// See doc/table_format.txt for an explanation of the filter block format. - -// Generate new filter every 2KB of data -static const size_t kFilterBaseLg = 11; -static const size_t kFilterBase = 1 << kFilterBaseLg; - -BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder( - const SliceTransform* prefix_extractor, - const BlockBasedTableOptions& table_opt, int bits_per_key) - : prefix_extractor_(prefix_extractor), - whole_key_filtering_(table_opt.whole_key_filtering), - bits_per_key_(bits_per_key), - prev_prefix_start_(0), - prev_prefix_size_(0), - total_added_in_built_(0) {} - -void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { - uint64_t filter_index = (block_offset / kFilterBase); - assert(filter_index >= filter_offsets_.size()); - while (filter_index > filter_offsets_.size()) { - GenerateFilter(); - } -} - -size_t BlockBasedFilterBlockBuilder::EstimateEntriesAdded() { - return total_added_in_built_ + start_.size(); -} - -void BlockBasedFilterBlockBuilder::Add(const Slice& key_without_ts) { - if (prefix_extractor_ && prefix_extractor_->InDomain(key_without_ts)) { - AddPrefix(key_without_ts); - } - - if (whole_key_filtering_) { - AddKey(key_without_ts); - } -} - -// Add key to filter if needed -inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) { - start_.push_back(entries_.size()); - entries_.append(key.data(), key.size()); -} - -// Add prefix to filter if needed -inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { - // get slice for most recently added entry - Slice prev; - if (prev_prefix_size_ > 0) { - prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_); - } - - Slice prefix = prefix_extractor_->Transform(key); - // insert prefix only when it's different from the previous prefix. - if (prev.size() == 0 || prefix != prev) { - prev_prefix_start_ = entries_.size(); - prev_prefix_size_ = prefix.size(); - AddKey(prefix); - } -} - -Slice BlockBasedFilterBlockBuilder::Finish( - const BlockHandle& /*tmp*/, Status* status, - std::unique_ptr* /* filter_data */) { - // In this impl we ignore BlockHandle and filter_data - *status = Status::OK(); - - if (!start_.empty()) { - GenerateFilter(); - } - - // Append array of per-filter offsets - const uint32_t array_offset = static_cast(result_.size()); - for (size_t i = 0; i < filter_offsets_.size(); i++) { - PutFixed32(&result_, filter_offsets_[i]); - } - - PutFixed32(&result_, array_offset); - result_.push_back(kFilterBaseLg); // Save encoding parameter in result - return Slice(result_); -} - -void BlockBasedFilterBlockBuilder::GenerateFilter() { - const size_t num_entries = start_.size(); - if (num_entries == 0) { - // Fast path if there are no keys for this filter - filter_offsets_.push_back(static_cast(result_.size())); - return; - } - total_added_in_built_ += num_entries; - - // Make list of keys from flattened key structure - start_.push_back(entries_.size()); // Simplify length computation - tmp_entries_.resize(num_entries); - for (size_t i = 0; i < num_entries; i++) { - const char* base = entries_.data() + start_[i]; - size_t length = start_[i + 1] - start_[i]; - tmp_entries_[i] = Slice(base, length); - } - - // Generate filter for current set of keys and append to result_. - filter_offsets_.push_back(static_cast(result_.size())); - DeprecatedBlockBasedBloomFilterPolicy::CreateFilter( - tmp_entries_.data(), static_cast(num_entries), bits_per_key_, - &result_); - - tmp_entries_.clear(); - entries_.clear(); - start_.clear(); - prev_prefix_start_ = 0; - prev_prefix_size_ = 0; -} - -BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( - const BlockBasedTable* t, CachableEntry&& filter_block) - : FilterBlockReaderCommon(t, std::move(filter_block)) { - assert(table()); - assert(table()->get_rep()); - assert(table()->get_rep()->filter_policy); -} - -std::unique_ptr BlockBasedFilterBlockReader::Create( - const BlockBasedTable* table, const ReadOptions& ro, - FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, - bool pin, BlockCacheLookupContext* lookup_context) { - assert(table); - assert(table->get_rep()); - assert(!pin || prefetch); - - CachableEntry filter_block; - if (prefetch || !use_cache) { - const Status s = ReadFilterBlock( - table, prefetch_buffer, ro, use_cache, nullptr /* get_context */, - lookup_context, &filter_block, BlockType::kDeprecatedFilter); - if (!s.ok()) { - IGNORE_STATUS_IF_ERROR(s); - return std::unique_ptr(); - } - - if (use_cache && !pin) { - filter_block.Reset(); - } - } - - return std::unique_ptr( - new BlockBasedFilterBlockReader(table, std::move(filter_block))); -} - -bool BlockBasedFilterBlockReader::KeyMayMatch( - const Slice& key, const SliceTransform* /* prefix_extractor */, - uint64_t block_offset, const bool no_io, - const Slice* const /*const_ikey_ptr*/, GetContext* get_context, - BlockCacheLookupContext* lookup_context) { - assert(block_offset != kNotValid); - if (!whole_key_filtering()) { - return true; - } - return MayMatch(key, block_offset, no_io, get_context, lookup_context); -} - -bool BlockBasedFilterBlockReader::PrefixMayMatch( - const Slice& prefix, const SliceTransform* /* prefix_extractor */, - uint64_t block_offset, const bool no_io, - const Slice* const /*const_ikey_ptr*/, GetContext* get_context, - BlockCacheLookupContext* lookup_context) { - assert(block_offset != kNotValid); - return MayMatch(prefix, block_offset, no_io, get_context, lookup_context); -} - -bool BlockBasedFilterBlockReader::ParseFieldsFromBlock( - const BlockContents& contents, const char** data, const char** offset, - size_t* num, size_t* base_lg) { - assert(data); - assert(offset); - assert(num); - assert(base_lg); - - const size_t n = contents.data.size(); - if (n < 5) { // 1 byte for base_lg and 4 for start of offset array - return false; - } - - const uint32_t last_word = DecodeFixed32(contents.data.data() + n - 5); - if (last_word > n - 5) { - return false; - } - - *data = contents.data.data(); - *offset = (*data) + last_word; - *num = (n - 5 - last_word) / 4; - *base_lg = contents.data[n - 1]; - - return true; -} - -bool BlockBasedFilterBlockReader::MayMatch( - const Slice& entry, uint64_t block_offset, bool no_io, - GetContext* get_context, BlockCacheLookupContext* lookup_context) const { - CachableEntry filter_block; - - const Status s = - GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block, - BlockType::kDeprecatedFilter); - if (!s.ok()) { - IGNORE_STATUS_IF_ERROR(s); - return true; - } - - assert(filter_block.GetValue()); - - const char* data = nullptr; - const char* offset = nullptr; - size_t num = 0; - size_t base_lg = 0; - if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num, - &base_lg)) { - return true; // Errors are treated as potential matches - } - - const uint64_t index = block_offset >> base_lg; - if (index < num) { - const uint32_t start = DecodeFixed32(offset + index * 4); - const uint32_t limit = DecodeFixed32(offset + index * 4 + 4); - if (start <= limit && limit <= (uint32_t)(offset - data)) { - const Slice filter = Slice(data + start, limit - start); - - assert(table()); - assert(table()->get_rep()); - - const bool may_match = - DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch(entry, filter); - if (may_match) { - PERF_COUNTER_ADD(bloom_sst_hit_count, 1); - return true; - } else { - PERF_COUNTER_ADD(bloom_sst_miss_count, 1); - return false; - } - } else if (start == limit) { - // Empty filters do not match any entries - return false; - } - } - return true; // Errors are treated as potential matches -} - -size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const { - size_t usage = ApproximateFilterBlockMemoryUsage(); -#ifdef ROCKSDB_MALLOC_USABLE_SIZE - usage += malloc_usable_size(const_cast(this)); -#else - usage += sizeof(*this); -#endif // ROCKSDB_MALLOC_USABLE_SIZE - return usage; -} - -std::string BlockBasedFilterBlockReader::ToString() const { - CachableEntry filter_block; - - const Status s = - GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */, - nullptr /* lookup_context */, &filter_block, - BlockType::kDeprecatedFilter); - if (!s.ok()) { - IGNORE_STATUS_IF_ERROR(s); - return std::string("Unable to retrieve filter block"); - } - - assert(filter_block.GetValue()); - - const char* data = nullptr; - const char* offset = nullptr; - size_t num = 0; - size_t base_lg = 0; - if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num, - &base_lg)) { - return std::string("Error parsing filter block"); - } - - std::string result; - result.reserve(1024); - - std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks"); - AppendItem(&result, s_fb, std::to_string(num)); - AppendItem(&result, s_bo, s_hd); - - for (size_t index = 0; index < num; index++) { - uint32_t start = DecodeFixed32(offset + index * 4); - uint32_t limit = DecodeFixed32(offset + index * 4 + 4); - - if (start != limit) { - result.append(" filter block # " + std::to_string(index + 1) + "\n"); - Slice filter = Slice(data + start, limit - start); - AppendItem(&result, start, filter.ToString(true)); - } - } - return result; -} - -} // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_filter_block.h b/table/block_based/block_based_filter_block.h deleted file mode 100644 index 156da04922..0000000000 --- a/table/block_based/block_based_filter_block.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A filter block is stored near the end of a Table file. It contains -// filters (e.g., bloom filters) for all data blocks in the table combined -// into a single filter block. - -#pragma once - -#include -#include - -#include -#include -#include - -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" -#include "table/block_based/filter_block_reader_common.h" -#include "table/block_based/filter_policy_internal.h" -#include "table/format.h" -#include "util/hash.h" - -namespace ROCKSDB_NAMESPACE { - -// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a -// particular Table. It generates a single string which is stored as -// a special block in the Table. -// -// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp: -// (StartBlock Add*)* Finish -class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { - public: - BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor, - const BlockBasedTableOptions& table_opt, - int bits_per_key); - // No copying allowed - BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&) = delete; - void operator=(const BlockBasedFilterBlockBuilder&) = delete; - - virtual bool IsBlockBased() override { return true; } - virtual void StartBlock(uint64_t block_offset) override; - virtual void Add(const Slice& key_without_ts) override; - virtual bool IsEmpty() const override { - return start_.empty() && filter_offsets_.empty(); - } - virtual size_t EstimateEntriesAdded() override; - virtual Slice Finish( - const BlockHandle& tmp, Status* status, - std::unique_ptr* filter_data = nullptr) override; - using FilterBlockBuilder::Finish; - - private: - void AddKey(const Slice& key); - void AddPrefix(const Slice& key); - void GenerateFilter(); - - // important: all of these might point to invalid addresses - // at the time of destruction of this filter block. destructor - // should NOT dereference them. - const SliceTransform* prefix_extractor_; - bool whole_key_filtering_; - int bits_per_key_; - - size_t prev_prefix_start_; // the position of the last appended prefix - // to "entries_". - size_t prev_prefix_size_; // the length of the last appended prefix to - // "entries_". - std::string entries_; // Flattened entry contents - std::vector start_; // Starting index in entries_ of each entry - std::string result_; // Filter data computed so far - std::vector tmp_entries_; // policy_->CreateFilter() argument - std::vector filter_offsets_; - uint64_t total_added_in_built_; // Total keys added to filters built so far -}; - -// A FilterBlockReader is used to parse filter from SST table. -// KeyMayMatch and PrefixMayMatch would trigger filter checking -class BlockBasedFilterBlockReader - : public FilterBlockReaderCommon { - public: - BlockBasedFilterBlockReader(const BlockBasedTable* t, - CachableEntry&& filter_block); - // No copying allowed - BlockBasedFilterBlockReader(const BlockBasedFilterBlockReader&) = delete; - void operator=(const BlockBasedFilterBlockReader&) = delete; - - static std::unique_ptr Create( - const BlockBasedTable* table, const ReadOptions& ro, - FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, - bool pin, BlockCacheLookupContext* lookup_context); - - bool IsBlockBased() override { return true; } - - bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr, GetContext* get_context, - BlockCacheLookupContext* lookup_context) override; - bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr, - GetContext* get_context, - BlockCacheLookupContext* lookup_context) override; - size_t ApproximateMemoryUsage() const override; - - // convert this object to a human readable form - std::string ToString() const override; - - private: - static bool ParseFieldsFromBlock(const BlockContents& contents, - const char** data, const char** offset, - size_t* num, size_t* base_lg); - - bool MayMatch(const Slice& entry, uint64_t block_offset, bool no_io, - GetContext* get_context, - BlockCacheLookupContext* lookup_context) const; -}; - -} // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_filter_block_test.cc b/table/block_based/block_based_filter_block_test.cc deleted file mode 100644 index 943a3e9415..0000000000 --- a/table/block_based/block_based_filter_block_test.cc +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/block_based/block_based_filter_block.h" -#include "rocksdb/filter_policy.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/mock_block_based_table.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/hash.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -// Test for block based filter block -// use new interface in FilterPolicy to create filter builder/reader -class BlockBasedFilterBlockTest : public mock::MockBlockBasedTableTester, - public testing::Test { - public: - BlockBasedFilterBlockTest() - : mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10, true)) {} -}; - -TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { - FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); - Slice slice(builder->Finish()); - ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(slice)); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - FilterBlockReader* reader = - new BlockBasedFilterBlockReader(table_.get(), std::move(block)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/10000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - delete builder; - delete reader; -} - -TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) { - FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); - builder->StartBlock(100); - builder->Add("foo"); - builder->Add("bar"); - builder->Add("box"); - builder->StartBlock(200); - builder->Add("box"); - builder->StartBlock(300); - builder->Add("hello"); - Slice slice(builder->Finish()); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - FilterBlockReader* reader = - new BlockBasedFilterBlockReader(table_.get(), std::move(block)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "other", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - delete builder; - delete reader; -} - -TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) { - FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); - - // First filter - builder->StartBlock(0); - builder->Add("foo"); - builder->StartBlock(2000); - builder->Add("bar"); - - // Second filter - builder->StartBlock(3100); - builder->Add("box"); - - // Third filter is empty - - // Last filter - builder->StartBlock(9000); - builder->Add("box"); - builder->Add("hello"); - - Slice slice(builder->Finish()); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - FilterBlockReader* reader = - new BlockBasedFilterBlockReader(table_.get(), std::move(block)); - - // Check first filter - ASSERT_TRUE(reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check second filter - ASSERT_TRUE(reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check third filter (empty) - ASSERT_TRUE(!reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check last filter - ASSERT_TRUE(reader->KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader->KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader->KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - delete builder; - delete reader; -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 5d56499dbf..80e3aefb26 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -37,7 +37,6 @@ #include "rocksdb/table.h" #include "rocksdb/types.h" #include "table/block_based/block.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" @@ -80,17 +79,6 @@ FilterBlockBuilder* CreateFilterBlockBuilder( if (filter_bits_builder == nullptr) { return nullptr; } else { - // Check for backdoor deprecated block-based bloom config - size_t starting_est = filter_bits_builder->EstimateEntriesAdded(); - constexpr auto kSecretStart = - DeprecatedBlockBasedBloomFilterPolicy::kSecretBitsPerKeyStart; - if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) { - int bits_per_key = static_cast(starting_est - kSecretStart); - delete filter_bits_builder; - return new BlockBasedFilterBlockBuilder(mopt.prefix_extractor.get(), - table_opt, bits_per_key); - } - // END check for backdoor deprecated block-based bloom config if (table_opt.partition_filters) { assert(p_index_builder != nullptr); // Since after partition cut request from filter builder it takes time @@ -904,10 +892,6 @@ BlockBasedTableBuilder::BlockBasedTableBuilder( rep_ = new Rep(sanitized_table_options, tbo, file); - if (rep_->filter_builder != nullptr) { - rep_->filter_builder->StartBlock(0); - } - TEST_SYNC_POINT_CALLBACK( "BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey", const_cast(&rep_->props)); @@ -1094,9 +1078,6 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, WriteRawBlock(block_contents, type, handle, block_type, &raw_block_contents); r->compressed_output.clear(); if (is_data_block) { - if (r->filter_builder != nullptr) { - r->filter_builder->StartBlock(r->get_offset()); - } r->props.data_size = r->get_offset(); ++r->props.num_data_blocks; } @@ -1385,9 +1366,6 @@ void BlockBasedTableBuilder::BGWorkWriteRawBlock() { break; } - if (r->filter_builder != nullptr) { - r->filter_builder->StartBlock(r->get_offset()); - } r->props.data_size = r->get_offset(); ++r->props.num_data_blocks; @@ -1496,9 +1474,6 @@ Status BlockBasedTableBuilder::InsertBlockInCacheHelper( case BlockType::kFilterPartitionIndex: s = InsertBlockInCache(block_contents, handle, block_type); break; - case BlockType::kDeprecatedFilter: - s = InsertBlockInCache(block_contents, handle, block_type); - break; case BlockType::kFilter: s = InsertBlockInCache(block_contents, handle, block_type); @@ -1568,7 +1543,6 @@ void BlockBasedTableBuilder::WriteFilterBlock( return; } BlockHandle filter_block_handle; - bool is_block_based_filter = rep_->filter_builder->IsBlockBased(); bool is_partitioned_filter = rep_->table_options.partition_filters; if (ok()) { rep_->props.num_filter_entries += @@ -1595,8 +1569,7 @@ void BlockBasedTableBuilder::WriteFilterBlock( rep_->props.filter_size += filter_content.size(); - BlockType btype = is_block_based_filter ? BlockType::kDeprecatedFilter - : is_partitioned_filter && /* last */ s.ok() + BlockType btype = is_partitioned_filter && /* last */ s.ok() ? BlockType::kFilterPartitionIndex : BlockType::kFilter; WriteRawBlock(filter_content, kNoCompression, &filter_block_handle, btype, @@ -1608,10 +1581,8 @@ void BlockBasedTableBuilder::WriteFilterBlock( // Add mapping from ".Name" to location // of filter data. std::string key; - key = is_block_based_filter ? BlockBasedTable::kFilterBlockPrefix - : is_partitioned_filter - ? BlockBasedTable::kPartitionedFilterBlockPrefix - : BlockBasedTable::kFullFilterBlockPrefix; + key = is_partitioned_filter ? BlockBasedTable::kPartitionedFilterBlockPrefix + : BlockBasedTable::kFullFilterBlockPrefix; key.append(rep_->table_options.filter_policy->CompatibilityName()); meta_index_builder->Add(key, filter_block_handle); } @@ -2114,7 +2085,7 @@ const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const { } } -const std::string BlockBasedTable::kFilterBlockPrefix = "filter."; +const std::string BlockBasedTable::kObsoleteFilterBlockPrefix = "filter."; const std::string BlockBasedTable::kFullFilterBlockPrefix = "fullfilter."; const std::string BlockBasedTable::kPartitionedFilterBlockPrefix = "partitionedfilter."; diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 7074df3135..6c1bb431d8 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -46,7 +46,6 @@ #include "rocksdb/trace_record.h" #include "table/block_based/binary_search_index_reader.h" #include "table/block_based/block.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_based_table_iterator.h" #include "table/block_based/block_like_traits.h" @@ -196,7 +195,6 @@ void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type, switch (block_type) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: PERF_COUNTER_ADD(block_cache_filter_hit_count, 1); if (get_context) { @@ -255,7 +253,6 @@ void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type, switch (block_type) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: if (get_context) { ++get_context->get_context_stats_.num_cache_filter_miss; } else { @@ -312,7 +309,6 @@ void BlockBasedTable::UpdateCacheInsertionMetrics( switch (block_type) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: if (get_context) { ++get_context->get_context_stats_.num_cache_filter_add; if (redundant) { @@ -954,7 +950,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( {std::make_pair(Rep::FilterType::kFullFilter, kFullFilterBlockPrefix), std::make_pair(Rep::FilterType::kPartitionedFilter, kPartitionedFilterBlockPrefix), - std::make_pair(Rep::FilterType::kBlockFilter, kFilterBlockPrefix)}) { + std::make_pair(Rep::FilterType::kNoFilter, + kObsoleteFilterBlockPrefix)}) { if (builtin_compatible) { // This code is only here to deal with a hiccup in early 7.0.x where // there was an unintentional name change in the SST files metadata. @@ -967,7 +964,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( test::LegacyBloomFilterPolicy::kClassName(), test::FastLocalBloomFilterPolicy::kClassName(), test::Standard128RibbonFilterPolicy::kClassName(), - DeprecatedBlockBasedBloomFilterPolicy::kClassName(), + "rocksdb.internal.DeprecatedBlockBasedBloomFilter", BloomFilterPolicy::kClassName(), RibbonFilterPolicy::kClassName(), }; @@ -985,6 +982,13 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( Status s = rep_->filter_handle.DecodeFrom(&v); if (s.ok()) { rep_->filter_type = filter_type; + if (filter_type == Rep::FilterType::kNoFilter) { + ROCKS_LOG_WARN(rep_->ioptions.logger, + "Detected obsolete filter type in %s. Read " + "performance might suffer until DB is fully " + "re-compacted.", + rep_->file->file_name().c_str()); + } break; } } @@ -995,6 +999,13 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle) .ok()) { rep_->filter_type = filter_type; + if (filter_type == Rep::FilterType::kNoFilter) { + ROCKS_LOG_WARN( + rep_->ioptions.logger, + "Detected obsolete filter type in %s. Read performance might " + "suffer until DB is fully re-compacted.", + rep_->file->file_name().c_str()); + } break; } } @@ -1459,10 +1470,6 @@ std::unique_ptr BlockBasedTable::CreateFilterBlockReader( return PartitionedFilterBlockReader::Create( this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context); - case Rep::FilterType::kBlockFilter: - return BlockBasedFilterBlockReader::Create( - this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context); - case Rep::FilterType::kFullFilter: return FullFilterBlockReader::Create(this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context); @@ -1608,7 +1615,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( break; case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: ++get_context->get_context_stats_.num_filter_read; break; default: @@ -1649,7 +1655,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( break; case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: trace_block_type = TraceType::kBlockTraceFilterBlock; break; case BlockType::kCompressionDictionary: @@ -1765,7 +1770,6 @@ Status BlockBasedTable::RetrieveBlock( break; case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: ++(get_context->get_context_stats_.num_filter_read); break; default: @@ -1889,75 +1893,16 @@ bool BlockBasedTable::PrefixMayMatch( bool may_match = true; - // First, try check with full filter FilterBlockReader* const filter = rep_->filter.get(); - bool filter_checked = true; + bool filter_checked = false; if (filter != nullptr) { const bool no_io = read_options.read_tier == kBlockCacheTier; - if (!filter->IsBlockBased()) { - const Slice* const const_ikey_ptr = &internal_key; - may_match = filter->RangeMayExist( - read_options.iterate_upper_bound, user_key_without_ts, - prefix_extractor, rep_->internal_comparator.user_comparator(), - const_ikey_ptr, &filter_checked, need_upper_bound_check, no_io, - lookup_context); - } else { - // if prefix_extractor changed for block based filter, skip filter - if (need_upper_bound_check) { - return true; - } - auto prefix = prefix_extractor->Transform(user_key_without_ts); - InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue); - auto internal_prefix = internal_key_prefix.Encode(); - - // To prevent any io operation in this method, we set `read_tier` to make - // sure we always read index or filter only when they have already been - // loaded to memory. - ReadOptions no_io_read_options; - no_io_read_options.read_tier = kBlockCacheTier; - - // Then, try find it within each block - // we already know prefix_extractor and prefix_extractor_name must match - // because `CheckPrefixMayMatch` first checks `check_filter_ == true` - std::unique_ptr> iiter(NewIndexIterator( - no_io_read_options, - /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, - /*get_context=*/nullptr, lookup_context)); - iiter->Seek(internal_prefix); - - if (!iiter->Valid()) { - // we're past end of file - // if it's incomplete, it means that we avoided I/O - // and we're not really sure that we're past the end - // of the file - may_match = iiter->status().IsIncomplete(); - } else if ((rep_->index_key_includes_seq ? ExtractUserKey(iiter->key()) - : iiter->key()) - .starts_with(ExtractUserKey(internal_prefix))) { - // we need to check for this subtle case because our only - // guarantee is that "the key is a string >= last key in that data - // block" according to the doc/table_format.txt spec. - // - // Suppose iiter->key() starts with the desired prefix; it is not - // necessarily the case that the corresponding data block will - // contain the prefix, since iiter->key() need not be in the - // block. However, the next data block may contain the prefix, so - // we return true to play it safe. - may_match = true; - } else if (filter->IsBlockBased()) { - // iiter->key() does NOT start with the desired prefix. Because - // Seek() finds the first key that is >= the seek target, this - // means that iiter->key() > prefix. Thus, any data blocks coming - // after the data block corresponding to iiter->key() cannot - // possibly contain the key. Thus, the corresponding data block - // is the only on could potentially contain the prefix. - BlockHandle handle = iiter->value().handle; - may_match = filter->PrefixMayMatch( - prefix, prefix_extractor, handle.offset(), no_io, - /*const_key_ptr=*/nullptr, /*get_context=*/nullptr, lookup_context); - } - } + const Slice* const const_ikey_ptr = &internal_key; + may_match = filter->RangeMayExist( + read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor, + rep_->internal_comparator.user_comparator(), const_ikey_ptr, + &filter_checked, need_upper_bound_check, no_io, lookup_context); } if (filter_checked) { @@ -2030,7 +1975,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch( FilterBlockReader* filter, const Slice& internal_key, const bool no_io, const SliceTransform* prefix_extractor, GetContext* get_context, BlockCacheLookupContext* lookup_context) const { - if (filter == nullptr || filter->IsBlockBased()) { + if (filter == nullptr) { return true; } Slice user_key = ExtractUserKey(internal_key); @@ -2039,15 +1984,13 @@ bool BlockBasedTable::FullFilterKeyMayMatch( size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size(); Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz); if (rep_->whole_key_filtering) { - may_match = - filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid, - no_io, const_ikey_ptr, get_context, lookup_context); + may_match = filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr, + get_context, lookup_context); } else if (!PrefixExtractorChanged(prefix_extractor) && prefix_extractor->InDomain(user_key_without_ts) && !filter->PrefixMayMatch( - prefix_extractor->Transform(user_key_without_ts), - prefix_extractor, kNotValid, no_io, const_ikey_ptr, - get_context, lookup_context)) { + prefix_extractor->Transform(user_key_without_ts), no_io, + const_ikey_ptr, get_context, lookup_context)) { // FIXME ^^^: there should be no reason for Get() to depend on current // prefix_extractor at all. It should always use table_prefix_extractor. may_match = false; @@ -2063,14 +2006,13 @@ void BlockBasedTable::FullFilterKeysMayMatch( FilterBlockReader* filter, MultiGetRange* range, const bool no_io, const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const { - if (filter == nullptr || filter->IsBlockBased()) { + if (filter == nullptr) { return; } uint64_t before_keys = range->KeysLeft(); assert(before_keys > 0); // Caller should ensure if (rep_->whole_key_filtering) { - filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io, - lookup_context); + filter->KeysMayMatch(range, no_io, lookup_context); uint64_t after_keys = range->KeysLeft(); if (after_keys) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys); @@ -2086,8 +2028,7 @@ void BlockBasedTable::FullFilterKeysMayMatch( } else if (!PrefixExtractorChanged(prefix_extractor)) { // FIXME ^^^: there should be no reason for MultiGet() to depend on current // prefix_extractor at all. It should always use table_prefix_extractor. - filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false, - lookup_context); + filter->PrefixesMayMatch(range, prefix_extractor, false, lookup_context); RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys); uint64_t after_keys = range->KeysLeft(); uint64_t filtered_keys = before_keys - after_keys; @@ -2152,22 +2093,6 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) { IndexValue v = iiter->value(); - bool not_exist_in_filter = - filter != nullptr && filter->IsBlockBased() == true && - !filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz), - prefix_extractor, v.handle.offset(), no_io, - /*const_ikey_ptr=*/nullptr, get_context, - &lookup_context); - - if (not_exist_in_filter) { - // Not found - // TODO: think about interaction with Merge. If a user key cannot - // cross one data block, we should be fine. - RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); - break; - } - if (!v.first_internal_key.empty() && !skip_filters && UserComparatorWrapper(rep_->internal_comparator.user_comparator()) .CompareWithoutTimestamp( @@ -2272,7 +2197,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, break; } } - if (matched && filter != nullptr && !filter->IsBlockBased()) { + if (matched && filter != nullptr) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, rep_->level); @@ -2425,10 +2350,6 @@ Status BlockBasedTable::VerifyChecksumInBlocks( BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( const Slice& meta_block_name) { - if (meta_block_name.starts_with(kFilterBlockPrefix)) { - return BlockType::kDeprecatedFilter; - } - if (meta_block_name.starts_with(kFullFilterBlockPrefix)) { return BlockType::kFilter; } @@ -2457,6 +2378,11 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( return BlockType::kHashIndexMetadata; } + if (meta_block_name.starts_with(kObsoleteFilterBlockPrefix)) { + // Obsolete but possible in old files + return BlockType::kInvalid; + } + assert(false); return BlockType::kInvalid; } diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 692e519d0f..51981f145b 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -38,7 +38,6 @@ namespace ROCKSDB_NAMESPACE { class Cache; class FilterBlockReader; -class BlockBasedFilterBlockReader; class FullFilterBlockReader; class Footer; class InternalKeyComparator; @@ -67,7 +66,7 @@ using KVPairBlock = std::vector>; // loaded blocks in the memory. class BlockBasedTable : public TableReader { public: - static const std::string kFilterBlockPrefix; + static const std::string kObsoleteFilterBlockPrefix; static const std::string kFullFilterBlockPrefix; static const std::string kPartitionedFilterBlockPrefix; @@ -585,7 +584,6 @@ struct BlockBasedTable::Rep { enum class FilterType { kNoFilter, kFullFilter, - kBlockFilter, kPartitionedFilter, }; FilterType filter_type; diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index 9eeea40daa..450979dd7f 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -729,7 +729,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) iiter->Next(); } while (iiter->Valid()); - if (matched && filter != nullptr && !filter->IsBlockBased()) { + if (matched && filter != nullptr) { RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_TRUE_POSITIVE); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, rep_->level); diff --git a/table/block_based/block_like_traits.h b/table/block_based/block_like_traits.h index 8889b896c1..aeb2551147 100644 --- a/table/block_based/block_like_traits.h +++ b/table/block_based/block_like_traits.h @@ -72,15 +72,10 @@ class BlocklikeTraits { return Status::OK(); } - static Cache::CacheItemHelper* GetCacheItemHelper(BlockType block_type) { - if (block_type == BlockType::kDeprecatedFilter) { - return GetCacheItemHelperForRole< - BlockContents, CacheEntryRole::kDeprecatedFilterBlock>(); - } else { - // E.g. compressed cache - return GetCacheItemHelperForRole(); - } + static Cache::CacheItemHelper* GetCacheItemHelper(BlockType /*block_type*/) { + // E.g. compressed cache + return GetCacheItemHelperForRole(); } }; diff --git a/table/block_based/block_type.h b/table/block_based/block_type.h index 516510b1a4..a9d6a1a773 100644 --- a/table/block_based/block_type.h +++ b/table/block_based/block_type.h @@ -20,7 +20,6 @@ enum class BlockType : uint8_t { kData, kFilter, // for second level partitioned filters and full filters kFilterPartitionIndex, // for top-level index of filter partitions - kDeprecatedFilter, // for old, deprecated block-based filter kProperties, kCompressionDictionary, kRangeDeletion, diff --git a/table/block_based/filter_block.h b/table/block_based/filter_block.h index e767df7f1d..0d67dd2f86 100644 --- a/table/block_based/filter_block.h +++ b/table/block_based/filter_block.h @@ -10,11 +10,6 @@ // A filter block is stored near the end of a Table file. It contains // filters (e.g., bloom filters) for all data blocks in the table combined // into a single filter block. -// -// It is a base class for BlockBasedFilter and FullFilter. -// These two are both used in BlockBasedTable. The first one contain filter -// For a part of keys in sst file, the second contain filter for all keys -// in sst file. #pragma once @@ -44,12 +39,10 @@ using MultiGetRange = MultiGetContext::Range; // A FilterBlockBuilder is used to construct all of the filters for a // particular Table. It generates a single string which is stored as -// a special block in the Table. +// a special block in the Table, or partitioned into smaller filters. // // The sequence of calls to FilterBlockBuilder must match the regexp: -// (StartBlock Add*)* Finish -// -// BlockBased/Full FilterBlock would be called in the same way. +// Add* Finish class FilterBlockBuilder { public: explicit FilterBlockBuilder() {} @@ -59,8 +52,6 @@ class FilterBlockBuilder { virtual ~FilterBlockBuilder() {} - virtual bool IsBlockBased() = 0; // If is blockbased filter - virtual void StartBlock(uint64_t block_offset) = 0; // Start new block filter virtual void Add( const Slice& key_without_ts) = 0; // Add a key to current filter virtual bool IsEmpty() const = 0; // Empty == none added @@ -108,8 +99,6 @@ class FilterBlockReader { FilterBlockReader(const FilterBlockReader&) = delete; FilterBlockReader& operator=(const FilterBlockReader&) = delete; - virtual bool IsBlockBased() = 0; // If is blockbased filter - /** * If no_io is set, then it returns true if it cannot answer the query without * reading data from disk. This is used in PartitionedFilterBlockReader to @@ -120,23 +109,19 @@ class FilterBlockReader { * built upon InternalKey and must be provided via const_ikey_ptr when running * queries. */ - virtual bool KeyMayMatch(const Slice& key, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + virtual bool KeyMayMatch(const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) = 0; - virtual void KeysMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + virtual void KeysMayMatch(MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey_without_ts = iter->ukey_without_ts; const Slice ikey = iter->ikey; GetContext* const get_context = iter->get_context; - if (!KeyMayMatch(ukey_without_ts, prefix_extractor, block_offset, no_io, - &ikey, get_context, lookup_context)) { + if (!KeyMayMatch(ukey_without_ts, no_io, &ikey, get_context, + lookup_context)) { range->SkipKey(iter); } } @@ -145,25 +130,22 @@ class FilterBlockReader { /** * no_io and const_ikey_ptr here means the same as in KeyMayMatch */ - virtual bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + virtual bool PrefixMayMatch(const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) = 0; virtual void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + const bool no_io, BlockCacheLookupContext* lookup_context) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey_without_ts = iter->ukey_without_ts; const Slice ikey = iter->ikey; GetContext* const get_context = iter->get_context; if (prefix_extractor->InDomain(ukey_without_ts) && - !PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), - prefix_extractor, block_offset, no_io, &ikey, - get_context, lookup_context)) { + !PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), no_io, + &ikey, get_context, lookup_context)) { range->SkipKey(iter); } } diff --git a/table/block_based/filter_block_reader_common.cc b/table/block_based/filter_block_reader_common.cc index fe0da0e4d4..51d34fa00f 100644 --- a/table/block_based/filter_block_reader_common.cc +++ b/table/block_based/filter_block_reader_common.cc @@ -112,9 +112,8 @@ bool FilterBlockReaderCommon::RangeMayExist( return true; } else { *filter_checked = true; - return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io, - const_ikey_ptr, /* get_context */ nullptr, - lookup_context); + return PrefixMayMatch(prefix, no_io, const_ikey_ptr, + /* get_context */ nullptr, lookup_context); } } diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index c887c59a0a..020ce87d21 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -24,7 +24,6 @@ #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/slice.h" #include "rocksdb/utilities/object_registry.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/filter_policy_internal.h" #include "table/block_based/full_filter_block.h" @@ -1375,87 +1374,10 @@ const char* ReadOnlyBuiltinFilterPolicy::kClassName() { return kBuiltinFilterMetadataName; } -const char* DeprecatedBlockBasedBloomFilterPolicy::kClassName() { - return "rocksdb.internal.DeprecatedBlockBasedBloomFilter"; -} - std::string BloomLikeFilterPolicy::GetId() const { return Name() + GetBitsPerKeySuffix(); } -DeprecatedBlockBasedBloomFilterPolicy::DeprecatedBlockBasedBloomFilterPolicy( - double bits_per_key) - : BloomLikeFilterPolicy(bits_per_key) {} - -FilterBitsBuilder* DeprecatedBlockBasedBloomFilterPolicy::GetBuilderWithContext( - const FilterBuildingContext&) const { - if (GetWholeBitsPerKey() == 0) { - // "No filter" special case - return nullptr; - } - // Internal contract: returns a new fake builder that encodes bits per key - // into a special value from EstimateEntriesAdded() - struct B : public FilterBitsBuilder { - explicit B(int bits_per_key) : est(kSecretBitsPerKeyStart + bits_per_key) {} - size_t est; - size_t EstimateEntriesAdded() override { return est; } - void AddKey(const Slice&) override {} - using FilterBitsBuilder::Finish; // FIXME - Slice Finish(std::unique_ptr*) override { return Slice(); } - size_t ApproximateNumEntries(size_t) override { return 0; } - }; - return new B(GetWholeBitsPerKey()); -} - -void DeprecatedBlockBasedBloomFilterPolicy::CreateFilter(const Slice* keys, - int n, - int bits_per_key, - std::string* dst) { - // Compute bloom filter size (in both bits and bytes) - uint32_t bits = static_cast(n * bits_per_key); - - // For small n, we can see a very high false positive rate. Fix it - // by enforcing a minimum bloom filter length. - if (bits < 64) bits = 64; - - uint32_t bytes = (bits + 7) / 8; - bits = bytes * 8; - - int num_probes = LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key); - - const size_t init_size = dst->size(); - dst->resize(init_size + bytes, 0); - dst->push_back(static_cast(num_probes)); // Remember # of probes - char* array = &(*dst)[init_size]; - for (int i = 0; i < n; i++) { - LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes, - array); - } -} - -bool DeprecatedBlockBasedBloomFilterPolicy::KeyMayMatch( - const Slice& key, const Slice& bloom_filter) { - const size_t len = bloom_filter.size(); - if (len < 2 || len > 0xffffffffU) { - return false; - } - - const char* array = bloom_filter.data(); - const uint32_t bits = static_cast(len - 1) * 8; - - // Use the encoded k so that we can read filters generated by - // bloom filters created using different parameters. - const int k = static_cast(array[len - 1]); - if (k > 30) { - // Reserved for potentially new encodings for short bloom filters. - // Consider it a match. - return true; - } - // NB: using stored k not num_probes for whole_bits_per_key_ - return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k, - array); -} - BloomFilterPolicy::BloomFilterPolicy(double bits_per_key) : BloomLikeFilterPolicy(bits_per_key) {} @@ -1876,9 +1798,6 @@ std::shared_ptr BloomLikeFilterPolicy::Create( return std::make_shared(bits_per_key); } else if (name == test::Standard128RibbonFilterPolicy::kClassName()) { return std::make_shared(bits_per_key); - } else if (name == DeprecatedBlockBasedBloomFilterPolicy::kClassName()) { - return std::make_shared( - bits_per_key); } else if (name == BloomFilterPolicy::kClassName()) { // For testing return std::make_shared(bits_per_key); @@ -1996,15 +1915,6 @@ static int RegisterBuiltinFilterPolicies(ObjectLibrary& library, uri)); return guard->get(); }); - library.AddFactory( - FilterPatternEntryWithBits( - DeprecatedBlockBasedBloomFilterPolicy::kClassName()), - [](const std::string& uri, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(NewBuiltinFilterPolicyWithBits< - DeprecatedBlockBasedBloomFilterPolicy>(uri)); - return guard->get(); - }); size_t num_types; return static_cast(library.GetFactoryCount(&num_types)); } @@ -2055,7 +1965,6 @@ const std::vector& BloomLikeFilterPolicy::GetAllFixedImpls() { STATIC_AVOID_DESTRUCTION(std::vector, impls){ // Match filter_bench -impl=x ordering test::LegacyBloomFilterPolicy::kClassName(), - DeprecatedBlockBasedBloomFilterPolicy::kClassName(), test::FastLocalBloomFilterPolicy::kClassName(), test::Standard128RibbonFilterPolicy::kClassName(), }; diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 06566f8712..9bc3a24829 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -131,7 +131,7 @@ class BuiltinFilterPolicy : public FilterPolicy { // Read metadata to determine what kind of FilterBitsReader is needed // and return a new one. This must successfully process any filter data // generated by a built-in FilterBitsBuilder, regardless of the impl - // chosen for this BloomFilterPolicy. Not compatible with CreateFilter. + // chosen for this BloomFilterPolicy. FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; static const char* kClassName(); bool IsInstanceOf(const std::string& id) const override; @@ -151,12 +151,8 @@ class BuiltinFilterPolicy : public FilterPolicy { // (An internal convenience function to save boilerplate.) static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&); - protected: - // Deprecated block-based filter only (no longer in public API) - bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const; - private: - // For Bloom filter implementation(s) (except deprecated block-based filter) + // For Bloom filter implementation(s) static BuiltinFilterBitsReader* GetBloomBitsReader(const Slice& contents); // For Ribbon filter implementation(s) @@ -300,30 +296,6 @@ class RibbonFilterPolicy : public BloomLikeFilterPolicy { const int bloom_before_level_; }; -// Deprecated block-based filter only. We still support reading old -// block-based filters from any BuiltinFilterPolicy, but there is no public -// option to build them. However, this class is used to build them for testing -// and for a public backdoor to building them by constructing this policy from -// a string. -class DeprecatedBlockBasedBloomFilterPolicy : public BloomLikeFilterPolicy { - public: - explicit DeprecatedBlockBasedBloomFilterPolicy(double bits_per_key); - - // Internal contract: returns a new fake builder that encodes bits per key - // into a special value from EstimateEntriesAdded(), using - // kSecretBitsPerKeyStart - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const override; - static constexpr size_t kSecretBitsPerKeyStart = 1234567890U; - - static const char* kClassName(); - const char* Name() const override { return kClassName(); } - - static void CreateFilter(const Slice* keys, int n, int bits_per_key, - std::string* dst); - static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter); -}; - // For testing only, but always constructable with internal names namespace test { diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 9bb9212e6a..4ca650edcc 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -125,14 +125,8 @@ FullFilterBlockReader::FullFilterBlockReader( } bool FullFilterBlockReader::KeyMayMatch( - const Slice& key, const SliceTransform* /*prefix_extractor*/, - uint64_t block_offset, const bool no_io, - const Slice* const /*const_ikey_ptr*/, GetContext* get_context, - BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); + const Slice& key, const bool no_io, const Slice* const /*const_ikey_ptr*/, + GetContext* get_context, BlockCacheLookupContext* lookup_context) { if (!whole_key_filtering()) { return true; } @@ -167,14 +161,9 @@ std::unique_ptr FullFilterBlockReader::Create( } bool FullFilterBlockReader::PrefixMayMatch( - const Slice& prefix, const SliceTransform* /* prefix_extractor */, - uint64_t block_offset, const bool no_io, + const Slice& prefix, const bool no_io, const Slice* const /*const_ikey_ptr*/, GetContext* get_context, BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); return MayMatch(prefix, no_io, get_context, lookup_context); } @@ -204,17 +193,12 @@ bool FullFilterBlockReader::MayMatch( return false; } } - return true; // remain the same with block_based filter + return true; } void FullFilterBlockReader::KeysMayMatch( - MultiGetRange* range, const SliceTransform* /*prefix_extractor*/, - uint64_t block_offset, const bool no_io, + MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); if (!whole_key_filtering()) { // Simply return. Don't skip any key - consider all keys as likely to be // present @@ -225,12 +209,7 @@ void FullFilterBlockReader::KeysMayMatch( void FullFilterBlockReader::PrefixesMayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - BlockCacheLookupContext* lookup_context) { -#ifdef NDEBUG - (void)block_offset; -#endif - assert(block_offset == kNotValid); + const bool no_io, BlockCacheLookupContext* lookup_context) { MayMatch(range, no_io, prefix_extractor, lookup_context); } diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index 3753a1c3d6..ea7f06e600 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -49,8 +49,6 @@ class FullFilterBlockBuilder : public FilterBlockBuilder { // directly. and be deleted here ~FullFilterBlockBuilder() {} - virtual bool IsBlockBased() override { return false; } - virtual void StartBlock(uint64_t /*block_offset*/) override {} virtual void Add(const Slice& key_without_ts) override; virtual bool IsEmpty() const override { return !any_added_; } virtual size_t EstimateEntriesAdded() override; @@ -107,28 +105,28 @@ class FullFilterBlockReader FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context); - bool IsBlockBased() override { return false; } - - bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + bool KeyMayMatch(const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; - bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + bool PrefixMayMatch(const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; - void KeysMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + void KeysMayMatch(MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) override; + // Used in partitioned filter code + void KeysMayMatch2(MultiGetRange* range, + const SliceTransform* /*prefix_extractor*/, + const bool no_io, + BlockCacheLookupContext* lookup_context) { + KeysMayMatch(range, no_io, lookup_context); + } void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + const bool no_io, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override; private: diff --git a/table/block_based/full_filter_block_test.cc b/table/block_based/full_filter_block_test.cc index 24d870d4cd..ab0bdc3e02 100644 --- a/table/block_based/full_filter_block_test.cc +++ b/table/block_based/full_filter_block_test.cc @@ -115,8 +115,7 @@ TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) { FullFilterBlockReader reader(table_.get(), std::move(block)); // Remain same symantic with blockbased filter - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); @@ -137,39 +136,34 @@ TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) { nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("bar", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("box", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("hello", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("missing", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("other", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } class FullFilterBlockTest : public mock::MockBlockBasedTableTester, @@ -191,8 +185,7 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) { FullFilterBlockReader reader(table_.get(), std::move(block)); // Remain same symantic with blockbased filter - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); @@ -292,39 +285,34 @@ TEST_F(FullFilterBlockTest, SingleChunk) { nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("bar", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("box", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("hello", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/kNotValid, + ASSERT_TRUE(reader.KeyMayMatch("foo", /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("missing", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); + ASSERT_TRUE(!reader.KeyMayMatch("other", + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/mock_block_based_table.h b/table/block_based/mock_block_based_table.h index 4dd6e392ba..13f3dfaee1 100644 --- a/table/block_based/mock_block_based_table.h +++ b/table/block_based/mock_block_based_table.h @@ -7,7 +7,6 @@ #include #include "rocksdb/filter_policy.h" -#include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/filter_policy_internal.h" diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 6fc6ddd1e1..c80cd10fdf 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -216,58 +216,41 @@ std::unique_ptr PartitionedFilterBlockReader::Create( } bool PartitionedFilterBlockReader::KeyMayMatch( - const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, + const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) { assert(const_ikey_ptr != nullptr); - assert(block_offset == kNotValid); if (!whole_key_filtering()) { return true; } - return MayMatch(key, prefix_extractor, block_offset, no_io, const_ikey_ptr, - get_context, lookup_context, + return MayMatch(key, no_io, const_ikey_ptr, get_context, lookup_context, &FullFilterBlockReader::KeyMayMatch); } void PartitionedFilterBlockReader::KeysMayMatch( - MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) { - assert(block_offset == kNotValid); if (!whole_key_filtering()) { return; // Any/all may match } - MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context, - &FullFilterBlockReader::KeysMayMatch); + MayMatch(range, nullptr, no_io, lookup_context, + &FullFilterBlockReader::KeysMayMatch2); } bool PartitionedFilterBlockReader::PrefixMayMatch( - const Slice& prefix, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, + const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) { assert(const_ikey_ptr != nullptr); - assert(block_offset == kNotValid); - if (!table_prefix_extractor() && !prefix_extractor) { - return true; - } - - return MayMatch(prefix, prefix_extractor, block_offset, no_io, const_ikey_ptr, - get_context, lookup_context, + return MayMatch(prefix, no_io, const_ikey_ptr, get_context, lookup_context, &FullFilterBlockReader::PrefixMayMatch); } void PartitionedFilterBlockReader::PrefixesMayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - BlockCacheLookupContext* lookup_context) { - assert(block_offset == kNotValid); - if (!table_prefix_extractor() && !prefix_extractor) { - return; // Any/all may match - } - - MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context, + const bool no_io, BlockCacheLookupContext* lookup_context) { + assert(prefix_extractor); + MayMatch(range, prefix_extractor, no_io, lookup_context, &FullFilterBlockReader::PrefixesMayMatch); } @@ -331,8 +314,7 @@ Status PartitionedFilterBlockReader::GetFilterPartitionBlock( } bool PartitionedFilterBlockReader::MayMatch( - const Slice& slice, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr, + const Slice& slice, bool no_io, const Slice* const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context, FilterFunction filter_function) const { CachableEntry filter_block; @@ -364,14 +346,13 @@ bool PartitionedFilterBlockReader::MayMatch( FullFilterBlockReader filter_partition(table(), std::move(filter_partition_block)); - return (filter_partition.*filter_function)( - slice, prefix_extractor, block_offset, no_io, const_ikey_ptr, get_context, - lookup_context); + return (filter_partition.*filter_function)(slice, no_io, const_ikey_ptr, + get_context, lookup_context); } void PartitionedFilterBlockReader::MayMatch( - MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, BlockCacheLookupContext* lookup_context, + MultiGetRange* range, const SliceTransform* prefix_extractor, bool no_io, + BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const { CachableEntry filter_block; Status s = @@ -399,9 +380,8 @@ void PartitionedFilterBlockReader::MayMatch( if (!prev_filter_handle.IsNull() && this_filter_handle != prev_filter_handle) { MultiGetRange subrange(*range, start_iter_same_handle, iter); - MayMatchPartition(&subrange, prefix_extractor, block_offset, - prev_filter_handle, no_io, lookup_context, - filter_function); + MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io, + lookup_context, filter_function); range->AddSkipsFrom(subrange); start_iter_same_handle = iter; } @@ -416,16 +396,15 @@ void PartitionedFilterBlockReader::MayMatch( } if (!prev_filter_handle.IsNull()) { MultiGetRange subrange(*range, start_iter_same_handle, range->end()); - MayMatchPartition(&subrange, prefix_extractor, block_offset, - prev_filter_handle, no_io, lookup_context, - filter_function); + MayMatchPartition(&subrange, prefix_extractor, prev_filter_handle, no_io, + lookup_context, filter_function); range->AddSkipsFrom(subrange); } } void PartitionedFilterBlockReader::MayMatchPartition( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, BlockHandle filter_handle, bool no_io, + BlockHandle filter_handle, bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const { CachableEntry filter_partition_block; @@ -439,8 +418,8 @@ void PartitionedFilterBlockReader::MayMatchPartition( FullFilterBlockReader filter_partition(table(), std::move(filter_partition_block)); - (filter_partition.*filter_function)(range, prefix_extractor, block_offset, - no_io, lookup_context); + (filter_partition.*filter_function)(range, prefix_extractor, no_io, + lookup_context); } size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const { diff --git a/table/block_based/partitioned_filter_block.h b/table/block_based/partitioned_filter_block.h index 302b30f190..3c00fa83e4 100644 --- a/table/block_based/partitioned_filter_block.h +++ b/table/block_based/partitioned_filter_block.h @@ -109,25 +109,19 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon { FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context); - bool IsBlockBased() override { return false; } - bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + bool KeyMayMatch(const Slice& key, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; - void KeysMayMatch(MultiGetRange* range, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + void KeysMayMatch(MultiGetRange* range, const bool no_io, BlockCacheLookupContext* lookup_context) override; - bool PrefixMayMatch(const Slice& prefix, - const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + bool PrefixMayMatch(const Slice& prefix, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, + const bool no_io, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override; @@ -142,27 +136,22 @@ class PartitionedFilterBlockReader : public FilterBlockReaderCommon { CachableEntry* filter_block) const; using FilterFunction = bool (FullFilterBlockReader::*)( - const Slice& slice, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - const Slice* const const_ikey_ptr, GetContext* get_context, - BlockCacheLookupContext* lookup_context); - bool MayMatch(const Slice& slice, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr, + const Slice& slice, const bool no_io, const Slice* const const_ikey_ptr, + GetContext* get_context, BlockCacheLookupContext* lookup_context); + bool MayMatch(const Slice& slice, bool no_io, const Slice* const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context, FilterFunction filter_function) const; using FilterManyFunction = void (FullFilterBlockReader::*)( MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, const bool no_io, - BlockCacheLookupContext* lookup_context); + const bool no_io, BlockCacheLookupContext* lookup_context); void MayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, bool no_io, - BlockCacheLookupContext* lookup_context, + bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const; void MayMatchPartition(MultiGetRange* range, const SliceTransform* prefix_extractor, - uint64_t block_offset, BlockHandle filter_handle, - bool no_io, BlockCacheLookupContext* lookup_context, + BlockHandle filter_handle, bool no_io, + BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const; Status CacheDependencies(const ReadOptions& ro, bool pin) override; diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index d7ae4bfd19..d68e5d063c 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -161,8 +161,7 @@ class PartitionedFilterBlockTest } void VerifyReader(PartitionedFilterBlockBuilder* builder, - PartitionedIndexBuilder* pib, bool empty = false, - const SliceTransform* prefix_extractor = nullptr) { + PartitionedIndexBuilder* pib, bool empty = false) { std::unique_ptr reader( NewReader(builder, pib)); // Querying added keys @@ -170,31 +169,31 @@ class PartitionedFilterBlockTest for (auto key : keys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io, - &ikey_slice, /*get_context=*/nullptr, + ASSERT_TRUE(reader->KeyMayMatch(key, !no_io, &ikey_slice, + /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } { // querying a key twice auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->KeyMayMatch( - keys[0], prefix_extractor, kNotValid, !no_io, &ikey_slice, - /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch(keys[0], !no_io, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } // querying missing keys for (auto key : missing_keys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); if (empty) { - ASSERT_TRUE(reader->KeyMayMatch( - key, prefix_extractor, kNotValid, !no_io, &ikey_slice, - /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->KeyMayMatch(key, !no_io, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } else { // assuming a good hash function - ASSERT_FALSE(reader->KeyMayMatch( - key, prefix_extractor, kNotValid, !no_io, &ikey_slice, - /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + ASSERT_FALSE(reader->KeyMayMatch(key, !no_io, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } } @@ -343,20 +342,20 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { for (auto key : pkeys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->PrefixMayMatch( - prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid, - /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key), + /*no_io=*/false, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } // Non-existent keys but with the same prefix const std::string pnonkeys[4] = {"p-key9", "p-key11", "p-key21", "p-key31"}; for (auto key : pnonkeys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->PrefixMayMatch( - prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid, - /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key), + /*no_io=*/false, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } @@ -391,10 +390,10 @@ TEST_P(PartitionedFilterBlockTest, PrefixInWrongPartitionBug) { auto prefix = prefix_extractor->Transform(key); auto ikey = InternalKey(prefix, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); - ASSERT_TRUE(reader->PrefixMayMatch( - prefix, prefix_extractor.get(), kNotValid, - /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); + ASSERT_TRUE(reader->PrefixMayMatch(prefix, + /*no_io=*/false, &ikey_slice, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); } } diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index 4ca4063d74..dc32c887c2 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -285,7 +285,6 @@ IOStatus BlockFetcher::ReadBlockContents() { switch (block_type_) { case BlockType::kFilter: case BlockType::kFilterPartitionIndex: - case BlockType::kDeprecatedFilter: PERF_COUNTER_ADD(filter_block_read_count, 1); break; diff --git a/table/table_test.cc b/table/table_test.cc index 39f6e19740..f8f5bda49b 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -3524,11 +3524,10 @@ TEST_P(BlockBasedTableTest, InvalidOptions) { } TEST_P(BlockBasedTableTest, BlockReadCountTest) { - // bloom_filter_type = 0 -- block-based filter (not available in public API) // bloom_filter_type = 1 -- full filter using use_block_based_builder=false // bloom_filter_type = 2 -- full filter using use_block_based_builder=true // because of API change to hide block-based filter - for (int bloom_filter_type = 0; bloom_filter_type <= 2; ++bloom_filter_type) { + for (int bloom_filter_type = 1; bloom_filter_type <= 2; ++bloom_filter_type) { for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2; ++index_and_filter_in_cache) { Options options; @@ -3537,22 +3536,8 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) { BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_cache = NewLRUCache(1, 0); table_options.cache_index_and_filter_blocks = index_and_filter_in_cache; - if (bloom_filter_type == 0) { -#ifndef ROCKSDB_LITE - // Use back-door way of enabling obsolete block-based Bloom - ASSERT_OK(FilterPolicy::CreateFromString( - ConfigOptions(), - "rocksdb.internal.DeprecatedBlockBasedBloomFilter:10", - &table_options.filter_policy)); -#else - // Skip this case in LITE build - continue; -#endif - } else { - // Public API - table_options.filter_policy.reset( - NewBloomFilterPolicy(10, bloom_filter_type == 2)); - } + table_options.filter_policy.reset( + NewBloomFilterPolicy(10, bloom_filter_type == 2)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); std::vector keys; stl_wrappers::KVMap kvmap; diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index bd9e43b43b..9cebbbca97 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1605,9 +1605,6 @@ DEFINE_double(cuckoo_hash_ratio, 0.9, "Hash ratio for Cuckoo SST table."); DEFINE_bool(use_hash_search, false, "if use kHashSearch " "instead of kBinarySearch. " "This is valid if only we use BlockTable"); -DEFINE_bool(use_block_based_filter, false, "if use kBlockBasedFilter " - "instead of kFullFilter for filter block. " - "This is valid if only we use BlockTable"); DEFINE_string(merge_operator, "", "The merge operator to use with the database." "If a new merge operator is specified, be sure to use fresh" " database The possible merge operators are defined in" @@ -4525,19 +4522,6 @@ class Benchmark { table_options->filter_policy = BlockBasedTableOptions().filter_policy; } else if (FLAGS_bloom_bits == 0) { table_options->filter_policy.reset(); - } else if (FLAGS_use_block_based_filter) { - // Use back-door way of enabling obsolete block-based Bloom - Status s = FilterPolicy::CreateFromString( - ConfigOptions(), - "rocksdb.internal.DeprecatedBlockBasedBloomFilter:" + - std::to_string(FLAGS_bloom_bits), - &table_options->filter_policy); - if (!s.ok()) { - fprintf(stderr, - "failure creating obsolete block-based filter: %s\n", - s.ToString().c_str()); - exit(1); - } } else { table_options->filter_policy.reset( FLAGS_use_ribbon_filter ? NewRibbonFilterPolicy(FLAGS_bloom_bits) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index e0b4d36c28..cea8232d5e 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -119,7 +119,6 @@ default_params = { "use_merge": lambda: random.randint(0, 1), # 999 -> use Bloom API "ribbon_starting_level": lambda: random.choice([random.randint(-1, 10), 999]), - "use_block_based_filter": lambda: random.randint(0, 1), "value_size_mult": 32, "verify_checksum": 1, "write_buffer_size": 4 * 1024 * 1024, @@ -358,7 +357,6 @@ ts_params = { "use_blob_db": 0, "enable_compaction_filter": 0, "ingest_external_file_one_in": 0, - "use_block_based_filter": 0, } multiops_txn_default_params = { @@ -497,10 +495,6 @@ def finalize_and_sanitize(src_params): if dest_params["partition_filters"] == 1: if dest_params["index_type"] != 2: dest_params["partition_filters"] = 0 - else: - dest_params["use_block_based_filter"] = 0 - if dest_params["ribbon_starting_level"] < 999: - dest_params["use_block_based_filter"] = 0 if dest_params.get("atomic_flush", 0) == 1: # disable pipelined write when atomic flush is used. dest_params["enable_pipelined_write"] = 0 diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 91d640521e..0bb50cbef8 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -69,207 +69,6 @@ static int NextLength(int length) { return length; } -class BlockBasedBloomTest : public testing::Test { - private: - std::unique_ptr policy_; - std::string filter_; - std::vector keys_; - - public: - BlockBasedBloomTest() { ResetPolicy(); } - - void Reset() { - keys_.clear(); - filter_.clear(); - } - - void ResetPolicy(double bits_per_key) { - policy_.reset(new DeprecatedBlockBasedBloomFilterPolicy(bits_per_key)); - Reset(); - } - - void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); } - - void Add(const Slice& s) { - keys_.push_back(s.ToString()); - } - - void Build() { - std::vector key_slices; - for (size_t i = 0; i < keys_.size(); i++) { - key_slices.push_back(Slice(keys_[i])); - } - filter_.clear(); - DeprecatedBlockBasedBloomFilterPolicy::CreateFilter( - &key_slices[0], static_cast(key_slices.size()), - policy_->GetWholeBitsPerKey(), &filter_); - keys_.clear(); - if (kVerbose >= 2) DumpFilter(); - } - - size_t FilterSize() const { - return filter_.size(); - } - - Slice FilterData() const { return Slice(filter_); } - - void DumpFilter() { - fprintf(stderr, "F("); - for (size_t i = 0; i+1 < filter_.size(); i++) { - const unsigned int c = static_cast(filter_[i]); - for (int j = 0; j < 8; j++) { - fprintf(stderr, "%c", (c & (1 <= 1) { - fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", - rate*100.0, length, static_cast(FilterSize())); - } - if (FLAGS_bits_per_key == 10) { - ASSERT_LE(rate, 0.02); // Must not be over 2% - if (rate > 0.0125) { - mediocre_filters++; // Allowed, but not too often - } else { - good_filters++; - } - } - } - if (FLAGS_bits_per_key == 10 && kVerbose >= 1) { - fprintf(stderr, "Filters: %d good, %d mediocre\n", - good_filters, mediocre_filters); - } - ASSERT_LE(mediocre_filters, good_filters/5); -} - -// Ensure the implementation doesn't accidentally change in an -// incompatible way -TEST_F(BlockBasedBloomTest, Schema) { - char buffer[sizeof(int)]; - - ResetPolicy(8); // num_probes = 5 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 3589896109U); - - ResetPolicy(9); // num_probes = 6 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 969445585U); - - ResetPolicy(11); // num_probes = 7 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 1694458207U); - - ResetPolicy(10); // num_probes = 6 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 2373646410U); - - ResetPolicy(10); - for (int key = /*CHANGED*/ 1; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 1908442116U); - - ResetPolicy(10); - for (int key = 1; key < /*CHANGED*/ 88; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), 3057004015U); - - // With new fractional bits_per_key, check that we are rounding to - // whole bits per key for old Bloom filters. - ResetPolicy(9.5); // Treated as 10 - for (int key = 1; key < 88; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U); - - ResetPolicy(10.499); // Treated as 10 - for (int key = 1; key < 88; key++) { - Add(Key(key, buffer)); - } - Build(); - ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U); - - ResetPolicy(); -} - -// Different bits-per-byte - class FullBloomTest : public testing::TestWithParam { protected: BlockBasedTableOptions table_options_; diff --git a/util/filter_bench.cc b/util/filter_bench.cc index bca4071d81..294f4367d4 100644 --- a/util/filter_bench.cc +++ b/util/filter_bench.cc @@ -84,8 +84,8 @@ DEFINE_bool(new_builder, false, DEFINE_uint32(impl, 0, "Select filter implementation. Without -use_plain_table_bloom:" - "0 = legacy full Bloom filter, 1 = block-based Bloom filter, " - "2 = format_version 5 Bloom filter, 3 = Ribbon128 filter. With " + "0 = legacy full Bloom filter, " + "1 = format_version 5 Bloom filter, 2 = Ribbon128 filter. With " "-use_plain_table_bloom: 0 = no locality, 1 = locality."); DEFINE_bool(net_includes_hashing, false, @@ -358,13 +358,9 @@ void FilterBench::Go() { "-impl must currently be >= 0 and <= 1 for Plain table"); } } else { - if (FLAGS_impl == 1) { + if (FLAGS_impl > 2) { throw std::runtime_error( - "Block-based filter not currently supported by filter_bench"); - } - if (FLAGS_impl > 3) { - throw std::runtime_error( - "-impl must currently be 0, 2, or 3 for Block-based table"); + "-impl must currently be >= 0 and <= 2 for Block-based table"); } } @@ -603,7 +599,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run, auto dry_run_hash_fn = DryRunNoHash; if (!FLAGS_net_includes_hashing) { - if (FLAGS_impl < 2 || FLAGS_use_plain_table_bloom) { + if (FLAGS_impl == 0 || FLAGS_use_plain_table_bloom) { dry_run_hash_fn = DryRunHash32; } else { dry_run_hash_fn = DryRunHash64; @@ -728,8 +724,6 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run, } else { may_match = info.full_block_reader_->KeyMayMatch( batch_slices[i], - /*prefix_extractor=*/nullptr, - /*block_offset=*/ROCKSDB_NAMESPACE::kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr); diff --git a/utilities/cache_dump_load_impl.cc b/utilities/cache_dump_load_impl.cc index 0c2e2e2f53..7745e1618d 100644 --- a/utilities/cache_dump_load_impl.cc +++ b/utilities/cache_dump_load_impl.cc @@ -139,11 +139,6 @@ CacheDumperImpl::DumpOneBlockCallBack() { block_start = (static_cast(value))->data(); block_len = (static_cast(value))->size(); break; - case CacheEntryRole::kDeprecatedFilterBlock: - type = CacheDumpUnitType::kDeprecatedFilterBlock; - block_start = (static_cast(value))->data.data(); - block_len = (static_cast(value))->data.size(); - break; case CacheEntryRole::kFilterBlock: type = CacheDumpUnitType::kFilter; block_start = (static_cast(value)) @@ -163,6 +158,10 @@ CacheDumperImpl::DumpOneBlockCallBack() { block_start = (static_cast(value))->data(); block_len = (static_cast(value))->size(); break; + case CacheEntryRole::kDeprecatedFilterBlock: + // Obsolete + filter_out = true; + break; case CacheEntryRole::kMisc: filter_out = true; break; @@ -322,22 +321,6 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() { // according to the block type, get the helper callback function and create // the corresponding block switch (dump_unit.type) { - case CacheDumpUnitType::kDeprecatedFilterBlock: { - helper = BlocklikeTraits::GetCacheItemHelper( - BlockType::kDeprecatedFilter); - std::unique_ptr block_holder; - block_holder.reset(BlocklikeTraits::Create( - std::move(raw_block_contents), 0, statistics, false, - toptions_.filter_policy.get())); - // Insert the block to secondary cache. - // Note that, if we cannot get the correct helper callback, the block - // will not be inserted. - if (helper != nullptr) { - s = secondary_cache_->Insert(dump_unit.key, - (void*)(block_holder.get()), helper); - } - break; - } case CacheDumpUnitType::kFilter: { helper = BlocklikeTraits::GetCacheItemHelper( BlockType::kFilter); @@ -390,6 +373,9 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() { } case CacheDumpUnitType::kFooter: break; + case CacheDumpUnitType::kDeprecatedFilterBlock: + // Obsolete + break; default: continue; } diff --git a/utilities/cache_dump_load_impl.h b/utilities/cache_dump_load_impl.h index 6377a858f9..ad8cd045a9 100644 --- a/utilities/cache_dump_load_impl.h +++ b/utilities/cache_dump_load_impl.h @@ -36,7 +36,7 @@ enum CacheDumpUnitType : unsigned char { kHashIndexMetadata = 9, kMetaIndex = 10, kIndex = 11, - kDeprecatedFilterBlock = 12, + kDeprecatedFilterBlock = 12, // OBSOLETE / DEPRECATED kFilterMetaBlock = 13, kBlockTypeMax, };