From 73c0a33346834ee6c9809cedaf7e61a6e6569f77 Mon Sep 17 00:00:00 2001 From: Haobo Xu Date: Fri, 3 May 2013 15:26:12 -0700 Subject: [PATCH] [RocksDB] fix compaction filter trigger condition Summary: Currently, compaction filter is run on internal key older than the oldest snapshot, which is incorrect. Compaction filter should really be run on the most recent internal key when there is no external snapshot. Test Plan: make check; db_stress Reviewers: dhruba Reviewed By: dhruba Differential Revision: https://reviews.facebook.net/D10641 --- db/db_impl.cc | 9 ++++++--- db/db_test.cc | 28 ++++++++++++---------------- db/snapshot.h | 5 +---- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index b8308bb32c..a6a884d128 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1703,10 +1703,13 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { value = merge.value(); } else if (options_.CompactionFilter != nullptr && ikey.type != kTypeDeletion && - ikey.sequence < earliest_snapshot) { - // If the user has specified a compaction filter, then invoke - // it. If the return value of the compaction filter is true, + visible_at_tip) { + // If the user has specified a compaction filter and there are no + // outstanding external snapshots, then invoke the filter. + // If the return value of the compaction filter is true, // drop this key from the output. + assert(ikey.type == kTypeValue); + assert(!drop); bool value_changed = false; compaction_filter_value.clear(); drop = options_.CompactionFilter(options_.compaction_filter_args, diff --git a/db/db_test.cc b/db/db_test.cc index 737e38b1b3..6cb42b8d4a 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1400,13 +1400,9 @@ TEST(DBTest, CompactionFilter) { options.CompactionFilter = keep_filter; Reopen(&options); - // Write 100K+1 keys, these are written to a few files - // in L0. We do this so that the current snapshot points - // to the 100001 key.The compaction filter is not invoked - // on keys that are visible via a snapshot because we - // anyways cannot delete it. + // Write 100K keys, these are written to a few files in L0. const std::string value(10, 'x'); - for (int i = 0; i < 100001; i++) { + for (int i = 0; i < 100000; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); Put(key, value); @@ -1433,6 +1429,7 @@ TEST(DBTest, CompactionFilter) { // has sequence number zero. The 100001st record // is at the tip of this snapshot and cannot // be zeroed out. + // TODO: figure out sequence number squashtoo int count = 0; int total = 0; Iterator* iter = dbfull()->TEST_NewInternalIterator(); @@ -1448,12 +1445,12 @@ TEST(DBTest, CompactionFilter) { } iter->Next(); } - ASSERT_EQ(total, 100001); + ASSERT_EQ(total, 100000); ASSERT_EQ(count, 1); delete iter; - // overwrite all the 100K+1 keys once again. - for (int i = 0; i < 100001; i++) { + // overwrite all the 100K keys once again. + for (int i = 0; i < 100000; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); Put(key, value); @@ -1480,7 +1477,7 @@ TEST(DBTest, CompactionFilter) { DestroyAndReopen(&options); // write all the keys once again. - for (int i = 0; i < 100001; i++) { + for (int i = 0; i < 100000; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); Put(key, value); @@ -1503,10 +1500,7 @@ TEST(DBTest, CompactionFilter) { ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(1), 0); - // Scan the entire database to ensure that only the - // 100001th key is left in the db. The 100001th key - // is part of the default-most-current snapshot and - // cannot be deleted. + // Scan the entire database to ensure that nothing is left iter = db_->NewIterator(ReadOptions()); iter->SeekToFirst(); count = 0; @@ -1514,12 +1508,14 @@ TEST(DBTest, CompactionFilter) { count++; iter->Next(); } - ASSERT_EQ(count, 1); + ASSERT_EQ(count, 0); delete iter; // The sequence number of the remaining record // is not zeroed out even though it is at the // level Lmax because this record is at the tip + // TODO: remove the following or design a different + // test count = 0; iter = dbfull()->TEST_NewInternalIterator(); iter->SeekToFirst(); @@ -1531,7 +1527,7 @@ TEST(DBTest, CompactionFilter) { count++; iter->Next(); } - ASSERT_EQ(count, 1); + ASSERT_EQ(count, 0); delete iter; } diff --git a/db/snapshot.h b/db/snapshot.h index 96cf203a38..ddac4d86bc 100644 --- a/db/snapshot.h +++ b/db/snapshot.h @@ -59,12 +59,9 @@ class SnapshotList { // retrieve all snapshot numbers. They are sorted in ascending order. void getAll(std::vector& ret) { - SnapshotImpl* s = &list_; if (empty()) return; - SequenceNumber prev __attribute__((unused)) = 0; + SnapshotImpl* s = &list_; while (s->next_ != &list_) { - assert(prev <= s->next_->number_); - assert(prev = s->next_->number_); // assignment ret.push_back(s->next_->number_); s = s ->next_; }