mirror of https://github.com/facebook/rocksdb.git
Fix MultiGet range deletion handling and a memory leak (#10513)
Summary: This PR fixes 2 bugs introduced in https://github.com/facebook/rocksdb/issues/10432 - 1. If the bloom filter returned a negative result for all MultiGet keys in a file, the range tombstones in that file were being ignored, resulting in incorrect results if those tombstones covered a key in a higher level. 2. If all the keys in a file were filtered out in `TableCache::MultiGetFilter`, the table cache handle was not being released. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10513 Test Plan: Add a new unit test that fails without this fix Reviewed By: akankshamahajan15 Differential Revision: D38548739 Pulled By: anand1976 fbshipit-source-id: a741a1e25d2e991d63f038100f126c2dc404a87c
This commit is contained in:
parent
06b04127a8
commit
0b02960d8c
|
@ -2146,7 +2146,7 @@ class DBMultiGetAsyncIOTest : public DBBasicTest {
|
|||
|
||||
// Put all keys in the bottommost level, and overwrite some keys
|
||||
// in L0 and L1
|
||||
for (int i = 0; i < 128; ++i) {
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
EXPECT_OK(Put(Key(i), "val_l2_" + std::to_string(i)));
|
||||
num_keys++;
|
||||
if (num_keys == 8) {
|
||||
|
@ -2172,6 +2172,21 @@ class DBMultiGetAsyncIOTest : public DBBasicTest {
|
|||
EXPECT_OK(Flush());
|
||||
num_keys = 0;
|
||||
}
|
||||
// Put some range deletes in L1
|
||||
for (int i = 128; i < 256; i += 32) {
|
||||
std::string range_begin = Key(i);
|
||||
std::string range_end = Key(i + 16);
|
||||
EXPECT_OK(dbfull()->DeleteRange(WriteOptions(),
|
||||
dbfull()->DefaultColumnFamily(),
|
||||
range_begin, range_end));
|
||||
// Also do some Puts to force creation of bloom filter
|
||||
for (int j = i + 16; j < i + 32; ++j) {
|
||||
if (j % 3 == 0) {
|
||||
EXPECT_OK(Put(Key(j), "val_l1_" + std::to_string(j)));
|
||||
}
|
||||
}
|
||||
EXPECT_OK(Flush());
|
||||
}
|
||||
MoveFilesToLevel(1);
|
||||
|
||||
for (int i = 0; i < 128; i += 5) {
|
||||
|
@ -2366,6 +2381,32 @@ TEST_F(DBMultiGetAsyncIOTest, GetFromL2WithRangeOverlapL0L1) {
|
|||
// Bloom filters in L0/L1 will avoid the coroutine calls in those levels
|
||||
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 2);
|
||||
}
|
||||
|
||||
// Keys 139 and 163 live in L2 but are covered by range tombstones in L1.
// MultiGet must honor those tombstones even though the L1 bloom filters
// return a negative result for the point keys, so both lookups must come
// back NotFound.
TEST_F(DBMultiGetAsyncIOTest, GetFromL2WithRangeDelInL1) {
  std::vector<std::string> key_storage;
  std::vector<Slice> lookup_keys;

  // Materialize the key strings first, then take Slices, so the Slices
  // cannot dangle on vector reallocation.
  for (int key_id : {139, 163}) {
    key_storage.push_back(Key(key_id));
  }
  for (const std::string& k : key_storage) {
    lookup_keys.push_back(k);
  }
  std::vector<PinnableSlice> results(lookup_keys.size());
  std::vector<Status> statuses(lookup_keys.size());

  ReadOptions read_opts;
  read_opts.async_io = true;
  dbfull()->MultiGet(read_opts, dbfull()->DefaultColumnFamily(),
                     lookup_keys.size(), lookup_keys.data(), results.data(),
                     statuses.data());
  ASSERT_EQ(results.size(), 2);
  for (const Status& s : statuses) {
    ASSERT_EQ(s, Status::NotFound());
  }

  // Only the two L2 reads should go through the coroutine path; the bloom
  // filters in L0/L1 let MultiGet skip the coroutine calls in those levels.
  ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 2);
}
|
||||
#endif // USE_COROUTINES
|
||||
|
||||
TEST_F(DBBasicTest, MultiGetStats) {
|
||||
|
|
|
@ -501,6 +501,22 @@ Status TableCache::Get(
|
|||
return s;
|
||||
}
|
||||
|
||||
void TableCache::UpdateRangeTombstoneSeqnums(
|
||||
const ReadOptions& options, TableReader* t,
|
||||
MultiGetContext::Range& table_range) {
|
||||
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
|
||||
t->NewRangeTombstoneIterator(options));
|
||||
if (range_del_iter != nullptr) {
|
||||
for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) {
|
||||
SequenceNumber* max_covering_tombstone_seq =
|
||||
iter->get_context->max_covering_tombstone_seq();
|
||||
*max_covering_tombstone_seq = std::max(
|
||||
*max_covering_tombstone_seq,
|
||||
range_del_iter->MaxCoveringTombstoneSeqnum(iter->ukey_with_ts));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status TableCache::MultiGetFilter(
|
||||
const ReadOptions& options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
|
@ -524,6 +540,8 @@ Status TableCache::MultiGetFilter(
|
|||
Status s;
|
||||
TableReader* t = fd.table_reader;
|
||||
Cache::Handle* handle = nullptr;
|
||||
MultiGetContext::Range tombstone_range(*mget_range, mget_range->begin(),
|
||||
mget_range->end());
|
||||
if (t == nullptr) {
|
||||
s = FindTable(
|
||||
options, file_options_, internal_comparator, fd, &handle,
|
||||
|
@ -539,6 +557,18 @@ Status TableCache::MultiGetFilter(
|
|||
if (s.ok()) {
|
||||
s = t->MultiGetFilter(options, prefix_extractor.get(), mget_range);
|
||||
}
|
||||
if (mget_range->empty()) {
|
||||
if (s.ok() && !options.ignore_range_deletions) {
|
||||
// If all the keys have been filtered out by the bloom filter, then
|
||||
// update the range tombstone sequence numbers for the keys as
|
||||
// MultiGet() will not be called for this set of keys.
|
||||
UpdateRangeTombstoneSeqnums(options, t, tombstone_range);
|
||||
}
|
||||
if (handle) {
|
||||
ReleaseHandle(handle);
|
||||
*table_handle = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
|
|
@ -235,6 +235,11 @@ class TableCache {
|
|||
size_t max_file_size_for_l0_meta_pin = 0,
|
||||
Temperature file_temperature = Temperature::kUnknown);
|
||||
|
||||
// Update the max_covering_tombstone_seq in the GetContext for each key based
|
||||
// on the range deletions in the table
|
||||
void UpdateRangeTombstoneSeqnums(const ReadOptions& options, TableReader* t,
|
||||
MultiGetContext::Range& table_range);
|
||||
|
||||
// Create a key prefix for looking up the row cache. The prefix is of the
|
||||
// format row_cache_id + fd_number + seq_no. Later, the user key can be
|
||||
// appended to form the full key
|
||||
|
|
|
@ -79,18 +79,7 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
|
|||
}
|
||||
}
|
||||
if (s.ok() && !options.ignore_range_deletions) {
|
||||
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
|
||||
t->NewRangeTombstoneIterator(options));
|
||||
if (range_del_iter != nullptr) {
|
||||
for (auto iter = table_range.begin(); iter != table_range.end();
|
||||
++iter) {
|
||||
SequenceNumber* max_covering_tombstone_seq =
|
||||
iter->get_context->max_covering_tombstone_seq();
|
||||
*max_covering_tombstone_seq = std::max(
|
||||
*max_covering_tombstone_seq,
|
||||
range_del_iter->MaxCoveringTombstoneSeqnum(iter->ukey_with_ts));
|
||||
}
|
||||
}
|
||||
UpdateRangeTombstoneSeqnums(options, t, table_range);
|
||||
}
|
||||
if (s.ok()) {
|
||||
CO_AWAIT(t->MultiGet)
|
||||
|
|
Loading…
Reference in New Issue