Disable "uncache" behavior in DB shutdown (#12751)

Summary:
Crash test showed a potential use-after-free where a file marked as obsolete and eligible for uncache on destruction is destroyed in the VersionSet destructor, which only happens as part of DB shutdown. At that point, the in-memory column families have already been destroyed, so attempting to uncache could use-after-free on stuff like getting the `user_comparator()` from the `internal_comparator()`.

I attempted to make it smarter, but wasn't able to untangle the destruction dependencies in a way that was safe, understandable, and maintainable.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12751

Test Plan:
Reproduced by adding uncache_aggressiveness to an existing (but otherwise unrelated) test. This makes it a fair regression test.

Also added testing to ensure that trivial moves and DB close & reopen are well behaved with uncache_aggressiveness. Specifically, this issue doesn't seem to be because things are uncached inappropriately in those cases.

Reviewed By: ltamasi

Differential Revision: D58390058

Pulled By: pdillinger

fbshipit-source-id: 66ac9cb13bf02638fa80ee5b7218153d8bc7cfd3
This commit is contained in:
Peter Dillinger 2024-06-11 15:57:40 -07:00 committed by Facebook GitHub Bot
parent af50823069
commit 21eb82ebec
3 changed files with 91 additions and 5 deletions

View File

@ -1339,6 +1339,14 @@ TEST_P(DBBlockCacheTypeTest, AddRedundantStats) {
EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));
} }
namespace {
std::string AltKey(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "altkey%06d", i);
return std::string(buf);
}
} // namespace
TEST_P(DBBlockCacheTypeTest, Uncache) { TEST_P(DBBlockCacheTypeTest, Uncache) {
for (bool partitioned : {false, true}) { for (bool partitioned : {false, true}) {
SCOPED_TRACE("partitioned=" + std::to_string(partitioned)); SCOPED_TRACE("partitioned=" + std::to_string(partitioned));
@ -1349,10 +1357,11 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
Options options = CurrentOptions(); Options options = CurrentOptions();
options.uncache_aggressiveness = ua; options.uncache_aggressiveness = ua;
options.create_if_missing = true; options.create_if_missing = true;
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
// Don't allow background operations to keep Versions referenced // Don't allow background operations to keep Versions referenced
options.stats_dump_period_sec = 0; options.stats_dump_period_sec = 0;
options.stats_persist_period_sec = 0; options.stats_persist_period_sec = 0;
auto stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
options.statistics = stats;
const size_t capacity = size_t{1} << 25; const size_t capacity = size_t{1} << 25;
const int num_shard_bits = 0; // 1 shard const int num_shard_bits = 0; // 1 shard
@ -1401,6 +1410,8 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
// Combine into one file, making the originals obsolete // Combine into one file, making the originals obsolete
ASSERT_OK(db_->CompactRange({}, nullptr, nullptr)); ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
ASSERT_EQ(1, NumTableFilesAtLevel(1));
for (int i = 0; i < kNumDataBlocks; i++) { for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(Key(i)), "NOT_FOUND"); ASSERT_NE(Get(Key(i)), "NOT_FOUND");
} }
@ -1420,6 +1431,75 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
EXPECT_LT(cache->GetUsage(), EXPECT_LT(cache->GetUsage(),
kNumDataBlocks * table_options.block_size * 2U); kNumDataBlocks * table_options.block_size * 2U);
} }
size_t alt_baseline_count = cache->GetOccupancyCount();
size_t alt_baseline_usage = cache->GetUsage();
ASSERT_OK(stats->Reset());
// We aren't generally cleaning up cache entries on DB::Close, especially
// because someone might just re-open the same DB.
Reopen(options);
for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(Key(i)), "NOT_FOUND");
}
EXPECT_EQ(cache->GetOccupancyCount(), alt_baseline_count);
EXPECT_EQ(cache->GetUsage(), alt_baseline_usage);
// Check for unnecessary unncessary cache churn
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);
// And now do a similar test as above except with trivial moves, making
// sure that we aren't falsely uncaching in that case, which would cause
// unnecessary cache misses. Using AltKey instead of Key to avoid
// interference.
for (int i = 0; i < kNumDataBlocks; i++) {
// No overlap
ASSERT_OK(
Put(AltKey(i), Random::GetTLSInstance()->RandomBinaryString(
static_cast<int>(table_options.block_size))));
if (i >= kNumDataBlocks - kNumFiles) {
ASSERT_OK(Flush());
}
}
ASSERT_EQ(int{kNumFiles}, NumTableFilesAtLevel(0));
for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
}
ASSERT_EQ(cache->GetOccupancyCount(),
alt_baseline_count + kNumDataBlocks +
meta_blocks_per_file * kNumFiles);
ASSERT_GE(cache->GetUsage(),
alt_baseline_usage + kNumDataBlocks * table_options.block_size);
ASSERT_OK(stats->Reset());
// Make trivial move
{
auto a = AltKey(0);
auto b = AltKey(kNumDataBlocks);
Slice slice_a{a};
Slice slice_b{b};
ASSERT_OK(db_->CompactRange({}, &slice_a, &slice_b));
}
ASSERT_EQ(/*old*/ 1 + /*new*/ int{kNumFiles}, NumTableFilesAtLevel(1));
for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
}
// Should be the same if trivial move
ASSERT_EQ(cache->GetOccupancyCount(),
alt_baseline_count + kNumDataBlocks +
meta_blocks_per_file * kNumFiles);
// Check for unnecessary unncessary cache churn
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);
} }
} }
} }

View File

@ -6353,6 +6353,8 @@ TEST_F(DBTest, PromoteL0) {
Options options = CurrentOptions(); Options options = CurrentOptions();
options.disable_auto_compactions = true; options.disable_auto_compactions = true;
options.write_buffer_size = 10 * 1024 * 1024; options.write_buffer_size = 10 * 1024 * 1024;
// Exercise what was a use-after-free (ASAN failure) under ~VersionSet()
options.uncache_aggressiveness = 300;
DestroyAndReopen(options); DestroyAndReopen(options);
// non overlapping ranges // non overlapping ranges

View File

@ -5196,17 +5196,21 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
} }
VersionSet::~VersionSet() { VersionSet::~VersionSet() {
// we need to delete column_family_set_ because its destructor depends on // Must clean up column families to make all files "obsolete"
// VersionSet
column_family_set_.reset(); column_family_set_.reset();
for (auto& file : obsolete_files_) { for (auto& file : obsolete_files_) {
if (file.metadata->table_reader_handle) { if (file.metadata->table_reader_handle) {
// NOTE: DB is shutting down, so file is probably not obsolete, just // NOTE: DB is shutting down, so file is probably not obsolete, just
// no longer referenced by Versions in memory. // no longer referenced by Versions in memory.
// For more context, see comment on "table_cache_->EraseUnRefEntries()" // For more context, see comment on "table_cache_->EraseUnRefEntries()"
// in DBImpl::CloseHelper(). // in DBImpl::CloseHelper().
table_cache_->Release(file.metadata->table_reader_handle); // Using uncache_aggressiveness=0 overrides any previous marking to
TableCache::Evict(table_cache_, file.metadata->fd.GetNumber()); // attempt to uncache the file's blocks (which after cleaning up
// column families could cause use-after-free)
TableCache::ReleaseObsolete(table_cache_,
file.metadata->table_reader_handle,
/*uncache_aggressiveness=*/0);
} }
file.DeleteMetadata(); file.DeleteMetadata();
} }