Merge branch 'facebook:main' into fixed-error-db-resume-failed

This commit is contained in:
Yadong Wang 2024-06-12 09:19:11 +08:00 committed by GitHub
commit 546cc2507f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 106 additions and 15 deletions

View file

@@ -1339,6 +1339,14 @@ TEST_P(DBBlockCacheTypeTest, AddRedundantStats) {
EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));
}
namespace {
std::string AltKey(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "altkey%06d", i);
return std::string(buf);
}
} // namespace
TEST_P(DBBlockCacheTypeTest, Uncache) {
for (bool partitioned : {false, true}) {
SCOPED_TRACE("partitioned=" + std::to_string(partitioned));
@@ -1349,10 +1357,11 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
Options options = CurrentOptions();
options.uncache_aggressiveness = ua;
options.create_if_missing = true;
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
// Don't allow background operations to keep Versions referenced
options.stats_dump_period_sec = 0;
options.stats_persist_period_sec = 0;
auto stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
options.statistics = stats;
const size_t capacity = size_t{1} << 25;
const int num_shard_bits = 0; // 1 shard
@@ -1401,6 +1410,8 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
// Combine into one file, making the originals obsolete
ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
ASSERT_EQ(1, NumTableFilesAtLevel(1));
for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(Key(i)), "NOT_FOUND");
}
@@ -1420,6 +1431,75 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
EXPECT_LT(cache->GetUsage(),
kNumDataBlocks * table_options.block_size * 2U);
}
size_t alt_baseline_count = cache->GetOccupancyCount();
size_t alt_baseline_usage = cache->GetUsage();
ASSERT_OK(stats->Reset());
// We aren't generally cleaning up cache entries on DB::Close, especially
// because someone might just re-open the same DB.
Reopen(options);
for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(Key(i)), "NOT_FOUND");
}
EXPECT_EQ(cache->GetOccupancyCount(), alt_baseline_count);
EXPECT_EQ(cache->GetUsage(), alt_baseline_usage);
// Check for unnecessary cache churn
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);
// And now do a similar test as above except with trivial moves, making
// sure that we aren't falsely uncaching in that case, which would cause
// unnecessary cache misses. Using AltKey instead of Key to avoid
// interference.
for (int i = 0; i < kNumDataBlocks; i++) {
// No overlap
ASSERT_OK(
Put(AltKey(i), Random::GetTLSInstance()->RandomBinaryString(
static_cast<int>(table_options.block_size))));
if (i >= kNumDataBlocks - kNumFiles) {
ASSERT_OK(Flush());
}
}
ASSERT_EQ(int{kNumFiles}, NumTableFilesAtLevel(0));
for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
}
ASSERT_EQ(cache->GetOccupancyCount(),
alt_baseline_count + kNumDataBlocks +
meta_blocks_per_file * kNumFiles);
ASSERT_GE(cache->GetUsage(),
alt_baseline_usage + kNumDataBlocks * table_options.block_size);
ASSERT_OK(stats->Reset());
// Make trivial move
{
auto a = AltKey(0);
auto b = AltKey(kNumDataBlocks);
Slice slice_a{a};
Slice slice_b{b};
ASSERT_OK(db_->CompactRange({}, &slice_a, &slice_b));
}
ASSERT_EQ(/*old*/ 1 + /*new*/ int{kNumFiles}, NumTableFilesAtLevel(1));
for (int i = 0; i < kNumDataBlocks; i++) {
ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
}
// Should be the same if trivial move
ASSERT_EQ(cache->GetOccupancyCount(),
alt_baseline_count + kNumDataBlocks +
meta_blocks_per_file * kNumFiles);
// Check for unnecessary cache churn
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);
}
}
}

View file

@@ -6353,6 +6353,8 @@ TEST_F(DBTest, PromoteL0) {
Options options = CurrentOptions();
options.disable_auto_compactions = true;
options.write_buffer_size = 10 * 1024 * 1024;
// Exercise what was a use-after-free (ASAN failure) under ~VersionSet()
options.uncache_aggressiveness = 300;
DestroyAndReopen(options);
// non overlapping ranges

View file

@@ -5196,17 +5196,21 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
}
VersionSet::~VersionSet() {
// we need to delete column_family_set_ because its destructor depends on
// VersionSet
// Must clean up column families to make all files "obsolete"
column_family_set_.reset();
for (auto& file : obsolete_files_) {
if (file.metadata->table_reader_handle) {
// NOTE: DB is shutting down, so file is probably not obsolete, just
// no longer referenced by Versions in memory.
// For more context, see comment on "table_cache_->EraseUnRefEntries()"
// in DBImpl::CloseHelper().
table_cache_->Release(file.metadata->table_reader_handle);
TableCache::Evict(table_cache_, file.metadata->fd.GetNumber());
// Using uncache_aggressiveness=0 overrides any previous marking to
// attempt to uncache the file's blocks (which after cleaning up
// column families could cause use-after-free)
TableCache::ReleaseObsolete(table_cache_,
file.metadata->table_reader_handle,
/*uncache_aggressiveness=*/0);
}
file.DeleteMetadata();
}

View file

@@ -136,10 +136,10 @@ extern const std::string kHashIndexPrefixesBlock;
extern const std::string kHashIndexPrefixesMetadataBlock;
BlockBasedTable::~BlockBasedTable() {
if (rep_->uncache_aggressiveness > 0 && rep_->table_options.block_cache) {
auto ua = rep_->uncache_aggressiveness.LoadRelaxed();
if (ua > 0 && rep_->table_options.block_cache) {
if (rep_->filter) {
rep_->filter->EraseFromCacheBeforeDestruction(
rep_->uncache_aggressiveness);
rep_->filter->EraseFromCacheBeforeDestruction(ua);
}
if (rep_->index_reader) {
{
@@ -160,7 +160,7 @@ BlockBasedTable::~BlockBasedTable() {
// without I/O. (NOTE: It's extremely unlikely that a data block
// will be in block cache without the index block pointing to it
// also in block cache.)
UncacheAggressivenessAdvisor advisor(rep_->uncache_aggressiveness);
UncacheAggressivenessAdvisor advisor(ua);
for (iiter->SeekToFirst(); iiter->Valid() && advisor.ShouldContinue();
iiter->Next()) {
bool erased = EraseFromCache(iiter->value().handle);
@@ -170,8 +170,7 @@ BlockBasedTable::~BlockBasedTable() {
}
// Un-cache the index block(s)
rep_->index_reader->EraseFromCacheBeforeDestruction(
rep_->uncache_aggressiveness);
rep_->index_reader->EraseFromCacheBeforeDestruction(ua);
}
}
delete rep_;
@@ -3296,7 +3295,7 @@ void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value,
}
void BlockBasedTable::MarkObsolete(uint32_t uncache_aggressiveness) {
rep_->uncache_aggressiveness = uncache_aggressiveness;
rep_->uncache_aggressiveness.StoreRelaxed(uncache_aggressiveness);
}
} // namespace ROCKSDB_NAMESPACE

View file

@@ -33,6 +33,7 @@
#include "table/table_reader.h"
#include "table/two_level_iterator.h"
#include "trace_replay/block_cache_tracer.h"
#include "util/atomic.h"
#include "util/coro_utils.h"
#include "util/hash_containers.h"
@@ -675,8 +676,11 @@ struct BlockBasedTable::Rep {
const bool user_defined_timestamps_persisted;
// Set to >0 when the file is known to be obsolete and should have its block
// cache entries evicted on close.
uint32_t uncache_aggressiveness = 0;
// cache entries evicted on close. NOTE: when the file becomes obsolete,
// there could be multiple table cache references that all mark this file as
// obsolete. An atomic resolves the race quite reasonably. Even in the rare
// case of such a race, they will most likely be storing the same value.
RelaxedAtomic<uint32_t> uncache_aggressiveness{0};
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
table_reader_cache_res_handle = nullptr;

View file

@@ -191,7 +191,9 @@ class TableReader {
// Tell the reader that the file should now be obsolete, e.g. as a hint
// to delete relevant cache entries on destruction. (It might not be safe
// to "unpin" cache entries until destruction time.)
// to "unpin" cache entries until destruction time.) NOTE: must be thread
// safe because multiple table cache references might all mark this file as
// obsolete when they are released (the last of which destroys this reader).
virtual void MarkObsolete(uint32_t /*uncache_aggressiveness*/) {
// no-op as default
}