commit 546cc2507f
Merge branch 'facebook:main' into fixed-error-db-resume-failed
@@ -1339,6 +1339,14 @@ TEST_P(DBBlockCacheTypeTest, AddRedundantStats) {
   EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));
 }
 
+namespace {
+std::string AltKey(int i) {
+  char buf[100];
+  snprintf(buf, sizeof(buf), "altkey%06d", i);
+  return std::string(buf);
+}
+}  // namespace
+
 TEST_P(DBBlockCacheTypeTest, Uncache) {
   for (bool partitioned : {false, true}) {
     SCOPED_TRACE("partitioned=" + std::to_string(partitioned));
@@ -1349,10 +1357,11 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
       Options options = CurrentOptions();
       options.uncache_aggressiveness = ua;
       options.create_if_missing = true;
-      options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+      // Don't allow background operations to keep Versions referenced
+      options.stats_dump_period_sec = 0;
+      options.stats_persist_period_sec = 0;
+      auto stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
+      options.statistics = stats;
 
       const size_t capacity = size_t{1} << 25;
       const int num_shard_bits = 0;  // 1 shard
@@ -1401,6 +1410,8 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
       // Combine into one file, making the originals obsolete
       ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
 
+      ASSERT_EQ(1, NumTableFilesAtLevel(1));
+
       for (int i = 0; i < kNumDataBlocks; i++) {
         ASSERT_NE(Get(Key(i)), "NOT_FOUND");
       }
@@ -1420,6 +1431,75 @@ TEST_P(DBBlockCacheTypeTest, Uncache) {
         EXPECT_LT(cache->GetUsage(),
                   kNumDataBlocks * table_options.block_size * 2U);
       }
+
+      size_t alt_baseline_count = cache->GetOccupancyCount();
+      size_t alt_baseline_usage = cache->GetUsage();
+      ASSERT_OK(stats->Reset());
+      // We aren't generally cleaning up cache entries on DB::Close, especially
+      // because someone might just re-open the same DB.
+      Reopen(options);
+      for (int i = 0; i < kNumDataBlocks; i++) {
+        ASSERT_NE(Get(Key(i)), "NOT_FOUND");
+      }
+
+      EXPECT_EQ(cache->GetOccupancyCount(), alt_baseline_count);
+      EXPECT_EQ(cache->GetUsage(), alt_baseline_usage);
+
+      // Check for unnecessary cache churn
+      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
+      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
+      ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);
+
+      // And now do a similar test as above except with trivial moves, making
+      // sure that we aren't falsely uncaching in that case, which would cause
+      // unnecessary cache misses. Using AltKey instead of Key to avoid
+      // interference.
+      for (int i = 0; i < kNumDataBlocks; i++) {
+        // No overlap
+        ASSERT_OK(
+            Put(AltKey(i), Random::GetTLSInstance()->RandomBinaryString(
+                               static_cast<int>(table_options.block_size))));
+        if (i >= kNumDataBlocks - kNumFiles) {
+          ASSERT_OK(Flush());
+        }
+      }
+      ASSERT_EQ(int{kNumFiles}, NumTableFilesAtLevel(0));
+
+      for (int i = 0; i < kNumDataBlocks; i++) {
+        ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
+      }
+
+      ASSERT_EQ(cache->GetOccupancyCount(),
+                alt_baseline_count + kNumDataBlocks +
+                    meta_blocks_per_file * kNumFiles);
+      ASSERT_GE(cache->GetUsage(),
+                alt_baseline_usage + kNumDataBlocks * table_options.block_size);
+
+      ASSERT_OK(stats->Reset());
+
+      // Make trivial move
+      {
+        auto a = AltKey(0);
+        auto b = AltKey(kNumDataBlocks);
+        Slice slice_a{a};
+        Slice slice_b{b};
+        ASSERT_OK(db_->CompactRange({}, &slice_a, &slice_b));
+      }
+      ASSERT_EQ(/*old*/ 1 + /*new*/ int{kNumFiles}, NumTableFilesAtLevel(1));
+
+      for (int i = 0; i < kNumDataBlocks; i++) {
+        ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
+      }
+
+      // Should be the same if trivial move
+      ASSERT_EQ(cache->GetOccupancyCount(),
+                alt_baseline_count + kNumDataBlocks +
+                    meta_blocks_per_file * kNumFiles);
+
+      // Check for unnecessary cache churn
+      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
+      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
+      ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);
     }
   }
 }
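For illustration, here is a standalone toy model (assumed names, not RocksDB classes) of the behavior the test above verifies: when a file becomes obsolete (e.g. rewritten by compaction), its blocks are erased from the cache, but a trivial move keeps the same file alive, so its cached blocks must survive untouched.

#include <cassert>
#include <map>
#include <string>
#include <utility>

using BlockKey = std::pair<int /*file_number*/, int /*block_index*/>;

struct ToyBlockCache {
  std::map<BlockKey, std::string> blocks;
  // Called when a table reader for an obsolete file is destroyed with
  // uncache_aggressiveness > 0.
  void UncacheFile(int file_number) {
    auto it = blocks.lower_bound({file_number, 0});
    while (it != blocks.end() && it->first.first == file_number) {
      it = blocks.erase(it);
    }
  }
};

int main() {
  ToyBlockCache cache;
  cache.blocks[{1, 0}] = "data";
  cache.blocks[{1, 1}] = "data";
  cache.blocks[{2, 0}] = "data";

  // Compaction rewrote file 1 into a new file: file 1 is obsolete, uncache it.
  cache.UncacheFile(1);
  assert(cache.blocks.size() == 1);

  // A trivial move only re-links file 2 to another level; the file (and its
  // cached blocks) must survive untouched.
  assert(cache.blocks.count({2, 0}) == 1);
  return 0;
}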
@@ -6353,6 +6353,8 @@ TEST_F(DBTest, PromoteL0) {
   Options options = CurrentOptions();
   options.disable_auto_compactions = true;
   options.write_buffer_size = 10 * 1024 * 1024;
+  // Exercise what was a use-after-free (ASAN failure) under ~VersionSet()
+  options.uncache_aggressiveness = 300;
   DestroyAndReopen(options);
 
   // non overlapping ranges
@@ -5196,17 +5196,21 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
 }
 
 VersionSet::~VersionSet() {
-  // we need to delete column_family_set_ because its destructor depends on
-  // VersionSet
+  // Must clean up column families to make all files "obsolete"
   column_family_set_.reset();
 
   for (auto& file : obsolete_files_) {
     if (file.metadata->table_reader_handle) {
-      table_cache_->Release(file.metadata->table_reader_handle);
-      TableCache::Evict(table_cache_, file.metadata->fd.GetNumber());
+      // NOTE: DB is shutting down, so file is probably not obsolete, just
+      // no longer referenced by Versions in memory.
+      // For more context, see comment on "table_cache_->EraseUnRefEntries()"
+      // in DBImpl::CloseHelper().
+      // Using uncache_aggressiveness=0 overrides any previous marking to
+      // attempt to uncache the file's blocks (which after cleaning up
+      // column families could cause use-after-free)
+      TableCache::ReleaseObsolete(table_cache_,
+                                  file.metadata->table_reader_handle,
+                                  /*uncache_aggressiveness=*/0);
     }
     file.DeleteMetadata();
   }
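A minimal sketch of the pattern above (toy types; only the release-with-override idea is taken from the diff, the class and function bodies here are assumptions): the final release passes uncache_aggressiveness=0 so that any earlier MarkObsolete() hint is overridden and the reader's destructor skips uncaching, because the column families it would touch are already torn down.

#include <cstdint>
#include <iostream>

// Toy stand-in for a table reader, for illustration only.
struct ToyTableReader {
  uint32_t uncache_aggressiveness = 0;  // may be set by an earlier marking
  ~ToyTableReader() {
    if (uncache_aggressiveness > 0) {
      std::cout << "erasing this file's blocks from cache\n";
    }
  }
};

// Mirrors the shutdown pattern in ~VersionSet: the last release overrides any
// previous marking with 0, so the destructor does no uncaching.
void ReleaseObsolete(ToyTableReader* reader, uint32_t uncache_aggressiveness) {
  reader->uncache_aggressiveness = uncache_aggressiveness;
  delete reader;  // last reference: destructor runs now
}

int main() {
  auto* reader = new ToyTableReader;
  reader->uncache_aggressiveness = 300;  // marked obsolete earlier
  // Shutdown path: column families are already cleaned up, so disable
  // uncaching to avoid dereferencing freed state.
  ReleaseObsolete(reader, /*uncache_aggressiveness=*/0);
  return 0;
}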
@@ -136,10 +136,10 @@ extern const std::string kHashIndexPrefixesBlock;
 extern const std::string kHashIndexPrefixesMetadataBlock;
 
 BlockBasedTable::~BlockBasedTable() {
-  if (rep_->uncache_aggressiveness > 0 && rep_->table_options.block_cache) {
+  auto ua = rep_->uncache_aggressiveness.LoadRelaxed();
+  if (ua > 0 && rep_->table_options.block_cache) {
     if (rep_->filter) {
-      rep_->filter->EraseFromCacheBeforeDestruction(
-          rep_->uncache_aggressiveness);
+      rep_->filter->EraseFromCacheBeforeDestruction(ua);
     }
     if (rep_->index_reader) {
       {
@@ -160,7 +160,7 @@ BlockBasedTable::~BlockBasedTable() {
         // without I/O. (NOTE: It's extremely unlikely that a data block
         // will be in block cache without the index block pointing to it
         // also in block cache.)
-        UncacheAggressivenessAdvisor advisor(rep_->uncache_aggressiveness);
+        UncacheAggressivenessAdvisor advisor(ua);
         for (iiter->SeekToFirst(); iiter->Valid() && advisor.ShouldContinue();
              iiter->Next()) {
          bool erased = EraseFromCache(iiter->value().handle);
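The loop above consults an advisor to bound how long the destructor keeps probing the cache. A hypothetical reconstruction of the idea follows (the name ToyUncacheAdvisor, the Report() method, and the budget heuristic are assumptions, not RocksDB's actual UncacheAggressivenessAdvisor): keep scanning index entries while erasures mostly pay off, with tolerance for wasted lookups scaled by the aggressiveness value.

#include <cstdint>

class ToyUncacheAdvisor {
 public:
  explicit ToyUncacheAdvisor(uint32_t aggressiveness)
      : miss_budget_(aggressiveness) {}
  // Report whether the last index entry's data block was actually cached
  // (and therefore erased).
  void Report(bool erased) {
    if (erased) {
      ++miss_budget_;  // useful work earns budget for future misses
    } else if (miss_budget_ > 0) {
      --miss_budget_;  // a wasted lookup consumes budget
    }
  }
  bool ShouldContinue() const { return miss_budget_ > 0; }

 private:
  uint64_t miss_budget_;
};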
@@ -170,8 +170,7 @@ BlockBasedTable::~BlockBasedTable() {
       }
 
       // Un-cache the index block(s)
-      rep_->index_reader->EraseFromCacheBeforeDestruction(
-          rep_->uncache_aggressiveness);
+      rep_->index_reader->EraseFromCacheBeforeDestruction(ua);
     }
   }
   delete rep_;
@@ -3296,7 +3295,7 @@ void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value,
 }
 
 void BlockBasedTable::MarkObsolete(uint32_t uncache_aggressiveness) {
-  rep_->uncache_aggressiveness = uncache_aggressiveness;
+  rep_->uncache_aggressiveness.StoreRelaxed(uncache_aggressiveness);
 }
 
 }  // namespace ROCKSDB_NAMESPACE
@@ -33,6 +33,7 @@
 #include "table/table_reader.h"
 #include "table/two_level_iterator.h"
 #include "trace_replay/block_cache_tracer.h"
+#include "util/atomic.h"
 #include "util/coro_utils.h"
 #include "util/hash_containers.h"
@@ -675,8 +676,11 @@ struct BlockBasedTable::Rep {
  const bool user_defined_timestamps_persisted;
 
   // Set to >0 when the file is known to be obsolete and should have its block
-  // cache entries evicted on close.
-  uint32_t uncache_aggressiveness = 0;
+  // cache entries evicted on close. NOTE: when the file becomes obsolete,
+  // there could be multiple table cache references that all mark this file as
+  // obsolete. An atomic resolves the race quite reasonably. Even in the rare
+  // case of such a race, they will most likely be storing the same value.
+  RelaxedAtomic<uint32_t> uncache_aggressiveness{0};
 
   std::unique_ptr<CacheReservationManager::CacheReservationHandle>
       table_reader_cache_res_handle = nullptr;
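RelaxedAtomic comes from util/atomic.h, included above. A minimal sketch of such a wrapper, assuming only the two operations this diff uses (the real class offers more):

#include <atomic>

template <typename T>
class ToyRelaxedAtomic {
 public:
  explicit ToyRelaxedAtomic(T v = T{}) : v_(v) {}
  // Load/store with no ordering constraints beyond atomicity itself.
  T LoadRelaxed() const { return v_.load(std::memory_order_relaxed); }
  void StoreRelaxed(T v) { v_.store(v, std::memory_order_relaxed); }

 private:
  std::atomic<T> v_;
};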
@@ -191,7 +191,9 @@ class TableReader {
 
   // Tell the reader that the file should now be obsolete, e.g. as a hint
   // to delete relevant cache entries on destruction. (It might not be safe
-  // to "unpin" cache entries until destruction time.)
+  // to "unpin" cache entries until destruction time.) NOTE: must be thread
+  // safe because multiple table cache references might all mark this file as
+  // obsolete when they are released (the last of which destroys this reader).
   virtual void MarkObsolete(uint32_t /*uncache_aggressiveness*/) {
     // no-op as default
   }
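Why relaxed atomicity suffices for the NOTE above: a small standalone demonstration (toy code, not RocksDB's). Two "table cache references" race to mark the same file obsolete; with a plain uint32_t this would be a data race (undefined behavior), while relaxed atomic stores make it well-defined, and since both writers typically store the same value, no stronger memory ordering is needed.

#include <atomic>
#include <cassert>
#include <cstdint>
#include <thread>

int main() {
  std::atomic<uint32_t> ua{0};
  // Both releasers mark the file obsolete with the same aggressiveness.
  auto mark = [&ua] { ua.store(300, std::memory_order_relaxed); };
  std::thread t1(mark);
  std::thread t2(mark);
  t1.join();
  t2.join();
  // Whichever store "wins", the observed value is the same.
  assert(ua.load(std::memory_order_relaxed) == 300);
  return 0;
}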