mirror of
https://github.com/facebook/rocksdb.git
synced 2024-12-01 07:15:51 +00:00
9c94559de7
Summary: This PR adds some optimizations for compacting standalone range deletion files. A standalone range deletion file is one with just a single range deletion. Currently, such a file is used in bulk loading to achieve something like atomically deleting the old version of all data with one big range deletion and adding the new version of the data. These are the changes included in the PR: 1) When a standalone range deletion file is ingested via bulk loading, it's marked for compaction. 2) When picking input files during compaction picking, we attempt to only pick a standalone range deletion file when the oldest snapshot is at or above the file's seqno. To do this, the `PickCompaction` API is updated to take existing snapshots as an input. This is only done for the universal compaction + UDT disabled combination; for all other cases we skip querying for existing snapshots and do not pass them. 3) At `Compaction` construction time, the input files will be filtered to examine if any of them can be skipped for the compaction iterator. For example, if all the data of the file is deleted by a standalone range tombstone, and the oldest snapshot is at or above such range tombstone, this file will be filtered out. 4) Every time a snapshot is released, we examine if any column family has standalone range deletion files that become eligible to be scheduled for compaction, and schedule one for it. Potential future improvements: - Add some dedicated statistics for the filtered files. - Extend this input filtering to L0 files' compaction cases when a newer L0 file could shadow an older L0 file Pull Request resolved: https://github.com/facebook/rocksdb/pull/13078 Test Plan: Added unit tests and stress tested a few rounds Reviewed By: cbi42 Differential Revision: D64879415 Pulled By: jowlyzhang fbshipit-source-id: 02b8683fddbe11f093bcaa0a38406deb39f44d9e
66 lines
2.1 KiB
C++
66 lines
2.1 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
#include "rocksdb/types.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// Answer returned by SnapshotChecker::CheckInSnapshot() for a
// (sequence, snapshot_sequence) pair.
enum class SnapshotCheckerResult : int {
  // The checker reports the sequence as being in the snapshot.
  kInSnapshot = 0,
  // The checker reports the sequence as not being in the snapshot.
  kNotInSnapshot = 1,
  // In case snapshot is released and the checker has no clue whether
  // the given sequence is visible to the snapshot.
  kSnapshotReleased = 2,
};
|
|
|
|
// Callback class that control GC of duplicate keys in flush/compaction.
|
|
class SnapshotChecker {
|
|
public:
|
|
virtual ~SnapshotChecker() {}
|
|
virtual SnapshotCheckerResult CheckInSnapshot(
|
|
SequenceNumber sequence, SequenceNumber snapshot_sequence) const = 0;
|
|
};
|
|
|
|
class DisableGCSnapshotChecker : public SnapshotChecker {
|
|
public:
|
|
virtual ~DisableGCSnapshotChecker() {}
|
|
SnapshotCheckerResult CheckInSnapshot(
|
|
SequenceNumber /*sequence*/,
|
|
SequenceNumber /*snapshot_sequence*/) const override {
|
|
// By returning kNotInSnapshot, we prevent all the values from being GCed
|
|
return SnapshotCheckerResult::kNotInSnapshot;
|
|
}
|
|
static DisableGCSnapshotChecker* Instance();
|
|
|
|
protected:
|
|
explicit DisableGCSnapshotChecker() {}
|
|
};
|
|
|
|
class WritePreparedTxnDB;
|
|
|
|
// Callback class created by WritePreparedTxnDB to check if a key
|
|
// is visible by a snapshot.
|
|
class WritePreparedSnapshotChecker : public SnapshotChecker {
|
|
public:
|
|
explicit WritePreparedSnapshotChecker(WritePreparedTxnDB* txn_db);
|
|
virtual ~WritePreparedSnapshotChecker() {}
|
|
|
|
SnapshotCheckerResult CheckInSnapshot(
|
|
SequenceNumber sequence, SequenceNumber snapshot_sequence) const override;
|
|
|
|
private:
|
|
const WritePreparedTxnDB* const txn_db_;
|
|
};
|
|
|
|
// Declared here, defined elsewhere.
// NOTE(review): judging by the name, this is expected to return true only
// when data written at `seqno` is certainly visible to `snapshot`, consulting
// `snapshot_checker` for the decision -- confirm against the definition,
// including whether `snapshot_checker` may be null.
bool DataIsDefinitelyInSnapshot(SequenceNumber seqno, SequenceNumber snapshot,
                                const SnapshotChecker* snapshot_checker);
|
|
|
|
// Declared here, defined elsewhere.
// NOTE(review): judging by the name, this is expected to return true only
// when data written at `seqno` is certainly not visible to `snapshot`,
// consulting `snapshot_checker` for the decision -- confirm against the
// definition, including whether `snapshot_checker` may be null.
bool DataIsDefinitelyNotInSnapshot(SequenceNumber seqno,
                                   SequenceNumber snapshot,
                                   const SnapshotChecker* snapshot_checker);
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|