mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 16:30:56 +00:00
47b8743984
Summary: The goal of this diff is to make the Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong). Here are a couple of things demonstrating that the Compaction class is hard to use: 1. we have two constructors of the Compaction class 2. there's this thing called grandparents_, but it appears to only be set up for leveled compaction and not CompactFiles 3. it's easy to introduce a subtle and dangerous bug like this: D36225 4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: afbafeaeae/db/compaction.cc (L236-L241).
It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: afbafeaeae/db/compaction_picker.cc (L204-L210).
The problem is that we create a Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating it, we call a couple of functions on the Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all of that Compaction state while it's travelling across different functions. If you're writing a new PickCompaction() function, you need to try really hard to understand which functions you need to run on the Compaction object and what state you need to set up. My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them on to the Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you have created your object. This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes: * have one Compaction constructor instead of two. * inputs_ is constant after construction. * MarkFilesBeingCompacted() is now private to the Compaction class and automatically called on construction/destruction. * SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input. * CompactionPicker's functions are not passing around the Compaction object anymore. They are only passing around the state that they need. Test Plan: make check make asan_check make valgrind_check Reviewers: rven, anthony, sdong, yhchiang Reviewed By: yhchiang Subscribers: sdong, yhchiang, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D36687
246 lines
9.4 KiB
C++
246 lines
9.4 KiB
C++
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#pragma once
|
|
#include "util/arena.h"
|
|
#include "util/autovector.h"
|
|
#include "util/mutable_cf_options.h"
|
|
#include "db/version_set.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
// The structure that manages compaction input files associated
|
|
// with the same physical level.
|
|
struct CompactionInputFiles {
|
|
int level;
|
|
std::vector<FileMetaData*> files;
|
|
inline bool empty() const { return files.empty(); }
|
|
inline size_t size() const { return files.size(); }
|
|
inline void clear() { files.clear(); }
|
|
inline FileMetaData* operator[](size_t i) const { return files[i]; }
|
|
};
|
|
|
|
// Forward declarations of types that are only used by pointer below.
class ColumnFamilyData;
class Version;
class VersionStorageInfo;
|
|
|
|
// A Compaction encapsulates information about a compaction.
|
|
class Compaction {
|
|
public:
|
|
Compaction(VersionStorageInfo* input_version,
|
|
const MutableCFOptions& mutable_cf_options,
|
|
std::vector<CompactionInputFiles> inputs, int output_level,
|
|
uint64_t target_file_size, uint64_t max_grandparent_overlap_bytes,
|
|
uint32_t output_path_id, CompressionType compression,
|
|
std::vector<FileMetaData*> grandparents,
|
|
bool manual_compaction = false, double score = -1,
|
|
bool deletion_compaction = false);
|
|
|
|
// No copying allowed
|
|
Compaction(const Compaction&) = delete;
|
|
void operator=(const Compaction&) = delete;
|
|
|
|
~Compaction();
|
|
|
|
// Returns the level associated to the specified compaction input level.
|
|
// If compaction_input_level is not specified, then input_level is set to 0.
|
|
int level(size_t compaction_input_level = 0) const {
|
|
return inputs_[compaction_input_level].level;
|
|
}
|
|
|
|
int start_level() const { return start_level_; }
|
|
|
|
// Outputs will go to this level
|
|
int output_level() const { return output_level_; }
|
|
|
|
// Returns the number of input levels in this compaction.
|
|
size_t num_input_levels() const { return inputs_.size(); }
|
|
|
|
// Return the object that holds the edits to the descriptor done
|
|
// by this compaction.
|
|
VersionEdit* edit() { return &edit_; }
|
|
|
|
// Returns the number of input files associated to the specified
|
|
// compaction input level.
|
|
// The function will return 0 if when "compaction_input_level" < 0
|
|
// or "compaction_input_level" >= "num_input_levels()".
|
|
size_t num_input_files(size_t compaction_input_level) const {
|
|
if (compaction_input_level < inputs_.size()) {
|
|
return inputs_[compaction_input_level].size();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Returns input version of the compaction
|
|
Version* input_version() const { return input_version_; }
|
|
|
|
// Returns the ColumnFamilyData associated with the compaction.
|
|
ColumnFamilyData* column_family_data() const { return cfd_; }
|
|
|
|
// Returns the file meta data of the 'i'th input file at the
|
|
// specified compaction input level.
|
|
// REQUIREMENT: "compaction_input_level" must be >= 0 and
|
|
// < "input_levels()"
|
|
FileMetaData* input(size_t compaction_input_level, size_t i) const {
|
|
assert(compaction_input_level < inputs_.size());
|
|
return inputs_[compaction_input_level][i];
|
|
}
|
|
|
|
// Returns the list of file meta data of the specified compaction
|
|
// input level.
|
|
// REQUIREMENT: "compaction_input_level" must be >= 0 and
|
|
// < "input_levels()"
|
|
const std::vector<FileMetaData*>* inputs(size_t compaction_input_level) {
|
|
assert(compaction_input_level < inputs_.size());
|
|
return &inputs_[compaction_input_level].files;
|
|
}
|
|
|
|
// Returns the LevelFilesBrief of the specified compaction input level.
|
|
LevelFilesBrief* input_levels(size_t compaction_input_level) {
|
|
return &input_levels_[compaction_input_level];
|
|
}
|
|
|
|
// Maximum size of files to build during this compaction.
|
|
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
|
|
|
|
// What compression for output
|
|
CompressionType OutputCompressionType() const { return output_compression_; }
|
|
|
|
// Whether need to write output file to second DB path.
|
|
uint32_t GetOutputPathId() const { return output_path_id_; }
|
|
|
|
// Is this a trivial compaction that can be implemented by just
|
|
// moving a single input file to the next level (no merging or splitting)
|
|
bool IsTrivialMove() const;
|
|
|
|
// If true, then the compaction can be done by simply deleting input files.
|
|
bool IsDeletionCompaction() const {
|
|
return deletion_compaction_;
|
|
}
|
|
|
|
// Add all inputs to this compaction as delete operations to *edit.
|
|
void AddInputDeletions(VersionEdit* edit);
|
|
|
|
// Returns true if the available information we have guarantees that
|
|
// the input "user_key" does not exist in any level beyond "output_level()".
|
|
bool KeyNotExistsBeyondOutputLevel(const Slice& user_key);
|
|
|
|
// Returns true iff we should stop building the current output
|
|
// before processing "internal_key".
|
|
bool ShouldStopBefore(const Slice& internal_key);
|
|
|
|
// Clear all files to indicate that they are not being compacted
|
|
// Delete this compaction from the list of running compactions.
|
|
void ReleaseCompactionFiles(Status status);
|
|
|
|
// Returns the summary of the compaction in "output" with maximum "len"
|
|
// in bytes. The caller is responsible for the memory management of
|
|
// "output".
|
|
void Summary(char* output, int len);
|
|
|
|
// Return the score that was used to pick this compaction run.
|
|
double score() const { return score_; }
|
|
|
|
// Is this compaction creating a file in the bottom most level?
|
|
bool BottomMostLevel() { return bottommost_level_; }
|
|
|
|
// Does this compaction include all sst files?
|
|
bool IsFullCompaction() { return is_full_compaction_; }
|
|
|
|
// Was this compaction triggered manually by the client?
|
|
bool IsManualCompaction() { return is_manual_compaction_; }
|
|
|
|
// Return the MutableCFOptions that should be used throughout the compaction
|
|
// procedure
|
|
const MutableCFOptions* mutable_cf_options() { return &mutable_cf_options_; }
|
|
|
|
// Returns the size in bytes that the output file should be preallocated to.
|
|
// In level compaction, that is max_file_size_. In universal compaction, that
|
|
// is the sum of all input file sizes.
|
|
uint64_t OutputFilePreallocationSize(const MutableCFOptions& mutable_options);
|
|
|
|
void SetInputVersion(Version* input_version);
|
|
|
|
struct InputLevelSummaryBuffer {
|
|
char buffer[128];
|
|
};
|
|
|
|
const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const;
|
|
|
|
// In case of compaction error, reset the nextIndex that is used
|
|
// to pick up the next file to be compacted from files_by_size_
|
|
void ResetNextCompactionIndex();
|
|
|
|
private:
|
|
// mark (or clear) all files that are being compacted
|
|
void MarkFilesBeingCompacted(bool mark_as_compacted);
|
|
|
|
// helper function to determine if compaction with inputs and storage is
|
|
// bottommost
|
|
static bool IsBottommostLevel(
|
|
int output_level, VersionStorageInfo* vstorage,
|
|
const std::vector<CompactionInputFiles>& inputs);
|
|
static bool IsFullCompaction(VersionStorageInfo* vstorage,
|
|
const std::vector<CompactionInputFiles>& inputs);
|
|
|
|
const int start_level_; // the lowest level to be compacted
|
|
const int output_level_; // levels to which output files are stored
|
|
uint64_t max_output_file_size_;
|
|
uint64_t max_grandparent_overlap_bytes_;
|
|
MutableCFOptions mutable_cf_options_;
|
|
Version* input_version_;
|
|
VersionEdit edit_;
|
|
const int number_levels_;
|
|
ColumnFamilyData* cfd_;
|
|
Arena arena_; // Arena used to allocate space for file_levels_
|
|
|
|
const uint32_t output_path_id_;
|
|
CompressionType output_compression_;
|
|
// If true, then the comaction can be done by simply deleting input files.
|
|
const bool deletion_compaction_;
|
|
|
|
// Compaction input files organized by level. Constant after construction
|
|
const std::vector<CompactionInputFiles> inputs_;
|
|
|
|
// A copy of inputs_, organized more closely in memory
|
|
autovector<LevelFilesBrief, 2> input_levels_;
|
|
|
|
// State used to check for number of of overlapping grandparent files
|
|
// (grandparent == "output_level_ + 1")
|
|
std::vector<FileMetaData*> grandparents_;
|
|
size_t grandparent_index_; // Index in grandparent_starts_
|
|
bool seen_key_; // Some output key has been seen
|
|
uint64_t overlapped_bytes_; // Bytes of overlap between current output
|
|
// and grandparent files
|
|
const double score_; // score that was used to pick this compaction.
|
|
|
|
// Is this compaction creating a file in the bottom most level?
|
|
const bool bottommost_level_;
|
|
// Does this compaction include all sst files?
|
|
const bool is_full_compaction_;
|
|
|
|
// Is this compaction requested by the client?
|
|
const bool is_manual_compaction_;
|
|
|
|
// "level_ptrs_" holds indices into "input_version_->levels_", where each
|
|
// index remembers which file of an associated level we are currently used
|
|
// to check KeyNotExistsBeyondOutputLevel() for deletion operation.
|
|
// As it is for checking KeyNotExistsBeyondOutputLevel(), it only
|
|
// records indices for all levels beyond "output_level_".
|
|
std::vector<size_t> level_ptrs_;
|
|
|
|
// Does input compression match the output compression?
|
|
bool InputCompressionMatchesOutput() const;
|
|
};
|
|
|
|
// Utility function
|
|
extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files);
|
|
|
|
} // namespace rocksdb
|