rocksdb/db/import_column_family_job.h
Changyu Bi 9aa3b6f9ae Support range deletion tombstones in CreateColumnFamilyWithImport (#11252)
Summary:
CreateColumnFamilyWithImport() did not support range tombstones for two reasons:
1. it uses the point keys of an input file to determine its boundaries (smallest and largest internal keys), which means range tombstones outside of the point key range are effectively dropped.
2. it does not handle files with no point keys.

Also included a fix in external_sst_file_ingestion_job.cc where the blocks read in `GetIngestedFileInfo()` can be added to block cache now (issue fixed in https://github.com/facebook/rocksdb/pull/6429).

This PR adds support for exporting and importing a column family with range tombstones. The main change is to add the smallest and largest internal keys to `SstFileMetaData`, which is part of the output of `ExportColumnFamily()`. Then, during `CreateColumnFamilyWithImport(..., const ExportImportFilesMetaData& metadata, ...)`, file boundaries can be set from `metadata` directly. This is needed because, when file boundaries are extended by range tombstones, they sometimes cannot be deduced from a file's content alone.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11252

Test Plan:
- added unit tests that fail before this change

Closes https://github.com/facebook/rocksdb/issues/11245

Reviewed By: ajkr

Differential Revision: D43577443

Pulled By: cbi42

fbshipit-source-id: 6bff78e583cc50c44854994dea0a8dd519398f2f
2023-03-13 11:06:59 -07:00

83 lines
2.7 KiB
C++

// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <string>
#include <unordered_set>
#include <vector>
#include "db/column_family.h"
#include "db/external_sst_file_ingestion_job.h"
#include "db/snapshot_impl.h"
#include "options/db_options.h"
#include "rocksdb/db.h"
#include "rocksdb/metadata.h"
#include "rocksdb/sst_file_writer.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
// Forward declarations: this header only refers to these types through a
// reference (EnvOptions) or a pointer (SystemClock).
struct EnvOptions;
class SystemClock;
// Job that brings a set of existing SST files, unmodified, into a newly
// created column family. The overall approach mirrors
// ExternalSstFileIngestionJob.
class ImportColumnFamilyJob {
 public:
  // Captures everything the job needs to run.
  //
  // `db_options`, `env_options` and `import_options` are stored as references,
  // so the referenced objects must outlive this job. `metadata` and
  // `io_tracer` are copied into the job.
  ImportColumnFamilyJob(VersionSet* versions, ColumnFamilyData* cfd,
                        const ImmutableDBOptions& db_options,
                        const EnvOptions& env_options,
                        const ImportColumnFamilyOptions& import_options,
                        const std::vector<LiveFileMetaData>& metadata,
                        const std::shared_ptr<IOTracer>& io_tracer)
      : clock_(db_options.clock),
        versions_(versions),
        cfd_(cfd),
        db_options_(db_options),
        // Reads the db_options_ member, which is declared (and therefore
        // initialized) ahead of fs_.
        fs_(db_options_.fs, io_tracer),
        env_options_(env_options),
        import_options_(import_options),
        metadata_(metadata),
        io_tracer_(io_tracer) {}

  // First phase: gets the external files into the DB by copying them.
  Status Prepare(uint64_t next_file_number, SuperVersion* sv);

  // Second phase: performs the import and fills in the VersionEdit returned
  // by edit() so that the caller can apply it.
  // REQUIRES: Mutex held
  Status Run();

  // Final phase: tidies up once the job has finished, whether `status`
  // reports success or failure.
  void Cleanup(const Status& status);

  // Mutable access to the version edit owned by this job.
  VersionEdit* edit() { return &edit_; }

  // Read-only view of the per-file import records held by this job.
  const autovector<IngestedFileInfo>& files_to_import() const {
    return files_to_import_;
  }

 private:
  // Opens `external_file` and fills `file_to_import` with all of the
  // information about it that the import requires.
  Status GetIngestedFileInfo(const std::string& external_file,
                             uint64_t new_file_number, SuperVersion* sv,
                             const LiveFileMetaData& file_meta,
                             IngestedFileInfo* file_to_import);

  // NOTE: the member order below is also the initialization order relied on
  // by the constructor; keep db_options_ ahead of fs_.
  SystemClock* clock_;  // taken from db_options.clock
  VersionSet* versions_;
  ColumnFamilyData* cfd_;
  const ImmutableDBOptions& db_options_;
  const FileSystemPtr fs_;  // built from db_options_.fs and the IO tracer
  const EnvOptions& env_options_;
  autovector<IngestedFileInfo> files_to_import_;
  VersionEdit edit_;  // exposed through edit()
  const ImportColumnFamilyOptions& import_options_;
  std::vector<LiveFileMetaData> metadata_;  // private copy of the input
  const std::shared_ptr<IOTracer> io_tracer_;
};
} // namespace ROCKSDB_NAMESPACE