Add a host location property to TableProperties (#7479)

Summary:
This PR adds support for writing a location identifier of the DB host to SST files as a table property. By default, the hostname is used, but can be overridden by the user. There have been some recent corruptions in files written by ```SstFileWriter``` before checksumming, so this property can be used to trace it back to the writing host and checking the host for hardware isues.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7479

Test Plan: Add new unit tests

Reviewed By: pdillinger

Differential Revision: D24340671

Pulled By: anand1976

fbshipit-source-id: 2038949fd8d160c0633ccb4f9da77740f19fa2a2
This commit is contained in:
anand76 2020-10-19 11:37:05 -07:00 committed by Facebook GitHub Bot
parent 0ea7db768e
commit 00751e4292
21 changed files with 152 additions and 5 deletions

View File

@ -30,6 +30,7 @@
* Introduce options.check_flush_compaction_key_order with default value to be true. With this option, during flush and compaction, key order will be checked when writing to each SST file. If the order is violated, the flush or compaction will fail.
* Added is_full_compaction to CompactionJobStats, so that the information is available through the EventListener interface.
* Add more stats for MultiGet in Histogram to get number of data blocks, index blocks, filter blocks and sst files read from file system per level.
* SST files have a new table property called db_host_id, which is set to the hostname by default. A new option in DBOptions, db_host_id, allows the property value to be overridden with a user specified string, or disable it completely by making the option string empty.
## 6.13 (09/12/2020)
### Bug fixes

View File

@ -141,6 +141,7 @@ TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase2) {
options.max_background_compactions = 2;
options.num_levels = 5;
options.max_compaction_bytes = 0; // Force not expanding in compactions
options.db_host_id = ""; // Setting this messes up the file size calculation
BlockBasedTableOptions table_options;
table_options.block_size = 1024;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));

View File

@ -11,9 +11,11 @@
#include <vector>
#include "db/db_test_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/db.h"
#include "rocksdb/utilities/table_properties_collectors.h"
#include "table/format.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/random.h"
@ -274,6 +276,55 @@ TEST_F(DBTablePropertiesTest, GetDbIdentifiersProperty) {
}
}
class DBTableHostnamePropertyTest
: public DBTestBase,
public ::testing::WithParamInterface<std::tuple<int, std::string>> {
public:
DBTableHostnamePropertyTest()
: DBTestBase("/db_table_hostname_property_test",
/*env_do_fsync=*/false) {}
};
TEST_P(DBTableHostnamePropertyTest, DbHostLocationProperty) {
option_config_ = std::get<0>(GetParam());
Options opts = CurrentOptions();
std::string expected_host_id = std::get<1>(GetParam());
;
if (expected_host_id == kHostnameForDbHostId) {
ASSERT_OK(env_->GetHostNameString(&expected_host_id));
} else {
opts.db_host_id = expected_host_id;
}
CreateAndReopenWithCF({"goku"}, opts);
for (uint32_t cf = 0; cf < 2; ++cf) {
Put(cf, "key", "val");
Put(cf, "foo", "bar");
Flush(cf);
TablePropertiesCollection fname_to_props;
ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props));
ASSERT_EQ(1U, fname_to_props.size());
ASSERT_EQ(fname_to_props.begin()->second->db_host_id, expected_host_id);
}
}
INSTANTIATE_TEST_CASE_P(
DBTableHostnamePropertyTest, DBTableHostnamePropertyTest,
::testing::Values(
// OptionConfig, override db_host_location
std::make_tuple(DBTestBase::OptionConfig::kDefault,
kHostnameForDbHostId),
std::make_tuple(DBTestBase::OptionConfig::kDefault, "foobar"),
std::make_tuple(DBTestBase::OptionConfig::kDefault, ""),
std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
kHostnameForDbHostId),
std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
"foobar"),
std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
"")));
class DeletionTriggeredCompactionTestListener : public EventListener {
public:
void OnCompactionBegin(DB* , const CompactionJobInfo& ci) override {

10
env/env.cc vendored
View File

@ -140,6 +140,16 @@ Status Env::GetChildrenFileAttributes(const std::string& dir,
return Status::OK();
}
Status Env::GetHostNameString(std::string* result) {
std::array<char, kMaxHostNameLen> hostname_buf;
Status s = GetHostName(hostname_buf.data(), hostname_buf.size());
if (s.ok()) {
hostname_buf[hostname_buf.size() - 1] = '\0';
result->assign(hostname_buf.data());
}
return s;
}
SequentialFile::~SequentialFile() {
}

View File

@ -452,9 +452,15 @@ class Env {
// Sleep/delay the thread for the prescribed number of micro-seconds.
virtual void SleepForMicroseconds(int micros) = 0;
// Get the current host name.
// Get the current host name as a null terminated string iff the string
// length is < len. The hostname should otherwise be truncated to len.
virtual Status GetHostName(char* name, uint64_t len) = 0;
// Get the current hostname from the given env as a std::string in result.
// The result may be truncated if the hostname is too
// long
virtual Status GetHostNameString(std::string* result);
// Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
// Only overwrites *unix_time on success.
virtual Status GetCurrentTime(int64_t* unix_time) = 0;
@ -576,6 +582,9 @@ class Env {
// Pointer to the underlying FileSystem implementation
std::shared_ptr<FileSystem> file_system_;
private:
static const size_t kMaxHostNameLen = 256;
};
// The factory function to construct a ThreadStatusUpdater. Any Env

View File

@ -348,6 +348,8 @@ struct DbPath {
DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {}
};
static const std::string kHostnameForDbHostId = "__hostname__";
struct DBOptions {
// The function recovers options to the option as in version 4.6.
DBOptions* OldDefaults(int rocksdb_major_version = 4,
@ -1180,6 +1182,19 @@ struct DBOptions {
//
// Default: false
bool allow_data_in_errors = false;
// A string identifying the machine hosting the DB. This
// will be written as a property in every SST file written by the DB (or
// by offline writers such as SstFileWriter and RepairDB). It can be useful
// for troubleshooting in memory corruption caused by a failing host when
// writing a file, by tracing back to the writing host. These corruptions
// may not be caught by the checksum since they happen before checksumming.
// If left as default, the table writer will substitute it with the actual
// hostname when writing the SST file. If set to an empty stirng, the
// property will not be written to the SST file.
//
// Default: hostname
std::string db_host_id = kHostnameForDbHostId;
};
// Options to control the behavior of a database (passed to DB::Open)

View File

@ -32,6 +32,7 @@ typedef std::map<std::string, std::string> UserCollectedProperties;
struct TablePropertiesNames {
static const std::string kDbId;
static const std::string kDbSessionId;
static const std::string kDbHostId;
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kIndexPartitions;
@ -206,6 +207,12 @@ struct TableProperties {
// empty string.
std::string db_session_id;
// Location of the machine hosting the DB instance
// db_host_id identifies the location of the host in some form
// (hostname by default, but can also be any string of the user's choosing).
// It can potentially change whenever the DB is opened
std::string db_host_id;
// Name of the column family with which this SST file is associated.
// If column family is unknown, `column_family_name` will be an empty string.
std::string column_family_name;

View File

@ -853,7 +853,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
compaction_thread_limiter(cf_options.compaction_thread_limiter),
file_checksum_gen_factory(db_options.file_checksum_gen_factory.get()),
sst_partitioner_factory(cf_options.sst_partitioner_factory),
allow_data_in_errors(db_options.allow_data_in_errors) {}
allow_data_in_errors(db_options.allow_data_in_errors),
db_host_id(db_options.db_host_id) {}
// Multiple two operands. If they overflow, return op1.
uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) {

View File

@ -124,6 +124,8 @@ struct ImmutableCFOptions {
std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory;
bool allow_data_in_errors;
std::string db_host_id;
};
struct MutableCFOptions {

View File

@ -389,6 +389,9 @@ static std::unordered_map<std::string, OptionTypeInfo>
{offsetof(struct ImmutableDBOptions, bgerror_resume_retry_interval),
OptionType::kUInt64T, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
{"db_host_id",
{offsetof(struct ImmutableDBOptions, db_host_id), OptionType::kString,
OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever}},
// The following properties were handled as special cases in ParseOption
// This means that the properties could be read from the options file
// but never written to the file or compared to each other.
@ -576,7 +579,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
best_efforts_recovery(options.best_efforts_recovery),
max_bgerror_resume_count(options.max_bgerror_resume_count),
bgerror_resume_retry_interval(options.bgerror_resume_retry_interval),
allow_data_in_errors(options.allow_data_in_errors) {
allow_data_in_errors(options.allow_data_in_errors),
db_host_id(options.db_host_id) {
}
void ImmutableDBOptions::Dump(Logger* log) const {
@ -738,6 +742,8 @@ void ImmutableDBOptions::Dump(Logger* log) const {
bgerror_resume_retry_interval);
ROCKS_LOG_HEADER(log, " Options.allow_data_in_errors: %d",
allow_data_in_errors);
ROCKS_LOG_HEADER(log, " Options.db_host_id: %s",
db_host_id.c_str());
}
MutableDBOptions::MutableDBOptions()

View File

@ -92,6 +92,7 @@ struct ImmutableDBOptions {
int max_bgerror_resume_count;
uint64_t bgerror_resume_retry_interval;
bool allow_data_in_errors;
std::string db_host_id;
};
struct MutableDBOptions {

View File

@ -166,6 +166,7 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
immutable_db_options.max_bgerror_resume_count;
options.bgerror_resume_retry_interval =
immutable_db_options.bgerror_resume_retry_interval;
options.db_host_id = immutable_db_options.db_host_id;
return options;
}

View File

@ -226,6 +226,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
{offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)},
{offsetof(struct DBOptions, file_checksum_gen_factory),
sizeof(std::shared_ptr<FileChecksumGenFactory>)},
{offsetof(struct DBOptions, db_host_id), sizeof(std::string)},
};
char* options_ptr = new char[sizeof(DBOptions)];
@ -332,7 +333,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
"write_dbid_to_manifest=false;"
"best_efforts_recovery=false;"
"max_bgerror_resume_count=2;"
"bgerror_resume_retry_interval=1000000",
"bgerror_resume_retry_interval=1000000"
"db_host_id=hostname",
new_options));
ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),

View File

@ -331,6 +331,7 @@ struct BlockBasedTableBuilder::Rep {
// DB IDs
const std::string db_id;
const std::string db_session_id;
std::string db_host_id;
std::vector<std::unique_ptr<IntTblPropCollector>> table_properties_collectors;
@ -449,7 +450,8 @@ struct BlockBasedTableBuilder::Rep {
target_file_size(_target_file_size),
file_creation_time(_file_creation_time),
db_id(_db_id),
db_session_id(_db_session_id) {
db_session_id(_db_session_id),
db_host_id(ioptions.db_host_id) {
for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) {
compression_ctxs[i].reset(new CompressionContext(compression_type));
}
@ -491,6 +493,10 @@ struct BlockBasedTableBuilder::Rep {
verify_ctxs[i].reset(new UncompressionContext(compression_type));
}
}
if (!ReifyDbHostIdProperty(ioptions.env, &db_host_id).ok()) {
ROCKS_LOG_INFO(ioptions.info_log, "db_host_id property will not be set");
}
}
Rep(const Rep&) = delete;
@ -1412,6 +1418,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
rep_->props.file_creation_time = rep_->file_creation_time;
rep_->props.db_id = rep_->db_id;
rep_->props.db_session_id = rep_->db_session_id;
rep_->props.db_host_id = rep_->db_host_id;
// Add basic properties
property_block_builder.AddTableProperty(rep_->props);

View File

@ -19,6 +19,7 @@
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "table/block_based/block.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/persistent_cache_helper.h"
@ -403,4 +404,18 @@ Status UncompressBlockContents(const UncompressionInfo& uncompression_info,
ioptions, allocator);
}
// Replace the contents of db_host_id with the actual hostname, if db_host_id
// matches the keyword kHostnameForDbHostId
Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id) {
assert(db_host_id);
if (*db_host_id == kHostnameForDbHostId) {
Status s = env->GetHostNameString(db_host_id);
if (!s.ok()) {
db_host_id->clear();
}
return s;
}
return Status::OK();
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -331,6 +331,9 @@ extern Status UncompressBlockContentsForCompressionType(
BlockContents* contents, uint32_t compress_format_version,
const ImmutableCFOptions& ioptions, MemoryAllocator* allocator = nullptr);
// Replace db_host_id contents with the real hostname if necessary
extern Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id);
// Implementation details follow. Clients should ignore,
// TODO(andrewkr): we should prefer one way of representing a null/uninitialized

View File

@ -102,6 +102,9 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) {
if (!props.db_session_id.empty()) {
Add(TablePropertiesNames::kDbSessionId, props.db_session_id);
}
if (!props.db_host_id.empty()) {
Add(TablePropertiesNames::kDbHostId, props.db_host_id);
}
if (!props.filter_policy_name.empty()) {
Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name);
@ -322,6 +325,8 @@ Status ReadProperties(const ReadOptions& read_options,
new_table_properties->db_id = raw_val.ToString();
} else if (key == TablePropertiesNames::kDbSessionId) {
new_table_properties->db_session_id = raw_val.ToString();
} else if (key == TablePropertiesNames::kDbHostId) {
new_table_properties->db_host_id = raw_val.ToString();
} else if (key == TablePropertiesNames::kFilterPolicy) {
new_table_properties->filter_policy_name = raw_val.ToString();
} else if (key == TablePropertiesNames::kColumnFamilyName) {

View File

@ -100,6 +100,10 @@ PlainTableBuilder::PlainTableBuilder(
properties_.column_family_name = column_family_name;
properties_.db_id = db_id;
properties_.db_session_id = db_session_id;
properties_.db_host_id = ioptions.db_host_id;
if (!ReifyDbHostIdProperty(ioptions_.env, &properties_.db_host_id).ok()) {
ROCKS_LOG_INFO(ioptions_.info_log, "db_host_id property will not be set");
}
properties_.prefix_extractor_name = moptions_.prefix_extractor != nullptr
? moptions_.prefix_extractor->Name()
: "nullptr";

View File

@ -90,6 +90,9 @@ class SstFileReaderTest : public testing::Test {
if (check_global_seqno) {
auto properties = reader.GetTableProperties();
ASSERT_TRUE(properties);
std::string hostname;
ASSERT_OK(env_->GetHostNameString(&hostname));
ASSERT_EQ(properties->db_host_id, hostname);
auto& user_properties = properties->user_collected_properties;
ASSERT_TRUE(
user_properties.count(ExternalSstFilePropertyNames::kGlobalSeqno));

View File

@ -196,6 +196,8 @@ void TableProperties::Add(const TableProperties& tp) {
const std::string TablePropertiesNames::kDbId = "rocksdb.creating.db.identity";
const std::string TablePropertiesNames::kDbSessionId =
"rocksdb.creating.session.identity";
const std::string TablePropertiesNames::kDbHostId =
"rocksdb.creating.host.identity";
const std::string TablePropertiesNames::kDataSize =
"rocksdb.data.size";
const std::string TablePropertiesNames::kIndexSize =

View File

@ -3529,6 +3529,7 @@ TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
std::vector<std::string> keys;
stl_wrappers::KVMap kvmap;
Options options;
options.db_host_id = "";
test::PlainInternalKeyComparator internal_comparator(options.comparator);
options.compression = kNoCompression;
BlockBasedTableOptions table_options;