rocksdb/utilities/debug.cc
Yu Zhang 1104eaa35e Add initial support for TimedPut API (#12419)
Summary:
This PR adds support for `TimedPut` API. We introduced a new type `kTypeValuePreferredSeqno` for entries added to the DB via the `TimedPut` API.

The life cycle of such an entry on the write/flush/compaction paths is:

1) It is initially added to memtable as:
`<user_key, seq, kTypeValuePreferredSeqno>: {value, write_unix_time}`

2) When it's flushed to L0 sst files, it's converted to:
`<user_key, seq, kTypeValuePreferredSeqno>: {value, preferred_seqno}`
 when we have easy access to the seqno to time mapping.

3) During compaction, if certain conditions are met, we swap in the `preferred_seqno` and the entry will become:
`<user_key, preferred_seqno, kTypeValue>: value`. This step helps fast track these entries to the cold tier if they are eligible after the sequence number swap.

On the read path:
A `kTypeValuePreferredSeqno` entry acts the same as a `kTypeValue` entry; the `write_unix_time`/`preferred_seqno` part packed in the value is completely ignored.

Needed follow ups:
1) The seqno to time mapping accessible in flush needs to be extended to cover the `write_unix_time` for possible `kTypeValuePreferredSeqno` entries. This also means we need to track these `write_unix_time` in memtable.

2) Compaction filter support for the new `kTypeValuePreferredSeqno` type for feature parity with other `kTypeValue` and equivalent types.

3) Stress test coverage for the feature

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12419

Test Plan: Added unit tests

Reviewed By: pdillinger

Differential Revision: D54920296

Pulled By: jowlyzhang

fbshipit-source-id: c8b43f7a7c465e569141770e93c748371ff1da9e
2024-03-14 15:44:55 -07:00

123 lines
4.5 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "rocksdb/utilities/debug.h"
#include "db/db_impl/db_impl.h"
#include "rocksdb/utilities/options_type.h"
namespace ROCKSDB_NAMESPACE {
// Display name for each ValueType enumerator. KeyVersion::GetTypeName() passes
// this table to SerializeEnum() to turn a stored type code back into its name.
// The table is only ever read, so it is declared const to keep it from being
// modified after static initialization.
static const std::unordered_map<std::string, ValueType>
    value_type_string_map = {
        {"TypeDeletion", ValueType::kTypeDeletion},
        {"TypeValue", ValueType::kTypeValue},
        {"TypeMerge", ValueType::kTypeMerge},
        {"TypeLogData", ValueType::kTypeLogData},
        {"TypeColumnFamilyDeletion", ValueType::kTypeColumnFamilyDeletion},
        {"TypeColumnFamilyValue", ValueType::kTypeColumnFamilyValue},
        {"TypeColumnFamilyMerge", ValueType::kTypeColumnFamilyMerge},
        {"TypeSingleDeletion", ValueType::kTypeSingleDeletion},
        {"TypeColumnFamilySingleDeletion",
         ValueType::kTypeColumnFamilySingleDeletion},
        {"TypeBeginPrepareXID", ValueType::kTypeBeginPrepareXID},
        {"TypeEndPrepareXID", ValueType::kTypeEndPrepareXID},
        {"TypeCommitXID", ValueType::kTypeCommitXID},
        {"TypeRollbackXID", ValueType::kTypeRollbackXID},
        {"TypeNoop", ValueType::kTypeNoop},
        {"TypeColumnFamilyRangeDeletion",
         ValueType::kTypeColumnFamilyRangeDeletion},
        {"TypeRangeDeletion", ValueType::kTypeRangeDeletion},
        {"TypeColumnFamilyBlobIndex", ValueType::kTypeColumnFamilyBlobIndex},
        {"TypeBlobIndex", ValueType::kTypeBlobIndex},
        {"TypeBeginPersistedPrepareXID",
         ValueType::kTypeBeginPersistedPrepareXID},
        {"TypeBeginUnprepareXID", ValueType::kTypeBeginUnprepareXID},
        {"TypeDeletionWithTimestamp", ValueType::kTypeDeletionWithTimestamp},
        {"TypeCommitXIDAndTimestamp", ValueType::kTypeCommitXIDAndTimestamp},
        {"TypeWideColumnEntity", ValueType::kTypeWideColumnEntity},
        {"TypeColumnFamilyWideColumnEntity",
         ValueType::kTypeColumnFamilyWideColumnEntity},
        {"TypeValuePreferredSeqno", ValueType::kTypeValuePreferredSeqno},
        {"TypeColumnFamilyValuePreferredSeqno",
         ValueType::kTypeColumnFamilyValuePreferredSeqno},
};
// Returns the name registered in value_type_string_map for this entry's
// `type` code, or "Invalid" when SerializeEnum() finds no matching enumerator.
std::string KeyVersion::GetTypeName() const {
  std::string name;
  const bool found = SerializeEnum<ValueType>(
      value_type_string_map, static_cast<ValueType>(type), &name);
  if (!found) {
    return "Invalid";
  }
  return name;
}
// Convenience overload: lists key versions from the DB's default column
// family by forwarding to the overload that takes a ColumnFamilyHandle.
// Returns InvalidArgument when `db` is null; otherwise returns whatever the
// forwarded call returns.
Status GetAllKeyVersions(DB* db, Slice begin_key, Slice end_key,
                         size_t max_num_ikeys,
                         std::vector<KeyVersion>* key_versions) {
  if (db != nullptr) {
    ColumnFamilyHandle* default_cf = db->DefaultColumnFamily();
    return GetAllKeyVersions(db, default_cf, begin_key, end_key,
                             max_num_ikeys, key_versions);
  }
  return Status::InvalidArgument("db cannot be null.");
}
// Collects the internal key versions of column family `cfh` whose user keys
// fall in [begin_key, end_key] into `*key_versions`.
//
// - begin_key: when empty, iteration starts at the first entry; otherwise the
//   iterator is positioned with an internal key built by
//   SetMinPossibleForUserKey(begin_key).
// - end_key: when empty, there is no upper bound; otherwise iteration stops at
//   the first user key that compares greater than end_key (so end_key itself
//   is included).
// - max_num_ikeys: iteration stops once this many entries have been stored.
//   The cap is checked after an entry is appended, so a cap of 0 still yields
//   one entry.
// - key_versions: cleared before anything is appended.
//
// Returns InvalidArgument when `db`, `cfh` or `key_versions` is null, the
// parse status when an internal key cannot be parsed, and otherwise the
// iterator's final status. In the error cases `*key_versions` may hold the
// entries gathered before the failure.
Status GetAllKeyVersions(DB* db, ColumnFamilyHandle* cfh, Slice begin_key,
                         Slice end_key, size_t max_num_ikeys,
                         std::vector<KeyVersion>* key_versions) {
  if (nullptr == db) {
    return Status::InvalidArgument("db cannot be null.");
  }
  if (nullptr == cfh) {
    return Status::InvalidArgument("Column family handle cannot be null.");
  }
  if (nullptr == key_versions) {
    return Status::InvalidArgument("key_versions cannot be null.");
  }
  key_versions->clear();

  DBImpl* idb = static_cast<DBImpl*>(db->GetRootDB());
  auto icmp = InternalKeyComparator(idb->GetOptions(cfh).comparator);
  ReadOptions read_options;
  Arena arena;
  ScopedArenaIterator iter(
      idb->NewInternalIterator(read_options, &arena, kMaxSequenceNumber, cfh));

  if (!begin_key.empty()) {
    InternalKey ikey;
    ikey.SetMinPossibleForUserKey(begin_key);
    iter->Seek(ikey.Encode());
  } else {
    iter->SeekToFirst();
  }

  size_t num_keys = 0;
  for (; iter->Valid(); iter->Next()) {
    ParsedInternalKey ikey;
    Status pik_status =
        ParseInternalKey(iter->key(), &ikey, true /* log_err_key */);  // TODO
    if (!pik_status.ok()) {
      return pik_status;
    }

    // The upper bound is inclusive: only keys strictly past end_key stop the
    // scan.
    if (!end_key.empty() &&
        icmp.user_comparator()->Compare(ikey.user_key, end_key) > 0) {
      break;
    }

    key_versions->emplace_back(ikey.user_key.ToString() /* _user_key */,
                               iter->value().ToString() /* _value */,
                               ikey.sequence /* _sequence */,
                               static_cast<int>(ikey.type) /* _type */);
    if (++num_keys >= max_num_ikeys) {
      break;
    }
  }

  // The loop also ends when the iterator becomes invalid because of an error
  // (e.g. I/O failure or corruption). Report the iterator's status instead of
  // unconditionally claiming success, so a truncated listing is not mistaken
  // for a complete one. When the loop exited through a `break`, the iterator
  // is still valid and its status is OK.
  return iter->status();
}
} // namespace ROCKSDB_NAMESPACE