mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 16:30:56 +00:00
1104eaa35e
Summary: This PR adds support for the `TimedPut` API. We introduced a new type `kTypeValuePreferredSeqno` for entries added to the DB via the `TimedPut` API. The life cycle of such an entry on the write/flush/compaction paths is: 1) It is initially added to the memtable as: `<user_key, seq, kTypeValuePreferredSeqno>: {value, write_unix_time}` 2) When it's flushed to L0 sst files, it's converted to: `<user_key, seq, kTypeValuePreferredSeqno>: {value, preferred_seqno}` when we have easy access to the seqno to time mapping. 3) During compaction, if certain conditions are met, we swap in the `preferred_seqno` and the entry will become: `<user_key, preferred_seqno, kTypeValue>: value`. This step helps fast track these entries to the cold tier if they are eligible after the sequence number swap. On the read path: A `kTypeValuePreferredSeqno` entry acts the same as a `kTypeValue` entry; the `write_unix_time`/`preferred_seqno` part packed in the value is completely ignored. Needed follow-ups: 1) The seqno to time mapping accessible in flush needs to be extended to cover the `write_unix_time` for possible `kTypeValuePreferredSeqno` entries. This also means we need to track these `write_unix_time` values in the memtable. 2) Compaction filter support for the new `kTypeValuePreferredSeqno` type for feature parity with other `kTypeValue` and equivalent types. 3) Stress test coverage for the feature. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12419 Test Plan: Added unit tests Reviewed By: pdillinger Differential Revision: D54920296 Pulled By: jowlyzhang fbshipit-source-id: c8b43f7a7c465e569141770e93c748371ff1da9e
123 lines
4.5 KiB
C++
123 lines
4.5 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
|
|
#include "rocksdb/utilities/debug.h"
|
|
|
|
#include "db/db_impl/db_impl.h"
|
|
#include "rocksdb/utilities/options_type.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
static std::unordered_map<std::string, ValueType> value_type_string_map = {
|
|
{"TypeDeletion", ValueType::kTypeDeletion},
|
|
{"TypeValue", ValueType::kTypeValue},
|
|
{"TypeMerge", ValueType::kTypeMerge},
|
|
{"TypeLogData", ValueType::kTypeLogData},
|
|
{"TypeColumnFamilyDeletion", ValueType::kTypeColumnFamilyDeletion},
|
|
{"TypeColumnFamilyValue", ValueType::kTypeColumnFamilyValue},
|
|
{"TypeColumnFamilyMerge", ValueType::kTypeColumnFamilyMerge},
|
|
{"TypeSingleDeletion", ValueType::kTypeSingleDeletion},
|
|
{"TypeColumnFamilySingleDeletion",
|
|
ValueType::kTypeColumnFamilySingleDeletion},
|
|
{"TypeBeginPrepareXID", ValueType::kTypeBeginPrepareXID},
|
|
{"TypeEndPrepareXID", ValueType::kTypeEndPrepareXID},
|
|
{"TypeCommitXID", ValueType::kTypeCommitXID},
|
|
{"TypeRollbackXID", ValueType::kTypeRollbackXID},
|
|
{"TypeNoop", ValueType::kTypeNoop},
|
|
{"TypeColumnFamilyRangeDeletion",
|
|
ValueType::kTypeColumnFamilyRangeDeletion},
|
|
{"TypeRangeDeletion", ValueType::kTypeRangeDeletion},
|
|
{"TypeColumnFamilyBlobIndex", ValueType::kTypeColumnFamilyBlobIndex},
|
|
{"TypeBlobIndex", ValueType::kTypeBlobIndex},
|
|
{"TypeBeginPersistedPrepareXID", ValueType::kTypeBeginPersistedPrepareXID},
|
|
{"TypeBeginUnprepareXID", ValueType::kTypeBeginUnprepareXID},
|
|
{"TypeDeletionWithTimestamp", ValueType::kTypeDeletionWithTimestamp},
|
|
{"TypeCommitXIDAndTimestamp", ValueType::kTypeCommitXIDAndTimestamp},
|
|
{"TypeWideColumnEntity", ValueType::kTypeWideColumnEntity},
|
|
{"TypeColumnFamilyWideColumnEntity",
|
|
ValueType::kTypeColumnFamilyWideColumnEntity},
|
|
{"TypeValuePreferredSeqno", ValueType::kTypeValuePreferredSeqno},
|
|
{"TypeColumnFamilyValuePreferredSeqno",
|
|
ValueType::kTypeColumnFamilyValuePreferredSeqno},
|
|
};
|
|
|
|
std::string KeyVersion::GetTypeName() const {
|
|
std::string type_name;
|
|
if (SerializeEnum<ValueType>(value_type_string_map,
|
|
static_cast<ValueType>(type), &type_name)) {
|
|
return type_name;
|
|
} else {
|
|
return "Invalid";
|
|
}
|
|
}
|
|
|
|
// Convenience overload: lists key versions from the default column family of
// `db` by forwarding to the overload that takes an explicit column family
// handle. All other arguments are passed through unchanged.
//
// Returns Status::InvalidArgument when `db` is null; otherwise returns
// whatever the forwarded call returns.
Status GetAllKeyVersions(DB* db, Slice begin_key, Slice end_key,
                         size_t max_num_ikeys,
                         std::vector<KeyVersion>* key_versions) {
  // `db` is dereferenced just below, so it has to be validated here rather
  // than left to the callee.
  if (db == nullptr) {
    return Status::InvalidArgument("db cannot be null.");
  }

  ColumnFamilyHandle* const default_cf = db->DefaultColumnFamily();
  return GetAllKeyVersions(db, default_cf, begin_key, end_key, max_num_ikeys,
                           key_versions);
}
|
|
|
|
// Copies the internal key entries of column family `cfh` whose user key lies
// in the inclusive range [`begin_key`, `end_key`] into `*key_versions`.
//
// Parameters:
//   db            - database to read from; must not be null.
//   cfh           - column family to scan; must not be null.
//   begin_key     - first user key to include; when empty the scan starts at
//                   the first entry of the column family.
//   end_key       - last user key to include; when empty there is no upper
//                   bound.
//   max_num_ikeys - upper bound on the number of entries returned. A value of
//                   zero yields an empty result.
//   key_versions  - output vector; must not be null. It is cleared before
//                   anything is appended.
//
// Returns:
//   - Status::InvalidArgument if `db`, `cfh` or `key_versions` is null.
//   - The status from ParseInternalKey if an entry's key cannot be parsed;
//     entries appended before the failure remain in `*key_versions`.
//   - Otherwise the iterator's final status, so that a read error that ended
//     the scan early is not mistaken for reaching the end of the range.
Status GetAllKeyVersions(DB* db, ColumnFamilyHandle* cfh, Slice begin_key,
                         Slice end_key, size_t max_num_ikeys,
                         std::vector<KeyVersion>* key_versions) {
  if (nullptr == db) {
    return Status::InvalidArgument("db cannot be null.");
  }
  if (nullptr == cfh) {
    return Status::InvalidArgument("Column family handle cannot be null.");
  }
  if (nullptr == key_versions) {
    return Status::InvalidArgument("key_versions cannot be null.");
  }
  key_versions->clear();

  // The cap below is only tested after an entry has been appended, so a cap
  // of zero has to be handled up front or one entry would slip through.
  if (max_num_ikeys == 0) {
    return Status::OK();
  }

  DBImpl* idb = static_cast<DBImpl*>(db->GetRootDB());
  auto icmp = InternalKeyComparator(idb->GetOptions(cfh).comparator);
  ReadOptions read_options;
  Arena arena;
  ScopedArenaIterator iter(
      idb->NewInternalIterator(read_options, &arena, kMaxSequenceNumber, cfh));

  if (!begin_key.empty()) {
    // NOTE(review): presumably this builds the smallest internal key for
    // `begin_key`, so the seek lands on its first entry -- confirm against
    // InternalKey::SetMinPossibleForUserKey.
    InternalKey ikey;
    ikey.SetMinPossibleForUserKey(begin_key);
    iter->Seek(ikey.Encode());
  } else {
    iter->SeekToFirst();
  }

  size_t num_keys = 0;
  for (; iter->Valid(); iter->Next()) {
    ParsedInternalKey ikey;
    Status pik_status =
        ParseInternalKey(iter->key(), &ikey, true /* log_err_key */);  // TODO
    if (!pik_status.ok()) {
      return pik_status;
    }

    // The range is inclusive of `end_key`: stop at the first user key that
    // compares strictly greater.
    if (!end_key.empty() &&
        icmp.user_comparator()->Compare(ikey.user_key, end_key) > 0) {
      break;
    }

    key_versions->emplace_back(ikey.user_key.ToString() /* _user_key */,
                               iter->value().ToString() /* _value */,
                               ikey.sequence /* _sequence */,
                               static_cast<int>(ikey.type) /* _type */);
    if (++num_keys >= max_num_ikeys) {
      break;
    }
  }

  // The loop also ends when the iterator becomes invalid, so report the
  // iterator's own status instead of assuming success.
  return iter->status();
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|