Add documentation to some formatting util functions (#11674)

Summary:
As titled, this change mostly adds documentation. It also updates one usage of these util functions in the external file ingestion job, based on code inspection.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11674

Test Plan:
```
make check
```

Note that no unit test was added or updated to check that the change in the external file ingestion flow works. This is because the user-defined timestamp feature doesn't support bulk loading yet. There could be other missing pieces that are needed to make this flow functional and testable. That work is tracked separately, and unit tests will be added then.

Reviewed By: cbi42

Differential Revision: D48271338

Pulled By: jowlyzhang

fbshipit-source-id: c05c3440f1c08632dd0de51b563a30b44b4eb8b5
This commit is contained in:
Yu Zhang 2023-08-14 22:04:18 -07:00 committed by Facebook GitHub Bot
parent a09c141dde
commit 6a3da5635e
4 changed files with 70 additions and 3 deletions

View File

@ -88,6 +88,13 @@ void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key,
result->append(kTsMax.data(), ts_sz); result->append(kTsMax.data(), ts_sz);
} }
// Appends `key` to *result with its trailing timestamp replaced by the minimum
// timestamp.
//
// `key` is a user key that ends with a timestamp of `ts_sz` bytes. The bytes
// preceding the timestamp are appended to *result, followed by `ts_sz` zero
// bytes, which form the minimum timestamp.
//
// input [user key]: <user_provided_key | original_ts>
// appended to *result: <user_provided_key | min_ts>
void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key,
                                   size_t ts_sz) {
  assert(ts_sz > 0);
  // Guard the unsigned subtraction below: a key shorter than the timestamp
  // would make the length wrap around to a huge value.
  assert(key.size() >= ts_sz);
  result->append(key.data(), key.size() - ts_sz);
  result->append(ts_sz, static_cast<unsigned char>(0));
}
void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key,
size_t ts_sz) { size_t ts_sz) {
assert(ts_sz > 0); assert(ts_sz > 0);

View File

@ -168,10 +168,18 @@ inline void UnPackSequenceAndType(uint64_t packed, uint64_t* seq,
EntryType GetEntryType(ValueType value_type); EntryType GetEntryType(ValueType value_type);
// Append the serialization of "key" to *result. // Append the serialization of "key" to *result.
//
// input [internal key]: <user_key | seqno + type>
// output before: empty
// output after: <user_key | seqno + type>
void AppendInternalKey(std::string* result, const ParsedInternalKey& key); void AppendInternalKey(std::string* result, const ParsedInternalKey& key);
// Append the serialization of "key" to *result, replacing the original // Append the serialization of "key" to *result, replacing the original
// timestamp with argument ts. // timestamp with argument ts.
//
// input [internal key]: <user_provided_key | original_ts | seqno + type>
// output before: empty
// output after: <user_provided_key | ts | seqno + type>
void AppendInternalKeyWithDifferentTimestamp(std::string* result, void AppendInternalKeyWithDifferentTimestamp(std::string* result,
const ParsedInternalKey& key, const ParsedInternalKey& key,
const Slice& ts); const Slice& ts);
@ -179,37 +187,73 @@ void AppendInternalKeyWithDifferentTimestamp(std::string* result,
// Serialized internal key consists of user key followed by footer. // Serialized internal key consists of user key followed by footer.
// This function appends the footer to *result, assuming that *result already // This function appends the footer to *result, assuming that *result already
// contains the user key at the end. // contains the user key at the end.
//
// output before: <user_key>
// output after: <user_key | seqno + type>
void AppendInternalKeyFooter(std::string* result, SequenceNumber s, void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
ValueType t); ValueType t);
// Append the key and a minimal timestamp to *result // Append the key and a minimal timestamp to *result
//
// input [user key without ts]: <user_provided_key>
// output before: empty
// output after: <user_provided_key | min_ts>
void AppendKeyWithMinTimestamp(std::string* result, const Slice& key, void AppendKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz); size_t ts_sz);
// Append the key and a maximal timestamp to *result // Append the key and a maximal timestamp to *result
//
// input [user key without ts]: <user_provided_key>
// output before: empty
// output after: <user_provided_key | max_ts>
void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key, void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key,
size_t ts_sz); size_t ts_sz);
// `key` is a user key with timestamp. Append the user key without timestamp
// and the minimum timestamp to *result.
//
// Preconditions: `ts_sz` > 0 (asserted by the implementation), and `key` must
// end with a timestamp of `ts_sz` bytes, i.e. key.size() >= ts_sz, because
// the last `ts_sz` bytes of `key` are dropped.
// The minimum timestamp is written as `ts_sz` zero bytes.
//
// input [user key]: <user_provided_key | original_ts>
// output before: empty
// output after: <user_provided_key | min_ts>
void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz);
// `key` is a user key with timestamp. Append the user key without timestamp // `key` is a user key with timestamp. Append the user key without timestamp
// and the maximal timestamp to *result. // and the maximal timestamp to *result.
//
// input [user key]: <user_provided_key | original_ts>
// output before: empty
// output after: <user_provided_key | max_ts>
void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key,
size_t ts_sz); size_t ts_sz);
// `key` is an internal key containing a user key without timestamp. Create a // `key` is an internal key containing a user key without timestamp. Create a
// new key in *result by padding a min timestamp of size `ts_sz` to the user key // new key in *result by padding a min timestamp of size `ts_sz` to the user key
// and copying the remaining internal key bytes. // and copying the remaining internal key bytes.
//
// input [internal key]: <user_provided_key | seqno + type>
// output before: empty
// output after: <user_provided_key | min_ts | seqno + type>
void PadInternalKeyWithMinTimestamp(std::string* result, const Slice& key, void PadInternalKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz); size_t ts_sz);
// `key` is an internal key containing a user key with timestamp of size // `key` is an internal key containing a user key with timestamp of size
// `ts_sz`. Create a new internal key in *result by stripping the timestamp from // `ts_sz`. Create a new internal key in *result by stripping the timestamp from
// the user key and copying the remaining internal key bytes. // the user key and copying the remaining internal key bytes.
//
// input [internal key]: <user_provided_key | original_ts | seqno + type>
// output before: empty
// output after: <user_provided_key | seqno + type>
void StripTimestampFromInternalKey(std::string* result, const Slice& key, void StripTimestampFromInternalKey(std::string* result, const Slice& key,
size_t ts_sz); size_t ts_sz);
// `key` is an internal key containing a user key with timestamp of size // `key` is an internal key containing a user key with timestamp of size
// `ts_sz`. Create a new internal key in *result while replacing the original // `ts_sz`. Create a new internal key in *result while replacing the original
// timestamp with min timestamp. // timestamp with min timestamp.
//
// input [internal key]: <user_provided_key | original_ts | seqno + type>
// output before: empty
// output after: <user_provided_key | min_ts | seqno + type>
void ReplaceInternalKeyWithMinTimestamp(std::string* result, const Slice& key, void ReplaceInternalKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz); size_t ts_sz);
@ -221,11 +265,16 @@ Status ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result,
bool log_err_key); bool log_err_key);
// Returns the user key portion of an internal key. // Returns the user key portion of an internal key.
//
// input [internal key]: <user_key | seqno + type>
// output: <user_key>
inline Slice ExtractUserKey(const Slice& internal_key) { inline Slice ExtractUserKey(const Slice& internal_key) {
assert(internal_key.size() >= kNumInternalBytes); assert(internal_key.size() >= kNumInternalBytes);
return Slice(internal_key.data(), internal_key.size() - kNumInternalBytes); return Slice(internal_key.data(), internal_key.size() - kNumInternalBytes);
} }
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <user_provided_key>
inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key, inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
size_t ts_sz) { size_t ts_sz) {
Slice ret = internal_key; Slice ret = internal_key;
@ -233,17 +282,23 @@ inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
return ret; return ret;
} }
// input [user key]: <user_provided_key | ts>
// output: <user_provided_key>
inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
Slice ret = user_key; Slice ret = user_key;
ret.remove_suffix(ts_sz); ret.remove_suffix(ts_sz);
return ret; return ret;
} }
// input [user key]: <user_provided_key | ts>
// output: <ts>
inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
assert(user_key.size() >= ts_sz); assert(user_key.size() >= ts_sz);
return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz); return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz);
} }
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <ts>
inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) { inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) {
const size_t key_size = internal_key.size(); const size_t key_size = internal_key.size();
assert(key_size >= kNumInternalBytes + ts_sz); assert(key_size >= kNumInternalBytes + ts_sz);
@ -251,12 +306,16 @@ inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) {
ts_sz); ts_sz);
} }
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <seqno + type>
inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) { inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
assert(internal_key.size() >= kNumInternalBytes); assert(internal_key.size() >= kNumInternalBytes);
const size_t n = internal_key.size(); const size_t n = internal_key.size();
return DecodeFixed64(internal_key.data() + n - kNumInternalBytes); return DecodeFixed64(internal_key.data() + n - kNumInternalBytes);
} }
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <type>
inline ValueType ExtractValueType(const Slice& internal_key) { inline ValueType ExtractValueType(const Slice& internal_key) {
uint64_t num = ExtractInternalKeyFooter(internal_key); uint64_t num = ExtractInternalKeyFooter(internal_key);
unsigned char c = num & 0xff; unsigned char c = num & 0xff;

View File

@ -349,7 +349,7 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
std::string end_str; std::string end_str;
AppendUserKeyWithMaxTimestamp( AppendUserKeyWithMaxTimestamp(
&begin_str, file_to_ingest.smallest_internal_key.user_key(), ts_sz); &begin_str, file_to_ingest.smallest_internal_key.user_key(), ts_sz);
AppendKeyWithMinTimestamp( AppendUserKeyWithMinTimestamp(
&end_str, file_to_ingest.largest_internal_key.user_key(), ts_sz); &end_str, file_to_ingest.largest_internal_key.user_key(), ts_sz);
keys.emplace_back(std::move(begin_str)); keys.emplace_back(std::move(begin_str));
keys.emplace_back(std::move(end_str)); keys.emplace_back(std::move(end_str));

View File

@ -1361,8 +1361,9 @@ class DB {
// the files. In this case, client could set options.change_level to true, to // the files. In this case, client could set options.change_level to true, to
// move the files back to the minimum level capable of holding the data set // move the files back to the minimum level capable of holding the data set
// or a given level (specified by non-negative options.target_level). // or a given level (specified by non-negative options.target_level).
// In case of user_defined timestamp, if enabled, `start` and `end` should //
// point to key without timestamp part. // In case of user-defined timestamp, if enabled, `begin` and `end` should
// not contain timestamp.
virtual Status CompactRange(const CompactRangeOptions& options, virtual Status CompactRange(const CompactRangeOptions& options,
ColumnFamilyHandle* column_family, ColumnFamilyHandle* column_family,
const Slice* begin, const Slice* end) = 0; const Slice* begin, const Slice* end) = 0;