2016-11-14 02:58:17 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2016-11-14 02:58:17 +00:00
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
#include "db/db_test_util.h"
|
|
|
|
#include "db/memtable.h"
|
2018-12-18 01:26:56 +00:00
|
|
|
#include "db/range_del_aggregator.h"
|
2016-11-14 02:58:17 +00:00
|
|
|
#include "port/stack_trace.h"
|
|
|
|
#include "rocksdb/memtablerep.h"
|
|
|
|
#include "rocksdb/slice_transform.h"
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2016-11-14 02:58:17 +00:00
|
|
|
|
|
|
|
// Test fixture for memtable-focused DB tests. Uses a per-test directory
// named "db_memtable_test" and enables env fsync for the test environment.
class DBMemTableTest : public DBTestBase {
 public:
  DBMemTableTest() : DBTestBase("db_memtable_test", /*env_do_fsync=*/true) {}
};
|
|
|
|
|
|
|
|
class MockMemTableRep : public MemTableRep {
|
|
|
|
public:
|
2017-06-02 21:13:59 +00:00
|
|
|
explicit MockMemTableRep(Allocator* allocator, MemTableRep* rep)
|
2016-11-14 02:58:17 +00:00
|
|
|
: MemTableRep(allocator), rep_(rep), num_insert_with_hint_(0) {}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
KeyHandle Allocate(const size_t len, char** buf) override {
|
2016-11-14 02:58:17 +00:00
|
|
|
return rep_->Allocate(len, buf);
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
void Insert(KeyHandle handle) override { rep_->Insert(handle); }
|
2016-11-14 02:58:17 +00:00
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
void InsertWithHint(KeyHandle handle, void** hint) override {
|
2016-11-14 02:58:17 +00:00
|
|
|
num_insert_with_hint_++;
|
2018-02-01 02:45:49 +00:00
|
|
|
EXPECT_NE(nullptr, hint);
|
2016-11-14 02:58:17 +00:00
|
|
|
last_hint_in_ = *hint;
|
2018-02-16 01:12:48 +00:00
|
|
|
rep_->InsertWithHint(handle, hint);
|
2016-11-14 02:58:17 +00:00
|
|
|
last_hint_out_ = *hint;
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
bool Contains(const char* key) const override { return rep_->Contains(key); }
|
2016-11-14 02:58:17 +00:00
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
void Get(const LookupKey& k, void* callback_args,
|
|
|
|
bool (*callback_func)(void* arg, const char* entry)) override {
|
2016-11-14 02:58:17 +00:00
|
|
|
rep_->Get(k, callback_args, callback_func);
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
size_t ApproximateMemoryUsage() override {
|
2016-11-14 02:58:17 +00:00
|
|
|
return rep_->ApproximateMemoryUsage();
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
Iterator* GetIterator(Arena* arena) override {
|
2016-11-14 02:58:17 +00:00
|
|
|
return rep_->GetIterator(arena);
|
|
|
|
}
|
|
|
|
|
|
|
|
void* last_hint_in() { return last_hint_in_; }
|
|
|
|
void* last_hint_out() { return last_hint_out_; }
|
|
|
|
int num_insert_with_hint() { return num_insert_with_hint_; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::unique_ptr<MemTableRep> rep_;
|
|
|
|
void* last_hint_in_;
|
|
|
|
void* last_hint_out_;
|
|
|
|
int num_insert_with_hint_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class MockMemTableRepFactory : public MemTableRepFactory {
|
|
|
|
public:
|
2019-02-14 21:52:47 +00:00
|
|
|
MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp,
|
|
|
|
Allocator* allocator,
|
|
|
|
const SliceTransform* transform,
|
|
|
|
Logger* logger) override {
|
2016-11-14 02:58:17 +00:00
|
|
|
SkipListFactory factory;
|
|
|
|
MemTableRep* skiplist_rep =
|
|
|
|
factory.CreateMemTableRep(cmp, allocator, transform, logger);
|
|
|
|
mock_rep_ = new MockMemTableRep(allocator, skiplist_rep);
|
|
|
|
return mock_rep_;
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp,
|
|
|
|
Allocator* allocator,
|
|
|
|
const SliceTransform* transform,
|
|
|
|
Logger* logger,
|
|
|
|
uint32_t column_family_id) override {
|
2017-06-02 19:08:01 +00:00
|
|
|
last_column_family_id_ = column_family_id;
|
|
|
|
return CreateMemTableRep(cmp, allocator, transform, logger);
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
const char* Name() const override { return "MockMemTableRepFactory"; }
|
2016-11-14 02:58:17 +00:00
|
|
|
|
|
|
|
MockMemTableRep* rep() { return mock_rep_; }
|
|
|
|
|
2016-11-16 17:24:52 +00:00
|
|
|
bool IsInsertConcurrentlySupported() const override { return false; }
|
|
|
|
|
2017-06-02 19:08:01 +00:00
|
|
|
uint32_t GetLastColumnFamilyId() { return last_column_family_id_; }
|
|
|
|
|
2016-11-14 02:58:17 +00:00
|
|
|
private:
|
|
|
|
MockMemTableRep* mock_rep_;
|
2022-05-05 20:08:21 +00:00
|
|
|
// workaround since there's no std::numeric_limits<uint32_t>::max() yet.
|
2017-06-02 19:08:01 +00:00
|
|
|
uint32_t last_column_family_id_ = static_cast<uint32_t>(-1);
|
2016-11-14 02:58:17 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
class TestPrefixExtractor : public SliceTransform {
|
|
|
|
public:
|
2019-02-14 21:52:47 +00:00
|
|
|
const char* Name() const override { return "TestPrefixExtractor"; }
|
2016-11-14 02:58:17 +00:00
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
Slice Transform(const Slice& key) const override {
|
2016-11-14 02:58:17 +00:00
|
|
|
const char* p = separator(key);
|
|
|
|
if (p == nullptr) {
|
|
|
|
return Slice();
|
|
|
|
}
|
|
|
|
return Slice(key.data(), p - key.data() + 1);
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
bool InDomain(const Slice& key) const override {
|
2016-11-14 02:58:17 +00:00
|
|
|
return separator(key) != nullptr;
|
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
bool InRange(const Slice& /*key*/) const override { return false; }
|
2016-11-14 02:58:17 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
const char* separator(const Slice& key) const {
|
Prefer static_cast in place of most reinterpret_cast (#12308)
Summary:
The following are risks associated with pointer-to-pointer reinterpret_cast:
* Can produce the "wrong result" (crash or memory corruption). IIRC, in theory this can happen for any up-cast or down-cast for a non-standard-layout type, though in practice would only happen for multiple inheritance cases (where the base class pointer might be "inside" the derived object). We don't use multiple inheritance a lot, but we do.
* Can mask useful compiler errors upon code change, including converting between unrelated pointer types that you are expecting to be related, and converting between pointer and scalar types unintentionally.
I can only think of some obscure cases where static_cast could be troublesome when it compiles as a replacement:
* Going through `void*` could plausibly cause unnecessary or broken pointer arithmetic. Suppose we have
`struct Derived: public Base1, public Base2`. If we have `Derived*` -> `void*` -> `Base2*` -> `Derived*` through reinterpret casts, this could plausibly work (though technical UB) assuming the `Base2*` is not dereferenced. Changing to static cast could introduce breaking pointer arithmetic.
* Unnecessary (but safe) pointer arithmetic could arise in a case like `Derived*` -> `Base2*` -> `Derived*` where before the Base2 pointer might not have been dereferenced. This could potentially affect performance.
With some light scripting, I tried replacing pointer-to-pointer reinterpret_casts with static_cast and kept the cases that still compile. Most occurrences of reinterpret_cast have successfully been changed (except for java/ and third-party/). 294 changed, 257 remain.
A couple of related interventions included here:
* Previously Cache::Handle was not actually derived from in the implementations and just used as a `void*` stand-in with reinterpret_cast. Now there is a relationship to allow static_cast. In theory, this could introduce pointer arithmetic (as described above) but is unlikely without multiple inheritance AND non-empty Cache::Handle.
* Remove some unnecessary casts to void* as this is allowed to be implicit (for better or worse).
Most of the remaining reinterpret_casts are for converting to/from raw bytes of objects. We could consider better idioms for these patterns in follow-up work.
I wish there were a way to implement a template variant of static_cast that would only compile if no pointer arithmetic is generated, but best I can tell, this is not possible. AFAIK the best you could do is a dynamic check that the void* conversion after the static cast is unchanged.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12308
Test Plan: existing tests, CI
Reviewed By: ltamasi
Differential Revision: D53204947
Pulled By: pdillinger
fbshipit-source-id: 9de23e618263b0d5b9820f4e15966876888a16e2
2024-02-07 18:44:11 +00:00
|
|
|
return static_cast<const char*>(memchr(key.data(), '_', key.size()));
|
2016-11-14 02:58:17 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-02-01 02:45:49 +00:00
|
|
|
// Test that MemTable::Add returns a TryAgain status when inserting a
// duplicate (key, seq) pair, across plain inserts, hint-based inserts, and
// concurrent-capable inserts.
TEST_F(DBMemTableTest, DuplicateSeq) {
  SequenceNumber seq = 123;
  std::string value;
  MergeContext merge_context;
  Options options;
  InternalKeyComparator ikey_cmp(options.comparator);
  ReadRangeDelAggregator range_del_agg(&ikey_cmp,
                                       kMaxSequenceNumber /* upper_bound */);

  // Create a MemTable
  InternalKeyComparator cmp(BytewiseComparator());
  auto factory = std::make_shared<SkipListFactory>();
  options.memtable_factory = factory;
  ImmutableOptions ioptions(options);
  WriteBufferManager wb(options.db_write_buffer_size);
  MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
                               kMaxSequenceNumber, 0 /* column_family_id */);

  // Write some keys and make sure re-adding the same (key, seq) yields
  // Status::TryAgain.
  ASSERT_OK(
      mem->Add(seq, kTypeValue, "key", "value2", nullptr /* kv_prot_info */));
  ASSERT_TRUE(
      mem->Add(seq, kTypeValue, "key", "value2", nullptr /* kv_prot_info */)
          .IsTryAgain());
  // Changing the type should still cause the duplicate key to be detected
  ASSERT_TRUE(
      mem->Add(seq, kTypeMerge, "key", "value2", nullptr /* kv_prot_info */)
          .IsTryAgain());
  // Changing the seq number will make the key fresh
  ASSERT_OK(mem->Add(seq + 1, kTypeMerge, "key", "value2",
                     nullptr /* kv_prot_info */));
  // Test with different types for duplicate keys
  ASSERT_TRUE(
      mem->Add(seq, kTypeDeletion, "key", "", nullptr /* kv_prot_info */)
          .IsTryAgain());
  ASSERT_TRUE(
      mem->Add(seq, kTypeSingleDeletion, "key", "", nullptr /* kv_prot_info */)
          .IsTryAgain());

  // Test the duplicate keys under stress: every tenth iteration (i % 10 == 1)
  // reuses the previous seq, which must be rejected with TryAgain.
  for (int i = 0; i < 10000; i++) {
    bool insert_dup = i % 10 == 1;
    if (!insert_dup) {
      seq++;
    }
    Status s = mem->Add(seq, kTypeValue, "foo", "value" + std::to_string(seq),
                        nullptr /* kv_prot_info */);
    if (insert_dup) {
      ASSERT_TRUE(s.IsTryAgain());
    } else {
      ASSERT_OK(s);
    }
  }
  delete mem;

  // Test with InsertWithHint
  options.memtable_insert_with_hint_prefix_extractor.reset(
      new TestPrefixExtractor());  // which uses _ to extract the prefix
  ioptions = ImmutableOptions(options);
  mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
                     kMaxSequenceNumber, 0 /* column_family_id */);
  // Insert a duplicate key with _ in it
  ASSERT_OK(
      mem->Add(seq, kTypeValue, "key_1", "value", nullptr /* kv_prot_info */));
  ASSERT_TRUE(
      mem->Add(seq, kTypeValue, "key_1", "value", nullptr /* kv_prot_info */)
          .IsTryAgain());
  delete mem;

  // Test when InsertConcurrently will be invoked
  options.allow_concurrent_memtable_write = true;
  ioptions = ImmutableOptions(options);
  mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
                     kMaxSequenceNumber, 0 /* column_family_id */);
  MemTablePostProcessInfo post_process_info;
  ASSERT_OK(mem->Add(seq, kTypeValue, "key", "value",
                     nullptr /* kv_prot_info */, true, &post_process_info));
  ASSERT_TRUE(mem->Add(seq, kTypeValue, "key", "value",
                       nullptr /* kv_prot_info */, true, &post_process_info)
                  .IsTryAgain());
  delete mem;
}
|
|
|
|
|
2019-05-14 00:43:47 +00:00
|
|
|
// A simple test to verify that the concurrent merge writes is functional
|
|
|
|
TEST_F(DBMemTableTest, ConcurrentMergeWrite) {
|
|
|
|
int num_ops = 1000;
|
|
|
|
std::string value;
|
|
|
|
MergeContext merge_context;
|
|
|
|
Options options;
|
|
|
|
// A merge operator that is not sensitive to concurrent writes since in this
|
|
|
|
// test we don't order the writes.
|
|
|
|
options.merge_operator = MergeOperators::CreateUInt64AddOperator();
|
|
|
|
|
|
|
|
// Create a MemTable
|
|
|
|
InternalKeyComparator cmp(BytewiseComparator());
|
|
|
|
auto factory = std::make_shared<SkipListFactory>();
|
|
|
|
options.memtable_factory = factory;
|
|
|
|
options.allow_concurrent_memtable_write = true;
|
2021-05-05 20:59:21 +00:00
|
|
|
ImmutableOptions ioptions(options);
|
2019-05-14 00:43:47 +00:00
|
|
|
WriteBufferManager wb(options.db_write_buffer_size);
|
|
|
|
MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
|
|
|
|
kMaxSequenceNumber, 0 /* column_family_id */);
|
|
|
|
|
|
|
|
// Put 0 as the base
|
|
|
|
PutFixed64(&value, static_cast<uint64_t>(0));
|
Integrity protection for live updates to WriteBatch (#7748)
Summary:
This PR adds the foundation classes for key-value integrity protection and the first use case: protecting live updates from the source buffers added to `WriteBatch` through the destination buffer in `MemTable`. The width of the protection info is not yet configurable -- only eight bytes per key is supported. This PR allows users to enable protection by constructing `WriteBatch` with `protection_bytes_per_key == 8`. It does not yet expose a way for users to get integrity protection via other write APIs (e.g., `Put()`, `Merge()`, `Delete()`, etc.).
The foundation classes (`ProtectionInfo.*`) embed the coverage info in their type, and provide `Protect.*()` and `Strip.*()` functions to navigate between types with different coverage. For making bytes per key configurable (for powers of two up to eight) in the future, these classes are templated on the unsigned integer type used to store the protection info. That integer contains the XOR'd result of hashes with independent seeds for all covered fields. For integer fields, the hash is computed on the raw unadjusted bytes, so the result is endian-dependent. The most significant bytes are truncated when the hash value (8 bytes) is wider than the protection integer.
When `WriteBatch` is constructed with `protection_bytes_per_key == 8`, we hold a `ProtectionInfoKVOTC` (i.e., one that covers key, value, optype aka `ValueType`, timestamp, and CF ID) for each entry added to the batch. The protection info is generated from the original buffers passed by the user, as well as the original metadata generated internally. When writing to memtable, each entry is transformed to a `ProtectionInfoKVOTS` (i.e., dropping coverage of CF ID and adding coverage of sequence number), since at that point we know the sequence number, and have already selected a memtable corresponding to a particular CF. This protection info is verified once the entry is encoded in the `MemTable` buffer.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7748
Test Plan:
- an integration test to verify a wide variety of single-byte changes to the encoded `MemTable` buffer are caught
- add to stress/crash test to verify it works in variety of configs/operations without intentional corruption
- [deferred] unit tests for `ProtectionInfo.*` classes for edge cases like KV swap, `SliceParts` and `Slice` APIs are interchangeable, etc.
Reviewed By: pdillinger
Differential Revision: D25754492
Pulled By: ajkr
fbshipit-source-id: e481bac6c03c2ab268be41359730f1ceb9964866
2021-01-29 20:17:17 +00:00
|
|
|
ASSERT_OK(mem->Add(0, kTypeValue, "key", value, nullptr /* kv_prot_info */));
|
2019-05-14 00:43:47 +00:00
|
|
|
value.clear();
|
|
|
|
|
|
|
|
// Write Merge concurrently
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::port::Thread write_thread1([&]() {
|
|
|
|
MemTablePostProcessInfo post_process_info1;
|
2019-05-14 00:43:47 +00:00
|
|
|
std::string v1;
|
|
|
|
for (int seq = 1; seq < num_ops / 2; seq++) {
|
|
|
|
PutFixed64(&v1, seq);
|
Integrity protection for live updates to WriteBatch (#7748)
Summary:
This PR adds the foundation classes for key-value integrity protection and the first use case: protecting live updates from the source buffers added to `WriteBatch` through the destination buffer in `MemTable`. The width of the protection info is not yet configurable -- only eight bytes per key is supported. This PR allows users to enable protection by constructing `WriteBatch` with `protection_bytes_per_key == 8`. It does not yet expose a way for users to get integrity protection via other write APIs (e.g., `Put()`, `Merge()`, `Delete()`, etc.).
The foundation classes (`ProtectionInfo.*`) embed the coverage info in their type, and provide `Protect.*()` and `Strip.*()` functions to navigate between types with different coverage. For making bytes per key configurable (for powers of two up to eight) in the future, these classes are templated on the unsigned integer type used to store the protection info. That integer contains the XOR'd result of hashes with independent seeds for all covered fields. For integer fields, the hash is computed on the raw unadjusted bytes, so the result is endian-dependent. The most significant bytes are truncated when the hash value (8 bytes) is wider than the protection integer.
When `WriteBatch` is constructed with `protection_bytes_per_key == 8`, we hold a `ProtectionInfoKVOTC` (i.e., one that covers key, value, optype aka `ValueType`, timestamp, and CF ID) for each entry added to the batch. The protection info is generated from the original buffers passed by the user, as well as the original metadata generated internally. When writing to memtable, each entry is transformed to a `ProtectionInfoKVOTS` (i.e., dropping coverage of CF ID and adding coverage of sequence number), since at that point we know the sequence number, and have already selected a memtable corresponding to a particular CF. This protection info is verified once the entry is encoded in the `MemTable` buffer.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7748
Test Plan:
- an integration test to verify a wide variety of single-byte changes to the encoded `MemTable` buffer are caught
- add to stress/crash test to verify it works in variety of configs/operations without intentional corruption
- [deferred] unit tests for `ProtectionInfo.*` classes for edge cases like KV swap, `SliceParts` and `Slice` APIs are interchangeable, etc.
Reviewed By: pdillinger
Differential Revision: D25754492
Pulled By: ajkr
fbshipit-source-id: e481bac6c03c2ab268be41359730f1ceb9964866
2021-01-29 20:17:17 +00:00
|
|
|
ASSERT_OK(mem->Add(seq, kTypeMerge, "key", v1, nullptr /* kv_prot_info */,
|
|
|
|
true, &post_process_info1));
|
2019-05-14 00:43:47 +00:00
|
|
|
v1.clear();
|
|
|
|
}
|
|
|
|
});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::port::Thread write_thread2([&]() {
|
|
|
|
MemTablePostProcessInfo post_process_info2;
|
2019-05-14 00:43:47 +00:00
|
|
|
std::string v2;
|
|
|
|
for (int seq = num_ops / 2; seq < num_ops; seq++) {
|
|
|
|
PutFixed64(&v2, seq);
|
Integrity protection for live updates to WriteBatch (#7748)
Summary:
This PR adds the foundation classes for key-value integrity protection and the first use case: protecting live updates from the source buffers added to `WriteBatch` through the destination buffer in `MemTable`. The width of the protection info is not yet configurable -- only eight bytes per key is supported. This PR allows users to enable protection by constructing `WriteBatch` with `protection_bytes_per_key == 8`. It does not yet expose a way for users to get integrity protection via other write APIs (e.g., `Put()`, `Merge()`, `Delete()`, etc.).
The foundation classes (`ProtectionInfo.*`) embed the coverage info in their type, and provide `Protect.*()` and `Strip.*()` functions to navigate between types with different coverage. For making bytes per key configurable (for powers of two up to eight) in the future, these classes are templated on the unsigned integer type used to store the protection info. That integer contains the XOR'd result of hashes with independent seeds for all covered fields. For integer fields, the hash is computed on the raw unadjusted bytes, so the result is endian-dependent. The most significant bytes are truncated when the hash value (8 bytes) is wider than the protection integer.
When `WriteBatch` is constructed with `protection_bytes_per_key == 8`, we hold a `ProtectionInfoKVOTC` (i.e., one that covers key, value, optype aka `ValueType`, timestamp, and CF ID) for each entry added to the batch. The protection info is generated from the original buffers passed by the user, as well as the original metadata generated internally. When writing to memtable, each entry is transformed to a `ProtectionInfoKVOTS` (i.e., dropping coverage of CF ID and adding coverage of sequence number), since at that point we know the sequence number, and have already selected a memtable corresponding to a particular CF. This protection info is verified once the entry is encoded in the `MemTable` buffer.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7748
Test Plan:
- an integration test to verify a wide variety of single-byte changes to the encoded `MemTable` buffer are caught
- add to stress/crash test to verify it works in variety of configs/operations without intentional corruption
- [deferred] unit tests for `ProtectionInfo.*` classes for edge cases like KV swap, `SliceParts` and `Slice` APIs are interchangeable, etc.
Reviewed By: pdillinger
Differential Revision: D25754492
Pulled By: ajkr
fbshipit-source-id: e481bac6c03c2ab268be41359730f1ceb9964866
2021-01-29 20:17:17 +00:00
|
|
|
ASSERT_OK(mem->Add(seq, kTypeMerge, "key", v2, nullptr /* kv_prot_info */,
|
|
|
|
true, &post_process_info2));
|
2019-05-14 00:43:47 +00:00
|
|
|
v2.clear();
|
|
|
|
}
|
|
|
|
});
|
|
|
|
write_thread1.join();
|
|
|
|
write_thread2.join();
|
|
|
|
|
|
|
|
Status status;
|
|
|
|
ReadOptions roptions;
|
|
|
|
SequenceNumber max_covering_tombstone_seq = 0;
|
|
|
|
LookupKey lkey("key", kMaxSequenceNumber);
|
Add support for wide-column point lookups (#10540)
Summary:
The patch adds a new API `GetEntity` that can be used to perform
wide-column point lookups. It also extends the `Get` code path and
the `MemTable` / `MemTableList` and `Version` / `GetContext` logic
accordingly so that wide-column entities can be served from both
memtables and SSTs. If the result of a lookup is a wide-column entity
(`kTypeWideColumnEntity`), it is passed to the application in deserialized
form; if it is a plain old key-value (`kTypeValue`), it is presented as a
wide-column entity with a single default (anonymous) column.
(In contrast, regular `Get` returns plain old key-values as-is, and
returns the value of the default column for wide-column entities, see
https://github.com/facebook/rocksdb/issues/10483 .)
The result of `GetEntity` is a self-contained `PinnableWideColumns` object.
`PinnableWideColumns` contains a `PinnableSlice`, which either stores the
underlying data in its own buffer or holds on to a cache handle. It also contains
a `WideColumns` instance, which indexes the contents of the `PinnableSlice`,
so applications can access the values of columns efficiently.
There are several pieces of functionality which are currently not supported
for wide-column entities: there is currently no `MultiGetEntity` or wide-column
iterator; also, `Merge` and `GetMergeOperands` are not supported, and there
is no `GetEntity` implementation for read-only and secondary instances.
We plan to implement these in future PRs.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10540
Test Plan: `make check`
Reviewed By: akankshamahajan15
Differential Revision: D38847474
Pulled By: ltamasi
fbshipit-source-id: 42311a34ccdfe88b3775e847a5e2a5296e002b5b
2022-08-19 18:51:12 +00:00
|
|
|
bool res = mem->Get(lkey, &value, /*columns=*/nullptr, /*timestamp=*/nullptr,
|
|
|
|
&status, &merge_context, &max_covering_tombstone_seq,
|
|
|
|
roptions, false /* immutable_memtable */);
|
2020-12-08 23:53:59 +00:00
|
|
|
ASSERT_OK(status);
|
2019-05-14 00:43:47 +00:00
|
|
|
ASSERT_TRUE(res);
|
|
|
|
uint64_t ivalue = DecodeFixed64(Slice(value).data());
|
|
|
|
uint64_t sum = 0;
|
|
|
|
for (int seq = 0; seq < num_ops; seq++) {
|
|
|
|
sum += seq;
|
|
|
|
}
|
|
|
|
ASSERT_EQ(ivalue, sum);
|
|
|
|
|
|
|
|
delete mem;
|
|
|
|
}
|
|
|
|
|
2016-11-14 02:58:17 +00:00
|
|
|
TEST_F(DBMemTableTest, InsertWithHint) {
|
|
|
|
Options options;
|
2016-11-16 17:24:52 +00:00
|
|
|
options.allow_concurrent_memtable_write = false;
|
2016-11-14 02:58:17 +00:00
|
|
|
options.create_if_missing = true;
|
|
|
|
options.memtable_factory.reset(new MockMemTableRepFactory());
|
|
|
|
options.memtable_insert_with_hint_prefix_extractor.reset(
|
|
|
|
new TestPrefixExtractor());
|
2017-03-13 16:41:30 +00:00
|
|
|
options.env = env_;
|
2016-11-14 02:58:17 +00:00
|
|
|
Reopen(options);
|
|
|
|
MockMemTableRep* rep =
|
Prefer static_cast in place of most reinterpret_cast (#12308)
Summary:
The following are risks associated with pointer-to-pointer reinterpret_cast:
* Can produce the "wrong result" (crash or memory corruption). IIRC, in theory this can happen for any up-cast or down-cast for a non-standard-layout type, though in practice would only happen for multiple inheritance cases (where the base class pointer might be "inside" the derived object). We don't use multiple inheritance a lot, but we do.
* Can mask useful compiler errors upon code change, including converting between unrelated pointer types that you are expecting to be related, and converting between pointer and scalar types unintentionally.
I can only think of some obscure cases where static_cast could be troublesome when it compiles as a replacement:
* Going through `void*` could plausibly cause unnecessary or broken pointer arithmetic. Suppose we have
`struct Derived: public Base1, public Base2`. If we have `Derived*` -> `void*` -> `Base2*` -> `Derived*` through reinterpret casts, this could plausibly work (though technical UB) assuming the `Base2*` is not dereferenced. Changing to static cast could introduce breaking pointer arithmetic.
* Unnecessary (but safe) pointer arithmetic could arise in a case like `Derived*` -> `Base2*` -> `Derived*` where before the Base2 pointer might not have been dereferenced. This could potentially affect performance.
With some light scripting, I tried replacing pointer-to-pointer reinterpret_casts with static_cast and kept the cases that still compile. Most occurrences of reinterpret_cast have successfully been changed (except for java/ and third-party/). 294 changed, 257 remain.
A couple of related interventions included here:
* Previously Cache::Handle was not actually derived from in the implementations and just used as a `void*` stand-in with reinterpret_cast. Now there is a relationship to allow static_cast. In theory, this could introduce pointer arithmetic (as described above) but is unlikely without multiple inheritance AND non-empty Cache::Handle.
* Remove some unnecessary casts to void* as this is allowed to be implicit (for better or worse).
Most of the remaining reinterpret_casts are for converting to/from raw bytes of objects. We could consider better idioms for these patterns in follow-up work.
I wish there were a way to implement a template variant of static_cast that would only compile if no pointer arithmetic is generated, but best I can tell, this is not possible. AFAIK the best you could do is a dynamic check that the void* conversion after the static cast is unchanged.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12308
Test Plan: existing tests, CI
Reviewed By: ltamasi
Differential Revision: D53204947
Pulled By: pdillinger
fbshipit-source-id: 9de23e618263b0d5b9820f4e15966876888a16e2
2024-02-07 18:44:11 +00:00
|
|
|
static_cast<MockMemTableRepFactory*>(options.memtable_factory.get())
|
2016-11-14 02:58:17 +00:00
|
|
|
->rep();
|
|
|
|
ASSERT_OK(Put("foo_k1", "foo_v1"));
|
|
|
|
ASSERT_EQ(nullptr, rep->last_hint_in());
|
|
|
|
void* hint_foo = rep->last_hint_out();
|
|
|
|
ASSERT_OK(Put("foo_k2", "foo_v2"));
|
|
|
|
ASSERT_EQ(hint_foo, rep->last_hint_in());
|
|
|
|
ASSERT_EQ(hint_foo, rep->last_hint_out());
|
|
|
|
ASSERT_OK(Put("foo_k3", "foo_v3"));
|
|
|
|
ASSERT_EQ(hint_foo, rep->last_hint_in());
|
|
|
|
ASSERT_EQ(hint_foo, rep->last_hint_out());
|
|
|
|
ASSERT_OK(Put("bar_k1", "bar_v1"));
|
|
|
|
ASSERT_EQ(nullptr, rep->last_hint_in());
|
|
|
|
void* hint_bar = rep->last_hint_out();
|
|
|
|
ASSERT_NE(hint_foo, hint_bar);
|
|
|
|
ASSERT_OK(Put("bar_k2", "bar_v2"));
|
|
|
|
ASSERT_EQ(hint_bar, rep->last_hint_in());
|
|
|
|
ASSERT_EQ(hint_bar, rep->last_hint_out());
|
|
|
|
ASSERT_EQ(5, rep->num_insert_with_hint());
|
2020-06-19 22:26:05 +00:00
|
|
|
ASSERT_OK(Put("NotInPrefixDomain", "vvv"));
|
2016-11-14 02:58:17 +00:00
|
|
|
ASSERT_EQ(5, rep->num_insert_with_hint());
|
|
|
|
ASSERT_EQ("foo_v1", Get("foo_k1"));
|
|
|
|
ASSERT_EQ("foo_v2", Get("foo_k2"));
|
|
|
|
ASSERT_EQ("foo_v3", Get("foo_k3"));
|
|
|
|
ASSERT_EQ("bar_v1", Get("bar_k1"));
|
|
|
|
ASSERT_EQ("bar_v2", Get("bar_k2"));
|
2024-04-23 03:13:58 +00:00
|
|
|
ASSERT_OK(db_->DeleteRange(WriteOptions(), "foo_k1", "foo_k4"));
|
|
|
|
ASSERT_EQ(hint_bar, rep->last_hint_in());
|
|
|
|
ASSERT_EQ(hint_bar, rep->last_hint_out());
|
|
|
|
ASSERT_EQ(5, rep->num_insert_with_hint());
|
2020-06-19 22:26:05 +00:00
|
|
|
ASSERT_EQ("vvv", Get("NotInPrefixDomain"));
|
2016-11-14 02:58:17 +00:00
|
|
|
}
|
|
|
|
|
2017-06-02 19:08:01 +00:00
|
|
|
TEST_F(DBMemTableTest, ColumnFamilyId) {
  // Verifies MemTableRepFactory is told the right column family id.
  Options options;
  options.env = CurrentOptions().env;
  options.allow_concurrent_memtable_write = false;
  options.create_if_missing = true;
  options.memtable_factory.reset(new MockMemTableRepFactory());
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  auto* mock_factory =
      static_cast<MockMemTableRepFactory*>(options.memtable_factory.get());
  // Write and flush in both the default CF (0) and "pikachu" (1); the factory
  // must have been handed the matching column family id each time.
  for (uint32_t cf_id = 0; cf_id < 2; ++cf_id) {
    ASSERT_OK(Put(cf_id, "key", "val"));
    ASSERT_OK(Flush(cf_id));
    ASSERT_EQ(cf_id, mock_factory->GetLastColumnFamilyId());
  }
}
|
|
|
|
|
2024-08-19 20:53:25 +00:00
|
|
|
TEST_F(DBMemTableTest, IntegrityChecks) {
  // Keys key000000, key000001 and key000002 are inserted into the skip list
  // at fixed height 1 (the smallest height). The second key is then corrupted
  // in place to aey000001 to make it sort smaller. With
  // `paranoid_memory_checks` set to true, a skip list walk that sees
  // key000000 followed by aey000001 reports the out-of-order keys with a
  // corruption status. With `paranoid_memory_checks` set to false, read/scan
  // may return wrong results.
  for (bool allow_data_in_error : {false, true}) {
    Options options = CurrentOptions();
    options.allow_data_in_errors = allow_data_in_error;
    options.paranoid_memory_checks = true;
    DestroyAndReopen(options);

    // Pin every skip list node to height 1 so all three keys are linked
    // linearly at the bottom level.
    SyncPoint::GetInstance()->SetCallBack(
        "InlineSkipList::RandomHeight::height", [](void* h) {
          auto height_ptr = static_cast<int*>(h);
          *height_ptr = 1;
        });
    SyncPoint::GetInstance()->EnableProcessing();
    ASSERT_OK(Put(Key(0), "val0"));
    ASSERT_OK(Put(Key(2), "val2"));
    // encoded_buf will point to the memtable buffer for encoded key000001.
    char* encoded_buf = nullptr;
    SyncPoint::GetInstance()->SetCallBack(
        "MemTable::Add:BeforeReturn:Encoded", [&](void* encoded) {
          encoded_buf =
              const_cast<char*>(static_cast<Slice*>(encoded)->data());
        });
    ASSERT_OK(Put(Key(1), "val1"));
    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->ClearAllCallBacks();
    ASSERT_TRUE(encoded_buf);
    // Offset 0 is the key size; key bytes start at offset 1.
    // "key000001 -> aey000001"
    encoded_buf[1] = 'a';

    ReadOptions read_opts;
    std::string val;
    Status s = db_->Get(read_opts, Key(1), &val);
    ASSERT_TRUE(s.IsCorruption());
    // Key data appears in the error message iff allow_data_in_errors is set.
    std::string key0 = Slice(Key(0)).ToString(true);
    ASSERT_EQ(s.ToString().find(key0) != std::string::npos,
              allow_data_in_error);
    // Without `paranoid_memory_checks`, NotFound will be returned.
    // This would fail an assertion in InlineSkipList::FindGreaterOrEqual().
    // If we remove the assertion, this passes.
    // ASSERT_TRUE(db_->Get(ReadOptions(), Key(1), &val).IsNotFound());

    std::vector<std::string> vals;
    std::vector<Status> statuses = db_->MultiGet(
        read_opts, {db_->DefaultColumnFamily()}, {Key(1)}, &vals, nullptr);
    ASSERT_TRUE(statuses[0].IsCorruption());
    ASSERT_EQ(statuses[0].ToString().find(key0) != std::string::npos,
              allow_data_in_error);

    std::unique_ptr<Iterator> iter{db_->NewIterator(read_opts)};
    ASSERT_OK(iter->status());
    iter->Seek(Key(1));
    ASSERT_TRUE(iter->status().IsCorruption());
    ASSERT_EQ(iter->status().ToString().find(key0) != std::string::npos,
              allow_data_in_error);

    iter->Seek(Key(0));
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    // Iterating through the skip list at height 1 should catch the
    // out-of-order keys.
    iter->Next();
    ASSERT_TRUE(iter->status().IsCorruption());
    ASSERT_EQ(iter->status().ToString().find(key0) != std::string::npos,
              allow_data_in_error);
    ASSERT_FALSE(iter->Valid());

    iter->SeekForPrev(Key(2));
    ASSERT_TRUE(iter->status().IsCorruption());
    ASSERT_EQ(iter->status().ToString().find(key0) != std::string::npos,
              allow_data_in_error);

    // Internally DB Iter will iterate backwards (call Prev()) after
    // SeekToLast() to find the correct internal key with the last user key.
    // Prev() will do integrity checks and catch corruption.
    iter->SeekToLast();
    ASSERT_TRUE(iter->status().IsCorruption());
    ASSERT_EQ(iter->status().ToString().find(key0) != std::string::npos,
              allow_data_in_error);
    ASSERT_FALSE(iter->Valid());
  }
}
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2016-11-14 02:58:17 +00:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
  // Install the stack trace handler first so that any test crash produces a
  // readable backtrace.
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
|