// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <memory>
#include <unordered_set>
#include <vector>

#include "db/db_test_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/db.h"
#include "rocksdb/types.h"
#include "rocksdb/utilities/table_properties_collectors.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/table_properties_internal.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/atomic.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {

// A helper function that ensures the table properties returned in
// `GetPropertiesOfAllTablesTest` are correct.
// It assumes the number of entries differs for each of the tables.
namespace {

void VerifyTableProperties(DB* db, uint64_t expected_entries_size) {
  TablePropertiesCollection props;
  ASSERT_OK(db->GetPropertiesOfAllTables(&props));

  ASSERT_EQ(4U, props.size());
  std::unordered_set<uint64_t> unique_entries;

  // Indirect test
  uint64_t sum = 0;
  for (const auto& item : props) {
    unique_entries.insert(item.second->num_entries);
    sum += item.second->num_entries;
  }

  ASSERT_EQ(props.size(), unique_entries.size());
  ASSERT_EQ(expected_entries_size, sum);
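
  // Also check that the SST unique ID computed from each file's table
  // properties is valid and that the IDs are distinct across files.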
  VerifySstUniqueIds(props);
}
}  // anonymous namespace

class DBTablePropertiesTest : public DBTestBase,
                              public testing::WithParamInterface<std::string> {
 public:
  DBTablePropertiesTest()
      : DBTestBase("db_table_properties_test", /*env_do_fsync=*/false) {}
};

TEST_F(DBTablePropertiesTest, GetPropertiesOfAllTablesTest) {
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 8;
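  // With only 4 files created below, a trigger of 8 ensures automatic
  // compaction never runs, so the 4 tables stay distinct in L0.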
  // Part of strategy to prevent pinning table files
  options.max_open_files = 42;
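  // (With the default max_open_files = -1, all table files would be opened
  // and held open; a finite limit lets the EraseUnRefEntries() calls below
  // actually empty the table cache.)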
  Reopen(options);

  // Create 4 tables
  for (int table = 0; table < 4; ++table) {
    // Use old meta name for table properties for one file
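    // (The reader also accepts the legacy properties meta-block name, so
    // properties written under it must still be found by the checks below.)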
    if (table == 3) {
      SyncPoint::GetInstance()->SetCallBack(
          "BlockBasedTableBuilder::WritePropertiesBlock:Meta", [&](void* meta) {
            *static_cast<const std::string**>(meta) = &kPropertiesBlockOldName;
          });
      SyncPoint::GetInstance()->EnableProcessing();
    }
    // Build file
    for (int i = 0; i < 10 + table; ++i) {
      ASSERT_OK(
          db_->Put(WriteOptions(), std::to_string(table * 100 + i), "val"));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  SyncPoint::GetInstance()->DisableProcessing();
  std::string original_session_id;
  ASSERT_OK(db_->GetDbSessionId(original_session_id));

  // Part of strategy to prevent pinning table files
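  // (Recovery skips loading the table files, so the table cache starts
  // empty and no table readers are held open.)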
  SyncPoint::GetInstance()->SetCallBack(
      "VersionEditHandler::LoadTables:skip_load_table_files",
      [&](void* skip_load) { *static_cast<bool*>(skip_load) = true; });
  SyncPoint::GetInstance()->EnableProcessing();

  // 1. Read table properties directly from file
  Reopen(options);
  // Clear out auto-opened files
  dbfull()->TEST_table_cache()->EraseUnRefEntries();
  ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U);
  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 2. Put two tables into the table cache
  Reopen(options);
  // Clear out auto-opened files
  dbfull()->TEST_table_cache()->EraseUnRefEntries();
  ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U);
  // Fetch a key from the 1st and 2nd tables, which internally places those
  // tables in the table cache.
  for (int i = 0; i < 2; ++i) {
    Get(std::to_string(i * 100 + 0));
  }

  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 3. Put all tables into the table cache
  Reopen(options);
  // Fetch a key from every table, which places them all in the table cache.
  for (int i = 0; i < 4; ++i) {
    Get(std::to_string(i * 100 + 0));
  }
  VerifyTableProperties(db_, 10 + 11 + 12 + 13);

  // 4. Try to read CORRUPT properties (a) directly from file, and (b)
  // through reader on Get

  // It's not practical to prevent table file read on Open, so we
  // corrupt after open and after purging table cache.
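  // A single byte of the DB session id inside the properties block is
  // flipped, which must surface as a block checksum mismatch on either path.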
  for (bool direct : {true, false}) {
    Reopen(options);
    // Clear out auto-opened files
    dbfull()->TEST_table_cache()->EraseUnRefEntries();
    ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U);

    TablePropertiesCollection props;
    ASSERT_OK(db_->GetPropertiesOfAllTables(&props));
    std::string sst_file = props.begin()->first;

    // Corrupt the file's TableProperties using session id
    std::string contents;
    ASSERT_OK(
        ReadFileToString(env_->GetFileSystem().get(), sst_file, &contents));
    size_t pos = contents.find(original_session_id);
    ASSERT_NE(pos, std::string::npos);
    ASSERT_OK(test::CorruptFile(env_, sst_file, static_cast<int>(pos), 1,
                                /*verify checksum fails*/ false));

    // Try to read CORRUPT properties
    if (direct) {
      ASSERT_TRUE(db_->GetPropertiesOfAllTables(&props).IsCorruption());
    } else {
      bool found_corruption = false;
      for (int i = 0; i < 4; ++i) {
        std::string result = Get(std::to_string(i * 100 + 0));
        if (result.find("Corruption: block checksum mismatch") !=
            std::string::npos) {
          found_corruption = true;
        }
      }
      ASSERT_TRUE(found_corruption);
    }

    // UN-corrupt file for next iteration
    ASSERT_OK(test::CorruptFile(env_, sst_file, static_cast<int>(pos), 1,
                                /*verify checksum fails*/ false));
  }

  SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTablePropertiesTest, InvalidIgnored) {
  // RocksDB versions 2.5 - 2.7 generate some properties that Block considers
  // invalid in some way. This approximates that.

  // Inject properties block data that Block considers invalid
  SyncPoint::GetInstance()->SetCallBack(
      "BlockBasedTableBuilder::WritePropertiesBlock:BlockData",
      [&](void* block_data) { *static_cast<Slice*>(block_data) = Slice("X"); });
  SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
|

  // Corrupting the table properties corrupts the unique id.
  // Ignore the unique id recorded in the manifest.
  auto options = CurrentOptions();
  options.verify_sst_unique_id_in_manifest = false;
  Reopen(options);

  // Build file
  for (int i = 0; i < 10; ++i) {
    ASSERT_OK(db_->Put(WriteOptions(), std::to_string(i), "val"));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  SyncPoint::GetInstance()->DisableProcessing();

  // Not crashing is good enough
  TablePropertiesCollection props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&props));
}

TEST_F(DBTablePropertiesTest, CreateOnDeletionCollectorFactory) {
  ConfigOptions options;
  options.ignore_unsupported_options = false;

  std::shared_ptr<TablePropertiesCollectorFactory> factory;
  std::string id = CompactOnDeletionCollectorFactory::kClassName();
  ASSERT_OK(
      TablePropertiesCollectorFactory::CreateFromString(options, id, &factory));
  auto del_factory = factory->CheckedCast<CompactOnDeletionCollectorFactory>();
  ASSERT_NE(del_factory, nullptr);
  ASSERT_EQ(0U, del_factory->GetWindowSize());
  ASSERT_EQ(0U, del_factory->GetDeletionTrigger());
  ASSERT_EQ(0.0, del_factory->GetDeletionRatio());
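
  // Configuration can also be embedded in the string form, as semicolon-
  // separated name=value pairs alongside the id.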
  ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString(
      options, "window_size=100; deletion_trigger=90; id=" + id, &factory));
  del_factory = factory->CheckedCast<CompactOnDeletionCollectorFactory>();
  ASSERT_NE(del_factory, nullptr);
  ASSERT_EQ(100U, del_factory->GetWindowSize());
  ASSERT_EQ(90U, del_factory->GetDeletionTrigger());
  ASSERT_EQ(0.0, del_factory->GetDeletionRatio());
  ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString(
      options,
      "window_size=100; deletion_trigger=90; deletion_ratio=0.5; id=" + id,
      &factory));
  del_factory = factory->CheckedCast<CompactOnDeletionCollectorFactory>();
  ASSERT_NE(del_factory, nullptr);
  ASSERT_EQ(100U, del_factory->GetWindowSize());
  ASSERT_EQ(90U, del_factory->GetDeletionTrigger());
  ASSERT_EQ(0.5, del_factory->GetDeletionRatio());
}

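// For reference, outside of string-based configuration the factory is usually
// constructed directly and attached to Options; a minimal sketch (values are
// arbitrary, chosen only for illustration):
//
//   Options db_options;
//   db_options.table_properties_collector_factories.emplace_back(
//       NewCompactOnDeletionCollectorFactory(/*sliding_window_size=*/128,
//                                            /*deletion_trigger=*/100));
//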
// Test params:
// 1) whether to enable user-defined timestamps
class DBTablePropertiesInRangeTest : public DBTestBase,
                                     public testing::WithParamInterface<bool> {
 public:
  DBTablePropertiesInRangeTest()
      : DBTestBase("db_table_properties_in_range_test",
                   /*env_do_fsync=*/false) {}

  void SetUp() override { enable_udt_ = GetParam(); }

 protected:
  void PutKeyValue(const Slice& key, const Slice& value) {
    if (enable_udt_) {
      EXPECT_OK(db_->Put(WriteOptions(), key, min_ts_, value));
    } else {
      EXPECT_OK(Put(key, value));
    }
  }

  std::string GetValue(const std::string& key) {
    ReadOptions roptions;
    std::string result;
    if (enable_udt_) {
      roptions.timestamp = &min_ts_;
    }
    Status s = db_->Get(roptions, key, &result);
    EXPECT_TRUE(s.ok());
    return result;
  }

  Status MaybeGetValue(const std::string& key, std::string* result) {
    ReadOptions roptions;
    if (enable_udt_) {
      roptions.timestamp = &min_ts_;
    }
    Status s = db_->Get(roptions, key, result);
    EXPECT_TRUE(s.IsNotFound() || s.ok());
    return s;
  }

  TablePropertiesCollection TestGetPropertiesOfTablesInRange(
      std::vector<Range> ranges, std::size_t* num_properties = nullptr,
      std::size_t* num_files = nullptr) {
    // Since we dereference the zeroth element of the vector, it must not be
    // empty; otherwise we would pass the address of random memory.
    EXPECT_GT(ranges.size(), 0U);
    // run the query
    TablePropertiesCollection props;
    ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily();
    EXPECT_OK(db_->GetPropertiesOfTablesInRange(default_cf, ranges.data(),
                                                ranges.size(), &props));

    const Comparator* ucmp = default_cf->GetComparator();
    EXPECT_NE(ucmp, nullptr);
    const size_t ts_sz = ucmp->timestamp_size();
    const size_t range_size = ranges.size();
    autovector<UserKeyRange> ukey_ranges;
    std::vector<std::string> keys;
    ukey_ranges.reserve(range_size);
    keys.reserve(range_size * 2);
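    // When user-defined timestamps are enabled, pad each user-key range with
    // a minimum timestamp so it can be compared against the file boundary
    // keys below.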
    for (auto& r : ranges) {
      auto [start, limit] = MaybeAddTimestampsToRange(
          &r.start, &r.limit, ts_sz, &keys.emplace_back(), &keys.emplace_back(),
          /*exclusive_end=*/false);
      EXPECT_TRUE(start.has_value());
      EXPECT_TRUE(limit.has_value());
      ukey_ranges.emplace_back(start.value(), limit.value());
    }
    // Make sure that we've received properties for those and only those files
    // which fall within the requested ranges
    std::vector<LiveFileMetaData> vmd;
    db_->GetLiveFilesMetaData(&vmd);
    for (auto& md : vmd) {
      std::string fn = md.db_path + md.name;
      bool in_range = false;
      for (auto& r : ukey_ranges) {
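        // A file overlaps a range iff its [smallestkey, largestkey] span
        // intersects the inclusive user-key range [start, limit].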
        if (ucmp->Compare(r.start, md.largestkey) <= 0 &&
            ucmp->Compare(r.limit, md.smallestkey) >= 0) {
          in_range = true;
          EXPECT_GT(props.count(fn), 0);
        }
      }
      if (!in_range) {
        EXPECT_EQ(props.count(fn), 0);
      }
    }

    if (num_properties) {
      *num_properties = props.size();
    }

    if (num_files) {
      *num_files = vmd.size();
    }
    return props;
  }

  bool enable_udt_ = false;
  Slice min_ts_ = MinU64Ts();
};

TEST_P(DBTablePropertiesInRangeTest, GetPropertiesOfTablesInRange) {
  // Fixed random seed
  Random rnd(301);

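  // Use small buffers and file sizes so the random workload below builds a
  // multi-level LSM.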
  Options options;
  options.level_compaction_dynamic_level_bytes = false;
  options.create_if_missing = true;
  options.write_buffer_size = 4096;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.target_file_size_base = 2048;
  options.max_bytes_for_level_base = 40960;
  options.max_bytes_for_level_multiplier = 4;
  options.hard_pending_compaction_bytes_limit = 16 * 1024;
  options.num_levels = 8;
  options.env = env_;
  bool udt_enabled = GetParam();
  if (udt_enabled) {
    options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  }

  DestroyAndReopen(options);

  // build a decent LSM
  for (int i = 0; i < 10000; i++) {
    PutKeyValue(test::RandomKey(&rnd, 5), rnd.RandomString(102));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  if (NumTableFilesAtLevel(0) == 0) {
    PutKeyValue(test::RandomKey(&rnd, 5), rnd.RandomString(102));
    ASSERT_OK(Flush());
  }

  ASSERT_OK(db_->PauseBackgroundWork());

  // Ensure that we have at least L0, L1 and L2
  ASSERT_GT(NumTableFilesAtLevel(0), 0);
  ASSERT_GT(NumTableFilesAtLevel(1), 0);
  ASSERT_GT(NumTableFilesAtLevel(2), 0);

  // Query the largest range
  std::size_t num_properties, num_files;
  TestGetPropertiesOfTablesInRange(
      {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST),
             test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))},
      &num_properties, &num_files);
  ASSERT_EQ(num_properties, num_files);

  // Query the empty range
  TestGetPropertiesOfTablesInRange(
      {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST),
             test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST))},
      &num_properties, &num_files);
  ASSERT_GT(num_files, 0);
  ASSERT_EQ(num_properties, 0);

  // Query the middle range
  TestGetPropertiesOfTablesInRange(
      {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::MIDDLE),
             test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))},
      &num_properties, &num_files);
  ASSERT_GT(num_files, 0);
  ASSERT_GT(num_files, num_properties);
  ASSERT_GT(num_properties, 0);

  // Query a bunch of random ranges
  for (int j = 0; j < 100; j++) {
    // create a bunch of ranges
    std::vector<std::string> random_keys;
    // Random::Uniform() may return zero, and passing empty ranges would make
    // TestGetPropertiesOfTablesInRange() dereference random memory in the
    // empty ranges[0], so the key count must be greater than zero. It must
    // also be even, since the loop below consumes keys in pairs.
    auto n = 2 * (rnd.Uniform(50) + 1);

    for (uint32_t i = 0; i < n; ++i) {
      random_keys.push_back(test::RandomKey(&rnd, 5));
    }

    ASSERT_GT(random_keys.size(), 0U);
    ASSERT_EQ((random_keys.size() % 2), 0U);

    std::vector<Range> ranges;
    auto it = random_keys.begin();
    while (it != random_keys.end()) {
      ranges.emplace_back(*it, *(it + 1));
      it += 2;
    }

    TestGetPropertiesOfTablesInRange(std::move(ranges));
  }
}

INSTANTIATE_TEST_CASE_P(DBTablePropertiesInRangeTest,
                        DBTablePropertiesInRangeTest,
                        ::testing::Values(true, false));

TEST_F(DBTablePropertiesTest, GetColumnFamilyNameProperty) {
  std::string kExtraCfName = "pikachu";
  CreateAndReopenWithCF({kExtraCfName}, CurrentOptions());

  // Create one table per CF, then verify it was created with the column family
  // name property.
  for (uint32_t cf = 0; cf < 2; ++cf) {
    ASSERT_OK(Put(cf, "key", "val"));
    ASSERT_OK(Flush(cf));

    TablePropertiesCollection fname_to_props;
    ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props));
    ASSERT_EQ(1U, fname_to_props.size());

    std::string expected_cf_name;
    if (cf > 0) {
      expected_cf_name = kExtraCfName;
    } else {
      expected_cf_name = kDefaultColumnFamilyName;
    }
    ASSERT_EQ(expected_cf_name,
              fname_to_props.begin()->second->column_family_name);
    ASSERT_EQ(cf, static_cast<uint32_t>(
                      fname_to_props.begin()->second->column_family_id));
  }
}

TEST_F(DBTablePropertiesTest, GetDbIdentifiersProperty) {
  CreateAndReopenWithCF({"goku"}, CurrentOptions());

  for (uint32_t cf = 0; cf < 2; ++cf) {
    ASSERT_OK(Put(cf, "key", "val"));
    ASSERT_OK(Put(cf, "foo", "bar"));
    ASSERT_OK(Flush(cf));

    TablePropertiesCollection fname_to_props;
    ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props));
    ASSERT_EQ(1U, fname_to_props.size());

    std::string id, sid;
    ASSERT_OK(db_->GetDbIdentity(id));
    ASSERT_OK(db_->GetDbSessionId(sid));
    ASSERT_EQ(id, fname_to_props.begin()->second->db_id);
    ASSERT_EQ(sid, fname_to_props.begin()->second->db_session_id);
  }
}

TEST_F(DBTablePropertiesTest, FactoryReturnsNull) {
  struct JunkTablePropertiesCollector : public TablePropertiesCollector {
    const char* Name() const override { return "JunkTablePropertiesCollector"; }
    Status Finish(UserCollectedProperties* properties) override {
      properties->insert({"Junk", "Junk"});
      return Status::OK();
    }
    UserCollectedProperties GetReadableProperties() const override {
      return {};
    }
  };

  // Alternates between putting a "Junk" property and using `nullptr` to
  // opt out.
  static RelaxedAtomic<int> count{0};
  struct SometimesTablePropertiesCollectorFactory
      : public TablePropertiesCollectorFactory {
    const char* Name() const override {
      return "SometimesTablePropertiesCollectorFactory";
    }
    TablePropertiesCollector* CreateTablePropertiesCollector(
        TablePropertiesCollectorFactory::Context /*context*/) override {
      if (count.FetchAddRelaxed(1) & 1) {
        return nullptr;
      } else {
        return new JunkTablePropertiesCollector();
      }
    }
  };

  Options options = CurrentOptions();
  options.table_properties_collector_factories.emplace_back(
      std::make_shared<SometimesTablePropertiesCollectorFactory>());
  // Prefix extractor is needed for the plain table factory used below
  options.prefix_extractor.reset(NewFixedPrefixTransform(4));
  for (const std::shared_ptr<TableFactory>& tf :
       {options.table_factory,
        std::shared_ptr<TableFactory>(NewPlainTableFactory({}))}) {
    SCOPED_TRACE("Table factory = " + std::string(tf->Name()));
    options.table_factory = tf;

    DestroyAndReopen(options);

    ASSERT_OK(Put("key0", "value1"));
    ASSERT_OK(Flush());
    ASSERT_OK(Put("key0", "value2"));
    ASSERT_OK(Flush());

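    // The factory alternated between a junk collector and nullptr across the
    // two flushes, so expect exactly one file with the "Junk" property and
    // one without.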
    TablePropertiesCollection props;
    ASSERT_OK(db_->GetPropertiesOfAllTables(&props));
    int no_junk_count = 0;
    int junk_count = 0;
    for (const auto& item : props) {
      if (item.second->user_collected_properties.find("Junk") !=
          item.second->user_collected_properties.end()) {
        junk_count++;
      } else {
        no_junk_count++;
      }
    }
    EXPECT_EQ(1, no_junk_count);
    EXPECT_EQ(1, junk_count);
  }
}

class DBTableHostnamePropertyTest
    : public DBTestBase,
      public ::testing::WithParamInterface<std::tuple<int, std::string>> {
 public:
  DBTableHostnamePropertyTest()
      : DBTestBase("db_table_hostname_property_test",
                   /*env_do_fsync=*/false) {}
};

TEST_P(DBTableHostnamePropertyTest, DbHostLocationProperty) {
  option_config_ = std::get<0>(GetParam());
  Options opts = CurrentOptions();
  std::string expected_host_id = std::get<1>(GetParam());
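  // kHostnameForDbHostId is the sentinel value that directs RocksDB to record
  // the environment's actual hostname in the table properties.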
  if (expected_host_id == kHostnameForDbHostId) {
    ASSERT_OK(env_->GetHostNameString(&expected_host_id));
  } else {
    opts.db_host_id = expected_host_id;
  }
  CreateAndReopenWithCF({"goku"}, opts);

  for (uint32_t cf = 0; cf < 2; ++cf) {
    ASSERT_OK(Put(cf, "key", "val"));
    ASSERT_OK(Put(cf, "foo", "bar"));
    ASSERT_OK(Flush(cf));

    TablePropertiesCollection fname_to_props;
    ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props));
    ASSERT_EQ(1U, fname_to_props.size());

    ASSERT_EQ(fname_to_props.begin()->second->db_host_id, expected_host_id);
  }
}

INSTANTIATE_TEST_CASE_P(
    DBTableHostnamePropertyTest, DBTableHostnamePropertyTest,
    ::testing::Values(
        // OptionConfig, override db_host_location
        std::make_tuple(DBTestBase::OptionConfig::kDefault,
                        kHostnameForDbHostId),
        std::make_tuple(DBTestBase::OptionConfig::kDefault, "foobar"),
        std::make_tuple(DBTestBase::OptionConfig::kDefault, ""),
        std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
                        kHostnameForDbHostId),
        std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
                        "foobar"),
        std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix,
                        "")));

class DeletionTriggeredCompactionTestListener : public EventListener {
 public:
  void OnCompactionBegin(DB*, const CompactionJobInfo& ci) override {
    ASSERT_EQ(ci.compaction_reason,
              CompactionReason::kFilesMarkedForCompaction);
  }

  void OnCompactionCompleted(DB*, const CompactionJobInfo& ci) override {
    ASSERT_EQ(ci.compaction_reason,
              CompactionReason::kFilesMarkedForCompaction);
  }
};

TEST_P(DBTablePropertiesTest, DeletionTriggeredCompactionMarking) {
  int kNumKeys = 1000;
  int kWindowSize = 100;
  int kNumDelsTrigger = 90;
  std::shared_ptr<TablePropertiesCollectorFactory> compact_on_del =
      NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger);
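  // The collector marks a file for compaction once at least kNumDelsTrigger
  // deletion entries appear within any kWindowSize consecutive entries.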

  Options opts = CurrentOptions();
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  opts.table_properties_collector_factories.emplace_back(compact_on_del);

  if (GetParam() == "kCompactionStyleUniversal") {
    opts.compaction_style = kCompactionStyleUniversal;
  }
  Reopen(opts);

  // add an L1 file to prevent tombstones from dropping due to obsolescence
  // during flush
  ASSERT_OK(Put(Key(0), "val"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);

  DeletionTriggeredCompactionTestListener* listener =
      new DeletionTriggeredCompactionTestListener();
  opts.listeners.emplace_back(listener);
  Reopen(opts);

  for (int i = 0; i < kNumKeys; ++i) {
    if (i >= kNumKeys - kWindowSize &&
        i < kNumKeys - kWindowSize + kNumDelsTrigger) {
      ASSERT_OK(Delete(Key(i)));
    } else {
      ASSERT_OK(Put(Key(i), "val"));
    }
  }
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(0, NumTableFilesAtLevel(0));

  // Change the window size and deletion trigger and ensure new values take
  // effect
  kWindowSize = 50;
  kNumDelsTrigger = 40;
  static_cast<CompactOnDeletionCollectorFactory*>(compact_on_del.get())
      ->SetWindowSize(kWindowSize);
  static_cast<CompactOnDeletionCollectorFactory*>(compact_on_del.get())
      ->SetDeletionTrigger(kNumDelsTrigger);
  for (int i = 0; i < kNumKeys; ++i) {
    if (i >= kNumKeys - kWindowSize &&
        i < kNumKeys - kWindowSize + kNumDelsTrigger) {
      ASSERT_OK(Delete(Key(i)));
    } else {
      ASSERT_OK(Put(Key(i), "val"));
    }
  }
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(0, NumTableFilesAtLevel(0));

  // Change the window size to disable delete triggered compaction
  kWindowSize = 0;
  static_cast<CompactOnDeletionCollectorFactory*>(compact_on_del.get())
      ->SetWindowSize(kWindowSize);
  static_cast<CompactOnDeletionCollectorFactory*>(compact_on_del.get())
      ->SetDeletionTrigger(kNumDelsTrigger);
  for (int i = 0; i < kNumKeys; ++i) {
    if (i >= kNumKeys - kWindowSize &&
        i < kNumKeys - kWindowSize + kNumDelsTrigger) {
      ASSERT_OK(Delete(Key(i)));
    } else {
      ASSERT_OK(Put(Key(i), "val"));
    }
  }
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  ASSERT_LT(0, opts.statistics->getTickerCount(COMPACT_WRITE_BYTES_MARKED));
  ASSERT_LT(0, opts.statistics->getTickerCount(COMPACT_READ_BYTES_MARKED));
}

TEST_P(DBTablePropertiesTest, RatioBasedDeletionTriggeredCompactionMarking) {
  constexpr int kNumKeys = 1000;
  constexpr int kWindowSize = 0;
  constexpr int kNumDelsTrigger = 0;
  constexpr double kDeletionRatio = 0.1;
  std::shared_ptr<TablePropertiesCollectorFactory> compact_on_del =
      NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger,
                                           kDeletionRatio);
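  // With both the window size and the deletion trigger set to zero, only the
  // whole-file deletion ratio (10% here) can mark a file for compaction.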

  Options opts = CurrentOptions();
  opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  opts.table_properties_collector_factories.emplace_back(compact_on_del);

  Reopen(opts);

  // Add an L2 file to prevent tombstones from dropping due to obsolescence
  // during flush
  ASSERT_OK(Put(Key(0), "val"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(2);

  auto* listener = new DeletionTriggeredCompactionTestListener();
  opts.listeners.emplace_back(listener);
  Reopen(opts);

  // Generate one L0 with kNumKeys Put.
  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Put(Key(i), "not important"));
  }
  ASSERT_OK(Flush());

  // Generate another L0 with kNumKeys Delete.
  // This file, due to deletion ratio, will trigger compaction: 2@0 files to L1.
  // The resulting L1 file has only one tombstone for user key 'Key(0)'.
  // Again, due to deletion ratio, a compaction will be triggered: 1@1 + 1@2
  // files to L2. However, the resulting file is empty because the tombstone
  // and value are both dropped.
  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Delete(Key(i)));
  }
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  for (int i = 0; i < 3; ++i) {
    ASSERT_EQ(0, NumTableFilesAtLevel(i));
  }
}

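// Run the deletion-triggered compaction tests under both level and universal
// compaction styles.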
INSTANTIATE_TEST_CASE_P(DBTablePropertiesTest, DBTablePropertiesTest,
                        ::testing::Values("kCompactionStyleLevel",
                                          "kCompactionStyleUniversal"));

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}