2016-02-09 23:12:00 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2015-07-20 17:50:46 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
#include "table/cuckoo/cuckoo_table_builder.h"
|
|
|
|
|
2014-07-21 20:26:09 +00:00
|
|
|
#include <map>
|
2021-01-29 06:08:46 +00:00
|
|
|
#include <string>
|
2014-07-21 20:26:09 +00:00
|
|
|
#include <utility>
|
2021-01-29 06:08:46 +00:00
|
|
|
#include <vector>
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2019-09-16 17:31:27 +00:00
|
|
|
#include "file/random_access_file_reader.h"
|
|
|
|
#include "file/writable_file_writer.h"
|
2021-09-29 11:01:57 +00:00
|
|
|
#include "rocksdb/db.h"
|
2021-01-29 06:08:46 +00:00
|
|
|
#include "rocksdb/file_system.h"
|
2019-05-31 00:39:43 +00:00
|
|
|
#include "table/meta_blocks.h"
|
2019-05-30 18:21:38 +00:00
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2014-07-21 20:26:09 +00:00
|
|
|
extern const uint64_t kCuckooTableMagicNumber;
|
|
|
|
|
|
|
|
namespace {
|
2014-07-24 17:07:41 +00:00
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2014-07-24 17:07:41 +00:00
|
|
|
uint64_t GetSliceHash(const Slice& s, uint32_t index,
|
2018-03-05 21:08:17 +00:00
|
|
|
uint64_t /*max_num_buckets*/) {
|
2014-07-21 20:26:09 +00:00
|
|
|
return hash_map[s.ToString()][index];
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
class CuckooBuilderTest : public testing::Test {
|
2014-07-21 20:26:09 +00:00
|
|
|
public:
|
|
|
|
CuckooBuilderTest() {
|
|
|
|
env_ = Env::Default();
|
2014-08-06 03:55:46 +00:00
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
2021-01-29 06:08:46 +00:00
|
|
|
file_options_ = FileOptions(options);
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
2014-08-06 03:55:46 +00:00
|
|
|
void CheckFileContents(const std::vector<std::string>& keys,
|
2022-10-25 18:50:38 +00:00
|
|
|
const std::vector<std::string>& values,
|
|
|
|
const std::vector<uint64_t>& expected_locations,
|
|
|
|
std::string expected_unused_bucket,
|
|
|
|
uint64_t expected_table_size,
|
|
|
|
uint32_t expected_num_hash_func,
|
|
|
|
bool expected_is_last_level,
|
|
|
|
uint32_t expected_cuckoo_block_size = 1) {
|
2018-10-30 22:29:58 +00:00
|
|
|
uint64_t num_deletions = 0;
|
|
|
|
for (const auto& key : keys) {
|
|
|
|
ParsedInternalKey parsed;
|
2020-10-28 17:11:13 +00:00
|
|
|
Status pik_status =
|
|
|
|
ParseInternalKey(key, &parsed, true /* log_err_key */);
|
|
|
|
if (pik_status.ok() && parsed.type == kTypeDeletion) {
|
2018-10-30 22:29:58 +00:00
|
|
|
num_deletions++;
|
|
|
|
}
|
|
|
|
}
|
2014-07-21 20:26:09 +00:00
|
|
|
// Read file
|
|
|
|
uint64_t read_file_size;
|
|
|
|
ASSERT_OK(env_->GetFileSize(fname, &read_file_size));
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<RandomAccessFileReader> file_reader;
|
|
|
|
ASSERT_OK(RandomAccessFileReader::Create(
|
|
|
|
env_->GetFileSystem(), fname, file_options_, &file_reader, nullptr));
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2020-04-20 18:37:36 +00:00
|
|
|
Options options;
|
|
|
|
options.allow_mmap_reads = true;
|
2021-05-05 20:59:21 +00:00
|
|
|
ImmutableOptions ioptions(options);
|
2016-07-19 16:44:03 +00:00
|
|
|
|
2014-07-21 20:26:09 +00:00
|
|
|
// Assert Table Properties.
|
Improve / clean up meta block code & integrity (#9163)
Summary:
* Checksums are now checked on meta blocks unless specifically
suppressed or not applicable (e.g. plain table). (Was other way around.)
This means a number of cases that were not checking checksums now are,
including direct read TableProperties in Version::GetTableProperties
(fixed in meta_blocks ReadTableProperties), reading any block from
PersistentCache (fixed in BlockFetcher), read TableProperties in
SstFileDumper (ldb/sst_dump/BackupEngine) before table reader open,
maybe more.
* For that to work, I moved the global_seqno+TableProperties checksum
logic to the shared table/ code, because that is used by many utilies
such as SstFileDumper.
* Also for that to work, we have to know when we're dealing with a block
that has a checksum (trailer), so added that capability to Footer based
on magic number, and from there BlockFetcher.
* Knowledge of trailer presence has also fixed a problem where other
table formats were reading blocks including bytes for a non-existant
trailer--and awkwardly kind-of not using them, e.g. no shared code
checking checksums. (BlockFetcher compression type was populated
incorrectly.) Now we only read what is needed.
* Minimized code duplication and differing/incompatible/awkward
abstractions in meta_blocks.{cc,h} (e.g. SeekTo in metaindex block
without parsing block handle)
* Moved some meta block handling code from table_properties*.*
* Moved some code specific to block-based table from shared table/ code
to BlockBasedTable class. The checksum stuff means we can't completely
separate it, but things that don't need to be in shared table/ code
should not be.
* Use unique_ptr rather than raw ptr in more places. (Note: you can
std::move from unique_ptr to shared_ptr.)
Without enhancements to GetPropertiesOfAllTablesTest (see below),
net reduction of roughly 100 lines of code.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9163
Test Plan:
existing tests and
* Enhanced DBTablePropertiesTest.GetPropertiesOfAllTablesTest to verify that
checksums are now checked on direct read of table properties by TableCache
(new test would fail before this change)
* Also enhanced DBTablePropertiesTest.GetPropertiesOfAllTablesTest to test
putting table properties under old meta name
* Also generally enhanced that same test to actually test what it was
supposed to be testing already, by kicking things out of table cache when
we don't want them there.
Reviewed By: ajkr, mrambacher
Differential Revision: D32514757
Pulled By: pdillinger
fbshipit-source-id: 507964b9311d186ae8d1131182290cbd97a99fa9
2021-11-18 19:42:12 +00:00
|
|
|
std::unique_ptr<TableProperties> props;
|
2023-04-21 16:07:18 +00:00
|
|
|
const ReadOptions read_options;
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size,
|
2023-04-21 16:07:18 +00:00
|
|
|
kCuckooTableMagicNumber, ioptions,
|
|
|
|
read_options, &props));
|
2014-07-21 20:26:09 +00:00
|
|
|
// Check unused bucket.
|
2022-10-25 18:50:38 +00:00
|
|
|
std::string unused_key =
|
|
|
|
props->user_collected_properties[CuckooTablePropertyNames::kEmptyKey];
|
|
|
|
ASSERT_EQ(expected_unused_bucket.substr(0, props->fixed_key_len),
|
|
|
|
unused_key);
|
|
|
|
|
|
|
|
uint64_t value_len_found = *reinterpret_cast<const uint64_t*>(
|
|
|
|
props->user_collected_properties[CuckooTablePropertyNames::kValueLength]
|
|
|
|
.data());
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found);
|
2022-10-25 18:50:38 +00:00
|
|
|
ASSERT_EQ(props->raw_value_size, values.size() * value_len_found);
|
|
|
|
const uint64_t table_size = *reinterpret_cast<const uint64_t*>(
|
|
|
|
props
|
|
|
|
->user_collected_properties
|
|
|
|
[CuckooTablePropertyNames::kHashTableSize]
|
|
|
|
.data());
|
2014-08-28 17:42:23 +00:00
|
|
|
ASSERT_EQ(expected_table_size, table_size);
|
2022-10-25 18:50:38 +00:00
|
|
|
const uint32_t num_hash_func_found = *reinterpret_cast<const uint32_t*>(
|
|
|
|
props->user_collected_properties[CuckooTablePropertyNames::kNumHashFunc]
|
|
|
|
.data());
|
2014-08-28 17:42:23 +00:00
|
|
|
ASSERT_EQ(expected_num_hash_func, num_hash_func_found);
|
2022-10-25 18:50:38 +00:00
|
|
|
const uint32_t cuckoo_block_size = *reinterpret_cast<const uint32_t*>(
|
|
|
|
props
|
|
|
|
->user_collected_properties
|
|
|
|
[CuckooTablePropertyNames::kCuckooBlockSize]
|
|
|
|
.data());
|
2014-08-28 17:42:23 +00:00
|
|
|
ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size);
|
2022-10-25 18:50:38 +00:00
|
|
|
const bool is_last_level_found = *reinterpret_cast<const bool*>(
|
|
|
|
props->user_collected_properties[CuckooTablePropertyNames::kIsLastLevel]
|
|
|
|
.data());
|
2014-07-25 23:37:32 +00:00
|
|
|
ASSERT_EQ(expected_is_last_level, is_last_level_found);
|
2014-09-25 20:53:27 +00:00
|
|
|
|
|
|
|
ASSERT_EQ(props->num_entries, keys.size());
|
2018-10-30 22:29:58 +00:00
|
|
|
ASSERT_EQ(props->num_deletions, num_deletions);
|
2014-09-25 20:53:27 +00:00
|
|
|
ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
|
2022-10-25 18:50:38 +00:00
|
|
|
ASSERT_EQ(props->data_size,
|
|
|
|
expected_unused_bucket.size() *
|
|
|
|
(expected_table_size + expected_cuckoo_block_size - 1));
|
|
|
|
ASSERT_EQ(props->raw_key_size, keys.size() * props->fixed_key_len);
|
2016-04-07 06:10:32 +00:00
|
|
|
ASSERT_EQ(props->column_family_id, 0);
|
|
|
|
ASSERT_EQ(props->column_family_name, kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2014-07-21 20:26:09 +00:00
|
|
|
// Check contents of the bucket.
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<bool> keys_found(keys.size(), false);
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = expected_unused_bucket.size();
|
2020-06-02 22:02:44 +00:00
|
|
|
for (uint32_t i = 0; i + 1 < table_size + cuckoo_block_size; ++i) {
|
2014-08-06 03:55:46 +00:00
|
|
|
Slice read_slice;
|
2020-04-30 21:48:51 +00:00
|
|
|
ASSERT_OK(file_reader->Read(IOOptions(), i * bucket_size, bucket_size,
|
Group rocksdb.sst.read.micros stat by different user read IOActivity + misc (#11444)
Summary:
**Context/Summary:**
- Similar to https://github.com/facebook/rocksdb/pull/11288 but for user read such as `Get(), MultiGet(), DBIterator::XXX(), Verify(File)Checksum()`.
- For this, I refactored some user-facing `MultiGet` calls in `TransactionBase` and various types of `DB` so that it does not call a user-facing `Get()` but `GetImpl()` for passing the `ReadOptions::io_activity` check (see PR conversation)
- New user read stats breakdown are guarded by `kExceptDetailedTimers` since measurement shows they have 4-5% regression to the upstream/main.
- Misc
- More refactoring: with https://github.com/facebook/rocksdb/pull/11288, we complete passing `ReadOptions/IOOptions` to FS level. So we can now replace the previously [added](https://github.com/facebook/rocksdb/pull/9424) `rate_limiter_priority` parameter in `RandomAccessFileReader`'s `Read/MultiRead/Prefetch()` with `IOOptions::rate_limiter_priority`
- Also, `ReadAsync()` call time is measured in `SST_READ_MICRO` now
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11444
Test Plan:
- CI fake db crash/stress test
- Microbenchmarking
**Build** `make clean && ROCKSDB_NO_FBCODE=1 DEBUG_LEVEL=0 make -jN db_basic_bench`
- google benchmark version: https://github.com/google/benchmark/commit/604f6fd3f4b34a84ec4eb4db81d842fa4db829cd
- db_basic_bench_base: upstream
- db_basic_bench_pr: db_basic_bench_base + this PR
- asyncread_db_basic_bench_base: upstream + [db basic bench patch for IteratorNext](https://github.com/facebook/rocksdb/compare/main...hx235:rocksdb:micro_bench_async_read)
- asyncread_db_basic_bench_pr: asyncread_db_basic_bench_base + this PR
**Test**
Get
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{null_stat|base|pr} --benchmark_filter=DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1/negative_query:0/enable_filter:0/mmap:1/threads:1 --benchmark_repetitions=1000
```
Result
```
Coming soon
```
AsyncRead
```
TEST_TMPDIR=/dev/shm ./asyncread_db_basic_bench_{base|pr} --benchmark_filter=IteratorNext/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1/async_io:1/include_detailed_timers:0 --benchmark_repetitions=1000 > syncread_db_basic_bench_{base|pr}.out
```
Result
```
Base:
1956,1956,1968,1977,1979,1986,1988,1988,1988,1990,1991,1991,1993,1993,1993,1993,1994,1996,1997,1997,1997,1998,1999,2001,2001,2002,2004,2007,2007,2008,
PR (2.3% regression, due to measuring `SST_READ_MICRO` that wasn't measured before):
1993,2014,2016,2022,2024,2027,2027,2028,2028,2030,2031,2031,2032,2032,2038,2039,2042,2044,2044,2047,2047,2047,2048,2049,2050,2052,2052,2052,2053,2053,
```
Reviewed By: ajkr
Differential Revision: D45918925
Pulled By: hx235
fbshipit-source-id: 58a54560d9ebeb3a59b6d807639692614dad058a
2023-08-09 00:26:50 +00:00
|
|
|
&read_slice, nullptr, nullptr));
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t key_idx =
|
|
|
|
std::find(expected_locations.begin(), expected_locations.end(), i) -
|
|
|
|
expected_locations.begin();
|
2014-08-06 03:55:46 +00:00
|
|
|
if (key_idx == keys.size()) {
|
2017-05-18 06:03:54 +00:00
|
|
|
// i is not one of the expected locations. Empty bucket.
|
2017-08-23 17:45:17 +00:00
|
|
|
if (read_slice.data() == nullptr) {
|
|
|
|
ASSERT_EQ(0, expected_unused_bucket.size());
|
|
|
|
} else {
|
|
|
|
ASSERT_EQ(read_slice.compare(expected_unused_bucket), 0);
|
|
|
|
}
|
2014-08-06 03:55:46 +00:00
|
|
|
} else {
|
|
|
|
keys_found[key_idx] = true;
|
|
|
|
ASSERT_EQ(read_slice.compare(keys[key_idx] + values[key_idx]), 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (auto key_found : keys_found) {
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
// Check that all keys wereReader found.
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_TRUE(key_found);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-30 22:29:58 +00:00
|
|
|
std::string GetInternalKey(Slice user_key, bool zero_seqno,
|
|
|
|
ValueType type = kTypeValue) {
|
2014-08-06 03:55:46 +00:00
|
|
|
IterKey ikey;
|
2018-10-30 22:29:58 +00:00
|
|
|
ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, type);
|
2017-04-04 21:17:16 +00:00
|
|
|
return ikey.GetInternalKey().ToString();
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
Improve Cuckoo Table Reader performance. Inlined hash function and number of buckets a power of two.
Summary:
Use inlined hash functions instead of function pointer. Make number of buckets a power of two and use bitwise and instead of mod.
After these changes, we get almost 50% improvement in performance.
Results:
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.231us (4.3 Mqps) with batch size of 0
Time taken per op is 0.229us (4.4 Mqps) with batch size of 0
Time taken per op is 0.185us (5.4 Mqps) with batch size of 0
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.108us (9.3 Mqps) with batch size of 10
Time taken per op is 0.100us (10.0 Mqps) with batch size of 10
Time taken per op is 0.103us (9.7 Mqps) with batch size of 10
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.101us (9.9 Mqps) with batch size of 25
Time taken per op is 0.098us (10.2 Mqps) with batch size of 25
Time taken per op is 0.097us (10.3 Mqps) with batch size of 25
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.100us (10.0 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
Time taken per op is 0.097us (10.3 Mqps) with batch size of 50
With 120000000 items, utilization is 89.41%, number of hash functions: 2.
Time taken per op is 0.102us (9.8 Mqps) with batch size of 100
Time taken per op is 0.098us (10.2 Mqps) with batch size of 100
Time taken per op is 0.115us (8.7 Mqps) with batch size of 100
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.201us (5.0 Mqps) with batch size of 0
Time taken per op is 0.155us (6.5 Mqps) with batch size of 0
Time taken per op is 0.152us (6.6 Mqps) with batch size of 0
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.089us (11.3 Mqps) with batch size of 10
Time taken per op is 0.084us (11.9 Mqps) with batch size of 10
Time taken per op is 0.086us (11.6 Mqps) with batch size of 10
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.087us (11.5 Mqps) with batch size of 25
Time taken per op is 0.085us (11.7 Mqps) with batch size of 25
Time taken per op is 0.093us (10.8 Mqps) with batch size of 25
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.094us (10.6 Mqps) with batch size of 50
Time taken per op is 0.094us (10.7 Mqps) with batch size of 50
Time taken per op is 0.093us (10.8 Mqps) with batch size of 50
With 100000000 items, utilization is 74.51%, number of hash functions: 2.
Time taken per op is 0.092us (10.9 Mqps) with batch size of 100
Time taken per op is 0.089us (11.2 Mqps) with batch size of 100
Time taken per op is 0.088us (11.3 Mqps) with batch size of 100
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.154us (6.5 Mqps) with batch size of 0
Time taken per op is 0.168us (6.0 Mqps) with batch size of 0
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.081us (12.4 Mqps) with batch size of 10
Time taken per op is 0.077us (13.0 Mqps) with batch size of 10
Time taken per op is 0.083us (12.1 Mqps) with batch size of 10
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
Time taken per op is 0.073us (13.7 Mqps) with batch size of 25
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.076us (13.1 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
Time taken per op is 0.072us (13.8 Mqps) with batch size of 50
With 80000000 items, utilization is 59.60%, number of hash functions: 2.
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.074us (13.6 Mqps) with batch size of 100
Time taken per op is 0.073us (13.6 Mqps) with batch size of 100
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.190us (5.3 Mqps) with batch size of 0
Time taken per op is 0.186us (5.4 Mqps) with batch size of 0
Time taken per op is 0.184us (5.4 Mqps) with batch size of 0
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.079us (12.7 Mqps) with batch size of 10
Time taken per op is 0.070us (14.2 Mqps) with batch size of 10
Time taken per op is 0.072us (14.0 Mqps) with batch size of 10
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 25
Time taken per op is 0.072us (14.0 Mqps) with batch size of 25
Time taken per op is 0.071us (14.1 Mqps) with batch size of 25
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.082us (12.1 Mqps) with batch size of 50
Time taken per op is 0.071us (14.1 Mqps) with batch size of 50
Time taken per op is 0.073us (13.6 Mqps) with batch size of 50
With 70000000 items, utilization is 52.15%, number of hash functions: 2.
Time taken per op is 0.080us (12.5 Mqps) with batch size of 100
Time taken per op is 0.077us (13.0 Mqps) with batch size of 100
Time taken per op is 0.078us (12.8 Mqps) with batch size of 100
Test Plan:
make check all
make valgrind_check
make asan_check
Reviewers: sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D22539
2014-08-30 02:06:15 +00:00
|
|
|
uint64_t NextPowOf2(uint64_t num) {
|
|
|
|
uint64_t n = 2;
|
|
|
|
while (n <= num) {
|
|
|
|
n *= 2;
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t GetExpectedTableSize(uint64_t num) {
|
|
|
|
return NextPowOf2(static_cast<uint64_t>(num / kHashTableRatio));
|
|
|
|
}
|
|
|
|
|
2014-07-21 20:26:09 +00:00
|
|
|
Env* env_;
|
2021-01-29 06:08:46 +00:00
|
|
|
FileOptions file_options_;
|
2014-07-21 20:26:09 +00:00
|
|
|
std::string fname;
|
2014-08-06 03:55:46 +00:00
|
|
|
const double kHashTableRatio = 0.9;
|
2014-07-21 20:26:09 +00:00
|
|
|
};
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) {
|
2018-11-09 19:17:34 +00:00
|
|
|
std::unique_ptr<WritableFile> writable_file;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("EmptyFile");
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, 4, 100,
|
|
|
|
BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_EQ(0UL, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-25 20:53:27 +00:00
|
|
|
CheckFileContents({}, {}, {}, "", 2, 2, false);
|
2014-08-06 03:55:46 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
|
2018-10-30 22:29:58 +00:00
|
|
|
for (auto type : {kTypeValue, kTypeDeletion}) {
|
|
|
|
uint32_t num_hash_fun = 4;
|
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
|
|
|
|
std::vector<std::string> values;
|
|
|
|
if (type == kTypeValue) {
|
|
|
|
values = {"v01", "v02", "v03", "v04"};
|
|
|
|
} else {
|
|
|
|
values = {"", "", "", ""};
|
|
|
|
}
|
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
|
|
|
{user_keys[0], {0, 1, 2, 3}},
|
|
|
|
{user_keys[1], {1, 2, 3, 4}},
|
|
|
|
{user_keys[2], {2, 3, 4, 5}},
|
|
|
|
{user_keys[3], {3, 4, 5, 6}}};
|
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
|
|
|
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
|
|
|
|
std::vector<std::string> keys;
|
|
|
|
for (auto& user_key : user_keys) {
|
|
|
|
keys.push_back(GetInternalKey(user_key, false, type));
|
|
|
|
}
|
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(keys.size());
|
|
|
|
|
|
|
|
fname = test::PerThreadDBPath("NoCollisionFullKey");
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
2018-10-30 22:29:58 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
100, BytewiseComparator(), 1, false, false,
|
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
2018-10-30 22:29:58 +00:00
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(keys[i]), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
}
|
|
|
|
size_t bucket_size = keys[0].size() + values[0].size();
|
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2018-10-30 22:29:58 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
|
|
|
|
|
|
|
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
|
|
|
CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
|
|
|
|
expected_table_size, 2, false);
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
|
2014-08-06 03:55:46 +00:00
|
|
|
uint32_t num_hash_fun = 4;
|
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
|
|
|
|
std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
|
|
|
{user_keys[0], {0, 1, 2, 3}},
|
|
|
|
{user_keys[1], {0, 1, 2, 3}},
|
|
|
|
{user_keys[2], {0, 1, 2, 3}},
|
|
|
|
{user_keys[3], {0, 1, 2, 3}},
|
2014-08-06 03:55:46 +00:00
|
|
|
};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
|
|
|
|
std::vector<std::string> keys;
|
|
|
|
for (auto& user_key : user_keys) {
|
|
|
|
keys.push_back(GetInternalKey(user_key, false));
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(keys.size());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("WithCollisionFullKey");
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
100, BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(keys[i]), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = keys[0].size() + values[0].size();
|
2014-09-17 22:34:10 +00:00
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2014-08-27 17:39:31 +00:00
|
|
|
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
2014-08-06 03:55:46 +00:00
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
2022-10-25 18:50:38 +00:00
|
|
|
CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
|
|
|
|
expected_table_size, 4, false);
|
2014-08-28 17:42:23 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
|
2014-08-28 17:42:23 +00:00
|
|
|
uint32_t num_hash_fun = 4;
|
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
|
|
|
|
std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
|
|
|
{user_keys[0], {0, 1, 2, 3}},
|
|
|
|
{user_keys[1], {0, 1, 2, 3}},
|
|
|
|
{user_keys[2], {0, 1, 2, 3}},
|
|
|
|
{user_keys[3], {0, 1, 2, 3}},
|
2014-08-28 17:42:23 +00:00
|
|
|
};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
2014-08-28 17:42:23 +00:00
|
|
|
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
|
|
|
|
std::vector<std::string> keys;
|
|
|
|
for (auto& user_key : user_keys) {
|
|
|
|
keys.push_back(GetInternalKey(user_key, false));
|
|
|
|
}
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(keys.size());
|
2014-08-28 17:42:23 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2014-08-28 17:42:23 +00:00
|
|
|
uint32_t cuckoo_block_size = 2;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("WithCollisionFullKey2");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
2016-04-07 06:10:32 +00:00
|
|
|
CuckooTableBuilder builder(
|
|
|
|
file_writer.get(), kHashTableRatio, num_hash_fun, 100,
|
|
|
|
BytewiseComparator(), cuckoo_block_size, false, false, GetSliceHash,
|
|
|
|
0 /* column_family_id */, kDefaultColumnFamilyName);
|
2014-08-28 17:42:23 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(keys[i]), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
}
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = keys[0].size() + values[0].size();
|
2014-09-17 22:34:10 +00:00
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
2014-08-28 17:42:23 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
2014-08-28 17:42:23 +00:00
|
|
|
|
|
|
|
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
2022-10-25 18:50:38 +00:00
|
|
|
CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
|
|
|
|
expected_table_size, 3, false, cuckoo_block_size);
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) {
|
2014-08-06 03:55:46 +00:00
|
|
|
// Have two hash functions. Insert elements with overlapping hashes.
|
|
|
|
// Finally insert an element with hash value somewhere in the middle
|
|
|
|
// so that it displaces all the elements after that.
|
|
|
|
uint32_t num_hash_fun = 2;
|
2022-10-25 18:50:38 +00:00
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
|
|
|
|
"key05"};
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
2022-10-25 18:50:38 +00:00
|
|
|
{user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}},
|
|
|
|
{user_keys[3], {3, 4}}, {user_keys[4], {0, 2}},
|
2014-08-06 03:55:46 +00:00
|
|
|
};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
|
|
|
|
std::vector<std::string> keys;
|
|
|
|
for (auto& user_key : user_keys) {
|
|
|
|
keys.push_back(GetInternalKey(user_key, false));
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(keys.size());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("WithCollisionPathFullKey");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
100, BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-28 17:42:23 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(keys[i]), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
}
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = keys[0].size() + values[0].size();
|
2014-09-17 22:34:10 +00:00
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
2014-08-28 17:42:23 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
2014-08-28 17:42:23 +00:00
|
|
|
|
|
|
|
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
2022-10-25 18:50:38 +00:00
|
|
|
CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
|
|
|
|
expected_table_size, 2, false);
|
2014-08-28 17:42:23 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
|
2014-08-28 17:42:23 +00:00
|
|
|
uint32_t num_hash_fun = 2;
|
2022-10-25 18:50:38 +00:00
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
|
|
|
|
"key05"};
|
2014-08-28 17:42:23 +00:00
|
|
|
std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
2022-10-25 18:50:38 +00:00
|
|
|
{user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {3, 4}},
|
|
|
|
{user_keys[3], {4, 5}}, {user_keys[4], {0, 3}},
|
2014-08-28 17:42:23 +00:00
|
|
|
};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
2014-08-28 17:42:23 +00:00
|
|
|
std::vector<uint64_t> expected_locations = {2, 1, 3, 4, 0};
|
|
|
|
std::vector<std::string> keys;
|
|
|
|
for (auto& user_key : user_keys) {
|
|
|
|
keys.push_back(GetInternalKey(user_key, false));
|
|
|
|
}
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(keys.size());
|
2014-08-28 17:42:23 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("WithCollisionPathFullKeyAndCuckooBlock");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
100, BytewiseComparator(), 2, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(keys[i]), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = keys[0].size() + values[0].size();
|
2014-09-17 22:34:10 +00:00
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2014-08-27 17:39:31 +00:00
|
|
|
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
2014-08-06 03:55:46 +00:00
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
2022-10-25 18:50:38 +00:00
|
|
|
CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
|
|
|
|
expected_table_size, 2, false, 2);
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
|
2014-08-06 03:55:46 +00:00
|
|
|
uint32_t num_hash_fun = 4;
|
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
|
|
|
|
std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
|
|
|
{user_keys[0], {0, 1, 2, 3}},
|
|
|
|
{user_keys[1], {1, 2, 3, 4}},
|
|
|
|
{user_keys[2], {2, 3, 4, 5}},
|
|
|
|
{user_keys[3], {3, 4, 5, 6}}};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("NoCollisionUserKey");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
100, BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = user_keys[0].size() + values[0].size();
|
2014-09-17 22:34:10 +00:00
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2014-08-27 17:39:31 +00:00
|
|
|
std::string expected_unused_bucket = "key00";
|
2014-08-06 03:55:46 +00:00
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
|
|
|
CheckFileContents(user_keys, values, expected_locations,
|
2022-10-25 18:50:38 +00:00
|
|
|
expected_unused_bucket, expected_table_size, 2, true);
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
|
2014-08-06 03:55:46 +00:00
|
|
|
uint32_t num_hash_fun = 4;
|
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
|
|
|
|
std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
|
|
|
{user_keys[0], {0, 1, 2, 3}},
|
|
|
|
{user_keys[1], {0, 1, 2, 3}},
|
|
|
|
{user_keys[2], {0, 1, 2, 3}},
|
|
|
|
{user_keys[3], {0, 1, 2, 3}},
|
2014-08-06 03:55:46 +00:00
|
|
|
};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("WithCollisionUserKey");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
100, BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
}
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = user_keys[0].size() + values[0].size();
|
2014-09-17 22:34:10 +00:00
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2014-08-27 17:39:31 +00:00
|
|
|
std::string expected_unused_bucket = "key00";
|
2014-08-06 03:55:46 +00:00
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
|
|
|
CheckFileContents(user_keys, values, expected_locations,
|
2022-10-25 18:50:38 +00:00
|
|
|
expected_unused_bucket, expected_table_size, 4, true);
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) {
|
2014-08-06 03:55:46 +00:00
|
|
|
uint32_t num_hash_fun = 2;
|
2022-10-25 18:50:38 +00:00
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
|
|
|
|
"key05"};
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
2022-10-25 18:50:38 +00:00
|
|
|
{user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}},
|
|
|
|
{user_keys[3], {3, 4}}, {user_keys[4], {0, 2}},
|
2014-08-06 03:55:46 +00:00
|
|
|
};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
|
|
|
|
2014-08-06 03:55:46 +00:00
|
|
|
std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
|
2015-11-19 19:47:12 +00:00
|
|
|
uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("WithCollisionPathUserKey");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
|
|
|
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
2, BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2014-11-11 21:47:22 +00:00
|
|
|
size_t bucket_size = user_keys[0].size() + values[0].size();
|
2014-09-17 22:34:10 +00:00
|
|
|
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.Finish());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-09-05 18:18:01 +00:00
|
|
|
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2014-08-27 17:39:31 +00:00
|
|
|
std::string expected_unused_bucket = "key00";
|
2014-08-06 03:55:46 +00:00
|
|
|
expected_unused_bucket += std::string(values[0].size(), 'a');
|
|
|
|
CheckFileContents(user_keys, values, expected_locations,
|
2022-10-25 18:50:38 +00:00
|
|
|
expected_unused_bucket, expected_table_size, 2, true);
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
2014-07-21 20:26:09 +00:00
|
|
|
// Have two hash functions. Insert elements with overlapping hashes.
|
2014-08-06 03:55:46 +00:00
|
|
|
// Finally try inserting an element with hash value somewhere in the middle
|
|
|
|
// and it should fail because the no. of elements to displace is too high.
|
|
|
|
uint32_t num_hash_fun = 2;
|
2022-10-25 18:50:38 +00:00
|
|
|
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
|
|
|
|
"key05"};
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
2022-10-25 18:50:38 +00:00
|
|
|
{user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}},
|
|
|
|
{user_keys[3], {3, 4}}, {user_keys[4], {0, 1}},
|
2014-08-06 03:55:46 +00:00
|
|
|
};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
2014-08-06 03:55:46 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("WithCollisionPathUserKey");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
2, BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
|
|
|
builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
|
|
|
|
ASSERT_EQ(builder.NumEntries(), i + 1);
|
|
|
|
ASSERT_OK(builder.status());
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_TRUE(builder.Finish().IsNotSupported());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
|
2015-07-13 19:11:05 +00:00
|
|
|
// Need to have a temporary variable here as VS compiler does not currently
|
|
|
|
// support operator= with initializer_list as a parameter
|
|
|
|
std::unordered_map<std::string, std::vector<uint64_t>> hm = {
|
|
|
|
{"repeatedkey", {0, 1, 2, 3}}};
|
2015-07-01 23:13:49 +00:00
|
|
|
hash_map = std::move(hm);
|
2014-08-06 03:55:46 +00:00
|
|
|
uint32_t num_hash_fun = 4;
|
|
|
|
std::string user_key = "repeatedkey";
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2021-01-29 06:08:46 +00:00
|
|
|
std::unique_ptr<WritableFileWriter> file_writer;
|
2018-07-14 00:18:39 +00:00
|
|
|
fname = test::PerThreadDBPath("FailWhenSameKeyInserted");
|
2021-01-29 06:08:46 +00:00
|
|
|
ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
|
|
|
|
file_options_, &file_writer, nullptr));
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
|
|
|
|
100, BytewiseComparator(), 1, false, false,
|
2016-04-07 06:10:32 +00:00
|
|
|
GetSliceHash, 0 /* column_family_id */,
|
|
|
|
kDefaultColumnFamilyName);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
|
|
|
|
builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
|
2014-08-07 09:06:07 +00:00
|
|
|
ASSERT_EQ(builder.NumEntries(), 1u);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
builder.Add(Slice(GetInternalKey(user_key, true)), Slice("value2"));
|
2014-08-07 09:06:07 +00:00
|
|
|
ASSERT_EQ(builder.NumEntries(), 2u);
|
2014-08-06 03:55:46 +00:00
|
|
|
ASSERT_OK(builder.status());
|
|
|
|
|
|
|
|
ASSERT_TRUE(builder.Finish().IsNotSupported());
|
Group SST write in flush, compaction and db open with new stats (#11910)
Summary:
## Context/Summary
Similar to https://github.com/facebook/rocksdb/pull/11288, https://github.com/facebook/rocksdb/pull/11444, categorizing SST/blob file write according to different io activities allows more insight into the activity.
For that, this PR does the following:
- Tag different write IOs by passing down and converting WriteOptions to IOOptions
- Add new SST_WRITE_MICROS histogram in WritableFileWriter::Append() and breakdown FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS
Some related code refactory to make implementation cleaner:
- Blob stats
- Replace high-level write measurement with low-level WritableFileWriter::Append() measurement for BLOB_DB_BLOB_FILE_WRITE_MICROS. This is to make FILE_WRITE_{FLUSH|COMPACTION|DB_OPEN}_MICROS include blob file. As a consequence, this introduces some behavioral changes on it, see HISTORY and db bench test plan below for more info.
- Fix bugs where BLOB_DB_BLOB_FILE_SYNCED/BLOB_DB_BLOB_FILE_BYTES_WRITTEN include file failed to sync and bytes failed to write.
- Refactor WriteOptions constructor for easier construction with io_activity and rate_limiter_priority
- Refactor DBImpl::~DBImpl()/BlobDBImpl::Close() to bypass thread op verification
- Build table
- TableBuilderOptions now includes Read/WriteOpitons so BuildTable() do not need to take these two variables
- Replace the io_priority passed into BuildTable() with TableBuilderOptions::WriteOpitons::rate_limiter_priority. Similar for BlobFileBuilder.
This parameter is used for dynamically changing file io priority for flush, see https://github.com/facebook/rocksdb/pull/9988?fbclid=IwAR1DtKel6c-bRJAdesGo0jsbztRtciByNlvokbxkV6h_L-AE9MACzqRTT5s for more
- Update ThreadStatus::FLUSH_BYTES_WRITTEN to use io_activity to track flush IO in flush job and db open instead of io_priority
## Test
### db bench
Flush
```
./db_bench --statistics=1 --benchmarks=fillseq --num=100000 --write_buffer_size=100
rocksdb.sst.write.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.flush.micros P50 : 1.830863 P95 : 4.094720 P99 : 6.578947 P100 : 26.000000 COUNT : 7875 SUM : 20377
rocksdb.file.write.compaction.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.db.open.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
```
compaction, db oopen
```
Setup: ./db_bench --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
rocksdb.sst.write.micros P50 : 2.675325 P95 : 9.578788 P99 : 18.780000 P100 : 314.000000 COUNT : 638 SUM : 3279
rocksdb.file.write.flush.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0
rocksdb.file.write.compaction.micros P50 : 2.757353 P95 : 9.610687 P99 : 19.316667 P100 : 314.000000 COUNT : 615 SUM : 3213
rocksdb.file.write.db.open.micros P50 : 2.055556 P95 : 3.925000 P99 : 9.000000 P100 : 9.000000 COUNT : 23 SUM : 66
```
blob stats - just to make sure they aren't broken by this PR
```
Integrated Blob DB
Setup: ./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
Run:./db_bench --enable_blob_files=1 --statistics=1 --benchmarks=compact --db=../db_bench --use_existing_db=1
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 7.298246 P95 : 9.771930 P99 : 9.991813 P100 : 16.000000 COUNT : 235 SUM : 1600
rocksdb.blobdb.blob.file.synced COUNT : 1
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 2.000000 P95 : 2.829360 P99 : 2.993779 P100 : 9.000000 COUNT : 707 SUM : 1614
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 1 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 34842 (stay the same)
```
```
Stacked Blob DB
Run: ./db_bench --use_blob_db=1 --statistics=1 --benchmarks=fillseq --num=10000 --disable_auto_compactions=1 -write_buffer_size=100 --db=../db_bench
pre-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 12.808042 P95 : 19.674497 P99 : 28.539683 P100 : 51.000000 COUNT : 10000 SUM : 140876
rocksdb.blobdb.blob.file.synced COUNT : 8
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445
post-PR:
rocksdb.blobdb.blob.file.write.micros P50 : 1.657370 P95 : 2.952175 P99 : 3.877519 P100 : 24.000000 COUNT : 30001 SUM : 67924
- COUNT is higher and values are smaller as it includes header and footer write
- COUNT is 3X higher due to each Append() count as one post-PR, while in pre-PR, 3 Append()s counts as one. See https://github.com/facebook/rocksdb/pull/11910/files#diff-32b811c0a1c000768cfb2532052b44dc0b3bf82253f3eab078e15ff201a0dabfL157-L164
rocksdb.blobdb.blob.file.synced COUNT : 8 (stay the same)
rocksdb.blobdb.blob.file.bytes.written COUNT : 1043445 (stay the same)
```
### Rehearsal CI stress test
Trigger 3 full runs of all our CI stress tests
### Performance
Flush
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=ManualFlush/key_num:524288/per_key_size:256 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark; enable_statistics = true
Pre-pr: avg 507515519.3 ns
497686074,499444327,500862543,501389862,502994471,503744435,504142123,504224056,505724198,506610393,506837742,506955122,507695561,507929036,508307733,508312691,508999120,509963561,510142147,510698091,510743096,510769317,510957074,511053311,511371367,511409911,511432960,511642385,511691964,511730908,
Post-pr: avg 511971266.5 ns, regressed 0.88%
502744835,506502498,507735420,507929724,508313335,509548582,509994942,510107257,510715603,511046955,511352639,511458478,512117521,512317380,512766303,512972652,513059586,513804934,513808980,514059409,514187369,514389494,514447762,514616464,514622882,514641763,514666265,514716377,514990179,515502408,
```
Compaction
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_{pre|post}_pr --benchmark_filter=ManualCompaction/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 495346098.30 ns
492118301,493203526,494201411,494336607,495269217,495404950,496402598,497012157,497358370,498153846
Post-pr: avg 504528077.20, regressed 1.85%. "ManualCompaction" include flush so the isolated regression for compaction should be around 1.85-0.88 = 0.97%
502465338,502485945,502541789,502909283,503438601,504143885,506113087,506629423,507160414,507393007
```
Put with WAL (in case passing WriteOptions slows down this path even without collecting SST write stats)
```
TEST_TMPDIR=/dev/shm ./db_basic_bench_pre_pr --benchmark_filter=DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1 --benchmark_repetitions=1000
-- default: 1 thread is used to run benchmark
Pre-pr: avg 3848.10 ns
3814,3838,3839,3848,3854,3854,3854,3860,3860,3860
Post-pr: avg 3874.20 ns, regressed 0.68%
3863,3867,3871,3874,3875,3877,3877,3877,3880,3881
```
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11910
Reviewed By: ajkr
Differential Revision: D49788060
Pulled By: hx235
fbshipit-source-id: 79e73699cda5be3b66461687e5147c2484fc5eff
2023-12-29 23:29:23 +00:00
|
|
|
ASSERT_OK(file_writer->Close(IOOptions()));
|
2014-07-21 20:26:09 +00:00
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2014-07-21 20:26:09 +00:00
|
|
|
|
2015-03-17 21:08:00 +00:00
|
|
|
int main(int argc, char** argv) {
|
2022-10-18 07:35:35 +00:00
|
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
2015-03-17 21:08:00 +00:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|