2016-02-09 23:12:00 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2014-01-02 17:08:12 +00:00
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
2014-02-26 18:03:34 +00:00
|
|
|
#include <algorithm>
|
2023-10-04 21:14:22 +00:00
|
|
|
#include <atomic>
|
2014-02-26 18:03:34 +00:00
|
|
|
#include <string>
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to repro. However, I managed to write a new tests that reproes the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one LOC. All other code is related to tests. I added a new test that reproes the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There's plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
#include <thread>
|
2020-04-22 00:35:28 +00:00
|
|
|
#include <vector>
|
2014-02-26 18:03:34 +00:00
|
|
|
|
2019-05-31 18:52:59 +00:00
|
|
|
#include "db/db_impl/db_impl.h"
|
2019-05-31 22:21:36 +00:00
|
|
|
#include "db/db_test_util.h"
|
2017-04-06 02:02:00 +00:00
|
|
|
#include "options/options_parser.h"
|
2017-02-06 22:43:55 +00:00
|
|
|
#include "port/port.h"
|
2019-08-09 22:08:36 +00:00
|
|
|
#include "port/stack_trace.h"
|
2023-07-26 23:25:06 +00:00
|
|
|
#include "rocksdb/comparator.h"
|
2020-04-22 00:35:28 +00:00
|
|
|
#include "rocksdb/convenience.h"
|
2014-01-02 17:08:12 +00:00
|
|
|
#include "rocksdb/db.h"
|
2015-03-20 00:04:29 +00:00
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/iterator.h"
|
2024-06-07 00:29:01 +00:00
|
|
|
#include "rocksdb/listener.h"
|
2019-08-09 22:08:36 +00:00
|
|
|
#include "rocksdb/utilities/object_registry.h"
|
2019-05-30 18:21:38 +00:00
|
|
|
#include "test_util/sync_point.h"
|
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
2019-05-31 00:39:43 +00:00
|
|
|
#include "util/coding.h"
|
2023-10-04 21:14:22 +00:00
|
|
|
#include "util/defer.h"
|
2019-05-31 00:39:43 +00:00
|
|
|
#include "util/string_util.h"
|
2020-07-09 21:33:42 +00:00
|
|
|
#include "utilities/fault_injection_env.h"
|
2014-01-28 19:05:04 +00:00
|
|
|
#include "utilities/merge_operators.h"
|
2014-01-02 17:08:12 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2024-06-07 00:29:01 +00:00
|
|
|
namespace {
|
2024-06-13 20:18:10 +00:00
|
|
|
// Returns the bytes that PutFixed64() appends for `number`, as a standalone
// string.
std::string EncodeAsUint64(uint64_t number) {
  std::string encoded;
  PutFixed64(&encoded, number);
  return encoded;
}
|
|
|
|
} // namespace
|
|
|
|
|
2016-06-06 21:40:36 +00:00
|
|
|
// Length, in bytes, of every value string generated for the test keys.
static constexpr int kValueSize = 1000;
|
|
|
|
|
2014-04-15 16:57:25 +00:00
|
|
|
// counts how many operations were performed
|
2020-11-06 17:57:09 +00:00
|
|
|
class EnvCounter : public SpecialEnv {
|
2014-04-15 16:57:25 +00:00
|
|
|
public:
|
|
|
|
explicit EnvCounter(Env* base)
|
2020-11-06 17:57:09 +00:00
|
|
|
: SpecialEnv(base), num_new_writable_file_(0) {}
|
2022-11-02 21:34:24 +00:00
|
|
|
int GetNumberOfNewWritableFileCalls() { return num_new_writable_file_; }
|
2018-11-09 19:17:34 +00:00
|
|
|
Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
|
2015-02-26 19:28:41 +00:00
|
|
|
const EnvOptions& soptions) override {
|
2014-04-15 16:57:25 +00:00
|
|
|
++num_new_writable_file_;
|
|
|
|
return EnvWrapper::NewWritableFile(f, r, soptions);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2017-05-18 20:55:00 +00:00
|
|
|
std::atomic<int> num_new_writable_file_;
|
2014-04-15 16:57:25 +00:00
|
|
|
};
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
class ColumnFamilyTestBase : public testing::Test {
|
2014-01-02 17:08:12 +00:00
|
|
|
public:
|
2019-08-09 22:08:36 +00:00
|
|
|
explicit ColumnFamilyTestBase(uint32_t format) : rnd_(139), format_(format) {
|
|
|
|
Env* base_env = Env::Default();
|
2021-06-15 10:42:52 +00:00
|
|
|
EXPECT_OK(
|
|
|
|
test::CreateEnvFromSystem(ConfigOptions(), &base_env, &env_guard_));
|
2019-08-09 22:08:36 +00:00
|
|
|
EXPECT_NE(nullptr, base_env);
|
|
|
|
env_ = new EnvCounter(base_env);
|
2020-11-06 17:57:09 +00:00
|
|
|
env_->skip_fsync_ = true;
|
2018-07-14 00:18:39 +00:00
|
|
|
dbname_ = test::PerThreadDBPath("column_family_test");
|
2014-01-02 17:08:12 +00:00
|
|
|
db_options_.create_if_missing = true;
|
2015-11-11 06:58:01 +00:00
|
|
|
db_options_.fail_if_options_file_error = true;
|
2014-04-15 16:57:25 +00:00
|
|
|
db_options_.env = env_;
|
2023-07-26 23:25:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void SetUp() override {
|
2020-12-24 00:54:05 +00:00
|
|
|
EXPECT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_)));
|
2014-01-02 17:08:12 +00:00
|
|
|
}
|
|
|
|
|
2019-02-14 21:52:47 +00:00
|
|
|
// Collects the descriptors of all still-open column families, stops sync
// point processing, destroys the database and releases the EnvCounter.
~ColumnFamilyTestBase() override {
  std::vector<ColumnFamilyDescriptor> column_families;
  for (auto h : handles_) {
    if (h == nullptr) {
      // DropColumnFamilies() nulls out dropped slots and
      // CreateColumnFamilies() resizes handles_ before filling it, so a test
      // that exits early can leave nullptr entries behind. Skip them rather
      // than dereferencing a null handle during teardown.
      continue;
    }
    ColumnFamilyDescriptor cfdescriptor;
    Status s = h->GetDescriptor(&cfdescriptor);
    EXPECT_OK(s);
    column_families.push_back(cfdescriptor);
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  Destroy(column_families);
  // env_ was allocated with `new` in the constructor.
  delete env_;
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Returns default block-based table options with format_version set to the
// format this fixture was constructed with.
BlockBasedTableOptions GetBlockBasedTableOptions() {
  BlockBasedTableOptions table_options;
  table_options.format_version = format_;
  return table_options;
}
|
|
|
|
|
2016-06-06 21:40:36 +00:00
|
|
|
// Return the value to associate with the specified key
|
|
|
|
Slice Value(int k, std::string* storage) {
  // The generated bytes are written into *storage; the returned Slice refers
  // to that string, so it is only valid while *storage is left untouched.
  if (k != 0) {
    Random value_rng(k);
    *storage = value_rng.RandomString(kValueSize);
  } else {
    // Ugh. Random seed of 0 used to produce no entropy. This code
    // preserves the implementation that was in place when all of the
    // magic values in this file were picked.
    *storage = std::string(kValueSize, ' ');
  }
  return Slice(*storage);
}
|
|
|
|
|
|
|
|
void Build(int base, int n, int flush_every = 0) {
|
|
|
|
std::string key_space, value_space;
|
|
|
|
WriteBatch batch;
|
|
|
|
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
|
|
if (flush_every != 0 && i != 0 && i % flush_every == 0) {
|
2020-07-03 02:24:25 +00:00
|
|
|
DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
|
2016-06-06 21:40:36 +00:00
|
|
|
dbi->TEST_FlushMemTable();
|
|
|
|
}
|
|
|
|
|
|
|
|
int keyi = base + i;
|
|
|
|
Slice key(DBTestBase::Key(keyi));
|
|
|
|
|
|
|
|
batch.Clear();
|
|
|
|
batch.Put(handles_[0], key, Value(keyi, &value_space));
|
|
|
|
batch.Put(handles_[1], key, Value(keyi, &value_space));
|
|
|
|
batch.Put(handles_[2], key, Value(keyi, &value_space));
|
|
|
|
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CheckMissed() {
|
|
|
|
uint64_t next_expected = 0;
|
|
|
|
uint64_t missed = 0;
|
|
|
|
int bad_keys = 0;
|
|
|
|
int bad_values = 0;
|
|
|
|
int correct = 0;
|
|
|
|
std::string value_space;
|
|
|
|
for (int cf = 0; cf < 3; cf++) {
|
|
|
|
next_expected = 0;
|
|
|
|
Iterator* iter = db_->NewIterator(ReadOptions(false, true), handles_[cf]);
|
|
|
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
|
|
uint64_t key;
|
|
|
|
Slice in(iter->key());
|
|
|
|
in.remove_prefix(3);
|
|
|
|
if (!ConsumeDecimalNumber(&in, &key) || !in.empty() ||
|
|
|
|
key < next_expected) {
|
|
|
|
bad_keys++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
missed += (key - next_expected);
|
|
|
|
next_expected = key + 1;
|
|
|
|
if (iter->value() != Value(static_cast<int>(key), &value_space)) {
|
|
|
|
bad_values++;
|
|
|
|
} else {
|
|
|
|
correct++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
delete iter;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASSERT_EQ(0, bad_keys);
|
|
|
|
ASSERT_EQ(0, bad_values);
|
|
|
|
ASSERT_EQ(0, missed);
|
|
|
|
(void)correct;
|
|
|
|
}
|
|
|
|
|
2014-01-02 17:08:12 +00:00
|
|
|
void Close() {
|
2014-02-11 01:04:44 +00:00
|
|
|
for (auto h : handles_) {
|
2015-12-09 01:01:02 +00:00
|
|
|
if (h) {
|
2020-10-02 20:33:50 +00:00
|
|
|
ASSERT_OK(db_->DestroyColumnFamilyHandle(h));
|
2015-12-09 01:01:02 +00:00
|
|
|
}
|
2014-02-11 01:04:44 +00:00
|
|
|
}
|
|
|
|
handles_.clear();
|
2014-02-26 22:16:23 +00:00
|
|
|
names_.clear();
|
2014-01-02 17:08:12 +00:00
|
|
|
delete db_;
|
|
|
|
db_ = nullptr;
|
|
|
|
}
|
|
|
|
|
2014-02-26 22:16:23 +00:00
|
|
|
// Opens the database with the column families named in `cf` and returns the
// status of DB::Open. When `options` is empty every column family uses
// column_family_options_; otherwise options[i] is used for cf[i], so
// `options` must supply at least cf.size() entries. names_ is reset to `cf`.
Status TryOpen(std::vector<std::string> cf,
               std::vector<ColumnFamilyOptions> options = {}) {
  // options[i] is indexed below for every i < cf.size(); catch a too-short
  // vector here instead of reading out of bounds.
  assert(options.empty() || options.size() >= cf.size());
  std::vector<ColumnFamilyDescriptor> column_families;
  column_families.reserve(cf.size());
  names_.clear();
  for (size_t i = 0; i < cf.size(); ++i) {
    column_families.emplace_back(
        cf[i], options.empty() ? column_family_options_ : options[i]);
    names_.push_back(cf[i]);
  }
  return DB::Open(db_options_, dbname_, column_families, &handles_, &db_);
}
|
|
|
|
|
2014-04-09 16:56:17 +00:00
|
|
|
// Read-only counterpart of TryOpen: opens the column families named in `cf`
// through DB::OpenForReadOnly and returns its status. When `options` is empty
// every column family uses column_family_options_; otherwise options[i] is
// used for cf[i], so `options` must supply at least cf.size() entries.
Status OpenReadOnly(std::vector<std::string> cf,
                    std::vector<ColumnFamilyOptions> options = {}) {
  // options[i] is indexed below for every i < cf.size(); catch a too-short
  // vector here instead of reading out of bounds.
  assert(options.empty() || options.size() >= cf.size());
  std::vector<ColumnFamilyDescriptor> column_families;
  column_families.reserve(cf.size());
  names_.clear();
  for (size_t i = 0; i < cf.size(); ++i) {
    column_families.emplace_back(
        cf[i], options.empty() ? column_family_options_ : options[i]);
    names_.push_back(cf[i]);
  }
  return DB::OpenForReadOnly(db_options_, dbname_, column_families, &handles_,
                             &db_);
}
|
|
|
|
|
|
|
|
void AssertOpenReadOnly(std::vector<std::string> cf,
|
2022-11-02 21:34:24 +00:00
|
|
|
std::vector<ColumnFamilyOptions> options = {}) {
|
2014-04-09 16:56:17 +00:00
|
|
|
ASSERT_OK(OpenReadOnly(cf, options));
|
|
|
|
}
|
|
|
|
|
2014-02-26 22:16:23 +00:00
|
|
|
void Open(std::vector<std::string> cf,
|
|
|
|
std::vector<ColumnFamilyOptions> options = {}) {
|
|
|
|
ASSERT_OK(TryOpen(cf, options));
|
|
|
|
}
|
|
|
|
|
2022-11-02 21:34:24 +00:00
|
|
|
// Opens the database with only the "default" column family.
void Open() {
  Open(std::vector<std::string>{"default"});
}
|
2014-02-26 22:16:23 +00:00
|
|
|
|
2020-07-03 02:24:25 +00:00
|
|
|
// Returns db_ viewed as a DBImpl (via static_cast_with_check).
DBImpl* dbfull() {
  DBImpl* const impl = static_cast_with_check<DBImpl>(db_);
  return impl;
}
|
2014-02-25 18:38:04 +00:00
|
|
|
|
2014-02-26 22:16:23 +00:00
|
|
|
int GetProperty(int cf, std::string property) {
|
|
|
|
std::string value;
|
rocksdb: Replace ASSERT* with EXPECT* in functions that does not return void value
Summary:
gtest does not use exceptions to fail a unit test by design, and `ASSERT*`s are implemented using `return`. As a consequence we cannot use `ASSERT*` in a function that does not return `void` value ([[ https://code.google.com/p/googletest/wiki/AdvancedGuide#Assertion_Placement | 1]]), and have to fix our existing code. This diff does this in a generic way, with no manual changes.
In order to detect all existing `ASSERT*` that are used in functions that doesn't return void value, I change the code to generate compile errors for such cases.
In `util/testharness.h` I defined `EXPECT*` assertions, the same way as `ASSERT*`, and redefined `ASSERT*` to return `void`. Then executed:
```lang=bash
% USE_CLANG=1 make all -j55 -k 2> build.log
% perl -naF: -e 'print "-- -number=".$F[1]." ".$F[0]."\n" if /: error:/' \
build.log | xargs -L 1 perl -spi -e 's/ASSERT/EXPECT/g if $. == $number'
% make format
```
After that I reverted back change to `ASSERT*` in `util/testharness.h`. But preserved introduced `EXPECT*`, which is the same as `ASSERT*`. This will be deleted once switched to gtest.
This diff is independent and contains manual changes only in `util/testharness.h`.
Test Plan:
Make sure all tests are passing.
```lang=bash
% USE_CLANG=1 make check
```
Reviewers: igor, lgalanis, sdong, yufei.zhu, rven, meyering
Reviewed By: meyering
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D33333
2015-03-17 03:52:32 +00:00
|
|
|
EXPECT_TRUE(dbfull()->GetProperty(handles_[cf], property, &value));
|
2015-04-24 02:17:57 +00:00
|
|
|
#ifndef CYGWIN
|
2014-02-26 22:16:23 +00:00
|
|
|
return std::stoi(value);
|
2015-04-24 02:17:57 +00:00
|
|
|
#else
|
2015-06-08 18:43:55 +00:00
|
|
|
return std::strtol(value.c_str(), 0 /* off */, 10 /* base */);
|
2015-04-24 02:17:57 +00:00
|
|
|
#endif
|
2014-02-26 22:16:23 +00:00
|
|
|
}
|
|
|
|
|
2017-03-29 18:42:56 +00:00
|
|
|
// Returns true when the "rocksdb.is-write-stopped" integer property is 1.
bool IsDbWriteStopped() {
  // Initialized because EXPECT_TRUE is non-fatal: if the lookup fails we
  // still reach the comparison below and must not read an indeterminate
  // value.
  uint64_t v = 0;
  EXPECT_TRUE(dbfull()->GetIntProperty("rocksdb.is-write-stopped", &v));
  return (v == 1);
}
|
|
|
|
|
|
|
|
// Returns the value of the "rocksdb.actual-delayed-write-rate" integer
// property, or 0 if the property could not be read.
uint64_t GetDbDelayedWriteRate() {
  // Initialized because EXPECT_TRUE is non-fatal: if the lookup fails we
  // still return v and must not return an indeterminate value.
  uint64_t v = 0;
  EXPECT_TRUE(
      dbfull()->GetIntProperty("rocksdb.actual-delayed-write-rate", &v));
  return v;
}
|
|
|
|
|
2018-04-06 02:49:06 +00:00
|
|
|
void Destroy(const std::vector<ColumnFamilyDescriptor>& column_families =
|
2022-11-02 21:34:24 +00:00
|
|
|
std::vector<ColumnFamilyDescriptor>()) {
|
2015-12-28 19:23:06 +00:00
|
|
|
Close();
|
2018-04-06 02:49:06 +00:00
|
|
|
ASSERT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_),
|
|
|
|
column_families));
|
2014-01-28 19:05:04 +00:00
|
|
|
}
|
|
|
|
|
2014-02-26 22:16:23 +00:00
|
|
|
void CreateColumnFamilies(
|
|
|
|
const std::vector<std::string>& cfs,
|
|
|
|
const std::vector<ColumnFamilyOptions> options = {}) {
|
2014-11-11 21:47:22 +00:00
|
|
|
int cfi = static_cast<int>(handles_.size());
|
2014-01-28 19:05:04 +00:00
|
|
|
handles_.resize(cfi + cfs.size());
|
2014-02-26 22:16:23 +00:00
|
|
|
names_.resize(cfi + cfs.size());
|
|
|
|
for (size_t i = 0; i < cfs.size(); ++i) {
|
2016-01-07 02:14:01 +00:00
|
|
|
const auto& current_cf_opt =
|
|
|
|
options.size() == 0 ? column_family_options_ : options[i];
|
|
|
|
ASSERT_OK(
|
|
|
|
db_->CreateColumnFamily(current_cf_opt, cfs[i], &handles_[cfi]));
|
2014-02-26 22:16:23 +00:00
|
|
|
names_[cfi] = cfs[i];
|
2016-01-07 02:14:01 +00:00
|
|
|
|
|
|
|
// Verify the CF options of the returned CF handle.
|
|
|
|
ColumnFamilyDescriptor desc;
|
|
|
|
ASSERT_OK(handles_[cfi]->GetDescriptor(&desc));
|
2020-10-02 20:33:50 +00:00
|
|
|
// Need to sanitize the default column family options before comparing
|
|
|
|
// them.
|
|
|
|
ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(
|
|
|
|
ConfigOptions(), desc.options,
|
|
|
|
SanitizeOptions(dbfull()->immutable_db_options(), current_cf_opt)));
|
2014-02-26 22:16:23 +00:00
|
|
|
cfi++;
|
2014-01-28 19:05:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-26 22:16:23 +00:00
|
|
|
void Reopen(const std::vector<ColumnFamilyOptions> options = {}) {
|
|
|
|
std::vector<std::string> names;
|
2024-03-04 18:08:32 +00:00
|
|
|
for (const auto& name : names_) {
|
2014-02-26 22:16:23 +00:00
|
|
|
if (name != "") {
|
|
|
|
names.push_back(name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Close();
|
|
|
|
assert(options.size() == 0 || names.size() == options.size());
|
|
|
|
Open(names, options);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates the column families named in `cfs` with the default per-fixture
// options, then closes and reopens the database.
void CreateColumnFamiliesAndReopen(const std::vector<std::string>& cfs) {
  CreateColumnFamilies(cfs);
  Reopen();
}
|
|
|
|
|
2014-02-26 18:03:34 +00:00
|
|
|
void DropColumnFamilies(const std::vector<int>& cfs) {
|
2014-02-11 01:04:44 +00:00
|
|
|
for (auto cf : cfs) {
|
|
|
|
ASSERT_OK(db_->DropColumnFamily(handles_[cf]));
|
2020-10-02 20:33:50 +00:00
|
|
|
ASSERT_OK(db_->DestroyColumnFamilyHandle(handles_[cf]));
|
2014-02-11 01:04:44 +00:00
|
|
|
handles_[cf] = nullptr;
|
2014-02-26 22:16:23 +00:00
|
|
|
names_[cf] = "";
|
2014-02-11 01:04:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
void PutRandomData(int cf, int num, int key_value_size, bool save = false) {
|
2018-05-21 18:52:31 +00:00
|
|
|
if (cf >= static_cast<int>(keys_.size())) {
|
|
|
|
keys_.resize(cf + 1);
|
|
|
|
}
|
2014-02-26 22:16:23 +00:00
|
|
|
for (int i = 0; i < num; ++i) {
|
|
|
|
// 10 bytes for key, rest is value
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
if (!save) {
|
|
|
|
ASSERT_OK(Put(cf, test::RandomKey(&rnd_, 11),
|
2020-07-09 21:33:42 +00:00
|
|
|
rnd_.RandomString(key_value_size - 10)));
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
} else {
|
|
|
|
std::string key = test::RandomKey(&rnd_, 11);
|
2018-05-21 18:52:31 +00:00
|
|
|
keys_[cf].insert(key);
|
2020-07-09 21:33:42 +00:00
|
|
|
ASSERT_OK(Put(cf, key, rnd_.RandomString(key_value_size - 10)));
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
2014-02-25 18:38:04 +00:00
|
|
|
}
|
2020-10-02 20:33:50 +00:00
|
|
|
ASSERT_OK(db_->FlushWAL(/*sync=*/false));
|
2014-02-25 18:38:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Waits for the memtable flush of column family handles_[cf] and asserts
// that the wait reported success.
void WaitForFlush(int cf) {
  auto* const target = handles_[cf];
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(target));
}
|
|
|
|
|
2022-11-02 21:34:24 +00:00
|
|
|
// Waits via DBImpl::TEST_WaitForCompact() and asserts that it succeeded.
void WaitForCompaction() {
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
}
|
2015-06-29 21:39:01 +00:00
|
|
|
|
|
|
|
uint64_t MaxTotalInMemoryState() {
|
|
|
|
return dbfull()->TEST_MaxTotalInMemoryState();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Asserts that MaxTotalInMemoryState() currently equals `value`.
void AssertMaxTotalInMemoryState(uint64_t value) {
  const uint64_t actual = MaxTotalInMemoryState();
  ASSERT_EQ(value, actual);
}
|
2014-02-27 00:05:24 +00:00
|
|
|
|
2014-02-26 18:03:34 +00:00
|
|
|
// Writes key -> value into column family handles_[cf] with default
// WriteOptions and returns the resulting status.
Status Put(int cf, const std::string& key, const std::string& value) {
  auto* const target = handles_[cf];
  return db_->Put(WriteOptions(), target, Slice(key), Slice(value));
}
|
2014-02-26 18:03:34 +00:00
|
|
|
// Issues a Merge of `value` for `key` in column family handles_[cf] with
// default WriteOptions and returns the resulting status.
Status Merge(int cf, const std::string& key, const std::string& value) {
  auto* const target = handles_[cf];
  return db_->Merge(WriteOptions(), target, Slice(key), Slice(value));
}
|
2022-11-02 21:34:24 +00:00
|
|
|
// Flushes column family handles_[cf] with default FlushOptions and returns
// the resulting status.
Status Flush(int cf) {
  auto* const target = handles_[cf];
  return db_->Flush(FlushOptions(), target);
}
|
2014-01-28 19:05:04 +00:00
|
|
|
|
2014-02-26 18:03:34 +00:00
|
|
|
std::string Get(int cf, const std::string& key) {
|
2014-01-28 19:05:04 +00:00
|
|
|
ReadOptions options;
|
|
|
|
options.verify_checksums = true;
|
2014-02-26 18:03:34 +00:00
|
|
|
std::string result;
|
2014-01-28 19:05:04 +00:00
|
|
|
Status s = db_->Get(options, handles_[cf], Slice(key), &result);
|
|
|
|
if (s.IsNotFound()) {
|
|
|
|
result = "NOT_FOUND";
|
|
|
|
} else if (!s.ok()) {
|
|
|
|
result = s.ToString();
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2014-02-27 00:05:24 +00:00
|
|
|
void CompactAll(int cf) {
|
2015-06-17 21:36:14 +00:00
|
|
|
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[cf], nullptr,
|
|
|
|
nullptr));
|
2014-02-27 00:05:24 +00:00
|
|
|
}
|
|
|
|
|
2014-02-01 00:45:20 +00:00
|
|
|
void Compact(int cf, const Slice& start, const Slice& limit) {
|
2015-06-17 21:36:14 +00:00
|
|
|
ASSERT_OK(
|
|
|
|
db_->CompactRange(CompactRangeOptions(), handles_[cf], &start, &limit));
|
2014-02-01 00:45:20 +00:00
|
|
|
}
|
|
|
|
|
2014-02-27 00:05:24 +00:00
|
|
|
int NumTableFilesAtLevel(int level, int cf) {
|
|
|
|
return GetProperty(cf,
|
2022-05-06 20:03:58 +00:00
|
|
|
"rocksdb.num-files-at-level" + std::to_string(level));
|
2014-02-01 00:45:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Return spread of files per level
|
2014-02-26 18:03:34 +00:00
|
|
|
std::string FilesPerLevel(int cf) {
|
|
|
|
std::string result;
|
2014-02-01 00:45:20 +00:00
|
|
|
int last_non_zero_offset = 0;
|
2014-02-27 00:05:24 +00:00
|
|
|
for (int level = 0; level < dbfull()->NumberLevels(handles_[cf]); level++) {
|
|
|
|
int f = NumTableFilesAtLevel(level, cf);
|
2014-02-01 00:45:20 +00:00
|
|
|
char buf[100];
|
|
|
|
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
|
|
|
|
result += buf;
|
|
|
|
if (f > 0) {
|
2014-11-11 21:47:22 +00:00
|
|
|
last_non_zero_offset = static_cast<int>(result.size());
|
2014-02-01 00:45:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
result.resize(last_non_zero_offset);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2015-06-29 21:39:01 +00:00
|
|
|
// Asserts that FilesPerLevel(cf) equals `value`.
void AssertFilesPerLevel(const std::string& value, int cf) {
  const std::string actual = FilesPerLevel(cf);
  ASSERT_EQ(value, actual);
}
|
|
|
|
|
2014-04-30 18:33:40 +00:00
|
|
|
int CountLiveFiles() {
|
2014-02-11 01:04:44 +00:00
|
|
|
std::vector<LiveFileMetaData> metadata;
|
|
|
|
db_->GetLiveFilesMetaData(&metadata);
|
|
|
|
return static_cast<int>(metadata.size());
|
|
|
|
}
|
2015-06-29 21:39:01 +00:00
|
|
|
|
|
|
|
// Asserts that CountLiveFiles() equals `expected_value`.
void AssertCountLiveFiles(int expected_value) {
  const int actual = CountLiveFiles();
  ASSERT_EQ(expected_value, actual);
}
|
2014-02-11 01:04:44 +00:00
|
|
|
|
2014-02-01 00:45:20 +00:00
|
|
|
// Do n memtable flushes, each of which produces an sstable
|
|
|
|
// covering the range [small,large].
|
2014-02-26 18:03:34 +00:00
|
|
|
void MakeTables(int cf, int n, const std::string& small,
|
|
|
|
const std::string& large) {
|
2014-02-01 00:45:20 +00:00
|
|
|
for (int i = 0; i < n; i++) {
|
|
|
|
ASSERT_OK(Put(cf, small, "begin"));
|
|
|
|
ASSERT_OK(Put(cf, large, "end"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions(), handles_[cf]));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-25 18:38:04 +00:00
|
|
|
int CountLiveLogFiles() {
|
2014-02-27 18:29:13 +00:00
|
|
|
int micros_wait_for_log_deletion = 20000;
|
|
|
|
env_->SleepForMicroseconds(micros_wait_for_log_deletion);
|
2014-02-25 18:38:04 +00:00
|
|
|
int ret = 0;
|
|
|
|
VectorLogPtr wal_files;
|
2014-02-26 22:41:18 +00:00
|
|
|
Status s;
|
|
|
|
// GetSortedWalFiles is a flakey function -- it gets all the wal_dir
|
2015-04-25 09:14:27 +00:00
|
|
|
// children files and then later checks for their existence. if some of the
|
2014-02-26 22:41:18 +00:00
|
|
|
// log files doesn't exist anymore, it reports an error. it does all of this
|
|
|
|
// without DB mutex held, so if a background process deletes the log file
|
|
|
|
// while the function is being executed, it returns an error. We retry the
|
|
|
|
// function 10 times to avoid the error failing the test
|
|
|
|
for (int retries = 0; retries < 10; ++retries) {
|
|
|
|
wal_files.clear();
|
|
|
|
s = db_->GetSortedWalFiles(wal_files);
|
|
|
|
if (s.ok()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
rocksdb: Replace ASSERT* with EXPECT* in functions that does not return void value
Summary:
gtest does not use exceptions to fail a unit test by design, and `ASSERT*`s are implemented using `return`. As a consequence we cannot use `ASSERT*` in a function that does not return `void` value ([[ https://code.google.com/p/googletest/wiki/AdvancedGuide#Assertion_Placement | 1]]), and have to fix our existing code. This diff does this in a generic way, with no manual changes.
In order to detect all existing `ASSERT*` that are used in functions that doesn't return void value, I change the code to generate compile errors for such cases.
In `util/testharness.h` I defined `EXPECT*` assertions, the same way as `ASSERT*`, and redefined `ASSERT*` to return `void`. Then executed:
```lang=bash
% USE_CLANG=1 make all -j55 -k 2> build.log
% perl -naF: -e 'print "-- -number=".$F[1]." ".$F[0]."\n" if /: error:/' \
build.log | xargs -L 1 perl -spi -e 's/ASSERT/EXPECT/g if $. == $number'
% make format
```
After that I reverted back change to `ASSERT*` in `util/testharness.h`. But preserved introduced `EXPECT*`, which is the same as `ASSERT*`. This will be deleted once switched to gtest.
This diff is independent and contains manual changes only in `util/testharness.h`.
Test Plan:
Make sure all tests are passing.
```lang=bash
% USE_CLANG=1 make check
```
Reviewers: igor, lgalanis, sdong, yufei.zhu, rven, meyering
Reviewed By: meyering
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D33333
2015-03-17 03:52:32 +00:00
|
|
|
EXPECT_OK(s);
|
2014-02-25 18:38:04 +00:00
|
|
|
for (const auto& wal : wal_files) {
|
|
|
|
if (wal->Type() == kAliveLogFile) {
|
|
|
|
++ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
2015-06-29 21:39:01 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Asserts that CountLiveLogFiles() equals `value`.
void AssertCountLiveLogFiles(int value) {
  const int actual = CountLiveLogFiles();
  ASSERT_EQ(value, actual);
}
|
|
|
|
|
2014-02-26 22:16:23 +00:00
|
|
|
void AssertNumberOfImmutableMemtables(std::vector<int> num_per_cf) {
|
|
|
|
assert(num_per_cf.size() == handles_.size());
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_per_cf.size(); ++i) {
|
2014-11-11 21:47:22 +00:00
|
|
|
ASSERT_EQ(num_per_cf[i], GetProperty(static_cast<int>(i),
|
|
|
|
"rocksdb.num-immutable-mem-table"));
|
2014-02-26 22:16:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-26 18:03:34 +00:00
|
|
|
void CopyFile(const std::string& source, const std::string& destination,
|
2014-01-28 19:05:04 +00:00
|
|
|
uint64_t size = 0) {
|
|
|
|
const EnvOptions soptions;
|
2018-11-09 19:17:34 +00:00
|
|
|
std::unique_ptr<SequentialFile> srcfile;
|
2014-01-28 19:05:04 +00:00
|
|
|
ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions));
|
2018-11-09 19:17:34 +00:00
|
|
|
std::unique_ptr<WritableFile> destfile;
|
2014-01-28 19:05:04 +00:00
|
|
|
ASSERT_OK(env_->NewWritableFile(destination, &destfile, soptions));
|
|
|
|
|
|
|
|
if (size == 0) {
|
|
|
|
// default argument means copy everything
|
|
|
|
ASSERT_OK(env_->GetFileSize(source, &size));
|
|
|
|
}
|
|
|
|
|
|
|
|
char buffer[4096];
|
|
|
|
Slice slice;
|
|
|
|
while (size > 0) {
|
2014-02-26 18:03:34 +00:00
|
|
|
uint64_t one = std::min(uint64_t(sizeof(buffer)), size);
|
2014-01-28 19:05:04 +00:00
|
|
|
ASSERT_OK(srcfile->Read(one, &slice, buffer));
|
|
|
|
ASSERT_OK(destfile->Append(slice));
|
|
|
|
size -= slice.size();
|
|
|
|
}
|
|
|
|
ASSERT_OK(destfile->Close());
|
|
|
|
}
|
|
|
|
|
2018-04-06 02:49:06 +00:00
|
|
|
// Returns how many entries DBTestBase::GetSstFiles() reports for `path`.
int GetSstFileCount(std::string path) {
  std::vector<std::string> sst_files;
  DBTestBase::GetSstFiles(env_, path, &sst_files);
  const size_t file_count = sst_files.size();
  return static_cast<int>(file_count);
}
|
|
|
|
|
2022-11-02 21:34:24 +00:00
|
|
|
// Runs cfd->RecalculateWriteStallConditions(mutable_cf_options) while
// holding the DB mutex.
//
// The recalculation writes to CFStats, and background DBImpl::DumpStats()
// threads read CFStats; taking the mutex around the call avoids a data race
// between the two.
void RecalculateWriteStallConditions(
    ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options) {
  auto db_impl = dbfull();
  db_impl->TEST_LockMutex();
  cfd->RecalculateWriteStallConditions(mutable_cf_options);
  db_impl->TEST_UnlockMutex();
}
|
|
|
|
|
2014-02-26 18:03:34 +00:00
|
|
|
std::vector<ColumnFamilyHandle*> handles_;
|
2014-02-26 22:16:23 +00:00
|
|
|
std::vector<std::string> names_;
|
2018-05-21 18:52:31 +00:00
|
|
|
std::vector<std::set<std::string>> keys_;
|
2014-01-02 17:08:12 +00:00
|
|
|
ColumnFamilyOptions column_family_options_;
|
|
|
|
DBOptions db_options_;
|
2014-02-26 18:03:34 +00:00
|
|
|
std::string dbname_;
|
2014-02-01 00:45:20 +00:00
|
|
|
DB* db_ = nullptr;
|
2014-04-15 16:57:25 +00:00
|
|
|
EnvCounter* env_;
|
2019-08-09 22:08:36 +00:00
|
|
|
std::shared_ptr<Env> env_guard_;
|
2014-02-25 18:38:04 +00:00
|
|
|
Random rnd_;
|
2018-06-05 02:59:44 +00:00
|
|
|
uint32_t format_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class ColumnFamilyTest
|
|
|
|
: public ColumnFamilyTestBase,
|
|
|
|
virtual public ::testing::WithParamInterface<uint32_t> {
|
|
|
|
public:
|
|
|
|
ColumnFamilyTest() : ColumnFamilyTestBase(GetParam()) {}
|
2014-01-02 17:08:12 +00:00
|
|
|
};
|
|
|
|
|
2020-06-03 22:53:09 +00:00
|
|
|
// Run every ColumnFamilyTest case once with the default format version ...
INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest,
                        testing::Values(test::kDefaultFormatVersion));
// ... and once with the latest format version.
INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest,
                        testing::Values(kLatestFormatVersion));
|
2018-06-05 02:59:44 +00:00
|
|
|
|
|
|
|
// Column family IDs must never be recycled: once "three" (ID 3) has been
// dropped, the next column family created has to receive ID 4, regardless of
// how often the DB is reopened in between.
TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) {
  for (int round = 0; round < 3; ++round) {
    Open();
    CreateColumnFamilies({"one", "two", "three"});
    // Right after creation, each handle's ID equals its position.
    for (size_t pos = 0; pos < handles_.size(); ++pos) {
      auto handle_impl =
          static_cast_with_check<ColumnFamilyHandleImpl>(handles_[pos]);
      ASSERT_EQ(pos, handle_impl->GetID());
    }
    // Round 1 additionally reopens the DB before the drop.
    if (round == 1) {
      Reopen();
    }
    DropColumnFamilies({3});
    Reopen();
    // Round 2 reopens once more: this tests if max_column_family is
    // correctly persisted with WriteSnapshot().
    if (round == 2) {
      Reopen();
    }
    CreateColumnFamilies({"three2"});
    // The handle now at index 3 must not carry the ID 3 that was used for
    // the dropped column family "three".
    auto replacement =
        static_cast_with_check<ColumnFamilyHandleImpl>(handles_[3]);
    ASSERT_EQ(4U, replacement->GetID());
    Close();
    Destroy();
  }
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Creates a column family on a background thread while the main thread
// queries an aggregated int property. The sync point dependencies are
// intended to place the query between DBImpl::WriteOptionsFile:1 and
// DBImpl::WriteOptionsFile:2 of the creation.
TEST_P(ColumnFamilyTest, CreateCFRaceWithGetAggProperty) {
  Open();

  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->LoadDependency(
      {{"DBImpl::WriteOptionsFile:1",
        "ColumnFamilyTest.CreateCFRaceWithGetAggProperty:1"},
       {"ColumnFamilyTest.CreateCFRaceWithGetAggProperty:2",
        "DBImpl::WriteOptionsFile:2"}});
  sync_point->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread creator(
      [&] { CreateColumnFamilies({"one"}); });

  TEST_SYNC_POINT("ColumnFamilyTest.CreateCFRaceWithGetAggProperty:1");
  // Only the call itself matters here; the value read is not inspected.
  uint64_t table_readers_mem;
  db_->GetAggregatedIntProperty(DB::Properties::kEstimateTableReadersMem,
                                &table_readers_mem);
  TEST_SYNC_POINT("ColumnFamilyTest.CreateCFRaceWithGetAggProperty:2");

  creator.join();

  sync_point->DisableProcessing();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
class FlushEmptyCFTestWithParam
|
|
|
|
: public ColumnFamilyTestBase,
|
|
|
|
virtual public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
|
Ignore stale logs while restarting DBs
Summary:
Stale log files can be deleted out of order. This can happen for various reasons. One of the reason is that no data is ever inserted to a column family and we have an optimization to update its log number, but not all the old log files are cleaned up (the case shown in the unit tests added). It can also happen when we simply delete multiple log files out of order.
This causes data corruption because we simply increase seqID after processing the next row and we may end up with writing data with smaller seqID than what is already flushed to memtables.
In DB recovery, for the oldest files we are replaying, if there it contains no data for any column family, we ignore the sequence IDs in the file.
Test Plan: Add two unit tests that fail without the fix.
Reviewers: IslamAbdelRahman, igor, yiwu
Reviewed By: yiwu
Subscribers: hermanlee4, yoshinorim, leveldb, andrewkr, dhruba
Differential Revision: https://reviews.facebook.net/D60891
2016-07-19 18:48:00 +00:00
|
|
|
public:
|
2018-06-05 02:59:44 +00:00
|
|
|
FlushEmptyCFTestWithParam()
|
|
|
|
: ColumnFamilyTestBase(std::get<0>(GetParam())),
|
|
|
|
allow_2pc_(std::get<1>(GetParam())) {}
|
Ignore stale logs while restarting DBs
Summary:
Stale log files can be deleted out of order. This can happen for various reasons. One of the reason is that no data is ever inserted to a column family and we have an optimization to update its log number, but not all the old log files are cleaned up (the case shown in the unit tests added). It can also happen when we simply delete multiple log files out of order.
This causes data corruption because we simply increase seqID after processing the next row and we may end up with writing data with smaller seqID than what is already flushed to memtables.
In DB recovery, for the oldest files we are replaying, if there it contains no data for any column family, we ignore the sequence IDs in the file.
Test Plan: Add two unit tests that fail without the fix.
Reviewers: IslamAbdelRahman, igor, yiwu
Reviewed By: yiwu
Subscribers: hermanlee4, yoshinorim, leveldb, andrewkr, dhruba
Differential Revision: https://reviews.facebook.net/D60891
2016-07-19 18:48:00 +00:00
|
|
|
|
|
|
|
// Required if inheriting from testing::WithParamInterface<>
|
|
|
|
static void SetUpTestCase() {}
|
|
|
|
static void TearDownTestCase() {}
|
|
|
|
|
|
|
|
bool allow_2pc_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Simulates a crash while a stale log file (A) is still on disk and checks
// that the values written last are the ones visible after recovery.
TEST_P(FlushEmptyCFTestWithParam, FlushEmptyCFTest) {
  auto fault_env = std::make_unique<FaultInjectionTestEnv>(env_);
  db_options_.env = fault_env.get();
  db_options_.allow_2pc = allow_2pc_;
  Open();
  CreateColumnFamilies({"one", "two"});
  // Generate log file A.
  ASSERT_OK(Put(1, "foo", "v1"));  // seqID 1

  Reopen();
  // Log file A survives the reopen because the default column family's min
  // log number is 0.
  // The reopen flushes to SST file X.
  ASSERT_OK(Put(1, "foo", "v1"));  // seqID 2
  ASSERT_OK(Put(1, "bar", "v2"));  // seqID 3
  // The current log file is now B. The flush creates a new log file C and
  // makes it current. Both CFs' min log number is set to C in memory, so B
  // is deleted after the flush. The default CF's min log number is not
  // written to the manifest, though, and log file A still remains.
  // Flushed to SST file Y.
  ASSERT_OK(Flush(1));
  ASSERT_OK(Flush(0));
  ASSERT_OK(Put(1, "bar", "v3"));  // seqID 4
  ASSERT_OK(Put(1, "foo", "v4"));  // seqID 5
  ASSERT_OK(db_->FlushWAL(/*sync=*/false));

  // Preserve file system state up to here to simulate a crash condition.
  fault_env->SetFilesystemActive(false);
  // Reopen with every non-empty name recorded in names_.
  std::vector<std::string> surviving_names;
  for (const auto& cf_name : names_) {
    if (cf_name.empty()) {
      continue;
    }
    surviving_names.push_back(cf_name);
  }

  Close();
  fault_env->ResetState();

  // Before opening, there are four files:
  //   Log file A contains seqID 1
  //   Log file C contains seqID 4, 5
  //   SST file X contains seqID 1
  //   SST file Y contains seqID 2, 3
  // Min log number:
  //   default CF: 0
  //   CF one, two: C
  // When opening the DB, all the seqID should be preserved.
  Open(surviving_names, {});
  ASSERT_EQ("v4", Get(1, "foo"));
  ASSERT_EQ("v3", Get(1, "bar"));
  Close();

  // Point the options back at the plain env before fault_env is destroyed.
  db_options_.env = env_;
}
|
|
|
|
|
|
|
|
// Variant of FlushEmptyCFTest in which every column family is flushed before
// the simulated crash, so only log files A and D are left for recovery.
TEST_P(FlushEmptyCFTestWithParam, FlushEmptyCFTest2) {
  auto fault_env = std::make_unique<FaultInjectionTestEnv>(env_);
  db_options_.env = fault_env.get();
  db_options_.allow_2pc = allow_2pc_;
  Open();
  CreateColumnFamilies({"one", "two"});
  // Generate log file A.
  ASSERT_OK(Put(1, "foo", "v1"));  // seqID 1

  Reopen();
  // Log file A survives the reopen because the default column family's min
  // log number is 0.
  // The reopen flushes to SST file X.
  ASSERT_OK(Put(1, "foo", "v1"));  // seqID 2
  ASSERT_OK(Put(1, "bar", "v2"));  // seqID 3
  // The current log file is now B. The flush creates a new log file C and
  // makes it current. Both CFs' min log number is set to C, so B is deleted
  // after the flush. Log file A still remains.
  // Flushed to SST file Y.
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(0, "bar", "v2"));  // seqID 4
  ASSERT_OK(Put(2, "bar", "v2"));  // seqID 5
  ASSERT_OK(Put(1, "bar", "v3"));  // seqID 6
  // Flushing all column families forces every CF's min log to the current
  // one, and this is written to the manifest file. Log file C is cleared.
  ASSERT_OK(Flush(0));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Flush(2));
  // Write to log file D
  ASSERT_OK(Put(1, "bar", "v4"));  // seqID 7
  ASSERT_OK(Put(1, "bar", "v5"));  // seqID 8
  ASSERT_OK(db_->FlushWAL(/*sync=*/false));
  // Preserve file system state up to here to simulate a crash condition.
  fault_env->SetFilesystemActive(false);
  // Reopen with every non-empty name recorded in names_.
  std::vector<std::string> surviving_names;
  for (const auto& cf_name : names_) {
    if (cf_name.empty()) {
      continue;
    }
    surviving_names.push_back(cf_name);
  }

  Close();
  fault_env->ResetState();
  // Before opening, there are two logfiles:
  //   Log file A contains seqID 1
  //   Log file D contains seqID 7, 8
  // Min log number:
  //   default CF: D
  //   CF one, two: D
  // When opening the DB, log file D should be replayed using the seqID
  // specified in the file.
  Open(surviving_names, {});
  ASSERT_EQ("v1", Get(1, "foo"));
  ASSERT_EQ("v5", Get(1, "bar"));
  Close();

  // Point the options back at the plain env before fault_env is destroyed.
  db_options_.env = env_;
}
|
|
|
|
|
2020-06-03 22:53:09 +00:00
|
|
|
// Default format version, with allow_2pc both on and off.
INSTANTIATE_TEST_CASE_P(
    FormatDef, FlushEmptyCFTestWithParam,
    testing::Values(std::make_tuple(test::kDefaultFormatVersion, true),
                    std::make_tuple(test::kDefaultFormatVersion, false)));
// Latest format version, with allow_2pc both on and off.
INSTANTIATE_TEST_CASE_P(
    FormatLatest, FlushEmptyCFTestWithParam,
    testing::Values(std::make_tuple(kLatestFormatVersion, true),
                    std::make_tuple(kLatestFormatVersion, false)));
|
Ignore stale logs while restarting DBs
Summary:
Stale log files can be deleted out of order. This can happen for various reasons. One of the reasons is that no data is ever inserted into a column family and we have an optimization to update its log number, but not all the old log files are cleaned up (the case shown in the unit tests added). It can also happen when we simply delete multiple log files out of order.
This causes data corruption because we simply increase seqID after processing the next row and we may end up with writing data with smaller seqID than what is already flushed to memtables.
During DB recovery, for the oldest log files we are replaying, if a file contains no data for any column family, we ignore the sequence IDs in that file.
Test Plan: Add two unit tests that fail without the fix.
Reviewers: IslamAbdelRahman, igor, yiwu
Reviewed By: yiwu
Subscribers: hermanlee4, yoshinorim, leveldb, andrewkr, dhruba
Differential Revision: https://reviews.facebook.net/D60891
2016-07-19 18:48:00 +00:00
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Creates and drops column families, checks reads along the way, and finally
// verifies which families the DB lists after being closed.
TEST_P(ColumnFamilyTest, AddDrop) {
  Open();
  CreateColumnFamilies({"one", "two", "three"});
  ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
  ASSERT_EQ("NOT_FOUND", Get(2, "fodor"));
  DropColumnFamilies({2});
  ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
  CreateColumnFamilies({"four"});
  ASSERT_EQ("NOT_FOUND", Get(3, "fodor"));
  // A write to index 1 must be visible there and nowhere else.
  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  ASSERT_EQ("NOT_FOUND", Get(3, "fodor"));
  Close();
  // Opening with only "default" is expected to be rejected.
  ASSERT_TRUE(TryOpen({"default"}).IsInvalidArgument());
  Open({"default", "one", "three", "four"});
  DropColumnFamilies({1});
  Reopen();
  Close();

  std::vector<std::string> listed;
  ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &listed));
  // Sort so the comparison does not depend on listing order.
  std::sort(listed.begin(), listed.end());
  const std::vector<std::string> expected = {"default", "four", "three"};
  ASSERT_TRUE(listed == expected);
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Exercises both bulk CreateColumnFamilies() overloads (by name list and by
// descriptor list) together with bulk DropColumnFamilies(), then checks that
// only "default" is listed afterwards.
TEST_P(ColumnFamilyTest, BulkAddDrop) {
  constexpr int kNumCF = 1000;
  ColumnFamilyOptions cf_options;
  WriteOptions write_options;
  Open();

  // Batch 1: create from a list of names ("cf1-1" .. "cf1-1000").
  std::vector<std::string> cf_names;
  std::vector<ColumnFamilyHandle*> cf_handles;
  for (int n = 0; n < kNumCF; ++n) {
    cf_names.push_back("cf1-" + std::to_string(n + 1));
  }
  ASSERT_OK(db_->CreateColumnFamilies(cf_options, cf_names, &cf_handles));
  for (int n = 0; n < kNumCF; ++n) {
    ASSERT_OK(db_->Put(write_options, cf_handles[n], "foo", "bar"));
  }
  ASSERT_OK(db_->DropColumnFamilies(cf_handles));
  std::vector<ColumnFamilyDescriptor> cf_descriptors;
  // The handles are deleted by the test itself once the families are dropped.
  for (auto* dropped : cf_handles) {
    delete dropped;
  }
  cf_handles.clear();

  // Batch 2: create from descriptors ("cf2-1" .. "cf2-1000").
  for (int n = 0; n < kNumCF; ++n) {
    cf_descriptors.emplace_back("cf2-" + std::to_string(n + 1),
                                ColumnFamilyOptions());
  }
  ASSERT_OK(db_->CreateColumnFamilies(cf_descriptors, &cf_handles));
  for (int n = 0; n < kNumCF; ++n) {
    ASSERT_OK(db_->Put(write_options, cf_handles[n], "foo", "bar"));
  }
  ASSERT_OK(db_->DropColumnFamilies(cf_handles));
  for (auto* dropped : cf_handles) {
    delete dropped;
  }
  Close();

  std::vector<std::string> listed;
  ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &listed));
  std::sort(listed.begin(), listed.end());
  const std::vector<std::string> expected = {"default"};
  ASSERT_TRUE(listed == expected);
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Dropping a column family must delete all of its files. Runs twice: first
// without, then with a DB reopen between the flush and the drop.
TEST_P(ColumnFamilyTest, DropTest) {
  for (const bool reopen_before_drop : {false, true}) {
    Open({"default"});
    CreateColumnFamiliesAndReopen({"pikachu"});
    for (int n = 0; n < 100; ++n) {
      const std::string key = std::to_string(n);
      ASSERT_OK(Put(1, key, "bar" + key));
    }
    ASSERT_OK(Flush(1));

    if (reopen_before_drop) {
      Reopen();
    }
    ASSERT_EQ("bar1", Get(1, "1"));

    AssertCountLiveFiles(1);
    DropColumnFamilies({1});
    // make sure that all files are deleted when we drop the column family
    AssertCountLiveFiles(0);
    Destroy();
  }
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// A batch that references a dropped column family is accepted only when
// ignore_missing_column_families is set; with default write options it is
// rejected as InvalidArgument.
TEST_P(ColumnFamilyTest, WriteBatchFailure) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});

  WriteBatch batch;
  ASSERT_OK(batch.Put(handles_[0], Slice("existing"), Slice("column-family")));
  ASSERT_OK(
      batch.Put(handles_[1], Slice("non-existing"), Slice("column-family")));
  ASSERT_OK(db_->Write(WriteOptions(), &batch));

  // From here on the batch contains an entry for a dropped column family.
  DropColumnFamilies({1});

  WriteOptions lenient_options;
  lenient_options.ignore_missing_column_families = true;
  ASSERT_OK(
      batch.Put(handles_[0], Slice("still here"), Slice("column-family")));
  ASSERT_OK(db_->Write(lenient_options, &batch));
  ASSERT_EQ("column-family", Get(0, "still here"));

  const Status strict_result = db_->Write(WriteOptions(), &batch);
  ASSERT_TRUE(strict_result.IsInvalidArgument());
  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Writes to three column families and checks that each key is visible only
// in the family it was written to, both before and after reopening the DB.
TEST_P(ColumnFamilyTest, ReadWrite) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});
  ASSERT_OK(Put(0, "foo", "v1"));
  ASSERT_OK(Put(0, "bar", "v2"));
  ASSERT_OK(Put(1, "mirko", "v3"));
  ASSERT_OK(Put(0, "foo", "v2"));  // overwrites the "v1" written above
  ASSERT_OK(Put(2, "fodor", "v5"));

  // Four verification passes; the DB is reopened after each of the first two.
  for (int pass = 0; pass < 4; ++pass) {
    ASSERT_EQ("v2", Get(0, "foo"));
    ASSERT_EQ("v2", Get(0, "bar"));
    ASSERT_EQ("v3", Get(1, "mirko"));
    ASSERT_EQ("v5", Get(2, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(0, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(2, "foo"));
    if (pass < 2) {
      Reopen();
    }
  }
  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Replays the same WAL files twice and checks that the merged values do not
// change. Column families use the UInt64 add merge operator, so applying the
// same log records a second time would show up as different sums.
TEST_P(ColumnFamilyTest, IgnoreRecoveredLog) {
  std::string backup_dir = dbname_ + "/backup_logs";

  // Make sure the backup directory exists and contains no leftover files.
  ASSERT_OK(env_->CreateDirIfMissing(dbname_));
  ASSERT_OK(env_->CreateDirIfMissing(backup_dir));
  std::vector<std::string> stale_files;
  ASSERT_OK(env_->GetChildren(backup_dir, &stale_files));
  for (const std::string& name : stale_files) {
    ASSERT_OK(env_->DeleteFile(backup_dir + "/" + name));
  }

  // Configure the merge operator and a dedicated WAL directory, then start
  // from a freshly destroyed DB.
  column_family_options_.merge_operator =
      MergeOperators::CreateUInt64AddOperator();
  db_options_.wal_dir = dbname_ + "/logs";
  Destroy();
  Open();
  CreateColumnFamilies({"cf1", "cf2"});

  // Fixed64-encoded operands / expected results.
  std::string enc_one;
  std::string enc_two;
  std::string enc_three;
  PutFixed64(&enc_one, 1);
  PutFixed64(&enc_two, 2);
  PutFixed64(&enc_three, 3);

  // Fill up the DB with merge operands.
  ASSERT_OK(Merge(0, "foo", enc_one));
  ASSERT_OK(Merge(1, "mirko", enc_one));
  ASSERT_OK(Merge(0, "foo", enc_one));
  ASSERT_OK(Merge(2, "bla", enc_one));
  ASSERT_OK(Merge(2, "fodor", enc_one));
  ASSERT_OK(Merge(0, "bar", enc_one));
  ASSERT_OK(Merge(2, "bla", enc_one));
  ASSERT_OK(Merge(1, "mirko", enc_two));
  ASSERT_OK(Merge(1, "franjo", enc_one));

  // Copy the current WAL files into the backup directory.
  std::vector<std::string> wal_files;
  ASSERT_OK(env_->GetChildren(db_options_.wal_dir, &wal_files));
  for (const std::string& name : wal_files) {
    const std::string src = db_options_.wal_dir + "/" + name;
    const std::string dst = backup_dir + "/" + name;
    CopyFile(src, dst);
  }

  // Recover the DB.
  Close();

  // Round 0: open the DB and check consistency, then copy the backed-up logs
  //          back into the WAL directory.
  // Round 1: open again and check consistency. If recovery applied the same
  //          log files a second time, the merge operands would be added
  //          twice and the values below would not match.
  for (int round = 0; round < 2; ++round) {
    Open({"default", "cf1", "cf2"});
    ASSERT_EQ(enc_two, Get(0, "foo"));
    ASSERT_EQ(enc_one, Get(0, "bar"));
    ASSERT_EQ(enc_three, Get(1, "mirko"));
    ASSERT_EQ(enc_one, Get(1, "franjo"));
    ASSERT_EQ(enc_one, Get(2, "fodor"));
    ASSERT_EQ(enc_two, Get(2, "bla"));
    Close();

    if (round != 0) {
      continue;
    }
    // Restore the logs from backup into the WAL directory.
    for (const std::string& name : wal_files) {
      const std::string src = backup_dir + "/" + name;
      const std::string dst = db_options_.wal_dir + "/" + name;
      CopyFile(src, dst);
    }
  }
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Flushes every column family (once while iterators are outstanding, once
// without), then verifies that all data is still readable across reopens.
TEST_P(ColumnFamilyTest, FlushTest) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});

  ASSERT_OK(Put(0, "foo", "v1"));
  ASSERT_OK(Put(0, "bar", "v2"));
  ASSERT_OK(Put(1, "mirko", "v3"));
  ASSERT_OK(Put(0, "foo", "v2"));
  ASSERT_OK(Put(2, "fodor", "v5"));

  for (int round = 0; round < 2; ++round) {
    ReadOptions read_opts;
    std::vector<Iterator*> held_iters;

    // Hold super version: only the first round creates iterators, which stay
    // alive until the end of the round.
    const bool hold_super_version = (round == 0);
    if (hold_super_version) {
      ASSERT_OK(db_->NewIterators(read_opts, handles_, &held_iters));
    }

    // Flush each of the three column families. The value reported by
    // MaxTotalInMemoryState() before the flush is handed to
    // AssertMaxTotalInMemoryState() afterwards.
    for (int cf = 0; cf < 3; ++cf) {
      const uint64_t state_before_flush = MaxTotalInMemoryState();
      ASSERT_OK(Flush(cf));
      AssertMaxTotalInMemoryState(state_before_flush);
    }

    ASSERT_OK(Put(1, "foofoo", "bar"));
    ASSERT_OK(Put(0, "foofoo", "bar"));

    // Release the iterators created above (none in the second round).
    for (Iterator* held : held_iters) {
      ASSERT_OK(held->status());
      delete held;
    }
  }
  Reopen();

  // Three verification passes; the DB is reopened after each of the first
  // two.
  for (int pass = 0; pass < 3; ++pass) {
    ASSERT_EQ("v2", Get(0, "foo"));
    ASSERT_EQ("v2", Get(0, "bar"));
    ASSERT_EQ("v3", Get(1, "mirko"));
    ASSERT_EQ("v5", Get(2, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(0, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(2, "foo"));

    if (pass < 2) {
      Reopen();
    }
  }
  Close();
}
|
|
|
|
|
2014-02-25 18:38:04 +00:00
|
|
|
// Makes sure that obsolete log files get deleted
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, LogDeletionTest) {
|
2014-04-30 18:33:40 +00:00
|
|
|
db_options_.max_total_wal_size = std::numeric_limits<uint64_t>::max();
|
2015-08-26 21:19:31 +00:00
|
|
|
column_family_options_.arena_block_size = 4 * 1024;
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
column_family_options_.write_buffer_size = 128000; // 128KB
|
2014-02-26 22:16:23 +00:00
|
|
|
Open();
|
2014-02-25 18:38:04 +00:00
|
|
|
CreateColumnFamilies({"one", "two", "three", "four"});
|
|
|
|
// Each bracket is one log file. if number is in (), it means
|
|
|
|
// we don't need it anymore (it's been flushed)
|
|
|
|
// []
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(0);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(0, 1, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
// [0]
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(1, 1, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
// [0, 1]
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(1, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(1);
|
|
|
|
// [0, (1)] [1]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(2);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the bogus `std::function` patch above applied and letting it run for just a few seconds will crash the process with a general-protection fault (`EXC_BAD_ACCESS`).
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(0, 1, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
// [0, (1)] [0, 1]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(2);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to trigger this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the bogus `std::function` patch above applied and letting it run for just a few seconds will crash the process with a general-protection fault (`EXC_BAD_ACCESS`).
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(2, 1, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
// [0, (1)] [0, 1, 2]
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to trigger this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB that includes the bogus reproduction patch above (the one adding a `std::function` field to `struct ReadOptions`) and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(2, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(2);
|
|
|
|
// [0, (1)] [0, 1, (2)] [2]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(3);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will crash the process (lldb reports `EXC_BAD_ACCESS`, as shown below):
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(2, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(2);
|
|
|
|
// [0, (1)] [0, 1, (2)] [(2)] [2]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(4);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will crash the process (lldb reports `EXC_BAD_ACCESS`, as shown below):
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(3, 1, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
// [0, (1)] [0, 1, (2)] [(2)] [2, 3]
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(1, 1, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
// [0, (1)] [0, 1, (2)] [(2)] [1, 2, 3]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(4);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory is aligned to `sizeof(void*)`, which is only 8 bytes on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will crash the process with a general-protection fault (reported by lldb as `EXC_BAD_ACCESS`):
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(1, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(1);
|
|
|
|
// [0, (1)] [0, (1), (2)] [(2)] [(1), 2, 3] [1]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(5);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory is aligned to `sizeof(void*)`, which is only 8 bytes on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will crash the process with a general-protection fault (reported by lldb as `EXC_BAD_ACCESS`):
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(0, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(0);
|
|
|
|
// [(0), (1)] [(0), (1), (2)] [(2)] [(1), 2, 3] [1, (0)] [0]
|
|
|
|
// delete obsolete logs -->
|
|
|
|
// [(1), 2, 3] [1, (0)] [0]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(3);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
   // Default: 0
   uint64_t max_skippable_internal_keys;
 
+  std::function<void()> foo;
+
   ReadOptions();
   ReadOptions(bool cksum, bool cache);
 };
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(0, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(0);
|
|
|
|
// [(1), 2, 3] [1, (0)], [(0)] [0]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(4);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
   // Default: 0
   uint64_t max_skippable_internal_keys;
 
+  std::function<void()> foo;
+
   ReadOptions();
   ReadOptions(bool cksum, bool cache);
 };
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(1, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(1);
|
|
|
|
// [(1), 2, 3] [(1), (0)] [(0)] [0, (1)] [1]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(5);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(2, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(2);
|
|
|
|
// [(1), (2), 3] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(6);
|
arena: derive alignment unit from std::max_align_t
Summary:
As raised in #2265, the arena allocator will return memory that is improperly aligned to store a `std::function` on macOS. Oddly, I'm unable to tickle this bug without adding a `std::function` field to `struct ReadOptions`—but my proposal in #2265 does exactly that.
In any case, here's a simple reproduction. Apply this bogus patch to get a `std::function` into `struct ReadOptions`:
```
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1035,6 +1035,8 @@ struct ReadOptions {
// Default: 0
uint64_t max_skippable_internal_keys;
+ std::function<void()> foo;
+
ReadOptions();
ReadOptions(bool cksum, bool cache);
};
```
then compile `db_properties_test` *with ubsan* and run `ReadLatencyHistogramByLevel`:
```
$ make COMPILE_WITH_UBSAN=1 db_properties_test
$ ./db_properties_test --gtest_filter=DBPropertiesTest.ReadLatencyHistogramByLevel
```
ubsan will complain about several misaligned accesses:
```
Note: Google Test filter = DBPropertiesTest.ReadLatencyHistogramByLevel
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from DBPropertiesTest
[ RUN ] DBPropertiesTest.ReadLatencyHistogramByLevel
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
util/coding.h:362:3: runtime error: store to misaligned address 0x7fff5733fac4 for type 'unsigned long', which requires 8 byte alignment
0x7fff5733fac4: note: pointer points here
01 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 1d 96 0d 01 00 00 00
^
util/coding.h:372:12: runtime error: load of misaligned address 0x00010d85516c for type 'const unsigned long', which requires 8 byte alignment
0x00010d85516c: note: pointer points here
01 00 34 57 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 78 24 82 0a 01 00 00 00
^
version_set.cc:854: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:512: runtime error: constructor call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:505: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1583: runtime error: constructor call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1585:9: runtime error: store to misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:864:29: runtime error: upcast of misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:521:12: runtime error: load of misaligned address 0x00010dbfa5d8 for type 'rocksdb::TableCache *', which requires 16 byte alignment
0x00010dbfa5d8: note: pointer points here
00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00 00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00
^
db/version_set.cc:522:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:9: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:522:24: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:38: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:522:57: runtime error: load of misaligned address 0x00010dbfa678 for type 'rocksdb::RangeDelAggregator *', which requires 16 byte alignment
0x00010dbfa678: note: pointer points here
01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00 00 00 00 00 f8 db 70 0a 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:523:54: runtime error: load of misaligned address 0x00010dbfa668 for type 'rocksdb::HistogramImpl *', which requires 16 byte alignment
0x00010dbfa668: note: pointer points here
01 00 00 00 c8 88 a5 0d 01 00 00 00 00 00 00 00 01 00 00 00 d0 a1 bf 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:9: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:47: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:524:62: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/table_cache.cc:228:33: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1554:41: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
table/block_based_table_reader.cc:1396:21: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
include/rocksdb/options.h:931:8: runtime error: reference binding to misaligned address 0x00010dbfa628 for type 'const std::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1584:13: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *const' (aka '__base<void ()> *const'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
table/block_based_table_reader.cc:1555:24: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:244:54: runtime error: load of misaligned address 0x00010dbfa618 for type 'const bool', which requires 16 byte alignment
0x00010dbfa618: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/table_cache.cc:246:49: runtime error: reference binding to misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
db/version_set.cc:532:12: runtime error: member access within misaligned address 0x00010dbfa5e8 for type 'const rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
db/version_set.cc:532:26: runtime error: load of misaligned address 0x00010dbfa5f8 for type 'const rocksdb::Slice *const', which requires 16 byte alignment
0x00010dbfa5f8: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5c8 for type 'rocksdb::(anonymous namespace)::LevelFileIteratorState', which requires 16 byte alignment
0x00010dbfa5c8: note: pointer points here
00 00 00 00 a0 db 70 0a 01 00 00 00 00 00 00 00 00 00 00 00 90 14 98 0d 01 00 00 00 00 00 00 00
^
version_set.cc:493: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa5e8 for type 'rocksdb::ReadOptions', which requires 16 byte alignment
0x00010dbfa5e8: note: pointer points here
00 00 00 00 01 01 ff ff ff ff ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
options.h:931: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
functional:1765: runtime error: member call on misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:9: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1766:27: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: member access within misaligned address 0x00010dbfa628 for type 'std::__1::function<void ()>', which requires 16 byte alignment
0x00010dbfa628: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional:1768:14: runtime error: load of misaligned address 0x00010dbfa648 for type '__base *' (aka '__base<void ()> *'), which requires 16 byte alignment
0x00010dbfa648: note: pointer points here
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c8 a5 97 0d 01 00 00 00 38 36 9b 0d
^
[ OK ] DBPropertiesTest.ReadLatencyHistogramByLevel (1599 ms)
[----------] 1 test from DBPropertiesTest (1599 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1599 ms total)
[ PASSED ] 1 test.
```
So it seems the root cause is that the internal implementation of `std::function` on macOS (and perhaps with libc++ generally?) requires 16-byte aligned memory, but the arena allocator only guarantees that the returned memory will be `sizeof(void*)` aligned, which is only 8-byte alignment on my machine. This patch solves the problem by adjusting the allocator to derive the necessary alignment from `alignof(std::max_align_t)`, which is properly 16 bytes on my machine.
As I mentioned in #2265, none of RocksDB's tests will cause this unaligned access to actually abort the process, but, on macOS, linking CockroachDB against a version of RocksDB with the above patch and letting it run for just a few seconds will cause a SIGABRT.
```
Process 19792 stopped
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
cockroach`DBNewIter:
-> 0x4f5e78f <+95>: callq *0x28(%rax)
0x4f5e792 <+98>: jmp 0x4f5e79e ; <+110>
0x4f5e794 <+100>: movq -0x50(%rbp), %rcx
0x4f5e798 <+104>: movq %rax, %rdi
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000004f5e78f cockroach`DBNewIter + 95
```
I'd get you a backtrace, but [Go doesn't include cgo debug information on macOS](https://github.com/golang/go/issues/6942). I've also tried building against libc++ on Linux, where debug information would be available, but I can't seem to trigger the bug there.
In any case, this PR both fixes the segfault in CockroachDB and fixes the warnings reported by ubsan.
Closes https://github.com/facebook/rocksdb/pull/2347
Differential Revision: D5108596
Pulled By: yiwu-arbug
fbshipit-source-id: bd5e4323b2ce915ed4fe78e123cb8996aec75a00
2017-10-17 18:07:35 +00:00
|
|
|
PutRandomData(3, 1000, 128);
|
2014-02-25 18:38:04 +00:00
|
|
|
WaitForFlush(3);
|
|
|
|
// [(1), (2), (3)] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2, (3)] [3]
|
|
|
|
// delete obsolete logs -->
|
|
|
|
// [0, (1)] [1, (2)], [2, (3)] [3]
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(4);
|
2014-02-25 18:38:04 +00:00
|
|
|
Close();
|
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Writes one batch touching two column families, flushes only the first one,
// deactivates the filesystem, drops unsynced file data and reopens. After the
// reopen both column families must return the same value for the key, i.e.
// the batch was applied to both or to neither.
TEST_P(ColumnFamilyTest, CrashAfterFlush) {
  // Install a FaultInjectionTestEnv (constructed from env_) as the DB env for
  // the duration of this test; it is restored on the last line.
  std::unique_ptr<FaultInjectionTestEnv> fault_env{
      new FaultInjectionTestEnv(env_)};
  db_options_.env = fault_env.get();
  Open();
  CreateColumnFamilies({"one"});

  // A single batch with the same key/value in column families 0 and 1.
  WriteBatch two_cf_batch;
  ASSERT_OK(two_cf_batch.Put(handles_[0], Slice("foo"), Slice("bar")));
  ASSERT_OK(two_cf_batch.Put(handles_[1], Slice("foo"), Slice("bar")));
  ASSERT_OK(db_->Write(WriteOptions(), &two_cf_batch));

  // Only column family 0 is flushed before the filesystem is switched off.
  ASSERT_OK(Flush(0));
  fault_env->SetFilesystemActive(false);

  // Remember every non-empty column family name so the DB can be reopened
  // with the same set after Close().
  std::vector<std::string> reopen_names;
  for (const auto& cf_name : names_) {
    if (cf_name.empty()) {
      continue;
    }
    reopen_names.push_back(cf_name);
  }

  Close();
  ASSERT_OK(fault_env->DropUnsyncedFileData());
  fault_env->ResetState();
  Open(reopen_names, {});

  // Write batch should be atomic.
  ASSERT_EQ(Get(0, "foo"), Get(1, "foo"));

  Close();
  // Point the fixture back at the original env before fault_env goes out of
  // scope.
  db_options_.env = env_;
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Opening with only "default" must succeed; opening again while also asking
// for "dne", a column family this test never creates, must be rejected as an
// invalid argument.
TEST_P(ColumnFamilyTest, OpenNonexistentColumnFamily) {
  ASSERT_OK(TryOpen({"default"}));
  Close();

  const auto reopen_status = TryOpen({"default", "dne"});
  ASSERT_TRUE(reopen_status.IsInvalidArgument());
}
|
|
|
|
|
2014-02-26 22:16:23 +00:00
|
|
|
// Makes sure that obsolete log files get deleted
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, DifferentWriteBufferSizes) {
|
2014-04-30 18:33:40 +00:00
|
|
|
// disable flushing stale column families
|
|
|
|
db_options_.max_total_wal_size = std::numeric_limits<uint64_t>::max();
|
2014-02-26 22:16:23 +00:00
|
|
|
Open();
|
|
|
|
CreateColumnFamilies({"one", "two", "three"});
|
|
|
|
ColumnFamilyOptions default_cf, one, two, three;
|
|
|
|
// setup options. all column families have max_write_buffer_number setup to 10
|
2022-07-23 02:25:52 +00:00
|
|
|
// "default" -> 100KB memtable, start flushing immediately
|
2014-02-26 22:16:23 +00:00
|
|
|
// "one" -> 200KB memtable, start flushing with two immutable memtables
|
|
|
|
// "two" -> 1MB memtable, start flushing with three immutable memtables
|
|
|
|
// "three" -> 90KB memtable, start flushing with four immutable memtables
|
|
|
|
default_cf.write_buffer_size = 100000;
|
2015-08-26 21:19:31 +00:00
|
|
|
default_cf.arena_block_size = 4 * 4096;
|
2014-02-26 22:16:23 +00:00
|
|
|
default_cf.max_write_buffer_number = 10;
|
|
|
|
default_cf.min_write_buffer_number_to_merge = 1;
|
Refactor trimming logic for immutable memtables (#5022)
Summary:
MyRocks currently sets `max_write_buffer_number_to_maintain` in order to maintain enough history for transaction conflict checking. The effectiveness of this approach depends on the size of memtables. When memtables are small, it may not keep enough history; when memtables are large, this may consume too much memory.
We are proposing a new way to configure memtable list history: by limiting the memory usage of immutable memtables. The new option is `max_write_buffer_size_to_maintain` and it will take precedence over the old `max_write_buffer_number_to_maintain` if they are both set to non-zero values. The new option accounts for the total memory usage of flushed immutable memtables and mutable memtable. When the total usage exceeds the limit, RocksDB may start dropping immutable memtables (which is also called trimming history), starting from the oldest one.
The semantics of the old option actually works both as an upper bound and lower bound. History trimming will start if number of immutable memtables exceeds the limit, but it will never go below (limit-1) due to history trimming.
In order the mimic the behavior with the new option, history trimming will stop if dropping the next immutable memtable causes the total memory usage go below the size limit. For example, assuming the size limit is set to 64MB, and there are 3 immutable memtables with sizes of 20, 30, 30. Although the total memory usage is 80MB > 64MB, dropping the oldest memtable will reduce the memory usage to 60MB < 64MB, so in this case no memtable will be dropped.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5022
Differential Revision: D14394062
Pulled By: miasantreble
fbshipit-source-id: 60457a509c6af89d0993f988c9b5c2aa9e45f5c5
2019-08-23 20:54:09 +00:00
|
|
|
default_cf.max_write_buffer_size_to_maintain = 0;
|
2014-02-26 22:16:23 +00:00
|
|
|
one.write_buffer_size = 200000;
|
2015-08-26 21:19:31 +00:00
|
|
|
one.arena_block_size = 4 * 4096;
|
2014-02-26 22:16:23 +00:00
|
|
|
one.max_write_buffer_number = 10;
|
|
|
|
one.min_write_buffer_number_to_merge = 2;
|
Refactor trimming logic for immutable memtables (#5022)
Summary:
MyRocks currently sets `max_write_buffer_number_to_maintain` in order to maintain enough history for transaction conflict checking. The effectiveness of this approach depends on the size of memtables. When memtables are small, it may not keep enough history; when memtables are large, this may consume too much memory.
We are proposing a new way to configure memtable list history: by limiting the memory usage of immutable memtables. The new option is `max_write_buffer_size_to_maintain` and it will take precedence over the old `max_write_buffer_number_to_maintain` if they are both set to non-zero values. The new option accounts for the total memory usage of flushed immutable memtables and mutable memtable. When the total usage exceeds the limit, RocksDB may start dropping immutable memtables (which is also called trimming history), starting from the oldest one.
The semantics of the old option actually works both as an upper bound and lower bound. History trimming will start if number of immutable memtables exceeds the limit, but it will never go below (limit-1) due to history trimming.
In order the mimic the behavior with the new option, history trimming will stop if dropping the next immutable memtable causes the total memory usage go below the size limit. For example, assuming the size limit is set to 64MB, and there are 3 immutable memtables with sizes of 20, 30, 30. Although the total memory usage is 80MB > 64MB, dropping the oldest memtable will reduce the memory usage to 60MB < 64MB, so in this case no memtable will be dropped.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5022
Differential Revision: D14394062
Pulled By: miasantreble
fbshipit-source-id: 60457a509c6af89d0993f988c9b5c2aa9e45f5c5
2019-08-23 20:54:09 +00:00
|
|
|
one.max_write_buffer_size_to_maintain =
|
|
|
|
static_cast<int>(one.write_buffer_size);
|
2014-02-26 22:16:23 +00:00
|
|
|
two.write_buffer_size = 1000000;
|
2015-08-26 21:19:31 +00:00
|
|
|
two.arena_block_size = 4 * 4096;
|
2014-02-26 22:16:23 +00:00
|
|
|
two.max_write_buffer_number = 10;
|
|
|
|
two.min_write_buffer_number_to_merge = 3;
|
Refactor trimming logic for immutable memtables (#5022)
Summary:
MyRocks currently sets `max_write_buffer_number_to_maintain` in order to maintain enough history for transaction conflict checking. The effectiveness of this approach depends on the size of memtables. When memtables are small, it may not keep enough history; when memtables are large, this may consume too much memory.
We are proposing a new way to configure memtable list history: by limiting the memory usage of immutable memtables. The new option is `max_write_buffer_size_to_maintain` and it will take precedence over the old `max_write_buffer_number_to_maintain` if they are both set to non-zero values. The new option accounts for the total memory usage of flushed immutable memtables and mutable memtable. When the total usage exceeds the limit, RocksDB may start dropping immutable memtables (which is also called trimming history), starting from the oldest one.
The semantics of the old option actually works both as an upper bound and lower bound. History trimming will start if number of immutable memtables exceeds the limit, but it will never go below (limit-1) due to history trimming.
In order the mimic the behavior with the new option, history trimming will stop if dropping the next immutable memtable causes the total memory usage go below the size limit. For example, assuming the size limit is set to 64MB, and there are 3 immutable memtables with sizes of 20, 30, 30. Although the total memory usage is 80MB > 64MB, dropping the oldest memtable will reduce the memory usage to 60MB < 64MB, so in this case no memtable will be dropped.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5022
Differential Revision: D14394062
Pulled By: miasantreble
fbshipit-source-id: 60457a509c6af89d0993f988c9b5c2aa9e45f5c5
2019-08-23 20:54:09 +00:00
|
|
|
two.max_write_buffer_size_to_maintain =
|
|
|
|
static_cast<int>(two.write_buffer_size);
|
2015-12-07 18:13:58 +00:00
|
|
|
three.write_buffer_size = 4096 * 22;
|
2015-08-26 21:19:31 +00:00
|
|
|
three.arena_block_size = 4096;
|
2014-02-26 22:16:23 +00:00
|
|
|
three.max_write_buffer_number = 10;
|
|
|
|
three.min_write_buffer_number_to_merge = 4;
|
Refactor trimming logic for immutable memtables (#5022)
Summary:
MyRocks currently sets `max_write_buffer_number_to_maintain` in order to maintain enough history for transaction conflict checking. The effectiveness of this approach depends on the size of memtables. When memtables are small, it may not keep enough history; when memtables are large, this may consume too much memory.
We are proposing a new way to configure memtable list history: by limiting the memory usage of immutable memtables. The new option is `max_write_buffer_size_to_maintain` and it will take precedence over the old `max_write_buffer_number_to_maintain` if they are both set to non-zero values. The new option accounts for the total memory usage of flushed immutable memtables and mutable memtable. When the total usage exceeds the limit, RocksDB may start dropping immutable memtables (which is also called trimming history), starting from the oldest one.
The semantics of the old option actually works both as an upper bound and lower bound. History trimming will start if number of immutable memtables exceeds the limit, but it will never go below (limit-1) due to history trimming.
In order the mimic the behavior with the new option, history trimming will stop if dropping the next immutable memtable causes the total memory usage go below the size limit. For example, assuming the size limit is set to 64MB, and there are 3 immutable memtables with sizes of 20, 30, 30. Although the total memory usage is 80MB > 64MB, dropping the oldest memtable will reduce the memory usage to 60MB < 64MB, so in this case no memtable will be dropped.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5022
Differential Revision: D14394062
Pulled By: miasantreble
fbshipit-source-id: 60457a509c6af89d0993f988c9b5c2aa9e45f5c5
2019-08-23 20:54:09 +00:00
|
|
|
three.max_write_buffer_size_to_maintain =
|
|
|
|
static_cast<int>(three.write_buffer_size);
|
2014-02-26 22:16:23 +00:00
|
|
|
|
|
|
|
Reopen({default_cf, one, two, three});
|
|
|
|
|
2014-02-27 18:29:13 +00:00
|
|
|
int micros_wait_for_flush = 10000;
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(0, 100, 1000);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(0);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 0, 0, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(1);
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(1, 200, 1000);
|
|
|
|
env_->SleepForMicroseconds(micros_wait_for_flush);
|
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 0, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(2);
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(2, 1000, 1000);
|
|
|
|
env_->SleepForMicroseconds(micros_wait_for_flush);
|
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 1, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(3);
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(2, 1000, 1000);
|
|
|
|
env_->SleepForMicroseconds(micros_wait_for_flush);
|
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 2, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(4);
|
2015-12-07 18:13:58 +00:00
|
|
|
PutRandomData(3, 93, 990);
|
2014-02-26 22:16:23 +00:00
|
|
|
env_->SleepForMicroseconds(micros_wait_for_flush);
|
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 2, 1});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(5);
|
2015-12-07 18:13:58 +00:00
|
|
|
PutRandomData(3, 88, 990);
|
2014-02-26 22:16:23 +00:00
|
|
|
env_->SleepForMicroseconds(micros_wait_for_flush);
|
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 2, 2});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(6);
|
2015-12-07 18:13:58 +00:00
|
|
|
PutRandomData(3, 88, 990);
|
2014-02-26 22:16:23 +00:00
|
|
|
env_->SleepForMicroseconds(micros_wait_for_flush);
|
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 2, 3});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(7);
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(0, 100, 1000);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(0);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 2, 3});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(8);
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(2, 100, 10000);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(2);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 0, 3});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(9);
|
2015-12-07 18:13:58 +00:00
|
|
|
PutRandomData(3, 88, 990);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(3);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 0, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(10);
|
2015-12-07 18:13:58 +00:00
|
|
|
PutRandomData(3, 88, 990);
|
2014-02-26 22:16:23 +00:00
|
|
|
env_->SleepForMicroseconds(micros_wait_for_flush);
|
|
|
|
AssertNumberOfImmutableMemtables({0, 1, 0, 1});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(11);
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(1, 200, 1000);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(1);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 0, 0, 1});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(5);
|
2015-12-07 18:13:58 +00:00
|
|
|
PutRandomData(3, 88 * 3, 990);
|
2015-05-05 22:59:02 +00:00
|
|
|
WaitForFlush(3);
|
2015-12-07 18:13:58 +00:00
|
|
|
PutRandomData(3, 88 * 4, 990);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(3);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 0, 0, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(12);
|
2014-02-26 22:16:23 +00:00
|
|
|
PutRandomData(0, 100, 1000);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(0);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 0, 0, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(12);
|
2015-08-26 21:19:31 +00:00
|
|
|
PutRandomData(2, 3 * 1000, 1000);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(2);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 0, 0, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(12);
|
2022-11-02 21:34:24 +00:00
|
|
|
PutRandomData(1, 2 * 200, 1000);
|
2014-02-27 18:29:13 +00:00
|
|
|
WaitForFlush(1);
|
2014-02-26 22:16:23 +00:00
|
|
|
AssertNumberOfImmutableMemtables({0, 0, 0, 0});
|
2015-06-29 21:39:01 +00:00
|
|
|
AssertCountLiveLogFiles(7);
|
2014-02-26 22:16:23 +00:00
|
|
|
Close();
|
|
|
|
}
|
|
|
|
|
2019-02-08 00:06:40 +00:00
|
|
|
// The test is commented out because we want to test that snapshot is
|
|
|
|
// not created for memtables that don't support it, but there isn't a memtable
|
|
|
|
// that doesn't support snapshot right now. If we have one later, we can
|
|
|
|
// re-enable the test.
|
|
|
|
//
|
|
|
|
// TEST_P(ColumnFamilyTest, MemtableNotSupportSnapshot) {
|
|
|
|
// db_options_.allow_concurrent_memtable_write = false;
|
|
|
|
// Open();
|
|
|
|
// auto* s1 = dbfull()->GetSnapshot();
|
|
|
|
// ASSERT_TRUE(s1 != nullptr);
|
|
|
|
// dbfull()->ReleaseSnapshot(s1);
|
|
|
|
|
|
|
|
// // Add a column family that doesn't support snapshot
|
|
|
|
// ColumnFamilyOptions first;
|
|
|
|
// first.memtable_factory.reset(new DummyMemtableNotSupportingSnapshot());
|
|
|
|
// CreateColumnFamilies({"first"}, {first});
|
|
|
|
// auto* s2 = dbfull()->GetSnapshot();
|
|
|
|
// ASSERT_TRUE(s2 == nullptr);
|
|
|
|
|
|
|
|
// // Add a column family that supports snapshot. Snapshot stays not
// // supported.
// ColumnFamilyOptions second;
// CreateColumnFamilies({"second"}, {second});
// auto* s3 = dbfull()->GetSnapshot();
// ASSERT_TRUE(s3 == nullptr);
|
|
|
|
// Close();
|
|
|
|
// }
|
2014-12-11 02:39:09 +00:00
|
|
|
|
2016-07-25 22:06:11 +00:00
|
|
|
class TestComparator : public Comparator {
|
2020-02-20 20:07:53 +00:00
|
|
|
int Compare(const ROCKSDB_NAMESPACE::Slice& /*a*/,
|
|
|
|
const ROCKSDB_NAMESPACE::Slice& /*b*/) const override {
|
2016-07-25 22:06:11 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
const char* Name() const override { return "Test"; }
|
2020-02-20 20:07:53 +00:00
|
|
|
void FindShortestSeparator(
|
|
|
|
std::string* /*start*/,
|
|
|
|
const ROCKSDB_NAMESPACE::Slice& /*limit*/) const override {}
|
2018-03-05 21:08:17 +00:00
|
|
|
void FindShortSuccessor(std::string* /*key*/) const override {}
|
2016-07-25 22:06:11 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static TestComparator third_comparator;
|
|
|
|
static TestComparator fourth_comparator;
|
|
|
|
|
|
|
|
// Test that we can retrieve the comparator from a created CF
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, GetComparator) {
|
2016-07-25 22:06:11 +00:00
|
|
|
Open();
|
|
|
|
// Add a column family with no comparator specified
|
|
|
|
CreateColumnFamilies({"first"});
|
|
|
|
const Comparator* comp = handles_[0]->GetComparator();
|
|
|
|
ASSERT_EQ(comp, BytewiseComparator());
|
|
|
|
|
|
|
|
// Add three column families - one with no comparator and two
|
|
|
|
// with comparators specified
|
|
|
|
ColumnFamilyOptions second, third, fourth;
|
|
|
|
second.comparator = &third_comparator;
|
|
|
|
third.comparator = &fourth_comparator;
|
|
|
|
CreateColumnFamilies({"second", "third", "fourth"}, {second, third, fourth});
|
|
|
|
ASSERT_EQ(handles_[1]->GetComparator(), BytewiseComparator());
|
|
|
|
ASSERT_EQ(handles_[2]->GetComparator(), &third_comparator);
|
|
|
|
ASSERT_EQ(handles_[3]->GetComparator(), &fourth_comparator);
|
|
|
|
Close();
|
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Column families opened with different merge operators must each apply their
// own: index 0 has none, index 1 uses the uint64-add operator and index 2 uses
// the string-append operator.
TEST_P(ColumnFamilyTest, DifferentMergeOperators) {
  Open();
  CreateColumnFamilies({"first", "second"});

  ColumnFamilyOptions plain_opts;
  ColumnFamilyOptions uint64_add_opts;
  ColumnFamilyOptions string_append_opts;
  uint64_add_opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  string_append_opts.merge_operator =
      MergeOperators::CreateStringAppendOperator();
  Reopen({plain_opts, uint64_add_opts, string_append_opts});

  // Fixed64-encoded operands 1, 2 and 3.
  std::string enc_one;
  std::string enc_two;
  std::string enc_three;
  PutFixed64(&enc_one, 1);
  PutFixed64(&enc_two, 2);
  PutFixed64(&enc_three, 3);

  // No merge operator: Merge is rejected and the last Put remains visible.
  ASSERT_OK(Put(0, "foo", enc_two));
  ASSERT_OK(Put(0, "foo", enc_one));
  ASSERT_TRUE(Merge(0, "foo", enc_two).IsNotSupported());
  ASSERT_EQ(Get(0, "foo"), enc_one);

  // uint64-add: 1 merged with 2 reads back as 3.
  ASSERT_OK(Put(1, "foo", enc_two));
  ASSERT_OK(Put(1, "foo", enc_one));
  ASSERT_OK(Merge(1, "foo", enc_two));
  ASSERT_EQ(Get(1, "foo"), enc_three);

  // string-append: the operands are joined with a comma.
  ASSERT_OK(Put(2, "foo", enc_two));
  ASSERT_OK(Put(2, "foo", enc_one));
  ASSERT_OK(Merge(2, "foo", enc_two));
  ASSERT_EQ(Get(2, "foo"), enc_one + "," + enc_two);

  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Runs a universal-compaction column family ("one") and a level-compaction
// column family ("two") side by side in one DB and checks the file layout
// each ends up with after its compaction trigger is hit.
TEST_P(ColumnFamilyTest, DifferentCompactionStyles) {
  Open();
  CreateColumnFamilies({"one", "two"});

  ColumnFamilyOptions default_opts;
  ColumnFamilyOptions universal_opts;
  ColumnFamilyOptions level_opts;
  db_options_.max_open_files = 20;  // only 10 files in file cache

  // Default column family: level style, 3 levels.
  default_opts.level_compaction_dynamic_level_bytes = false;
  default_opts.compaction_style = kCompactionStyleLevel;
  default_opts.num_levels = 3;
  default_opts.write_buffer_size = 64 * 1024;  // 64KB
  default_opts.target_file_size_base = 30 * 1024;
  default_opts.max_compaction_bytes = uint64_t{1} << 60;

  BlockBasedTableOptions bbt_opts = GetBlockBasedTableOptions();
  bbt_opts.no_block_cache = true;
  default_opts.table_factory.reset(NewBlockBasedTableFactory(bbt_opts));

  // Column family "one": universal style, single level.
  universal_opts.compaction_style = kCompactionStyleUniversal;
  universal_opts.num_levels = 1;
  // trigger compaction if there are >= 4 files
  universal_opts.level0_file_num_compaction_trigger = 4;
  universal_opts.write_buffer_size = 120000;

  // Column family "two": level style, 4 levels, trigger at 3 files.
  level_opts.level_compaction_dynamic_level_bytes = false;
  level_opts.compaction_style = kCompactionStyleLevel;
  level_opts.num_levels = 4;
  level_opts.level0_file_num_compaction_trigger = 3;
  level_opts.write_buffer_size = 100000;

  Reopen({default_opts, universal_opts, level_opts});

  // The write pattern used throughout this test: one large batch followed by
  // one tiny batch into the given column family.
  auto write_round = [&](int cf) {
    PutRandomData(cf, 10, 12000);
    PutRandomData(cf, 1, 10);
  };

  // SETUP column family "one" -- universal style.
  // Stop one file short of the compaction trigger.
  for (int files = 1; files < universal_opts.level0_file_num_compaction_trigger;
       ++files) {
    write_round(1);
    WaitForFlush(1);
    AssertFilesPerLevel(std::to_string(files), 1);
  }

  // SETUP column family "two" -- level style with 4 levels.
  // Again stop one file short of the trigger.
  for (int files = 1; files < level_opts.level0_file_num_compaction_trigger;
       ++files) {
    write_round(2);
    WaitForFlush(2);
    AssertFilesPerLevel(std::to_string(files), 2);
  }

  // TRIGGER compaction "one"
  write_round(1);

  // TRIGGER compaction "two"
  write_round(2);

  // WAIT for compactions
  WaitForCompaction();

  // VERIFY compaction "one"
  AssertFilesPerLevel("1", 1);

  // VERIFY compaction "two": the layout must also survive a full manual
  // compaction unchanged.
  AssertFilesPerLevel("0,1", 2);
  CompactAll(2);
  AssertFilesPerLevel("0,1", 2);

  Close();
}
|
|
|
|
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
// Sync points not supported in RocksDB Lite
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Issues two non-exclusive manual compactions, one per column family, from
// separate threads and uses sync points to order them relative to each other
// and to the main thread. Afterwards the file layout of both column families
// and the presence of every saved key in column family 1 are verified.
TEST_P(ColumnFamilyTest, MultipleManualCompactions) {
  Open();
  CreateColumnFamilies({"one", "two"});

  ColumnFamilyOptions default_opts;
  ColumnFamilyOptions universal_opts;
  ColumnFamilyOptions level_opts;
  db_options_.max_open_files = 20;  // only 10 files in file cache
  db_options_.max_background_compactions = 3;

  // Default column family: level style, 3 levels.
  default_opts.level_compaction_dynamic_level_bytes = false;
  default_opts.compaction_style = kCompactionStyleLevel;
  default_opts.num_levels = 3;
  default_opts.write_buffer_size = 64 * 1024;  // 64KB
  default_opts.target_file_size_base = 30 * 1024;
  default_opts.max_compaction_bytes =
      default_opts.target_file_size_base * 1100;
  BlockBasedTableOptions bbt_opts = GetBlockBasedTableOptions();
  bbt_opts.no_block_cache = true;
  default_opts.table_factory.reset(NewBlockBasedTableFactory(bbt_opts));

  // Column family "one": universal style, single level.
  universal_opts.compaction_style = kCompactionStyleUniversal;
  universal_opts.num_levels = 1;
  // trigger compaction if there are >= 4 files
  universal_opts.level0_file_num_compaction_trigger = 4;
  universal_opts.write_buffer_size = 120000;

  // Column family "two": level style, 4 levels, trigger at 3 files.
  level_opts.level_compaction_dynamic_level_bytes = false;
  level_opts.compaction_style = kCompactionStyleLevel;
  level_opts.num_levels = 4;
  level_opts.level0_file_num_compaction_trigger = 3;
  level_opts.write_buffer_size = 100000;

  Reopen({default_opts, universal_opts, level_opts});

  // SETUP column family "one" -- universal style.
  // Stop two files short of the trigger; the trailing `true` argument asks
  // PutRandomData to save the keys (they are checked at the end).
  for (int files = 1;
       files < universal_opts.level0_file_num_compaction_trigger - 1;
       ++files) {
    PutRandomData(1, 10, 12000, true);
    PutRandomData(1, 1, 10, true);
    WaitForFlush(1);
    AssertFilesPerLevel(std::to_string(files), 1);
  }

  // Only the first non-trivial background compaction hits sync points 4 and 3.
  std::atomic_bool first_nontrivial_run{true};
  auto* const sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  // Each pair is {predecessor, successor}: 4 before 1, 2 before 5, 2 before 3.
  sync_point->LoadDependency(
      {{"ColumnFamilyTest::MultiManual:4", "ColumnFamilyTest::MultiManual:1"},
       {"ColumnFamilyTest::MultiManual:2", "ColumnFamilyTest::MultiManual:5"},
       {"ColumnFamilyTest::MultiManual:2", "ColumnFamilyTest::MultiManual:3"}});
  sync_point->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
        const bool is_first = first_nontrivial_run.exchange(false);
        if (!is_first) {
          return;
        }
        TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:4");
        TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:3");
      });

  sync_point->EnableProcessing();

  std::vector<port::Thread> workers;
  // Worker 1: non-exclusive manual compaction of column family 1.
  workers.emplace_back([&] {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = false;
    ASSERT_OK(db_->CompactRange(cro, handles_[1], nullptr, nullptr));
  });

  // SETUP column family "two" -- level style with 4 levels.
  // Stop two files short of the trigger.
  for (int files = 1;
       files < level_opts.level0_file_num_compaction_trigger - 1; ++files) {
    PutRandomData(2, 10, 12000);
    PutRandomData(2, 1, 10);
    WaitForFlush(2);
    AssertFilesPerLevel(std::to_string(files), 2);
  }

  // Worker 2: non-exclusive manual compaction of column family 2, bracketed
  // by sync points 1 and 2.
  workers.emplace_back([&] {
    TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:1");
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = false;
    ASSERT_OK(db_->CompactRange(cro, handles_[2], nullptr, nullptr));
    TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:2");
  });

  TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:5");
  for (auto& worker : workers) {
    worker.join();
  }

  // VERIFY compaction "one"
  AssertFilesPerLevel("1", 1);

  // VERIFY compaction "two": the layout must also survive a full manual
  // compaction unchanged.
  AssertFilesPerLevel("0,1", 2);
  CompactAll(2);
  AssertFilesPerLevel("0,1", 2);

  // Compare against saved keys
  for (const std::string& saved_key : keys_[1]) {
    ASSERT_NE("NOT_FOUND", Get(1, saved_key));
  }

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
Open();
|
|
|
|
CreateColumnFamilies({"one", "two"});
|
|
|
|
ColumnFamilyOptions default_cf, one, two;
|
|
|
|
db_options_.max_open_files = 20; // only 10 files in file cache
|
|
|
|
db_options_.max_background_compactions = 3;
|
|
|
|
|
2023-06-16 04:12:39 +00:00
|
|
|
default_cf.level_compaction_dynamic_level_bytes = false;
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
default_cf.compaction_style = kCompactionStyleLevel;
|
|
|
|
default_cf.num_levels = 3;
|
|
|
|
default_cf.write_buffer_size = 64 << 10; // 64KB
|
|
|
|
default_cf.target_file_size_base = 30 << 10;
|
2016-06-16 23:02:52 +00:00
|
|
|
default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
|
2018-06-05 02:59:44 +00:00
|
|
|
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
|
2023-06-16 04:12:39 +00:00
|
|
|
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
table_options.no_block_cache = true;
|
|
|
|
default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
|
|
|
|
|
|
one.compaction_style = kCompactionStyleUniversal;
|
|
|
|
|
|
|
|
one.num_levels = 1;
|
|
|
|
// trigger compaction if there are >= 4 files
|
|
|
|
one.level0_file_num_compaction_trigger = 4;
|
|
|
|
one.write_buffer_size = 120000;
|
|
|
|
|
2023-06-16 04:12:39 +00:00
|
|
|
two.level_compaction_dynamic_level_bytes = false;
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
two.compaction_style = kCompactionStyleLevel;
|
|
|
|
two.num_levels = 4;
|
|
|
|
two.level0_file_num_compaction_trigger = 3;
|
|
|
|
two.write_buffer_size = 100000;
|
|
|
|
|
|
|
|
Reopen({default_cf, one, two});
|
2017-05-24 18:25:38 +00:00
|
|
|
// make sure all background compaction jobs can be scheduled
|
|
|
|
auto stop_token =
|
|
|
|
dbfull()->TEST_write_controler().GetCompactionPressureToken();
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
|
2022-05-30 19:31:46 +00:00
|
|
|
std::atomic_bool cf_1_1{true};
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
{{"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:1"},
|
|
|
|
{"ColumnFamilyTest::AutoManual:2", "ColumnFamilyTest::AutoManual:5"},
|
|
|
|
{"ColumnFamilyTest::AutoManual:2", "ColumnFamilyTest::AutoManual:3"}});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
2018-04-13 00:55:14 +00:00
|
|
|
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
|
2022-05-30 19:31:46 +00:00
|
|
|
if (cf_1_1.exchange(false)) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:4");
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:3");
|
|
|
|
}
|
|
|
|
});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
// SETUP column family "one" -- universal style
|
|
|
|
for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) {
|
|
|
|
PutRandomData(1, 10, 12000, true);
|
|
|
|
PutRandomData(1, 1, 10, true);
|
|
|
|
WaitForFlush(1);
|
2022-05-06 20:03:58 +00:00
|
|
|
AssertFilesPerLevel(std::to_string(i + 1), 1);
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1");
|
|
|
|
|
|
|
|
// SETUP column family "two" -- level style with 4 levels
|
|
|
|
for (int i = 0; i < two.level0_file_num_compaction_trigger - 2; ++i) {
|
|
|
|
PutRandomData(2, 10, 12000);
|
|
|
|
PutRandomData(2, 1, 10);
|
|
|
|
WaitForFlush(2);
|
2022-05-06 20:03:58 +00:00
|
|
|
AssertFilesPerLevel(std::to_string(i + 1), 2);
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::port::Thread threads([&] {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
CompactRangeOptions compact_options;
|
|
|
|
compact_options.exclusive_manual_compaction = false;
|
|
|
|
ASSERT_OK(
|
|
|
|
db_->CompactRange(compact_options, handles_[2], nullptr, nullptr));
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:2");
|
|
|
|
});
|
|
|
|
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5");
|
|
|
|
threads.join();
|
|
|
|
|
|
|
|
// WAIT for compactions
|
|
|
|
WaitForCompaction();
|
|
|
|
|
|
|
|
// VERIFY compaction "one"
|
|
|
|
AssertFilesPerLevel("1", 1);
|
|
|
|
|
|
|
|
// VERIFY compaction "two"
|
|
|
|
AssertFilesPerLevel("0,1", 2);
|
|
|
|
CompactAll(2);
|
|
|
|
AssertFilesPerLevel("0,1", 2);
|
|
|
|
// Compare against saved keys
|
2018-05-21 18:52:31 +00:00
|
|
|
std::set<std::string>::iterator key_iter = keys_[1].begin();
|
|
|
|
while (key_iter != keys_[1].end()) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
ASSERT_NE("NOT_FOUND", Get(1, *key_iter));
|
|
|
|
key_iter++;
|
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
Open();
|
|
|
|
CreateColumnFamilies({"one", "two"});
|
|
|
|
ColumnFamilyOptions default_cf, one, two;
|
|
|
|
db_options_.max_open_files = 20; // only 10 files in file cache
|
|
|
|
db_options_.max_background_compactions = 3;
|
|
|
|
|
2023-06-16 04:12:39 +00:00
|
|
|
default_cf.level_compaction_dynamic_level_bytes = false;
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
default_cf.compaction_style = kCompactionStyleLevel;
|
|
|
|
default_cf.num_levels = 3;
|
|
|
|
default_cf.write_buffer_size = 64 << 10; // 64KB
|
|
|
|
default_cf.target_file_size_base = 30 << 10;
|
2016-06-16 23:02:52 +00:00
|
|
|
default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
|
2018-06-05 02:59:44 +00:00
|
|
|
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
|
2023-06-16 04:12:39 +00:00
|
|
|
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
table_options.no_block_cache = true;
|
|
|
|
default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
|
|
|
|
|
|
one.compaction_style = kCompactionStyleUniversal;
|
|
|
|
|
|
|
|
one.num_levels = 1;
|
|
|
|
// trigger compaction if there are >= 4 files
|
|
|
|
one.level0_file_num_compaction_trigger = 4;
|
|
|
|
one.write_buffer_size = 120000;
|
|
|
|
|
2023-06-16 04:12:39 +00:00
|
|
|
two.level_compaction_dynamic_level_bytes = false;
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
two.compaction_style = kCompactionStyleLevel;
|
|
|
|
two.num_levels = 4;
|
|
|
|
two.level0_file_num_compaction_trigger = 3;
|
|
|
|
two.write_buffer_size = 100000;
|
|
|
|
|
|
|
|
Reopen({default_cf, one, two});
|
2017-05-24 18:25:38 +00:00
|
|
|
// make sure all background compaction jobs can be scheduled
|
|
|
|
auto stop_token =
|
|
|
|
dbfull()->TEST_write_controler().GetCompactionPressureToken();
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
|
|
|
|
// SETUP column family "one" -- universal style
|
|
|
|
for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) {
|
|
|
|
PutRandomData(1, 10, 12000, true);
|
|
|
|
PutRandomData(1, 1, 10, true);
|
|
|
|
WaitForFlush(1);
|
2022-05-06 20:03:58 +00:00
|
|
|
AssertFilesPerLevel(std::to_string(i + 1), 1);
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
2022-05-30 19:31:46 +00:00
|
|
|
std::atomic_bool cf_1_1{true};
|
|
|
|
std::atomic_bool cf_1_2{true};
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
{{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:1"},
|
|
|
|
{"ColumnFamilyTest::ManualAuto:5", "ColumnFamilyTest::ManualAuto:2"},
|
|
|
|
{"ColumnFamilyTest::ManualAuto:2", "ColumnFamilyTest::ManualAuto:3"}});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
2018-04-13 00:55:14 +00:00
|
|
|
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
|
2022-05-30 19:31:46 +00:00
|
|
|
if (cf_1_1.exchange(false)) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4");
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3");
|
2022-05-30 19:31:46 +00:00
|
|
|
} else if (cf_1_2.exchange(false)) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2");
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
ROCKSDB_NAMESPACE::port::Thread threads([&] {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
CompactRangeOptions compact_options;
|
|
|
|
compact_options.exclusive_manual_compaction = false;
|
|
|
|
ASSERT_OK(
|
|
|
|
db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
|
|
|
|
});
|
|
|
|
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1");
|
|
|
|
|
|
|
|
// SETUP column family "two" -- level style with 4 levels
|
|
|
|
for (int i = 0; i < two.level0_file_num_compaction_trigger; ++i) {
|
|
|
|
PutRandomData(2, 10, 12000);
|
|
|
|
PutRandomData(2, 1, 10);
|
|
|
|
WaitForFlush(2);
|
2022-05-06 20:03:58 +00:00
|
|
|
AssertFilesPerLevel(std::to_string(i + 1), 2);
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5");
|
|
|
|
threads.join();
|
|
|
|
|
|
|
|
// WAIT for compactions
|
|
|
|
WaitForCompaction();
|
|
|
|
|
|
|
|
// VERIFY compaction "one"
|
|
|
|
AssertFilesPerLevel("1", 1);
|
|
|
|
|
|
|
|
// VERIFY compaction "two"
|
|
|
|
AssertFilesPerLevel("0,1", 2);
|
|
|
|
CompactAll(2);
|
|
|
|
AssertFilesPerLevel("0,1", 2);
|
|
|
|
// Compare against saved keys
|
2018-05-21 18:52:31 +00:00
|
|
|
std::set<std::string>::iterator key_iter = keys_[1].begin();
|
|
|
|
while (key_iter != keys_[1].end()) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
ASSERT_NE("NOT_FOUND", Get(1, *key_iter));
|
|
|
|
key_iter++;
|
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Issues two manual compactions against the same universal-style column
// family "one": the first with exclusive_manual_compaction = true, the second
// with exclusive_manual_compaction = false. Sync-point dependencies order the
// two background compactions relative to the test thread, and afterwards
// every key recorded in keys_[1] must still be readable.
TEST_P(ColumnFamilyTest, SameCFManualManualCompactions) {
  Open();
  CreateColumnFamilies({"one"});

  ColumnFamilyOptions default_opts, one_opts;
  db_options_.max_open_files = 20;  // only 10 files in file cache
  db_options_.max_background_compactions = 3;

  // Default column family: level style with small buffers and files.
  default_opts.compaction_style = kCompactionStyleLevel;
  default_opts.num_levels = 3;
  default_opts.write_buffer_size = 64 << 10;  // 64KB
  default_opts.target_file_size_base = 30 << 10;
  default_opts.max_compaction_bytes = default_opts.target_file_size_base * 1100;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.no_block_cache = true;
  default_opts.table_factory.reset(NewBlockBasedTableFactory(table_options));

  // Column family "one": universal style, single level.
  one_opts.compaction_style = kCompactionStyleUniversal;
  one_opts.num_levels = 1;
  // trigger compaction if there are >= 4 files
  one_opts.level0_file_num_compaction_trigger = 4;
  one_opts.write_buffer_size = 120000;

  Reopen({default_opts, one_opts});

  // make sure all background compaction jobs can be scheduled
  auto pressure_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();

  // SETUP column family "one" -- universal style.
  // Stay two files below the automatic compaction trigger.
  const int files_per_round = one_opts.level0_file_num_compaction_trigger - 2;
  for (int i = 0; i < files_per_round; ++i) {
    PutRandomData(1, 10, 12000, true);
    PutRandomData(1, 1, 10, true);
    WaitForFlush(1);
    AssertFilesPerLevel(std::to_string(i + 1), 1);
  }

  // One-shot flags: the first callback invocation takes the first branch, the
  // second invocation takes the second branch, later invocations do nothing.
  std::atomic_bool first_callback_pending{true};
  std::atomic_bool second_callback_pending{true};

  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  // Each pair is {predecessor, successor}: the successor sync point does not
  // proceed until the predecessor has been reached.
  sync_point->LoadDependency(
      {{"ColumnFamilyTest::ManualManual:4", "ColumnFamilyTest::ManualManual:2"},
       {"ColumnFamilyTest::ManualManual:4", "ColumnFamilyTest::ManualManual:5"},
       {"ColumnFamilyTest::ManualManual:1", "ColumnFamilyTest::ManualManual:2"},
       {"ColumnFamilyTest::ManualManual:1",
        "ColumnFamilyTest::ManualManual:3"}});
  sync_point->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
        if (first_callback_pending.exchange(false)) {
          TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:4");
          TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:3");
          return;
        }
        if (second_callback_pending.exchange(false)) {
          TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:2");
        }
      });
  sync_point->EnableProcessing();

  // First manual compaction: exclusive.
  ROCKSDB_NAMESPACE::port::Thread exclusive_compaction([&] {
    CompactRangeOptions compact_options;
    compact_options.exclusive_manual_compaction = true;
    ASSERT_OK(
        db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
  });

  TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:5");

  WaitForFlush(1);

  // Add more L0 files and force another manual compaction
  for (int i = 0; i < files_per_round; ++i) {
    PutRandomData(1, 10, 12000, true);
    PutRandomData(1, 1, 10, true);
    WaitForFlush(1);
    AssertFilesPerLevel(
        std::to_string(one_opts.level0_file_num_compaction_trigger + i), 1);
  }

  // Second manual compaction: non-exclusive.
  ROCKSDB_NAMESPACE::port::Thread nonexclusive_compaction([&] {
    CompactRangeOptions compact_options;
    compact_options.exclusive_manual_compaction = false;
    ASSERT_OK(
        db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
  });

  TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:1");

  exclusive_compaction.join();
  nonexclusive_compaction.join();
  WaitForCompaction();

  // VERIFY compaction "one"
  ASSERT_LE(NumTableFilesAtLevel(0, 1), 2);

  // Compare against saved keys
  for (const std::string& saved_key : keys_[1]) {
    ASSERT_NE("NOT_FOUND", Get(1, saved_key));
  }

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Starts a non-exclusive manual compaction on the universal-style column
// family "one" and then writes enough additional L0 files to reach the
// automatic compaction trigger. Sync-point dependencies order the background
// compactions relative to the test thread, and afterwards every key recorded
// in keys_[1] must still be readable.
TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactions) {
  Open();
  CreateColumnFamilies({"one"});

  ColumnFamilyOptions default_opts, one_opts;
  db_options_.max_open_files = 20;  // only 10 files in file cache
  db_options_.max_background_compactions = 3;

  // Default column family: level style with small buffers and files.
  default_opts.compaction_style = kCompactionStyleLevel;
  default_opts.num_levels = 3;
  default_opts.write_buffer_size = 64 << 10;  // 64KB
  default_opts.target_file_size_base = 30 << 10;
  default_opts.max_compaction_bytes = default_opts.target_file_size_base * 1100;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.no_block_cache = true;
  default_opts.table_factory.reset(NewBlockBasedTableFactory(table_options));

  // Column family "one": universal style, single level.
  one_opts.compaction_style = kCompactionStyleUniversal;
  one_opts.num_levels = 1;
  // trigger compaction if there are >= 4 files
  one_opts.level0_file_num_compaction_trigger = 4;
  one_opts.write_buffer_size = 120000;

  Reopen({default_opts, one_opts});

  // make sure all background compaction jobs can be scheduled
  auto pressure_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();

  // SETUP column family "one" -- universal style.
  // Stay two files below the automatic compaction trigger.
  const int compaction_trigger = one_opts.level0_file_num_compaction_trigger;
  for (int i = 0; i < compaction_trigger - 2; ++i) {
    PutRandomData(1, 10, 12000, true);
    PutRandomData(1, 1, 10, true);
    WaitForFlush(1);
    AssertFilesPerLevel(std::to_string(i + 1), 1);
  }

  // One-shot flags: the first callback invocation takes the first branch, the
  // second invocation takes the second branch, later invocations do nothing.
  std::atomic_bool first_callback_pending{true};
  std::atomic_bool second_callback_pending{true};

  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  // Each pair is {predecessor, successor}: the successor sync point does not
  // proceed until the predecessor has been reached.
  sync_point->LoadDependency(
      {{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:2"},
       {"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:5"},
       {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:2"},
       {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:3"}});
  sync_point->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
        if (first_callback_pending.exchange(false)) {
          TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4");
          TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3");
          return;
        }
        if (second_callback_pending.exchange(false)) {
          TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2");
        }
      });
  sync_point->EnableProcessing();

  // Manual compaction that does not exclude other compactions.
  ROCKSDB_NAMESPACE::port::Thread manual_compaction([&] {
    CompactRangeOptions compact_options;
    compact_options.exclusive_manual_compaction = false;
    ASSERT_OK(
        db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
  });

  TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5");

  WaitForFlush(1);

  // Add more L0 files and force automatic compaction
  for (int i = 0; i < compaction_trigger; ++i) {
    PutRandomData(1, 10, 12000, true);
    PutRandomData(1, 1, 10, true);
    WaitForFlush(1);
    AssertFilesPerLevel(std::to_string(compaction_trigger + i), 1);
  }

  TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1");

  manual_compaction.join();
  WaitForCompaction();

  // VERIFY compaction "one"
  ASSERT_LE(NumTableFilesAtLevel(0, 1), 2);

  // Compare against saved keys
  for (const std::string& saved_key : keys_[1]) {
    ASSERT_NE("NOT_FOUND", Get(1, saved_key));
  }

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactionsLevel) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
Open();
|
|
|
|
CreateColumnFamilies({"one"});
|
|
|
|
ColumnFamilyOptions default_cf, one;
|
|
|
|
db_options_.max_open_files = 20; // only 10 files in file cache
|
|
|
|
db_options_.max_background_compactions = 3;
|
|
|
|
|
|
|
|
default_cf.compaction_style = kCompactionStyleLevel;
|
|
|
|
default_cf.num_levels = 3;
|
|
|
|
default_cf.write_buffer_size = 64 << 10; // 64KB
|
|
|
|
default_cf.target_file_size_base = 30 << 10;
|
2016-06-16 23:02:52 +00:00
|
|
|
default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
|
2018-06-05 02:59:44 +00:00
|
|
|
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
|
|
|
|
;
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
table_options.no_block_cache = true;
|
|
|
|
default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
|
|
|
|
|
|
one.compaction_style = kCompactionStyleLevel;
|
|
|
|
|
|
|
|
one.num_levels = 1;
|
|
|
|
// trigger compaction if there are >= 3 files
|
2017-04-05 00:57:27 +00:00
|
|
|
one.level0_file_num_compaction_trigger = 3;
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
one.write_buffer_size = 120000;
|
|
|
|
|
|
|
|
Reopen({default_cf, one});
|
2017-05-24 18:25:38 +00:00
|
|
|
// make sure all background compaction jobs can be scheduled
|
|
|
|
auto stop_token =
|
|
|
|
dbfull()->TEST_write_controler().GetCompactionPressureToken();
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
|
|
|
|
// SETUP column family "one" -- level style
|
|
|
|
for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) {
|
|
|
|
PutRandomData(1, 10, 12000, true);
|
|
|
|
PutRandomData(1, 1, 10, true);
|
|
|
|
WaitForFlush(1);
|
2022-05-06 20:03:58 +00:00
|
|
|
AssertFilesPerLevel(std::to_string(i + 1), 1);
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
2022-05-30 19:31:46 +00:00
|
|
|
std::atomic_bool cf_1_1{true};
|
|
|
|
std::atomic_bool cf_1_2{true};
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
{{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:2"},
|
|
|
|
{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:5"},
|
|
|
|
{"ColumnFamilyTest::ManualAuto:3", "ColumnFamilyTest::ManualAuto:2"},
|
|
|
|
{"LevelCompactionPicker::PickCompactionBySize:0",
|
|
|
|
"ColumnFamilyTest::ManualAuto:3"},
|
|
|
|
{"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:3"}});
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
2018-04-13 00:55:14 +00:00
|
|
|
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
|
2022-05-30 19:31:46 +00:00
|
|
|
if (cf_1_1.exchange(false)) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4");
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3");
|
2022-05-30 19:31:46 +00:00
|
|
|
} else if (cf_1_2.exchange(false)) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2");
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
ROCKSDB_NAMESPACE::port::Thread threads([&] {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
CompactRangeOptions compact_options;
|
|
|
|
compact_options.exclusive_manual_compaction = false;
|
|
|
|
ASSERT_OK(
|
|
|
|
db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
|
|
|
|
});
|
|
|
|
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5");
|
|
|
|
|
|
|
|
// Add more L0 files and force automatic compaction
|
|
|
|
for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) {
|
|
|
|
PutRandomData(1, 10, 12000, true);
|
|
|
|
PutRandomData(1, 1, 10, true);
|
|
|
|
WaitForFlush(1);
|
2022-05-06 20:03:58 +00:00
|
|
|
AssertFilesPerLevel(
|
|
|
|
std::to_string(one.level0_file_num_compaction_trigger + i), 1);
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1");
|
|
|
|
|
|
|
|
threads.join();
|
|
|
|
WaitForCompaction();
|
|
|
|
// VERIFY compaction "one"
|
|
|
|
AssertFilesPerLevel("0,1", 1);
|
|
|
|
|
|
|
|
// Compare against saved keys
|
2018-05-21 18:52:31 +00:00
|
|
|
std::set<std::string>::iterator key_iter = keys_[1].begin();
|
|
|
|
while (key_iter != keys_[1].end()) {
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
ASSERT_NE("NOT_FOUND", Get(1, *key_iter));
|
|
|
|
key_iter++;
|
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
|
Running manual compactions in parallel with other automatic or manual compactions in restricted cases
Summary:
This diff provides a framework for doing manual
compactions in parallel with other compactions. We now have a deque of manual compactions. We also pass manual compactions as an argument from RunManualCompactions down to
BackgroundCompactions, so that RunManualCompactions can be reentrant.
Parallelism is controlled by the two routines
ConflictingManualCompaction to allow/disallow new parallel/manual
compactions based on already existing ManualCompactions. In this diff, by default manual compactions still have to run exclusive of other compactions. However, by setting the compaction option, exclusive_manual_compaction to false, it is possible to run other compactions in parallel with a manual compaction. However, we are still restricted to one manual compaction per column family at a time. All of these restrictions will be relaxed in future diffs.
I will be adding more tests later.
Test Plan: Rocksdb regression + new tests + valgrind
Reviewers: igor, anthony, IslamAbdelRahman, kradhakrishnan, yhchiang, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D47973
2015-12-14 19:20:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// In this test, we generate enough files to trigger automatic compactions.
|
|
|
|
// The automatic compaction waits in NonTrivial:AfterRun
|
|
|
|
// We generate more files and then trigger a manual compaction
|
|
|
|
// This will wait because the automatic compaction has files it needs.
|
|
|
|
// Once the conflict is hit, the automatic compaction starts and ends
|
|
|
|
// Then the manual will run and end.
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, SameCFAutomaticManualCompactions) {
  // Flow of this test, as driven by the statements below:
  //   1. Reopen with column family "one" configured as universal style,
  //      one level, L0 compaction trigger of 4.
  //   2. Install sync-point dependencies and a callback on
  //      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun".
  //   3. Write `trigger` rounds of data, then `trigger - 2` more rounds.
  //   4. Issue a non-exclusive manual CompactRange() on "one".
  //   5. Wait for compactions, then check the file layout and that every
  //      saved key can still be read.
  Open();
  CreateColumnFamilies({"one"});

  ColumnFamilyOptions default_cf, one;
  db_options_.max_open_files = 20;  // only 10 files in file cache
  db_options_.max_background_compactions = 3;

  // Default column family: level style.
  default_cf.compaction_style = kCompactionStyleLevel;
  default_cf.num_levels = 3;
  default_cf.write_buffer_size = 64 << 10;  // 64KB
  default_cf.target_file_size_base = 30 << 10;
  default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.no_block_cache = true;
  default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options));

  // Column family "one": universal style, single level.
  one.compaction_style = kCompactionStyleUniversal;
  one.num_levels = 1;
  // trigger compaction if there are >= 4 files
  one.level0_file_num_compaction_trigger = 4;
  one.write_buffer_size = 120000;

  Reopen({default_cf, one});

  // make sure all background compaction jobs can be scheduled
  // (the token is held until the end of the test body)
  auto pressure_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();

  // One-shot flags: the callback below takes its first branch on the first
  // invocation and its second branch on the next one.
  std::atomic_bool first_call_pending{true};
  std::atomic_bool second_call_pending{true};

  auto* const sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  // NOTE(review): each pair is presumably (predecessor, successor), i.e. the
  // second point waits for the first -- confirm against LoadDependency().
  sync_point->LoadDependency(
      {{"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:2"},
       {"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:5"},
       {"CompactionPicker::CompactRange:Conflict",
        "ColumnFamilyTest::AutoManual:3"}});
  sync_point->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
        if (first_call_pending.exchange(false)) {
          TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:4");
          TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:3");
          return;
        }
        if (second_call_pending.exchange(false)) {
          TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:2");
        }
      });
  sync_point->EnableProcessing();

  const int l0_trigger = one.level0_file_num_compaction_trigger;

  // SETUP column family "one" -- universal style
  // After each round the files-per-level summary is expected to be round+1.
  for (int round = 0; round < l0_trigger; ++round) {
    PutRandomData(1, 10, 12000, true);
    PutRandomData(1, 1, 10, true);
    WaitForFlush(1);
    AssertFilesPerLevel(std::to_string(round + 1), 1);
  }

  TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5");

  // Write (trigger - 2) further rounds of data; no per-round layout check.
  for (int extra = 0; extra < l0_trigger - 2; ++extra) {
    PutRandomData(1, 10, 12000, true);
    PutRandomData(1, 1, 10, true);
    WaitForFlush(1);
  }

  // Manual compaction over the whole key range of "one", allowed to run
  // alongside other compactions.
  CompactRangeOptions manual_options;
  manual_options.exclusive_manual_compaction = false;
  ASSERT_OK(db_->CompactRange(manual_options, handles_[1], nullptr, nullptr));

  TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1");

  WaitForCompaction();
  // VERIFY compaction "one"
  AssertFilesPerLevel("1", 1);

  // Compare against saved keys: none of them may read back as NOT_FOUND.
  for (const auto& saved_key : keys_[1]) {
    ASSERT_NE("NOT_FOUND", Get(1, saved_key));
  }

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
|
|
|
|
2014-03-08 00:12:34 +00:00
|
|
|
namespace {
|
|
|
|
std::string IterStatus(Iterator* iter) {
|
|
|
|
std::string result;
|
|
|
|
if (iter->Valid()) {
|
|
|
|
result = iter->key().ToString() + "->" + iter->value().ToString();
|
|
|
|
} else {
|
2020-10-02 20:33:50 +00:00
|
|
|
EXPECT_OK(iter->status());
|
2014-03-08 00:12:34 +00:00
|
|
|
result = "(invalid)";
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
2014-03-11 21:52:17 +00:00
|
|
|
} // anonymous namespace
|
2014-03-08 00:12:34 +00:00
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Exercises DB::NewIterators() over three column families (default, "one",
// "two"), once with regular iterators and once with tailing iterators, and
// checks what each iterator sees before and after a later Put().
TEST_P(ColumnFamilyTest, NewIteratorsTest) {
  // iter == 0 -- no tailing
  // iter == 1 -- tailing
  for (int iter = 0; iter < 2; ++iter) {
    Open();
    CreateColumnFamiliesAndReopen({"one", "two"});
    ASSERT_OK(Put(0, "a", "b"));
    ASSERT_OK(Put(1, "b", "a"));
    ASSERT_OK(Put(2, "c", "m"));
    ASSERT_OK(Put(2, "v", "t"));

    ReadOptions options;
    options.tailing = (iter == 1);
    std::vector<Iterator*> raw_iterators;
    ASSERT_OK(db_->NewIterators(options, handles_, &raw_iterators));

    // Take ownership immediately: a failing ASSERT_* below returns from the
    // test body, and the iterators must still be freed in that case.
    std::vector<std::unique_ptr<Iterator>> iterators;
    iterators.reserve(raw_iterators.size());
    for (Iterator* raw : raw_iterators) {
      iterators.emplace_back(raw);
    }
    // The checks below index iterators[0..2].
    ASSERT_GE(iterators.size(), 3U);

    for (const auto& it : iterators) {
      it->SeekToFirst();
    }
    ASSERT_EQ(IterStatus(iterators[0].get()), "a->b");
    ASSERT_EQ(IterStatus(iterators[1].get()), "b->a");
    ASSERT_EQ(IterStatus(iterators[2].get()), "c->m");

    // Written after the iterators were created; only the tailing iterator
    // is expected to observe it.
    ASSERT_OK(Put(1, "x", "x"));

    for (const auto& it : iterators) {
      it->Next();
    }

    ASSERT_EQ(IterStatus(iterators[0].get()), "(invalid)");
    if (iter == 0) {
      // no tailing
      ASSERT_EQ(IterStatus(iterators[1].get()), "(invalid)");
    } else {
      // tailing
      ASSERT_EQ(IterStatus(iterators[1].get()), "x->x");
    }
    ASSERT_EQ(IterStatus(iterators[2].get()), "v->t");

    // Release the iterators before the DB is destroyed.
    iterators.clear();
    Destroy();
  }
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Opens a DB read-only with a subset of its column families and checks
// point lookups and NewIterators() against it, then checks that read-only
// opens naming a dropped column family or omitting the default one fail.
TEST_P(ColumnFamilyTest, ReadOnlyDBTest) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two", "three", "four"});
  ASSERT_OK(Put(0, "a", "b"));
  ASSERT_OK(Put(1, "foo", "bla"));
  ASSERT_OK(Put(2, "foo", "blabla"));
  ASSERT_OK(Put(3, "foo", "blablabla"));
  ASSERT_OK(Put(4, "foo", "blablablabla"));

  DropColumnFamilies({2});
  Close();
  // open only a subset of column families
  AssertOpenReadOnly({"default", "one", "four"});
  // Handle indices now follow the list above, so index 2 is "four".
  ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
  ASSERT_EQ("bla", Get(1, "foo"));
  ASSERT_EQ("blablablabla", Get(2, "foo"));

  // test newiterators
  {
    std::vector<Iterator*> raw_iterators;
    ASSERT_OK(db_->NewIterators(ReadOptions(), handles_, &raw_iterators));

    // Take ownership immediately: a failing ASSERT_* below returns from the
    // test body, and the iterators must still be freed in that case. They
    // are released at the end of this scope, before Close().
    std::vector<std::unique_ptr<Iterator>> iterators;
    iterators.reserve(raw_iterators.size());
    for (Iterator* raw : raw_iterators) {
      iterators.emplace_back(raw);
    }
    // The checks below index iterators[0..2].
    ASSERT_GE(iterators.size(), 3U);

    for (const auto& it : iterators) {
      it->SeekToFirst();
    }
    ASSERT_EQ(IterStatus(iterators[0].get()), "a->b");
    ASSERT_EQ(IterStatus(iterators[1].get()), "foo->bla");
    ASSERT_EQ(IterStatus(iterators[2].get()), "foo->blablablabla");
    for (const auto& it : iterators) {
      it->Next();
    }
    ASSERT_EQ(IterStatus(iterators[0].get()), "(invalid)");
    ASSERT_EQ(IterStatus(iterators[1].get()), "(invalid)");
    ASSERT_EQ(IterStatus(iterators[2].get()), "(invalid)");
  }

  Close();
  // can't open dropped column family
  Status s = OpenReadOnly({"default", "one", "two"});
  ASSERT_TRUE(!s.ok());

  // Can't open without specifying default column family
  s = OpenReadOnly({"one", "four"});
  ASSERT_TRUE(!s.ok());
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Writes a little data to every column family, flushes each of them, and
// checks how many writable files were created while doing so.
TEST_P(ColumnFamilyTest, DontRollEmptyLogs) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two", "three", "four"});

  for (size_t i = 0; i < handles_.size(); ++i) {
    PutRandomData(static_cast<int>(i), 10, 100);
  }
  int num_writable_file_start = env_->GetNumberOfNewWritableFileCalls();

  // Column family indices 0..4 are flushed below. The wait loop previously
  // stopped at index 3 and never waited on the last column family; both
  // loops now share one bound.
  constexpr int kLastColumnFamily = 4;

  // this will trigger the flushes
  for (int i = 0; i <= kLastColumnFamily; ++i) {
    ASSERT_OK(Flush(i));
  }

  for (int i = 0; i <= kLastColumnFamily; ++i) {
    WaitForFlush(i);
  }
  int total_new_writable_files =
      env_->GetNumberOfNewWritableFileCalls() - num_writable_file_start;
  // Expect one new writable file per column family plus exactly one more.
  // NOTE(review): presumably that extra file is a single new WAL, i.e. the
  // log is not rolled once per (empty) flush -- confirm.
  ASSERT_EQ(static_cast<size_t>(total_new_writable_files), handles_.size() + 1);
  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// With a bounded total WAL size, repeatedly filling the default column
// family is expected to eventually force a flush of column family [two],
// which holds only a few stale bytes, while the empty column family [one]
// never produces an SST file.
TEST_P(ColumnFamilyTest, FlushStaleColumnFamilies) {
  Open();
  CreateColumnFamilies({"one", "two"});

  ColumnFamilyOptions default_cf, one, two;
  one.disable_auto_compactions = true;
  two.disable_auto_compactions = true;
  default_cf.disable_auto_compactions = true;
  default_cf.write_buffer_size = 100000;  // small write buffer size
  default_cf.arena_block_size = 4096;
  db_options_.max_total_wal_size = 210000;

  Reopen({default_cf, one, two});

  // Leave a tiny amount of data (10 bytes) in CF [two].
  PutRandomData(2, 1, 10);

  // First two rounds: each one flushes the default CF only.
  int expected_live_files = 0;
  while (expected_live_files < 2) {
    PutRandomData(0, 100, 1000);  // flush
    WaitForFlush(0);

    ++expected_live_files;
    AssertCountLiveFiles(expected_live_files);
  }

  // Third round. Now CF [two] should be detected as stale and flushed too;
  // CF [one] should not be flushed since it's empty.
  PutRandomData(0, 100, 1000);  // flush
  WaitForFlush(0);
  WaitForFlush(2);

  // At least 3 files for the default column family, 1 file for column
  // family [two], and zero files for column family [one].
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_GE(metadata.size(), 4);

  const auto has_sst_in = [&metadata](const std::string& cf_name) {
    return std::any_of(metadata.begin(), metadata.end(),
                       [&cf_name](const LiveFileMetaData& file) {
                         return file.column_family_name == cf_name;
                       });
  };
  ASSERT_FALSE(has_sst_in("one"));
  ASSERT_TRUE(has_sst_in("two"));

  // After a final flush of the default CF no log data should be outstanding.
  ASSERT_OK(Flush(0));
  ASSERT_EQ(0, dbfull()->TEST_total_log_size());
  Close();
}
|
|
|
|
|
2023-10-04 21:14:22 +00:00
|
|
|
namespace {
|
|
|
|
struct CountOptionsFilesFs : public FileSystemWrapper {
|
|
|
|
explicit CountOptionsFilesFs(const std::shared_ptr<FileSystem>& t)
|
|
|
|
: FileSystemWrapper(t) {}
|
|
|
|
const char* Name() const override { return "CountOptionsFilesFs"; }
|
|
|
|
|
|
|
|
IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts,
|
|
|
|
std::unique_ptr<FSWritableFile>* r,
|
|
|
|
IODebugContext* dbg) override {
|
|
|
|
if (f.find("OPTIONS-") != std::string::npos) {
|
|
|
|
options_files_created.fetch_add(1, std::memory_order_relaxed);
|
|
|
|
}
|
|
|
|
return FileSystemWrapper::NewWritableFile(f, file_opts, r, dbg);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::atomic<int> options_files_created{};
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Column families named at open time must only be created when
// create_missing_column_families is set, and creating several of them in one
// open must write a single options file rather than one per column family.
TEST_P(ColumnFamilyTest, CreateMissingColumnFamilies) {
  // Without the option, opening an existing DB must not add CFs.
  Open();
  Close();
  ASSERT_FALSE(db_options_.create_missing_column_families);
  ASSERT_NOK(TryOpen({"one", "two"}));

  // The same holds when the DB itself is newly created.
  Destroy();
  db_options_.create_if_missing = true;
  ASSERT_NOK(TryOpen({"one", "two"}));

  // With the option set (new DB case).
  db_options_.create_missing_column_families = true;

  // Route file creation through a counting file system so the number of
  // options files written can be checked below. The env wrapper is declared
  // before the guard that installs it, so the original env pointer is
  // restored before the wrapper is destroyed.
  Env* const base_env = db_options_.env;
  std::shared_ptr<CountOptionsFilesFs> counting_fs =
      std::make_shared<CountOptionsFilesFs>(base_env->GetFileSystem());
  std::unique_ptr<CompositeEnvWrapper> counting_env =
      std::make_unique<CompositeEnvWrapper>(base_env, counting_fs);
  SaveAndRestore<Env*> env_guard(&db_options_.env, counting_env.get());

  ASSERT_OK(TryOpen({"default", "one", "two"}));
  Close();

  // An older version would write an updated options file for each column
  // family created under create_missing_column_families, which would be
  // quadratic I/O in the number of column families.
  ASSERT_EQ(counting_fs->options_files_created.load(), 1);

  // Adding further column families to the now-existing DB.
  ASSERT_OK(TryOpen({"default", "one", "two", "three", "four"}));
  Close();
  ASSERT_EQ(counting_fs->options_files_created.load(), 2);
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Sweeps compaction style, num_levels and the three level0 triggers through
// small value ranges and checks the invariants SanitizeOptions() is expected
// to establish on the resulting ColumnFamilyOptions.
TEST_P(ColumnFamilyTest, SanitizeOptions) {
  constexpr int kKiB = 1024;
  constexpr int kMiB = 1024 * 1024;

  DBOptions db_options;
  for (int style = kCompactionStyleLevel; style <= kCompactionStyleUniversal;
       ++style) {
    for (int levels = 0; levels <= 2; ++levels) {
      for (int stop = 1; stop <= 3; ++stop) {
        for (int slowdown = 1; slowdown <= 3; ++slowdown) {
          for (int trigger = 1; trigger <= 3; ++trigger) {
            ColumnFamilyOptions original;
            original.compaction_style = static_cast<CompactionStyle>(style);
            original.num_levels = levels;
            original.level0_stop_writes_trigger = stop;
            original.level0_slowdown_writes_trigger = slowdown;
            original.level0_file_num_compaction_trigger = trigger;
            // Encode every loop variable into the buffer size so that each
            // combination yields a distinct value.
            original.write_buffer_size =
                levels * 4 * kMiB + stop * kMiB + slowdown * kKiB + trigger;

            ColumnFamilyOptions result =
                SanitizeOptions(ImmutableDBOptions(db_options), original);

            // Triggers must end up ordered stop >= slowdown >= compaction,
            // with the compaction trigger itself left untouched.
            ASSERT_TRUE(result.level0_stop_writes_trigger >=
                        result.level0_slowdown_writes_trigger);
            ASSERT_TRUE(result.level0_slowdown_writes_trigger >=
                        result.level0_file_num_compaction_trigger);
            ASSERT_TRUE(result.level0_file_num_compaction_trigger ==
                        original.level0_file_num_compaction_trigger);

            if (style != kCompactionStyleLevel) {
              ASSERT_GE(result.num_levels, 1);
              if (original.num_levels >= 1) {
                ASSERT_EQ(result.num_levels, original.num_levels);
              }
            } else {
              ASSERT_GE(result.num_levels, 2);
            }

            // arena_block_size is expected to be 1/8 of write_buffer_size,
            // rounded up to a multiple of 4k and capped at 1MB.
            size_t expected_arena_block_size =
                levels * 4 * kMiB / 8 + stop * kMiB / 8;
            // Both loop variables start at 1, so this always holds: the
            // buffer size is never a multiple of 4k and one more 4k block
            // is needed for the round-up.
            if (slowdown + trigger != 0) {
              expected_arena_block_size += 4 * kKiB;
            }
            expected_arena_block_size =
                std::min(size_t{1024 * 1024}, expected_arena_block_size);
            ASSERT_EQ(expected_arena_block_size, result.arena_block_size);
          }
        }
      }
    }
  }
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// An iterator opened on column family "two" must stay usable after that
// column family is dropped (pass 0) or after its handle is destroyed
// (pass 1), and the data must remain readable afterwards.
TEST_P(ColumnFamilyTest, ReadDroppedColumnFamily) {
  // pass 0 -- drop CF, don't reopen
  // pass 1 -- destroy the CF handle, reopen
  for (int pass = 0; pass < 2; ++pass) {
    const bool drop_cf = (pass == 0);

    db_options_.create_missing_column_families = true;
    db_options_.max_open_files = 20;
    // always delete obsolete files
    db_options_.delete_obsolete_files_period_micros = 0;
    Open({"default", "one", "two"});

    ColumnFamilyOptions cf_opts;
    cf_opts.write_buffer_size = 100000;  // small write buffer size
    cf_opts.level0_file_num_compaction_trigger = 100;
    cf_opts.level0_slowdown_writes_trigger = 200;
    cf_opts.level0_stop_writes_trigger = 200;
    Reopen({cf_opts, cf_opts, cf_opts});

    // 1MB should create ~10 files for each CF
    int kKeysNum = 10000;
    for (int cf = 0; cf < 3; ++cf) {
      PutRandomData(cf, kKeysNum, 100);
    }

    {
      std::unique_ptr<Iterator> live_iter(
          db_->NewIterator(ReadOptions(), handles_[2]));
      live_iter->SeekToFirst();

      if (drop_cf) {
        // Drop CF two
        ASSERT_OK(db_->DropColumnFamily(handles_[2]));
      } else {
        // Destroy the handle of CF two
        ASSERT_OK(db_->DestroyColumnFamilyHandle(handles_[2]));
        handles_[2] = nullptr;
      }

      // The iterator created beforehand must still walk every key.
      int seen = 0;
      while (live_iter->Valid()) {
        ASSERT_OK(live_iter->status());
        ++seen;
        live_iter->Next();
      }
      ASSERT_OK(live_iter->status());
      ASSERT_EQ(seen, kKeysNum);
    }

    // Add a bunch more data to the other CFs
    PutRandomData(0, kKeysNum, 100);
    PutRandomData(1, kKeysNum, 100);

    if (!drop_cf) {
      Reopen();
    }

    // Since we didn't delete the CF handle, RocksDB's contract guarantees
    // that we're still able to read the dropped CF. CFs 0 and 1 were
    // written twice, CF 2 only once.
    for (int cf = 0; cf < 3; ++cf) {
      std::unique_ptr<Iterator> reader(
          db_->NewIterator(ReadOptions(), handles_[cf]));
      int seen = 0;
      reader->SeekToFirst();
      while (reader->Valid()) {
        ASSERT_OK(reader->status());
        ++seen;
        reader->Next();
      }
      ASSERT_OK(reader->status());
      ASSERT_EQ(seen, kKeysNum * ((cf == 2) ? 1 : 2));
    }

    Close();
    Destroy();
  }
}
|
|
|
|
|
2019-12-18 19:45:18 +00:00
|
|
|
// An iterator that is already open on column family "one" must still be able
// to walk every key after that column family is dropped.
TEST_P(ColumnFamilyTest, LiveIteratorWithDroppedColumnFamily) {
  db_options_.create_missing_column_families = true;
  db_options_.max_open_files = 20;
  // always delete obsolete files
  db_options_.delete_obsolete_files_period_micros = 0;
  Open({"default", "one", "two"});

  ColumnFamilyOptions cf_opts;
  cf_opts.write_buffer_size = 100000;  // small write buffer size
  cf_opts.level0_file_num_compaction_trigger = 100;
  cf_opts.level0_slowdown_writes_trigger = 200;
  cf_opts.level0_stop_writes_trigger = 200;
  Reopen({cf_opts, cf_opts, cf_opts});

  // 1MB should create ~10 files for the CF being written
  int kKeysNum = 10000;
  PutRandomData(1, kKeysNum, 100);
  {
    std::unique_ptr<Iterator> live_iter(
        db_->NewIterator(ReadOptions(), handles_[1]));
    live_iter->SeekToFirst();

    DropColumnFamilies({1});

    // The iterator created before the drop must still see every key.
    int seen = 0;
    while (live_iter->Valid()) {
      ASSERT_OK(live_iter->status());
      ++seen;
      live_iter->Next();
    }
    ASSERT_OK(live_iter->status());
    ASSERT_EQ(seen, kKeysNum);
  }

  Reopen();
  Close();
  Destroy();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, FlushAndDropRaceCondition) {
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to repro. However, I managed to write a new tests that reproes the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one LOC. All other code is related to tests. I added a new test that reproes the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There's plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
db_options_.create_missing_column_families = true;
|
|
|
|
Open({"default", "one"});
|
|
|
|
ColumnFamilyOptions options;
|
|
|
|
options.level0_file_num_compaction_trigger = 100;
|
|
|
|
options.level0_slowdown_writes_trigger = 200;
|
|
|
|
options.level0_stop_writes_trigger = 200;
|
|
|
|
options.max_write_buffer_number = 20;
|
|
|
|
options.write_buffer_size = 100000; // small write buffer size
|
|
|
|
Reopen({options, options});
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
2016-01-26 17:12:20 +00:00
|
|
|
{{"VersionSet::LogAndApply::ColumnFamilyDrop:0",
|
|
|
|
"FlushJob::WriteLevel0Table"},
|
|
|
|
{"VersionSet::LogAndApply::ColumnFamilyDrop:1",
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to repro. However, I managed to write a new tests that reproes the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one LOC. All other code is related to tests. I added a new test that reproes the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There's plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
"FlushJob::InstallResults"},
|
|
|
|
{"FlushJob::InstallResults",
|
2015-12-30 13:49:06 +00:00
|
|
|
"VersionSet::LogAndApply::ColumnFamilyDrop:2"}});
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to repro. However, I managed to write a new tests that reproes the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one LOC. All other code is related to tests. I added a new test that reproes the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There's plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to repro. However, I managed to write a new tests that reproes the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one LOC. All other code is related to tests. I added a new test that reproes the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There's plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
test::SleepingBackgroundTask sleeping_task;
|
|
|
|
|
|
|
|
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
|
|
|
|
Env::Priority::HIGH);
|
2020-07-20 21:17:05 +00:00
|
|
|
// Make sure the task is sleeping. Otherwise, it might start to execute
|
|
|
|
// after sleeping_task.WaitUntilDone() and cause TSAN warning.
|
|
|
|
sleeping_task.WaitUntilSleeping();
|
2020-09-18 03:22:35 +00:00
|
|
|
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to repro. However, I managed to write a new tests that reproes the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one LOC. All other code is related to tests. I added a new test that reproes the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There's plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
// 1MB should create ~10 files for each CF
|
|
|
|
int kKeysNum = 10000;
|
|
|
|
PutRandomData(1, kKeysNum, 100);
|
|
|
|
|
2017-02-06 22:43:55 +00:00
|
|
|
std::vector<port::Thread> threads;
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to repro. However, I managed to write a new tests that reproes the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one LOC. All other code is related to tests. I added a new test that reproes the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There's plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
threads.emplace_back([&] { ASSERT_OK(db_->DropColumnFamily(handles_[1])); });
|
|
|
|
|
|
|
|
sleeping_task.WakeUp();
|
|
|
|
sleeping_task.WaitUntilDone();
|
|
|
|
sleeping_task.Reset();
|
|
|
|
// now we sleep again. this is just so we're certain that flush job finished
|
|
|
|
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
|
|
|
|
Env::Priority::HIGH);
|
2020-07-20 21:17:05 +00:00
|
|
|
// Make sure the task is sleeping. Otherwise, it might start to execute
|
|
|
|
// after sleeping_task.WaitUntilDone() and cause TSAN warning.
|
|
|
|
sleeping_task.WaitUntilSleeping();
|
LogAndApply() should fail if the column family has been dropped
Summary:
This patch finally fixes the ColumnFamilyTest.ReadDroppedColumnFamily test. The test has been failing very sporadically and it was hard to reproduce. However, I managed to write a new test that reproduces the failure deterministically.
Here's what happens:
1. We start the flush for the column family
2. We check if the column family was dropped here: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/flush_job.cc#L149
3. This check goes through, ends up in InstallMemtableFlushResults() and it goes into LogAndApply()
4. At about this time, we start dropping the column family. Dropping the column family process gets to LogAndApply() at about the same time as LogAndApply() from flush process
5. Drop column family goes through LogAndApply() first, marking the column family as dropped.
6. Flush process gets woken up and gets a chance to write to the MANIFEST. However, this is where it gets stuck: https://github.com/facebook/rocksdb/blob/a3fc49bfddcdb1ff29409aacd06c04df56c7a1d7/db/version_set.cc#L1975
7. We see that the column family was dropped, so there is no need to write to the MANIFEST. We return OK.
8. Flush gets OK back from LogAndApply() and it deletes the memtable, thinking that the data is now safely persisted to sst file.
The fix is pretty simple. Instead of OK, we return ShutdownInProgress. This is not really true, but we have been using this status code to also mean "this operation was canceled because the column family has been dropped".
The fix is only one line of code. All other code is related to tests. I added a new test that reproduces the failure. I also moved SleepingBackgroundTask to util/testutil.h (because I needed it in column_family_test for my new test). There are plenty of other places where we reimplement SleepingBackgroundTask, but I'll address that in a separate commit.
Test Plan:
1. new test
2. make check
3. Make sure the ColumnFamilyTest.ReadDroppedColumnFamily doesn't fail on Travis: https://travis-ci.org/facebook/rocksdb/jobs/79952386
Reviewers: yhchiang, anthony, IslamAbdelRahman, kradhakrishnan, rven, sdong
Reviewed By: sdong
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D46773
2015-09-15 18:28:44 +00:00
|
|
|
sleeping_task.WakeUp();
|
|
|
|
sleeping_task.WaitUntilDone();
|
|
|
|
|
|
|
|
{
|
|
|
|
// Since we didn't delete CF handle, RocksDB's contract guarantees that
|
|
|
|
// we're still able to read dropped CF
|
|
|
|
std::unique_ptr<Iterator> iterator(
|
|
|
|
db_->NewIterator(ReadOptions(), handles_[1]));
|
|
|
|
int count = 0;
|
|
|
|
for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
|
|
|
|
ASSERT_OK(iterator->status());
|
|
|
|
++count;
|
|
|
|
}
|
|
|
|
ASSERT_OK(iterator->status());
|
|
|
|
ASSERT_EQ(count, kKeysNum);
|
|
|
|
}
|
|
|
|
for (auto& t : threads) {
|
|
|
|
t.join();
|
|
|
|
}
|
|
|
|
|
|
|
|
Close();
|
|
|
|
Destroy();
|
|
|
|
}
|
|
|
|
|
2015-12-09 01:01:02 +00:00
|
|
|
namespace {
|
|
|
|
std::atomic<int> test_stage(0);
|
2018-06-05 02:59:44 +00:00
|
|
|
std::atomic<bool> ordered_by_writethread(false);
|
2015-12-09 01:01:02 +00:00
|
|
|
const int kMainThreadStartPersistingOptionsFile = 1;
|
|
|
|
const int kChildThreadFinishDroppingColumnFamily = 2;
|
|
|
|
void DropSingleColumnFamily(ColumnFamilyTest* cf_test, int cf_id,
|
2015-12-10 19:53:53 +00:00
|
|
|
std::vector<Comparator*>* comparators) {
|
2018-06-05 02:59:44 +00:00
|
|
|
while (test_stage < kMainThreadStartPersistingOptionsFile &&
|
|
|
|
!ordered_by_writethread) {
|
2015-12-09 01:01:02 +00:00
|
|
|
Env::Default()->SleepForMicroseconds(100);
|
|
|
|
}
|
|
|
|
cf_test->DropColumnFamilies({cf_id});
|
2015-12-10 19:53:53 +00:00
|
|
|
if ((*comparators)[cf_id]) {
|
|
|
|
delete (*comparators)[cf_id];
|
|
|
|
(*comparators)[cf_id] = nullptr;
|
|
|
|
}
|
2015-12-09 01:01:02 +00:00
|
|
|
test_stage = kChildThreadFinishDroppingColumnFamily;
|
|
|
|
}
|
2022-11-02 21:34:24 +00:00
|
|
|
} // anonymous namespace
|
2015-12-09 01:01:02 +00:00
|
|
|
|
2023-10-12 17:05:23 +00:00
|
|
|
// This test attempts to set up a race condition in a way that is no longer
|
|
|
|
// possible, causing the test to hang. If DBImpl::options_mutex_ is removed
|
|
|
|
// in the future, this test might become relevant again.
|
|
|
|
TEST_P(ColumnFamilyTest, DISABLED_CreateAndDropRace) {
  // Tries to interleave DropColumnFamily() on a helper thread with the main
  // thread persisting the options file. Two sync-point callbacks, together
  // with the file-level test_stage / ordered_by_writethread variables, are
  // used to order the two threads.

  // The hand-off variables are file-level state that this test leaves in its
  // "finished" configuration. Reset them so that another instantiation of
  // this parameterized test (or a repeated run in the same process) actually
  // waits instead of falling straight through every loop.
  test_stage = 0;
  ordered_by_writethread = false;

  const int kCfCount = 5;
  std::vector<ColumnFamilyOptions> cf_opts;
  std::vector<Comparator*> comparators;
  for (int i = 0; i < kCfCount; ++i) {
    cf_opts.emplace_back();
    comparators.push_back(new test::SimpleSuffixReverseComparator());
    cf_opts.back().comparator = comparators.back();
  }
  db_options_.create_if_missing = true;
  db_options_.create_missing_column_families = true;

  auto main_thread_id = std::this_thread::get_id();

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "PersistRocksDBOptions:start", [&](void* /*arg*/) {
        auto current_thread_id = std::this_thread::get_id();
        // If it is the main thread hitting this sync point, it announces
        // that it started persisting options and then blocks until another
        // thread advances test_stage or sets ordered_by_writethread.
        if (main_thread_id == current_thread_id) {
          test_stage = kMainThreadStartPersistingOptionsFile;
          while (test_stage < kChildThreadFinishDroppingColumnFamily &&
                 !ordered_by_writethread) {
            Env::Default()->SleepForMicroseconds(100);
          }
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "WriteThread::EnterUnbatched:Wait", [&](void* /*arg*/) {
        // This means a thread doing DropColumnFamily() is waiting for
        // another thread to finish persisting options.
        // In that case, set ordered_by_writethread to unblock the waiters.
        ordered_by_writethread = true;
      });

  // Create a database with four column families
  Open({"default", "one", "two", "three"},
       {cf_opts[0], cf_opts[1], cf_opts[2], cf_opts[3]});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Start a thread that will drop column family 1 and delete its comparator
  ROCKSDB_NAMESPACE::port::Thread drop_cf_thread(DropSingleColumnFamily, this,
                                                 1, &comparators);

  DropColumnFamilies({2});

  drop_cf_thread.join();
  Close();
  Destroy();
  // The helper thread nulls out the slot it already freed; deleting a null
  // pointer is a no-op, so every remaining comparator is released exactly
  // once.
  for (auto* comparator : comparators) {
    delete comparator;
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
|
|
|
|
|
2023-10-12 17:05:23 +00:00
|
|
|
TEST_P(ColumnFamilyTest, CreateAndDropPeriodicRace) {
  // This is a mini-stress test looking for inconsistency between the set of
  // CFs in the DB, particularly whether any use preserve_internal_time_seconds,
  // and whether that is accurately reflected in the periodic task setup.
  constexpr size_t kNumThreads = 12;
  std::vector<std::thread> threads;
  threads.reserve(kNumThreads);
  // Randomly choose whether the surviving "last" CF or the short-lived CFs
  // carry a non-zero preserve_internal_time_seconds.
  bool last_cf_on = Random::GetTLSInstance()->OneIn(2);

  // Yield at the sync point to widen the window for thread interleavings.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RegisterRecordSeqnoTimeWorker:BeforePeriodicTaskType",
      [&](void* /*arg*/) { std::this_thread::yield(); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_EQ(column_family_options_.preserve_internal_time_seconds, 0U);
  ColumnFamilyOptions other_opts = column_family_options_;
  ColumnFamilyOptions last_opts = column_family_options_;
  (last_cf_on ? last_opts : other_opts).preserve_internal_time_seconds =
      1000000;
  Open();

  // Each worker creates, drops and destroys its own uniquely named CF.
  for (size_t i = 0; i < kNumThreads; i++) {
    threads.emplace_back([this, &other_opts, i]() {
      ColumnFamilyHandle* cfh = nullptr;
      ASSERT_OK(db_->CreateColumnFamily(other_opts, std::to_string(i), &cfh));
      ASSERT_OK(db_->DropColumnFamily(cfh));
      ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh));
    });
  }

  // Create the long-lived CF concurrently with the workers, but postpone the
  // assertion on its status until every worker has been joined: a fatal
  // assertion returns from this function, and destroying a still-joinable
  // std::thread would terminate the process.
  ColumnFamilyHandle* last_cfh = nullptr;
  Status last_create_status =
      db_->CreateColumnFamily(last_opts, "last", &last_cfh);

  for (auto& t : threads) {
    t.join();
  }
  ASSERT_OK(last_create_status);

  // After all short-lived CFs are gone, the seqno-time periodic task should
  // be registered exactly when the surviving CF asked for it.
  bool task_enabled = dbfull()->TEST_GetPeriodicTaskScheduler().TEST_HasTask(
      PeriodicTaskType::kRecordSeqnoTime);
  ASSERT_EQ(last_cf_on, task_enabled);

  ASSERT_OK(db_->DropColumnFamily(last_cfh));
  ASSERT_OK(db_->DestroyColumnFamilyHandle(last_cfh));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, WriteStallSingleColumnFamily) {
|
2016-11-23 17:19:11 +00:00
|
|
|
const uint64_t kBaseRate = 800000u;
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
db_options_.delayed_write_rate = kBaseRate;
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files rises to a certain level, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
db_options_.max_background_compactions = 6;
|
|
|
|
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
Open({"default"});
|
|
|
|
ColumnFamilyData* cfd =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
|
|
|
|
|
|
|
|
VersionStorageInfo* vstorage = cfd->current()->storage_info();
|
|
|
|
|
2016-09-14 04:11:59 +00:00
|
|
|
MutableCFOptions mutable_cf_options(column_family_options_);
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
|
|
|
mutable_cf_options.level0_slowdown_writes_trigger = 20;
|
|
|
|
mutable_cf_options.level0_stop_writes_trigger = 10000;
|
|
|
|
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
|
|
|
|
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
|
2016-08-03 04:55:26 +00:00
|
|
|
mutable_cf_options.disable_auto_compactions = false;
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
auto dbmu = dbfull()->TEST_Mutex();
|
|
|
|
|
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(50, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(201, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(400, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(500, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(450, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(205, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(202, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(201, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(198, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(399, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(599, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(2001, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value for options.delayed_write_rate. With this diff, after the slowdown condition triggers, we greedily reduce the write rate if the estimated pending compaction bytes increase. If the estimated pending compaction bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(3001, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(390, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(100, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(100);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(101);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(0);
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(300, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(101);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25 / 1.25 / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(200, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(0);
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(0, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
|
|
|
mutable_cf_options.disable_auto_compactions = true;
|
|
|
|
dbfull()->TEST_write_controler().set_delayed_write_rate(kBaseRate);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(50);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
2016-08-03 04:55:26 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(0, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate());
|
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(60);
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(300, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
2016-08-03 04:55:26 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(0, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate());
|
|
|
|
|
2016-08-03 04:55:26 +00:00
|
|
|
mutable_cf_options.disable_auto_compactions = false;
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
vstorage->set_l0_delay_trigger_count(70);
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(500, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(71);
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(501, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, CompactionSpeedupSingleColumnFamily) {
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files increases to a certain number, or the pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
db_options_.max_background_compactions = 6;
|
|
|
|
Open({"default"});
|
|
|
|
ColumnFamilyData* cfd =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
|
|
|
|
|
|
|
|
VersionStorageInfo* vstorage = cfd->current()->storage_info();
|
|
|
|
|
2016-09-14 04:11:59 +00:00
|
|
|
MutableCFOptions mutable_cf_options(column_family_options_);
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files increases to a certain number, or the pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
// Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8
|
|
|
|
mutable_cf_options.level0_file_num_compaction_trigger = 4;
|
|
|
|
mutable_cf_options.level0_slowdown_writes_trigger = 36;
|
|
|
|
mutable_cf_options.level0_stop_writes_trigger = 50;
|
|
|
|
// Speedup threshold = 200 / 4 = 50
|
|
|
|
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
|
|
|
|
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
auto dbmu = dbfull()->TEST_Mutex();
|
|
|
|
|
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(40, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files increases to a certain number, or the pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(50, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files increases to a certain number, or the pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(300, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files increases to a certain number, or the pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(45, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(7);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(9);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(6);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
// Speed up threshold = min(4 * 2, 4 + (12 - 4)/4) = 6
|
|
|
|
mutable_cf_options.level0_file_num_compaction_trigger = 4;
|
|
|
|
mutable_cf_options.level0_slowdown_writes_trigger = 16;
|
|
|
|
mutable_cf_options.level0_stop_writes_trigger = 30;
|
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(5);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(7);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(3);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, WriteStallTwoColumnFamilies) {
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
const uint64_t kBaseRate = 810000u;
|
|
|
|
db_options_.delayed_write_rate = kBaseRate;
|
|
|
|
Open();
|
|
|
|
CreateColumnFamilies({"one"});
|
|
|
|
ColumnFamilyData* cfd =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
|
|
|
|
VersionStorageInfo* vstorage = cfd->current()->storage_info();
|
|
|
|
|
|
|
|
ColumnFamilyData* cfd1 =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(handles_[1])->cfd();
|
|
|
|
VersionStorageInfo* vstorage1 = cfd1->current()->storage_info();
|
|
|
|
|
2016-09-14 04:11:59 +00:00
|
|
|
MutableCFOptions mutable_cf_options(column_family_options_);
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
mutable_cf_options.level0_slowdown_writes_trigger = 20;
|
|
|
|
mutable_cf_options.level0_stop_writes_trigger = 10000;
|
|
|
|
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
|
|
|
|
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
|
|
|
|
|
|
|
|
MutableCFOptions mutable_cf_options1 = mutable_cf_options;
|
|
|
|
mutable_cf_options1.soft_pending_compaction_bytes_limit = 500;
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
auto dbmu = dbfull()->TEST_Mutex();
|
|
|
|
|
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(50, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(201, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(600, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(70, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(800, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(300, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(700, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(500, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(600, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_TRUE(!IsDbWriteStopped());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
2017-03-29 18:42:56 +00:00
|
|
|
ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
|
When slowdown is triggered, reduce the write rate
Summary: It's usually hard for users to set a value of options.delayed_write_rate. With this diff, after slowdown condition triggers, we greedily reduce write rate if estimated pending compaction bytes increase. If estimated compaction pending bytes drop, we increase the write rate.
Test Plan:
Add a unit test
Test with db_bench setting:
TEST_TMPDIR=/dev/shm/ ./db_bench --benchmarks=fillrandom -num=10000000 --soft_pending_compaction_bytes_limit=1000000000 --hard_pending_compaction_bytes_limit=3000000000 --delayed_write_rate=100000000
and make sure without the commit, write stop will happen, but with the commit, it will not happen.
Reviewers: igor, anthony, rven, yhchiang, kradhakrishnan, IslamAbdelRahman
Reviewed By: IslamAbdelRahman
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D52131
2015-12-18 01:07:44 +00:00
|
|
|
}
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain number, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) {
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain number, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
db_options_.max_background_compactions = 6;
|
|
|
|
column_family_options_.soft_pending_compaction_bytes_limit = 200;
|
|
|
|
column_family_options_.hard_pending_compaction_bytes_limit = 2000;
|
|
|
|
Open();
|
|
|
|
CreateColumnFamilies({"one"});
|
|
|
|
ColumnFamilyData* cfd =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd();
|
|
|
|
VersionStorageInfo* vstorage = cfd->current()->storage_info();
|
|
|
|
|
|
|
|
ColumnFamilyData* cfd1 =
|
|
|
|
static_cast<ColumnFamilyHandleImpl*>(handles_[1])->cfd();
|
|
|
|
VersionStorageInfo* vstorage1 = cfd1->current()->storage_info();
|
|
|
|
|
2016-09-14 04:11:59 +00:00
|
|
|
MutableCFOptions mutable_cf_options(column_family_options_);
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to schedule a number of compactions not exceeding this number; only when the number of L0 files increases to a certain number, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
// Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8
|
|
|
|
mutable_cf_options.level0_file_num_compaction_trigger = 4;
|
|
|
|
mutable_cf_options.level0_slowdown_writes_trigger = 36;
|
|
|
|
mutable_cf_options.level0_stop_writes_trigger = 30;
|
|
|
|
// Speedup threshold = 200 / 4 = 50
|
|
|
|
mutable_cf_options.soft_pending_compaction_bytes_limit = 200;
|
|
|
|
mutable_cf_options.hard_pending_compaction_bytes_limit = 2000;
|
|
|
|
|
|
|
|
MutableCFOptions mutable_cf_options1 = mutable_cf_options;
|
|
|
|
mutable_cf_options1.level0_slowdown_writes_trigger = 16;
|
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
auto dbmu = dbfull()->TEST_Mutex();
|
|
|
|
|
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(40, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(60, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(30, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(70, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage->TEST_set_estimated_compaction_needed_bytes(20, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
2024-01-25 22:40:18 +00:00
|
|
|
vstorage1->TEST_set_estimated_compaction_needed_bytes(3, dbmu);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(9);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage1->set_l0_delay_trigger_count(2);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd1, mutable_cf_options);
|
2016-10-14 19:25:39 +00:00
|
|
|
ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
|
|
|
|
vstorage->set_l0_delay_trigger_count(0);
|
add locking around calls to RecalculateWriteStallConditions in column_family_test (#4474)
Summary:
this should fix the current failing TSAN jobs:
The callstack for TSAN:
> WARNING: ThreadSanitizer: data race (pid=87440)
Read of size 8 at 0x7d580000fce0 by thread T22 (mutexes: write M548703):
#0 rocksdb::InternalStats::DumpCFStatsNoFileHistogram(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1204 (column_family_test+0x00000080eca7)
#1 rocksdb::InternalStats::DumpCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:1169 (column_family_test+0x0000008106d0)
#2 rocksdb::InternalStats::HandleCFStats(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, rocksdb::Slice) db/internal_stats.cc:578 (column_family_test+0x000000810720)
#3 rocksdb::InternalStats::GetStringProperty(rocksdb::DBPropertyInfo const&, rocksdb::Slice const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) db/internal_stats.cc:488 (column_family_test+0x00000080670c)
#4 rocksdb::DBImpl::DumpStats() db/db_impl.cc:625 (column_family_test+0x00000070ce9a)
> Previous write of size 8 at 0x7d580000fce0 by main thread:
#0 rocksdb::InternalStats::AddCFStats(rocksdb::InternalStats::InternalCFStatsType, unsigned long) db/internal_stats.h:324 (column_family_test+0x000000693bbf)
#1 rocksdb::ColumnFamilyData::RecalculateWriteStallConditions(rocksdb::MutableCFOptions const&) db/column_family.cc:818 (column_family_test+0x000000693bbf)
#2 rocksdb::ColumnFamilyTest_WriteStallSingleColumnFamily_Test::TestBody() db/column_family_test.cc:2563 (column_family_test+0x0000005e5a49)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4474
Differential Revision: D10262099
Pulled By: miasantreble
fbshipit-source-id: 1247973a3ca32e399b4575d3401dd5439c39efc5
2018-10-09 21:08:17 +00:00
|
|
|
RecalculateWriteStallConditions(cfd, mutable_cf_options);
|
2017-05-24 18:25:38 +00:00
|
|
|
ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
|
Add options.base_background_compactions as a number of compaction threads for low compaction debt
Summary:
If options.base_background_compactions is given, we try to keep the number of scheduled compactions from exceeding this number; only when the number of L0 files rises to a certain count, or pending compaction bytes exceed a certain threshold, do we schedule compactions based on options.max_background_compactions.
The watermarks are calculated based on slowdown thresholds.
Test Plan:
Add new test cases in column_family_test.
Adding more unit tests.
Reviewers: IslamAbdelRahman, yhchiang, kradhakrishnan, rven, anthony
Reviewed By: anthony
Subscribers: leveldb, dhruba, yoshinorim
Differential Revision: https://reviews.facebook.net/D53409
2016-01-28 19:56:16 +00:00
|
|
|
}
|
2016-02-23 01:29:28 +00:00
|
|
|
|
2023-12-13 18:37:27 +00:00
|
|
|
// Exercises how the estimated compaction debt of the default column family
// changes the value reported by TEST_BGCompactionsAllowed(). With
// max_background_compactions set to 6, the test expects 1 until the debt
// reaches 2 * max_bytes_for_level_base, at which point it expects 6.
TEST_P(ColumnFamilyTest, CompactionSpeedupForCompactionDebt) {
  constexpr uint64_t kOneMB = 1048576;

  db_options_.max_background_compactions = 6;
  Open();

  auto* default_handle =
      static_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily());
  ColumnFamilyData* cfd = default_handle->cfd();

  // Push the soft pending-bytes limit to its maximum value before any
  // recalculation below.
  MutableCFOptions mutable_cf_options(column_family_options_);
  mutable_cf_options.soft_pending_compaction_bytes_limit =
      std::numeric_limits<uint64_t>::max();

  auto db_mutex = dbfull()->TEST_Mutex();

  // Phase 1: there is no bottommost data yet, so the debt ratio cannot
  // trigger a speedup.
  VersionStorageInfo* vstorage_before_flush = cfd->current()->storage_info();
  vstorage_before_flush->TEST_set_estimated_compaction_needed_bytes(kOneMB,
                                                                    db_mutex);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());

  // Phase 2: add a tiny amount of bottommost data.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Re-read the storage info from the current version after the flush, as the
  // original test does.
  VersionStorageInfo* vstorage_after_flush = cfd->current()->storage_info();

  // Eight bytes is far below the bottommost data size, so it definitely does
  // not trigger a speedup.
  vstorage_after_flush->TEST_set_estimated_compaction_needed_bytes(8, db_mutex);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());

  // 1MB dwarfs the bottommost level, yet it is too small in absolute terms to
  // trigger parallel compaction
  // (see GetPendingCompactionBytesForCompactionSpeedup()).
  vstorage_after_flush->TEST_set_estimated_compaction_needed_bytes(kOneMB,
                                                                   db_mutex);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());

  // Twice the level base size is expected to unlock all six compactions.
  vstorage_after_flush->TEST_set_estimated_compaction_needed_bytes(
      2 * mutable_cf_options.max_bytes_for_level_base, db_mutex);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());
}
|
|
|
|
|
2024-01-30 01:29:04 +00:00
|
|
|
// Exercises how files marked for compaction in upper levels change the value
// reported by TEST_BGCompactionsAllowed(): the test expects 1 with zero or one
// marked upper-level file, and kParallelismLimit once there are two.
TEST_P(ColumnFamilyTest, CompactionSpeedupForMarkedFiles) {
  const int kParallelismLimit = 3;

  // Table properties collector whose NeedCompact() always returns true.
  class AlwaysCompactTpc : public TablePropertiesCollector {
   public:
    const char* Name() const override { return "AlwaysCompactTpc"; }

    bool NeedCompact() const override { return true; }

    Status Finish(UserCollectedProperties* /* properties */) override {
      return Status::OK();
    }

    UserCollectedProperties GetReadableProperties() const override {
      return UserCollectedProperties{};
    }
  };

  // Factory handing out AlwaysCompactTpc instances.
  class AlwaysCompactTpcf : public TablePropertiesCollectorFactory {
   public:
    const char* Name() const override { return "AlwaysCompactTpcf"; }

    TablePropertiesCollector* CreateTablePropertiesCollector(
        TablePropertiesCollectorFactory::Context /* context */) override {
      return new AlwaysCompactTpc();
    }
  };

  column_family_options_.num_levels = 2;
  column_family_options_.table_properties_collector_factories.emplace_back(
      std::make_shared<AlwaysCompactTpcf>());
  db_options_.max_background_compactions = kParallelismLimit;
  Open();

  // Populate the last level first; only marked files in upper levels count.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  WaitForCompaction();
  AssertFilesPerLevel("0,1", 0 /* cf */);

  // The number of tasks needed to block the pool is taken from the env rather
  // than from kParallelismLimit: this test shares its env with other cases,
  // one of which may already have raised the background thread count above
  // kParallelismLimit.
  const auto low_pri_threads = env_->GetBackgroundThreads(Env::Priority::LOW);

  // Occupy every low-priority thread so marked files pile up in L0.
  std::vector<std::shared_ptr<test::SleepingBackgroundTask>> blockers;
  for (int i = 0; i < low_pri_threads; i++) {
    blockers.push_back(std::make_shared<test::SleepingBackgroundTask>());
    test::SleepingBackgroundTask* blocker = blockers.back().get();
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, blocker,
                   Env::Priority::LOW);
    blocker->WaitUntilSleeping();
  }

  // No marked upper-level files: no speedup.
  ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
  AssertFilesPerLevel("0,1", 0 /* cf */);

  // A single marked upper-level file: still no speedup.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed());
  AssertFilesPerLevel("1,1", 0 /* cf */);

  // Two marked upper-level files: speedup kicks in.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(kParallelismLimit, dbfull()->TEST_BGCompactionsAllowed());
  AssertFilesPerLevel("2,1", 0 /* cf */);

  // Release the blocked threads one by one, waiting for each to finish.
  for (const auto& blocker : blockers) {
    blocker->WakeUp();
    blocker->WaitUntilDone();
  }
}
|
|
|
|
|
2022-08-11 00:34:38 +00:00
|
|
|
// Creates a column family from a heap-allocated ColumnFamilyOptions, destroys
// that options object right away, and then keeps using the column family
// (write, flush, drop, destroy handle).
TEST_P(ColumnFamilyTest, CreateAndDestroyOptions) {
  auto cf_options = std::make_unique<ColumnFamilyOptions>();
  ColumnFamilyHandle* handle = nullptr;
  Open();
  ASSERT_OK(db_->CreateColumnFamily(*cf_options, "yoyo", &handle));
  // The options object is gone from here on.
  cf_options.reset();
  ASSERT_OK(db_->Put(WriteOptions(), handle, "foo", "bar"));
  ASSERT_OK(db_->Flush(FlushOptions(), handle));
  ASSERT_OK(db_->DropColumnFamily(handle));
  ASSERT_OK(db_->DestroyColumnFamilyHandle(handle));
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, CreateDropAndDestroy) {
|
Fix segfault caused by object premature destruction
Summary:
Please refer to earlier discussion in [issue 3609](https://github.com/facebook/rocksdb/issues/3609).
There was also an alternative fix in [PR 3888](https://github.com/facebook/rocksdb/pull/3888), but the proposed solution requires complex change.
To summarize the cause of the problem. Upon creation of a column family, a `BlockBasedTableFactory` object is `new`ed and encapsulated by a `std::shared_ptr`. Since there is no other `std::shared_ptr` pointing to this `BlockBasedTableFactory`, when the column family is dropped, the `ColumnFamilyData` is `delete`d, causing the destructor of `std::shared_ptr`. Since there is no other `std::shared_ptr`, the underlying memory is also freed.
Later when the db exits, it releases all the table readers, including the table readers that have been operating on the dropped column family. This needs to access the `table_options` owned by `BlockBasedTableFactory` that has already been deleted. Therefore, a segfault is raised.
Previous workaround is to purge all obsolete files upon `ColumnFamilyData` destruction, which leads to a force release of table readers of the dropped column family. However this does not work when the user disables file deletion.
Our solution in this PR is making a copy of `table_options` in `BlockBasedTable::Rep`. This solution increases memory copy and usage, but is much simpler.
Test plan
```
$ make -j16
$ ./column_family_test --gtest_filter=ColumnFamilyTest.CreateDropAndDestroy:ColumnFamilyTest.CreateDropAndDestroyWithoutFileDeletion
```
Expected behavior:
All tests should pass.
Closes https://github.com/facebook/rocksdb/pull/3898
Differential Revision: D8149421
Pulled By: riversand963
fbshipit-source-id: eaecc2e064057ef607fbdd4cc275874f866c3438
2018-05-25 18:45:12 +00:00
|
|
|
ColumnFamilyHandle* cfh;
|
|
|
|
Open();
|
|
|
|
ASSERT_OK(db_->CreateColumnFamily(ColumnFamilyOptions(), "yoyo", &cfh));
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions(), cfh));
|
|
|
|
ASSERT_OK(db_->DropColumnFamily(cfh));
|
|
|
|
ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh));
|
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
TEST_P(ColumnFamilyTest, CreateDropAndDestroyWithoutFileDeletion) {
|
Fix segfault caused by object premature destruction
Summary:
Please refer to earlier discussion in [issue 3609](https://github.com/facebook/rocksdb/issues/3609).
There was also an alternative fix in [PR 3888](https://github.com/facebook/rocksdb/pull/3888), but the proposed solution requires complex change.
To summarize the cause of the problem. Upon creation of a column family, a `BlockBasedTableFactory` object is `new`ed and encapsulated by a `std::shared_ptr`. Since there is no other `std::shared_ptr` pointing to this `BlockBasedTableFactory`, when the column family is dropped, the `ColumnFamilyData` is `delete`d, causing the destructor of `std::shared_ptr`. Since there is no other `std::shared_ptr`, the underlying memory is also freed.
Later when the db exits, it releases all the table readers, including the table readers that have been operating on the dropped column family. This needs to access the `table_options` owned by `BlockBasedTableFactory` that has already been deleted. Therefore, a segfault is raised.
Previous workaround is to purge all obsolete files upon `ColumnFamilyData` destruction, which leads to a force release of table readers of the dropped column family. However this does not work when the user disables file deletion.
Our solution in this PR is making a copy of `table_options` in `BlockBasedTable::Rep`. This solution increases memory copy and usage, but is much simpler.
Test plan
```
$ make -j16
$ ./column_family_test --gtest_filter=ColumnFamilyTest.CreateDropAndDestroy:ColumnFamilyTest.CreateDropAndDestroyWithoutFileDeletion
```
Expected behavior:
All tests should pass.
Closes https://github.com/facebook/rocksdb/pull/3898
Differential Revision: D8149421
Pulled By: riversand963
fbshipit-source-id: eaecc2e064057ef607fbdd4cc275874f866c3438
2018-05-25 18:45:12 +00:00
|
|
|
ColumnFamilyHandle* cfh;
|
|
|
|
Open();
|
|
|
|
ASSERT_OK(db_->CreateColumnFamily(ColumnFamilyOptions(), "yoyo", &cfh));
|
|
|
|
ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar"));
|
|
|
|
ASSERT_OK(db_->Flush(FlushOptions(), cfh));
|
|
|
|
ASSERT_OK(db_->DisableFileDeletions());
|
|
|
|
ASSERT_OK(db_->DropColumnFamily(cfh));
|
|
|
|
ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh));
|
|
|
|
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Expects two WAL files to be open while flush jobs are blocked, and only one
// once the background flush has finished.
TEST_P(ColumnFamilyTest, FlushCloseWALFiles) {
  SpecialEnv env(Env::Default());
  db_options_.env = &env;
  db_options_.max_background_flushes = 1;
  column_family_options_.memtable_factory.reset(
      test::NewSpecialSkipListFactory(2));
  Open();
  CreateColumnFamilies({"one"});
  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(0, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodor", "mirko"));

  // Order the test's "FlushCloseWALFiles:0" sync point after the completion
  // of the background flush work.
  auto* sync_points = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_points->LoadDependency({
      {"DBImpl::BGWorkFlush:done", "FlushCloseWALFiles:0"},
  });
  sync_points->EnableProcessing();

  // Block flush jobs from running
  test::SleepingBackgroundTask flush_blocker;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &flush_blocker,
                 Env::Priority::HIGH);
  // Make sure the task is sleeping. Otherwise, it might start to execute
  // after flush_blocker.WaitUntilDone() and cause TSAN warning.
  flush_blocker.WaitUntilSleeping();

  WriteOptions synced_write;
  synced_write.sync = true;
  ASSERT_OK(db_->Put(synced_write, handles_[1], "fodor", "mirko"));

  ASSERT_EQ(2, env.num_open_wal_file_.load());

  // Let the flush proceed and wait for it before re-checking the WAL count.
  flush_blocker.WakeUp();
  flush_blocker.WaitUntilDone();
  TEST_SYNC_POINT("FlushCloseWALFiles:0");
  sync_points->DisableProcessing();
  ASSERT_EQ(1, env.num_open_wal_file_.load());

  Reopen();
  ASSERT_EQ("mirko", Get(0, "fodor"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  // `env` is a local; put the fixture's env back before closing.
  db_options_.env = env_;
  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// With flush jobs blocked, deleting an iterator that holds the last reference
// to an old super version is expected to bring the open WAL count from 2
// down to 1.
TEST_P(ColumnFamilyTest, IteratorCloseWALFile1) {
  SpecialEnv env(Env::Default());
  db_options_.env = &env;
  db_options_.max_background_flushes = 1;
  // When this option is removed, the test will need re-engineering
  db_options_.background_close_inactive_wals = true;
  column_family_options_.memtable_factory.reset(
      test::NewSpecialSkipListFactory(2));
  Open();
  CreateColumnFamilies({"one"});
  ASSERT_OK(Put(1, "fodor", "mirko"));
  // Create an iterator holding the current super version.
  Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);
  ASSERT_OK(iter->status());
  // A flush will make `iter` hold the last reference of its super version.
  ASSERT_OK(Flush(1));

  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(0, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodor", "mirko"));

  // Flush jobs will close previous WAL files after finishing. By
  // blocking flush jobs from running, we trigger a condition where
  // the iterator destructor should close the WAL files.
  test::SleepingBackgroundTask flush_blocker;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &flush_blocker,
                 Env::Priority::HIGH);
  // Make sure the task is sleeping. Otherwise, it might start to execute
  // after flush_blocker.WaitUntilDone() and cause TSAN warning.
  flush_blocker.WaitUntilSleeping();

  WriteOptions synced_write;
  synced_write.sync = true;
  ASSERT_OK(db_->Put(synced_write, handles_[1], "fodor", "mirko"));

  ASSERT_EQ(2, env.num_open_wal_file_.load());
  // Deleting the iterator will clear its super version, triggering
  // closing all files
  delete iter;
  ASSERT_EQ(1, env.num_open_wal_file_.load());

  flush_blocker.WakeUp();
  flush_blocker.WaitUntilDone();
  WaitForFlush(1);

  Reopen();
  ASSERT_EQ("mirko", Get(0, "fodor"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  // `env` is a local; put the fixture's env back before closing.
  db_options_.env = env_;
  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Variant of IteratorCloseWALFile1 using
// ReadOptions::background_purge_on_iterator_cleanup: the open WAL count is
// expected to stay at 2 right after the iterator is deleted and to drop to 1
// only once the background purge has run.
TEST_P(ColumnFamilyTest, IteratorCloseWALFile2) {
  SpecialEnv env(Env::Default());
  // Allow both of flush and purge job to schedule.
  env.SetBackgroundThreads(2, Env::HIGH);
  db_options_.env = &env;
  db_options_.max_background_flushes = 1;
  // When this option is removed, the test will need re-engineering
  db_options_.background_close_inactive_wals = true;
  column_family_options_.memtable_factory.reset(
      test::NewSpecialSkipListFactory(2));
  Open();
  CreateColumnFamilies({"one"});
  ASSERT_OK(Put(1, "fodor", "mirko"));
  // Create an iterator holding the current super version.
  ReadOptions purge_in_background;
  purge_in_background.background_purge_on_iterator_cleanup = true;
  Iterator* iter = db_->NewIterator(purge_in_background, handles_[1]);
  ASSERT_OK(iter->status());
  // A flush will make `iter` hold the last reference of its super version.
  ASSERT_OK(Flush(1));

  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(0, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodor", "mirko"));

  // Sequence the background purge and flush against the test thread's
  // ":0", ":1" and ":2" sync points below.
  auto* sync_points = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_points->LoadDependency({
      {"ColumnFamilyTest::IteratorCloseWALFile2:0",
       "DBImpl::BGWorkPurge:start"},
      {"ColumnFamilyTest::IteratorCloseWALFile2:2",
       "DBImpl::BackgroundCallFlush:start"},
      {"DBImpl::BGWorkPurge:end", "ColumnFamilyTest::IteratorCloseWALFile2:1"},
  });
  sync_points->EnableProcessing();

  WriteOptions synced_write;
  synced_write.sync = true;
  ASSERT_OK(db_->Put(synced_write, handles_[1], "fodor", "mirko"));

  ASSERT_EQ(2, env.num_open_wal_file_.load());
  // Deleting the iterator will clear its super version, triggering
  // closing all files
  delete iter;
  // The count is expected to be unchanged until the purge job has run.
  ASSERT_EQ(2, env.num_open_wal_file_.load());

  TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:0");
  TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:1");
  ASSERT_EQ(1, env.num_open_wal_file_.load());
  TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:2");
  WaitForFlush(1);
  ASSERT_EQ(1, env.num_open_wal_file_.load());
  sync_points->DisableProcessing();

  Reopen();
  ASSERT_EQ("mirko", Get(0, "fodor"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  // `env` is a local; put the fixture's env back before closing.
  db_options_.env = env_;
  Close();
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Tailing-iterator counterpart of IteratorCloseWALFile2. Besides the number
// of open WAL files it also tracks SpecialEnv::delete_count_ around a Seek()
// on the tailing iterator and the subsequent background purge.
TEST_P(ColumnFamilyTest, ForwardIteratorCloseWALFile) {
  SpecialEnv env(Env::Default());
  // Allow both of flush and purge job to schedule.
  env.SetBackgroundThreads(2, Env::HIGH);
  db_options_.env = &env;
  db_options_.max_background_flushes = 1;
  // When this option is removed, the test will need re-engineering
  db_options_.background_close_inactive_wals = true;
  column_family_options_.memtable_factory.reset(
      test::NewSpecialSkipListFactory(3));
  column_family_options_.level0_file_num_compaction_trigger = 2;
  Open();
  CreateColumnFamilies({"one"});
  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodar2", "mirko"));
  ASSERT_OK(Flush(1));

  // Create an iterator holding the current super version, as well as
  // the SST file just flushed.
  ReadOptions tailing_opts;
  tailing_opts.tailing = true;
  tailing_opts.background_purge_on_iterator_cleanup = true;
  Iterator* iter = db_->NewIterator(tailing_opts, handles_[1]);
  // A flush will make `iter` hold the last reference of its super version.

  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodar2", "mirko"));
  ASSERT_OK(Flush(1));

  WaitForCompaction();

  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(0, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodor", "mirko"));

  // The sync point names are shared with IteratorCloseWALFile2.
  auto* sync_points = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_points->LoadDependency({
      {"ColumnFamilyTest::IteratorCloseWALFile2:0",
       "DBImpl::BGWorkPurge:start"},
      {"ColumnFamilyTest::IteratorCloseWALFile2:2",
       "DBImpl::BackgroundCallFlush:start"},
      {"DBImpl::BGWorkPurge:end", "ColumnFamilyTest::IteratorCloseWALFile2:1"},
  });
  sync_points->EnableProcessing();

  WriteOptions synced_write;
  synced_write.sync = true;
  ASSERT_OK(db_->Put(synced_write, handles_[1], "fodor", "mirko"));

  // Count file deletions from this point on.
  env.delete_count_.store(0);
  ASSERT_EQ(2, env.num_open_wal_file_.load());
  // Seek the tailing iterator. Nothing is expected to be closed or deleted
  // synchronously; that is left to the background purge checked below.
  iter->Seek("");
  ASSERT_OK(iter->status());

  ASSERT_EQ(2, env.num_open_wal_file_.load());
  ASSERT_EQ(0, env.delete_count_.load());

  TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:0");
  TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:1");
  ASSERT_EQ(1, env.num_open_wal_file_.load());
  ASSERT_EQ(1, env.delete_count_.load());
  TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:2");
  WaitForFlush(1);
  ASSERT_EQ(1, env.num_open_wal_file_.load());
  ASSERT_EQ(1, env.delete_count_.load());

  delete iter;
  sync_points->DisableProcessing();

  Reopen();
  ASSERT_EQ("mirko", Get(0, "fodor"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  // `env` is a local; put the fixture's env back before closing.
  db_options_.env = env_;
  Close();
}
|
|
|
|
|
2016-04-15 00:33:53 +00:00
|
|
|
// Disabled on Windows because SyncWAL requires env->IsSyncThreadSafe()
|
|
|
|
// to return true which is not so in unbuffered mode.
|
|
|
|
#ifndef OS_WIN
|
2018-06-05 02:59:44 +00:00
|
|
|
// Runs SyncWAL() on a second thread and uses sync points to interleave two
// flushes of column family 1 between SyncWAL's two BeforeMarkLogsSynced
// sync points.
TEST_P(ColumnFamilyTest, LogSyncConflictFlush) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});

  ASSERT_OK(Put(0, "", ""));
  ASSERT_OK(Put(1, "foo", "bar"));

  auto* sync_points = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_points->LoadDependency(
      {{"DBImpl::SyncWAL:BeforeMarkLogsSynced:1",
        "ColumnFamilyTest::LogSyncConflictFlush:1"},
       {"ColumnFamilyTest::LogSyncConflictFlush:2",
        "DBImpl::SyncWAL:BeforeMarkLogsSynced:2"}});

  sync_points->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread sync_wal_thread(
      [&] { ASSERT_OK(db_->SyncWAL()); });

  // Between ":1" and ":2" the SyncWAL thread is held at its second sync
  // point while this thread flushes twice.
  TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:1");
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Flush(1));

  TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:2");

  sync_wal_thread.join();

  sync_points->DisableProcessing();
  Close();
}
|
2016-04-15 00:33:53 +00:00
|
|
|
#endif
|
2016-06-06 21:40:36 +00:00
|
|
|
|
|
|
|
// this test is placed here, because the infrastructure for Column Family
|
|
|
|
// test is being used to ensure a roll of wal files.
|
|
|
|
// Basic idea is to test that WAL truncation is being detected and not
|
|
|
|
// ignored
|
2018-06-05 02:59:44 +00:00
|
|
|
// Truncates the second-newest WAL file on disk and then reopens the DB (first
// read-only, then read-write) in point-in-time recovery mode, calling
// CheckMissed() after each open.
TEST_P(ColumnFamilyTest, DISABLED_LogTruncationTest) {
  // Number of bytes cut off the end of the WAL file.
  // Not sure how easy it is to make this data driven: ideally the WAL file
  // would be read back and exactly the last 10 entries truncated.
  const uint64_t kTruncatedBytes = 9180;

  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});

  Build(0, 100);

  // Flush the 0th column family to force a roll of the wal log
  ASSERT_OK(Flush(0));

  // Add some more entries
  Build(100, 100);

  std::vector<std::string> filenames;
  ASSERT_OK(env_->GetChildren(dbname_, &filenames));

  // collect wal files
  std::vector<std::string> logfs;
  for (const auto& filename : filenames) {
    uint64_t number;
    FileType type;
    if (ParseFileName(filename, &number, &type) && type == kWalFile) {
      logfs.push_back(filename);
    }
  }

  std::sort(logfs.begin(), logfs.end());
  ASSERT_GE(logfs.size(), 2);

  // Take the last but one file, and truncate it
  const std::string& victim = logfs[logfs.size() - 2];
  std::string fpath = dbname_ + "/" + victim;
  std::vector<std::string> names_save = names_;

  uint64_t fsize;
  ASSERT_OK(env_->GetFileSize(fpath, &fsize));
  // The copy length below is unsigned; make sure the subtraction cannot wrap
  // around when the WAL file is shorter than the amount being cut off.
  ASSERT_GT(fsize, kTruncatedBytes);

  Close();

  std::string backup_logs = dbname_ + "/backup_logs";
  std::string t_fpath = backup_logs + "/" + victim;

  ASSERT_OK(env_->CreateDirIfMissing(backup_logs));
  // Copy all but the trailing kTruncatedBytes to a scratch location, then
  // swap the shortened copy in place of the original WAL file.
  CopyFile(fpath, t_fpath, fsize - kTruncatedBytes);

  ASSERT_OK(env_->DeleteFile(fpath));
  ASSERT_OK(env_->RenameFile(t_fpath, fpath));

  db_options_.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;

  OpenReadOnly(names_save);

  CheckMissed();

  Close();

  Open(names_save);

  CheckMissed();

  Close();

  // cleanup
  ASSERT_OK(env_->DeleteDir(backup_logs));
}
|
2018-04-06 02:49:06 +00:00
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// A column family with an empty cf_paths is expected to place its SST files
// under the DB's own path, while one with cf_paths set uses that path.
TEST_P(ColumnFamilyTest, DefaultCfPathsTest) {
  Open();
  // Leave cf_paths empty for one of the column families.
  // Files should be generated according to db_paths for that
  // column family.
  ColumnFamilyOptions opts_with_path;
  ColumnFamilyOptions opts_without_path;
  opts_with_path.cf_paths.emplace_back(dbname_ + "_one_1",
                                       std::numeric_limits<uint64_t>::max());
  CreateColumnFamilies({"one", "two"}, {opts_with_path, opts_without_path});
  Reopen({ColumnFamilyOptions(), opts_with_path, opts_without_path});

  // Fill Column family 1.
  PutRandomData(1, 100, 100);
  ASSERT_OK(Flush(1));

  ASSERT_EQ(1, GetSstFileCount(opts_with_path.cf_paths[0].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // Fill column family 2
  PutRandomData(2, 100, 100);
  ASSERT_OK(Flush(2));

  // SST from Column family 2 should be generated in
  // db_paths which is dbname_ in this case.
  ASSERT_EQ(1, GetSstFileCount(dbname_));
}
|
|
|
|
|
2018-06-05 02:59:44 +00:00
|
|
|
// Gives two column families their own cf_paths, checks that flushed SST files
// land in the configured directories (and not under dbname_), then reopens
// the DB and verifies the saved keys through an iterator and through Get().
TEST_P(ColumnFamilyTest, MultipleCFPathsTest) {
  Open();
  // Configure Column family specific paths.
  ColumnFamilyOptions cf_opt1, cf_opt2;
  cf_opt1.cf_paths.emplace_back(dbname_ + "_one_1",
                                std::numeric_limits<uint64_t>::max());
  cf_opt2.cf_paths.emplace_back(dbname_ + "_two_1",
                                std::numeric_limits<uint64_t>::max());
  CreateColumnFamilies({"one", "two"}, {cf_opt1, cf_opt2});
  Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2});

  PutRandomData(1, 100, 100, true /* save */);
  ASSERT_OK(Flush(1));

  // Check that files are generated in appropriate paths.
  ASSERT_EQ(1, GetSstFileCount(cf_opt1.cf_paths[0].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  PutRandomData(2, 100, 100, true /* save */);
  ASSERT_OK(Flush(2));

  ASSERT_EQ(1, GetSstFileCount(cf_opt2.cf_paths[0].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // Re-open and verify the keys.
  Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2});
  DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
  for (int cf = 1; cf != 3; ++cf) {
    ReadOptions read_options;
    read_options.readahead_size = 0;
    {
      // Owned by a unique_ptr so the iterator is released even when one of
      // the ASSERT_* macros below returns early. The scope ends before the
      // Get() loop, i.e. where the iterator used to be deleted by hand.
      std::unique_ptr<Iterator> it(
          dbi->NewIterator(read_options, handles_[cf]));
      for (it->SeekToFirst(); it->Valid(); it->Next()) {
        ASSERT_OK(it->status());
        Slice key(it->key());
        ASSERT_NE(keys_[cf].end(), keys_[cf].find(key.ToString()));
      }
      // Distinguish "reached the end" from "stopped on an error".
      ASSERT_OK(it->status());
    }

    for (const auto& key : keys_[cf]) {
      ASSERT_NE("NOT_FOUND", Get(cf, key));
    }
  }
}
|
2018-05-21 18:52:31 +00:00
|
|
|
|
Integrated blob garbage collection: relocate blobs (#7694)
Summary:
The patch adds basic garbage collection support to the integrated BlobDB
implementation. Valid blobs residing in the oldest blob files are relocated
as they are encountered during compaction. The threshold that determines
which blob files qualify is computed based on the configuration option
`blob_garbage_collection_age_cutoff`, which was introduced in https://github.com/facebook/rocksdb/issues/7661 .
Once a blob is retrieved for the purposes of relocation, it passes through the
same logic that extracts large values to blob files in general. This means that
if, for instance, the size threshold for key-value separation (`min_blob_size`)
got changed or writing blob files got disabled altogether, it is possible for the
value to be moved back into the LSM tree. In particular, one way to re-inline
all blob values if needed would be to perform a full manual compaction with
`enable_blob_files` set to `false`, `enable_blob_garbage_collection` set to
`true`, and `blob_garbage_collection_age_cutoff` set to `1.0`.
Some TODOs that I plan to address in separate PRs:
1) We'll have to measure the amount of new garbage in each blob file and log
`BlobFileGarbage` entries as part of the compaction job's `VersionEdit`.
(For the time being, blob files are cleaned up solely based on the
`oldest_blob_file_number` relationships.)
2) When compression is used for blobs, the compression type hasn't changed,
and the blob still qualifies for being written to a blob file, we can simply copy
the compressed blob to the new file instead of going through decompression
and compression.
3) We need to update the formula for computing write amplification to account
for the amount of data read from blob files as part of GC.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7694
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D25069663
Pulled By: ltamasi
fbshipit-source-id: bdfa8feb09afcf5bca3b4eba2ba72ce2f15cd06a
2020-11-24 05:07:01 +00:00
|
|
|
// Checks how `ColumnFamilyData::ValidateOptions` treats
// `blob_garbage_collection_age_cutoff`: 0.0, 0.5 and 1.0 are accepted, while
// -0.5 and 1.5 are rejected as invalid arguments.
TEST(ColumnFamilyTest, ValidateBlobGCCutoff) {
  DBOptions db_opts;
  ColumnFamilyOptions cf_opts;
  cf_opts.enable_blob_garbage_collection = true;

  // Applies `cutoff` to the options and returns the validation result.
  const auto validate_with_cutoff = [&db_opts, &cf_opts](double cutoff) {
    cf_opts.blob_garbage_collection_age_cutoff = cutoff;
    return ColumnFamilyData::ValidateOptions(db_opts, cf_opts);
  };

  ASSERT_TRUE(validate_with_cutoff(-0.5).IsInvalidArgument());
  ASSERT_OK(validate_with_cutoff(0.0));
  ASSERT_OK(validate_with_cutoff(0.5));
  ASSERT_OK(validate_with_cutoff(1.0));
  ASSERT_TRUE(validate_with_cutoff(1.5).IsInvalidArgument());
}
|
|
|
|
|
Make it possible to force the garbage collection of the oldest blob files (#8994)
Summary:
The current BlobDB garbage collection logic works by relocating the valid
blobs from the oldest blob files as they are encountered during compaction,
and cleaning up blob files once they contain nothing but garbage. However,
with sufficiently skewed workloads, it is theoretically possible to end up in a
situation when few or no compactions get scheduled for the SST files that contain
references to the oldest blob files, which can lead to increased space amp due
to the lack of GC.
In order to efficiently handle such workloads, the patch adds a new BlobDB
configuration option called `blob_garbage_collection_force_threshold`,
which signals to BlobDB to schedule targeted compactions for the SST files
that keep alive the oldest batch of blob files if the overall ratio of garbage in
the given blob files meets the threshold *and* all the given blob files are
eligible for GC based on `blob_garbage_collection_age_cutoff`. (For example,
if the new option is set to 0.9, targeted compactions will get scheduled if the
sum of garbage bytes meets or exceeds 90% of the sum of total bytes in the
oldest blob files, assuming all affected blob files are below the age-based cutoff.)
The net result of these targeted compactions is that the valid blobs in the oldest
blob files are relocated and the oldest blob files themselves cleaned up (since
*all* SST files that rely on them get compacted away).
These targeted compactions are similar to periodic compactions in the sense
that they force certain SST files that otherwise would not get picked up to undergo
compaction and also in the sense that instead of merging files from multiple levels,
they target a single file. (Note: such compactions might still include neighboring files
from the same level due to the need of having a "clean cut" boundary but they never
include any files from any other level.)
This functionality is currently only supported with the leveled compaction style
and is inactive by default (since the default value is set to 1.0, i.e. 100%).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8994
Test Plan: Ran `make check` and tested using `db_bench` and the stress/crash tests.
Reviewed By: riversand963
Differential Revision: D31489850
Pulled By: ltamasi
fbshipit-source-id: 44057d511726a0e2a03c5d9313d7511b3f0c4eab
2021-10-12 01:00:44 +00:00
|
|
|
// Checks how `ColumnFamilyData::ValidateOptions` treats
// `blob_garbage_collection_force_threshold`: 0.0, 0.5 and 1.0 are accepted,
// while -0.5 and 1.5 are rejected as invalid arguments.
TEST(ColumnFamilyTest, ValidateBlobGCForceThreshold) {
  DBOptions db_opts;
  ColumnFamilyOptions cf_opts;
  cf_opts.enable_blob_garbage_collection = true;

  // Applies `threshold` to the options and returns the validation result.
  const auto validate_with_threshold = [&db_opts, &cf_opts](double threshold) {
    cf_opts.blob_garbage_collection_force_threshold = threshold;
    return ColumnFamilyData::ValidateOptions(db_opts, cf_opts);
  };

  ASSERT_TRUE(validate_with_threshold(-0.5).IsInvalidArgument());
  ASSERT_OK(validate_with_threshold(0.0));
  ASSERT_OK(validate_with_threshold(0.5));
  ASSERT_OK(validate_with_threshold(1.0));
  ASSERT_TRUE(validate_with_threshold(1.5).IsInvalidArgument());
}
|
|
|
|
|
2022-09-01 00:47:07 +00:00
|
|
|
// Checks how `ColumnFamilyData::ValidateOptions` treats
// `memtable_protection_bytes_per_key`: default options, 0 and 1 are accepted,
// while 5 and 16 are reported as not supported.
TEST(ColumnFamilyTest, ValidateMemtableKVChecksumOption) {
  DBOptions db_opts;
  ColumnFamilyOptions cf_opts;

  // Untouched (default) options must validate cleanly.
  ASSERT_OK(ColumnFamilyData::ValidateOptions(db_opts, cf_opts));

  // Applies `bytes_per_key` to the options and returns the validation result.
  const auto validate_with_protection = [&db_opts,
                                         &cf_opts](uint32_t bytes_per_key) {
    cf_opts.memtable_protection_bytes_per_key = bytes_per_key;
    return ColumnFamilyData::ValidateOptions(db_opts, cf_opts);
  };

  ASSERT_TRUE(validate_with_protection(5).IsNotSupported());
  ASSERT_OK(validate_with_protection(1));
  ASSERT_TRUE(validate_with_protection(16).IsNotSupported());
  ASSERT_OK(validate_with_protection(0));
}
|
|
|
|
|
2023-07-26 23:25:06 +00:00
|
|
|
// Tests the flushing behavior of a column family to retain user-defined
|
2024-06-13 20:18:10 +00:00
|
|
|
// timestamp when `persist_user_defined_timestamp` is false. The behavior of
|
|
|
|
// auto flush is it makes some effort to retain user-defined timestamps while
|
|
|
|
// the behavior of manual flush is that it skips retaining UDTs.
|
2023-07-26 23:25:06 +00:00
|
|
|
class ColumnFamilyRetainUDTTest : public ColumnFamilyTestBase {
|
|
|
|
public:
|
|
|
|
ColumnFamilyRetainUDTTest() : ColumnFamilyTestBase(kLatestFormatVersion) {}
|
|
|
|
|
|
|
|
void SetUp() override {
|
|
|
|
db_options_.allow_concurrent_memtable_write = false;
|
|
|
|
column_family_options_.comparator =
|
|
|
|
test::BytewiseComparatorWithU64TsWrapper();
|
|
|
|
column_family_options_.persist_user_defined_timestamps = false;
|
|
|
|
ColumnFamilyTestBase::SetUp();
|
|
|
|
}
|
|
|
|
|
|
|
|
Status Put(int cf, const std::string& key, const std::string& ts,
|
|
|
|
const std::string& value) {
|
|
|
|
return db_->Put(WriteOptions(), handles_[cf], Slice(key), Slice(ts),
|
|
|
|
Slice(value));
|
|
|
|
}
|
2024-06-13 20:18:10 +00:00
|
|
|
|
|
|
|
std::string Get(int cf, const std::string& key, const std::string& read_ts) {
|
|
|
|
ReadOptions ropts;
|
|
|
|
Slice timestamp = read_ts;
|
|
|
|
ropts.timestamp = ×tamp;
|
|
|
|
std::string value;
|
|
|
|
Status s = db_->Get(ropts, handles_[cf], Slice(key), &value);
|
|
|
|
if (s.IsNotFound()) {
|
|
|
|
return "NOT_FOUND";
|
|
|
|
} else if (s.ok()) {
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
void CheckEffectiveCutoffTime(uint64_t expected_cutoff) {
|
|
|
|
std::string effective_full_history_ts_low;
|
|
|
|
EXPECT_OK(
|
|
|
|
db_->GetFullHistoryTsLow(handles_[0], &effective_full_history_ts_low));
|
|
|
|
EXPECT_EQ(EncodeAsUint64(expected_cutoff), effective_full_history_ts_low);
|
|
|
|
}
|
2023-07-26 23:25:06 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
class TestTsComparator : public Comparator {
|
|
|
|
public:
|
|
|
|
TestTsComparator() : Comparator(8 /*ts_sz*/) {}
|
|
|
|
|
|
|
|
int Compare(const ROCKSDB_NAMESPACE::Slice& /*a*/,
|
|
|
|
const ROCKSDB_NAMESPACE::Slice& /*b*/) const override {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
const char* Name() const override { return "TestTs"; }
|
|
|
|
void FindShortestSeparator(
|
|
|
|
std::string* /*start*/,
|
|
|
|
const ROCKSDB_NAMESPACE::Slice& /*limit*/) const override {}
|
|
|
|
void FindShortSuccessor(std::string* /*key*/) const override {}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Verifies the configurations rejected as NotSupported when user-defined
// timestamps are not persisted: a comparator other than the uint64 timestamp
// one, atomic flush, and concurrent memtable writes.
TEST_F(ColumnFamilyRetainUDTTest, SanityCheck) {
  Open();

  // Not persisting user-defined timestamps feature only supports user-defined
  // timestamps formatted as uint64_t.
  TestTsComparator non_u64_ts_comparator;
  ColumnFamilyOptions rejected_cf_options;
  rejected_cf_options.persist_user_defined_timestamps = false;
  rejected_cf_options.comparator = &non_u64_ts_comparator;
  ColumnFamilyHandle* unused_handle;
  const Status create_status =
      db_->CreateColumnFamily(rejected_cf_options, "pikachu", &unused_handle);
  ASSERT_TRUE(create_status.IsNotSupported());

  Destroy();

  // Not persisting user-defined timestamps feature doesn't work in combination
  // with atomic flush.
  db_options_.atomic_flush = true;
  const Status atomic_flush_status = TryOpen({"default"});
  ASSERT_TRUE(atomic_flush_status.IsNotSupported());

  // Not persisting user-defined timestamps feature doesn't work in combination
  // with concurrent memtable write.
  db_options_.atomic_flush = false;
  db_options_.allow_concurrent_memtable_write = true;
  const Status concurrent_write_status = TryOpen({"default"});
  ASSERT_TRUE(concurrent_write_status.IsNotSupported());

  Close();
}
|
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// Fixture for the automatic-flush test cases; it adds nothing on top of
// ColumnFamilyRetainUDTTest and exists to group those tests under one name.
class AutoFlushRetainUDTTest : public ColumnFamilyRetainUDTTest {};
|
|
|
|
|
|
|
|
// When the user never sets `full_history_ts_low`, an automatic flush goes
// ahead without a re-schedule and the cutoff is advanced to write_ts + 1.
TEST_F(AutoFlushRetainUDTTest, FullHistoryTsLowNotSet) {
  const uint64_t kWriteTs = 1;

  SyncPoint* const sync_point = SyncPoint::GetInstance();
  // The flush request must be seen with a reschedule count of exactly 1.
  sync_point->SetCallBack(
      "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [](void* arg) {
        ASSERT_NE(nullptr, arg);
        const int reschedule_count = *static_cast<int*>(arg);
        ASSERT_EQ(1, reschedule_count);
      });
  sync_point->EnableProcessing();

  Open();
  ASSERT_OK(Put(0, "foo", EncodeAsUint64(kWriteTs), "v1"));
  // No `full_history_ts_low` explicitly set by user, auto flush is continued
  // without checking if its UDTs expired.
  ASSERT_OK(dbfull()->TEST_SwitchWAL());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // After flush, `full_history_ts_low` should be automatically advanced to
  // the effective cutoff timestamp: write_ts + 1
  CheckEffectiveCutoffTime(kWriteTs + 1);
  Close();

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// When every key is already expired with respect to the configured
// `full_history_ts_low`, an automatic flush goes ahead without a re-schedule
// and leaves the cutoff untouched.
TEST_F(AutoFlushRetainUDTTest, AllKeysExpired) {
  const uint64_t kWriteTs = 1;
  const uint64_t kCutoffTs = 3;

  SyncPoint* const sync_point = SyncPoint::GetInstance();
  // The flush request must be seen with a reschedule count of exactly 1.
  sync_point->SetCallBack(
      "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [](void* arg) {
        ASSERT_NE(nullptr, arg);
        const int reschedule_count = *static_cast<int*>(arg);
        ASSERT_EQ(1, reschedule_count);
      });
  sync_point->EnableProcessing();

  Open();
  ASSERT_OK(Put(0, "foo", EncodeAsUint64(kWriteTs), "v1"));
  ASSERT_OK(
      db_->IncreaseFullHistoryTsLow(handles_[0], EncodeAsUint64(kCutoffTs)));
  // All keys expired w.r.t the configured `full_history_ts_low`, auto flush
  // continue without the need for a re-schedule.
  ASSERT_OK(dbfull()->TEST_SwitchWAL());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // `full_history_ts_low` stays unchanged after flush.
  CheckEffectiveCutoffTime(kCutoffTs);
  Close();

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
2024-06-13 20:18:10 +00:00
|
|
|
|
|
|
|
// With `max_write_buffer_number` lowered to 1, an automatic flush goes ahead
// without a re-schedule even though not all keys are expired, and the cutoff
// is advanced to write_ts + 1.
TEST_F(AutoFlushRetainUDTTest, NotAllKeysExpiredFlushToAvoidWriteStall) {
  const uint64_t kWriteTs = 1;

  SyncPoint* const sync_point = SyncPoint::GetInstance();
  // The flush request must be seen with a reschedule count of exactly 1.
  sync_point->SetCallBack(
      "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [](void* arg) {
        ASSERT_NE(nullptr, arg);
        const int reschedule_count = *static_cast<int*>(arg);
        ASSERT_EQ(1, reschedule_count);
      });
  sync_point->EnableProcessing();

  Open();
  ASSERT_OK(Put(0, "foo", EncodeAsUint64(kWriteTs), "v1"));
  // The cutoff equals the write timestamp, so the key is not expired yet.
  ASSERT_OK(
      db_->IncreaseFullHistoryTsLow(handles_[0], EncodeAsUint64(kWriteTs)));
  ASSERT_OK(db_->SetOptions(handles_[0], {{"max_write_buffer_number", "1"}}));
  // Not all keys expired, but auto flush is continued without a re-schedule
  // because of risk of write stall.
  ASSERT_OK(dbfull()->TEST_SwitchWAL());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // After flush, `full_history_ts_low` should be automatically advanced to
  // the effective cutoff timestamp: write_ts + 1
  CheckEffectiveCutoffTime(kWriteTs + 1);
  Close();

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// When not all keys are expired and the flush is rescheduled, the actual
// flush happens only after `full_history_ts_low` has been raised (from the
// reschedule callback) so that all keys are expired.
TEST_F(AutoFlushRetainUDTTest, NotAllKeysExpiredFlushRescheduled) {
  const uint64_t kWriteTs = 1;
  const uint64_t kRaisedCutoffTs = 3;

  // Starts at 1 and is bumped once by the flush-request callback.
  std::atomic<int> flush_request_checks{1};
  SyncPoint* const sync_point = SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "DBImpl::AfterRetainUDTReschedule:cb", [&](void* /*arg*/) {
        // Increasing full_history_ts_low so all keys expired after the initial
        // FlushRequest is rescheduled
        ASSERT_OK(db_->IncreaseFullHistoryTsLow(
            handles_[0], EncodeAsUint64(kRaisedCutoffTs)));
      });
  sync_point->SetCallBack(
      "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [&](void* arg) {
        ASSERT_NE(nullptr, arg);
        const int reschedule_count = *static_cast<int*>(arg);
        ASSERT_EQ(2, reschedule_count);
        flush_request_checks.fetch_add(1);
      });
  sync_point->EnableProcessing();

  Open();
  ASSERT_OK(Put(0, "foo", EncodeAsUint64(kWriteTs), "v1"));
  ASSERT_OK(
      db_->IncreaseFullHistoryTsLow(handles_[0], EncodeAsUint64(kWriteTs)));
  // Not all keys expired, and there is no risk of write stall. Flush is
  // rescheduled. The actual flush happens after `full_history_ts_low` is
  // increased to mark all keys expired.
  ASSERT_OK(dbfull()->TEST_SwitchWAL());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  // Make sure callback is not skipped.
  ASSERT_EQ(2, flush_request_checks.load());

  CheckEffectiveCutoffTime(kRaisedCutoffTs);
  Close();

  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();
}
|
2024-06-07 00:29:01 +00:00
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
class ManualFlushSkipRetainUDTTest : public ColumnFamilyRetainUDTTest {
|
|
|
|
public:
|
|
|
|
// Write an entry with timestamp that is not expired w.r.t cutoff timestamp,
|
|
|
|
// and make sure automatic flush would be rescheduled to retain UDT.
|
|
|
|
void CheckAutomaticFlushRetainUDT(uint64_t write_ts) {
|
|
|
|
std::atomic<int> local_counter{1};
|
|
|
|
SyncPoint::GetInstance()->SetCallBack(
|
|
|
|
"DBImpl::AfterRetainUDTReschedule:cb", [&](void* /*arg*/) {
|
|
|
|
// Increasing full_history_ts_low so all keys expired after the
|
|
|
|
// initial FlushRequest is rescheduled
|
|
|
|
ASSERT_OK(db_->IncreaseFullHistoryTsLow(
|
|
|
|
handles_[0], EncodeAsUint64(write_ts + 1)));
|
|
|
|
});
|
|
|
|
SyncPoint::GetInstance()->SetCallBack(
|
|
|
|
"DBImpl::BackgroundFlush:CheckFlushRequest:cb", [&](void* arg) {
|
|
|
|
ASSERT_NE(nullptr, arg);
|
|
|
|
auto reschedule_count = *static_cast<int*>(arg);
|
|
|
|
ASSERT_EQ(2, reschedule_count);
|
|
|
|
local_counter.fetch_add(1);
|
|
|
|
});
|
|
|
|
SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
EXPECT_OK(Put(0, "foo", EncodeAsUint64(write_ts),
|
|
|
|
"foo" + std::to_string(write_ts)));
|
|
|
|
EXPECT_OK(dbfull()->TEST_SwitchWAL());
|
|
|
|
EXPECT_OK(dbfull()->TEST_WaitForFlushMemTable());
|
|
|
|
// Make sure callback is not skipped.
|
|
|
|
EXPECT_EQ(2, local_counter);
|
|
|
|
|
|
|
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
|
|
}
|
2024-06-07 00:29:01 +00:00
|
|
|
};
|
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// A manual flush goes ahead without retaining UDTs and advances the cutoff to
// write_ts + 1; a later automatic flush still tries to retain UDTs.
TEST_F(ManualFlushSkipRetainUDTTest, ManualFlush) {
  const uint64_t kInitialCutoffTs = 0;
  const uint64_t kWriteTs = 1;
  const uint64_t kAutoFlushWriteTs = 3;

  Open();
  ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0],
                                          EncodeAsUint64(kInitialCutoffTs)));

  // Manual flush proceeds without trying to retain UDT.
  ASSERT_OK(Put(0, "foo", EncodeAsUint64(kWriteTs), "v1"));
  ASSERT_OK(Flush(0));
  CheckEffectiveCutoffTime(kWriteTs + 1);
  CheckAutomaticFlushRetainUDT(kAutoFlushWriteTs);

  Close();
}
|
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// A manual full-range compaction goes ahead without retaining UDTs and
// advances the cutoff to write_ts + 1; a later automatic flush still tries to
// retain UDTs.
TEST_F(ManualFlushSkipRetainUDTTest, ManualCompaction) {
  const uint64_t kInitialCutoffTs = 0;
  const uint64_t kWriteTs = 1;
  const uint64_t kAutoFlushWriteTs = 3;

  Open();
  ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0],
                                          EncodeAsUint64(kInitialCutoffTs)));

  // Manual compaction proceeds without trying to retain UDT.
  ASSERT_OK(Put(0, "foo", EncodeAsUint64(kWriteTs), "v2"));
  const CompactRangeOptions compact_options;
  ASSERT_OK(db_->CompactRange(compact_options, handles_[0], nullptr, nullptr));
  CheckEffectiveCutoffTime(kWriteTs + 1);
  CheckAutomaticFlushRetainUDT(kAutoFlushWriteTs);

  Close();
}
|
2024-06-07 00:29:01 +00:00
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// Exercises external file ingestion with UDTs not persisted: a file whose key
// overlaps the DB is rejected, a non-overlapping file is ingested without
// changing the effective cutoff, and automatic flushes still retain UDTs.
TEST_F(ManualFlushSkipRetainUDTTest, BulkLoading) {
  Open();
  const std::string ts_zero = EncodeAsUint64(0);
  const std::string ts_one = EncodeAsUint64(1);
  ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0], ts_zero));
  ASSERT_OK(Put(0, "foo", ts_one, "v1"));

  // Test flush behavior in bulk loading scenarios.
  Options options(db_options_, column_family_options_);
  const std::string sst_files_dir = dbname_ + "/sst_files/";
  ASSERT_OK(DestroyDir(env_, sst_files_dir));
  ASSERT_OK(env_->CreateDir(sst_files_dir));
  SstFileWriter sst_file_writer(EnvOptions(), options);

  // First file: contains "foo", which already exists in the DB.
  const std::string overlapping_file = sst_files_dir + "file1.sst";
  ASSERT_OK(sst_file_writer.Open(overlapping_file));
  ASSERT_OK(sst_file_writer.Put("foo", ts_zero, "v2"));
  ExternalSstFileInfo overlapping_file_info;
  ASSERT_OK(sst_file_writer.Finish(&overlapping_file_info));

  // Bulk loading in UDT mode doesn't support external file key range overlap
  // with DB key range.
  const Status overlap_status =
      db_->IngestExternalFile({overlapping_file}, IngestExternalFileOptions());
  ASSERT_TRUE(overlap_status.IsInvalidArgument());

  // Second file: contains only "bar", which the DB does not have.
  const std::string disjoint_file = sst_files_dir + "file2.sst";
  ASSERT_OK(sst_file_writer.Open(disjoint_file));
  ASSERT_OK(sst_file_writer.Put("bar", ts_zero, "val"));
  ExternalSstFileInfo disjoint_file_info;
  ASSERT_OK(sst_file_writer.Finish(&disjoint_file_info));
  // A successful bulk loading, and it doesn't trigger any flush. As a result
  // the effective cutoff timestamp is also unchanged.
  ASSERT_OK(
      db_->IngestExternalFile({disjoint_file}, IngestExternalFileOptions()));

  ASSERT_EQ(Get(0, "foo", ts_one), "v1");
  ASSERT_EQ(Get(0, "bar", ts_zero), "val");
  CheckEffectiveCutoffTime(0);
  CheckAutomaticFlushRetainUDT(1);

  Close();
}
|
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// Queues an automatic flush via a WAL switch, then issues a manual flush. The
// cutoff is unchanged after the WAL switch and advances to write_ts + 1 once
// the manual flush has completed.
TEST_F(ManualFlushSkipRetainUDTTest, AutomaticFlushQueued) {
  const uint64_t kInitialCutoffTs = 0;
  const uint64_t kWriteTs = 1;
  const uint64_t kAutoFlushWriteTs = 3;

  Open();
  ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0],
                                          EncodeAsUint64(kInitialCutoffTs)));

  ASSERT_OK(Put(0, "foo", EncodeAsUint64(kWriteTs), "v1"));
  ASSERT_OK(dbfull()->TEST_SwitchWAL());
  CheckEffectiveCutoffTime(kInitialCutoffTs);

  // Default `max_write_buffer_number=2` used, writing another memtable can get
  // automatic flush to proceed because of memory pressure. Not doing that so
  // we can test automatic flush gets to proceed because of an ongoing manual
  // flush attempt.
  ASSERT_OK(Flush(0));
  CheckEffectiveCutoffTime(kWriteTs + 1);
  CheckAutomaticFlushRetainUDT(kAutoFlushWriteTs);

  Close();
}
|
2023-07-26 23:25:06 +00:00
|
|
|
|
2024-06-13 20:18:10 +00:00
|
|
|
// Runs ten threads that each write one key, in timestamp order 0..9, and then
// issue a manual flush (even timestamps) or a manual full-range compaction
// (odd timestamps) concurrently. Afterwards the effective cutoff must be 10
// and automatic flushes must still try to retain UDTs.
TEST_F(ManualFlushSkipRetainUDTTest, ConcurrentManualFlushes) {
  Open();
  ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0], EncodeAsUint64(0)));

  std::vector<ROCKSDB_NAMESPACE::port::Thread> manual_flush_tds;
  // Timestamp whose writer is allowed to proceed next; guarded by `mtx`/`cv`
  // so that writes happen in increasing timestamp order.
  std::atomic<int> next_ts{0};
  std::mutex mtx;
  std::condition_variable cv;

  auto manual_flush = [&](int write_ts) {
    std::unique_lock<std::mutex> lock(mtx);
    cv.wait(lock,
            [&write_ts, &next_ts] { return write_ts == next_ts.load(); });
    const Status put_status =
        Put(0, "foo" + std::to_string(write_ts), EncodeAsUint64(write_ts),
            "val_" + std::to_string(write_ts));
    // Always hand the turn to the next writer before asserting. ASSERT_*
    // returns from this lambda on failure; doing that before the hand-off
    // would leave every later writer blocked in `cv.wait` and make the
    // `join()` calls below hang instead of reporting the failure.
    next_ts.fetch_add(1);
    cv.notify_all();
    lock.unlock();
    ASSERT_OK(put_status);

    // The flush / compaction runs outside the lock so that the manual
    // operations of different threads can overlap.
    if (write_ts % 2 == 0) {
      ASSERT_OK(Flush(0));
    } else {
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[0], nullptr,
                                  nullptr));
    }
  };

  for (int write_ts = 0; write_ts < 10; write_ts++) {
    manual_flush_tds.emplace_back(manual_flush, write_ts);
  }

  for (auto& td : manual_flush_tds) {
    td.join();
  }

  CheckEffectiveCutoffTime(10);
  CheckAutomaticFlushRetainUDT(11);
  Close();
}
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2014-01-02 17:08:12 +00:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
2015-03-17 21:08:00 +00:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
2019-08-09 22:08:36 +00:00
|
|
|
RegisterCustomObjects(argc, argv);
|
2015-03-17 21:08:00 +00:00
|
|
|
return RUN_ALL_TESTS();
|
2014-01-02 17:08:12 +00:00
|
|
|
}
|