mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-30 04:41:49 +00:00
3dff28cf9b
Summary: For performance purposes, the lower level routines were changed to use a SystemClock* instead of a std::shared_ptr<SystemClock>. The shared ptr has some performance degradation on certain hardware classes. For most of the system, there is no risk of the pointer being deleted/invalid because the shared_ptr will be stored elsewhere. For example, the ImmutableDBOptions stores the Env which has a std::shared_ptr<SystemClock> in it. The SystemClock* within the ImmutableDBOptions is essentially a "short cut" to gain access to this constant resource. There were a few classes (PeriodicWorkScheduler?) where the "short cut" property did not hold. In those cases, the shared pointer was preserved. Using db_bench readrandom perf_level=3 on my EC2 box, this change performed as well or better than 6.17: 6.17: readrandom : 28.046 micros/op 854902 ops/sec; 61.3 MB/s (355999 of 355999 found) 6.18: readrandom : 32.615 micros/op 735306 ops/sec; 52.7 MB/s (290999 of 290999 found) PR: readrandom : 27.500 micros/op 871909 ops/sec; 62.5 MB/s (367999 of 367999 found) (Note that the times for 6.18 are prior to revert of the SystemClock). Pull Request resolved: https://github.com/facebook/rocksdb/pull/8033 Reviewed By: pdillinger Differential Revision: D27014563 Pulled By: mrambacher fbshipit-source-id: ad0459eba03182e454391b5926bf5cdd45657b67
334 lines
12 KiB
C++
334 lines
12 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#include "trace_replay/io_tracer.h"
|
|
|
|
#include "rocksdb/env.h"
|
|
#include "rocksdb/status.h"
|
|
#include "rocksdb/trace_reader_writer.h"
|
|
#include "test_util/testharness.h"
|
|
#include "test_util/testutil.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
namespace {
// Common prefix of the synthetic file names stored in trace records; tests
// append a decimal suffix to it.
const std::string kDummyFile("/dummy/file");
}  // namespace
|
|
|
|
class IOTracerTest : public testing::Test {
|
|
public:
|
|
IOTracerTest() {
|
|
test_path_ = test::PerThreadDBPath("io_tracer_test");
|
|
env_ = ROCKSDB_NAMESPACE::Env::Default();
|
|
clock_ = env_->GetSystemClock().get();
|
|
EXPECT_OK(env_->CreateDir(test_path_));
|
|
trace_file_path_ = test_path_ + "/io_trace";
|
|
}
|
|
|
|
~IOTracerTest() override {
|
|
EXPECT_OK(env_->DeleteFile(trace_file_path_));
|
|
EXPECT_OK(env_->DeleteDir(test_path_));
|
|
}
|
|
|
|
std::string GetFileOperation(uint64_t id) {
|
|
id = id % 4;
|
|
switch (id) {
|
|
case 0:
|
|
return "CreateDir";
|
|
case 1:
|
|
return "GetChildren";
|
|
case 2:
|
|
return "FileSize";
|
|
case 3:
|
|
return "DeleteDir";
|
|
default:
|
|
assert(false);
|
|
}
|
|
return "";
|
|
}
|
|
|
|
void WriteIOOp(IOTraceWriter* writer, uint64_t nrecords) {
|
|
assert(writer);
|
|
for (uint64_t i = 0; i < nrecords; i++) {
|
|
IOTraceRecord record;
|
|
record.io_op_data = 0;
|
|
record.trace_type = TraceType::kIOTracer;
|
|
record.io_op_data |= (1 << IOTraceOp::kIOLen);
|
|
record.io_op_data |= (1 << IOTraceOp::kIOOffset);
|
|
record.file_operation = GetFileOperation(i);
|
|
record.io_status = IOStatus::OK().ToString();
|
|
record.file_name = kDummyFile + std::to_string(i);
|
|
record.len = i;
|
|
record.offset = i + 20;
|
|
EXPECT_OK(writer->WriteIOOp(record));
|
|
}
|
|
}
|
|
|
|
void VerifyIOOp(IOTraceReader* reader, uint32_t nrecords) {
|
|
assert(reader);
|
|
for (uint32_t i = 0; i < nrecords; i++) {
|
|
IOTraceRecord record;
|
|
ASSERT_OK(reader->ReadIOOp(&record));
|
|
ASSERT_EQ(record.file_operation, GetFileOperation(i));
|
|
ASSERT_EQ(record.io_status, IOStatus::OK().ToString());
|
|
ASSERT_EQ(record.len, i);
|
|
ASSERT_EQ(record.offset, i + 20);
|
|
}
|
|
}
|
|
|
|
Env* env_;
|
|
SystemClock* clock_;
|
|
EnvOptions env_options_;
|
|
std::string trace_file_path_;
|
|
std::string test_path_;
|
|
};
|
|
|
|
// Writes five records whose io_op_data masks select different optional
// fields, then reads the trace back and checks which of file_size / len /
// offset each record carries after the round trip.
TEST_F(IOTracerTest, MultipleRecordsWithDifferentIOOpOptions) {
  const std::string traced_file = kDummyFile + std::to_string(5);
  const std::string ok_status = IOStatus::OK().ToString();

  // Phase 1: produce the trace file.
  {
    TraceOptions options;
    std::unique_ptr<TraceWriter> sink;
    ASSERT_OK(
        NewFileTraceWriter(env_, env_options_, trace_file_path_, &sink));
    IOTracer tracer;
    ASSERT_OK(tracer.StartIOTrace(clock_, options, std::move(sink)));

    // Record without any optional field.
    const uint64_t no_fields = 0;
    IOTraceRecord plain(0, TraceType::kIOTracer, no_fields,
                        GetFileOperation(0), 155 /*latency*/, ok_status,
                        traced_file);
    tracer.WriteIOOp(plain);

    // Record carrying only the file size.
    const uint64_t size_only = (1 << IOTraceOp::kIOFileSize);
    IOTraceRecord with_size(0, TraceType::kIOTracer, size_only,
                            GetFileOperation(1), 10 /*latency*/, ok_status,
                            traced_file, 256 /*file_size*/);
    tracer.WriteIOOp(with_size);

    // Record carrying only the length (an offset is supplied but not
    // selected by the mask).
    const uint64_t len_only = (1 << IOTraceOp::kIOLen);
    IOTraceRecord with_len(0, TraceType::kIOTracer, len_only,
                           GetFileOperation(2), 10 /*latency*/, ok_status,
                           traced_file, 100 /*length*/, 200 /*offset*/);
    tracer.WriteIOOp(with_len);

    // Record carrying both length and offset.
    const uint64_t len_and_offset =
        (1 << IOTraceOp::kIOLen) | (1 << IOTraceOp::kIOOffset);
    IOTraceRecord with_len_offset(0, TraceType::kIOTracer, len_and_offset,
                                  GetFileOperation(3), 10 /*latency*/,
                                  ok_status, traced_file, 120 /*length*/,
                                  17 /*offset*/);
    tracer.WriteIOOp(with_len_offset);

    // Record carrying only the offset (a length is supplied but not selected
    // by the mask).
    const uint64_t offset_only = (1 << IOTraceOp::kIOOffset);
    IOTraceRecord with_offset(0, TraceType::kIOTracer, offset_only,
                              GetFileOperation(4), 10 /*latency*/, ok_status,
                              traced_file, 13 /*length*/, 50 /*offset*/);
    tracer.WriteIOOp(with_offset);

    ASSERT_OK(env_->FileExists(trace_file_path_));
  }

  // Phase 2: read the trace file back and check every record.
  {
    std::unique_ptr<TraceReader> source;
    ASSERT_OK(
        NewFileTraceReader(env_, env_options_, trace_file_path_, &source));
    IOTraceReader reader(std::move(source));

    IOTraceHeader header;
    ASSERT_OK(reader.ReadHeader(&header));
    ASSERT_EQ(kMajorVersion, static_cast<int>(header.rocksdb_major_version));
    ASSERT_EQ(kMinorVersion, static_cast<int>(header.rocksdb_minor_version));

    // General record.
    IOTraceRecord plain;
    ASSERT_OK(reader.ReadIOOp(&plain));
    ASSERT_EQ(plain.file_operation, GetFileOperation(0));
    ASSERT_EQ(plain.latency, 155);
    ASSERT_EQ(plain.file_name, traced_file);

    // File size only.
    IOTraceRecord with_size;
    ASSERT_OK(reader.ReadIOOp(&with_size));
    ASSERT_EQ(with_size.file_size, 256);
    ASSERT_EQ(with_size.len, 0);
    ASSERT_EQ(with_size.offset, 0);

    // Length only.
    IOTraceRecord with_len;
    ASSERT_OK(reader.ReadIOOp(&with_len));
    ASSERT_EQ(with_len.len, 100);
    ASSERT_EQ(with_len.file_size, 0);
    ASSERT_EQ(with_len.offset, 0);

    // Length and offset.
    IOTraceRecord with_len_offset;
    ASSERT_OK(reader.ReadIOOp(&with_len_offset));
    ASSERT_EQ(with_len_offset.len, 120);
    ASSERT_EQ(with_len_offset.file_size, 0);
    ASSERT_EQ(with_len_offset.offset, 17);

    // Offset only.
    IOTraceRecord with_offset;
    ASSERT_OK(reader.ReadIOOp(&with_offset));
    ASSERT_EQ(with_offset.len, 0);
    ASSERT_EQ(with_offset.file_size, 0);
    ASSERT_EQ(with_offset.offset, 50);

    // The trace is exhausted: a further read must report an error.
    IOTraceRecord past_end;
    ASSERT_NOK(reader.ReadIOOp(&past_end));
  }
}
|
|
|
|
// Writes a single record through a started IOTracer and checks that the trace
// file holds the header followed by exactly that record.
TEST_F(IOTracerTest, AtomicWrite) {
  const std::string traced_file = kDummyFile + std::to_string(0);
  const std::string ok_status = IOStatus::OK().ToString();

  // Phase 1: write one record.
  {
    IOTraceRecord entry(0, TraceType::kIOTracer, 0 /*io_op_data*/,
                        GetFileOperation(0), 10 /*latency*/, ok_status,
                        traced_file);
    TraceOptions options;
    std::unique_ptr<TraceWriter> sink;
    ASSERT_OK(
        NewFileTraceWriter(env_, env_options_, trace_file_path_, &sink));
    IOTracer tracer;
    ASSERT_OK(tracer.StartIOTrace(clock_, options, std::move(sink)));
    tracer.WriteIOOp(entry);
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }

  // Phase 2: the file must contain the header and a single record.
  {
    std::unique_ptr<TraceReader> source;
    ASSERT_OK(
        NewFileTraceReader(env_, env_options_, trace_file_path_, &source));
    IOTraceReader reader(std::move(source));

    IOTraceHeader header;
    ASSERT_OK(reader.ReadHeader(&header));
    ASSERT_EQ(kMajorVersion, static_cast<int>(header.rocksdb_major_version));
    ASSERT_EQ(kMinorVersion, static_cast<int>(header.rocksdb_minor_version));

    // Check the contents of the only record.
    IOTraceRecord decoded;
    ASSERT_OK(reader.ReadIOOp(&decoded));
    ASSERT_EQ(decoded.file_operation, GetFileOperation(0));
    ASSERT_EQ(decoded.io_status, ok_status);
    ASSERT_EQ(decoded.file_name, traced_file);

    // Nothing else was written, so the next read must fail.
    ASSERT_NOK(reader.ReadIOOp(&decoded));
  }
}
|
|
|
|
// Calls WriteIOOp on an IOTracer that was never started and checks that the
// trace file stays empty (even the header cannot be read).
TEST_F(IOTracerTest, AtomicWriteBeforeStartTrace) {
  const std::string traced_file = kDummyFile + std::to_string(0);

  // Phase 1: create the trace file, but never hand it to the tracer.
  {
    IOTraceRecord entry(0, TraceType::kIOTracer, 0 /*io_op_data*/,
                        GetFileOperation(0), 0, IOStatus::OK().ToString(),
                        traced_file);
    std::unique_ptr<TraceWriter> sink;
    ASSERT_OK(
        NewFileTraceWriter(env_, env_options_, trace_file_path_, &sink));
    IOTracer tracer;
    // StartIOTrace has not been called, so this record must not reach the
    // trace file.
    tracer.WriteIOOp(entry);
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }

  // Phase 2: the file exists but has no content, not even a header.
  {
    std::unique_ptr<TraceReader> source;
    ASSERT_OK(
        NewFileTraceReader(env_, env_options_, trace_file_path_, &source));
    IOTraceReader reader(std::move(source));
    IOTraceHeader header;
    ASSERT_NOK(reader.ReadHeader(&header));
  }
}
|
|
|
|
// Writes the same record once before and once after EndIOTrace and checks
// that only the first write ended up in the trace file.
TEST_F(IOTracerTest, AtomicNoWriteAfterEndTrace) {
  const std::string traced_file = kDummyFile + std::to_string(0);
  const std::string ok_status = IOStatus::OK().ToString();

  // Phase 1: write, end the trace, then try to write again.
  {
    const uint64_t size_only = (1 << IOTraceOp::kIOFileSize);
    IOTraceRecord entry(0, TraceType::kIOTracer, size_only,
                        GetFileOperation(2), 0 /*latency*/, ok_status,
                        traced_file, 10 /*file_size*/);
    TraceOptions options;
    std::unique_ptr<TraceWriter> sink;
    ASSERT_OK(
        NewFileTraceWriter(env_, env_options_, trace_file_path_, &sink));
    IOTracer tracer;
    ASSERT_OK(tracer.StartIOTrace(clock_, options, std::move(sink)));
    tracer.WriteIOOp(entry);
    tracer.EndIOTrace();
    // Tracing has ended, so this second write must be dropped.
    tracer.WriteIOOp(entry);
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }

  // Phase 2: the file must contain the header and exactly one record.
  {
    std::unique_ptr<TraceReader> source;
    ASSERT_OK(
        NewFileTraceReader(env_, env_options_, trace_file_path_, &source));
    IOTraceReader reader(std::move(source));

    IOTraceHeader header;
    ASSERT_OK(reader.ReadHeader(&header));
    ASSERT_EQ(kMajorVersion, static_cast<int>(header.rocksdb_major_version));
    ASSERT_EQ(kMinorVersion, static_cast<int>(header.rocksdb_minor_version));

    IOTraceRecord decoded;
    ASSERT_OK(reader.ReadIOOp(&decoded));
    ASSERT_EQ(decoded.file_operation, GetFileOperation(2));
    ASSERT_EQ(decoded.io_status, ok_status);
    ASSERT_EQ(decoded.file_size, 10);

    // The post-EndIOTrace write left no second record behind.
    ASSERT_NOK(reader.ReadIOOp(&decoded));
  }
}
|
|
|
|
// Writes a header plus ten records directly through IOTraceWriter and checks
// that IOTraceReader returns the same ten records and then reports an error.
TEST_F(IOTracerTest, AtomicMultipleWrites) {
  const uint32_t record_count = 10;

  // Phase 1: write the header followed by the records.
  {
    TraceOptions options;
    std::unique_ptr<TraceWriter> sink;
    ASSERT_OK(
        NewFileTraceWriter(env_, env_options_, trace_file_path_, &sink));
    IOTraceWriter trace_out(clock_, options, std::move(sink));
    ASSERT_OK(trace_out.WriteHeader());
    WriteIOOp(&trace_out, record_count);
    ASSERT_OK(env_->FileExists(trace_file_path_));
  }

  // Phase 2: read everything back and compare.
  {
    std::unique_ptr<TraceReader> source;
    ASSERT_OK(
        NewFileTraceReader(env_, env_options_, trace_file_path_, &source));
    IOTraceReader trace_in(std::move(source));

    IOTraceHeader header;
    ASSERT_OK(trace_in.ReadHeader(&header));
    ASSERT_EQ(kMajorVersion, static_cast<int>(header.rocksdb_major_version));
    ASSERT_EQ(kMinorVersion, static_cast<int>(header.rocksdb_minor_version));

    // All written records must come back in order.
    VerifyIOOp(&trace_in, record_count);

    // Reading one more record must report an error.
    IOTraceRecord past_end;
    ASSERT_NOK(trace_in.ReadIOOp(&past_end));
  }
}
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
int main(int argc, char** argv) {
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|