From 9f0bd568897288952329e05bf2354cb21602cd6d Mon Sep 17 00:00:00 2001 From: haoyuhuang Date: Mon, 1 Jul 2019 12:43:14 -0700 Subject: [PATCH] Cache simulator: Refactor the cache simulator so that we can add alternative policies easily (#5517) Summary: This PR creates cache_simulator.h file. It contains a CacheSimulator that runs against a block cache trace record. We can add alternative cache simulators derived from CacheSimulator later. For example, this PR adds a PrioritizedCacheSimulator that inserts filter/index/uncompressed dictionary blocks with high priority. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5517 Test Plan: make clean && COMPILE_WITH_ASAN=1 make check -j32 Differential Revision: D16043689 Pulled By: HaoyuHuang fbshipit-source-id: 65f28ed52b866ffb0e6eceffd7f9ca7c45bb680d --- CMakeLists.txt | 1 + TARGETS | 1 + src.mk | 1 + tools/block_cache_trace_analyzer.cc | 67 +++--------- tools/block_cache_trace_analyzer.h | 49 +-------- utilities/simulator_cache/cache_simulator.cc | 104 +++++++++++++++++++ utilities/simulator_cache/cache_simulator.h | 98 +++++++++++++++++ 7 files changed, 219 insertions(+), 102 deletions(-) create mode 100644 utilities/simulator_cache/cache_simulator.cc create mode 100644 utilities/simulator_cache/cache_simulator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ff61dca99..0ca338bd63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -685,6 +685,7 @@ set(SOURCES utilities/persistent_cache/block_cache_tier_metadata.cc utilities/persistent_cache/persistent_cache_tier.cc utilities/persistent_cache/volatile_tier_impl.cc + utilities/simulator_cache/cache_simulator.cc utilities/simulator_cache/sim_cache.cc utilities/table_properties_collectors/compact_on_deletion_collector.cc utilities/trace/file_trace_reader_writer.cc diff --git a/TARGETS b/TARGETS index a43ed6b108..3935f1f740 100644 --- a/TARGETS +++ b/TARGETS @@ -280,6 +280,7 @@ cpp_library( "utilities/persistent_cache/block_cache_tier_metadata.cc", 
"utilities/persistent_cache/persistent_cache_tier.cc", "utilities/persistent_cache/volatile_tier_impl.cc", + "utilities/simulator_cache/cache_simulator.cc", "utilities/simulator_cache/sim_cache.cc", "utilities/table_properties_collectors/compact_on_deletion_collector.cc", "utilities/trace/file_trace_reader_writer.cc", diff --git a/src.mk b/src.mk index 71c2bd0180..7c35ee6758 100644 --- a/src.mk +++ b/src.mk @@ -199,6 +199,7 @@ LIB_SOURCES = \ utilities/persistent_cache/block_cache_tier_metadata.cc \ utilities/persistent_cache/persistent_cache_tier.cc \ utilities/persistent_cache/volatile_tier_impl.cc \ + utilities/simulator_cache/cache_simulator.cc \ utilities/simulator_cache/sim_cache.cc \ utilities/table_properties_collectors/compact_on_deletion_collector.cc \ utilities/trace/file_trace_reader_writer.cc \ diff --git a/tools/block_cache_trace_analyzer.cc b/tools/block_cache_trace_analyzer.cc index 78753a2162..4770348a79 100644 --- a/tools/block_cache_trace_analyzer.cc +++ b/tools/block_cache_trace_analyzer.cc @@ -24,7 +24,7 @@ DEFINE_string( "The config file path. One cache configuration per line. The format of a " "cache configuration is " "cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. " - "cache_name is lru. cache_capacity can be xK, xM or xG " + "cache_name is lru or lru_priority. 
cache_capacity can be xK, xM or xG " "where x is a positive number."); DEFINE_int32(block_cache_trace_downsample_ratio, 1, "The trace collected accesses on one in every " @@ -179,47 +179,6 @@ double percent(uint64_t numerator, uint64_t denomenator) { } // namespace -BlockCacheTraceSimulator::BlockCacheTraceSimulator( - uint64_t warmup_seconds, uint32_t downsample_ratio, - const std::vector& cache_configurations) - : warmup_seconds_(warmup_seconds), - downsample_ratio_(downsample_ratio), - cache_configurations_(cache_configurations) { - for (auto const& config : cache_configurations_) { - for (auto cache_capacity : config.cache_capacities) { - // Scale down the cache capacity since the trace contains accesses on - // 1/'downsample_ratio' blocks. - uint64_t simulate_cache_capacity = - cache_capacity / downsample_ratio_; - sim_caches_.push_back(NewSimCache( - NewLRUCache(simulate_cache_capacity, config.num_shard_bits), - /*real_cache=*/nullptr, config.num_shard_bits)); - } - } -} - -void BlockCacheTraceSimulator::Access(const BlockCacheTraceRecord& access) { - if (trace_start_time_ == 0) { - trace_start_time_ = access.access_timestamp; - } - // access.access_timestamp is in microseconds. 
- if (!warmup_complete_ && - trace_start_time_ + warmup_seconds_ * kMicrosInSecond <= - access.access_timestamp) { - for (auto& sim_cache : sim_caches_) { - sim_cache->reset_counter(); - } - warmup_complete_ = true; - } - for (auto& sim_cache : sim_caches_) { - auto handle = sim_cache->Lookup(access.block_key); - if (handle == nullptr && !access.no_insert) { - sim_cache->Insert(access.block_key, /*value=*/nullptr, access.block_size, - /*deleter=*/nullptr); - } - } -} - void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const { if (!cache_simulator_) { return; @@ -237,27 +196,21 @@ void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const { const std::string header = "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses"; out << header << std::endl; - uint64_t sim_cache_index = 0; - for (auto const& config : cache_simulator_->cache_configurations()) { - for (auto cache_capacity : config.cache_capacities) { - uint64_t hits = - cache_simulator_->sim_caches()[sim_cache_index]->get_hit_counter(); - uint64_t misses = - cache_simulator_->sim_caches()[sim_cache_index]->get_miss_counter(); - uint64_t total_accesses = hits + misses; - double miss_ratio = static_cast(misses * 100.0 / total_accesses); + for (auto const& config_caches : cache_simulator_->sim_caches()) { + const CacheConfiguration& config = config_caches.first; + for (uint32_t i = 0; i < config.cache_capacities.size(); i++) { + double miss_ratio = config_caches.second[i]->miss_ratio(); // Write the body. 
out << config.cache_name; out << ","; out << config.num_shard_bits; out << ","; - out << cache_capacity; + out << config.cache_capacities[i]; out << ","; out << std::fixed << std::setprecision(4) << miss_ratio; out << ","; - out << total_accesses; + out << config_caches.second[i]->total_accesses(); out << std::endl; - sim_cache_index++; } } out.close(); @@ -1095,6 +1048,12 @@ int block_cache_trace_analyzer_tool(int argc, char** argv) { if (!cache_configs.empty()) { cache_simulator.reset(new BlockCacheTraceSimulator( warmup_seconds, downsample_ratio, cache_configs)); + Status s = cache_simulator->InitializeCaches(); + if (!s.ok()) { + fprintf(stderr, "Cannot initialize cache simulators %s\n", + s.ToString().c_str()); + exit(1); + } } BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path, FLAGS_block_cache_analysis_result_dir, diff --git a/tools/block_cache_trace_analyzer.h b/tools/block_cache_trace_analyzer.h index 21a99f7db7..617b90280c 100644 --- a/tools/block_cache_trace_analyzer.h +++ b/tools/block_cache_trace_analyzer.h @@ -12,57 +12,10 @@ #include "rocksdb/env.h" #include "rocksdb/utilities/sim_cache.h" #include "trace_replay/block_cache_tracer.h" +#include "utilities/simulator_cache/cache_simulator.h" namespace rocksdb { -const uint64_t kMicrosInSecond = 1000000; - -class BlockCacheTraceAnalyzer; - -// A cache configuration provided by user. -struct CacheConfiguration { - std::string cache_name; // LRU. - uint32_t num_shard_bits; - std::vector - cache_capacities; // simulate cache capacities in bytes. -}; - -// A block cache simulator that reports miss ratio curves given a set of cache -// configurations. -class BlockCacheTraceSimulator { - public: - // warmup_seconds: The number of seconds to warmup simulated caches. The - // hit/miss counters are reset after the warmup completes. 
- BlockCacheTraceSimulator( - uint64_t warmup_seconds, uint32_t downsample_ratio, - const std::vector& cache_configurations); - ~BlockCacheTraceSimulator() = default; - // No copy and move. - BlockCacheTraceSimulator(const BlockCacheTraceSimulator&) = delete; - BlockCacheTraceSimulator& operator=(const BlockCacheTraceSimulator&) = delete; - BlockCacheTraceSimulator(BlockCacheTraceSimulator&&) = delete; - BlockCacheTraceSimulator& operator=(BlockCacheTraceSimulator&&) = delete; - - void Access(const BlockCacheTraceRecord& access); - - const std::vector>& sim_caches() const { - return sim_caches_; - } - - const std::vector& cache_configurations() const { - return cache_configurations_; - } - - private: - const uint64_t warmup_seconds_; - const uint32_t downsample_ratio_; - const std::vector cache_configurations_; - - bool warmup_complete_ = false; - std::vector> sim_caches_; - uint64_t trace_start_time_ = 0; -}; - // Statistics of a block. struct BlockAccessInfo { uint64_t num_accesses = 0; diff --git a/utilities/simulator_cache/cache_simulator.cc b/utilities/simulator_cache/cache_simulator.cc new file mode 100644 index 0000000000..145efdb6cb --- /dev/null +++ b/utilities/simulator_cache/cache_simulator.cc @@ -0,0 +1,104 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+
+#include "utilities/simulator_cache/cache_simulator.h"
+
+namespace rocksdb {
+CacheSimulator::CacheSimulator(std::shared_ptr<SimCache> sim_cache)
+    : sim_cache_(sim_cache) {}
+
+void CacheSimulator::Access(const BlockCacheTraceRecord& access) {
+  auto handle = sim_cache_->Lookup(access.block_key);
+  if (handle == nullptr && !access.no_insert) {
+    sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size,
+                       /*deleter=*/nullptr, /*handle=*/nullptr);
+  }
+}
+
+void PrioritizedCacheSimulator::Access(const BlockCacheTraceRecord& access) {
+  auto handle = sim_cache_->Lookup(access.block_key);
+  if (handle == nullptr && !access.no_insert) {
+    Cache::Priority priority = Cache::Priority::LOW;
+    if (access.block_type == TraceType::kBlockTraceFilterBlock ||
+        access.block_type == TraceType::kBlockTraceIndexBlock ||
+        access.block_type == TraceType::kBlockTraceUncompressionDictBlock) {
+      priority = Cache::Priority::HIGH;
+    }
+    sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size,
+                       /*deleter=*/nullptr, /*handle=*/nullptr, priority);
+  }
+}
+
+double CacheSimulator::miss_ratio() {
+  const uint64_t misses = sim_cache_->get_miss_counter();
+  const uint64_t total = sim_cache_->get_hit_counter() + misses;
+  // Percentage of misses. With no accesses recorded, return 0 rather than NaN.
+  return total == 0 ? 0.0 : misses * 100.0 / static_cast<double>(total);
+}
+
+uint64_t CacheSimulator::total_accesses() {
+  return sim_cache_->get_hit_counter() + sim_cache_->get_miss_counter();
+}
+
+BlockCacheTraceSimulator::BlockCacheTraceSimulator(
+    uint64_t warmup_seconds, uint32_t downsample_ratio,
+    const std::vector<CacheConfiguration>& cache_configurations)
+    : warmup_seconds_(warmup_seconds),
+      downsample_ratio_(downsample_ratio),
+      cache_configurations_(cache_configurations) {}
+
+Status BlockCacheTraceSimulator::InitializeCaches() {
+  for (auto const& config : cache_configurations_) {
+    for (auto cache_capacity : config.cache_capacities) {
+      // The trace only covers 1/'downsample_ratio' of the blocks, so shrink
+      // the capacity to match. downsample_ratio_ must be non-zero here.
+      uint64_t simulate_cache_capacity = cache_capacity / downsample_ratio_;
+      std::shared_ptr<CacheSimulator> sim_cache;
+      if (config.cache_name == "lru") {
+        sim_cache = std::make_shared<CacheSimulator>(NewSimCache(
+            NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
+                        /*strict_capacity_limit=*/false,
+                        /*high_pri_pool_ratio=*/0),
+            /*real_cache=*/nullptr, config.num_shard_bits));
+      } else if (config.cache_name == "lru_priority") {
+        sim_cache = std::make_shared<PrioritizedCacheSimulator>(NewSimCache(
+            NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
+                        /*strict_capacity_limit=*/false,
+                        /*high_pri_pool_ratio=*/0.5),
+            /*real_cache=*/nullptr, config.num_shard_bits));
+      } else {
+        // Not supported.
+        return Status::InvalidArgument("Unknown cache name " +
+                                       config.cache_name);
+      }
+      sim_caches_[config].push_back(sim_cache);
+    }
+  }
+  return Status::OK();
+}
+
+void BlockCacheTraceSimulator::Access(const BlockCacheTraceRecord& access) {
+  if (trace_start_time_ == 0) {
+    trace_start_time_ = access.access_timestamp;
+  }
+  // access.access_timestamp is in microseconds.
+  if (!warmup_complete_ &&
+      trace_start_time_ + warmup_seconds_ * kMicrosInSecond <=
+          access.access_timestamp) {
+    for (auto& config_caches : sim_caches_) {
+      for (auto& sim_cache : config_caches.second) {
+        sim_cache->reset_counter();
+      }
+    }
+    warmup_complete_ = true;
+  }
+  for (auto& config_caches : sim_caches_) {
+    for (auto& sim_cache : config_caches.second) {
+      sim_cache->Access(access);
+    }
+  }
+}
+
+}  // namespace rocksdb
diff --git a/utilities/simulator_cache/cache_simulator.h b/utilities/simulator_cache/cache_simulator.h
new file mode 100644
index 0000000000..37166d8a9c
--- /dev/null
+++ b/utilities/simulator_cache/cache_simulator.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include "rocksdb/utilities/sim_cache.h"
+#include "trace_replay/block_cache_tracer.h"
+
+namespace rocksdb {
+
+const uint64_t kMicrosInSecond = 1000000;
+
+// A cache configuration provided by user.
+struct CacheConfiguration {
+  std::string cache_name;  // "lru" or "lru_priority".
+  uint32_t num_shard_bits;
+  std::vector<uint64_t>
+      cache_capacities;  // simulate cache capacities in bytes.
+  // Comparisons use (cache_name, num_shard_bits); cache_capacities is ignored.
+  bool operator==(const CacheConfiguration& o) const {
+    return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits;
+  }
+  bool operator<(const CacheConfiguration& o) const {
+    return cache_name < o.cache_name ||
+           (cache_name == o.cache_name && num_shard_bits < o.num_shard_bits);
+  }
+};
+
+// A cache simulator that runs against a block cache trace.
+class CacheSimulator {
+ public:
+  CacheSimulator(std::shared_ptr<SimCache> sim_cache);
+  virtual ~CacheSimulator() = default;
+  // No copy and move.
+  CacheSimulator(const CacheSimulator&) = delete;
+  CacheSimulator& operator=(const CacheSimulator&) = delete;
+  CacheSimulator(CacheSimulator&&) = delete;
+  CacheSimulator& operator=(CacheSimulator&&) = delete;
+
+  virtual void Access(const BlockCacheTraceRecord& access);
+  void reset_counter() { sim_cache_->reset_counter(); }
+  double miss_ratio();        // Misses as a percentage of all accesses.
+  uint64_t total_accesses();  // Hit counter + miss counter.
+
+ protected:
+  std::shared_ptr<SimCache> sim_cache_;
+};
+
+// A prioritized cache simulator that runs against a block cache trace.
+// It inserts missing index/filter/uncompression-dictionary blocks with high
+// priority in the cache.
+class PrioritizedCacheSimulator : public CacheSimulator {
+ public:
+  PrioritizedCacheSimulator(std::shared_ptr<SimCache> sim_cache)
+      : CacheSimulator(sim_cache) {}
+  void Access(const BlockCacheTraceRecord& access) override;
+};
+
+// A block cache simulator that reports miss ratio curves given a set of cache
+// configurations.
+class BlockCacheTraceSimulator {
+ public:
+  // warmup_seconds: seconds to warm up the caches before the hit/miss counters
+  // are reset. downsample_ratio: capacities are divided by it; must be > 0.
+  BlockCacheTraceSimulator(
+      uint64_t warmup_seconds, uint32_t downsample_ratio,
+      const std::vector<CacheConfiguration>& cache_configurations);
+  ~BlockCacheTraceSimulator() = default;
+  // No copy and move.
+  BlockCacheTraceSimulator(const BlockCacheTraceSimulator&) = delete;
+  BlockCacheTraceSimulator& operator=(const BlockCacheTraceSimulator&) = delete;
+  BlockCacheTraceSimulator(BlockCacheTraceSimulator&&) = delete;
+  BlockCacheTraceSimulator& operator=(BlockCacheTraceSimulator&&) = delete;
+
+  Status InitializeCaches();
+
+  void Access(const BlockCacheTraceRecord& access);
+
+  const std::map<CacheConfiguration,
+                 std::vector<std::shared_ptr<CacheSimulator>>>&
+  sim_caches() const {
+    return sim_caches_;
+  }
+
+ private:
+  const uint64_t warmup_seconds_;
+  const uint32_t downsample_ratio_;
+  const std::vector<CacheConfiguration> cache_configurations_;
+
+  bool warmup_complete_ = false;
+  std::map<CacheConfiguration, std::vector<std::shared_ptr<CacheSimulator>>>
+      sim_caches_;
+  uint64_t trace_start_time_ = 0;
+};
+
+}  // namespace rocksdb