rocksdb/tools/block_cache_analyzer/block_cache_pysim.sh

#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# A shell script to run a batch of pysims and combine individual pysim output files.
#
# Usage: bash block_cache_pysim.sh trace_file_path result_dir downsample_size warmup_seconds max_jobs
# trace_file_path: The file path that stores the traces.
# result_dir: The directory to store pysim results. The output files from a pysim are stored in result_dir/ml
# downsample_size: The downsample size used to collect the trace.
# warmup_seconds: The number of seconds used for warmup.
# max_jobs: The max number of concurrent pysims to run.

# Require exactly five positional arguments. A usage error is reported on
# stderr with a non-zero exit status so that callers and automation can tell
# a misuse apart from a successful run.
if [ $# -ne 5 ]; then
  echo "Usage: ./block_cache_pysim.sh trace_file_path result_dir downsample_size warmup_seconds max_jobs" >&2
  exit 1
fi

# Positional arguments, in the order given in the usage line.
trace_file="$1"
result_dir="$2"
downsample_size="$3"
warmup_seconds="$4"
max_jobs="$5"
# Number of background pysim processes believed to be running; the launch
# loop compares it against max_jobs.
current_jobs=0

# An empty result_dir would make the cleanup below operate on "/ml".
if [[ -z "$result_dir" ]]; then
  echo "result_dir must not be empty" >&2
  exit 1
fi

# max_jobs is used in a numeric "-ge" comparison by the launch loop: zero
# would make that loop wait forever and a non-number would disable the
# throttling, so insist on a positive integer up front.
if ! [[ "$max_jobs" =~ ^0*[1-9][0-9]*$ ]]; then
  echo "max_jobs must be a positive integer, got '$max_jobs'" >&2
  exit 1
fi

# Per-simulation output files go to result_dir/ml; start from an empty
# directory. "mkdir -p" also creates result_dir itself when it is missing.
ml_tmp_result_dir="$result_dir/ml"
rm -rf -- "$ml_tmp_result_dir"
if ! mkdir -p -- "$ml_tmp_result_dir"; then
  echo "Cannot create directory $ml_tmp_result_dir" >&2
  exit 1
fi

# Start one background pysim per (cache_type, cache_size) combination, never
# letting more than max_jobs of them run at the same time.
for cache_type in "ts" "linucb" "ts_hybrid" "linucb_hybrid"; do
  for cache_size in "16M" "256M" "1G" "2G" "4G" "8G" "12G" "16G"; do
    # Throttle: poll every 10 seconds until a slot frees up. The count comes
    # from this shell's own job table (running background jobs only), so
    # unrelated python/pysim processes on the machine are not counted.
    while [ "$current_jobs" -ge "$max_jobs" ]; do
      sleep 10
      echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
      # The arithmetic expansion strips the padding some wc implementations
      # put around the number.
      current_jobs=$(( $(jobs -pr | wc -l) ))
      echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
    done
    output="log-ml-$cache_type-$cache_size"
    echo "Running simulation for $cache_type and cache size $cache_size. Number of running jobs: $current_jobs. "
    # stdout and stderr of the simulation both go to its log file. The
    # redirect target is quoted so that a result directory containing
    # whitespace does not produce an "ambiguous redirect" error.
    nohup python block_cache_pysim.py "$cache_type" "$cache_size" "$downsample_size" "$warmup_seconds" "$trace_file" "$ml_tmp_result_dir" > "$ml_tmp_result_dir/$output" 2>&1 &
    current_jobs=$((current_jobs + 1))
  done
done

# Wait for all jobs to complete.
# The counter is refreshed from this shell's job table (running background
# jobs only) instead of grepping the system-wide process list, so only the
# simulations started by this script are waited for.
current_jobs=$(( $(jobs -pr | wc -l) ))
while [ "$current_jobs" -gt 0 ]; do
  sleep 10
  echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
  current_jobs=$(( $(jobs -pr | wc -l) ))
  echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
done
# Reap every background child before the results are combined.
wait

#######################################
# Prints the combined-output path that one pysim result file feeds into.
# Arguments:
#   $1 - file-name prefix collected in the current pass ("header-" or "data-")
#   $2 - base name (no directory part) of a file from the per-run directory
#   $3 - directory that receives the combined files
# Outputs:
#   The destination path on stdout.
# Returns:
#   0 when the name belongs to a combined file for this prefix, 1 otherwise.
#######################################
pysim_sum_file_for() {
  local prefix=$1 name=$2 out_dir=$3
  local kind field
  local -a parts=()
  local timeline_idx=0

  case "$name" in
    "${prefix}ml-mrc"*)
      printf '%s\n' "$out_dir/ml_mrc"
      return 0
      ;;
    "${prefix}ml-policy-timeline"*) kind="policy_timeline" ;;
    "${prefix}ml-policy-ratio-timeline"*) kind="policy_ratio_timeline" ;;
    "${prefix}ml-miss-timeline"*) kind="miss_timeline" ;;
    "${prefix}ml-miss-ratio-timeline"*) kind="miss_ratio_timeline" ;;
    *) return 1 ;;
  esac

  # Timeline names are '-'-separated. The field right after "timeline" is
  # used as the time unit and the field three positions after "timeline" as
  # the capacity.
  IFS='-' read -ra parts <<< "$name"
  for field in "${parts[@]}"; do
    if [[ "$field" == "timeline" ]]; then
      break
    fi
    timeline_idx=$((timeline_idx + 1))
  done
  printf '%s\n' "$out_dir/ml_${parts[timeline_idx + 3]}_${parts[timeline_idx + 1]}_${kind}"
}

#######################################
# Merges the per-simulation files of one directory into combined files.
# Files are visited twice: the "header-" pass writes a header into each
# combined file that does not exist yet, then the "data-" pass appends every
# data file to its combined file.
# Arguments:
#   $1 - directory holding the individual pysim output files
#   $2 - directory that receives the combined ml_* files
# Returns:
#   1 when either directory argument is empty, 0 otherwise.
#######################################
combine_pysim_outputs() {
  local src_dir=$1 out_dir=$2
  local prefix path target

  if [[ -z "$src_dir" || -z "$out_dir" ]]; then
    echo "combine_pysim_outputs: source and output directories are required" >&2
    return 1
  fi

  # Remove combined files left by an earlier run. The glob has to stay
  # outside the quotes, otherwise it is taken literally and nothing is
  # removed, which would make the data pass append to stale files.
  rm -rf -- "$out_dir"/ml_*

  for prefix in "header-" "data-"; do
    for path in "$src_dir"/*; do
      # Skips directories and the literal pattern left by an unmatched glob.
      [[ -f "$path" ]] || continue
      # Match on the base name: the glob yields full paths, which never
      # start with the "header-"/"data-" prefix.
      target=$(pysim_sum_file_for "$prefix" "${path##*/}" "$out_dir") || continue
      # Only the first header found for a combined file is kept.
      if [[ "$prefix" == "header-" && -e "$target" ]]; then
        continue
      fi
      cat "$path" >> "$target"
    done
  done
}

echo "Combine individual pysim output files"

mrc_file="$result_dir/ml_mrc"
combine_pysim_outputs "$ml_tmp_result_dir" "$result_dir"

echo "Done"

#######################################
# Sorts a comma-separated file in place: by its first field, then
# numerically by its fourth field.
# "sort -o" may name one of its own inputs as the output, so no intermediate
# file is needed.
# Arguments:
#   $1 - path of the file to sort
# Returns:
#   1 (leaving the file system untouched) when the file does not exist,
#   otherwise the exit status of sort.
#######################################
sort_mrc_file() {
  local file=$1
  if [[ ! -f "$file" ]]; then
    echo "No MRC file to sort: $file" >&2
    return 1
  fi
  sort -t ',' -k1,1 -k4,4n -o "$file" "$file"
}

# Sort MRC file by cache_type and cache_size.
sort_mrc_file "$mrc_file"
Block cache simulator: Add pysim to simulate caches using reinforcement learning. (#5610) Summary: This PR implements cache eviction using reinforcement learning. It includes two implementations: 1. An implementation of Thompson Sampling for the Bernoulli Bandit [1]. 2. An implementation of LinUCB with disjoint linear models [2]. The idea is that a cache uses multiple eviction policies, e.g., MRU, LRU, and LFU. The cache learns which eviction policy is the best and uses it upon a cache miss. Thompson Sampling is contextless and does not include any features. LinUCB includes features such as level, block type, caller, column family id to decide which eviction policy to use. [1] Daniel J. Russo, Benjamin Van Roy, Abbas Kazerouni, Ian Osband, and Zheng Wen. 2018. A Tutorial on Thompson Sampling. Found. Trends Mach. Learn. 11, 1 (July 2018), 1-96. DOI: https://doi.org/10.1561/2200000070 [2] Lihong Li, Wei Chu, John Langford, and Robert E. Schapire. 2010. A contextual-bandit approach to personalized news article recommendation. In Proceedings of the 19th international conference on World wide web (WWW '10). ACM, New York, NY, USA, 661-670. DOI=http://dx.doi.org/10.1145/1772690.1772758 Pull Request resolved: https://github.com/facebook/rocksdb/pull/5610 Differential Revision: D16435067 Pulled By: HaoyuHuang fbshipit-source-id: 6549239ae14115c01cb1e70548af9e46d8dc21bb 2019-07-26 21:36:16 +00:00			`#!/usr/bin/env bash`
			`# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.`
			`#`
			`# A shell script to run a batch of pysims and combine individual pysim output files.`
			`#`
			`# Usage: bash block_cache_pysim.sh trace_file_path result_dir downsample_size warmup_seconds max_jobs`
			`# trace_file_path: The file path that stores the traces.`
			`# result_dir: The directory to store pysim results. The output files from a pysim is stores in result_dir/ml`
			`# downsample_size: The downsample size used to collect the trace.`
			`# warmup_seconds: The number of seconds used for warmup.`
			`# max_jobs: The max number of concurrent pysims to run.`

			`if [ $# -ne 5 ]; then`
			`echo "Usage: ./block_cache_pysim.sh trace_file_path result_dir downsample_size warmup_seconds max_jobs"`
			`exit 0`
			`fi`

			`trace_file="$1"`
			`result_dir="$2"`
			`downsample_size="$3"`
			`warmup_seconds="$4"`
			`max_jobs="$5"`
			`current_jobs=0`

			`ml_tmp_result_dir="$result_dir/ml"`
			`rm -rf "$ml_tmp_result_dir"`
			`mkdir -p "$result_dir"`
			`mkdir -p "$ml_tmp_result_dir"`

			`for cache_type in "ts" "linucb" "ts_hybrid" "linucb_hybrid"`
			`do`
			`for cache_size in "16M" "256M" "1G" "2G" "4G" "8G" "12G" "16G"`
			`do`
			`while [ "$current_jobs" -ge "$max_jobs" ]`
			`do`
			`sleep 10`
			`echo "Waiting jobs to complete. Number of running jobs: $current_jobs"`
			`current_jobs=$(ps aux \| grep pysim \| grep python \| grep -cv grep)`
			`echo "Waiting jobs to complete. Number of running jobs: $current_jobs"`
			`done`
			`output="log-ml-$cache_type-$cache_size"`
			`echo "Running simulation for $cache_type and cache size $cache_size. Number of running jobs: $current_jobs. "`
			`nohup python block_cache_pysim.py "$cache_type" "$cache_size" "$downsample_size" "$warmup_seconds" "$trace_file" "$ml_tmp_result_dir" >& $ml_tmp_result_dir/$output &`
			`current_jobs=$((current_jobs+1))`
			`done`
			`done`

			`# Wait for all jobs to complete.`
			`while [ $current_jobs -gt 0 ]`
			`do`
			`sleep 10`
			`echo "Waiting jobs to complete. Number of running jobs: $current_jobs"`
			`current_jobs=$(ps aux \| grep pysim \| grep python \| grep -cv grep)`
			`echo "Waiting jobs to complete. Number of running jobs: $current_jobs"`
			`done`

			`echo "Combine individual pysim output files"`

			`rm -rf "$result_dir/ml_*"`
			`mrc_file="$result_dir/ml_mrc"`
			`for header in "header-" "data-"`
			`do`
			`for fn in $ml_tmp_result_dir/*`
			`do`
			`sum_file=""`
			`time_unit=""`
			`capacity=""`
			`if [[ $fn == "timeline" ]]; then`
			`tmpfn="$fn"`
			`IFS='-' read -ra elements <<< "$tmpfn"`
			`time_unit_index=0`
			`capacity_index=0`
			`for i in "${elements[@]}"`
			`do`
			`if [[ $i == "timeline" ]]; then`
			`break`
			`fi`
			`time_unit_index=$((time_unit_index+1))`
			`done`
			`time_unit_index=$((time_unit_index+1))`
			`capacity_index=$((time_unit_index+2))`
			`time_unit="${elements[$time_unit_index]}_"`
			`capacity="${elements[$capacity_index]}_"`
			`fi`

			`if [[ $fn == "${header}ml-policy-timeline"* ]]; then`
			`sum_file="$result_dir/ml_${capacity}${time_unit}policy_timeline"`
			`fi`
			`if [[ $fn == "${header}ml-policy-ratio-timeline"* ]]; then`
			`sum_file="$result_dir/ml_${capacity}${time_unit}policy_ratio_timeline"`
			`fi`
			`if [[ $fn == "${header}ml-miss-timeline"* ]]; then`
			`sum_file="$result_dir/ml_${capacity}${time_unit}miss_timeline"`
			`fi`
			`if [[ $fn == "${header}ml-miss-ratio-timeline"* ]]; then`
			`sum_file="$result_dir/ml_${capacity}${time_unit}miss_ratio_timeline"`
			`fi`
			`if [[ $fn == "${header}ml-mrc"* ]]; then`
			`sum_file="$mrc_file"`
			`fi`
			`if [[ $sum_file == "" ]]; then`
			`continue`
			`fi`
			`if [[ $header == "header-" ]]; then`
			`if [ -e "$sum_file" ]; then`
			`continue`
			`fi`
			`fi`
			`cat "$ml_tmp_result_dir/$fn" >> "$sum_file"`
			`done`
			`done`

			`echo "Done"`
			`# Sort MRC file by cache_type and cache_size.`
			`tmp_file="$result_dir/tmp_mrc"`
			`cat "$mrc_file" \| sort -t ',' -k1,1 -k4,4n > "$tmp_file"`
			`cat "$tmp_file" > "$mrc_file"`
			`rm -rf "$tmp_file"`