From f7237e33956f9b8db55fc4be06d8bf1580d6ce87 Mon Sep 17 00:00:00 2001 From: generatedunixname89002005287564 Date: Mon, 14 Oct 2024 03:01:20 -0700 Subject: [PATCH] internal_repo_rocksdb Reviewed By: jermenkoo Differential Revision: D64318168 fbshipit-source-id: 62bddd81424f1c5d4f50ce3512a9a8fe57a19ec3 --- buckifier/buckify_rocksdb.py | 5 +- buckifier/targets_builder.py | 1 - buckifier/targets_cfg.py | 1 - buckifier/util.py | 1 - build_tools/amalgamate.py | 9 +- build_tools/benchmark_log_tool.py | 4 +- build_tools/error_filter.py | 21 +++-- coverage/parse_gcov_output.py | 7 +- tools/advisor/advisor/bench_runner.py | 2 +- tools/advisor/advisor/db_bench_runner.py | 6 +- tools/advisor/advisor/db_log_parser.py | 6 +- tools/advisor/advisor/db_options_parser.py | 2 +- tools/advisor/advisor/db_stats_fetcher.py | 12 +-- tools/advisor/advisor/db_timeseries_parser.py | 2 +- tools/advisor/advisor/rule_parser.py | 6 +- tools/advisor/test/test_db_stats_fetcher.py | 2 +- tools/benchmark_ci.py | 2 +- .../block_cache_analyzer/block_cache_pysim.py | 90 +++++++++---------- .../block_cache_pysim_test.py | 70 +++++++-------- .../block_cache_trace_analyzer_plot.py | 86 +++++++++--------- tools/check_all_python.py | 4 +- tools/db_crashtest.py | 5 +- tools/ldb_test.py | 43 +++++---- tools/write_stress_runner.py | 1 - 24 files changed, 189 insertions(+), 199 deletions(-) diff --git a/buckifier/buckify_rocksdb.py b/buckifier/buckify_rocksdb.py index 0ce29a695e..e802c77590 100755 --- a/buckifier/buckify_rocksdb.py +++ b/buckifier/buckify_rocksdb.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import, division, print_function, unicode_literals try: from builtins import str @@ -132,7 +131,7 @@ def generate_targets(repo_path, deps_map): if len(sys.argv) >= 2: # Heuristically quote and canonicalize whitespace for inclusion # in how the file was generated. - extra_argv = " '{0}'".format(" ".join(sys.argv[1].split())) + extra_argv = " '{}'".format(" ".join(sys.argv[1].split())) TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path, extra_argv) @@ -213,7 +212,7 @@ def generate_targets(repo_path, deps_map): for src in src_mk.get("MICROBENCH_SOURCES", []): name = src.rsplit("/", 1)[1].split(".")[0] if "/" in src else src.split(".")[0] TARGETS.add_binary(name, [src], [], extra_bench_libs=True) - print("Extra dependencies:\n{0}".format(json.dumps(deps_map))) + print(f"Extra dependencies:\n{json.dumps(deps_map)}") # Dictionary test executable name -> relative source file path test_source_map = {} diff --git a/buckifier/targets_builder.py b/buckifier/targets_builder.py index f6e35593d4..e62eaf9585 100644 --- a/buckifier/targets_builder.py +++ b/buckifier/targets_builder.py @@ -1,5 +1,4 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import, division, print_function, unicode_literals try: from builtins import object, str diff --git a/buckifier/targets_cfg.py b/buckifier/targets_cfg.py index 08f58628a7..4e58d12102 100644 --- a/buckifier/targets_cfg.py +++ b/buckifier/targets_cfg.py @@ -1,5 +1,4 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import, division, print_function, unicode_literals rocksdb_target_header_template = """# This file \100generated by: #$ python3 buckifier/buckify_rocksdb.py{extra_argv} diff --git a/buckifier/util.py b/buckifier/util.py index be197efd07..69bbc0329a 100644 --- a/buckifier/util.py +++ b/buckifier/util.py @@ -2,7 +2,6 @@ """ This module keeps commonly used components. """ -from __future__ import absolute_import, division, print_function, unicode_literals try: from builtins import object diff --git a/build_tools/amalgamate.py b/build_tools/amalgamate.py index f79e9075e3..d4fafbb207 100755 --- a/build_tools/amalgamate.py +++ b/build_tools/amalgamate.py @@ -25,7 +25,6 @@ # # The solution is to move the include out of the #ifdef. -from __future__ import print_function import argparse import re @@ -62,7 +61,7 @@ def expand_include( included.add(include_path) with open(include_path) as f: - print('#line 1 "{}"'.format(include_path), file=source_out) + print(f'#line 1 "{include_path}"', file=source_out) process_file( f, include_path, source_out, header_out, include_paths, public_include_paths ) @@ -118,7 +117,7 @@ def process_file( ) if expanded: - print('#line {} "{}"'.format(line + 1, abs_path), file=source_out) + print(f'#line {line + 1} "{abs_path}"', file=source_out) elif text != "#pragma once\n": source_out.write(text) @@ -157,8 +156,8 @@ def main(): with open(filename) as f, open(args.source_out, "w") as source_out, open( args.header_out, "w" ) as header_out: - print('#line 1 "{}"'.format(filename), file=source_out) - print('#include "{}"'.format(header_out.name), file=source_out) + print(f'#line 1 "{filename}"', file=source_out) + print(f'#include "{header_out.name}"', file=source_out) process_file( f, abs_path, source_out, header_out, include_paths, public_include_paths ) diff --git a/build_tools/benchmark_log_tool.py b/build_tools/benchmark_log_tool.py index d1ad459116..116740d33c 100755 --- a/build_tools/benchmark_log_tool.py +++ b/build_tools/benchmark_log_tool.py @@ -102,7 +102,7 @@ class BenchmarkUtils: class ResultParser: - def __init__(self, field="(\w|[+-:.%])+", intrafield="(\s)+", separator="\t"): + def __init__(self, field=r"(\w|[+-:.%])+", intrafield=r"(\s)+", separator="\t"): self.field = re.compile(field) self.intra = re.compile(intrafield) self.sep = re.compile(separator) @@ -159,7 +159,7 @@ class ResultParser: def load_report_from_tsv(filename: str): - file = open(filename, "r") + file = open(filename) contents = file.readlines() file.close() parser = ResultParser() diff --git a/build_tools/error_filter.py b/build_tools/error_filter.py index d9cb1099c4..b610f7c33f 100644 --- a/build_tools/error_filter.py +++ b/build_tools/error_filter.py @@ -9,7 +9,6 @@ - Prints those error messages to stdout """ -from __future__ import absolute_import, division, print_function, unicode_literals import re import sys @@ -43,7 +42,7 @@ class GTestErrorParser(ErrorParserBase): return None gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line) if gtest_fail_match: - return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1)) + return "{} failed: {}".format(self._last_gtest_name, gtest_fail_match.group(1)) return None @@ -66,52 +65,52 @@ class CompilerErrorParser(MatchErrorParser): # format (link error): # ':: error: ' # The below regex catches both - super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:") + super().__init__(r"\S+:\d+: error:") class ScanBuildErrorParser(MatchErrorParser): def __init__(self): - super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$") + super().__init__(r"scan-build: \d+ bugs found.$") class DbCrashErrorParser(MatchErrorParser): def __init__(self): - super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.") + super().__init__(r"\*\*\*.*\^$|TEST FAILED.") class WriteStressErrorParser(MatchErrorParser): def __init__(self): - super(WriteStressErrorParser, self).__init__( + super().__init__( r"ERROR: write_stress died with exitcode=\d+" ) class AsanErrorParser(MatchErrorParser): def __init__(self): - super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:") + super().__init__(r"==\d+==ERROR: AddressSanitizer:") class UbsanErrorParser(MatchErrorParser): def __init__(self): # format: '::: runtime error: ' - super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:") + super().__init__(r"\S+:\d+:\d+: runtime error:") class ValgrindErrorParser(MatchErrorParser): def __init__(self): # just grab the summary, valgrind doesn't clearly distinguish errors # from other log messages. - super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:") + super().__init__(r"==\d+== ERROR SUMMARY:") class CompatErrorParser(MatchErrorParser): def __init__(self): - super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$") + super().__init__(r"==== .*[Ee]rror.* ====$") class TsanErrorParser(MatchErrorParser): def __init__(self): - super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:") + super().__init__(r"WARNING: ThreadSanitizer:") _TEST_NAME_TO_PARSERS = { diff --git a/coverage/parse_gcov_output.py b/coverage/parse_gcov_output.py index b9788ec815..8a1056c511 100644 --- a/coverage/parse_gcov_output.py +++ b/coverage/parse_gcov_output.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import print_function import optparse import re @@ -109,11 +108,11 @@ def report_coverage(): # Check if we need to display coverage info for interested files. if len(interested_files): - per_file_coverage = dict( - (fname, per_file_coverage[fname]) + per_file_coverage = { + fname: per_file_coverage[fname] for fname in interested_files if fname in per_file_coverage - ) + } # If we only interested in several files, it makes no sense to report # the total_coverage total_coverage = None diff --git a/tools/advisor/advisor/bench_runner.py b/tools/advisor/advisor/bench_runner.py index 45d6c83133..702ccd8c01 100644 --- a/tools/advisor/advisor/bench_runner.py +++ b/tools/advisor/advisor/bench_runner.py @@ -30,7 +30,7 @@ class BenchmarkRunner(ABC): # refer GetInfoLogPrefix() in rocksdb/util/filename.cc # example db_path: /dev/shm/dbbench file_name = db_path[1:] # to ignore the leading '/' character - to_be_replaced = re.compile("[^0-9a-zA-Z\-_\.]") # noqa + to_be_replaced = re.compile(r"[^0-9a-zA-Z\-_\.]") # noqa for character in to_be_replaced.findall(db_path): file_name = file_name.replace(character, "_") if not file_name.endswith("_"): diff --git a/tools/advisor/advisor/db_bench_runner.py b/tools/advisor/advisor/db_bench_runner.py index f5802ed15e..c249e90741 100644 --- a/tools/advisor/advisor/db_bench_runner.py +++ b/tools/advisor/advisor/db_bench_runner.py @@ -65,7 +65,7 @@ class DBBenchRunner(BenchmarkRunner): """ output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None} perf_context_begins = False - with open(self.OUTPUT_FILE, "r") as fp: + with open(self.OUTPUT_FILE) as fp: for line in fp: if line.startswith(self.benchmark): # line from sample output: @@ -159,7 +159,7 @@ class DBBenchRunner(BenchmarkRunner): except OSError as e: print("Error: rmdir " + e.filename + " " + e.strerror) # setup database with a million keys using the fillrandom benchmark - command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % ( + command = "{} --benchmarks=fillrandom --db={} --num=1000000".format( self.db_bench_binary, db_path, ) @@ -168,7 +168,7 @@ class DBBenchRunner(BenchmarkRunner): self._run_command(command) def _build_experiment_command(self, curr_options, db_path): - command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % ( + command = "{} --benchmarks={} --statistics --perf_level=3 --db={}".format( self.db_bench_binary, self.benchmark, db_path, diff --git a/tools/advisor/advisor/db_log_parser.py b/tools/advisor/advisor/db_log_parser.py index 9ba541fc3a..14662b2cab 100644 --- a/tools/advisor/advisor/db_log_parser.py +++ b/tools/advisor/advisor/db_log_parser.py @@ -33,7 +33,7 @@ class Log: def is_new_log(log_line): # The assumption is that a new log will start with a date printed in # the below regex format. - date_regex = "\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}" # noqa + date_regex = r"\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}" # noqa return re.match(date_regex, log_line) def __init__(self, log_line, column_families): @@ -46,7 +46,7 @@ class Log: # "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634] # [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n" for col_fam in column_families: - search_for_str = "\[" + col_fam + "\]" # noqa + search_for_str = r"\[" + col_fam + r"\]" # noqa if re.search(search_for_str, self.message): self.column_family = col_fam break @@ -119,7 +119,7 @@ class DatabaseLogs(DataSource): # 'old' and were not deleted for some reason if re.search("old", file_name, re.IGNORECASE): continue - with open(file_name, "r") as db_logs: + with open(file_name) as db_logs: new_log = None for line in db_logs: if Log.is_new_log(line): diff --git a/tools/advisor/advisor/db_options_parser.py b/tools/advisor/advisor/db_options_parser.py index 062aeeec48..4587efb509 100644 --- a/tools/advisor/advisor/db_options_parser.py +++ b/tools/advisor/advisor/db_options_parser.py @@ -143,7 +143,7 @@ class DatabaseOptions(DataSource): def load_from_source(self, options_path): self.options_dict = {} - with open(options_path, "r") as db_options: + with open(options_path) as db_options: for line in db_options: line = OptionsSpecParser.remove_trailing_comment(line) if not line: diff --git a/tools/advisor/advisor/db_stats_fetcher.py b/tools/advisor/advisor/db_stats_fetcher.py index 30d1ad8b37..b8d9970da9 100755 --- a/tools/advisor/advisor/db_stats_fetcher.py +++ b/tools/advisor/advisor/db_stats_fetcher.py @@ -99,7 +99,7 @@ class LogStatsParser(TimeSeriesData): # directory if re.search("old", file_name, re.IGNORECASE): continue - with open(file_name, "r") as db_logs: + with open(file_name) as db_logs: new_log = None for line in db_logs: if Log.is_new_log(line): @@ -215,7 +215,7 @@ class OdsStatsFetcher(TimeSeriesData): # \t\t[[ts, value], [ts, value], ...] # ts = timestamp; value = value of key_name in entity_name at time ts self.keys_ts = {} - with open(self.OUTPUT_FILE, "r") as fp: + with open(self.OUTPUT_FILE) as fp: for line in fp: token_list = line.strip().split("\t") entity = token_list[0] @@ -236,7 +236,7 @@ class OdsStatsFetcher(TimeSeriesData): # \t\t\t # there is one line per (entity_name, key_name, timestamp) self.keys_ts = {} - with open(self.OUTPUT_FILE, "r") as fp: + with open(self.OUTPUT_FILE) as fp: for line in fp: token_list = line.split() entity = token_list[0] @@ -301,8 +301,8 @@ class OdsStatsFetcher(TimeSeriesData): def fetch_rate_url( self, - entities: List[str], - keys: List[str], + entities: list[str], + keys: list[str], window_len: str, percent: str, display: bool, @@ -341,6 +341,6 @@ class OdsStatsFetcher(TimeSeriesData): ) self.execute_script(command) url = "" - with open(self.OUTPUT_FILE, "r") as fp: + with open(self.OUTPUT_FILE) as fp: url = fp.readline() return url diff --git a/tools/advisor/advisor/db_timeseries_parser.py b/tools/advisor/advisor/db_timeseries_parser.py index 5840d7b909..92977532cd 100644 --- a/tools/advisor/advisor/db_timeseries_parser.py +++ b/tools/advisor/advisor/db_timeseries_parser.py @@ -51,7 +51,7 @@ class TimeSeriesData(DataSource): window_sec: float, threshold: bool, percent: bool, - ) -> Dict[str, Dict[int, float]]: + ) -> dict[str, dict[int, float]]: # this method calculates the (percent) rate change in the 'statistic' # for each entity (over 'window_sec' seconds) and returns the epochs # where this rate change is greater than or equal to the 'threshold' diff --git a/tools/advisor/advisor/rule_parser.py b/tools/advisor/advisor/rule_parser.py index 169a553631..e2ba450cc3 100644 --- a/tools/advisor/advisor/rule_parser.py +++ b/tools/advisor/advisor/rule_parser.py @@ -67,10 +67,10 @@ class Rule(Section): + ": rule must be associated with 2 conditions\ in order to check for a time dependency between them" ) - time_format = "^\d+[s|m|h|d]$" # noqa + time_format = r"^\d+[s|m|h|d]$" # noqa if not re.match(time_format, self.overlap_time_seconds, re.IGNORECASE): raise ValueError( - self.name + ": overlap_time_seconds format: \d+[s|m|h|d]" + self.name + r": overlap_time_seconds format: \d+[s|m|h|d]" ) else: # convert to seconds in_seconds = int(self.overlap_time_seconds[:-1]) @@ -428,7 +428,7 @@ class RulesSpec: def load_rules_from_spec(self): self.initialise_fields() - with open(self.file_path, "r") as db_rules: + with open(self.file_path) as db_rules: curr_section = None for line in db_rules: line = IniParser.remove_trailing_comment(line) diff --git a/tools/advisor/test/test_db_stats_fetcher.py b/tools/advisor/test/test_db_stats_fetcher.py index e2c29ab741..534d669f84 100644 --- a/tools/advisor/test/test_db_stats_fetcher.py +++ b/tools/advisor/test/test_db_stats_fetcher.py @@ -19,7 +19,7 @@ class TestLogStatsParser(unittest.TestCase): stats_file = os.path.join(this_path, "input_files/log_stats_parser_keys_ts") # populate the keys_ts dictionary of LogStatsParser self.stats_dict = {NO_ENTITY: {}} - with open(stats_file, "r") as fp: + with open(stats_file) as fp: for line in fp: stat_name = line.split(":")[0].strip() self.stats_dict[NO_ENTITY][stat_name] = {} diff --git a/tools/benchmark_ci.py b/tools/benchmark_ci.py index de9f69cf96..c50cb0fb55 100755 --- a/tools/benchmark_ci.py +++ b/tools/benchmark_ci.py @@ -56,7 +56,7 @@ def read_version(config): majorRegex = re.compile(r"#define ROCKSDB_MAJOR\s([0-9]+)") minorRegex = re.compile(r"#define ROCKSDB_MINOR\s([0-9]+)") patchRegex = re.compile(r"#define ROCKSDB_PATCH\s([0-9]+)") - with open(config.version_file, "r") as reader: + with open(config.version_file) as reader: major = None minor = None patch = None diff --git a/tools/block_cache_analyzer/block_cache_pysim.py b/tools/block_cache_analyzer/block_cache_pysim.py index 3962f37ebe..7a542edad8 100644 --- a/tools/block_cache_analyzer/block_cache_pysim.py +++ b/tools/block_cache_analyzer/block_cache_pysim.py @@ -120,18 +120,18 @@ class CacheEntry: def cost_class(self, cost_class_label): if cost_class_label == "table_bt": - return "{}-{}".format(self.table_id, self.block_type) + return f"{self.table_id}-{self.block_type}" elif cost_class_label == "table": - return "{}".format(self.table_id) + return f"{self.table_id}" elif cost_class_label == "bt": - return "{}".format(self.block_type) + return f"{self.block_type}" elif cost_class_label == "cf": - return "{}".format(self.cf_id) + return f"{self.cf_id}" elif cost_class_label == "cf_bt": - return "{}-{}".format(self.cf_id, self.block_type) + return f"{self.cf_id}-{self.block_type}" elif cost_class_label == "table_level_bt": - return "{}-{}-{}".format(self.table_id, self.level, self.block_type) - assert False, "Unknown cost class label {}".format(cost_class_label) + return f"{self.table_id}-{self.level}-{self.block_type}" + assert False, f"Unknown cost class label {cost_class_label}" return None @@ -144,7 +144,7 @@ class HashEntry: self.value = value def __repr__(self): - return "k={},h={},v=[{}]".format(self.key, self.hash, self.value) + return f"k={self.key},h={self.hash},v=[{self.value}]" class HashTable: @@ -190,7 +190,7 @@ class HashTable: for j in range(len(self.table[i])): if self.table[i][j] is not None: all_entries.append(self.table[i][j]) - return "{}".format(all_entries) + return f"{all_entries}" def values(self): all_values = [] @@ -366,15 +366,15 @@ class MissRatioStats: with open(header_file_path, "w+") as header_file: header = "time" for trace_time in range(start, end): - header += ",{}".format(trace_time) + header += f",{trace_time}" header_file.write(header + "\n") file_path = "{}/data-ml-miss-timeline-{}-{}-{}-{}".format( result_dir, self.time_unit, cache_type, cache_size, target_cf_name ) with open(file_path, "w+") as file: - row = "{}".format(cache_type) + row = f"{cache_type}" for trace_time in range(start, end): - row += ",{}".format(self.time_misses.get(trace_time, 0)) + row += f",{self.time_misses.get(trace_time, 0)}" file.write(row + "\n") def write_miss_ratio_timeline( @@ -389,13 +389,13 @@ class MissRatioStats: with open(header_file_path, "w+") as header_file: header = "time" for trace_time in range(start, end): - header += ",{}".format(trace_time) + header += f",{trace_time}" header_file.write(header + "\n") file_path = "{}/data-ml-miss-ratio-timeline-{}-{}-{}-{}".format( result_dir, self.time_unit, cache_type, cache_size, target_cf_name ) with open(file_path, "w+") as file: - row = "{}".format(cache_type) + row = f"{cache_type}" for trace_time in range(start, end): naccesses = self.time_accesses.get(trace_time, 0) miss_ratio = 0 @@ -403,7 +403,7 @@ class MissRatioStats: miss_ratio = float( self.time_misses.get(trace_time, 0) * 100.0 ) / float(naccesses) - row += ",{0:.2f}".format(miss_ratio) + row += f",{miss_ratio:.2f}" file.write(row + "\n") @@ -440,7 +440,7 @@ class PolicyStats: with open(header_file_path, "w+") as header_file: header = "time" for trace_time in range(start, end): - header += ",{}".format(trace_time) + header += f",{trace_time}" header_file.write(header + "\n") file_path = "{}/data-ml-policy-timeline-{}-{}-{}-{}".format( result_dir, self.time_unit, cache_type, cache_size, target_cf_name @@ -448,7 +448,7 @@ class PolicyStats: with open(file_path, "w+") as file: for policy in self.policy_names: policy_name = self.policy_names[policy] - row = "{}-{}".format(cache_type, policy_name) + row = f"{cache_type}-{policy_name}" for trace_time in range(start, end): row += ",{}".format( self.time_selected_polices.get(trace_time, {}).get( @@ -469,7 +469,7 @@ class PolicyStats: with open(header_file_path, "w+") as header_file: header = "time" for trace_time in range(start, end): - header += ",{}".format(trace_time) + header += f",{trace_time}" header_file.write(header + "\n") file_path = "{}/data-ml-policy-ratio-timeline-{}-{}-{}-{}".format( result_dir, self.time_unit, cache_type, cache_size, target_cf_name @@ -477,7 +477,7 @@ class PolicyStats: with open(file_path, "w+") as file: for policy in self.policy_names: policy_name = self.policy_names[policy] - row = "{}-{}".format(cache_type, policy_name) + row = f"{cache_type}-{policy_name}" for trace_time in range(start, end): naccesses = self.time_accesses.get(trace_time, 0) ratio = 0 @@ -488,7 +488,7 @@ class PolicyStats: ) * 100.0 ) / float(naccesses) - row += ",{0:.2f}".format(ratio) + row += f",{ratio:.2f}" file.write(row + "\n") @@ -674,10 +674,10 @@ class Cache: self.retain_get_id_range = 100000 def block_key(self, trace_record): - return "b{}".format(trace_record.block_id) + return f"b{trace_record.block_id}" def row_key(self, trace_record): - return "g{}-{}".format(trace_record.fd, trace_record.key_id) + return f"g{trace_record.fd}-{trace_record.key_id}" def _lookup(self, trace_record, key, hash): """ @@ -893,7 +893,7 @@ class MLCache(Cache): """ def __init__(self, cache_size, enable_cache_row_key, policies, cost_class_label): - super(MLCache, self).__init__(cache_size, enable_cache_row_key) + super().__init__(cache_size, enable_cache_row_key) self.table = HashTable() self.policy_stats = PolicyStats(kSecondsInMinute, policies) self.per_hour_policy_stats = PolicyStats(kSecondsInHour, policies) @@ -1015,7 +1015,7 @@ class ThompsonSamplingCache(MLCache): init_a=1, init_b=1, ): - super(ThompsonSamplingCache, self).__init__( + super().__init__( cache_size, enable_cache_row_key, policies, cost_class_label ) self._as = {} @@ -1042,7 +1042,7 @@ class ThompsonSamplingCache(MLCache): return "Hybrid ThompsonSampling with cost class {} (ts_hybrid)".format( self.cost_class_label ) - return "ThompsonSampling with cost class {} (ts)".format(self.cost_class_label) + return f"ThompsonSampling with cost class {self.cost_class_label} (ts)" class LinUCBCache(MLCache): @@ -1057,7 +1057,7 @@ class LinUCBCache(MLCache): """ def __init__(self, cache_size, enable_cache_row_key, policies, cost_class_label): - super(LinUCBCache, self).__init__( + super().__init__( cache_size, enable_cache_row_key, policies, cost_class_label ) self.nfeatures = 4 # Block type, level, cf. @@ -1101,7 +1101,7 @@ class LinUCBCache(MLCache): return "Hybrid LinUCB with cost class {} (linucb_hybrid)".format( self.cost_class_label ) - return "LinUCB with cost class {} (linucb)".format(self.cost_class_label) + return f"LinUCB with cost class {self.cost_class_label} (linucb)" class OPTCacheEntry: @@ -1198,7 +1198,7 @@ class OPTCache(Cache): """ def __init__(self, cache_size): - super(OPTCache, self).__init__(cache_size, enable_cache_row_key=0) + super().__init__(cache_size, enable_cache_row_key=0) self.table = PQTable() def _lookup(self, trace_record, key, hash): @@ -1271,7 +1271,7 @@ class GDSizeCache(Cache): """ def __init__(self, cache_size, enable_cache_row_key): - super(GDSizeCache, self).__init__(cache_size, enable_cache_row_key) + super().__init__(cache_size, enable_cache_row_key) self.table = PQTable() self.L = 0.0 @@ -1340,7 +1340,7 @@ class Deque: return reversed(self.od) def __repr__(self): - return "Deque(%r)" % (list(self),) + return "Deque({!r})".format(list(self)) class ARCCache(Cache): @@ -1361,7 +1361,7 @@ class ARCCache(Cache): """ def __init__(self, cache_size, enable_cache_row_key): - super(ARCCache, self).__init__(cache_size, enable_cache_row_key) + super().__init__(cache_size, enable_cache_row_key) self.table = {} self.c = cache_size / 16 * 1024 # Number of elements in the cache. self.p = 0 # Target size for the list T1 @@ -1459,7 +1459,7 @@ class LRUCache(Cache): """ def __init__(self, cache_size, enable_cache_row_key): - super(LRUCache, self).__init__(cache_size, enable_cache_row_key) + super().__init__(cache_size, enable_cache_row_key) self.table = {} self.lru = Deque() @@ -1505,7 +1505,7 @@ class TraceCache(Cache): """ def __init__(self, cache_size): - super(TraceCache, self).__init__(cache_size, enable_cache_row_key=0) + super().__init__(cache_size, enable_cache_row_key=0) def _lookup(self, trace_record, key, hash): return trace_record.is_hit @@ -1629,7 +1629,7 @@ def create_cache(cache_type, cache_size, downsample_size): elif cache_type == "gdsize": return GDSizeCache(cache_size, enable_cache_row_key) else: - print("Unknown cache type {}".format(cache_type)) + print(f"Unknown cache type {cache_type}") assert False return None @@ -1692,7 +1692,7 @@ def run( # can use this information to evict the cached key which next access is # the furthest in the future. print("Preprocessing block traces.") - with open(trace_file_path, "r") as trace_file: + with open(trace_file_path) as trace_file: for line in trace_file: if ( max_accesses_to_process != -1 @@ -1735,9 +1735,9 @@ def run( ) time_interval += 1 print( - "Trace contains {0} blocks, {1}({2:.2f}%) blocks with no size." - "{3} accesses, {4}({5:.2f}%) accesses with no_insert," - "{6}({7:.2f}%) accesses that want to insert but block size is 0.".format( + "Trace contains {} blocks, {}({:.2f}%) blocks with no size." + "{} accesses, {}({:.2f}%) accesses with no_insert," + "{}({:.2f}%) accesses that want to insert but block size is 0.".format( len(block_access_timelines), num_blocks_with_no_size, percent(num_blocks_with_no_size, len(block_access_timelines)), @@ -1754,8 +1754,8 @@ def run( start_time = time.time() trace_start_time = 0 trace_duration = 0 - print("Running simulated {} cache on block traces.".format(cache.cache_name())) - with open(trace_file_path, "r") as trace_file: + print(f"Running simulated {cache.cache_name()} cache on block traces.") + with open(trace_file_path) as trace_file: for line in trace_file: if ( max_accesses_to_process != -1 @@ -1871,8 +1871,8 @@ def report_stats( trace_start_time, trace_end_time, ): - cache_label = "{}-{}-{}".format(cache_type, cache_size, target_cf_name) - with open("{}/data-ml-mrc-{}".format(result_dir, cache_label), "w+") as mrc_file: + cache_label = f"{cache_type}-{cache_size}-{target_cf_name}" + with open(f"{result_dir}/data-ml-mrc-{cache_label}", "w+") as mrc_file: mrc_file.write( "{},0,0,{},{},{}\n".format( cache_type, @@ -1897,7 +1897,7 @@ def report_stats( "w+", ) as mb_file: mb_file.write( - "{},0,0,{},{}\n".format(cache_type, cache_size, avg_miss_bytes) + f"{cache_type},0,0,{cache_size},{avg_miss_bytes}\n" ) with open( @@ -1907,7 +1907,7 @@ def report_stats( "w+", ) as mb_file: mb_file.write( - "{},0,0,{},{}\n".format(cache_type, cache_size, p95_miss_bytes) + f"{cache_type},0,0,{cache_size},{p95_miss_bytes}\n" ) cache_stats[i].write_miss_timeline( @@ -1970,7 +1970,7 @@ if __name__ == "__main__": "it will run against all accesses.)" ) exit(1) - print("Arguments: {}".format(sys.argv)) + print(f"Arguments: {sys.argv}") cache_type = sys.argv[1] cache_size = parse_cache_size(sys.argv[2]) downsample_size = int(sys.argv[3]) diff --git a/tools/block_cache_analyzer/block_cache_pysim_test.py b/tools/block_cache_analyzer/block_cache_pysim_test.py index eed1b94af6..68fcd462e9 100644 --- a/tools/block_cache_analyzer/block_cache_pysim_test.py +++ b/tools/block_cache_analyzer/block_cache_pysim_test.py @@ -33,13 +33,13 @@ def test_hash_table(): table = HashTable() data_size = 10000 for i in range(data_size): - table.insert("k{}".format(i), i, "v{}".format(i)) + table.insert(f"k{i}", i, f"v{i}") for i in range(data_size): - assert table.lookup("k{}".format(i), i) is not None + assert table.lookup(f"k{i}", i) is not None for i in range(data_size): - table.delete("k{}".format(i), i) + table.delete(f"k{i}", i) for i in range(data_size): - assert table.lookup("k{}".format(i), i) is None + assert table.lookup(f"k{i}", i) is None truth_map = {} n = 1000000 @@ -47,7 +47,7 @@ def test_hash_table(): for i in range(n): key_id = random.randint(0, records) v = random.randint(0, records) - key = "k{}".format(key_id) + key = f"k{key_id}" value = CacheEntry(v, v, v, v, v, v, v) action = random.randint(0, 10) assert len(truth_map) == table.elements, "{} {} {}".format( @@ -104,18 +104,18 @@ def assert_metrics(cache, expected_value, expected_value_size=1, custom_hashtabl ) for expeceted_k in expected_value[3]: if custom_hashtable: - val = cache.table.lookup("b{}".format(expeceted_k), expeceted_k) + val = cache.table.lookup(f"b{expeceted_k}", expeceted_k) else: - val = cache.table["b{}".format(expeceted_k)] + val = cache.table[f"b{expeceted_k}"] assert val is not None, "Expected {} Actual: Not Exist {}, Table: {}".format( expeceted_k, expected_value, cache.table ) assert val.value_size == expected_value_size for expeceted_k in expected_value[4]: if custom_hashtable: - val = cache.table.lookup("g0-{}".format(expeceted_k), expeceted_k) + val = cache.table.lookup(f"g0-{expeceted_k}", expeceted_k) else: - val = cache.table["g0-{}".format(expeceted_k)] + val = cache.table[f"g0-{expeceted_k}"] assert val is not None assert val.value_size == expected_value_size @@ -288,7 +288,7 @@ def test_lfu_cache(): def test_mix(cache): - print("Test Mix {} cache".format(cache.cache_name())) + print(f"Test Mix {cache.cache_name()} cache") n = 100000 records = 100 block_size_table = {} @@ -343,7 +343,7 @@ def test_mix(cache): assert cached_size == cache.used_size, "Expeced {} Actual {}".format( cache.used_size, cached_size ) - print("Test Mix {} cache: Success".format(cache.cache_name())) + print(f"Test Mix {cache.cache_name()} cache: Success") def test_end_to_end(): @@ -366,27 +366,27 @@ def test_end_to_end(): fd = random.randint(0, nfds) now = i * kMicrosInSecond access_record = "" - access_record += "{},".format(now) - access_record += "{},".format(key_id) - access_record += "{},".format(9) # block type - access_record += "{},".format(block_size) # block size - access_record += "{},".format(cf_id) - access_record += "cf_{},".format(cf_id) - access_record += "{},".format(level) - access_record += "{},".format(fd) - access_record += "{},".format(key_id % 3) # caller - access_record += "{},".format(0) # no insert - access_record += "{},".format(i) # get_id - access_record += "{},".format(i) # key_id - access_record += "{},".format(100) # kv_size - access_record += "{},".format(1) # is_hit - access_record += "{},".format(1) # referenced_key_exist_in_block - access_record += "{},".format(10) # num_keys_in_block - access_record += "{},".format(1) # table_id - access_record += "{},".format(0) # seq_number - access_record += "{},".format(10) # block key size - access_record += "{},".format(20) # key size - access_record += "{},".format(0) # block offset + access_record += f"{now}," + access_record += f"{key_id}," + access_record += f"{9}," # block type + access_record += f"{block_size}," # block size + access_record += f"{cf_id}," + access_record += f"cf_{cf_id}," + access_record += f"{level}," + access_record += f"{fd}," + access_record += f"{key_id % 3}," # caller + access_record += f"{0}," # no insert + access_record += f"{i}," # get_id + access_record += f"{i}," # key_id + access_record += f"{100}," # kv_size + access_record += f"{1}," # is_hit + access_record += f"{1}," # referenced_key_exist_in_block + access_record += f"{10}," # num_keys_in_block + access_record += f"{1}," # table_id + access_record += f"{0}," # seq_number + access_record += f"{10}," # block key size + access_record += f"{20}," # key size + access_record += f"{0}," # block offset access_record = access_record[:-1] access_records += access_record + "\n" trace_file.write(access_records) @@ -424,14 +424,14 @@ def test_end_to_end(): assert cached_size == cache.used_size, "Expeced {} Actual {}".format( cache.used_size, cached_size ) - print("Test All {}: Success".format(cache.cache_name())) + print(f"Test All {cache.cache_name()}: Success") os.remove(trace_file_path) print("Test All: Success") def test_hybrid(cache): - print("Test {} cache".format(cache.cache_name())) + print(f"Test {cache.cache_name()} cache") k = TraceRecord( access_time=0, block_id=1, @@ -530,7 +530,7 @@ def test_hybrid(cache): assert_metrics( cache, [kSampleSize, 103, 99, [i for i in range(101 - kSampleSize, 101)], []] ) - print("Test {} cache: Success".format(cache.cache_name())) + print(f"Test {cache.cache_name()} cache: Success") def test_opt_cache(): diff --git a/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py b/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py index 37166bcb4e..6521ef2861 100644 --- a/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +++ b/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py @@ -43,9 +43,9 @@ random.shuffle(colors) def num_to_gb(n): one_gb = 1024 * 1024 * 1024 if float(n) % one_gb == 0: - return "{}".format(n / one_gb) + return f"{n / one_gb}" # Keep two decimal points. - return "{0:.2f}".format(float(n) / one_gb) + return f"{float(n) / one_gb:.2f}" def plot_miss_stats_graphs( @@ -57,9 +57,9 @@ def plot_miss_stats_graphs( continue if not file.endswith(file_suffix): continue - print("Processing file {}/{}".format(csv_result_dir, file)) + print(f"Processing file {csv_result_dir}/{file}") mrc_file_path = csv_result_dir + "/" + file - with open(mrc_file_path, "r") as csvfile: + with open(mrc_file_path) as csvfile: rows = csv.reader(csvfile, delimiter=",") for row in rows: cache_name = row[0] @@ -67,7 +67,7 @@ def plot_miss_stats_graphs( ghost_capacity = int(row[2]) capacity = int(row[3]) miss_ratio = float(row[4]) - config = "{}-{}-{}".format(cache_name, num_shard_bits, ghost_capacity) + config = f"{cache_name}-{num_shard_bits}-{ghost_capacity}" if config not in miss_ratios: miss_ratios[config] = {} miss_ratios[config]["x"] = [] @@ -83,10 +83,10 @@ def plot_miss_stats_graphs( plt.ylabel(ylabel) plt.xscale("log", basex=2) plt.ylim(ymin=0) - plt.title("{}".format(file)) + plt.title(f"{file}") plt.legend() fig.savefig( - output_result_dir + "/{}.pdf".format(pdf_file_name), bbox_inches="tight" + output_result_dir + f"/{pdf_file_name}.pdf", bbox_inches="tight" ) @@ -99,9 +99,9 @@ def plot_miss_stats_diff_lru_graphs( continue if not file.endswith(file_suffix): continue - print("Processing file {}/{}".format(csv_result_dir, file)) + print(f"Processing file {csv_result_dir}/{file}") mrc_file_path = csv_result_dir + "/" + file - with open(mrc_file_path, "r") as csvfile: + with open(mrc_file_path) as csvfile: rows = csv.reader(csvfile, delimiter=",") for row in rows: cache_name = row[0] @@ -109,7 +109,7 @@ def plot_miss_stats_diff_lru_graphs( ghost_capacity = int(row[2]) capacity = int(row[3]) miss_ratio = float(row[4]) - config = "{}-{}-{}".format(cache_name, num_shard_bits, ghost_capacity) + config = f"{cache_name}-{num_shard_bits}-{ghost_capacity}" if config not in miss_ratios: miss_ratios[config] = {} miss_ratios[config]["x"] = [] @@ -132,10 +132,10 @@ def plot_miss_stats_diff_lru_graphs( plt.xlabel("Cache capacity") plt.ylabel(ylabel) plt.xscale("log", basex=2) - plt.title("{}".format(file)) + plt.title(f"{file}") plt.legend() fig.savefig( - output_result_dir + "/{}.pdf".format(pdf_file_name), bbox_inches="tight" + output_result_dir + f"/{pdf_file_name}.pdf", bbox_inches="tight" ) @@ -226,8 +226,8 @@ def plot_line_charts( continue if not file.startswith(filename_prefix): continue - print("Processing file {}/{}".format(csv_result_dir, file)) - with open(csv_result_dir + "/" + file, "r") as csvfile: + print(f"Processing file {csv_result_dir}/{file}") + with open(csv_result_dir + "/" + file) as csvfile: x, labels, label_stats = read_data_for_plot(csvfile, vertical) if len(x) == 0 or len(labels) == 0: continue @@ -247,11 +247,11 @@ def plot_line_charts( # Translate time unit into x labels. if "_60" in file: - plt.xlabel("{} (Minute)".format(xlabel)) + plt.xlabel(f"{xlabel} (Minute)") if "_3600" in file: - plt.xlabel("{} (Hour)".format(xlabel)) + plt.xlabel(f"{xlabel} (Hour)") plt.ylabel(ylabel) - plt.title("{} {}".format(title, file)) + plt.title(f"{title} {file}") if legend: plt.legend() pdf.savefig(fig) @@ -271,13 +271,13 @@ def plot_stacked_bar_charts( ): global color_index, bar_color_maps, colors pdf = matplotlib.backends.backend_pdf.PdfPages( - "{}/{}".format(output_result_dir, pdf_name) + f"{output_result_dir}/{pdf_name}" ) for file in os.listdir(csv_result_dir): if not file.endswith(filename_suffix): continue - with open(csv_result_dir + "/" + file, "r") as csvfile: - print("Processing file {}/{}".format(csv_result_dir, file)) + with open(csv_result_dir + "/" + file) as csvfile: + print(f"Processing file {csv_result_dir}/{file}") x, labels, label_stats = read_data_for_plot(csvfile, vertical) if len(x) == 0 or len(label_stats) == 0: continue @@ -310,25 +310,25 @@ def plot_stacked_bar_charts( ind, [x_prefix + x[i] for i in range(len(x))], rotation=20, fontsize=8 ) plt.legend(bars, labels) - plt.title("{} filename:{}".format(title, file)) + plt.title(f"{title} filename:{file}") pdf.savefig(fig) pdf.close() def plot_heatmap(csv_result_dir, output_result_dir, filename_suffix, pdf_name, title): pdf = matplotlib.backends.backend_pdf.PdfPages( - "{}/{}".format(output_result_dir, pdf_name) + f"{output_result_dir}/{pdf_name}" ) for file in os.listdir(csv_result_dir): if not file.endswith(filename_suffix): continue - csv_file_name = "{}/{}".format(csv_result_dir, file) - print("Processing file {}/{}".format(csv_result_dir, file)) + csv_file_name = f"{csv_result_dir}/{file}" + print(f"Processing file {csv_result_dir}/{file}") corr_table = pd.read_csv(csv_file_name) corr_table = corr_table.pivot("label", "corr", "value") fig = plt.figure() sns.heatmap(corr_table, annot=True, linewidths=0.5, fmt=".2") - plt.title("{} filename:{}".format(title, file)) + plt.title(f"{title} filename:{file}") pdf.savefig(fig) pdf.close() @@ -360,16 +360,16 @@ def plot_correlation(csv_result_dir, output_result_dir): for file in os.listdir(csv_result_dir): if not file.endswith("correlation_input"): continue - csv_file_name = "{}/{}".format(csv_result_dir, file) - print("Processing file {}/{}".format(csv_result_dir, file)) + csv_file_name = f"{csv_result_dir}/{file}" + print(f"Processing file {csv_result_dir}/{file}") corr_table = pd.read_csv(csv_file_name) label_str = file.split("_")[0] label = file[len(label_str) + 1 :] label = label[: len(label) - len("_correlation_input")] - output_file = "{}/{}_correlation_output".format(csv_result_dir, label_str) + output_file = f"{csv_result_dir}/{label_str}_correlation_output" if output_file not in label_str_file: - f = open("{}/{}_correlation_output".format(csv_result_dir, label_str), "w+") + f = open(f"{csv_result_dir}/{label_str}_correlation_output", "w+") label_str_file[output_file] = f f.write("label,corr,value\n") f = label_str_file[output_file] @@ -666,9 +666,9 @@ if __name__ == "__main__": csv_abs_dir = csv_result_dir + "/" + csv_relative_dir result_dir = output_result_dir + "/" + csv_relative_dir if not os.path.isdir(csv_abs_dir): - print("{} is not a directory".format(csv_abs_dir)) + print(f"{csv_abs_dir} is not a directory") continue - print("Processing experiment dir: {}".format(csv_relative_dir)) + print(f"Processing experiment dir: {csv_relative_dir}") if not os.path.exists(result_dir): os.makedirs(result_dir) plot_access_count_summary(csv_abs_dir, result_dir) @@ -698,32 +698,32 @@ if __name__ == "__main__": plot_miss_stats_graphs( csv_abs_dir, result_dir, - file_prefix="ml_{}_".format(time_unit), + file_prefix=f"ml_{time_unit}_", file_suffix="p95mb", - ylabel="p95 number of byte miss per {} seconds".format(time_unit), - pdf_file_name="p95mb_per{}_seconds".format(time_unit), + ylabel=f"p95 number of byte miss per {time_unit} seconds", + pdf_file_name=f"p95mb_per{time_unit}_seconds", ) plot_miss_stats_graphs( csv_abs_dir, result_dir, - file_prefix="ml_{}_".format(time_unit), + file_prefix=f"ml_{time_unit}_", file_suffix="avgmb", - ylabel="Average number of byte miss per {} seconds".format(time_unit), - pdf_file_name="avgmb_per{}_seconds".format(time_unit), + ylabel=f"Average number of byte miss per {time_unit} seconds", + pdf_file_name=f"avgmb_per{time_unit}_seconds", ) plot_miss_stats_diff_lru_graphs( csv_abs_dir, result_dir, - file_prefix="ml_{}_".format(time_unit), + file_prefix=f"ml_{time_unit}_", file_suffix="p95mb", - ylabel="p95 number of byte miss per {} seconds".format(time_unit), - pdf_file_name="p95mb_per{}_seconds_diff_lru".format(time_unit), + ylabel=f"p95 number of byte miss per {time_unit} seconds", + pdf_file_name=f"p95mb_per{time_unit}_seconds_diff_lru", ) plot_miss_stats_diff_lru_graphs( csv_abs_dir, result_dir, - file_prefix="ml_{}_".format(time_unit), + file_prefix=f"ml_{time_unit}_", file_suffix="avgmb", - ylabel="Average number of byte miss per {} seconds".format(time_unit), - pdf_file_name="avgmb_per{}_seconds_diff_lru".format(time_unit), + ylabel=f"Average number of byte miss per {time_unit} seconds", + pdf_file_name=f"avgmb_per{time_unit}_seconds_diff_lru", ) diff --git a/tools/check_all_python.py b/tools/check_all_python.py index 708339a67f..567e370c4b 100755 --- a/tools/check_all_python.py +++ b/tools/check_all_python.py @@ -15,8 +15,8 @@ for base in ["buckifier", "build_tools", "coverage", "tools"]: filenames += glob.glob(base + "/" + suff + ".py") for filename in filenames: - source = open(filename, "r").read() + "\n" + source = open(filename).read() + "\n" # Parses and syntax checks the file, throwing on error. (No pyc written.) _ = compile(source, filename, "exec") -print("No syntax errors in {0} .py files".format(len(filenames))) +print(f"No syntax errors in {len(filenames)} .py files") diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index f88c0be191..25fdaa0081 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import, division, print_function, unicode_literals import argparse import math @@ -1023,7 +1022,7 @@ def gen_cmd(params, unknown_params): cmd = ( [stress_cmd] + [ - "--{0}={1}".format(k, v) + f"--{k}={v}" for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)] if k not in { @@ -1278,7 +1277,7 @@ def whitebox_crash_main(args, unknown_args): hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd( cmd, exit_time - time.time() + 900 ) - msg = "check_mode={0}, kill option={1}, exitcode={2}\n".format( + msg = "check_mode={}, kill option={}, exitcode={}\n".format( check_mode, additional_opts["kill_random_test"], retncode ) diff --git a/tools/ldb_test.py b/tools/ldb_test.py index 09ab9b799f..a8956f160f 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import, division, print_function, unicode_literals import glob @@ -100,14 +99,14 @@ class LDBTestCase(unittest.TestCase): Uses the default test db. """ self.assertRunOKFull( - "%s %s" % (self.dbParam(self.DB_NAME), params), expectedOutput, unexpected + "{} {}".format(self.dbParam(self.DB_NAME), params), expectedOutput, unexpected ) def assertRunFAIL(self, params): """ Uses the default test db. """ - self.assertRunFAILFull("%s %s" % (self.dbParam(self.DB_NAME), params)) + self.assertRunFAILFull("{} {}".format(self.dbParam(self.DB_NAME), params)) def testSimpleStringPutGet(self): print("Running testSimpleStringPutGet...") @@ -180,18 +179,18 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("checkconsistency", "OK") def dumpDb(self, params, dumpFile): - return 0 == run_err_null("./ldb dump %s > %s" % (params, dumpFile)) + return 0 == run_err_null("./ldb dump {} > {}".format(params, dumpFile)) def loadDb(self, params, dumpFile): - return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params)) + return 0 == run_err_null("cat {} | ./ldb load {}".format(dumpFile, params)) def writeExternSst(self, params, inputDumpFile, outputSst): return 0 == run_err_null( - "cat %s | ./ldb write_extern_sst %s %s" % (inputDumpFile, outputSst, params) + "cat {} | ./ldb write_extern_sst {} {}".format(inputDumpFile, outputSst, params) ) def ingestExternSst(self, params, inputSst): - return 0 == run_err_null("./ldb ingest_extern_sst %s %s" % (inputSst, params)) + return 0 == run_err_null("./ldb ingest_extern_sst {} {}".format(inputSst, params)) def testStringBatchPut(self): print("Running testStringBatchPut...") @@ -444,11 +443,11 @@ class LDBTestCase(unittest.TestCase): dumpFilePath = os.path.join(self.TMP_DIR, "dump6") loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6") self.assertTrue( - self.dumpDb("--db=%s %s" % (origDbPath, extraParams), dumpFilePath) + self.dumpDb("--db={} {}".format(origDbPath, extraParams), dumpFilePath) ) self.assertTrue( self.loadDb( - "--db=%s %s --create_if_missing" % (loadedDbPath, extraParams), + "--db={} {} --create_if_missing".format(loadedDbPath, extraParams), dumpFilePath, ) ) @@ -503,7 +502,7 @@ class LDBTestCase(unittest.TestCase): "'b' seq:2, type:1 => val\nInternal keys in range: 2", ) self.assertRunOK( - "idump --input_key_hex --from=%s --to=%s" % (hex(ord("a")), hex(ord("b"))), + "idump --input_key_hex --from={} --to={}".format(hex(ord("a")), hex(ord("b"))), "'a' seq:1, type:1 => val\nInternal keys in range: 1", ) @@ -513,7 +512,7 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("put b val --enable_blob_files", "OK") # Pattern to expect from dump with decode_blob_index flag enabled. - regex = ".*\[blob ref\].*" + regex = r".*\[blob ref\].*" expected_pattern = re.compile(regex) cmd = "idump %s --decode_blob_index" self.assertRunOKFull( @@ -589,7 +588,7 @@ class LDBTestCase(unittest.TestCase): self.assertRunFAIL("checkconsistency") def dumpLiveFiles(self, params, dumpFile): - return 0 == run_err_null("./ldb dump_live_files %s > %s" % (params, dumpFile)) + return 0 == run_err_null("./ldb dump_live_files {} > {}".format(params, dumpFile)) def testDumpLiveFiles(self): print("Running testDumpLiveFiles...") @@ -620,7 +619,7 @@ class LDBTestCase(unittest.TestCase): ) # Investigate the output - with open(dumpFilePath, "r") as tmp: + with open(dumpFilePath) as tmp: data = tmp.read() # Check that all the SST filenames have a correct full path (no multiple '/'). @@ -651,7 +650,7 @@ class LDBTestCase(unittest.TestCase): def listLiveFilesMetadata(self, params, dumpFile): return 0 == run_err_null( - "./ldb list_live_files_metadata %s > %s" % (params, dumpFile) + "./ldb list_live_files_metadata {} > {}".format(params, dumpFile) ) def testListLiveFilesMetadata(self): @@ -673,13 +672,13 @@ class LDBTestCase(unittest.TestCase): ) # Collect SST filename and level from dump_live_files - with open(dumpFilePath1, "r") as tmp: + with open(dumpFilePath1) as tmp: data = tmp.read() filename1 = re.findall(r".*\d+\.sst", data)[0] level1 = re.findall(r"level:\d+", data)[0].split(":")[1] # Collect SST filename and level from list_live_files_metadata - with open(dumpFilePath2, "r") as tmp: + with open(dumpFilePath2) as tmp: data = tmp.read() filename2 = re.findall(r".*\d+\.sst", data)[0] level2 = re.findall(r"level \d+", data)[0].split(" ")[1] @@ -712,7 +711,7 @@ class LDBTestCase(unittest.TestCase): # parse the output and create a map: # [key: sstFilename]->[value:[LSM level, Column Family Name]] referenceMap = {} - with open(dumpFilePath3, "r") as tmp: + with open(dumpFilePath3) as tmp: data = tmp.read() # Note: the following regex are contingent on what the # dump_live_files outputs. @@ -730,7 +729,7 @@ class LDBTestCase(unittest.TestCase): # parse the output and create a map: # [key: sstFilename]->[value:[LSM level, Column Family Name]] testMap = {} - with open(dumpFilePath4, "r") as tmp: + with open(dumpFilePath4) as tmp: data = tmp.read() # Since for each SST file, all the information is contained # on one line, the parsing is easy to perform and relies on @@ -771,7 +770,7 @@ class LDBTestCase(unittest.TestCase): num = "[0-9]+" st = ".*" subpat = st + " seq:" + num + ", type:" + num - regex = num + ":" + num + "\[" + subpat + ".." + subpat + "\]" + regex = num + ":" + num + r"\[" + subpat + ".." + subpat + r"\]" expected_pattern = re.compile(regex) cmd = "manifest_dump --db=%s" manifest_files = self.getManifests(dbPath) @@ -859,7 +858,7 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("get sst1", "sst1_val") # Pattern to expect from SST dump. - regex = ".*Sst file format:.*\n.*\[blob ref\].*" + regex = ".*Sst file format:.*\n.*\\[blob ref\\].*" expected_pattern = re.compile(regex) sst_files = self.getSSTFiles(dbPath) @@ -878,7 +877,7 @@ class LDBTestCase(unittest.TestCase): ) # Pattern to expect from blob file dump. - regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" # noqa + regex = r".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" # noqa expected_pattern = re.compile(regex) blob_files = self.getBlobFiles(dbPath) self.assertTrue(len(blob_files) >= 1) @@ -896,7 +895,7 @@ class LDBTestCase(unittest.TestCase): self.assertRunOK("get wal1", "wal1_val") # Pattern to expect from WAL dump. - regex = "^Sequence,Count,ByteSize,Physical Offset,Key\(s\).*" + regex = r"^Sequence,Count,ByteSize,Physical Offset,Key\(s\).*" expected_pattern = re.compile(regex) wal_files = self.getWALFiles(dbPath) diff --git a/tools/write_stress_runner.py b/tools/write_stress_runner.py index f39f79cd4e..515a1789d4 100644 --- a/tools/write_stress_runner.py +++ b/tools/write_stress_runner.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import absolute_import, division, print_function, unicode_literals import argparse import random