#!/usr/bin/env python3
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

"""Access the results of benchmark runs.

Send these results on to the OpenSearch graphing service.
"""

import argparse
import itertools
import logging
import os
import re
import sys

import requests
from dateutil import parser

logging.basicConfig(level=logging.DEBUG)


class Configuration:
    # OpenSearch credentials are read from the environment at import time,
    # so ES_USER and ES_PASS must be set before running this tool.
    opensearch_user = os.environ["ES_USER"]
    opensearch_pass = os.environ["ES_PASS"]


class BenchmarkResultException(Exception):
    def __init__(self, message, content):
        super().__init__(message)
        self.content = content


class BenchmarkUtils:
    # Column headers expected in a benchmark report; kept for reference
    # (this list is not referenced elsewhere in this script).
    expected_keys = [
        "ops_sec",
        "mb_sec",
        "lsm_sz",
        "blob_sz",
        "c_wgb",
        "w_amp",
        "c_mbps",
        "c_wsecs",
        "c_csecs",
        "b_rgb",
        "b_wgb",
        "usec_op",
        "p50",
        "p99",
        "p99.9",
        "p99.99",
        "pmax",
        "uptime",
        "stall%",
        "Nstall",
        "u_cpu",
        "s_cpu",
        "rss",
        "test",
        "date",
        "version",
        "job_id",
    ]

    @staticmethod
    def sanity_check(row):
        if "test" not in row:
            logging.debug(f"not 'test' in row: {row}")
            return False
        if row["test"] == "":
            logging.debug(f"row['test'] == '': {row}")
            return False
        if "date" not in row:
            logging.debug(f"not 'date' in row: {row}")
            return False
        if "ops_sec" not in row:
            logging.debug(f"not 'ops_sec' in row: {row}")
            return False
        try:
            _ = int(row["ops_sec"])
        except (ValueError, TypeError):
            logging.debug(f"int(row['ops_sec']) is not valid: {row}")
            return False
        try:
            (_, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
        except parser.ParserError:
            logging.error(
                f"parser.parse(row['date']): not a valid format for date in row: {row}"
            )
            return False
        return True
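
    # Illustrative sketch (hypothetical rows, not taken from a real report):
    # a row passes sanity_check when "test" is non-empty, "ops_sec" parses as
    # an int, and "date" parses as a date:
    #   BenchmarkUtils.sanity_check(
    #       {"test": "fillseq", "date": "2022-07-01T00:14:55", "ops_sec": "82000"}
    #   )  # -> True
    #   BenchmarkUtils.sanity_check(
    #       {"test": "", "date": "2022-07-01T00:14:55", "ops_sec": "82000"}
    #   )  # -> False (empty test name)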

    @staticmethod
    def conform_opensearch(row):
        (dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True)
        # create a test_date field, which was previously what was expected
        # repair the date field, which can be in a WRONG ISO FORMAT
        # (no leading 0 on a single-digit day-of-month),
        # e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55
        row["test_date"] = dt.isoformat()
        row["date"] = dt.isoformat()
        # rename keys containing ".", e.g. "p99.9" becomes "p99_9"
        return {key.replace(".", "_"): value for key, value in row.items()}
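
    # A minimal sketch of the transformation (hypothetical row):
    #   BenchmarkUtils.conform_opensearch(
    #       {"test": "fillseq", "date": "2022-07-1T00:14:55", "p99.9": "12"}
    #   )
    # returns
    #   {"test": "fillseq", "date": "2022-07-01T00:14:55",
    #    "p99_9": "12", "test_date": "2022-07-01T00:14:55"}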


class ResultParser:
    def __init__(self, field=r"(\w|[+-:.%])+", intrafield=r"(\s)+", separator="\t"):
        self.field = re.compile(field)
        self.intra = re.compile(intrafield)
        self.sep = re.compile(separator)
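
    # The default field pattern accepts db_bench-style tokens such as
    # "ops_sec", "p99.9" or "stall%" (hypothetical samples), e.g.:
    #   re.compile(r"(\w|[+-:.%])+").match("p99.9").group(0)  # -> "p99.9"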

    def ignore(self, l_in: str):
        if len(l_in) == 0:
            return True
        if l_in[0:1] == "#":
            return True
        return False

    def line(self, line_in: str):
        """Parse a line into items

        Being clever about separators
        """
        line = line_in
        row = []
        while line != "":
            match_item = self.field.match(line)
            if match_item:
                item = match_item.group(0)
                row.append(item)
                line = line[len(item) :]
            else:
                match_intra = self.intra.match(line)
                if match_intra:
                    intra = match_intra.group(0)
                    # Count the separators
                    # If there are >1 then generate extra blank fields
                    # White space with no true separators fakes up a single separator
                    tabbed = self.sep.split(intra)
                    sep_count = len(tabbed) - 1
                    if sep_count == 0:
                        sep_count = 1
                    for _ in range(sep_count - 1):
                        row.append("")
                    line = line[len(intra) :]
                else:
                    raise BenchmarkResultException(
                        "Invalid TSV line", f"{line_in} at {line}"
                    )
        return row
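
    # A small sketch of the separator handling (hypothetical inputs):
    #   ResultParser().line("a\t\tb")  # -> ["a", "", "b"] (double tab yields a blank field)
    #   ResultParser().line("a   b")   # -> ["a", "b"] (plain whitespace acts as one separator)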

    def parse(self, lines):
        """Parse something that iterates lines"""
        rows = [self.line(line) for line in lines if not self.ignore(line)]
        header = rows[0]
        width = len(header)
        records = [
            {k: v for (k, v) in itertools.zip_longest(header, row[:width])}
            for row in rows[1:]
        ]
        return records
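
    # Sketch of the header/record pairing (hypothetical two-line TSV):
    #   ResultParser().parse(["ops_sec\ttest", "82000\tfillseq"])
    #   # -> [{"ops_sec": "82000", "test": "fillseq"}]
    # Short rows are padded with None by zip_longest; long rows are truncated
    # to the header width.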


def load_report_from_tsv(filename: str):
    with open(filename, "r") as file:
        contents = file.readlines()
    parser = ResultParser()
    report = parser.parse(contents)
    logging.debug(f"Loaded TSV Report: {report}")
    return report


def push_report_to_opensearch(report, esdocument):
    sanitized = [
        BenchmarkUtils.conform_opensearch(row)
        for row in report
        if BenchmarkUtils.sanity_check(row)
    ]
    logging.debug(
        f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch"
    )
    for single_benchmark in sanitized:
        logging.debug(f"upload benchmark: {single_benchmark}")
        response = requests.post(
            esdocument,
            json=single_benchmark,
            auth=(Configuration.opensearch_user, Configuration.opensearch_pass),
        )
        logging.debug(
            f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}"
        )
        response.raise_for_status()
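

# The esdocument argument is a full OpenSearch/ElasticSearch REST endpoint.
# With the standard index API this is typically a URL of the form (hypothetical
# host and index name):
#   https://my-opensearch-host/rocksdb-bench-index/_doc
# so each sanitized benchmark row above is POSTed as one JSON document.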


def push_report_to_null(report):
    # Dry-run sink: validate and conform each row, but upload nothing.
    for row in report:
        if BenchmarkUtils.sanity_check(row):
            logging.debug(f"row {row}")
            conformed = BenchmarkUtils.conform_opensearch(row)
            logging.debug(f"conformed row {conformed}")


def main():
    """Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch

    This tool will

    (1) Open a local tsv benchmark report file
    (2) Upload to OpenSearch document, via https/JSON
    """

    parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.")

    # --tsvfile is the name of the file to read results from
    # --esdocument is the ElasticSearch/OpenSearch document to push these results into
    parser.add_argument(
        "--tsvfile",
        default="build_tools/circle_api_scraper_input.txt",
        help="File from which to read tsv report",
    )
    parser.add_argument(
        "--esdocument",
        help="ElasticSearch/OpenSearch document URL to upload report into",
    )
    parser.add_argument(
        "--upload",
        choices=["opensearch", "none"],
        default="opensearch",
        help="Upload target: 'opensearch' (default) or 'none' for a dry run",
    )

    args = parser.parse_args()
    logging.debug(f"Arguments: {args}")
    reports = load_report_from_tsv(args.tsvfile)
    if args.upload == "opensearch":
        push_report_to_opensearch(reports, args.esdocument)
    else:
        push_report_to_null(reports)
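

# Example invocation (hypothetical script name and OpenSearch URL; ES_USER and
# ES_PASS must be set in the environment):
#   ES_USER=admin ES_PASS=secret python3 benchmark_log_tool.py \
#       --tsvfile build_tools/circle_api_scraper_input.txt \
#       --esdocument https://my-opensearch-host/rocksdb-bench-index/_doc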


if __name__ == "__main__":
    sys.exit(main())