From 519338232cabc55ebfd67ea8876558ab46a8c027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20L=C3=BChne?= Date: Fri, 1 Dec 2017 14:12:36 +0100 Subject: [PATCH] Add evaluation dashboard generator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This script isn’t well-coded but serves its purpose. It shows the current state of the benchmark and aggregated results as a website. --- evaluate.py | 319 +++++++++++++++++++++++++++++ gradient.png | Bin 0 -> 364 bytes style.css | 568 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 887 insertions(+) create mode 100755 evaluate.py create mode 100644 gradient.png create mode 100644 style.css diff --git a/evaluate.py b/evaluate.py new file mode 100755 index 000000000..37a8b2a75 --- /dev/null +++ b/evaluate.py @@ -0,0 +1,319 @@ +#!/usr/bin/python3 + +import math +import os +import re +import subprocess +import sys +import time +import yaml + +import pprint + +gray = (186, 189, 182) + +def executeCommand(command, stdin = None, cwd = None): + with subprocess.Popen(command, stdout = subprocess.PIPE, stderr = subprocess.PIPE, stdin = (subprocess.PIPE if stdin != None else None), cwd = cwd) as process: + stdout, stderr = process.communicate(input = (stdin.encode("utf-8") if stdin != None else None)) + exitCode = process.returncode + + return stdout.decode("utf-8"), stderr.decode("utf-8"), exitCode + +def git(command, cwd, enforce = False): + stdout, stderr, exitCode = executeCommand(["git"] + command, cwd = cwd) + + if exitCode != 0: + print(stderr, file = sys.stderr) + + if enforce: + raise RuntimeError("git error") + +def initRepo(config): + dataDir = config["storage"]["local"] + + # clone repo if not existing + if not os.path.isdir(config["storage"]["local"]): + git(["clone", config["storage"]["remote"], dataDir], None, enforce = True) + + # fetch origin + git(["fetch"], cwd = dataDir) + + # pull all branches + for key, branch in config["storage"]["branches"].items(): + git(["checkout", branch], cwd = dataDir, enforce = True) + git(["pull"], cwd = dataDir) + +def readBenchmarkConfig(config): + initRepo(config) + + dataDir = config["storage"]["local"] + + # checkout config branch + git(["checkout", config["storage"]["branches"]["config"]], cwd = dataDir, enforce = True) + + # read instance list + instancesFile = os.path.join(config["storage"]["local"], "instances.yml") + + with open(instancesFile, "r") as stream: + instances = yaml.load(stream, Loader=yaml.CLoader) + + # read configurations to test + configurationsFile = os.path.join(config["storage"]["local"], "configurations.yml") + + with open(configurationsFile, "r") as stream: + configurations = yaml.load(stream, Loader=yaml.CLoader) + + # flatten lists of options + for configuration in configurations["configurations"]: + configuration["options"] = [item for sublist in configuration["options"] for item in sublist] + + return {"configurations": configurations, "instances": instances} + +def outputFilenames(configuration, instance, config): + instanceID = instance["ipc"] + "_" + instance["domain"] + "_" + str(instance["instance"]) + outputFile = os.path.join(configuration["id"], instanceID + ".out") + errorFile = os.path.join(configuration["id"], instanceID + ".err") + environmentFile = os.path.join(configuration["id"], instanceID + ".env") + + return {"outputFile": outputFile, "errorFile": errorFile, "environmentFile": environmentFile} + +def jobKey(configuration, instance): + return (configuration["id"], instance["ipc"], instance["domain"], instance["instance"]) + +def instanceKey(instance): + return (instance["ipc"], instance["domain"], instance["instance"]) + +def addResult(results, configuration, instance, result): + if not configuration["id"] in results: + results[configuration["id"]] = {} + + results[configuration["id"]][instanceKey(instance)] = result + +def result(results, configuration, instance): + return results[configuration["id"]][instanceKey(instance)] + +def mix(color1, color2, t): + return (color1[0] * (1 - t) + color2[0] * t, color1[1] * (1 - t) + color2[1] * t, color1[2] * (1 - t) + color2[2] * t) + +def resultColor(result, config): + if result <= 0: + return colors[0] + elif result >= config["limits"]["time"]: + return colors[-1] + + normalizedResult = (result / config["limits"]["time"]) ** 0.2 + normalizedResult *= (len(colors) - 1) + + c0 = min(math.floor(normalizedResult), len(colors) - 1) + t = normalizedResult - c0 + + if t <= 0: + return colors[c0] + elif t >= 1: + return colors[c0 + 1] + + return mix(colors[c0], colors[c0 + 1], t) + +def collectResults(config): + benchmarkConfig = readBenchmarkConfig(config) + + dataDir = config["storage"]["local"] + + # checkout results branch + git(["checkout", config["storage"]["branches"]["results"]], cwd = dataDir, enforce = True) + + configurations = benchmarkConfig["configurations"]["configurations"] + instances = benchmarkConfig["instances"] + + results = {} + + for instanceSetName, instanceSet in instances.items(): + for instance in instanceSet: + for configuration in configurations: + filenames = outputFilenames(configuration, instance, config) + outputFile = os.path.join(config["storage"]["local"], filenames["outputFile"]) + errorFile = os.path.join(config["storage"]["local"], filenames["errorFile"]) + environmentFile = os.path.join(config["storage"]["local"], filenames["environmentFile"]) + + if not os.path.exists(outputFile) or not os.path.exists(errorFile) or not os.path.exists(environmentFile): + addResult(results, configuration, instance, None) + continue + + with open(errorFile, "r") as errorOutput: + errors = errorOutput.read() + + finishedRE = re.compile("^FINISHED CPU", re.M) + runtimeRE = re.compile("", re.M) + timeoutRE = re.compile("^TIMEOUT CPU", re.M) + memoutRE = re.compile("^MEM CPU", re.M) + exitCodeRE = re.compile("^# exit code: (\d+)$", re.M) + + finished = finishedRE.search(errors) + runtime = runtimeRE.search(errors) + timeout = timeoutRE.search(errors) + memout = memoutRE.search(errors) + exitCode = exitCodeRE.search(errors) + + if exitCode and int(exitCode.group(1)) != 0: + text = "error" + color = None + elif finished: + value = float(runtime.group(1)) / 1000 + text = str(value) + color = (value / config["limits"]["time"]) ** 0.2 + elif timeout: + text = "> " + str(config["limits"]["time"]) + color = 1.0 + elif memout: + text = "> " + str(config["limits"]["memory"] / 1000000) + " GB" + color = 1.0 + + result = {"text": text, "color": color} + + addResult(results, configuration, instance, result) + + return configurations, instances, results + +def aggregateResults(configurations, instanceSetID, instanceSet, instances, results): + aggregatedResults = {("total", ""): {}} + + for instance in instanceSet: + ipcDomain = (instance["ipc"], instance["domain"]) + + if not ipcDomain in aggregatedResults: + aggregatedResults[ipcDomain] = {} + + for configuration in configurations: + if not instanceSetID in configuration["instanceSets"]: + continue + + if not configuration["id"] in aggregatedResults[ipcDomain]: + aggregatedResults[ipcDomain][configuration["id"]] = {"instances solved": 0, "average runtime": None, "results": []} + + if not configuration["id"] in aggregatedResults[("total", "")]: + aggregatedResults[("total", "")][configuration["id"]] = {"instances solved": 0, "average runtime": None, "results": []} + + r = result(results, configuration, instance) + + if r == None: + continue + + value = 900.0 + + try: + value = float(r["text"]) + aggregatedResults[ipcDomain][configuration["id"]]["instances solved"] += 1 + aggregatedResults[("total", "")][configuration["id"]]["instances solved"] += 1 + except: + pass + + aggregatedResults[ipcDomain][configuration["id"]]["results"].append(value) + aggregatedResults[("total", "")][configuration["id"]]["results"].append(value) + + for ipcDomain, results in aggregatedResults.items(): + for configurationKey, configurationResults in aggregatedResults[ipcDomain].items(): + configurationResults["average runtime"] = sum(configurationResults["results"]) / max(1, len(configurationResults["results"])) + + return aggregatedResults + +def requiresInstance(configuration, instance, instances): + for requiredInstanceSet in configuration["instanceSets"]: + if not requiredInstanceSet in instances: + raise RuntimeError("undefined instance set “" + requiredInstanceSet + "”") + + if instance in instances[requiredInstanceSet]: + return True + + return False + +def renderResultsTable(configurations, instanceSetID, instanceSet, instances, results): + print("

" + instanceSetID + " (detailed results)

") + + for configuration in configurations: + if not instanceSetID in configuration["instanceSets"]: + continue + + print("") + + print("") + + for instance in instanceSet: + print("") + + for configuration in configurations: + if not instanceSetID in configuration["instanceSets"]: + continue + + r = result(results, configuration, instance) + + if r and r["text"] != "error": + print("") + + print ("") + + print("") + + print("
IPCdomaininstance
" + configuration["id"] + "
" + instance["ipc"] + "" + instance["domain"] + "" + str(instance["instance"]) + "") + print(r["text"]) + elif r and r["text"] == "error": + print("") + print(r["text"]) + else: + print("") + + print("
") + +def renderAggregatedResultsTable(type, configurations, instanceSetID, instanceSet, instances, results, config): + aggregatedResults = aggregateResults(configurations, instanceSetID, instanceSet, instances, results) + + if not aggregatedResults: + print("") + return + + print("

" + instanceSetID + " (" + type + ")

") + + for configuration in configurations: + if not instanceSetID in configuration["instanceSets"]: + continue + + print("") + + print("") + + for ipcDomain, results in sorted(aggregatedResults.items()): + print("") + + for configuration in configurations: + if not instanceSetID in configuration["instanceSets"]: + continue + + if len(results[configuration["id"]]["results"]) == 0: + print("") + continue + + r = results[configuration["id"]][type] + + numberFormat = "%.2f" if type == "average runtime" else "%d/" + str(len(results[configuration["id"]]["results"])) + value = (r / config["limits"]["time"]) ** 0.2 if type == "average runtime" else 1.0 - r / max(1, len(results[configuration["id"]]["results"])) + classes = " result-" + str(int(value * 100)) + + print("") + + print("
IPCdomain
" + configuration["id"] + "
" + ipcDomain[0] + "" + ipcDomain[1] + "" + numberFormat % r + "
") + +def main(): + with open("config.yml", "r") as stream: + config = yaml.load(stream, Loader=yaml.CLoader) + + configurations, instances, results = collectResults(config) + + print("TPLP benchmark results

TPLP Benchmark Results

last updated at " + time.strftime("%Y-%m-%d %H:%M:%S %z") + "
") + + for instanceSetID, instanceSet in instances.items(): + renderAggregatedResultsTable("instances solved", configurations, instanceSetID, instanceSet, instances, results, config) + renderAggregatedResultsTable("average runtime", configurations, instanceSetID, instanceSet, instances, results, config) + renderResultsTable(configurations, instanceSetID, instanceSet, instances, results) + + print("
") + +main() diff --git a/gradient.png b/gradient.png new file mode 100644 index 0000000000000000000000000000000000000000..450a0d821602093c7cce0d869efecd6e15eb86cc GIT binary patch literal 364 zcmeAS@N?(olHy`uVBq!ia0vp^j0_CSFF4qMEctCy;(!!Ov6E*A2N2Y7q;vrJoCO|{ z#S9F5M?jcysy3fAP*AeOHKHUqKdq!Zu_%?Hyu4g5GcUV1Ik6yBFTW^#_B$IXprV(a zE{-7)t#7Bt@--XqH0S2+jehH3&3%(uN@Uu!W!b-!-?DNZHIi53YntWt|7nynI>7!mWR z_lnN+y;Xk%@9g!9QNG9}TpnyRDQ4P@DVchQ-!9v#G2Q%|V)nxjF=hpzrx`q5{an^L HB{Ts5e4mFp literal 0 HcmV?d00001 diff --git a/style.css b/style.css new file mode 100644 index 000000000..8fbad96be --- /dev/null +++ b/style.css @@ -0,0 +1,568 @@ +* +{ + font-family: "Source Sans Pro", sans-serif; + hyphens: auto; +} + +h1, h2 +{ + margin-top: 5rem; +} + +main +{ + max-width: 1600px; + margin-left: auto; + margin-right: auto; +} + +th +{ + /*transform: rotate(-90.0deg); + white-space: nowrap;*/ +} + +tbody +{ + font-size: 10pt; + text-align: center; +} + +td.tbd +{ + background-color: rgb(238, 238, 236); +} + +td +{ + width: 75px; +} + +td.col-header +{ + width: inherit; +} + +div.footnote +{ + font-size: 10pt; + color: #808080; +} + +td.error +{ + background-color: rgb(226, 60, 33); + color: rgb(148, 17, 0); +} + +td.result +{ + background-image: url("gradient.png"); + background-position: 0% 0%; + background-repeat: repeat-x; +} + +td.result-0 +{ + background-position: 0% 0%; +} + +td.result-1 +{ + background-position: 0% 1%; +} + +td.result-2 +{ + background-position: 0% 2%; +} + +td.result-3 +{ + background-position: 0% 3%; +} + +td.result-4 +{ + background-position: 0% 4%; +} + +td.result-5 +{ + background-position: 0% 5%; +} + +td.result-6 +{ + background-position: 0% 6%; +} + +td.result-7 +{ + background-position: 0% 7%; +} + +td.result-8 +{ + background-position: 0% 8%; +} + +td.result-9 +{ + background-position: 0% 9%; +} + +td.result-10 +{ + background-position: 0% 10%; +} + +td.result-11 +{ + background-position: 0% 11%; +} + +td.result-12 +{ + background-position: 0% 12%; +} + +td.result-13 +{ + background-position: 0% 13%; +} + +td.result-14 +{ + background-position: 0% 14%; +} + +td.result-15 +{ + background-position: 0% 15%; +} + +td.result-16 +{ + background-position: 0% 16%; +} + +td.result-17 +{ + background-position: 0% 17%; +} + +td.result-18 +{ + background-position: 0% 18%; +} + +td.result-19 +{ + background-position: 0% 19%; +} + +td.result-20 +{ + background-position: 0% 20%; +} + +td.result-21 +{ + background-position: 0% 21%; +} + +td.result-22 +{ + background-position: 0% 22%; +} + +td.result-23 +{ + background-position: 0% 23%; +} + +td.result-24 +{ + background-position: 0% 24%; +} + +td.result-25 +{ + background-position: 0% 25%; +} + +td.result-26 +{ + background-position: 0% 26%; +} + +td.result-27 +{ + background-position: 0% 27%; +} + +td.result-28 +{ + background-position: 0% 28%; +} + +td.result-29 +{ + background-position: 0% 29%; +} + +td.result-30 +{ + background-position: 0% 30%; +} + +td.result-31 +{ + background-position: 0% 31%; +} + +td.result-32 +{ + background-position: 0% 32%; +} + +td.result-33 +{ + background-position: 0% 33%; +} + +td.result-34 +{ + background-position: 0% 34%; +} + +td.result-35 +{ + background-position: 0% 35%; +} + +td.result-36 +{ + background-position: 0% 36%; +} + +td.result-37 +{ + background-position: 0% 37%; +} + +td.result-38 +{ + background-position: 0% 38%; +} + +td.result-39 +{ + background-position: 0% 39%; +} + +td.result-40 +{ + background-position: 0% 40%; +} + +td.result-41 +{ + background-position: 0% 41%; +} + +td.result-42 +{ + background-position: 0% 42%; +} + +td.result-43 +{ + background-position: 0% 43%; +} + +td.result-44 +{ + background-position: 0% 44%; +} + +td.result-45 +{ + background-position: 0% 45%; +} + +td.result-46 +{ + background-position: 0% 46%; +} + +td.result-47 +{ + background-position: 0% 47%; +} + +td.result-48 +{ + background-position: 0% 48%; +} + +td.result-49 +{ + background-position: 0% 49%; +} + +td.result-50 +{ + background-position: 0% 50%; +} + +td.result-51 +{ + background-position: 0% 51%; +} + +td.result-52 +{ + background-position: 0% 52%; +} + +td.result-53 +{ + background-position: 0% 53%; +} + +td.result-54 +{ + background-position: 0% 54%; +} + +td.result-55 +{ + background-position: 0% 55%; +} + +td.result-56 +{ + background-position: 0% 56%; +} + +td.result-57 +{ + background-position: 0% 57%; +} + +td.result-58 +{ + background-position: 0% 58%; +} + +td.result-59 +{ + background-position: 0% 59%; +} + +td.result-60 +{ + background-position: 0% 60%; +} + +td.result-61 +{ + background-position: 0% 61%; +} + +td.result-62 +{ + background-position: 0% 62%; +} + +td.result-63 +{ + background-position: 0% 63%; +} + +td.result-64 +{ + background-position: 0% 64%; +} + +td.result-65 +{ + background-position: 0% 65%; +} + +td.result-66 +{ + background-position: 0% 66%; +} + +td.result-67 +{ + background-position: 0% 67%; +} + +td.result-68 +{ + background-position: 0% 68%; +} + +td.result-69 +{ + background-position: 0% 69%; +} + +td.result-70 +{ + background-position: 0% 70%; +} + +td.result-71 +{ + background-position: 0% 71%; +} + +td.result-72 +{ + background-position: 0% 72%; +} + +td.result-73 +{ + background-position: 0% 73%; +} + +td.result-74 +{ + background-position: 0% 74%; +} + +td.result-75 +{ + background-position: 0% 75%; +} + +td.result-76 +{ + background-position: 0% 76%; +} + +td.result-77 +{ + background-position: 0% 77%; +} + +td.result-78 +{ + background-position: 0% 78%; +} + +td.result-79 +{ + background-position: 0% 79%; +} + +td.result-80 +{ + background-position: 0% 80%; +} + +td.result-81 +{ + background-position: 0% 81%; +} + +td.result-82 +{ + background-position: 0% 82%; +} + +td.result-83 +{ + background-position: 0% 83%; +} + +td.result-84 +{ + background-position: 0% 84%; +} + +td.result-85 +{ + background-position: 0% 85%; +} + +td.result-86 +{ + background-position: 0% 86%; +} + +td.result-87 +{ + background-position: 0% 87%; +} + +td.result-88 +{ + background-position: 0% 88%; +} + +td.result-89 +{ + background-position: 0% 89%; +} + +td.result-90 +{ + background-position: 0% 90%; +} + +td.result-91 +{ + background-position: 0% 91%; +} + +td.result-92 +{ + background-position: 0% 92%; +} + +td.result-93 +{ + background-position: 0% 93%; +} + +td.result-94 +{ + background-position: 0% 94%; +} + +td.result-95 +{ + background-position: 0% 95%; +} + +td.result-96 +{ + background-position: 0% 96%; +} + +td.result-97 +{ + background-position: 0% 97%; +} + +td.result-98 +{ + background-position: 0% 98%; +} + +td.result-99 +{ + background-position: 0% 99%; +} + +td.result-100 +{ + background-position: 0% 100%; +}