Commit dd8459a6 authored by Raphaël Gomès's avatar Raphaël Gomès
Browse files

Allow multiple env variables and arguments

parent feb8ecb6e959
......@@ -16,22 +16,39 @@ You also need the ``config.json`` file at the root of this repository. Here is a
"executables": {
"upstream": {
"path": "/path/to/upstream/hg",
"env": {
"HGMODULEPOLICY": "rust+c",
"RUST_LOG": "trace"
}
"env": [
{
"HGMODULEPOLICY": "rust+c",
"RUST_LOG": "trace",
"RAYON_NUM_THREADS": "1"
},
{
"HGMODULEPOLICY": "rust+c",
"RUST_LOG": "trace",
"RAYON_NUM_THREADS": "16"
}
]
},
"jane": {
"path": "/path/to/hg-jane-fastpath",
"env": {
"HGRS_HG": "/path/to/upstream/hg"
},
"args": "--no-mmap --no-unclean-cache --profile"
"env": [
{
"HGRS_HG": "/path/to/upstream/hg"
}
],
"args": [
"--no-mmap --profile",
"--no-mmap --no-unclean-cache --profile"
]
}
}
}
```
> **Note:** The only acceptable executable names are "upstream" and "jane" for now, because we have special extraction code for their logs (and version).
The ``env`` and ``args`` values are lists; tests will be run once for each combination in their cartesian product.
Finally, you need a ``machine.json`` file at the root of this repository, as follows:
```json
......@@ -53,6 +70,6 @@ Look at the summary:
``./bench.py -s``
Results are JSON files in ``results/{repo name}/[{executable_id}/]{executable name}_{url-encoded command}.json``, they are overwritten after each run that results in the same name.
Results are JSON files in ``results/{machine_name}/{repo name}/[{executable_id}/]{executable name}_{hash}.json``; each file is overwritten by any later run that produces the same name. The hash is computed from the command, the environment variables and the arguments.
For debugging purposes, ``stderr`` and ``stdout`` outputs of the subprocesses are kept in ``logs/{run_uid}/[stdout,stderr]``, with ``{run_uid}`` corresponding to the key of the same name in any JSON results file.
#!/usr/bin/env python3
import itertools
import re
import sys
import argparse
......@@ -6,8 +7,8 @@ import json
import os
import subprocess
from collections import defaultdict
from hashlib import blake2s
from pathlib import Path
from urllib.parse import quote
from uuid import uuid4, UUID
BASE_DIR = Path(__file__).parent.absolute()
......@@ -57,6 +58,7 @@ def post_process_results(
cmd: str,
result_path: Path,
machine_info: dict,
env_and_args_hash: str,
):
tmp_path = Path(f"{result_path}.tmp")
with tmp_path.open(mode="r") as f:
......@@ -70,6 +72,7 @@ def post_process_results(
data = {
"version": 0,
"env_and_args_hash": env_and_args_hash,
"machine_info": machine_info,
"run_uid": run_uid.hex,
"repository": str(repo_path.absolute()),
......@@ -90,10 +93,17 @@ def post_process_results(
def run_repo_command(
repo_path: Path, exec_kind: str, exec_config, cmd: str, machine_info: dict
):
executable = exec_config["path"]
subprocess_args = exec_config.get("args", "")
executable = exec_config["path"]
exec_name = executable.rsplit(os.sep)[-1]
filename = f"{exec_name}_{quote(cmd)}"
env = exec_config.get("env", {})
env_string = " ".join(f"{k}={v}" for k, v in sorted(env.items()))
hasher_data = b"".join(
map(lambda x: x.encode("utf8"), [cmd, exec_config["args"], env_string])
)
env_and_args_hash = blake2s(hasher_data).hexdigest()
filename = f"{exec_name}_{env_and_args_hash}"
uid = uuid4()
......@@ -111,9 +121,6 @@ def run_repo_command(
out_path.touch(exist_ok=True)
# Pass the env to the subcommand of hyperfine, otherwise it won't work
env = exec_config.get("env", {})
env_string = " ".join(f"{k}={v}" for k, v in env.items())
full_cmd = (
f"{env_string} {executable} {cmd} {subprocess_args} "
f">> {log_dir / 'stdout'} 2>> {log_dir / 'stderr'}"
......@@ -126,7 +133,14 @@ def run_repo_command(
subprocess.run(full_cmd, check=True, shell=True, cwd=str(repo_path))
post_process_results(
repo_path, uid, exec_kind, exec_config, cmd, out_path, machine_info
repo_path,
uid,
exec_kind,
exec_config,
cmd,
out_path,
machine_info,
env_and_args_hash,
)
......@@ -136,7 +150,8 @@ def bench(exec_kind: str, exec_config: dict, machine_info: dict):
if exec_kind == "upstream":
try:
env = os.environ.copy()
env.update(exec_config.get("env", {}))
# This should work for any version of env
env.update(exec_config.get("env", [{}])[0])
res = subprocess.run(
[exec_config["path"], "version", "-T{ver}"],
capture_output=True,
......@@ -158,9 +173,17 @@ def bench(exec_kind: str, exec_config: dict, machine_info: dict):
repos_tested += 1
for command in REPO_COMMANDS:
run_repo_command(
repo_path, exec_kind, exec_config, command, machine_info
)
args = exec_config.get("args") or [""]
env = exec_config.get("env") or [{}]
product = itertools.product(args, env)
for args, env in product:
new_config = exec_config.copy()
new_config["args"] = args
new_config["env"] = env
run_repo_command(
repo_path, exec_kind, new_config, command, machine_info
)
print(f"[MAIN] Tested {repos_tested} repositories")
......@@ -200,7 +223,9 @@ def print_result(res):
def summary():
results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
results = defaultdict(
lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
)
for root, dirs, files in os.walk(RESULTS_DIR):
for file in files:
......@@ -217,20 +242,25 @@ def summary():
repo = data["repository"]
machine_name = data["machine_info"]["name"]
command = data["command"]
results[repo][command][machine_name].append(data)
args_hash = data["env_and_args_hash"]
results[repo][command][machine_name][args_hash].append(data)
for name, commands in sorted(results.items()):
print(f"=== Repository '{name.rsplit(os.sep)[-1]}' ===")
for command, machines in sorted(commands.items()):
print(f"\n== Command: '{command}' ==")
for machine_name, data in sorted(machines.items()):
for res in sorted(data, key=lambda d: d["config"]["path"]):
executable_name = res["config"]["path"].rsplit(os.sep)[-1]
print(
f"\n--- [{machine_name}] {executable_name} "
f"({res.get('executable_version')}) ---"
)
print_result(res)
for machine_name, args_and_env in sorted(machines.items()):
for hashed, data in args_and_env.items():
for res in sorted(data, key=lambda d: d["config"]["path"]):
config = res["config"]
executable_name = config["path"].rsplit(os.sep)[-1]
print(
f"\n--- [{machine_name}] {executable_name} "
f"({res.get('executable_version')}) ---"
)
print(f"env: {config.get('env', {})}")
print(f"args: {config.get('args', '')}")
print_result(res)
print("")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment