diff --git a/benchmarks/scripts/cccl/bench/bench.py b/benchmarks/scripts/cccl/bench/bench.py index a4e8c34f7cc..588a264bc47 100644 --- a/benchmarks/scripts/cccl/bench/bench.py +++ b/benchmarks/scripts/cccl/bench/bench.py @@ -1,16 +1,17 @@ import itertools -import json import os import signal import subprocess import time import fpzip -import numpy as np +from .bench_result import BenchResult, get_axis_name from .cmake import CMake from .config import BasePoint, Config +from .json_cache import device_json, json_benches from .logger import Logger +from .process_runner import ProcessRunner from .score import compute_axes_ids, compute_weight_matrices, get_workload_weight from .storage import Storage, get_bench_table_name @@ -29,45 +30,6 @@ def first_val(my_dict): return first_value -class JsonCache: - _instance = None - - def __new__(cls): - if cls._instance is None: - cls._instance = super().__new__(cls) - cls._instance.bench_cache = {} - cls._instance.device_cache = {} - return cls._instance - - def get_bench(self, algname): - if algname not in self.bench_cache: - result = subprocess.check_output( - [os.path.join(".", "bin", algname + ".base"), "--jsonlist-benches"] - ) - self.bench_cache[algname] = json.loads(result) - return self.bench_cache[algname] - - def get_device(self, algname): - if algname not in self.device_cache: - result = subprocess.check_output( - [os.path.join(".", "bin", algname + ".base"), "--jsonlist-devices"] - ) - devices = json.loads(result)["devices"] - - if len(devices) != 1: - raise Exception( - "NVBench doesn't work well with multiple GPUs, use `CUDA_VISIBLE_DEVICES`" - ) - - self.device_cache[algname] = devices[0] - - return self.device_cache[algname] - - -def json_benches(algname): - return JsonCache().get_bench(algname) - - def create_benches_tables(conn, subbench, bench_axes): with conn: conn.execute(""" @@ -117,163 +79,12 @@ def create_benches_tables(conn, subbench, bench_axes): ) -def read_json(filename): - with open(filename, "r") as 
f: - file_root = json.load(f) - return file_root - - -def extract_filename(summary): - summary_data = summary["data"] - value_data = next(filter(lambda v: v["name"] == "filename", summary_data)) - assert value_data["type"] == "string" - return value_data["value"] - - -def extract_size(summary): - summary_data = summary["data"] - value_data = next(filter(lambda v: v["name"] == "size", summary_data)) - assert value_data["type"] == "int64" - return int(value_data["value"]) - - -def extract_bw(summary): - summary_data = summary["data"] - value_data = next(filter(lambda v: v["name"] == "value", summary_data)) - assert value_data["type"] == "float64" - return float(value_data["value"]) - - -def parse_samples_meta(state): - summaries = state["summaries"] - if not summaries: - return None, None - - summary = next( - filter(lambda s: s["tag"] == "nv/json/bin:nv/cold/sample_times", summaries), - None, - ) - if not summary: - return None, None - - sample_filename = extract_filename(summary) - sample_count = extract_size(summary) - return sample_count, sample_filename - - -def parse_samples(state): - sample_count, samples_filename = parse_samples_meta(state) - if not sample_count or not samples_filename: - return np.array([], dtype=np.float32) - - with open(samples_filename, "rb") as f: - samples = np.fromfile(f, " 1: + benchmarks[name] = parse_ranges(columns[1:]) else: - columns = [" ".join(line.split())] - - name = columns[0] - - if name == "ctk_version": - ctk_version = columns[1].rstrip() - elif name == "cccl_revision": - cccl_revision = columns[1].rstrip() - else: - if len(columns) > 1: - benchmarks[name] = parse_ranges(columns[1:]) - else: - benchmarks[name] = [] + benchmarks[name] = [] return ctk_version, cccl_revision, benchmarks diff --git a/benchmarks/scripts/cccl/bench/json_cache.py b/benchmarks/scripts/cccl/bench/json_cache.py new file mode 100644 index 00000000000..a52fa85f2d9 --- /dev/null +++ b/benchmarks/scripts/cccl/bench/json_cache.py @@ -0,0 +1,50 @@ +import 
json
+import os
+import subprocess
+
+
+def get_alg_base(algname: str):
+    return os.path.join(".", "bin", algname + ".base")
+
+
+class JsonCache:
+    _instance = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance.bench_cache = {}
+            cls._instance.device_cache = {}
+        return cls._instance
+
+    def get_bench(self, algname):
+        if algname not in self.bench_cache:
+            result = subprocess.check_output(
+                [get_alg_base(algname), "--jsonlist-benches"]
+            )
+            self.bench_cache[algname] = json.loads(result)
+        return self.bench_cache[algname]
+
+    def get_device(self, algname):
+        if algname not in self.device_cache:
+            result = subprocess.check_output(
+                [get_alg_base(algname), "--jsonlist-devices"]
+            )
+            devices = json.loads(result)["devices"]
+
+            if len(devices) != 1:
+                raise Exception(
+                    "NVBench doesn't work well with multiple GPUs, use `CUDA_VISIBLE_DEVICES`"
+                )
+
+            self.device_cache[algname] = devices[0]
+
+        return self.device_cache[algname]
+
+
+def json_benches(algname):
+    return JsonCache().get_bench(algname)
+
+
+def device_json(algname):
+    return JsonCache().get_device(algname)
diff --git a/benchmarks/scripts/cccl/bench/process_runner.py b/benchmarks/scripts/cccl/bench/process_runner.py
new file mode 100644
index 00000000000..54a9769bda7
--- /dev/null
+++ b/benchmarks/scripts/cccl/bench/process_runner.py
@@ -0,0 +1,34 @@
+import os
+import signal
+import subprocess
+
+
+class ProcessRunner:
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        if not isinstance(cls._instance, cls):
+            cls._instance = super(ProcessRunner, cls).__new__(cls, *args, **kwargs)
+        return cls._instance
+
+    def __init__(self):
+        self.process = getattr(self, "process", None)  # __init__ reruns per call
+        signal.signal(signal.SIGINT, self.signal_handler)
+        signal.signal(signal.SIGTERM, self.signal_handler)
+
+    def new_process(self, cmd):
+        self.process = subprocess.Popen(
+            cmd,
+            start_new_session=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        return self.process
+
+    def signal_handler(self, signum, frame):
+        self.kill_process()
+        raise SystemExit("search was interrupted")
+
+    def kill_process(self):
+        if self.process is not None and self.process.poll() is None:  # running
+            os.killpg(os.getpgid(self.process.pid), signal.SIGKILL)
diff --git a/benchmarks/scripts/cccl/bench/score.py b/benchmarks/scripts/cccl/bench/score.py
index 5a2ab099213..0f02748f751 100644
--- a/benchmarks/scripts/cccl/bench/score.py
+++ b/benchmarks/scripts/cccl/bench/score.py
@@ -4,11 +4,11 @@
 
 
 def importance_function(x):
-    return 1 - math.exp(-x)
+    return -math.expm1(-x)
 
 
 def x_by_importance(y):
-    return -math.log(1 - y)
+    return -math.log1p(-y)
 
 
 def compute_weights(num_values):
@@ -98,7 +98,9 @@ def get_workload_coordinates(rt_workload, rt_axes_values, rt_axes_ids):
     coordinates = [0] * len(rt_axes_ids)
     for point in rt_workload:
         rt_axis, rt_value = point.split("=")
-        coordinates[rt_axes_ids[rt_axis]] = rt_axes_values[rt_axis].index(rt_value)
+        axes_id = rt_axes_ids[rt_axis]
+        index = rt_axes_values[rt_axis].index(rt_value)
+        coordinates[axes_id] = index
     return coordinates
 
 
diff --git a/benchmarks/scripts/cccl/bench/storage.py b/benchmarks/scripts/cccl/bench/storage.py
index 72a9036ee35..9c07b96a1df 100644
--- a/benchmarks/scripts/cccl/bench/storage.py
+++ b/benchmarks/scripts/cccl/bench/storage.py
@@ -44,7 +44,7 @@ def get_postgres_config():
 
 
 def get_bench_table_name(subbench, algname):
-    return "{}.{}".format(algname, subbench)
+    return f"{algname}.{subbench}"
 
 
 def blob_to_samples(blob):