#!/usr/bin/env python3
'''Benchmark pipeline execution times, either for preset or custom ones'''
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import datetime
import os
from pathlib import Path
import sys
import cpuinfo
import numpy as np
from pisa import TARGET, PISA_NUM_THREADS
from pisa.core.distribution_maker import Pipeline
from pisa.utils.fileio import to_file
from pisa.utils.log import Levels, logging, set_verbosity
__all__ = ['PIPELINE_CFGS_TO_TEST', 'NTEMPLATES', 'PFX', 'get_get_outputs_time',
'create_benchmark_result', 'write_benchmark_json', 'parse_args', 'main']
__license__ = '''Copyright (c) 2014-2026, The IceCube Collaboration
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.'''
PIPELINE_CFGS_TO_TEST = [
    "settings/pipeline/IceCube_3y_neutrinos_daemon.cfg",
    "settings/pipeline/IceCube_3y_neutrinos.cfg",
    "settings/pipeline/IceCube_3y_muons.cfg",
]
"""Pipeline config files in PISA package to test by default"""

NTEMPLATES = 50
"""Number of random Asimov templates to produce by default (no caching)"""

PFX = "[B] "
"""Prefix each line output by this script to clearly delineate output from this
script vs. output from test functions being run"""
[docs]
def get_get_outputs_time(pipeline):
    """
    Extract the minimum, average and maximum
    :py:meth:`~.pipeline.Pipeline.get_outputs()` execution time from a
    profiled pipeline.

    Note that the first call to `get_outputs()` typically includes compilation
    overhead (numba JIT). It is therefore excluded from the statistics
    whenever more than one call has been recorded.

    Parameters
    ----------
    pipeline : Pipeline
        Pipeline instance with `profile=True` that has been run via
        `get_outputs()`

    Returns
    -------
    minim, avg, maxim : float
        Minimum, average, maximum `get_outputs()` time in seconds

    Raises
    ------
    ValueError
        If the pipeline has no recorded `get_outputs()` times

    """
    times = pipeline._get_outputs_times # pylint: disable=protected-access

    # Explicit None/length test instead of `not times`: plain truthiness is
    # ambiguous (and raises) for a numpy array holding more than one element
    if times is None or len(times) == 0:
        raise ValueError(
            "No get_outputs() times recorded. Ensure pipeline was created with "
            "profile=True and get_outputs() was called."
        )

    # Convert once so that lists, tuples and arrays are all handled alike
    samples = np.asarray(times, dtype=float)

    # Skip the first call if there are several, to avoid a contribution from
    # one-off overhead; a single recorded call is all we have, so use it
    if len(times) > 1:
        samples = samples[1:]

    return float(samples.min()), float(samples.mean()), float(samples.max())
[docs]
def create_benchmark_result(pipeline_config_name, target, nthreads, time_s, range_s):
    """
    Assemble one benchmark entry in github-action-benchmark format.

    Parameters
    ----------
    pipeline_config_name : str
        Name/path of the benchmarked pipeline config; only its file stem
        (final path component without the last suffix) enters the entry name
    target : str
        :py:data:`~pisa.TARGET` value
    nthreads : int
        :py:data:`~pisa.PISA_NUM_THREADS` value
    time_s : float
        Execution time metric (average) in seconds, stored as "value"
    range_s : float
        Execution time variation in seconds, stored as "range"

    Returns
    -------
    dict
        Entry with the keys "name", "value", "unit", "range" and "extra"

    """
    stem = Path(pipeline_config_name).stem

    entry = {}
    entry["name"] = f"{stem} ({target}, nthreads={nthreads})"
    entry["value"] = time_s
    entry["unit"] = "s"
    entry["range"] = range_s
    entry["extra"] = f"target={target}, nthreads={nthreads}"
    return entry
[docs]
def write_benchmark_json(results, output_path, commit_sha=None, commit_msg=None):
    """
    Write benchmark results to JSON file in github-action-benchmark format.

    Parameters
    ----------
    results : list of dict
        List of benchmark result dictionaries (from
        :py:func:`~create_benchmark_result`)
    output_path : str or Path
        Path where JSON file should be written; missing parent directories
        are created
    commit_sha : str, optional
        Git commit SHA. Accepted for interface compatibility but currently
        not written to the file (see Notes).
    commit_msg : str, optional
        Git commit message. Accepted for interface compatibility but
        currently not written to the file (see Notes).

    Returns
    -------
    Path
        Path to written JSON file

    Notes
    -----
    The github-action-benchmark tool expects the JSON file to contain
    only the array of results, not a wrapper object. Hence neither a
    timestamp nor the commit information is included in the output file;
    they can be added back if needed for other purposes.

    """
    # NOTE: no timestamp is computed here on purpose. It was never written
    # out, and building it required `datetime.UTC` (Python >= 3.11 only),
    # which made this function fail needlessly on older interpreters.
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Write only the results array, as expected by github-action-benchmark
    to_file(results, output_path)

    return output_path
[docs]
def parse_args():
    """Set up the command-line interface and parse the script's arguments.

    Returns
    -------
    argparse.Namespace
        Holds `n` (number of templates to produce, defaulting to
        `NTEMPLATES`) and `p` (list of custom pipeline configs, or `None` if
        the option was not given)

    """
    description = (
        'Benchmark time it takes to run preset '
        f'pipeline configurations ({PIPELINE_CFGS_TO_TEST}) or custom ones.'
    )
    cli = ArgumentParser(
        description=description,
        formatter_class=ArgumentDefaultsHelpFormatter,
    )

    # Number of templates generated per pipeline
    cli.add_argument(
        '-n',
        default=NTEMPLATES,
        type=int,
        help='No. of random (=no caching) Asimov templates to produce.',
    )

    # Repeatable option: every occurrence appends one pipeline config
    cli.add_argument(
        '-p',
        action='append',
        default=None,
        type=str,
        help=(
            'Custom pipelines to benchmark instead of the preset ones. '
            'Repeat for multiple.'
        ),
    )

    return cli.parse_args()
[docs]
def main():
    """Benchmark the selected pipeline configs and store the timings.

    Logs the Python build and CPU information, then for each pipeline config
    (the preset ones unless `-p` was given) produces `-n` templates with
    randomized free parameters, reports the profile and records the average
    `get_outputs()` time together with the max-min spread. All entries are
    finally written to
    ``benchmark_results/results_target_<TARGET>_nthreads_<PISA_NUM_THREADS>.json``
    (relative to the current working directory).
    """
    cli_args = parse_args()

    set_verbosity(Levels.INFO)
    logging.info("%sPython build: %s", PFX, sys.version)
    cpu_details = cpuinfo.get_cpu_info()
    for key, val in cpu_details.items():
        logging.info("%s%s = %s", PFX, key, val)

    # Custom pipelines, if any were given, replace the preset ones
    if cli_args.p is None:
        cfgs = PIPELINE_CFGS_TO_TEST
    else:
        cfgs = cli_args.p

    # One github-action-benchmark entry per pipeline config
    entries = []

    for cfg in cfgs:
        logging.info("%sObtaining timings for pipeline %s...", PFX, cfg)

        # Lower the verbosity while the pipeline is instantiated and run
        set_verbosity(Levels.WARN)
        pipeline = Pipeline(cfg, profile=True)

        # Draw new values for all free parameters (seeded by the iteration
        # index) and compute the corresponding outputs, n times in total
        for seed in range(cli_args.n):
            pipeline.params.randomize_free(random_state=seed)
            pipeline.get_outputs()

        set_verbosity(Levels.INFO)
        logging.info("%s%s:", PFX, os.path.basename(cfg))
        pipeline.report_profile(detailed=False)
        print("\n")

        # Min./avg./max. execution times of the get_outputs() calls
        fastest, mean, slowest = get_get_outputs_time(pipeline)

        # The entry's value is the average; the range is the full spread
        entries.append(
            create_benchmark_result(
                pipeline_config_name=cfg,
                target=TARGET,
                nthreads=PISA_NUM_THREADS,
                time_s=mean,
                range_s=slowest - fastest,
            )
        )

    # Destination of the JSON output
    json_name = f"results_target_{TARGET}_nthreads_{PISA_NUM_THREADS}.json"
    output_file = Path("benchmark_results") / json_name

    # Commit information is handed over but currently not written to file,
    # as github-action-benchmark expects only the results array
    write_benchmark_json(
        entries,
        output_file,
        commit_sha=os.environ.get("GITHUB_SHA"),
        commit_msg=os.environ.get("GITHUB_COMMIT_MSG"),
    )

    logging.info("%sBenchmark results written to: %s", PFX, output_file)
if __name__ == '__main__':
main()