Skip to content

Commit d3b81bf

Browse files
authored
Merge pull request #2439 from pbalcer/explicit-benchmark-groups
[benchmarks] add explicit benchmark groups
2 parents 5c10466 + 10f499a commit d3b81bf

File tree

5 files changed

+203
-20
lines changed

5 files changed

+203
-20
lines changed

scripts/benchmarks/benches/compute.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ def extra_env_vars(self) -> dict:
104104
def setup(self):
105105
self.benchmark_bin = os.path.join(self.bench.directory, 'compute-benchmarks-build', 'bin', self.bench_name)
106106

107+
def explicit_group(self):
108+
return ""
109+
107110
def run(self, env_vars) -> list[Result]:
108111
command = [
109112
f"{self.benchmark_bin}",
@@ -120,7 +123,8 @@ def run(self, env_vars) -> list[Result]:
120123
ret = []
121124
for label, median, stddev, unit in parsed_results:
122125
extra_label = " CPU count" if parse_unit_type(unit) == "instr" else ""
123-
ret.append(Result(label=self.name() + extra_label, value=median, stddev=stddev, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit)))
126+
explicit_group = self.explicit_group() + extra_label if self.explicit_group() != "" else ""
127+
ret.append(Result(label=self.name() + extra_label, explicit_group=explicit_group, value=median, stddev=stddev, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit)))
124128
return ret
125129

126130
def parse_output(self, output):
@@ -158,6 +162,9 @@ def name(self):
158162
order = "in order" if self.ioq else "out of order"
159163
return f"api_overhead_benchmark_sycl SubmitKernel {order}"
160164

165+
def explicit_group(self):
166+
return "SubmitKernel"
167+
161168
def bin_args(self) -> list[str]:
162169
return [
163170
f"--Ioq={self.ioq}",
@@ -178,6 +185,9 @@ def name(self):
178185
order = "in order" if self.ioq else "out of order"
179186
return f"api_overhead_benchmark_ur SubmitKernel {order}"
180187

188+
def explicit_group(self):
189+
return "SubmitKernel"
190+
181191
def bin_args(self) -> list[str]:
182192
return [
183193
f"--Ioq={self.ioq}",
@@ -198,6 +208,9 @@ def name(self):
198208
order = "in order" if self.ioq else "out of order"
199209
return f"api_overhead_benchmark_l0 SubmitKernel {order}"
200210

211+
def explicit_group(self):
212+
return "SubmitKernel"
213+
201214
def bin_args(self) -> list[str]:
202215
return [
203216
f"--Ioq={self.ioq}",

scripts/benchmarks/benches/result.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class Result:
1818
stdout: str
1919
passed: bool = True
2020
unit: str = ""
21+
explicit_group: str = ""
2122
# stddev can be optionally set by the benchmark,
2223
# if not set, it will be calculated automatically.
2324
stddev: float = 0.0

scripts/benchmarks/benches/test.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,31 @@ def setup(self):
2020

2121
def benchmarks(self) -> list[Benchmark]:
2222
bench_configs = [
23-
("Memory Bandwidth", 2000, 200),
24-
("Latency", 100, 20),
25-
("Throughput", 1500, 150),
26-
("FLOPS", 3000, 300),
27-
("Cache Miss Rate", 250, 25),
23+
("Memory Bandwidth", 2000, 200, "Foo Group"),
24+
("Latency", 100, 20, "Bar Group"),
25+
("Throughput", 1500, 150, "Foo Group"),
26+
("FLOPS", 3000, 300, "Foo Group"),
27+
("Cache Miss Rate", 250, 25, "Bar Group"),
2828
]
2929

3030
result = []
31-
for base_name, base_value, base_diff in bench_configs:
31+
for base_name, base_value, base_diff, group in bench_configs:
3232
for variant in range(6):
3333
value_multiplier = 1.0 + (variant * 0.2)
3434
name = f"{base_name} {variant+1}"
3535
value = base_value * value_multiplier
3636
diff = base_diff * value_multiplier
3737

38-
result.append(TestBench(name, value, diff))
38+
result.append(TestBench(name, value, diff, group))
3939

4040
return result
4141

4242
class TestBench(Benchmark):
43-
def __init__(self, name, value, diff):
43+
def __init__(self, name, value, diff, group = ''):
4444
self.bname = name
4545
self.value = value
4646
self.diff = diff
47+
self.group = group
4748
super().__init__("")
4849

4950
def name(self):
@@ -58,7 +59,7 @@ def setup(self):
5859
def run(self, env_vars) -> list[Result]:
5960
random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
6061
return [
61-
Result(label=self.name(), value=random_value, command="", env={"A": "B"}, stdout="no output", unit="ms")
62+
Result(label=self.name(), explicit_group=self.group, value=random_value, command="", env={"A": "B"}, stdout="no output", unit="ms")
6263
]
6364

6465
def teardown(self):

scripts/benchmarks/main.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
183183
# should this be configurable?
184184
history.load(1000)
185185

186+
# remove duplicates. this can happen if e.g., --compare baseline is specified manually.
187+
compare_names = list(dict.fromkeys(compare_names))
188+
186189
for name in compare_names:
187190
compare_result = history.get_compare(name)
188191
if compare_result:
@@ -203,7 +206,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
203206
# Otherwise we might be comparing the results to themselves.
204207
if not options.dry_run:
205208
history.save(saved_name, results, save_name is not None)
206-
compare_names.append(saved_name)
209+
if saved_name not in compare_names:
210+
compare_names.append(saved_name)
207211

208212
if options.output_html:
209213
html_content = generate_html(history.runs, 'oneapi-src/unified-runtime', compare_names)

scripts/benchmarks/output_html.py

Lines changed: 173 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from dataclasses import dataclass
1111
import matplotlib.dates as mdates
1212
from benches.result import BenchmarkRun, Result
13+
import numpy as np
1314

1415
@dataclass
1516
class BenchmarkMetadata:
@@ -23,11 +24,14 @@ class BenchmarkSeries:
2324
runs: list[BenchmarkRun]
2425

2526
@dataclass
26-
class BenchmarkTimeSeries:
27+
class BenchmarkChart:
2728
label: str
2829
html: str
2930

30-
def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> list[BenchmarkTimeSeries]:
31+
def tooltip_css() -> str:
32+
return '.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}'
33+
34+
def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> list[BenchmarkChart]:
3135
plt.close('all')
3236

3337
num_benchmarks = len(benchmarks)
@@ -66,7 +70,7 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
6670
for point in sorted_points]
6771

6872
tooltip = mpld3.plugins.PointHTMLTooltip(scatter, tooltip_labels,
69-
css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}',
73+
css=tooltip_css(),
7074
targets=targets)
7175
mpld3.plugins.connect(fig, tooltip)
7276

@@ -94,7 +98,104 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
9498
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S'))
9599

96100
plt.tight_layout()
97-
html_charts.append(BenchmarkTimeSeries(html=mpld3.fig_to_html(fig), label=benchmark.label))
101+
html_charts.append(BenchmarkChart(html=mpld3.fig_to_html(fig), label=benchmark.label))
102+
plt.close(fig)
103+
104+
return html_charts
105+
106+
@dataclass
107+
class ExplicitGroup:
108+
name: str
109+
nnames: int
110+
metadata: BenchmarkMetadata
111+
runs: dict[str, dict[str, Result]]
112+
113+
def create_explicit_groups(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[ExplicitGroup]:
114+
groups = {}
115+
116+
for run in benchmark_runs:
117+
if run.name in compare_names:
118+
for res in run.results:
119+
if res.explicit_group != '':
120+
if res.explicit_group not in groups:
121+
groups[res.explicit_group] = ExplicitGroup(name=res.explicit_group, nnames=len(compare_names),
122+
metadata=BenchmarkMetadata(unit=res.unit, lower_is_better=res.lower_is_better),
123+
runs={})
124+
125+
group = groups[res.explicit_group]
126+
if res.label not in group.runs:
127+
group.runs[res.label] = {name: None for name in compare_names}
128+
129+
if group.runs[res.label][run.name] is None:
130+
group.runs[res.label][run.name] = res
131+
132+
return list(groups.values())
133+
134+
def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]:
135+
plt.close('all')
136+
137+
html_charts = []
138+
139+
for group in groups:
140+
fig, ax = plt.subplots(figsize=(10, 6))
141+
142+
x = np.arange(group.nnames)
143+
x_labels = []
144+
width = 0.8 / len(group.runs)
145+
146+
max_height = 0
147+
148+
for i, (run_name, run_results) in enumerate(group.runs.items()):
149+
offset = width * i
150+
151+
positions = x + offset
152+
x_labels = run_results.keys()
153+
valid_data = [r.value if r is not None else 0 for r in run_results.values()]
154+
rects = ax.bar(positions, valid_data, width, label=run_name)
155+
# This is a hack to disable all bar_label. Setting labels to empty doesn't work.
156+
# We create our own labels below for each bar, this works better in mpld3.
157+
ax.bar_label(rects, fmt='')
158+
159+
for rect, run, res in zip(rects, run_results.keys(), run_results.values()):
160+
height = rect.get_height()
161+
if height > max_height:
162+
max_height = height
163+
164+
ax.text(rect.get_x() + rect.get_width()/2., height + 2,
165+
f'{res.value:.1f}',
166+
ha='center', va='bottom', fontsize=9)
167+
168+
tooltip_labels = [
169+
f"Run: {run}\n"
170+
f"Label: {res.label}\n"
171+
f"Value: {res.value:.2f} {res.unit}\n"
172+
]
173+
tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css=tooltip_css())
174+
mpld3.plugins.connect(ax.figure, tooltip)
175+
176+
ax.set_xticks([])
177+
ax.grid(True, axis='y', alpha=0.2)
178+
ax.set_ylabel(f"Value ({group.metadata.unit})")
179+
ax.legend(loc='upper left')
180+
ax.set_title(group.name, pad=20)
181+
performance_indicator = "lower is better" if group.metadata.lower_is_better else "higher is better"
182+
ax.text(0.5, 1.03, f"({performance_indicator})",
183+
ha='center',
184+
transform=ax.transAxes,
185+
style='italic',
186+
fontsize=7,
187+
color='#666666')
188+
189+
for idx, label in enumerate(x_labels):
190+
# this is a hack to get labels to show above the legend
191+
# we normalize the idx to transAxes transform and offset it a little.
192+
x_norm = (idx + 0.3 - ax.get_xlim()[0]) / (ax.get_xlim()[1] - ax.get_xlim()[0])
193+
ax.text(x_norm, 1.00, label,
194+
transform=ax.transAxes,
195+
color='#666666')
196+
197+
plt.tight_layout()
198+
html_charts.append(BenchmarkChart(label=group.name, html=mpld3.fig_to_html(fig)))
98199
plt.close(fig)
99200

100201
return html_charts
@@ -138,6 +239,11 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
138239
timeseries = create_time_series_chart(benchmarks, github_repo)
139240
timeseries_charts_html = '\n'.join(f'<div class="chart" data-label="{ts.label}"><div>{ts.html}</div></div>' for ts in timeseries)
140241

242+
explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
243+
244+
bar_charts = create_grouped_bar_charts(explicit_groups)
245+
bar_charts_html = '\n'.join(f'<div class="chart" data-label="{bc.label}"><div>{bc.html}</div></div>' for bc in bar_charts)
246+
141247
html_template = f"""
142248
<!DOCTYPE html>
143249
<html>
@@ -199,21 +305,72 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
199305
width: 400px;
200306
max-width: 100%;
201307
}}
308+
details {{
309+
margin-bottom: 24px;
310+
}}
311+
summary {{
312+
font-size: 18px;
313+
font-weight: 500;
314+
cursor: pointer;
315+
padding: 12px;
316+
background: #e9ecef;
317+
border-radius: 8px;
318+
user-select: none;
319+
}}
320+
summary:hover {{
321+
background: #dee2e6;
322+
}}
202323
</style>
203324
<script>
325+
function getQueryParam(param) {{
326+
const urlParams = new URLSearchParams(window.location.search);
327+
return urlParams.get(param);
328+
}}
329+
204330
function filterCharts() {{
205331
const regexInput = document.getElementById('bench-filter').value;
206332
const regex = new RegExp(regexInput, 'i');
207333
const charts = document.querySelectorAll('.chart');
334+
let timeseriesVisible = false;
335+
let barChartsVisible = false;
336+
208337
charts.forEach(chart => {{
209338
const label = chart.getAttribute('data-label');
210339
if (regex.test(label)) {{
211340
chart.style.display = '';
341+
if (chart.closest('.timeseries')) {{
342+
timeseriesVisible = true;
343+
}} else if (chart.closest('.bar-charts')) {{
344+
barChartsVisible = true;
345+
}}
212346
}} else {{
213347
chart.style.display = 'none';
214348
}}
215349
}});
350+
351+
updateURL(regexInput);
352+
353+
document.querySelector('.timeseries').open = timeseriesVisible;
354+
document.querySelector('.bar-charts').open = barChartsVisible;
216355
}}
356+
357+
function updateURL(regex) {{
358+
const url = new URL(window.location);
359+
if (regex) {{
360+
url.searchParams.set('regex', regex);
361+
}} else {{
362+
url.searchParams.delete('regex');
363+
}}
364+
history.replaceState(null, '', url);
365+
}}
366+
367+
document.addEventListener('DOMContentLoaded', (event) => {{
368+
const regexParam = getQueryParam('regex');
369+
if (regexParam) {{
370+
document.getElementById('bench-filter').value = regexParam;
371+
filterCharts();
372+
}}
373+
}});
217374
</script>
218375
</head>
219376
<body>
@@ -222,13 +379,20 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
222379
<div class="filter-container">
223380
<input type="text" id="bench-filter" placeholder="Regex..." oninput="filterCharts()">
224381
</div>
225-
<h2>Historical Results</h2>
226-
<div class="charts">
227-
{timeseries_charts_html}
228-
</div>
382+
<details class="timeseries">
383+
<summary>Historical Results</summary>
384+
<div class="charts">
385+
{timeseries_charts_html}
386+
</div>
387+
</details>
388+
<details class="bar-charts">
389+
<summary>Comparisons</summary>
390+
<div class="charts">
391+
{bar_charts_html}
392+
</div>
393+
</details>
229394
</div>
230395
</body>
231396
</html>
232397
"""
233-
234398
return html_template

0 commit comments

Comments
 (0)