Skip to content

Commit fd213a3

Browse files
authored
Merge pull request #200 from stackhpc/feature/plot_pingpong
Plot pingpong results
2 parents 831b2e5 + c6020d0 commit fd213a3

File tree

3 files changed

+113
-4
lines changed

3 files changed

+113
-4
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import matplotlib as mpl
2+
import matplotlib.pyplot as plt
3+
from matplotlib import ticker
4+
import numpy as np
5+
import os
6+
7+
def sizeof_fmt(num, suffix='B'):
    """ Return `num` as a human-readable string using binary (1024-based) prefixes.

        The value is divided by 1024 until its magnitude is below 1024, then
        formatted with one decimal place followed by the prefix and `suffix`,
        e.g. 1536 -> '1.5KiB'. Values too large for 'Zi' are reported in 'Yi'.
    """
    # adapted from https://stackoverflow.com/a/1094933/916373
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        value = value / 1024.0
    # ran out of prefixes: whatever is left is expressed in yobi-units
    return "%.1f%s%s" % (value, 'Yi', suffix)
15+
16+
def read_imb_out(path):
    """ Read stdout from an IMB-MPI1 run.

        Returns a dict with:
            key:= int, total number of processes involved
            value:= list of columns, i.e. one per results table. Each column is a list of
                    converted values, in the same order as the columns of the table.

        If multiple results tables are present it is assumed that they are all the same benchmark,
        and only differ in the number of processes.

        Raises ValueError if a benchmark is not one of the known types, or if the
        '# Benchmarking' line is not followed by a '# #processes = ' line.
    """

    data = {}

    COLTYPES = { # all benchmark names here should be lowercase
        'uniband': (int, int, float, int), # #bytes #repetitions Mbytes/sec Msg/sec
        'biband': (int, int, float, int),
        'pingpong':(int, int, float, float), # #bytes #repetitions t[usec] Mbytes/sec
        'alltoall':(int, int, float, float, float) # #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
    }
    NPROCS_PREFIX = '# #processes = '

    with open(path) as f:
        for line in f:
            if not line.startswith('# Benchmarking '):
                continue
            benchmark = line.split()[-1].lower()
            if benchmark not in COLTYPES:
                raise ValueError('Do not know how to read %r benchmark in %s' % (benchmark, path))
            converters = COLTYPES[benchmark]

            # next(f, '') treats end-of-file as an empty line rather than raising StopIteration
            line = next(f, '')
            if not line.startswith(NPROCS_PREFIX):
                raise ValueError('expected line starting with %r, got %r in %s' % (NPROCS_PREFIX, line, path))
            n_procs = int(line.split('=')[-1].strip())

            # skip remaining header lines: may or may not include line
            # "# .. additional processes waiting in MPI_Barrier", plus other # lines
            while line.startswith('#'):
                line = next(f, '')
            # `line` is now the first non-'#' line (the column titles), which is not parsed

            # table rows run until the first blank line (or end of file)
            rows = []
            while True:
                line = next(f, '').strip()
                if line == '':
                    break
                rows.append([convert(value) for (convert, value) in zip(converters, line.split())])

            # turn data around: list of rows -> list of columns
            data[n_procs] = [[row[ic] for row in rows] for ic in range(len(converters))]
    return data
62+
63+
if __name__ == '__main__':
    # Script entry point: takes the path to an IMB pingpong stdout file as the
    # only argument, plots latency and bandwidth against message size, saves the
    # figure as 'pingpong.png' next to that file and prints the figure's path.
    import sys
    d = read_imb_out(sys.argv[1])
    if len(d) > 1:
        raise ValueError('Found > 1 benchmark in %s' % sys.argv[1])
    outdir = os.path.dirname(sys.argv[1])
    for cols in d.values():
        # column indices below follow the pingpong table layout:
        # 0 = #bytes, 2 = t[usec], 3 = Mbytes/sec
        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()  # second y-axis sharing the #bytes x-axis
        ax1.plot(cols[0], cols[2], label='latency', color='b')
        ax2.plot(cols[0], cols[3], label='bandwidth', color='r')
        ax1.set_xscale('log', base=2)
        ax1.set_yscale('log', base=10)
        ax1.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: sizeof_fmt(x)))
        ax1.grid(True, which="both")
        ax1.set_xlabel('#bytes')
        # raw string: '\m' is not a valid Python escape sequence
        ax1.set_ylabel(r'latency ($\mu$s)', color='b')
        ax2.set_ylabel('bandwidth (Mbytes/sec)', color='r')
        fig.legend(loc='upper left')
        plt.tight_layout()
        figpath = os.path.join(outdir, 'pingpong.png')
        plt.savefig(figpath)
        print(figpath)

ansible/roles/hpctests/tasks/pingpong.yml

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,41 @@
2424
chdir: "{{ hpctests_rootdir }}/pingpong"
2525
register: hpctests_pingpong_sbatch
2626

27-
- name: Read pingpong
27+
- set_fact:
28+
_pingpong_jobid: "{{ hpctests_pingpong_sbatch.stdout.split()[-1] }}"
29+
- set_fact:
30+
_pingpong_local_output: "{{ hpctests_outdir }}/pingpong/{{_pingpong_jobid}}/pingpong.sh.out"
31+
32+
- name: Retrieve results file
33+
ansible.builtin.fetch:
34+
src: "{{ hpctests_rootdir }}/pingpong/pingpong.sh.out"
35+
dest: "{{ _pingpong_local_output }}"
36+
flat: yes
37+
38+
- name: Read pingpong results
2839
read_imb_pingpong:
29-
path: "{{ hpctests_rootdir }}/pingpong/pingpong.sh.out"
40+
path: "{{ _pingpong_local_output }}"
3041
register: hpctests_pingpong_out
42+
delegate_to: localhost
3143

3244
- name: Read nodes used
33-
shell: "grep 'SLURM_JOB_NODELIST:' {{ hpctests_rootdir }}/pingpong/pingpong.sh.out"
45+
shell: "grep 'SLURM_JOB_NODELIST:' {{ _pingpong_local_output }}"
3446
register: hpctests_pingpong_run_nodes
47+
delegate_to: localhost
3548

49+
- name: Plot image
  shell:
    cmd: "python {{lookup('env', 'APPLIANCES_REPO_ROOT') }}/ansible/roles/hpctests/files/plot_imb_pingpong.py {{ _pingpong_local_output }}"
    # must name the file the plot script actually writes: 'pingpong.png' in the same directory as its input file
    creates: "{{ _pingpong_local_output | dirname }}/pingpong.png"
  register: _pingpong_plot
  delegate_to: localhost
55+
3656
- debug:
3757
msg: |
38-
Summary for pingpong (2x scheduler-selected nodes) job {{ hpctests_pingpong_sbatch.stdout.split()[-1] }} using {{ hpctests_ucx_net_devices }}:
58+
Summary for pingpong (2x scheduler-selected nodes) job {{ _pingpong_jobid }} (using interface {{ hpctests_ucx_net_devices }}):
3959
nodes: {{ hpctests_pingpong_run_nodes.stdout.split()[1] }}
4060
zero-size msg latency: {{ hpctests_pingpong_out['columns']['latency'][0] }} us
4161
max bandwidth: {{ hpctests_pingpong_out['columns']['bandwidth'] | max }} Mbytes/s ({{ (hpctests_pingpong_out['columns']['bandwidth'] | max) / 125.0 }} Gbit/s)
62+
63+
See plot on localhost:
64+
{{ _pingpong_plot.stdout }}

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ passlib[bcrypt]==1.7.4
66
cookiecutter
77
selinux # this is a shim to avoid having to use --system-site-packages, you still need sudo yum install libselinux-python3
88
netaddr
9+
matplotlib

0 commit comments

Comments
 (0)