Skip to content

Commit 69d0562

Browse files
committed
Merge branch 'ofed' of github.com:stackhpc/ansible-slurm-appliance into ofed
2 parents ba00f71 + 2901294 commit 69d0562

File tree

4 files changed

+45
-32
lines changed

4 files changed

+45
-32
lines changed

.github/workflows/fatimage.yml

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,15 @@
22
name: Build fat image
33
'on':
44
workflow_dispatch:
5-
inputs:
6-
use_RL9:
7-
required: true
8-
description: Include RL9 image build
9-
type: boolean
10-
default: false
5+
# inputs:
6+
# use_RL9:
7+
# required: true
8+
# description: Include RL9 image build
9+
# type: boolean
10+
# default: false
11+
push:
12+
branches:
13+
- ofed
1114
jobs:
1215
openstack:
1316
name: openstack-imagebuild
@@ -16,11 +19,11 @@ jobs:
1619
strategy:
1720
matrix:
1821
os_version: [RL8, RL9]
19-
rl9_selected:
20-
- ${{ inputs.use_RL9 == true }} # only potentially true for workflow_dispatch
21-
exclude:
22-
- os_version: RL9
23-
rl9_selected: false
22+
# rl9_selected:
23+
# - ${{ inputs.use_RL9 == true }} # only potentially true for workflow_dispatch
24+
# exclude:
25+
# - os_version: RL9
26+
# rl9_selected: false
2427
env:
2528
ANSIBLE_FORCE_COLOR: True
2629
OS_CLOUD: openstack

ansible/roles/ofed/defaults/main.yml

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -5,22 +5,21 @@ ofed_distro_version: "{{ ansible_distribution_version }}" # e.g. '8.9'
55
ofed_arch: "{{ ansible_architecture }}"
66
ofed_tmp_dir: /tmp
77
ofed_update_firmware: false
8-
ofed_build_packages:
9-
- perl
10-
- createrepo
11-
- kernel-rpm-macros
12-
- libtool
13-
- python36
8+
ofed_build_packages: # may require additional packages depending on ofed_package_selection
149
- autoconf
1510
- automake
1611
- gcc
17-
- rpm-build
12+
- kernel-devel-{{ _ofed_loaded_kernel.stdout | trim }}
13+
- kernel-rpm-macros
14+
- libtool
1815
- lsof
19-
- gdb-headless
2016
- patch
21-
- kernel-devel-{{ _ofed_loaded_kernel.stdout | trim }}
2217
- pciutils
23-
- kernel-modules-extra
24-
- tk
25-
- gcc-gfortran
26-
- tcl
18+
- perl
19+
- rpm-build
20+
ofed_build_rl8_packages:
21+
- gdb-headless
22+
- python36
23+
ofed_package_selection: # list of package selection flags for mlnxofedinstall script
24+
- hpc
25+
- with-nfsrdma

ansible/roles/ofed/tasks/install.yml

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,14 +26,14 @@
2626
changed_when: false
2727
failed_when:
2828
- _ofed_info.rc > 0
29-
- "'No such file or directory' not in ofed_info.msg"
29+
- "'No such file or directory' not in _ofed_info.msg"
3030
register: _ofed_info
3131

3232
- name: Install build prerequisites
33+
dnf:
34+
name: "{{ ofed_build_packages + (ofed_build_rl8_packages if ofed_distro_version == '8.9' else []) }}"
3335
when: "'MLNX_OFED_LINUX-' + ofed_version not in _ofed_info.stdout"
3436
# don't want to install a load of prereqs unnecessarily
35-
dnf:
36-
name: "{{ ofed_build_packages }}"
3737

3838
- name: Download and unpack Mellanox OFED tarball
3939
ansible.builtin.unarchive:
@@ -45,10 +45,21 @@
4545

4646
# Below from https://docs.nvidia.com/networking/display/mlnxofedv24010331/user+manual
4747
- name: Run OFED install script
48-
shell:
49-
cmd: "{{ ofed_tmp_dir }}/MLNX_OFED_LINUX-{{ ofed_version }}/mlnxofedinstall --add-kernel-support {% if not ofed_update_firmware %}--without-fw-update{% endif %} --force"
48+
command:
49+
cmd: >
50+
./mlnxofedinstall
51+
--add-kernel-support
52+
{% if not ofed_update_firmware %}--without-fw-update{% endif %}
53+
--force
54+
--skip-repo
55+
{% for pkgsel in ofed_package_selection %}
56+
--{{ pkgsel }}
57+
{% endfor %}
58+
chdir: "{{ ofed_tmp_dir }}/MLNX_OFED_LINUX-{{ ofed_version }}-{{ ofed_distro }}{{ ofed_distro_version }}-{{ ofed_arch }}/"
5059
register: _ofed_install
5160
when: "'MLNX_OFED_LINUX-' + ofed_version not in _ofed_info.stdout"
61+
async: "{{ 45 * 60 }}" # wait for up to 45 minutes
62+
poll: 15 # check every 15 seconds
5263

5364
- name: Update initramfs
5465
command:

packer/openstack.pkr.hcl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ source "openstack" "openhpc" {
162162
# "fat" image builds:
163163
build {
164164

165-
# non-CUDA:
165+
# non-OFED:
166166
source "source.openstack.openhpc" {
167167
name = "openhpc"
168168
floating_ip_network = "${var.floating_ip_network}"
@@ -171,9 +171,9 @@ build {
171171
image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" # similar to name from slurm_image_builder
172172
}
173173

174-
# CUDA:
174+
# OFED:
175175
source "source.openstack.openhpc" {
176-
name = "openhpc-cuda" # this is the only difference from the above
176+
name = "openhpc-ofed" # this is the only difference from the above
177177
floating_ip_network = "${var.floating_ip_network}"
178178
source_image = "${var.fatimage_source_image[var.os_version]}"
179179
source_image_name = "${var.fatimage_source_image_name[var.os_version]}"

0 commit comments

Comments
 (0)