Skip to content

Commit 6551c33

Browse files
authored
Merge branch 'main' into update/openhpc-v0.27.9
2 parents 1eeef37 + 64ddf19 commit 6551c33

File tree

27 files changed

+443
-159
lines changed

27 files changed

+443
-159
lines changed

.github/workflows/doca.yml

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
# Test build of the DOCA "extra" image.
#
# Runs on manual dispatch, on pushes to main, and on pull requests - the
# latter two only when one of the listed paths changes.
name: Test DOCA extra build
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
      - 'ansible/roles/doca/**'
      # Must be the full filename (including extension), otherwise changes to
      # this workflow itself never match the filter.
      - '.github/workflows/doca.yml'
  pull_request:
    paths:
      - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
      - 'ansible/roles/doca/**'
      - '.github/workflows/doca.yml'

jobs:
  # Builds one DOCA image per matrix entry with Packer, records the resulting
  # image ID/name, and uploads them as a workflow artifact.
  doca:
    name: doca-build
    concurrency:
      # One group per workflow + branch/PR ref + image name, so a newer run
      # only cancels the older build of the same image.
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }}
      cancel-in-progress: true
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false  # allow other matrix jobs to continue even if one fails
      matrix:  # build RL8, RL9
        build:
          - image_name: openhpc-doca-RL8
            # key into environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
            source_image_name_key: RL8
            inventory_groups: doca
          - image_name: openhpc-doca-RL9
            source_image_name_key: RL9
            inventory_groups: doca
    env:
      ANSIBLE_FORCE_COLOR: True
      OS_CLOUD: openstack
      CI_CLOUD: ${{ vars.CI_CLOUD }}  # default from repo settings
      ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}

    steps:
      - uses: actions/checkout@v4

      - name: Load current fat images into GITHUB_ENV
        # see https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#example-of-a-multiline-string
        run: |
          {
            echo 'FAT_IMAGES<<EOF'
            cat environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
            echo EOF
          } >> "$GITHUB_ENV"

      - name: Record settings
        run: |
          echo CI_CLOUD: ${{ env.CI_CLOUD }}
          echo FAT_IMAGES: ${FAT_IMAGES}

      - name: Setup ssh
        env:
          # Pass the secret via the environment rather than interpolating it
          # into the script text, so quotes/dollars in it are not re-parsed
          # by the shell.
          SSH_KEY: ${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}
        # No `set -x` here: tracing would echo the private key command line.
        run: |
          mkdir -p ~/.ssh
          printf '%s\n' "$SSH_KEY" > ~/.ssh/id_rsa
          chmod 0600 ~/.ssh/id_rsa
        shell: bash

      - name: Add bastion's ssh key to known_hosts
        run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts
        shell: bash

      - name: Install ansible etc
        run: dev/setup-env.sh

      - name: Write clouds.yaml
        env:
          # Same reasoning as for the ssh key: keep the secret out of the
          # script text.
          CLOUDS_YAML: ${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}
        run: |
          mkdir -p ~/.config/openstack/
          printf '%s\n' "$CLOUDS_YAML" > ~/.config/openstack/clouds.yaml
        shell: bash

      # NOTE(review): each `run` step gets its own shell, so the activation
      # below does not carry over; later steps re-activate themselves.
      # Presumably kept as an early sanity check - confirm.
      - name: Setup environment
        run: |
          . venv/bin/activate
          . environments/.stackhpc/activate

      - name: Build fat image with packer
        id: packer_build
        # PKR_VAR_environment_root is not set in this file; presumably it is
        # exported by environments/.stackhpc/activate - confirm.
        run: |
          set -x
          . venv/bin/activate
          . environments/.stackhpc/activate
          cd packer/
          packer init .

          PACKER_LOG=1 packer build \
          -on-error=${{ vars.PACKER_ON_ERROR }} \
          -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
          -var "source_image_name=${{ fromJSON(env.FAT_IMAGES)['cluster_image'][matrix.build.source_image_name_key] }}" \
          -var "image_name=${{ matrix.build.image_name }}" \
          -var "inventory_groups=${{ matrix.build.inventory_groups }}" \
          openstack.pkr.hcl

      - name: Get created image names from manifest
        id: manifest
        run: |
          . venv/bin/activate
          IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json)
          # Fail fast instead of polling forever on a missing/empty artifact ID.
          if [ -z "$IMAGE_ID" ] || [ "$IMAGE_ID" = "null" ]; then
            echo "No artifact_id found in packer/packer-manifest.json" >&2
            exit 1
          fi
          # Poll until the image can be shown.
          while ! openstack image show -f value -c name "$IMAGE_ID"; do
            sleep 5
          done
          IMAGE_NAME=$(openstack image show -f value -c name "$IMAGE_ID")
          echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT"
          echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT"
          echo "$IMAGE_ID" > image-id.txt
          echo "$IMAGE_NAME" > image-name.txt

      - name: Make image usable for further builds
        run: |
          . venv/bin/activate
          openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}"

      # Only manually dispatched runs keep the built image.
      - name: Delete image for automatically-run workflows
        if: ${{ github.event_name != 'workflow_dispatch' }}
        run: |
          . venv/bin/activate
          openstack image delete "${{ steps.manifest.outputs.image-id }}"

      - name: Upload manifest artifact
        uses: actions/upload-artifact@v4
        with:
          name: image-details-${{ matrix.build.image_name }}
          path: |
            ./image-id.txt
            ./image-name.txt
          overwrite: true

.github/workflows/fatimage.yml

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,30 +15,24 @@ jobs:
1515
openstack:
1616
name: openstack-imagebuild
1717
concurrency:
18-
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build
18+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }} # to branch/PR + OS
1919
cancel-in-progress: true
2020
runs-on: ubuntu-22.04
2121
strategy:
2222
fail-fast: false # allow other matrix jobs to continue even if one fails
2323
matrix: # build RL8, RL9
24-
os_version:
25-
- RL8
26-
- RL9
2724
build:
28-
- openstack.openhpc
25+
- image_name: openhpc-RL8
26+
source_image_name: rocky-latest-RL8
27+
inventory_groups: control,compute,login
28+
- image_name: openhpc-RL9
29+
source_image_name: rocky-latest-RL9
30+
inventory_groups: control,compute,login
2931
env:
3032
ANSIBLE_FORCE_COLOR: True
3133
OS_CLOUD: openstack
3234
CI_CLOUD: ${{ github.event.inputs.ci_cloud }}
33-
SOURCE_IMAGES_MAP: |
34-
{
35-
"RL8": {
36-
"openstack.openhpc": "rocky-latest-RL8"
37-
},
38-
"RL9": {
39-
"openstack.openhpc": "rocky-latest-RL9"
40-
}
41-
}
35+
ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
4236

4337
steps:
4438
- uses: actions/checkout@v2
@@ -84,13 +78,11 @@ jobs:
8478
8579
PACKER_LOG=1 packer build \
8680
-on-error=${{ vars.PACKER_ON_ERROR }} \
87-
-only=${{ matrix.build }} \
8881
-var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
89-
-var "source_image_name=${{ env.SOURCE_IMAGE }}" \
82+
-var "source_image_name=${{ matrix.build.source_image_name }}" \
83+
-var "image_name=${{ matrix.build.image_name }}" \
84+
-var "inventory_groups=${{ matrix.build.inventory_groups }}" \
9085
openstack.pkr.hcl
91-
env:
92-
PKR_VAR_os_version: ${{ matrix.os_version }}
93-
SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }}
9486
9587
- name: Get created image names from manifest
9688
id: manifest
@@ -101,13 +93,20 @@ jobs:
10193
sleep 5
10294
done
10395
IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID)
96+
echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT"
97+
echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT"
10498
echo $IMAGE_ID > image-id.txt
10599
echo $IMAGE_NAME > image-name.txt
106100
101+
- name: Make image usable for further builds
102+
run: |
103+
. venv/bin/activate
104+
openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}"
105+
107106
- name: Upload manifest artifact
108107
uses: actions/upload-artifact@v4
109108
with:
110-
name: image-details-${{ matrix.build }}-${{ matrix.os_version }}
109+
name: image-details-${{ matrix.build.image_name }}
111110
path: |
112111
./image-id.txt
113112
./image-name.txt

.github/workflows/nightlybuild.yml

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,32 +11,30 @@ on:
1111
- SMS
1212
- ARCUS
1313
schedule:
14-
- cron: '0 0 * * *' # Run at midnight
14+
- cron: '0 0 * * *' # Run at midnight on default branch
1515

1616
jobs:
1717
openstack:
1818
name: openstack-imagebuild
1919
concurrency:
20-
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build
20+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }} # to branch/PR + OS
2121
cancel-in-progress: true
2222
runs-on: ubuntu-22.04
2323
strategy:
2424
fail-fast: false # allow other matrix jobs to continue even if one fails
2525
matrix: # build RL8, RL9
26-
os_version:
27-
- RL8
28-
- RL9
2926
build:
30-
- openstack.rocky-latest
27+
- image_name: rocky-latest-RL8
28+
source_image_name: Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2
29+
inventory_groups: update
30+
- image_name: rocky-latest-RL9
31+
source_image_name: Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2
32+
inventory_groups: update
3133
env:
3234
ANSIBLE_FORCE_COLOR: True
3335
OS_CLOUD: openstack
3436
CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }}
35-
SOURCE_IMAGES_MAP: |
36-
{
37-
"RL8": "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2",
38-
"RL9": "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2"
39-
}
37+
ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
4038

4139
steps:
4240
- uses: actions/checkout@v2
@@ -82,15 +80,13 @@ jobs:
8280
8381
PACKER_LOG=1 packer build \
8482
-on-error=${{ vars.PACKER_ON_ERROR }} \
85-
-only=${{ matrix.build }} \
8683
-var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
87-
-var "source_image_name=${{ env.SOURCE_IMAGE }}" \
84+
-var "source_image_name=${{ matrix.build.source_image_name }}" \
85+
-var "image_name=${{ matrix.build.image_name }}" \
86+
-var "image_name_version=" \
87+
-var "inventory_groups=${{ matrix.build.inventory_groups }}" \
8888
openstack.pkr.hcl
8989
90-
env:
91-
PKR_VAR_os_version: ${{ matrix.os_version }}
92-
SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version] }}
93-
9490
- name: Get created image names from manifest
9591
id: manifest
9692
run: |
@@ -124,7 +120,7 @@ jobs:
124120
name: upload-nightly-targets
125121
needs: openstack
126122
concurrency:
127-
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }}
123+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }}-${{ matrix.target_cloud }}
128124
cancel-in-progress: true
129125
runs-on: ubuntu-22.04
130126
strategy:
@@ -134,18 +130,15 @@ jobs:
134130
- LEAFCLOUD
135131
- SMS
136132
- ARCUS
137-
os_version:
138-
- RL8
139-
- RL9
140-
image:
141-
- rocky-latest
133+
build:
134+
- image_name: rocky-latest-RL8
135+
- image_name: rocky-latest-RL9
142136
exclude:
143137
- target_cloud: LEAFCLOUD
144138
env:
145139
OS_CLOUD: openstack
146140
SOURCE_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }}
147141
TARGET_CLOUD: ${{ matrix.target_cloud }}
148-
IMAGE_NAME: "${{ matrix.image }}-${{ matrix.os_version }}"
149142
steps:
150143
- uses: actions/checkout@v2
151144

@@ -160,42 +153,37 @@ jobs:
160153
. venv/bin/activate
161154
pip install -U pip
162155
pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)
163-
shell: bash
164156
165157
- name: Write clouds.yaml
166158
run: |
167159
mkdir -p ~/.config/openstack/
168160
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.SOURCE_CLOUD)] }}" > ~/.config/openstack/source_clouds.yaml
169161
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.TARGET_CLOUD)] }}" > ~/.config/openstack/target_clouds.yaml
170-
shell: bash
171162
172163
- name: Download source image
173164
run: |
174165
. venv/bin/activate
175166
export OS_CLIENT_CONFIG_FILE=~/.config/openstack/source_clouds.yaml
176-
openstack image save --file ${{ env.IMAGE_NAME }} ${{ env.IMAGE_NAME }}
177-
shell: bash
167+
openstack image save --file ${{ matrix.build.image_name }} ${{ matrix.build.image_name }}
178168
179169
- name: Upload to target cloud
180170
run: |
181171
. venv/bin/activate
182172
export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml
183173
184-
openstack image create "${{ env.IMAGE_NAME }}" \
185-
--file "${{ env.IMAGE_NAME }}" \
174+
openstack image create "${{ matrix.build.image_name }}" \
175+
--file "${{ matrix.build.image_name }}" \
186176
--disk-format qcow2 \
187-
shell: bash
188177
189178
- name: Delete old latest image from target cloud
190179
run: |
191180
. venv/bin/activate
192181
export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml
193182
194-
IMAGE_COUNT=$(openstack image list --name ${{ env.IMAGE_NAME }} -f value -c ID | wc -l)
183+
IMAGE_COUNT=$(openstack image list --name ${{ matrix.build.image_name }} -f value -c ID | wc -l)
195184
if [ "$IMAGE_COUNT" -gt 1 ]; then
196-
OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ env.IMAGE_NAME }}" -f value -c ID | head -n 1)
185+
OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ matrix.build.image_name }}" -f value -c ID | head -n 1)
197186
openstack image delete "$OLD_IMAGE_ID"
198187
else
199188
echo "Only one image exists, skipping deletion."
200189
fi
201-
shell: bash

ansible/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,7 @@ roles/*
6464
!roles/k9s/**
6565
!roles/lustre/
6666
!roles/lustre/**
67+
!roles/dnf_repos/
68+
!roles/dnf_repos/**
69+
!roles/doca/
70+
!roles/doca/**

ansible/cleanup.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,10 @@
6161
os: "{{ ansible_distribution }} {{ ansible_distribution_version }}"
6262
kernel: "{{ ansible_kernel }}"
6363
ofed: "{{ ansible_facts.packages['mlnx-ofa_kernel'].0.version | default('-') }}"
64+
doca: "{{ ansible_facts.packages[doca_profile | default('doca-ofed') ].0.version | default('-') }}"
6465
cuda: "{{ ansible_facts.packages['cuda'].0.version | default('-') }}"
6566
slurm-ohpc: "{{ ansible_facts.packages['slurm-ohpc'].0.version | default('-') }}"
67+
68+
- name: Show image summary
69+
debug:
70+
var: image_info

0 commit comments

Comments
 (0)