Skip to content

Commit bbd869e

Browse files
authored
Merge pull request #252 from stackhpc/eessi
Support EESSI
2 parents 0e6ef7e + fa07ec5 commit bbd869e

File tree

11 files changed

+152
-5
lines changed

11 files changed

+152
-5
lines changed

.github/workflows/stackhpc.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ jobs:
9999
. environments/.stackhpc/activate
100100
ansible-playbook -vv ansible/adhoc/hpctests.yml
101101
102+
- name: Run EESSI tests
103+
run: |
104+
. venv/bin/activate
105+
. environments/.stackhpc/activate
106+
ansible-playbook -vv ansible/ci/check_eessi.yml
107+
102108
- name: Confirm Open Ondemand is up (via SOCKS proxy)
103109
run: |
104110
. venv/bin/activate

ansible/bootstrap.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,16 @@
112112
tasks_from: config.yml
113113
tags: config
114114

115+
- name: Setup EESSI
116+
hosts: eessi
117+
tags: eessi
118+
become: true
119+
gather_facts: false
120+
tasks:
121+
- name: Install and configure EESSI
122+
import_role:
123+
name: eessi
124+
115125
- hosts: update
116126
gather_facts: false
117127
become: yes

ansible/ci/check_eessi.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
---
# CI check: run the TensorFlow example from the eessi-demo repository via
# `srun` on the first login node, after initialising the EESSI pilot
# environment, and fail if the expected output marker is missing.
- name: Run EESSI test job
  hosts: login[0]
  vars:
    # Directory the demo repository is cloned into; created by the first task
    eessi_test_rootdir: /home/eessi_test
  tasks:
    - name: Create test root directory
      ansible.builtin.file:
        path: "{{ eessi_test_rootdir }}"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"
      # Only this task escalates; the remaining tasks run as the connecting user
      become: true

    - name: Clone eessi-demo repo
      ansible.builtin.git:
        repo: "https://github.com/eessi/eessi-demo.git"
        dest: "{{ eessi_test_rootdir }}/eessi-demo"

    - name: Run test job
      ansible.builtin.shell:
        # Initialise the EESSI environment in this shell, then launch the
        # demo's run.sh through srun
        cmd: |
          source /cvmfs/pilot.eessi-hpc.org/latest/init/bash
          srun ./run.sh
        chdir: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow"
        # `source` is a bash builtin, so do not rely on the module's default /bin/sh
        executable: /bin/bash
      register: job_output

    - name: Fail if job output contains error
      ansible.builtin.fail:
        # Note: Job prints live progress bar to terminal, so use regex filter to remove this from stdout
        # (inside a double-quoted YAML scalar `\b` is the backspace character, so the filter strips backspaces)
        msg: "Test job using EESSI modules failed. Job output was: {{ job_output.stdout | regex_replace('\b', '') }}"
      # The run is accepted only when this marker appears in stdout
      # (presumably the last training epoch of the demo - confirm against run.sh)
      when: '"Epoch 5/5" not in job_output.stdout'

ansible/ci/check_sacct_hpctests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
gather_facts: false
33
become: true
44
vars:
5-
sacct_stdout_expected: |- # based on CI running hpctests as the first job - NB note no trailing newline
5+
sacct_stdout_expected: |- # based on CI running hpctests as the first job
66
JobID,JobName,State
77
1,pingpong.sh,COMPLETED
88
2,pingmatrix.sh,COMPLETED
@@ -18,10 +18,10 @@
1818
register: sacct
1919
- name: Check info for ended jobs
2020
assert:
21-
that: sacct.stdout == sacct_stdout_expected
21+
that: sacct_stdout_expected in sacct.stdout
2222
fail_msg: |
2323
Expected:
2424
--{{ sacct_stdout_expected }}--
2525
Got:
2626
--{{ sacct.stdout }}--
27-
success_msg: sacct shows hpctests jobs as first and only jobs
27+
success_msg: sacct shows hpctests jobs as first jobs in list

ansible/roles/eessi/README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
EESSI
2+
=====
3+
4+
Configure the EESSI pilot repository for use on given hosts.
5+
6+
Requirements
7+
------------
8+
9+
None.
10+
11+
Role Variables
12+
--------------
13+
14+
- `cvmfs_quota_limit_mb`: Optional int. Maximum size of local package cache on each node in MB.
15+
- `cvmfs_config_overrides`: Optional dict. Set of key-value pairs for additional CernVM-FS settings; see the [official docs](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html) for the list of options. Each dict key should correspond to a valid config variable (e.g. `CVMFS_HTTP_PROXY`) and the corresponding dict value will be set as the variable value (e.g. `https://my-proxy.com`). These configuration parameters will be written to the `/etc/cvmfs/default.local` config file on each host in the form `KEY=VALUE`.
16+
17+
Dependencies
18+
------------
19+
20+
None.
21+
22+
Example Playbook
23+
----------------
24+
25+
```yaml
26+
- name: Setup EESSI
27+
hosts: eessi
28+
tags: eessi
29+
become: true
30+
tasks:
31+
- name: Install and configure EESSI
32+
import_role:
33+
name: eessi
34+
```
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
---
# Defaults for the eessi role. `cvmfs_config` is the merged mapping of
# CernVM-FS settings built from the variables below.

# Maximum size of the local package cache on each node, in MB (10000 MB = 10GB)
cvmfs_quota_limit_mb: 10000

# Additional CernVM-FS settings; keys set here win over cvmfs_config_default
cvmfs_config_overrides: {}

# Base CernVM-FS settings provided by the role
cvmfs_config_default:
  CVMFS_CLIENT_PROFILE: single
  CVMFS_QUOTA_LIMIT: "{{ cvmfs_quota_limit_mb }}"

# Effective settings: base values combined with any overrides
cvmfs_config: "{{ cvmfs_config_default | combine(cvmfs_config_overrides) }}"

ansible/roles/eessi/tasks/main.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
---
# Install the CernVM-FS client and the EESSI CernVM-FS configuration package,
# write the settings from `cvmfs_config` to /etc/cvmfs/default.local, then run
# `cvmfs_config setup`.

# rpm_key fetches and imports the key in a single task and only reports a
# change when the key is not yet in the RPM database. This replaces a download
# to a cwd-relative path followed by a raw `rpm --import`, which was never
# idempotent and depended on both tasks sharing a working directory.
# NOTE(review): the key is fetched over plain HTTP - consider an HTTPS source.
- name: Import Cern GPG key
  ansible.builtin.rpm_key:
    key: http://cvmrepo.web.cern.ch/cvmrepo/yum/RPM-GPG-KEY-CernVM
    state: present

- name: Add CVMFS repo
  ansible.builtin.dnf:
    name: https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm

- name: Install CVMFS
  ansible.builtin.dnf:
    name: cvmfs

- name: Install EESSI CVMFS config
  ansible.builtin.dnf:
    name: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi-latest.noarch.rpm
    # NOTE: Can't find any docs on obtaining gpg key - maybe downloading directly from github is ok?
    disable_gpg_check: true

# Alternative version using official repo - still no GPG key :(
# - name: Add EESSI repo
#   dnf:
#     name: http://repo.eessi-infra.org/eessi/rhel/8/noarch/eessi-release-0-1.noarch.rpm

# - name: Install EESSI CVMFS config
#   dnf:
#     name: cvmfs-config-eessi

# One KEY=VALUE line per entry of cvmfs_config; `section: null` keeps the
# options outside any [section] header and `no_extra_spaces` omits the spaces
# around `=`.
- name: Add base CVMFS config
  community.general.ini_file:
    dest: /etc/cvmfs/default.local
    section: null
    option: "{{ item.key }}"
    value: "{{ item.value }}"
    no_extra_spaces: true
  loop: "{{ cvmfs_config | dict2items }}"

# NOTE: Not clear how to make this idempotent
# (as written this task reports "changed" on every run)
- name: Ensure CVMFS config is setup
  ansible.builtin.command:
    cmd: "cvmfs_config setup"

environments/.stackhpc/builder.pkrvars.hcl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
flavor = "vm.ska.cpu.general.small"
22
networks = ["a262aabd-e6bf-4440-a155-13dbc1b5db0e"] # WCDC-iLab-60
3-
source_image_name = "openhpc-230412-1447-e3769af6.qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/258
3+
source_image_name = "openhpc-230503-0944-bf8c3f63.qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252
44
#source_image_name = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2"
55
ssh_keypair_name = "slurm-app-ci"
66
security_groups = ["default", "SSH"]

environments/.stackhpc/terraform/main.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ variable "create_nodes" {
1717
variable "cluster_image" {
1818
description = "single image for all cluster nodes - a convenience for CI"
1919
type = string
20-
default = "openhpc-230412-1447-e3769af6.qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/258
20+
default = "openhpc-230503-0944-bf8c3f63.qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252
2121
# default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2"
2222
# default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2"
2323
}

environments/common/inventory/groups

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ login
1313
control
1414
compute
1515

16+
[eessi:children]
17+
# Hosts on which EESSI stack should be configured
18+
1619
[hpctests:children]
1720
# Login group to use for running mpi-based testing.
1821
login

environments/common/layouts/everything

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ compute
5555
[etc_hosts]
5656
# Hosts to manage /etc/hosts e.g. if no internal DNS. See ansible/roles/etc_hosts/README.md
5757

58+
[eessi:children]
59+
openhpc
60+
5861
[resolv_conf]
5962
# Allows defining nameservers in /etc/resolv.conf - see ansible/roles/resolv_conf/README.md
6063

0 commit comments

Comments
 (0)