File tree Expand file tree Collapse file tree 11 files changed +152
-5
lines changed Expand file tree Collapse file tree 11 files changed +152
-5
lines changed Original file line number Diff line number Diff line change 99
99
. environments/.stackhpc/activate
100
100
ansible-playbook -vv ansible/adhoc/hpctests.yml
101
101
102
# Run the EESSI check playbook after activating the Python venv and the
# .stackhpc environment.
- name: Run EESSI tests
  run: |
    . venv/bin/activate
    . environments/.stackhpc/activate
    ansible-playbook -vv ansible/ci/check_eessi.yml
107
+
102
108
- name : Confirm Open Ondemand is up (via SOCKS proxy)
103
109
run : |
104
110
. venv/bin/activate
Original file line number Diff line number Diff line change 112
112
tasks_from : config.yml
113
113
tags : config
114
114
115
# Apply the `eessi` role, with privilege escalation and without fact
# gathering, to every host in the `eessi` inventory group.
- name: Setup EESSI
  hosts: eessi
  become: true
  gather_facts: false
  tags: eessi
  tasks:
    - name: Install and configure EESSI
      import_role:
        name: eessi
124
+
115
125
- hosts : update
116
126
gather_facts : false
117
127
become : yes
Original file line number Diff line number Diff line change
1
---
# CI check for the EESSI stack: clone the eessi-demo repository on the first
# login node, run its TensorFlow demo through `srun` with the EESSI pilot
# environment sourced, and fail unless the output shows the final epoch.
- name: Run EESSI test job
  hosts: login[0]
  vars:
    # Working directory for the test; created below and owned by ansible_user.
    eessi_test_rootdir: /home/eessi_test
  tasks:
    - name: Create test root directory
      ansible.builtin.file:
        path: "{{ eessi_test_rootdir }}"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"
      become: true

    - name: Clone eessi-demo repo
      ansible.builtin.git:
        repo: "https://github.com/eessi/eessi-demo.git"
        dest: "{{ eessi_test_rootdir }}/eessi-demo"

    - name: Run test job
      ansible.builtin.shell:
        cmd: |
          source /cvmfs/pilot.eessi-hpc.org/latest/init/bash
          srun ./run.sh
        chdir: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow"
        # `source` is a bashism, so do not rely on the default /bin/sh.
        executable: /bin/bash
      register: job_output
      # This is a verification job, not a configuration change; do not
      # report the host as changed on every CI run.
      changed_when: false

    # Note: Job prints live progress bar to terminal, so use regex filter to
    # remove this from stdout. Inside the double-quoted YAML string, "\b" is
    # the backspace character used to redraw the bar.
    - name: Fail if job output contains error
      ansible.builtin.fail:
        msg: "Test job using EESSI modules failed. Job output was: {{ job_output.stdout | regex_replace('\b', '') }}"
      when: '"Epoch 5/5" not in job_output.stdout'
34
+
Original file line number Diff line number Diff line change 2
2
gather_facts : false
3
3
become : true
4
4
vars :
5
- sacct_stdout_expected : |- # based on CI running hpctests as the first job - NB note no trailing newline
5
+ sacct_stdout_expected : |- # based on CI running hpctests as the first job
6
6
JobID,JobName,State
7
7
1,pingpong.sh,COMPLETED
8
8
2,pingmatrix.sh,COMPLETED
18
18
register : sacct
19
19
- name : Check info for ended jobs
20
20
assert :
21
- that : sacct.stdout == sacct_stdout_expected
21
+ that : sacct_stdout_expected in sacct.stdout
22
22
fail_msg : |
23
23
Expected:
24
24
--{{ sacct_stdout_expected }}--
25
25
Got:
26
26
--{{ sacct.stdout }}--
27
- success_msg : sacct shows hpctests jobs as first and only jobs
27
+ success_msg : sacct shows hpctests jobs as first jobs in list
Original file line number Diff line number Diff line change
1
+ EESSI
2
+ =====
3
+
4
+ Configure the EESSI pilot repository for use on given hosts.
5
+
6
+ Requirements
7
+ ------------
8
+
9
+ None.
10
+
11
+ Role Variables
12
+ --------------
13
+
14
+ - ` cvmfs_quota_limit_mb ` : Optional int. Maximum size of local package cache on each node in MB.
15
+ - `cvmfs_config_overrides`: Optional dict. Set of key-value pairs for additional CernVM-FS settings; see the [official docs](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html) for the list of options. Each dict key should correspond to a valid config variable (e.g. `CVMFS_HTTP_PROXY`) and the corresponding dict value will be set as the variable value (e.g. `https://my-proxy.com`). These configuration parameters will be written to the `/etc/cvmfs/default.local` config file on each host in the form `KEY=VALUE`.
16
+
17
+ Dependencies
18
+ ------------
19
+
20
+ None.
21
+
22
+ Example Playbook
23
+ ----------------
24
+
25
+ ``` yaml
26
+ - name : Setup EESSI
27
+ hosts : eessi
28
+ tags : eessi
29
+ become : true
30
+ tasks :
31
+ - name : Install and configure EESSI
32
+ import_role :
33
+ name : eessi
34
+ ```
Original file line number Diff line number Diff line change
1
---
# Maximum size of the local package cache on each node, in MB.
# Default to 10GB.
cvmfs_quota_limit_mb: 10000

# Baseline CernVM-FS client settings; the quota follows the variable above.
cvmfs_config_default:
  CVMFS_CLIENT_PROFILE: single
  CVMFS_QUOTA_LIMIT: "{{ cvmfs_quota_limit_mb }}"

# Extra CernVM-FS settings supplied by the user; empty unless overridden.
cvmfs_config_overrides: {}

# Effective settings: the defaults with any overrides combined on top.
cvmfs_config: "{{ cvmfs_config_default | combine(cvmfs_config_overrides) }}"
Original file line number Diff line number Diff line change
1
---
# Install the CernVM-FS client and the EESSI repository configuration, then
# write every key/value in `cvmfs_config` to /etc/cvmfs/default.local and run
# `cvmfs_config setup`.

# rpm_key imports the key only when it is not already in the RPM database and
# accepts the URL directly, so no key file has to be written to (and later
# found again in) a relative path on the remote host. The key is fetched over
# HTTPS so the signing key cannot be replaced in transit.
- name: Import Cern GPG key
  ansible.builtin.rpm_key:
    key: https://cvmrepo.web.cern.ch/cvmrepo/yum/RPM-GPG-KEY-CernVM
    state: present

- name: Add CVMFS repo
  ansible.builtin.dnf:
    name: https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm

- name: Install CVMFS
  ansible.builtin.dnf:
    name: cvmfs

- name: Install EESSI CVMFS config
  ansible.builtin.dnf:
    name: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi-latest.noarch.rpm
    # NOTE: Can't find any docs on obtaining gpg key - maybe downloading
    # directly from github is ok?
    disable_gpg_check: true

# Alternative version using official repo - still no GPG key :(
# - name: Add EESSI repo
#   dnf:
#     name: http://repo.eessi-infra.org/eessi/rhel/8/noarch/eessi-release-0-1.noarch.rpm

# - name: Install EESSI CVMFS config
#   dnf:
#     name: cvmfs-config-eessi

# One KEY=VALUE line per entry of `cvmfs_config`; `section: null` writes the
# option outside any INI section and `no_extra_spaces` omits the spaces
# around "=".
- name: Add base CVMFS config
  community.general.ini_file:
    path: /etc/cvmfs/default.local
    section: null
    option: "{{ item.key }}"
    value: "{{ item.value }}"
    no_extra_spaces: true
  loop: "{{ cvmfs_config | dict2items }}"

# NOTE: Not clear how to make this idempotent
- name: Ensure CVMFS config is setup
  ansible.builtin.command:
    cmd: "cvmfs_config setup"
Original file line number Diff line number Diff line change 1
1
flavor = " vm.ska.cpu.general.small"
2
2
networks = [" a262aabd-e6bf-4440-a155-13dbc1b5db0e" ] # WCDC-iLab-60
3
- source_image_name = " openhpc-230412-1447-e3769af6 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/258
3
+ source_image_name = " openhpc-230503-0944-bf8c3f63 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252
4
4
# source_image_name = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2"
5
5
ssh_keypair_name = " slurm-app-ci"
6
6
security_groups = [" default" , " SSH" ]
Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ variable "create_nodes" {
17
17
variable "cluster_image" {
18
18
description = " single image for all cluster nodes - a convenience for CI"
19
19
type = string
20
- default = " openhpc-230412-1447-e3769af6 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/258
20
+ default = " openhpc-230503-0944-bf8c3f63 .qcow2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252
21
21
# default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2"
22
22
# default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2"
23
23
}
Original file line number Diff line number Diff line change 13
13
control
14
14
compute
15
15
16
+ [eessi:children]
17
+ # Hosts on which EESSI stack should be configured
18
+
16
19
[hpctests:children]
17
20
# Login group to use for running mpi-based testing.
18
21
login
Original file line number Diff line number Diff line change @@ -55,6 +55,9 @@ compute
55
55
[etc_hosts]
56
56
# Hosts to manage /etc/hosts e.g. if no internal DNS. See ansible/roles/etc_hosts/README.md
57
57
58
+ [eessi:children]
59
+ openhpc
60
+
58
61
[resolv_conf]
59
62
# Allows defining nameservers in /etc/resolv.conf - see ansible/roles/resolv_conf/README.md
60
63
You can’t perform that action at this time.
0 commit comments