Skip to content

Commit 4352108

Browse files
authored
Merge pull request #159 from stackhpc/feature/openhpc_config
openhpc role bump & dependency pinning
2 parents ba02caa + 283b19c commit 4352108

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+83
-33
lines changed

.github/workflows/smslabs.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
name: Test on OpenStack via smslabs
2+
name: Test on SMS-Labs OpenStack in stackhpc-ci
33
on:
44
push:
55
branches:
@@ -22,7 +22,7 @@ jobs:
2222
SSH_KEY: ${{ secrets.SSH_KEY }}
2323

2424
- name: Add bastion's ssh key to known_hosts
25-
run: cat environments/smslabs-example/bastion_fingerprint >> ~/.ssh/known_hosts
25+
run: cat environments/smslabs/bastion_fingerprint >> ~/.ssh/known_hosts
2626
shell: bash
2727

2828
- name: Install ansible etc
@@ -33,7 +33,7 @@ jobs:
3333

3434
- name: Initialise terraform
3535
run: terraform init
36-
working-directory: ${{ github.workspace }}/environments/smslabs-example/terraform
36+
working-directory: ${{ github.workspace }}/environments/smslabs/terraform
3737

3838
- name: Write clouds.yaml
3939
run: |
@@ -47,7 +47,7 @@ jobs:
4747
id: provision
4848
run: |
4949
. venv/bin/activate
50-
. environments/smslabs-example/activate
50+
. environments/smslabs/activate
5151
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
5252
terraform apply -auto-approve
5353
env:
@@ -58,7 +58,7 @@ jobs:
5858
id: provision_failure
5959
run: |
6060
. venv/bin/activate
61-
. environments/smslabs-example/activate
61+
. environments/smslabs/activate
6262
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
6363
echo "::set-output name=messages::$(./getfaults.py)"
6464
env:
@@ -69,7 +69,7 @@ jobs:
6969
- name: Delete infrastructure if failed due to lack of hosts
7070
run: |
7171
. venv/bin/activate
72-
. environments/smslabs-example/activate
72+
. environments/smslabs/activate
7373
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
7474
terraform destroy -auto-approve
7575
env:
@@ -80,7 +80,7 @@ jobs:
8080
- name: Configure infrastructure
8181
run: |
8282
. venv/bin/activate
83-
. environments/smslabs-example/activate
83+
. environments/smslabs/activate
8484
ansible all -m wait_for_connection
8585
ansible-playbook ansible/adhoc/generate-passwords.yml
8686
ansible-playbook -vv ansible/site.yml
@@ -91,7 +91,7 @@ jobs:
9191
- name: Run MPI-based tests
9292
run: |
9393
. venv/bin/activate
94-
. environments/smslabs-example/activate
94+
. environments/smslabs/activate
9595
ansible-playbook -vv ansible/adhoc/hpctests.yml
9696
env:
9797
ANSIBLE_FORCE_COLOR: True
@@ -100,7 +100,7 @@ jobs:
100100
- name: Build control and compute images
101101
run: |
102102
. venv/bin/activate
103-
. environments/smslabs-example/activate
103+
. environments/smslabs/activate
104104
cd packer
105105
PACKER_LOG=1 PACKER_LOG_PATH=build.log packer build -var-file=$PKR_VAR_environment_root/builder.pkrvars.hcl openstack.pkr.hcl
106106
env:
@@ -110,7 +110,7 @@ jobs:
110110
- name: Reimage compute nodes via slurm and check cluster still up
111111
run: |
112112
. venv/bin/activate
113-
. environments/smslabs-example/activate
113+
. environments/smslabs/activate
114114
ansible-playbook -vv $APPLIANCES_ENVIRONMENT_ROOT/ci/reimage-compute.yml
115115
ansible-playbook -vv $APPLIANCES_ENVIRONMENT_ROOT/hooks/post.yml
116116
env:
@@ -120,7 +120,7 @@ jobs:
120120
- name: Reimage login nodes via openstack and check cluster still up
121121
run: |
122122
. venv/bin/activate
123-
. environments/smslabs-example/activate
123+
. environments/smslabs/activate
124124
ansible-playbook -vv $APPLIANCES_ENVIRONMENT_ROOT/ci/reimage-login.yml
125125
ansible-playbook -vv $APPLIANCES_ENVIRONMENT_ROOT/hooks/post.yml
126126
env:
@@ -130,7 +130,7 @@ jobs:
130130
- name: Delete infrastructure
131131
run: |
132132
. venv/bin/activate
133-
. environments/smslabs-example/activate
133+
. environments/smslabs/activate
134134
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
135135
terraform destroy -auto-approve
136136
env:

README.md

Lines changed: 2 additions & 0 deletions
Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
---
22

3-
# You must define this variable.
4-
#filebeat_config_path: undefined
5-
filebeat_podman_user: "{{ ansible_user }}"
3+
#filebeat_config_path: undefined # REQUIRED. Path to filebeat.yml configuration file template
4+
filebeat_podman_user: "{{ ansible_user }}" # User that runs the filebeat container

ansible/roles/hpctests/tasks/pingmatrix.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
# Note this still doesn't fix any non-unique names but we should get a length mis-match at least with that.
3535
# although this looks a bit crazy:
3636
- name: Expand node list
37-
shell: "scontrol show hostnames {{ openhpc_tests_nodes if openhpc_tests_nodes is defined else (hpctests_computes.stdout_lines | join(',')) }}"
37+
shell: "scontrol show hostnames {{ hpctests_nodes if hpctests_nodes is defined else (hpctests_computes.stdout_lines | join(',')) }}"
3838
register: scontrol_hostnames
3939

4040
- name: Create sorted node expression

ansible/validate.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,17 @@
99
that: groups['control'] | length
1010
fail_msg: "no hosts found in group 'control' - has control node been deployed?"
1111

12+
- name: Validate openhpc configuration
13+
hosts: openhpc
14+
tags: openhpc
15+
tasks:
16+
- assert:
17+
that: "'enable_configless' in openhpc_config.SlurmctldParameters | default([])"
18+
fail_msg: |
19+
'enable_configless' not found in openhpc_config.SlurmctldParameters - is variable openhpc_config overridden?
20+
Additional slurm.conf parameters should be provided using variable openhpc_config_extra.
21+
success_msg: Checked Slurm will be configured for configless operation
22+
1223
- name: Validate podman configuration
1324
hosts: podman
1425
tags: podman
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
1+
---
2+
3+
# See: ansible/roles/basic_users/README.md
4+
# for variable definitions.
5+
16
basic_users_homedir: /home
27
basic_users_users: []

environments/common/inventory/group_vars/all/filebeat.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
---
2+
23
# Path to filebeat.yml configuration file template
34
filebeat_config_path: "{{ appliances_repository_root }}/environments/common/files/filebeat/filebeat.yml"
45

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1+
---
2+
# See: ansible/roles/hpctests/README.md
3+
# for variable definitions.
4+
15
hpctests_rootdir: "/home/hpctests" # Can't use centos's $HOME as that's not on /home and may not have another user

environments/common/inventory/group_vars/all/opendistro.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
---
2-
# Path to template that specifies opendistro users.
32
# See: https://opendistro.github.io/for-elasticsearch-docs/docs/security/configuration/yaml/
3+
4+
# Path to template that specifies opendistro users
45
opendistro_internal_users_path: "{{ appliances_repository_root }}/environments/common/files/opendistro/internal_users.yml"
56

67
# The user that runs the opendistro container

environments/common/inventory/group_vars/all/openhpc.yml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
---
2+
23
# See: https://github.com/stackhpc/ansible-role-openhpc
34
# for variable definitions
45

@@ -22,5 +23,9 @@ openhpc_packages_default:
2223
openhpc_packages_extra: []
2324
openhpc_packages: "{{ openhpc_packages_default + openhpc_packages_extra }}"
2425
openhpc_munge_key: "{{ vault_openhpc_mungekey | b64decode }}"
25-
openhpc_slurm_configless: true
26-
openhpc_login_only_nodes: login
26+
openhpc_login_only_nodes: login
27+
openhpc_config_default:
28+
SlurmctldParameters:
29+
- enable_configless
30+
openhpc_config_extra: {}
31+
openhpc_config: "{{ openhpc_config_default | combine(openhpc_config_extra, list_merge='append') }}"

environments/common/inventory/group_vars/all/openhpc_tests.yml

Lines changed: 0 additions & 3 deletions
This file was deleted.

environments/common/inventory/group_vars/all/openondemand.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
---
2-
## TODO: maybe we should arrange it following the osc.github.io docs
3-
## NB: Variables prefixed ood_ are all from https://github.com/OSC/ood-ansible
2+
3+
# See: ansible/roles/openondemand/README.md
4+
# for variable definitions.
5+
6+
# NB: Variables prefixed ood_ are all from https://github.com/OSC/ood-ansible
47

58
openondemand_servername: '' # Must be changed when using openondemand, but allows templating when openondemand group is empty
69

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
podman_users: "{{ appliances_local_users_podman }}"
2-
podman_cidr: 10.0.2.0/24 # see slirp4netns:cidr= at https://docs.podman.io/en/latest/markdown/podman-run.1.html
1+
podman_users: "{{ appliances_local_users_podman }}" # user to use for podman
2+
podman_cidr: 10.0.2.0/24 # IP range to use for podman - see slirp4netns:cidr= at https://docs.podman.io/en/latest/markdown/podman-run.1.html

environments/common/inventory/group_vars/all/prometheus.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
---
2+
23
# See: https://github.com/cloudalchemy/ansible-prometheus
34
# for variable definitions
45

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
---
2+
23
selinux_state: permissive
34
selinux_policy: targeted

environments/common/inventory/group_vars/all/update.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
---
2+
13
update_enable: false
24
# These variables define the packages updates and are passed to ansible's yum module parameters with the same names: https://docs.ansible.com/ansible/latest/collections/ansible/builtin/yum_module.html
35
update_name: '*'

environments/common/inventory/groups

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,6 @@ filebeat
5656
[node_exporter]
5757
# All hosts to monitor for hardware and OS metrics.
5858

59-
[openhpc_tests:children]
60-
# For post-deploy MPI-based tests - see ansible/adhoc/test.yml
61-
login
62-
compute
63-
6459
[selinux:children]
6560
# All hosts requiring control of SELinux status.
6661
cluster
File renamed without changes.

environments/smslabs-example/inventory/group_vars/openhpc/overrides.yml renamed to environments/smslabs/inventory/group_vars/openhpc/overrides.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
openhpc_config:
1+
openhpc_config_extra:
22
SlurmctldDebug: debug
33
SlurmdDebug: debug
44
openhpc_slurm_partitions:

environments/smslabs-example/terraform/nodes.tf renamed to environments/smslabs/terraform/nodes.tf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ resource "openstack_compute_instance_v2" "control" {
1313
access_network = true
1414
}
1515

16+
metadata = {
17+
environment_root = var.environment_root
18+
}
19+
1620
}
1721

1822
resource "openstack_compute_instance_v2" "login" {
@@ -31,6 +35,10 @@ resource "openstack_compute_instance_v2" "login" {
3135
access_network = true
3236
}
3337

38+
metadata = {
39+
environment_root = var.environment_root
40+
}
41+
3442
}
3543

3644
resource "openstack_compute_instance_v2" "compute" {
@@ -49,4 +57,8 @@ resource "openstack_compute_instance_v2" "compute" {
4957
access_network = true
5058
}
5159

60+
metadata = {
61+
environment_root = var.environment_root
62+
}
63+
5264
}

environments/smslabs-example/terraform/variables.tf renamed to environments/smslabs/terraform/variables.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,8 @@ variable "compute_images" {
6666
default = {}
6767
description = "Mapping to override compute images from compute_types: key ->(str) node name, value -> (str) image name"
6868
}
69+
70+
variable "environment_root" {
71+
type = string
72+
description = "Path to environment root, automatically set by activate script"
73+
}

requirements.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,26 @@
11
---
22
roles:
33
- src: stackhpc.nfs
4+
version: v21.2.1
45
- src: https://github.com/stackhpc/ansible-role-openhpc.git
5-
version: v0.9.0 # supports Rocky Linux
6+
version: v0.12.0
67
name: stackhpc.openhpc
78
- src: https://github.com/stackhpc/ansible-node-exporter.git
89
version: support-rhel-clones
910
name: cloudalchemy.node_exporter
1011
- src: cloudalchemy.blackbox-exporter
12+
version: 1.0.0
1113
- src: https://github.com/cloudalchemy/ansible-prometheus.git
1214
version: 4d2c8d742de39e50387e0aa6d5510b21c7451343 # need fix in preceding commit for rocky
1315
name: cloudalchemy.prometheus
1416
- src: cloudalchemy.alertmanager
17+
version: 0.19.1
1518
- src: cloudalchemy.grafana
19+
version: 0.18.0
1620
- src: geerlingguy.mysql
21+
version: 3.3.2
1722
- src: jriguera.configdrive
23+
# No versions available
1824
- src: https://github.com/OSC/ood-ansible.git
1925
name: osc.ood
2026
version: v2.0.5

0 commit comments

Comments
 (0)