Skip to content

Commit b850948

Browse files
committed
Merge branch 'main' into feat/no-ohpc-mergeable
2 parents 2253fb1 + 2d78fc9 commit b850948

File tree

88 files changed

+1684
-379
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+1684
-379
lines changed

.github/workflows/fatimage.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ jobs:
4848
cd packer/
4949
packer init .
5050
PACKER_LOG=1 packer build -only openstack.openhpc -on-error=${{ vars.PACKER_ON_ERROR }} -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl openstack.pkr.hcl
51+
env:
52+
TESTUSER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
5153

5254
- name: Get created image name from manifest
5355
id: manifest

ansible.cfg

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Only used for Azimuth running the caas environment
2+
[defaults]
3+
any_errors_fatal = True
4+
gathering = smart
5+
forks = 30
6+
host_key_checking = False
7+
remote_tmp = /tmp
8+
collections_path = ansible/collections
9+
roles_path = ansible/roles
10+
filter_plugins = ansible/filter_plugins
11+
callbacks_enabled = ansible.posix.profile_tasks
12+
13+
[ssh_connection]
14+
ssh_args = -o ControlMaster=auto -o ControlPersist=240s -o PreferredAuthentications=publickey -o UserKnownHostsFile=/dev/null
15+
pipelining = True
16+
# This is important because we are using one of the hosts in the play as a jump host
17+
# This ensures that if the proxy connection is interrupted, rendering the other hosts
18+
# unreachable, the connection is retried instead of failing the entire play
19+
retries = 10

ansible/.gitignore

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ roles/*
2828
!roles/firewalld/**
2929
!roles/etc_hosts/
3030
!roles/etc_hosts/**
31-
!roles/cloud_init/
32-
!roles/cloud_init/**
3331
!roles/mysql/
3432
!roles/mysql/**
3533
!roles/systemd/
@@ -42,5 +40,16 @@ roles/*
4240
!roles/proxy/**
4341
!roles/resolv_conf/
4442
!roles/resolv_conf/**
45-
!roles/cve-2023-41914
46-
!roles/cve-2023-41914/**
43+
!roles/cluster_infra/
44+
!roles/cluster_infra/**
45+
!roles/image_build_infra/
46+
!roles/image_build_infra/**
47+
!roles/persist_openhpc_secrets/
48+
!roles/persist_openhpc_secrets/**
49+
!roles/zenith_proxy/
50+
!roles/zenith_proxy/**
51+
!roles/image_build/
52+
!roles/image_build/**
53+
!roles/persist_hostkeys/
54+
!roles/persist_hostkeys/**
55+
!roles/requirements.yml

ansible/adhoc/cve-2023-41914.yml

Lines changed: 0 additions & 6 deletions
This file was deleted.

ansible/adhoc/template-cloud-init.yml

Lines changed: 0 additions & 9 deletions
This file was deleted.

ansible/bootstrap.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
name: fail2ban
115115

116116
- name: Setup podman
117+
gather_facts: false
117118
hosts: podman
118119
tags: podman
119120
tasks:

ansible/extras.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
- hosts: basic_users
1+
- hosts: basic_users:!builder
22
become: yes
33
tags:
44
- basic_users

ansible/fatimage.yml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,13 @@
5252
become: yes
5353
gather_facts: no
5454
tasks:
55-
# - import_playbook: slurm.yml
56-
- include_role:
55+
# - import_playbook: slurm.yml:
56+
- name: Setup DB
57+
include_role:
58+
name: mysql
59+
tasks_from: install.yml
60+
- name: OpenHPC
61+
import_role:
5762
name: stackhpc.openhpc
5863
tasks_from: "install-{{ openhpc_install_type }}.yml"
5964

@@ -76,10 +81,10 @@
7681
name: opensearch
7782
tasks_from: install.yml
7883
become: true
79-
80-
# opensearch - containerised, nothing to do
8184
# slurm_stats - nothing to do
82-
# filebeat - containerised - nothing to do
85+
- import_role:
86+
name: filebeat
87+
tasks_from: install.yml
8388

8489
- import_role:
8590
# can't run only cloudalchemy.node_exporter/tasks/install.yml as it needs vars from preflight.yml and triggers service start

ansible/monitoring.yml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,12 @@
2626
# Collection currently requires root for all tasks.
2727
become: true
2828

29-
- name: Setup filebeat
29+
- name: Deploy filebeat
3030
hosts: filebeat
3131
tags: filebeat
3232
tasks:
3333
- import_role:
3434
name: filebeat
35-
tasks_from: config.yml
36-
tags: config
37-
38-
- import_role:
39-
name: filebeat
40-
tasks_from: deploy.yml
41-
tags: deploy
4235

4336
- name: Deploy node_exporter
4437
hosts: node_exporter

ansible/noop.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66

77
- hosts: localhost
88
gather_facts: false
9-
tasks: []
9+
tasks: []
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cluster_deploy_ssh_keys_extra: []
2+
3+
# List of hw_scsi_models that result in block devices presenting as /dev/sdX
4+
# rather than /dev/vdX
5+
scsi_models:
6+
# Ceph [https://docs.ceph.com/en/quincy/rbd/rbd-openstack/#image-properties]
7+
- virtio-scsi
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
- debug:
2+
msg: |
3+
terraform_backend_type: {{ terraform_backend_type }}
4+
terraform_state: {{ terraform_state }}
5+
cluster_upgrade_system_packages: {{ cluster_upgrade_system_packages | default('undefined') }}
6+
7+
# We need to convert the floating IP id to an address for Terraform
8+
# if we have cluster_floating_ip, otherwise assume that we're
9+
# assigning the FIP in Terraform and that it will be available in
10+
# outputs.cluster_gateway_ip.
11+
- block:
12+
- name: Look up floating IP
13+
include_role:
14+
name: stackhpc.terraform.infra
15+
tasks_from: lookup_floating_ip
16+
vars:
17+
os_floating_ip_id: "{{ cluster_floating_ip }}"
18+
19+
- name: Set floating IP address fact
20+
set_fact:
21+
cluster_floating_ip_address: "{{ os_floating_ip_info.floating_ip_address }}"
22+
when: cluster_floating_ip is defined
23+
24+
- name: Install Terraform binary
25+
include_role:
26+
name: stackhpc.terraform.install
27+
28+
- name: Make Terraform project directory
29+
file:
30+
path: "{{ terraform_project_path }}"
31+
state: directory
32+
33+
- name: Write backend configuration
34+
copy:
35+
content: |
36+
terraform {
37+
backend "{{ terraform_backend_type }}" { }
38+
}
39+
dest: "{{ terraform_project_path }}/backend.tf"
40+
41+
# Patching in this appliance is implemented as a switch to a new base image
42+
# So unless explicitly patching, we want to use the same image as last time
43+
# To do this, we query the previous Terraform state before updating
44+
- block:
45+
- name: Get previous Terraform state
46+
stackhpc.terraform.terraform_output:
47+
binary_path: "{{ terraform_binary_path }}"
48+
project_path: "{{ terraform_project_path }}"
49+
backend_config: "{{ terraform_backend_config }}"
50+
register: cluster_infra_terraform_output
51+
52+
- name: Extract image from Terraform state
53+
set_fact:
54+
cluster_previous_image: "{{ cluster_infra_terraform_output.outputs.cluster_image.value }}"
55+
when: '"cluster_image" in cluster_infra_terraform_output.outputs'
56+
when:
57+
- terraform_state == "present"
58+
- cluster_upgrade_system_packages is not defined or not cluster_upgrade_system_packages
59+
60+
- name: Template Terraform files into project directory
61+
template:
62+
src: >-
63+
{{
64+
"{}{}.j2".format(
65+
(
66+
cluster_terraform_template_dir ~ "/"
67+
if cluster_terraform_template_dir is defined
68+
else ""
69+
),
70+
item
71+
)
72+
}}
73+
dest: "{{ terraform_project_path }}/{{ item }}"
74+
loop:
75+
- outputs.tf
76+
- providers.tf
77+
- resources.tf
78+
79+
- name: Provision infrastructure
80+
include_role:
81+
name: stackhpc.terraform.infra
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
output "cluster_gateway_ip" {
2+
description = "The IP address of the gateway used to contact the cluster nodes"
3+
value = openstack_compute_floatingip_associate_v2.login_floatingip_assoc.floating_ip
4+
}
5+
6+
{% if cluster_ssh_private_key_file is not defined %}
7+
output "cluster_ssh_private_key" {
8+
description = "The private component of the keypair generated on cluster provision"
9+
value = openstack_compute_keypair_v2.cluster_keypair.private_key
10+
sensitive = true
11+
}
12+
{% endif %}
13+
14+
output "cluster_nodes" {
15+
description = "A list of the nodes in the cluster from which an Ansible inventory will be populated"
16+
value = concat(
17+
[
18+
{
19+
name = openstack_compute_instance_v2.login.name
20+
ip = openstack_compute_instance_v2.login.network[0].fixed_ip_v4
21+
groups = ["login", "{{ cluster_name }}_login"],
22+
facts = {
23+
openstack_project_id = data.openstack_identity_auth_scope_v3.scope.project_id
24+
}
25+
},
26+
{
27+
name = openstack_compute_instance_v2.control.name
28+
ip = openstack_compute_instance_v2.control.network[0].fixed_ip_v4
29+
groups = ["control", "{{ cluster_name }}_control"],
30+
facts = {
31+
openstack_project_id = data.openstack_identity_auth_scope_v3.scope.project_id
32+
}
33+
}
34+
],
35+
{% for partition in openhpc_slurm_partitions %}
36+
[
37+
for compute in openstack_compute_instance_v2.{{ partition.name }}: {
38+
name = compute.name
39+
ip = compute.network[0].fixed_ip_v4
40+
groups = ["compute", "{{ cluster_name }}_compute", "{{ cluster_name }}_{{ partition.name }}"],
41+
facts = {
42+
openstack_project_id = data.openstack_identity_auth_scope_v3.scope.project_id
43+
}
44+
}
45+
]{{ ',' if not loop.last }}
46+
{% endfor %}
47+
)
48+
}
49+
50+
output "cluster_image" {
51+
description = "The id of the image used to build the cluster nodes"
52+
value = "{{ cluster_previous_image | default(cluster_image) }}"
53+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
terraform {
2+
required_version = ">= 0.14"
3+
4+
# We need the OpenStack provider
5+
required_providers {
6+
openstack = {
7+
source = "terraform-provider-openstack/openstack"
8+
}
9+
}
10+
}

0 commit comments

Comments
 (0)