Skip to content

Enable CI on leafcloud #369

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/stackhpc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: Record which cloud CI is running on
- name: Record settings for CI cloud
run: |
echo CI_CLOUD: ${{ vars.CI_CLOUD }}

Expand All @@ -38,10 +38,10 @@ jobs:
- name: Install ansible etc
run: dev/setup-env.sh

- name: Install terraform
uses: hashicorp/setup-terraform@v1
- name: Install OpenTofu
uses: opentofu/setup-opentofu@v1
with:
terraform: v1.5.5
tofu_version: 1.6.2

- name: Initialise terraform
run: terraform init
Expand Down
3 changes: 1 addition & 2 deletions ansible/ci/check_slurm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,4 @@
<end>
vars:
expected_sinfo:
- "{{ openhpc_cluster_name }}-compute-[0-1] small* up 60-00:00:00 2 idle"
- "{{ openhpc_cluster_name }}-compute-[2-3] extra up 60-00:00:00 2 idle"
- "{{ openhpc_cluster_name }}-compute-[0-1] standard* up 60-00:00:00 2 idle"
2 changes: 2 additions & 0 deletions dev/setup-env.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

set -euo pipefail

if [[ ! -d "venv" ]]; then
/usr/bin/python3.8 -m venv venv # use `sudo yum install python38` on Rocky Linux 8 to install this
fi
Expand Down
9 changes: 0 additions & 9 deletions environments/.stackhpc/SMS.pkrvars.hcl

This file was deleted.

1 change: 1 addition & 0 deletions environments/.stackhpc/bastion_fingerprints
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
|1|whGSPLhKW4xt/7PWOZ1treg3PtA=|F5gwV8j0JYWDzjb6DvHHaqO+sxs= ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBCpCG881Gt3dr+nuVIC2uGEQkeVwG6WDdS1WcCoxXC7AG+Oi5bfdqtf4IfeLpWmeuEaAaSFH48ODFr76ViygSjU=
|1|0V6eQ1FKO5NMKaHZeNFbw62mrJs=|H1vuGTbbtZD2MEgZxQf1PXPk+yU= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEnOtYByM3s2qvRT8SS1sn5z5sbwjzb1alm0B3emPcHJ
|1|u3QVAK9R2x7Z3uKNj+0vDEIekl0=|yy09Q0Kw472+J7bjFkmir28x3lE= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINNuXZkH7ppkTGNGKzmGEvAnvlLO2D+YtlJw1m3P16FV
|1|nOHeibGxhsIFnhW0flRwnirJjlg=|IJ8nJB355LGI+1U3Wpvdcgdf0ek= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGG6DieKAdgiTCqRmF2HD0dJi9DuORblPzbridniICsw
6 changes: 3 additions & 3 deletions environments/.stackhpc/inventory/group_vars/all/bastion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ bastion_config:
ARCUS:
user: slurm-app-ci
ip: 128.232.222.183
SMS:
user: steveb
ip: 185.45.78.150
LEAFCLOUD:
user: rocky
ip: 195.114.30.222
# NB: The bastion_{user,ip} variables are used directly in the CI workflow too
bastion_user: "{{ bastion_config[ci_cloud].user }}"
bastion_ip: "{{ bastion_config[ci_cloud].ip }}"
Expand Down
3 changes: 1 addition & 2 deletions environments/.stackhpc/terraform/ARCUS.tfvars
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
cluster_net = "portal-internal"
cluster_subnet = "portal-internal"
vnic_type = "normal"
control_node_flavor = "vm.ska.cpu.general.quarter"
control_node_flavor = "vm.ska.cpu.general.eighth"
other_node_flavor = "vm.ska.cpu.general.small"
6 changes: 6 additions & 0 deletions environments/.stackhpc/terraform/LEAFCLOUD.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
cluster_net = "slurmapp-ci"
cluster_subnet = "slurmapp-ci"
control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment
other_node_flavor = "ec1.small"
state_volume_type = "unencrypted"
home_volume_type = "unencrypted"
5 changes: 0 additions & 5 deletions environments/.stackhpc/terraform/SMS.tfvars

This file was deleted.

34 changes: 24 additions & 10 deletions environments/.stackhpc/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,17 @@ variable "cluster_net" {}

variable "cluster_subnet" {}

variable "vnic_type" {}
# VNIC type for cluster ports; cloud-specific .tfvars files may override it.
variable "vnic_type" {
  type        = string
  description = "VNIC type, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type"
  default     = "normal"
}

# Optional per-cloud override, forwarded to the cluster module's
# state_volume_type input.
variable "state_volume_type" {
  type        = string
  description = "Type of state volume, if not default type"
  default     = null
}

# Optional per-cloud override, forwarded to the cluster module's
# home_volume_type input.
variable "home_volume_type" {
  type        = string
  description = "Type of home volume, if not default type"
  default     = null
}

variable "control_node_flavor" {}

Expand Down Expand Up @@ -59,20 +69,21 @@ module "cluster" {
}
}
compute_types = {
small: {
flavor: var.other_node_flavor
image: var.cluster_image
}
extra: {
standard: { # NB: can't call this default!
flavor: var.other_node_flavor
image: var.cluster_image
}
# Example of how to add another partition:
# extra: {
# flavor: var.other_node_flavor
# image: var.cluster_image
# }
}
compute_nodes = {
compute-0: "small"
compute-1: "small"
compute-2: "extra"
compute-3: "extra"
compute-0: "standard"
compute-1: "standard"
# compute-2: "extra"
# compute-3: "extra"
}
volume_backed_instances = var.volume_backed_instances

Expand All @@ -81,4 +92,7 @@ module "cluster" {
state_volume_size = 10
home_volume_size = 20

state_volume_type = var.state_volume_type
home_volume_type = var.home_volume_type

}
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,24 @@ variable "state_volume_size" {
default = 150 # GB
}

# Volume type for the state volume.
# NOTE(review): null presumably leaves volume_type unset so the cloud's
# default type is used - confirm against the provider docs.
variable "state_volume_type" {
  type        = string
  description = "Type of state volume, if not default type"
  default     = null
}

# Size of the home volume. The description previously said "state volume",
# a copy-paste slip from the state_volume_size declaration.
variable "home_volume_size" {
  type        = number
  description = "Size of home volume on control node, in GB"
  default     = 100 # GB, 0 means no home volume
}

# Volume type for the home volume.
# NOTE(review): null presumably leaves volume_type unset so the cloud's
# default type is used - confirm against the provider docs.
variable "home_volume_type" {
  type        = string
  description = "Type of home volume, if not default type"
  default     = null
}

variable "vnic_type" {
type = string
description = "VNIC type, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ resource "openstack_blockstorage_volume_v3" "state" {
name = "${var.cluster_name}-state"
description = "State for control node" # first word used to label filesystem
size = var.state_volume_size
volume_type = var.state_volume_type
}

resource "openstack_blockstorage_volume_v3" "home" {
Expand All @@ -11,4 +12,5 @@ resource "openstack_blockstorage_volume_v3" "home" {
name = "${var.cluster_name}-home"
description = "Home for control node" # first word used to label filesystem
size = var.home_volume_size
volume_type = var.home_volume_type
}