Skip to content

Add state volume #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion environments/lab/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ resource "openstack_networking_port_v2" "control_control" {
resource "openstack_compute_instance_v2" "control" {

name = "${var.cluster_name}-control"
image_name = var.control_image
image_id = data.openstack_images_image_v2.control.id
flavor_name = var.control_flavor
key_pair = var.key_pair
config_drive = true
availability_zone = var.cluster_availability_zone

# root device:
block_device {
uuid = data.openstack_images_image_v2.control.id
source_type = "image"
destination_type = "local"
boot_index = 0
delete_on_termination = true
}

# state volume:
block_device {
destination_type = "volume"
source_type = "volume"
boot_index = -1
uuid = data.openstack_blockstorage_volume_v3.state.id
}

network {
port = openstack_networking_port_v2.control_cluster.id
}
Expand All @@ -100,6 +117,14 @@ resource "openstack_compute_instance_v2" "control" {
access_network = true
}

user_data = <<-EOF
#cloud-config
bootcmd:
- BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${substr(data.openstack_blockstorage_volume_v3.state.id, 0, 20)}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L state $BLKDEV

mounts:
- [LABEL=state, /var/lib/state]
EOF
}

# --- slurm logins ---
Expand Down
11 changes: 11 additions & 0 deletions environments/lab/terraform/prereqs/prereqs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ terraform {
}
}

# Values derived from the main environment configuration.
#
# cluster_name is read from ../terraform.tfvars so that this prereqs
# configuration names its resources consistently with the main configuration
# without duplicating the value.
locals {
  # Extract the quoted value of the top-level `cluster_name` assignment.
  #  - (?m)^\s* anchors to the start of a line, so commented-out assignments
  #    and longer identifiers ending in "cluster_name" are not matched.
  #  - \s*=\s* tolerates the alignment padding that `terraform fmt` inserts.
  #  - [^"]+ accepts any non-empty name (digits, hyphens, underscores, ...),
  #    not just lowercase letters.
  # regex() raises an error at plan time if no assignment is found.
  cluster_name = regex("(?m)^\\s*cluster_name\\s*=\\s*\"([^\"]+)\"", file("${path.module}/../terraform.tfvars"))[0]
}

resource "openstack_networking_network_v2" "storage" {
name = "lab-storage"
admin_state_up = "true"
Expand All @@ -32,3 +36,10 @@ resource "openstack_networking_subnet_v2" "compute" {
cidr = "192.168.101.0/24"
no_gateway = true
}

# Block storage volume holding state for the control node.
# It is named "<cluster_name>-state", where cluster_name comes from
# local.cluster_name (read from the main terraform.tfvars file).
# NOTE(review): presumably the main configuration looks this volume up by
# name via a data source - confirm the names stay in sync.
resource "openstack_blockstorage_volume_v3" "state" {
# name is derived from the cluster_name in the main tfvars file:
name = "${local.cluster_name}-state"
description = "State for control node"
size = 10 # GB, doesn't matter for lab
}
1 change: 1 addition & 0 deletions environments/nrel/inventory/group_vars/all/defaults.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
appliances_state_dir: /var/lib/state # see volume mount in environments/*/terraform/main.tf:openstack_compute_instance_v2.control:user_data
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ openhpc_generic_packages:

# Additional parameters to set in slurm.conf - use yaml format
openhpc_slurmd_spool_dir: /var/spool/slurm/slurmd
openhpc_state_save_location: /var/spool/slurm/slurmctld # TODO: move to persistent storage
openhpc_config_extra:
LaunchParameters: use_interactive_step
FirstJobId: '50000000'
Expand Down
8 changes: 8 additions & 0 deletions environments/prod/terraform/datasources.tf
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ data "openstack_networking_subnet_v2" "cluster" {
data "openstack_networking_subnet_v2" "control" {
name = var.control_subnet
}

# Looks up the control node image by name (var.control_image) so that its id
# can be referenced elsewhere in this configuration.
data "openstack_images_image_v2" "control" {
name = var.control_image
}

# Looks up the state volume by name ("<cluster_name>-state").
# This is a data source, not a resource: the volume is not created by this
# configuration.
# NOTE(review): presumably it must already exist before this configuration
# is applied - confirm where it is created for this environment.
data "openstack_blockstorage_volume_v3" "state" {
name = "${var.cluster_name}-state"
}
27 changes: 26 additions & 1 deletion environments/prod/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ resource "openstack_networking_port_v2" "control_control" {
resource "openstack_compute_instance_v2" "control" {

name = "${var.cluster_name}-control"
image_name = var.control_image
image_id = data.openstack_images_image_v2.control.id
flavor_name = var.control_flavor
key_pair = var.key_pair
config_drive = true
availability_zone = var.cluster_availability_zone

# root device:
block_device {
uuid = data.openstack_images_image_v2.control.id
source_type = "image"
destination_type = "local"
boot_index = 0
delete_on_termination = true
}

# state volume:
block_device {
destination_type = "volume"
source_type = "volume"
boot_index = -1
uuid = data.openstack_blockstorage_volume_v3.state.id
}

network {
port = openstack_networking_port_v2.control_cluster.id
}
Expand All @@ -100,6 +117,14 @@ resource "openstack_compute_instance_v2" "control" {
access_network = true
}

user_data = <<-EOF
#cloud-config
bootcmd:
- BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${substr(data.openstack_blockstorage_volume_v3.state.id, 0, 20)}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L state $BLKDEV

mounts:
- [LABEL=state, /var/lib/state]
EOF
}

# --- slurm logins ---
Expand Down
34 changes: 28 additions & 6 deletions environments/vtest/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,29 @@ resource "openstack_networking_port_v2" "control_control" {
resource "openstack_compute_instance_v2" "control" {

name = "${var.cluster_name}-vtcontrol"
image_name = var.control_image
image_id = data.openstack_images_image_v2.control.id
flavor_name = var.control_flavor
key_pair = var.key_pair
config_drive = true
availability_zone = var.cluster_availability_zone

# root device:
block_device {
uuid = data.openstack_images_image_v2.control.id
source_type = "image"
destination_type = "local"
boot_index = 0
delete_on_termination = true
}

# state volume:
block_device {
destination_type = "volume"
source_type = "volume"
boot_index = -1
uuid = data.openstack_blockstorage_volume_v3.state.id
}

network {
port = openstack_networking_port_v2.control_cluster.id
}
Expand All @@ -100,6 +117,14 @@ resource "openstack_compute_instance_v2" "control" {
access_network = true
}

user_data = <<-EOF
#cloud-config
bootcmd:
- BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${substr(data.openstack_blockstorage_volume_v3.state.id, 0, 20)}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L state $BLKDEV

mounts:
- [LABEL=state, /var/lib/state]
EOF
}

# --- slurm logins ---
Expand Down Expand Up @@ -179,12 +204,14 @@ resource "openstack_networking_port_v2" "login_control" {
}
}

# flavor_name = each.value
resource "openstack_compute_instance_v2" "logins" {

for_each = var.login_names

name = "${var.cluster_name}-${each.key}"
image_name = var.login_image

flavor_name = var.control_flavor
key_pair = var.key_pair
config_drive = true
Expand All @@ -204,7 +231,6 @@ resource "openstack_compute_instance_v2" "logins" {
}

}
# flavor_name = each.value

# --- slurm compute ---

Expand Down Expand Up @@ -296,22 +322,18 @@ resource "openstack_compute_instance_v2" "computes" {
key_pair = var.key_pair
config_drive = true
availability_zone = var.cluster_availability_zone
#availability_zone_hints = "{{ var.cluster_availability_zone }}:vs-0519-u03a"

network {
port = openstack_networking_port_v2.compute_control[each.key].id
#name = "control"
}

network {
port = openstack_networking_port_v2.compute_cluster[each.key].id
#name = "compute"
access_network = true
}

network {
port = openstack_networking_port_v2.compute_storage[each.key].id
#name = "storage"
}

}
Expand Down