Skip to content

Commit 92f5115

Browse files
authored
Merge pull request #441 from stackhpc/feature/k3s-ansible-init
Install k3s cluster with ansible init
2 parents b971e7f + 0c17410 commit 92f5115

File tree

24 files changed

+291
-13
lines changed

24 files changed

+291
-13
lines changed

ansible/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,9 @@ roles/*
5858
!roles/squid/**
5959
!roles/tuned/
6060
!roles/tuned/**
61+
!roles/k3s/
62+
!roles/k3s/**
63+
!roles/k9s/
64+
!roles/k9s/**
6165
!roles/lustre/
6266
!roles/lustre/**

ansible/bootstrap.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,3 +259,11 @@
259259
tasks:
260260
- include_role:
261261
name: azimuth_cloud.image_utils.linux_ansible_init
262+
# Run the k3s role's install tasks on every host in the `k3s` inventory group.
- hosts: k3s
  tags: k3s
  become: true
  tasks:
    - ansible.builtin.include_role:
        name: k3s
        tasks_from: install.yml

ansible/cleanup.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
- name: Cleanup /tmp
4040
command : rm -rf /tmp/*
41-
41+
4242
- name: Get package facts
4343
package_facts:
4444

ansible/extras.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,11 @@
3636
tasks:
3737
- import_role:
3838
name: persist_hostkeys
39+
# Apply the k9s role to every host in the `k9s` inventory group.
- name: Install k9s
  hosts: k9s
  tags: k9s
  become: true
  tasks:
    - import_role:
        name: k9s

ansible/roles/cluster_infra/templates/resources.tf.j2

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@ data "openstack_identity_auth_scope_v3" "scope" {
77
name = "{{ cluster_name }}"
88
}
99

####
#### Data resources
####

# Stores the k3s token rendered from the Ansible variable `k3s_token`.
# Changes to `input` are ignored after creation, so the first value applied
# is the one retained in state.
# NOTE(review): the instance metadata entries visible in this change interpolate
# the template variable directly instead of reading this resource's `output` -
# confirm the write-once value is what actually reaches the nodes.
resource "terraform_data" "k3s_token" {
  input = "{{ k3s_token }}"

  lifecycle {
    # makes it a write-once value (set via Ansible)
    ignore_changes = [input]
  }
}
22+
1023
#####
1124
##### Security groups for the cluster
1225
#####
@@ -386,6 +399,8 @@ resource "openstack_compute_instance_v2" "login" {
386399
ansible_init_coll_{{ loop.index0 }}_source = "{{ collection.source }}"
387400
{% endif %}
388401
{% endfor %}
402+
k3s_server = openstack_compute_instance_v2.control.network[0].fixed_ip_v4
403+
k3s_token = "{{ k3s_token }}"
389404
}
390405
}
391406

@@ -400,6 +415,7 @@ resource "openstack_compute_instance_v2" "control" {
400415

401416
network {
402417
port = openstack_networking_port_v2.control.id
418+
access_network = true
403419
}
404420

405421
{% if cluster_storage_network is defined %}
@@ -479,6 +495,7 @@ resource "openstack_compute_instance_v2" "control" {
479495
ansible_init_coll_{{ loop.index0 }}_source = "{{ collection.source }}"
480496
{% endif %}
481497
{% endfor %}
498+
k3s_token = "{{ k3s_token }}"
482499
}
483500
}
484501

@@ -548,6 +565,8 @@ resource "openstack_compute_instance_v2" "{{ partition.name }}" {
548565
ansible_init_coll_{{ loop.index0 }}_source = "{{ collection.source }}"
549566
{% endif %}
550567
{% endfor %}
568+
k3s_server = openstack_compute_instance_v2.control.network[0].fixed_ip_v4
569+
k3s_token = "{{ k3s_token }}"
551570
}
552571
}
553572

ansible/roles/k3s/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
k3s
2+
=====
3+
4+
Installs k3s agent and server services on nodes and an ansible-init playbook to activate them. The service that each node will activate on init is determined by OpenStack metadata. Also includes Helm install. Currently only supports a single k3s-server
5+
(i.e. one control node). The install is based on the [official k3s ansible role](https://github.com/k3s-io/k3s-ansible).
6+
7+
8+
Requirements
9+
------------
10+
11+
`azimuth_cloud.image_utils.linux_ansible_init` must have been run previously on targeted nodes during image build.
12+
13+
Role Variables
14+
--------------
15+
16+
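- `k3s_version`: Optional str. K3s version to install, see [official releases](https://github.com/k3s-io/k3s/releases/).
- `k3s_selinux_release`: Optional str. Release tag of [k3s-selinux](https://github.com/k3s-io/k3s-selinux/releases/) from which the SELinux policy RPM is downloaded.
- `k3s_selinux_rpm_version`: Optional str. Version of the k3s-selinux RPM within that release.
- `k3s_helm_version`: Optional str. Helm version to install.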

ansible/roles/k3s/defaults/main.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
---
# Pinned versions used by the k3s role's install tasks.
# Warning: changes to these variables won't be reflected in the cluster/image if k3s is already installed

# k3s release tag: selects the binary and air-gap image downloads
k3s_version: 'v1.31.0+k3s1'

# k3s-selinux release tag and the RPM version inside that release
k3s_selinux_release: 'v1.6.latest.1'
k3s_selinux_rpm_version: '1.6-1'

# Helm release fetched from get.helm.sh
k3s_helm_version: 'v3.11.0'

ansible/roles/k3s/files/start_k3s.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
---
# ansible-init playbook: configures and starts either the k3s server or the
# k3s agent service on this node, driven by OpenStack instance metadata.
#   meta.k3s_token  - cluster join token (also written as the node password)
#   meta.k3s_server - address of the k3s server; nodes that have this key
#                     start `k3s-agent`, nodes without it start `k3s`
- hosts: localhost
  become: true
  vars:
    # split_lines=false returns the response body as one string, so the JSON
    # parses intact even if the metadata service emits it over several lines
    os_metadata: "{{ lookup('url', 'http://169.254.169.254/openstack/latest/meta_data.json', split_lines=false) | from_json }}"
    k3s_token: "{{ os_metadata.meta.k3s_token }}"
    k3s_server_name: "{{ os_metadata.meta.k3s_server }}"
    # NOTE(review): relies on `is defined` evaluating to false when the
    # metadata key referenced by k3s_server_name is absent - confirm on the
    # ansible-core version shipped in the image
    service_name: "{{ 'k3s-agent' if k3s_server_name is defined else 'k3s' }}"
  tasks:
    - name: Ensure password directory exists
      ansible.builtin.file:
        path: "/etc/rancher/node"
        state: directory

    - name: Set agent node password as token  # uses token to keep password consistent between reimages
      no_log: true  # the file content is the cluster token
      ansible.builtin.copy:
        dest: /etc/rancher/node/password
        content: "{{ k3s_token }}"
        owner: root
        group: root
        mode: "0600"  # secret: readable by root only

    - name: Add the token for joining the cluster to the environment
      no_log: true  # avoid logging the server token
      ansible.builtin.lineinfile:
        path: "/etc/systemd/system/{{ service_name }}.service.env"
        regexp: '^K3S_TOKEN='  # replace an existing entry rather than appending a duplicate
        line: "K3S_TOKEN={{ k3s_token }}"

    - name: Add server url to agents
      ansible.builtin.lineinfile:
        path: "/etc/systemd/system/{{ service_name }}.service.env"
        regexp: '^K3S_URL='  # replace an existing entry rather than appending a duplicate
        line: "K3S_URL=https://{{ k3s_server_name }}:6443"
      when: k3s_server_name is defined

    - name: Start k3s service
      ansible.builtin.systemd:
        name: "{{ service_name }}"
        daemon_reload: true
        state: started
        enabled: true

ansible/roles/k3s/tasks/install.yml

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
---
# Installs k3s for later activation by ansible-init: the k3s binary, the
# SELinux policy package, the air-gap image bundle and both the server and
# agent services (neither started nor enabled here), plus Helm and the
# activation playbook.
# NOTE(review): task nesting was reconstructed from a flattened diff view; the
# last three tasks are assumed to sit outside the guarded block - confirm.

- name: Check for existing k3s installation
  ansible.builtin.stat:
    path: /var/lib/rancher/k3s
  register: stat_result

- name: Perform air-gapped installation of k3s
  # Using air-gapped install so containers are pre-installed to avoid rate-limiting from registries on cluster startup
  # NOTE(review): the guard path is created part-way through this block (image
  # directory task), so a failure after that point would make a re-run skip
  # the remaining steps - confirm this is acceptable for image builds.
  when: not stat_result.stat.exists
  block:

    - name: Download k3s binary
      ansible.builtin.get_url:
        url: "https://github.com/k3s-io/k3s/releases/download/{{ k3s_version | urlencode }}/k3s"
        dest: /usr/bin/k3s
        owner: root
        group: root
        mode: "0755"

    - name: Install k3s SELinux policy package
      ansible.builtin.yum:
        name: "https://github.com/k3s-io/k3s-selinux/releases/download/{{ k3s_selinux_release }}/k3s-selinux-{{ k3s_selinux_rpm_version }}.el{{ ansible_distribution_major_version }}.noarch.rpm"
        # NOTE(review): package signature is not verified - confirm intentional
        disable_gpg_check: true

    - name: Create image directory
      ansible.builtin.file:
        path: "/var/lib/rancher/k3s/agent/images"
        state: directory

    - name: Install k3s' internal images
      ansible.builtin.get_url:
        url: "https://github.com/k3s-io/k3s/releases/download/{{ k3s_version | urlencode }}/k3s-airgap-images-amd64.tar.zst"
        dest: /var/lib/rancher/k3s/agent/images/k3s-airgap-images-amd64.tar.zst

    - name: Download k3s install script
      ansible.builtin.get_url:
        url: https://get.k3s.io/
        timeout: 120
        dest: /usr/bin/k3s-install.sh
        owner: root
        group: root
        mode: "0755"

    - name: Install k3s
      # Runs the installer once per role (server, then agent). No shell
      # features are used, so the script is executed directly.
      ansible.builtin.command:
        cmd: /usr/bin/k3s-install.sh
      environment:
        INSTALL_K3S_VERSION: "{{ k3s_version }}"
        INSTALL_K3S_EXEC: "{{ item }}"
        INSTALL_K3S_SKIP_START: "true"
        INSTALL_K3S_SKIP_ENABLE: "true"
        INSTALL_K3S_BIN_DIR: "/usr/bin"
        INSTALL_K3S_SKIP_DOWNLOAD: "true"  # binary was fetched above
      changed_when: true
      loop:
        - server --disable=traefik
        - agent

- name: Install helm
  ansible.builtin.unarchive:
    src: "https://get.helm.sh/helm-{{ k3s_helm_version }}-linux-amd64.tar.gz"
    dest: /usr/bin
    extra_opts: "--strip-components=1"
    owner: root
    group: root
    mode: "0755"  # quoted so the value is always read as an octal string
    remote_src: true

- name: Add k3s kubeconfig as environment variable
  ansible.builtin.lineinfile:
    path: /etc/environment
    line: "KUBECONFIG=/etc/rancher/k3s/k3s.yaml"

- name: Install ansible-init playbook for k3s agent or server activation
  ansible.builtin.copy:
    src: start_k3s.yml
    dest: /etc/ansible-init/playbooks/0-start-k3s.yml

ansible/roles/k9s/tasks/main.yml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
---
# Installs the k9s binary to /usr/bin/k9s from the upstream release tarball.
# All install steps are skipped when the binary is already present.
# `k9s_version` may be set to override the release tag (default v0.32.5).

- name: Check if k9s is installed
  ansible.builtin.stat:
    path: "/usr/bin/k9s"
  register: _k9s_stat_result

- name: Install k9s and clean up temporary files
  # Single guard for the whole install, so no step runs against a missing
  # staging directory when k9s is already installed
  when: not _k9s_stat_result.stat.exists
  block:
    - name: Create install directory
      ansible.builtin.file:
        path: /tmp/k9s
        state: directory
        owner: root
        group: root
        mode: "0744"

    - name: Download k9s
      ansible.builtin.get_url:
        url: "https://github.com/derailed/k9s/releases/download/{{ k9s_version | default('v0.32.5') }}/k9s_Linux_amd64.tar.gz"
        dest: /tmp/k9s/k9s_Linux_amd64.tar.gz
        owner: root
        group: root
        mode: "0744"

    - name: Unpack k9s binary
      ansible.builtin.unarchive:
        src: /tmp/k9s/k9s_Linux_amd64.tar.gz
        dest: /tmp/k9s
        remote_src: true

    - name: Add k9s to root path
      ansible.builtin.copy:
        src: /tmp/k9s/k9s
        dest: /usr/bin/k9s
        mode: u+rwx
        remote_src: true

  always:
    # Runs even if a step above fails, so the staging directory never lingers
    - name: Cleanup k9s install directory
      ansible.builtin.file:
        path: /tmp/k9s
        state: absent

ansible/roles/passwords/defaults/main.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ slurm_appliance_secrets:
88
vault_openhpc_mungekey: "{{ secrets_openhpc_mungekey | default(vault_openhpc_mungekey | default(secrets_openhpc_mungekey_default)) }}"
99
vault_freeipa_ds_password: "{{ vault_freeipa_ds_password | default(lookup('password', '/dev/null')) }}"
1010
vault_freeipa_admin_password: "{{ vault_freeipa_admin_password | default(lookup('password', '/dev/null')) }}"
11+
vault_k3s_token: "{{ vault_k3s_token | default(lookup('ansible.builtin.password', '/dev/null', length=64)) }}"
1112

1213
secrets_openhpc_mungekey_default:
1314
content: "{{ lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') }}"

ansible/roles/passwords/tasks/main.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
delegate_to: localhost
88
run_once: true
99

10-
# - name: Ensure munge key directory exists
11-
# file:
12-
# state: directory
13-
# recurse: true
14-
# path: "{{ openhpc_passwords_mungekey_output_path | dirname }}"
10+
- name: Get templated passwords from target environment
11+
# inventory group/host vars created in a play cannot be accessed in the same play, even after meta: refresh_inventory
12+
ansible.builtin.include_vars:
13+
file: "{{ openhpc_passwords_output_path }}"
1514

16-
# - name: Create a munge key
17-
# copy:
18-
# content: "{{ lookup('password', '/dev/null chars=ascii_letters,digits,hexdigits,punctuation') }}"
19-
# dest: "{{ openhpc_passwords_mungekey_output_path }}"
20-
# force: false
15+
- name: Template k3s token to terraform
16+
template:
17+
src: k3s-token.auto.tfvars.json.j2
18+
dest: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/terraform/k3s-token.auto.tfvars.json"
19+
delegate_to: localhost
20+
run_once: true
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"k3s_token": "{{ vault_k3s_token }}"
3+
}

docs/k3s.README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Overview
2+
A K3s cluster is deployed with the Slurm cluster. Both an agent and a server instance of K3s are installed during image build and the correct service (determined by OpenStack metadata) will be
3+
enabled during boot. Nodes with the `k3s_server` metadata field defined will be configured as K3s agents (this field gives them the address of the server). The Slurm control node is currently configured as a server while all other nodes are configured as agents. Using multiple K3s servers isn't supported. Currently only the root user on the control node has
4+
access to the Kubernetes API. The `k3s` role installs Helm for package management. K9s is also installed in the image and can be used by the root user.
5+
6+
# Idempotency
7+
K3s is intended to only be installed during image build as it is configured by the appliance on first boot with `azimuth_cloud.image_utils.linux_ansible_init`. Therefore, the `k3s` role isn't
8+
idempotent and changes to variables will not be reflected in the image when running `site.yml`.

environments/.caas/hooks/pre.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
---
22

# Generate k3s token
# NB: Although this generates a new token on each run, the actual token set in metadata is retrieved from a set-once tofu resource, hence only the first value ever generated is relevant.
- name: Generate k3s token
  hosts: openstack
  tasks:
    # 64-character random value; '/dev/null' means it is not persisted to a file
    - ansible.builtin.set_fact:
        k3s_token: "{{ lookup('ansible.builtin.password', '/dev/null', length=64) }}"
10+
311
# Provision the infrastructure using Terraform
412
- name: Provision infrastructure
513
hosts: openstack
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"cluster_image": {
3-
"RL8": "openhpc-RL8-241115-1209-097cdae1",
4-
"RL9": "openhpc-RL9-241115-1209-097cdae1"
3+
"RL8": "openhpc-RL8-241118-0918-4538c6df",
4+
"RL9": "openhpc-RL9-241118-0918-4538c6df"
55
}
66
}

environments/.stackhpc/terraform/main.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ variable "volume_backed_instances" {
5454
default = false
5555
}
5656

# Required input (no default); forwarded unchanged to the `cluster` module.
variable "k3s_token" {
  description = "k3s token passed through to the cluster module"
  type        = string
}
60+
5761
data "openstack_images_image_v2" "cluster" {
5862
name = var.cluster_image[var.os_version]
5963
most_recent = true
@@ -69,6 +73,7 @@ module "cluster" {
6973
key_pair = "slurm-app-ci"
7074
cluster_image_id = data.openstack_images_image_v2.cluster.id
7175
control_node_flavor = var.control_node_flavor
76+
k3s_token = var.k3s_token
7277

7378
login_nodes = {
7479
login-0: var.other_node_flavor

environments/common/inventory/groups

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,5 +136,11 @@ freeipa_client
136136
[ansible_init]
137137
# Hosts to run linux-ansible-init
138138

139+
[k3s]
140+
# Hosts to run k3s server/agent
141+
142+
[k9s]
143+
# Hosts to install k9s on
144+
139145
[lustre]
140146
# Hosts to run lustre client

environments/common/layouts/everything

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,13 @@ openhpc
8282
# Hosts to run ansible-init
8383
cluster
8484

85+
[k3s:children]
86+
# Hosts to run k3s server/agent
87+
openhpc
88+
89+
[k9s:children]
90+
# Hosts to install k9s on
91+
control
92+
8593
[lustre]
8694
# Hosts to run lustre client

0 commit comments

Comments
 (0)