Skip to content

Commit 77e8d70

Browse files
authored
Merge branch 'main' into fix/lustre-lu-18085
2 parents 8134653 + de26d54 commit 77e8d70

File tree

3 files changed

+5
-38
lines changed

3 files changed

+5
-38
lines changed

ansible/roles/cuda/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ Requires OFED to be installed to provide required kernel-* packages.
1010

1111
- `cuda_repo_url`: Optional. URL of `.repo` file. Default is upstream for appropriate OS/architecture.
1212
- `cuda_nvidia_driver_stream`: Optional. Version of the `nvidia-driver` stream to enable. This controls both the driver major version and whether the open or proprietary drivers are installed. Changing this after the drivers are installed does not change the installed version.
13-
- `cuda_packages`: Optional. Default: `['cuda', 'nvidia-gds', 'cmake', 'cuda-toolkit-12-8']`.
13+
- `cuda_packages`: Optional. Default: `['cuda', 'nvidia-gds', 'cmake', 'cuda-toolkit-12-9']`.
1414
- `cuda_package_version`: Optional. Default `latest`, which installs the latest packages if they are not already installed but does not upgrade already-installed packages. Use `'none'` to skip installing CUDA.
1515
- `cuda_persistenced_state`: Optional. State of systemd `nvidia-persistenced` service. Values as [ansible.builtin.systemd:state](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/systemd_module.html#parameter-state). Default `started`.

ansible/roles/cuda/defaults/main.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
cuda_repo_url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}/cuda-rhel{{ ansible_distribution_major_version }}.repo"
2-
cuda_nvidia_driver_stream: '570-open'
3-
cuda_package_version: '12.8.1-1'
4-
cuda_version_short: '12.8'
2+
cuda_nvidia_driver_stream: '575-open'
3+
cuda_package_version: '12.9.0-1'
4+
cuda_version_short: '12.9'
55
cuda_packages:
66
- "cuda{{ ('-' + cuda_package_version) if cuda_package_version != 'latest' else '' }}"
77
- nvidia-gds
88
- cmake
9-
- cuda-toolkit-12-8
9+
- cuda-toolkit-12-9
1010
cuda_samples_release_url: "https://github.com/NVIDIA/cuda-samples/archive/refs/tags/v{{ cuda_version_short }}.tar.gz"
1111
cuda_samples_path: "/var/lib/{{ ansible_user }}/cuda_samples"
1212
cuda_samples_programs:

ansible/roles/cuda/tasks/samples.yml

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -25,36 +25,3 @@
2525
cmd: . /etc/profile.d/sh.local && cmake .. && make -j {{ ansible_processor_vcpus }}
2626
chdir: "{{ cuda_samples_path }}/cuda-samples-{{ cuda_version_short }}/build"
2727
creates: "{{ cuda_samples_path }}/cuda-samples-{{ cuda_version_short }}/build/Samples/1_Utilities/deviceQuery/deviceQuery"
28-
29-
- name: Run CUDA deviceQuery
30-
command:
31-
cmd: "{{ cuda_samples_path }}/cuda-samples-{{ cuda_version_short }}/build/Samples/1_Utilities/deviceQuery/deviceQuery"
32-
register: _cuda_devicequery
33-
34-
- name: Set fact for CUDA devices
35-
set_fact:
36-
cuda_devices: "{{ _cuda_devicequery.stdout | regex_findall('Device (\\d+):') }}"
37-
38-
- name: Run CUDA bandwidth test
39-
command:
40-
cmd: "{{ cuda_samples_path }}/cuda-samples-{{ cuda_version_short }}/build/Samples/1_Utilities/bandwidthTest/bandwidthTest --device={{ item }}"
41-
register: _cuda_bandwidthtest
42-
loop: "{{ cuda_devices }}"
43-
loop_control:
44-
label: "Device {{ item }}" # e.g '0'
45-
46-
- name: Summarise bandwidth test output
47-
debug:
48-
msg: |
49-
{{ _parts[1].splitlines()[0] | trim }}
50-
Bandwidths: (Gb/s)
51-
Host to Device: {{ _parts[2].split()[-1] }}
52-
Device to Host: {{ _parts[3].split()[-1] }}
53-
Device to Device: {{ _parts[4].split()[-1] }}
54-
{{ ': '.join(_parts[5].split('=') | map('trim')) }}
55-
{{ _parts[6] }}
56-
loop: "{{ _cuda_bandwidthtest.results }}"
57-
vars:
58-
_parts: "{{ item.stdout.split('\n\n') }}"
59-
loop_control:
60-
label: "Device {{ item.item }}" # e.g '0'

0 commit comments

Comments
 (0)