Skip to content

Commit 04f3bbb

Browse files
committed
template slurm.conf parameters from combined variables
1 parent 0abbf76 commit 04f3bbb

File tree

4 files changed

+44
-177
lines changed

4 files changed

+44
-177
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,12 @@ partition. Each partition mapping may contain:
9393

9494
`openhpc_cluster_name`: name of the cluster.
9595

96-
`openhpc_config`: Optional. Mapping of additional parameters and values for `slurm.conf`. Note these will override any included in `templates/slurm.conf.j2`.
96+
`openhpc_config`: Optional. Mapping of additional parameters and values for
97+
[slurm.conf](https://slurm.schedmd.com/slurm.conf.html). Keys are parameter
98+
names and values are lists or strings as appropriate. This can be used to
99+
supplement or override the template defaults, or to remove a template parameter
100+
by setting the value to `'omit'` - note this is the literal string, not the
101+
Ansible special variable.
97102

98103
`openhpc_ram_multiplier`: Optional, default `0.95`. Multiplier used in the calculation: `total_memory * openhpc_ram_multiplier` when setting `RealMemory` for the partition in slurm.conf. Can be overridden on a per-partition basis using `openhpc_slurm_partitions.ram_multiplier`. Has no effect if `openhpc_slurm_partitions.ram_mb` is set.
99104

defaults/main.yml

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,37 @@ openhpc_packages:
1212
openhpc_resume_timeout: 300
1313
openhpc_retry_delay: 10
1414
openhpc_job_maxtime: '60-0' # quote this to avoid ansible converting some formats to seconds, which is interpreted as minutes by Slurm
15-
openhpc_config: "{{ openhpc_extra_config | default({}) }}"
15+
openhpc_default_config:
16+
# This only defines values which are not Slurm defaults
17+
SlurmctldHost: "{{ openhpc_slurm_control_host }}{% if openhpc_slurm_control_host_address is defined %}({{ openhpc_slurm_control_host_address }}){% endif %}"
18+
ProctrackType: proctrack/linuxproc # TODO: really want cgroup but needs cgroup.conf and workaround for CI
19+
SlurmdSpoolDir: /var/spool/slurm # NB: not OpenHPC default!
20+
SlurmUser: slurm
21+
StateSaveLocation: "{{ openhpc_state_save_location }}"
22+
SlurmctldTimeout: 300
23+
SchedulerType: sched/backfill
24+
SelectType: select/cons_tres
25+
SelectTypeParameters: CR_Core
26+
PriorityWeightPartition: 1000
27+
PreemptType: preempt/partition_prio
28+
PreemptMode: SUSPEND,GANG
29+
AccountingStoragePass: "{{ openhpc_slurm_accounting_storage_pass | default('omit') }}"
30+
AccountingStorageHost: "{{ openhpc_slurm_accounting_storage_host }}"
31+
AccountingStoragePort: "{{ openhpc_slurm_accounting_storage_port }}"
32+
AccountingStorageType: "{{ openhpc_slurm_accounting_storage_type }}"
33+
AccountingStorageUser: "{{ openhpc_slurm_accounting_storage_user }}"
34+
JobCompLoc: "{{ openhpc_slurm_job_comp_loc }}"
35+
JobCompType: "{{ openhpc_slurm_job_comp_type }}"
36+
JobAcctGatherFrequency: "{{ openhpc_slurm_job_acct_gather_frequency }}"
37+
JobAcctGatherType: "{{ openhpc_slurm_job_acct_gather_type }}"
38+
SlurmctldSyslogDebug: info
39+
SlurmdSyslogDebug: info
40+
PropagateResourceLimitsExcept: MEMLOCK
41+
Epilog: /etc/slurm/slurm.epilog.clean
42+
ReturnToService: 2
43+
SlurmctldParameters: "{{ 'enable_configless' if openhpc_slurm_configless else 'omit' }}"
44+
45+
openhpc_config: {}
1646
openhpc_gres_template: gres.conf.j2
1747
openhpc_slurm_configless: "{{ 'enable_configless' in openhpc_config.get('SlurmctldParameters', []) }}"
1848

tasks/runtime.yml

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -80,43 +80,9 @@
8080
notify: Restart slurmdbd service
8181
when: openhpc_enable.database | default(false) | bool
8282

83-
- name: Make local tempfile for slurm.conf templating # ensures simultaneous runs don't clobber each other
84-
ansible.builtin.tempfile:
85-
register: _slurm_conf_tmpfile
86-
delegate_to: localhost
87-
when: openhpc_enable.control | default(false) or not openhpc_slurm_configless | bool
88-
changed_when: false # so molecule doesn't fail
89-
become: no
90-
91-
- name: Template basic slurm.conf
83+
- name: Template slurm.conf
9284
template:
9385
src: slurm.conf.j2
94-
dest: "{{ _slurm_conf_tmpfile.path }}"
95-
lstrip_blocks: true
96-
mode: 0644
97-
delegate_to: localhost
98-
when: openhpc_enable.control | default(false) or not openhpc_slurm_configless | bool
99-
changed_when: false # so molecule doesn't fail
100-
become: no
101-
102-
- name: Customise slurm.conf
103-
community.general.ini_file:
104-
path: "{{ _slurm_conf_tmpfile.path }}"
105-
option: "{{ item.key }}"
106-
section: ''
107-
value: "{{ (item.value | join(',')) if (item.value is sequence and item.value is not string) else item.value }}"
108-
no_extra_spaces: true
109-
create: no
110-
mode: 0644
111-
loop: "{{ openhpc_config | dict2items }}"
112-
delegate_to: localhost
113-
when: openhpc_enable.control | default(false) or not openhpc_slurm_configless | bool
114-
changed_when: false # so molecule doesn't fail
115-
become: no
116-
117-
- name: Create slurm.conf
118-
copy:
119-
src: "{{ _slurm_conf_tmpfile.path }}"
12086
dest: /etc/slurm/slurm.conf
12187
owner: root
12288
group: root

templates/slurm.conf.j2

Lines changed: 6 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -1,152 +1,21 @@
1-
#
2-
# Example slurm.conf file. Please run configurator.html
3-
# (in doc/html) to build a configuration file customized
4-
# for your environment.
5-
#
6-
#
7-
# slurm.conf file generated by configurator.html.
8-
#
9-
# See the slurm.conf man page for more information.
10-
#
111
ClusterName={{ openhpc_cluster_name }}
12-
SlurmctldHost={{ openhpc_slurm_control_host }}{% if openhpc_slurm_control_host_address is defined %}({{ openhpc_slurm_control_host_address }}){% endif %}
132

14-
#DisableRootJobs=NO
15-
#EnforcePartLimits=NO
16-
#EpilogSlurmctld=
17-
#FirstJobId=1
18-
#MaxJobId=67043328
19-
#GresTypes=
20-
#GroupUpdateForce=0
21-
#GroupUpdateTime=600
22-
#JobFileAppend=0
23-
#JobRequeue=1
24-
#JobSubmitPlugins=lua
25-
#KillOnBadExit=0
26-
#LaunchType=launch/slurm
27-
#Licenses=foo*4,bar
28-
#MailProg=/bin/mail
29-
#MaxJobCount=10000
30-
#MaxStepCount=40000
31-
#MaxTasksPerNode=512
32-
MpiDefault=none
33-
#MpiParams=ports=#-#
34-
#PluginDir=
35-
#PlugStackConfig=
36-
#PrivateData=jobs
37-
ProctrackType=proctrack/linuxproc # TODO: really want cgroup but needs cgroup.conf and workaround for CI
38-
#Prolog=
39-
#PrologFlags=
40-
#PrologSlurmctld=
41-
#PropagatePrioProcess=0
42-
#PropagateResourceLimits=
43-
#PropagateResourceLimitsExcept=
44-
#RebootProgram=
45-
SlurmctldPidFile=/var/run/slurmctld.pid
46-
SlurmctldPort=6817
47-
SlurmdPidFile=/var/run/slurmd.pid
48-
SlurmdPort=6818
49-
SlurmdSpoolDir=/var/spool/slurm # NB: not OpenHPC default!
50-
SlurmUser=slurm
51-
#SlurmdUser=root
52-
#SrunEpilog=
53-
#SrunProlog=
54-
StateSaveLocation={{ openhpc_state_save_location }}
55-
SwitchType=switch/none
56-
#TaskEpilog=
57-
#TaskPlugin=task/affinity
58-
#TaskProlog=
59-
#TopologyPlugin=topology/tree
60-
#TmpFS=/tmp
61-
#TrackWCKey=no
62-
#TreeWidth=
63-
#UnkillableStepProgram=
64-
#UsePAM=0
65-
#
66-
#
67-
# TIMERS
68-
#BatchStartTimeout=10
69-
#CompleteWait=0
70-
#EpilogMsgTime=2000
71-
#GetEnvTimeout=2
72-
#HealthCheckInterval=0
73-
#HealthCheckProgram=
74-
InactiveLimit=0
75-
KillWait=30
76-
#MessageTimeout=10
77-
#ResvOverRun=0
78-
MinJobAge=300
79-
#OverTimeLimit=0
80-
SlurmctldTimeout=300
81-
SlurmdTimeout=300
82-
#UnkillableStepTimeout=60
83-
#VSizeFactor=0
84-
Waittime=0
85-
#
86-
#
87-
# SCHEDULING
88-
#DefMemPerCPU=0
89-
#MaxMemPerCPU=0
90-
#SchedulerTimeSlice=30
91-
SchedulerType=sched/backfill
92-
SelectType=select/cons_tres
93-
SelectTypeParameters=CR_Core
94-
#
95-
#
96-
# JOB PRIORITY
97-
#PriorityFlags=
98-
PriorityType=priority/multifactor
99-
#PriorityDecayHalfLife=
100-
#PriorityCalcPeriod=
101-
#PriorityFavorSmall=
102-
#PriorityMaxAge=
103-
#PriorityUsageResetPeriod=
104-
#PriorityWeightAge=
105-
#PriorityWeightFairshare=
106-
#PriorityWeightJobSize=
107-
PriorityWeightPartition=1000
108-
#PriorityWeightQOS=
109-
PreemptType=preempt/partition_prio
110-
PreemptMode=SUSPEND,GANG
111-
#
112-
# LOGGING AND ACCOUNTING
113-
#AccountingStorageEnforce=0
114-
AccountingStorageHost={{ openhpc_slurm_accounting_storage_host }}
115-
{% if openhpc_slurm_accounting_storage_pass | default(false, true) %}
116-
AccountingStoragePass={{ openhpc_slurm_accounting_storage_pass }}
117-
{% endif %}
118-
AccountingStoragePort={{ openhpc_slurm_accounting_storage_port }}
119-
AccountingStorageType={{ openhpc_slurm_accounting_storage_type }}
120-
AccountingStorageUser={{ openhpc_slurm_accounting_storage_user }}
121-
#AccountingStoreFlags=
122-
#JobCompHost=
123-
JobCompLoc={{ openhpc_slurm_job_comp_loc }}
124-
#JobCompPass=
125-
#JobCompPort=
126-
JobCompType={{ openhpc_slurm_job_comp_type }}
127-
#JobCompUser=
128-
#JobContainerType=job_container/none
129-
JobAcctGatherFrequency={{ openhpc_slurm_job_acct_gather_frequency }}
130-
JobAcctGatherType={{ openhpc_slurm_job_acct_gather_type }}
3+
# PARAMETERS
4+
{% for k, v in openhpc_default_config | combine(openhpc_config) | items %}
5+
{% if v != "omit" %}{# allow removing items using setting key: null #}
6+
{{ k }}={{ v | join(',') if (v is sequence and v is not string) else v }}
7+
{% endif %}
8+
{% endfor %}
1319

132-
# By default, SLURM will log to syslog, which is what we want
133-
SlurmctldSyslogDebug=info
134-
SlurmdSyslogDebug=info
135-
#SlurmSchedLogFile=
136-
#SlurmSchedLogLevel=
137-
#DebugFlags=
13810

13911
# LOGIN-ONLY NODES
14012
# Define slurmd nodes not in partitions for login-only nodes in "configless" mode:
14113
{%if openhpc_login_only_nodes %}{% for node in groups[openhpc_login_only_nodes] %}
14214
NodeName={{ node }}
14315
{% endfor %}{% endif %}
14416

145-
PropagateResourceLimitsExcept=MEMLOCK
146-
Epilog=/etc/slurm/slurm.epilog.clean
14717

14818
# COMPUTE NODES
149-
# OpenHPC default configuration
15019
{% for nodegroup in openhpc_nodegroups %}
15120
{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
15221
{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
@@ -185,6 +54,3 @@ PartitionName={{partition.name}} {{ '' -}}
18554
{% endfor %}{# openhpc_partitions #}
18655

18756
{% if openhpc_slurm_configless | bool %}SlurmctldParameters=enable_configless{% endif %}
188-
189-
190-
ReturnToService=2

0 commit comments

Comments
 (0)