Skip to content

Update slurm.conf #161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ jobs:
matrix:
image:
- 'centos:7'
- 'rockylinux:8.6'
- 'rockylinux:8.7'
- 'rockylinux:8.8'
scenario:
- test1
- test1b
Expand Down
160 changes: 160 additions & 0 deletions files/slurm.conf.ohpc
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#
# Example slurm.conf file. Please run configurator.html
# (in doc/html) to build a configuration file customized
# for your environment.
#
#
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
# GENERAL
# Only the uncommented Key=Value lines in this section are active; lines that
# start with '#' are template options left unset.
# NOTE(review): 'cluster' and 'linux0' look like the stock example values -
# presumably overridden per deployment; confirm.
ClusterName=cluster
SlurmctldHost=linux0
#SlurmctldHost=
#
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=67043328
#GresTypes=
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=lua
#KillOnBadExit=0
#LaunchType=launch/slurm
#Licenses=foo*4,bar
#MailProg=/bin/mail
#MaxJobCount=10000
#MaxStepCount=40000
#MaxTasksPerNode=512
MpiDefault=none
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
# Process tracking uses the cgroup plugin.
ProctrackType=proctrack/cgroup
#Prolog=
#PrologFlags=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#RebootProgram=
# Daemon PID files, listening ports, spool/state directories and service user.
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm
#SlurmdUser=root
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/var/spool/slurmctld
SwitchType=switch/none
#TaskEpilog=
TaskPlugin=task/affinity
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFS=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#
#
# TIMERS
# Active timers in this section: InactiveLimit, KillWait, MinJobAge,
# SlurmctldTimeout, SlurmdTimeout and Waittime. The commented-out entries are
# not set by this file.
# NOTE(review): values are presumably in seconds - confirm against the
# slurm.conf man page.
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
#MessageTimeout=10
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
SlurmctldTimeout=120
SlurmdTimeout=300
#UnkillableStepTimeout=60
#VSizeFactor=0
Waittime=0
#
#
# SCHEDULING
# Active settings: the backfill scheduler plus the cons_tres select plugin
# with the CR_Core parameter. The memory-per-CPU and time-slice options are
# left unset.
#DefMemPerCPU=0
#MaxMemPerCPU=0
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SelectType=select/cons_tres
SelectTypeParameters=CR_Core
#
#
# JOB PRIORITY
# Every option in this section is commented out, so this file sets no
# priority-related values.
#PriorityFlags=
#PriorityType=priority/basic
#PriorityDecayHalfLife=
#PriorityCalcPeriod=
#PriorityFavorSmall=
#PriorityMaxAge=
#PriorityUsageResetPeriod=
#PriorityWeightAge=
#PriorityWeightFairshare=
#PriorityWeightJobSize=
#PriorityWeightPartition=
#PriorityWeightQOS=
#
#
# LOGGING AND ACCOUNTING
# Active settings: no accounting storage backend, job accounting gathering
# disabled (jobacct_gather/none), and info-level logging for slurmctld and
# slurmd to files under /var/log.
#AccountingStorageEnforce=0
#AccountingStorageHost=
#AccountingStoragePass=
#AccountingStoragePort=
AccountingStorageType=accounting_storage/none
#AccountingStorageUser=
#AccountingStoreFlags=
#JobCompHost=
#JobCompLoc=
#JobCompPass=
#JobCompPort=
# JobCompType is intentionally not set here: the OpenHPC block at the end of
# this file assigns JobCompType=jobcomp/filetxt, and defining the key twice
# with different values is ambiguous for readers and for tools that edit
# options by name.
#JobCompType=jobcomp/none
#JobCompUser=
#JobContainerType=job_container/none
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=info
SlurmctldLogFile=/var/log/slurmctld.log
SlurmdDebug=info
SlurmdLogFile=/var/log/slurmd.log
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#DebugFlags=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
# Every option in this section is commented out, so this file configures no
# suspend/resume programs, rates or timeouts.
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
#
#
# COMPUTE NODES
# OpenHPC default configuration
# TaskPlugin=task/affinity is not repeated in this block: the identical
# assignment already appears earlier in this file, so a second copy would only
# introduce a duplicate key.
PropagateResourceLimitsExcept=MEMLOCK
JobCompType=jobcomp/filetxt
Epilog=/etc/slurm/slurm.epilog.clean
# Example node definition: c1..c4, each 2 sockets x 8 cores x 2 threads.
NodeName=c[1-4] Sockets=2 CoresPerSocket=8 ThreadsPerCore=2 State=UNKNOWN
# Single default partition 'normal' over the same nodes, with a 24-hour time
# limit and Oversubscribe=EXCLUSIVE.
PartitionName=normal Nodes=c[1-4] Default=YES MaxTime=24:00:00 State=UP Oversubscribe=EXCLUSIVE
SlurmctldParameters=enable_configless
ReturnToService=1
5 changes: 2 additions & 3 deletions molecule/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ test14 | 1 | N | As for #5 but also tests `extr
Local installation on a RockyLinux 8.x machine looks like:

sudo dnf install -y podman
sudo dnf install podman-plugins # required for DNS
sudo yum install -y git
git clone git@github.com:stackhpc/ansible-role-openhpc.git
cd ansible-role-openhpc/
python3.8 -m venv venv
python3.9 -m venv venv
. venv/bin/activate
pip install -U pip
pip install -r molecule/requirements.txt
Expand All @@ -44,8 +45,6 @@ Then to run tests, e.g.::
MOLECULE_IMAGE=centos:7 molecule test --all # NB: some won't work as they require OpenHPC v2.x (-> CentOS 8.x) features - see `.github/workflows/ci.yml`
MOLECULE_IMAGE=rockylinux:8.6 molecule test --all

**NB:** If the host network has an MTU smaller than 1500 (the docker default), check that `molecule.yml` for the relevant test contains `DOCKER_MTU`, then prepend `DOCKER_MTU=<mtu>` to your command. If you have already run molecule you will need to destroy the instances and run `docker network prune` before retrying.

During development you may want to:

- See some debugging information by prepending:
Expand Down
3 changes: 2 additions & 1 deletion molecule/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pip
setuptools
molecule[podman,lint,ansible]
molecule[lint,ansible]
molecule-plugins[podman]
ansible>=2.9.0
4 changes: 0 additions & 4 deletions molecule/test12/molecule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
- name: testohpc-compute-0
image: ${MOLECULE_IMAGE}
pre_build_image: true
Expand Down
12 changes: 0 additions & 12 deletions molecule/test14/molecule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
- name: testohpc-compute-0
image: ${MOLECULE_IMAGE}
pre_build_image: true
Expand All @@ -30,10 +26,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
- name: testohpc-compute-1
image: ${MOLECULE_IMAGE}
pre_build_image: true
Expand All @@ -46,10 +38,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
provisioner:
name: ansible
verifier:
Expand Down
12 changes: 0 additions & 12 deletions molecule/test5/molecule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
- name: testohpc-compute-0
image: ${MOLECULE_IMAGE}
pre_build_image: true
Expand All @@ -30,10 +26,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
- name: testohpc-compute-1
image: ${MOLECULE_IMAGE}
pre_build_image: true
Expand All @@ -46,10 +38,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
provisioner:
name: ansible
verifier:
Expand Down
4 changes: 0 additions & 4 deletions molecule/test6/molecule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ platforms:
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup:ro
network: net1
docker_networks:
- name: net1
driver_options:
com.docker.network.driver.mtu: ${DOCKER_MTU:-1500} # 1500 is docker default
provisioner:
name: ansible
inventory:
Expand Down
Loading