slurm.conf系统初始配置

# Slurm cluster configuration
## Cluster name
ClusterName=myslurm
## Hostname of the primary controller
ControlMachine=node11
## IP address of the primary controller
ControlAddr=192.168.80.11
## Hostname of the backup controller
BackupController=node12
## IP address of the backup controller
BackupAddr=192.168.80.12
#
## User account the slurmd daemon runs as
SlurmdUser=root
## Port slurmctld listens on
SlurmctldPort=6817
## Port slurmd uses for communication
SlurmdPort=6818
## Authentication plugin used between Slurm components (auth/none or auth/munge)
AuthType=auth/munge
## Extra authentication settings for communication between the Slurm daemons
## and the Slurm clients. The value is a comma-separated list of key=value
## options (cred_expire=, socket=, ttl=); the MUNGE daemon socket path is
## supplied through the socket= key.
AuthInfo=socket=/var/run/munge/munge.socket.2
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
## Directory where the cluster state files are saved (on the global/shared file system)
StateSaveLocation=/usr/local/globle/softs/slurm/16.05.3/state
## Directory slurmd uses for its own spool/state files on each node
SlurmdSpoolDir=/var/spool/slurmd
## Interconnect (switch) plugin; switch/none selects no special interconnect support
SwitchType=switch/none
## Default MPI type used when a job step does not request one
MpiDefault=none
## Scheduling
# Maximum time (in seconds) a batch job is allowed to take to launch before it is
# considered missing and its allocated resources are released.
# Default is 10 seconds. A larger value may be needed when the prolog runs, the user
# environment is loaded, or slurmd has been paged out of memory.
#BatchStartTimeout=10
#BurstBufferType=burst_buffer/none
#CheckpointType=checkpoint/blcr|none|ompi
###
## Location of the slurmctld PID file
SlurmctldPidFile=/var/run/slurmctld.pid
## Location of the slurmd PID file
SlurmdPidFile=/var/run/slurmd.pid
## Process-tracking plugin: process-group based tracking is active,
## the cgroup-based alternative is left commented out
#ProctrackType=proctrack/cgroup
ProctrackType=proctrack/pgid
#PluginDir=
## First job ID to assign
FirstJobId=1
## Controls when a DOWN node is returned to service. Per the slurm.conf man page,
## 2 lets a DOWN node become available again as soon as it registers with a
## valid configuration, whatever the reason it was set DOWN.
ReturnToService=2
## Maximum number of jobs slurmctld tracks at one time
## (this is a job count, not the highest job ID - that would be MaxJobId)
MaxJobCount=10000
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
#TaskPlugin=
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=
#
# TIMERS
## Controller communication timeout (seconds): how long the backup controller
## waits for the primary to respond before taking over
SlurmctldTimeout=60
## slurmd communication timeout (seconds): how long slurmctld waits for a
## slurmd to respond before marking its node DOWN
SlurmdTimeout=60
## Seconds an unresponsive job allocation command may stay inactive before the
## job is terminated; 0 disables the check
InactiveLimit=0
## Minimum age (seconds) of a completed job record before it is purged
MinJobAge=300
## Seconds between SIGTERM and SIGKILL when a job reaches its time limit
KillWait=30
## Seconds srun waits after the first task ends before terminating the rest;
## 0 means wait indefinitely
Waittime=0
#
# SCHEDULING
## Scheduler plugin: backfill scheduling
SchedulerType=sched/backfill
#SchedulerAuth=
#SchedulerPort=
#SchedulerRootFilter=
## Resource selection plugin: select/linear allocates whole nodes to jobs
SelectType=select/linear
## NOTE(review): 1 should make scheduling rely on the node values configured in
## this file rather than the values each node reports - confirm against the
## slurm.conf man page for the deployed version
FastSchedule=1
## Multifactor job priority settings - all disabled (commented out)
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
#
# LOGGING
## slurmctld log verbosity, given as a numeric level
## NOTE(review): 6 presumably corresponds to "debug2" - confirm for this Slurm version
SlurmctldDebug=6
## slurmctld log file (under the same tree as StateSaveLocation)
SlurmctldLogFile=/usr/local/globle/softs/slurm/16.05.3/log/SlurmctldLogFile
## slurmd log verbosity, same numeric scale as SlurmctldDebug
SlurmdDebug=6
## slurmd log file, local to each node
SlurmdLogFile=/var/log/SlurmdLogFile
## Job completion logging plugin; jobcomp/none disables it
JobCompType=jobcomp/none
#JobCompLoc=
#
# ACCOUNTING
#JobAcctGatherType=jobacct_gather/cgroup
## Job accounting sampling interval (seconds)
JobAcctGatherFrequency=30

## Accounting storage settings
# Available storage types: filetxt, none, slurmdbd
AccountingStorageType=accounting_storage/slurmdbd
## Primary and backup hosts of the accounting storage (slurmdbd) service
AccountingStorageHost=192.168.80.13
AccountingStorageBackupHost=192.168.80.14
AccountingStorageLoc=slurm_acct_db
#AccountingStoragePass=liwanliang
## With slurmdbd and MUNGE authentication this holds the MUNGE socket path
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStorageUser=slurmadmin
## Enforce association limits and QOS at job submission.
## The option name is "limits"; valid values are associations, limits,
## nojobs, nosteps, qos, safe, wckeys and all.
AccountingStorageEnforce=limits,qos
#AccountingStorageTRES=gres/craynetwork,license/iop1,cpu,energy,mem,node
#AccountingStoreJobComment=yes
## Node-level accounting sampling interval (seconds)
AcctGatherNodeFreq=180
#AcctGatherEnergyType=acct_gather_energy/none|ipmi|rapl
#AcctGatherInfinibandType=acct_gather_infiniband/none|ofed
#AcctGatherFilesystemType=acct_gather_filesystem/none|lustre
#AcctGatherProfileType=acct_gather_profile/none|hdf5
#AllowSpecResourceUsage=0|1
#
# COMPUTE NODES
## node11-node14 share one definition: 4 sockets x 1 core x 1 thread = 4 CPUs,
## RealMemory=900 (megabytes per the slurm.conf man page)
## NOTE(review): Procs=4 looks like a redundant legacy spelling of CPUs=4 -
## confirm it can be dropped
NodeName=node[11-14] CPUs=4 RealMemory=900 Sockets=4 CoresPerSocket=1 ThreadsPerCore=1 Procs=4 State=UNKNOWN
## Default partition: node11-node12, no time limit
PartitionName=q_x86_1 Nodes=node[11-12] Default=YES MaxTime=INFINITE State=UP
## Second partition: node13-node14, no time limit
PartitionName=q_x86_2 Nodes=node[13-14] MaxTime=INFINITE State=UP

转载于:https://www.cnblogs.com/liwanliangblog/p/9201624.html

你可能感兴趣的:(slurm.conf系统初始配置)