# Self-written SLURM cluster install script
#!/bin/sh
# Bootstrap a two-node SLURM cluster from the head node.
# Run as root, in a clean environment (or run the 'clean' script first).
# NOTE(review): the *_IP values below are placeholders — replace '*' with
# the real host octet before running.
HEADNODE_IP=192.168.192.*
NODE1_IP=192.168.192.*
SLURM_DIR=/home/slurm
mkdir -p "$SLURM_DIR"
# Abort if we cannot enter the work dir — everything below assumes it.
cd "$SLURM_DIR" || exit 1
# Fetch the munge and slurm source tarballs unless already present.
# (The original if-statements were missing 'then' — a syntax error.)
if [ ! -f "munge_0.5.10.orig.tar.bz2" ]; then
    wget https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/munge/0.5.10-1/munge_0.5.10.orig.tar.bz2
fi
if [ ! -f "slurm-18.08.5-2.tar.bz2" ]; then
    wget https://download.schedmd.com/slurm/slurm-18.08.5-2.tar.bz2
fi
# Install build dependencies (RHEL/CentOS family only).
if grep -q 'Red Hat' /proc/version; then
    yum install -y gcc openssl-devel readline-devel
    # was 'bzip2-devel.bz2*' — no such package name exists
    yum install -y bzip2-devel*
    yum install -y zlib-devel*
    yum install -y pam*
    yum install -y perl*
    yum install -y rpm-build
fi
# Create the munge and slurm users with FIXED UID/GID — the head node and
# all compute nodes must agree on these IDs for munge authentication.
export MUNGEUSER=991
groupadd -g "$MUNGEUSER" munge
# was '-d /bar/lib/munge' — typo for /var/lib/munge
useradd -m -d /var/lib/munge -u "$MUNGEUSER" -g munge -s /sbin/nologin munge
export SLURMUSER=992
groupadd -g "$SLURMUSER" slurm
useradd -m -d /var/lib/slurm -u "$SLURMUSER" -g slurm -s /bin/bash slurm
# Register both nodes in /etc/hosts and set this machine's hostname.
file="/etc/hosts"
# Show the current addresses for operator sanity-checking.
ifconfig | grep '192.168.192.'
# (Original logic was inverted: it warned when the file EXISTED and only
# appended when it was missing.)
if [ -f "$file" ]; then
    printf '\n%s worker\n%s worker1\n' "$HEADNODE_IP" "$NODE1_IP" >> "$file"
else
    echo "can not find hosts file" >&2
fi
hostnamectl --static set-hostname worker
echo 'install munge'
# rpmbuild -tb wants a name it recognises, so normalise the tarball name.
mv munge_0.5.10.orig.tar.bz2 munge-0.5.10.tar.bz2
rpmbuild -tb --clean munge-0.5.10.tar.bz2
cd /root/rpmbuild/RPMS/x86_64 || exit 1
rpm --install munge*.rpm
echo 'install mariadb'
yum -y install mariadb-server mariadb-devel
echo 'create munge key'
/usr/sbin/create-munge-key -r
# munged refuses to start with loose permissions: the key must be 0400
# and the log DIRECTORY 0700 (the original chmod 400'd the directory too,
# which makes it unenterable).
chown munge: /etc/munge/munge.key /var/log/munge
chmod 400 /etc/munge/munge.key
chmod 700 /var/log/munge
echo 'install slurm'
# The cwd is /root/rpmbuild/RPMS/x86_64 after the munge install above, but
# the slurm tarball was downloaded into $SLURM_DIR — address it by full path.
rpmbuild -ta --clean "$SLURM_DIR/slurm-18.08.5-2.tar.bz2"
cd /root/rpmbuild/RPMS/x86_64 || exit 1
rpm --install slurm-*.rpm
#configure slurm.conf
# The slurm rpms do not ship /etc/slurm/slurm.conf, so create the directory
# and write the cluster config unconditionally. (The original 'if' had no
# closing 'fi' — a syntax error — and only wrote the file when it already
# existed.) The heredoc is unquoted on purpose: $HEADNODE_IP / $NODE1_IP
# must expand.
mkdir -p /etc/slurm
cat > /etc/slurm/slurm.conf <<EOF
ControlMachine=worker
ControlAddr=$HEADNODE_IP
MpiDefault=none
ProctrackType=proctrack/pgid
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm
StateSaveLocation=/var/spool/slurmctld
SwitchType=switch/none
TaskPlugin=task/none
FastSchedule=1
SchedulerType=sched/backfill
SelectType=select/linear
AccountingStorageType=accounting_storage/none
ClusterName=buhpc
JobAcctGatherType=jobacct_gather/none
SlurmctldLogFile=/var/log/slurmctld.log
SlurmdLogFile=/var/log/slurmd.log
NodeName=worker NodeAddr=$HEADNODE_IP CPUs=1 Procs=1 State=UNKNOWN
NodeName=worker1 NodeAddr=$NODE1_IP CPUs=1 Procs=1 State=UNKNOWN
PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
EOF
# Push the shared munge key and slurm.conf to the compute node.
scp /etc/munge/munge.key root@worker1:/etc/munge/
scp /etc/slurm/slurm.conf root@worker1:/etc/slurm/
# Create slurmctld state and log locations owned by the slurm user.
# (In the original all of these were jammed onto ONE line with no command
# separators, so everything was parsed as arguments to a single mkdir.)
mkdir -p /var/spool/slurmctld
chown slurm: /var/spool/slurmctld
chmod 755 /var/spool/slurmctld
touch /var/log/slurmctld.log
chown slurm: /var/log/slurmctld.log
touch /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
chown slurm: /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
#start slurm at head node
# NOTE(review): stopping firewalld outright is heavy-handed; opening the
# slurm/munge ports would be safer — kept to preserve the original intent.
systemctl stop firewalld
# enable was missing for munge: without it, authentication dies on reboot.
systemctl enable munge
systemctl start munge
systemctl enable slurmd.service
systemctl start slurmd.service
systemctl enable slurmctld.service
systemctl start slurmctld.service
# References:
# OpenMPI is a high-performance message-passing library
# https://www.cnblogs.com/aaron-agu/p/5700650.html