xcat 安装(linux高性能刀片集群管理软件)

xcat非常方便,hp有cmu,当然那个是花钱的,xcat是ibm前两年开源的一个软件,功能可以说还是够强大的。

1、先删除掉冲突软件

xCAT2和操作系统自带的OpenIPMI-tools和tftp-server不兼容,需要把这两个包删除。

同时bind-chroot和dhcp-chroot软件包如果存在也会导致问题,需要删除。

# rpm -e --nodeps  tftp-server-0.49-2

2、xCAT2软件安装

创建xcat的yum安装源

把xcat上传到服务器,本例是/root/xcat/

# cd /root/xcat/

# ls

xcat-core-2.6.1.tar.bz2  xcat-dep-201105100946.tar.bz2

# tar -xjvf xcat-core-2.6.1.tar.bz2

# tar -xjvf xcat-dep-201105100946.tar.bz2

1)创建xcat-core的yum安装源

# cd xcat-core

#./mklocalrepo.sh

2)创建xcat-dep的yum安装源

#cd ../xcat-dep/rh5/x86_64

#./mklocalrepo.sh

 

安装xCAT

# yum install xCAT

根据提示进行安装即可.

默认安装到/opt/xcat目录中

测试

#source /etc/profile.d/xcat.sh

#tabdump  不带参数时将列出xcat所有可配置的表;读取site表使用 tabdump site,输出如下:

#key,value,comments,disable

"blademaxp","64",,

"fsptimeout","0",,

"installdir","/install",,

"ipmimaxp","64",,

"ipmiretries","3",,

"ipmitimeout","2",,

"consoleondemand","no",,

"master","172.16.1.1",,

"forwarders","172.16.1.1",,

"nameservers","172.16.1.1",,

"maxssh","8",,

"ppcmaxp","64",,

"ppcretry","3",,

"ppctimeout","0",,

"sharedtftp","1",,

"SNsyncfiledir","/var/xcat/syncfiles",,

"tftpdir","/tftpboot",,

"xcatdport","3001",,

"xcatiport","3002",,

"xcatconfdir","/etc/xcat",,

"timezone","Asia/Shanghai",,

"useNmapfromMN","no",,

"enableASMI","no",,

"db2installloc","/mntdb2",,

"databaseloc","/var/lib",,

"sshbetweennodes","ALLGROUPS",,

"dnshandler","ddns",,

"vsftp","y",,

节点网络安装配置

设置site

执行添加命令

#chtab key=ntpservers site.value=172.16.1.1

#chtab key=dhcpinterfaces site.value='manager1|eth1'

#chtab key=domain site.value=cluster.net

#tabdump site  查看site

#key,value,comments,disable

"blademaxp","64",,

"fsptimeout","0",,

"installdir","/install",,

"ipmimaxp","64",,

"ipmiretries","3",,

"ipmitimeout","2",,

"consoleondemand","no",,

"master","172.16.1.1",,

"forwarders","172.16.1.1",,

"nameservers","172.16.1.1",,

"maxssh","8",,

"ppcmaxp","64",,

"ppcretry","3",,

"ppctimeout","0",,

"sharedtftp","1",,

"SNsyncfiledir","/var/xcat/syncfiles",,

"tftpdir","/tftpboot",,

"xcatdport","3001",,

"xcatiport","3002",,

"xcatconfdir","/etc/xcat",,

"timezone","Asia/Shanghai",,

"useNmapfromMN","no",,

"enableASMI","no",,

"db2installloc","/mntdb2",,

"databaseloc","/var/lib",,

"sshbetweennodes","ALLGROUPS",,

"dnshandler","ddns",,

"vsftp","y",,

"ntpservers","172.16.1.1",,

"domain","cluster.net",,

"dhcpinterfaces","manager1|eth1",,

设置networks

先查看一下networks

[root@manager1 yum.repos.d]#tabdump networks

#netname,net,mask,mgtifname,gateway,dhcpserver,tftpserver,nameservers,ntpservers,logservers,dynamicrange,nodehostname,ddnsdomain,vlanid,comments,disable

"172_16_1_0-255_255_255_0","172.16.1.0","255.255.255.0","eth1","172.16.1.253","172.30.1.129","172.16.1.1","172.16.1.1","172.16.1.1",,,,,,,

"169_254_95_0-255_255_255_0","169.254.95.0","255.255.255.0","usb0","<xcatmaster>",,"169.254.95.120","172.16.1.1",,,,,,,,

按环境改

# chtab net=172.16.1.0 networks.dhcpserver=172.16.1.1

# chtab net=172.16.1.0 networks.ntpservers=172.16.1.1

# chtab net=172.16.1.0 networks.logservers=172.16.1.1

# chtab net=172.16.1.0 networks.tftpserver=172.16.1.1

使用这类命令来设置,建议

netname,net,mask,mgtifname,dhcpserver,tftpserver,nameservers,ntpservers,logservers这些都设置上。

注意:

当不设置dynamicrange这个字段时,系统可以按照mac地址一对一的分配IP地址。不过在makedhcp时会有一条告警,不用管它。(建议方式)

设置dynamicrange这个字段时,系统不能按照mac地址一对一的分配IP地址。(在集群连接用户现有网络时,可能会产生灾难性的后果)

设置passwd密码表

#chtab key=system passwd.username=root passwd.password=root123 (用于每个xcat安装出来的节点)

#chtab key=blade passwd.username=USERID passwd.password=PASSW0RD (对应刀箱的AMM管理模块)

#chtab key=ipmi passwd.username=USERID passwd.password=PASSW0RD (对应机架式服务器的IMM管理模块)

[root@manager1 yum.repos.d]#tabdump passwd  查看password

#key,username,password,cryptmethod,comments,disable

"system","root","root123",,,

"blade","USERID","PASSW0RD",,,

设置nodelist

#nodeadd ON001-ON128 groups=all,compute

#nodeadd L001-L002 groups=all,compute

#nodeadd manager1-manager2  groups=all,compute

[root@serv ~]# tabdump nodelist  检查nodelist

[root@manager1 yum.repos.d]#tabdump nodelist

#node,groups,status,statustime,appstatus,appstatustime,primarysn,hidden,comments,disable

"BCE1","mm","booting","03-31-201223:58:58",,,,,,

"BCE2","mm","booting","03-31-201223:58:58",,,,,,

"BCE3","mm","booting","03-31-201223:58:58",,,,,,

"BCE4","mm","booting","03-31-201223:58:58",,,,,,

"BCE5","mm","booting","03-31-201223:58:58",,,,,,

"BCE6","mm","booting","03-31-201223:58:58",,,,,,

"BCE7","mm","booting","03-31-201223:58:58",,,,,,

"BCE8","mm","booting","03-31-201223:58:58",,,,,,

"BCE9","mm","booting","03-31-201223:58:58",,,,,,

"BCE10","mm","booting","03-31-201223:58:58",,,,,,

"ON001","all,compute",,,,,,,,

"ON002","all,compute",,,,,,,,

。。。。。。

"ON127","all,compute",,,,,,,,

"ON128","all,compute",,,,,,,,

"L001","all,compute",,,,,,,,

"L002","all,compute",,,,,,,,

"manager001","all,compute",,,,,,,,

"manager002","all,compute",,,,,,,,

设置noderes

#chtab node=compute noderes.netboot=pxe noderes.nfsserver=manager1 noderes.monserver=manager1 noderes.installnic=eth1 noderes.primarynic=eth1 noderes.xcatmaster=manager1 noderes.servicenode=manager1

[root@manager1 yum.repos.d]#tabdump noderes   查看noderes

#node,servicenode,netboot,tftpserver,nfsserver,monserver,nfsdir,installnic,primarynic,discoverynics,cmdinterface,xcatmaster,current_osimage,next_osimage,nimserver,routenames,comments,disable

"ON001",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

"ON002",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

。。。。。。

"ON126",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

"ON127",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

"ON128",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

"manager1",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

"LG01",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

"LG02",,"pxe",,"172.16.1.1","172.16.1.1",,"eth1","eth1","eth1",,"172.16.1.1","172.16.1.1",,,,,

servicenode,tftpserver,nfsserver,monserver,installnic,primarynic,xcatmaster

discoverynics可根据具体环境,不是必须。建议配置以上服务

设置nodetype

[root@manager1 yum.repos.d]#tabdump nodetype

#node,os,arch,profile,provmethod,supportedarchs,nodetype,comments,disable

"ON001","rhels5u5","x86_64","compute",,,,,

"ON002","rhels5u5","x86_64","compute",,,,,

。。。。。。

"ON126","rhels5u5","x86_64","compute",,,,,

"ON127","rhels5u5","x86_64","jss",,,,,

"ON128","rhels5u5","x86_64","oracle",,,,,

#chtab node=节点名 nodetype.os=rhels5u5 nodetype.arch=x86_64 nodetype.profile=compute nodetype.nodetype=osi

在nodetype表中,定义计算节点ON001-ON126的操作系统是rhels5.5 x86_64,使用的profile是compute,节点类型是osi(OS image)

注意这里的nodetype.os、nodetype.profile这两个字段和kickstart文件名关联,如上设置,则:

kickstart文件名为:compute.rhels5u5.tmpl

kickstart软件包列表的文件名为:compute.rhels5u5.pkglist

compute.tmpl+compute.pkglist 存放位置

/opt/xcat/share/xcat/install/rh

设置postscripts

这个表指定在kickstart安装前,rpm包安装后和安装完成后第一次启动时执行的脚本,不同集群的环境定制全都在这里做。

[root@manager1 etc]# tabdump postscripts

#node,postscripts,postbootscripts,comments,disable

"xcatdefaults","syslog,remoteshell,syncfiles,setupntp","otherpkgs",,

"service","servicenode,xcatserver,xcatclient",,,

"compute","hardeths,configRSH,configService",,,

postscripts这列表示rpm包安装后执行的脚本。

postbootscripts这列表示安装完成后第一次启动时执行的脚本。

其中,xcatdefaults和service这两段为系统默认就有

xcatdefaults这个段代表所有节点都要执行的脚本

service是服务节点需要执行的脚本,服务节点是在较大(也许256节点以上)的集群环境中配置的,主要是分担管理节点的压力。

所有脚本位于/install/postscripts目录下

xcatdefaults默认的postscripts为"syslog,remoteshell,syncfiles"。

1)syslog脚本把各节点的log重定向到管理节点。

2)remoteshell脚本设置ssh无密码互通。注意xcat默认情况下root从管理节点ssh到各被管节点ssh无密码,但是从被管节点ssh到管理节点是要密码的。

3)syncfiles脚本指定安装过程中要同步的文件。但还需要其他相关配置,详细后面说明。

xcatdefaults默认的postbootscripts为"otherpkgs",指定了要安装的非操作系统光盘里的标准的rpm包,这也需要其他相关配置才行。

更改如下:

#chtab node=xcatdefaults postscripts.postscripts="syslog,remoteshell,syncfiles,setupntp"

这里增加一个setupntp,指定所有的节点都要设置ntp时间同步。

#chtab node=compute  postscripts.postscripts="hardeths,configRSH,configService,configNFS"

这里自定义了compute这一段,他代表计算节点(必须要和nodelist表中groups字段的定义一致),

定义了rpm包安装后执行hardeths,configRSH,configService,configNFS几个脚本。

这几个脚本中hardeths为系统默认就有,它指定节点设置固定IP地址,否则默认是DHCP的。

其他脚本要自己写的, configRSH,configService,configNFS是自己写的脚本。

configRSH是节点在安装时配置RSH无密码。

configService是节点在安装时配置需要起停的系统服务。

configNFS是节点在安装时需要在/etc/fstab定义的NFS文件系统,此次实施没有配置

这些脚本的内容请看附录。

注意这些脚本要放在/install/postscripts目录下,别忘了给他们加上可执行属性。

#chtab node=compute  postscripts.postbootscripts="setupGang,reboot"

定义了安装完成后第一次启动执行setupGang,reboot两个脚本。

setupGang是配置节点安装ganglia客户端,脚本内容参看附录,此次实施没有执行

reboot就是重启,默认就有这个脚本。

再检查一下:

#tabdump postscripts

#node,postscripts,postbootscripts,comments,disable

"xcatdefaults","configRSH,configService,setupntp,syslog,remoteshell,syncfiles","otherpkgs",,

"service","servicenode,xcatserver,xcatclient",,,

"compute","hardeths,configRSH,configService,setupntp","reboot",,

设置配置文件同步目录

对于redhat系统来说,创建一个/install/custom/install/rh目录,这里放要安装的额外rpm包和同步列表文件。

在本目录写一个同步列表文件,这个文件的文件名是有规矩的,和nodetype表的定义有关,格式如下:

nodetype.profile+nodetype.os+nodetype.arch+synclist

本例中,文件名为:compute.rhels5u5.x86_64.synclist,文件内容如下:

/etc/hosts -> /etc/hosts

设置刀片中心

如果计算节点是刀片

1)给每个管理模块配置IP地址

2)把这些AMM的IP加入管理节点的hosts文件

3)nodeadd把每个AMM加入xcat管理,使用和刀片计算节点不同的groups。

4)配置每个管理模块的ssh和snmp

#rspconfig $管理模块名字 snmpcfg=enable sshcfg=enable

rspconfig   BCE1   snmpcfg=enable  sshcfg=enable

rspconfig   BCE2   snmpcfg=enable  sshcfg=enable

rspconfig   BCE3   snmpcfg=enable  sshcfg=enable

rspconfig   BCE4   snmpcfg=enable  sshcfg=enable

rspconfig   BCE5  snmpcfg=enable  sshcfg=enable

rspconfig   BCE6  snmpcfg=enable  sshcfg=enable

rspconfig   BCE7   snmpcfg=enable  sshcfg=enable

rspconfig   BCE8   snmpcfg=enable  sshcfg=enable

rspconfig   BCE9   snmpcfg=enable  sshcfg=enable

rspconfig   BCE10  snmpcfg=enable  sshcfg=enable

#rpower 管理模块名字 reset

rpower  BCE1    reset

在执行rspconfig命令时,如果遇到Cannot communicate with XXXX的提示,需要用浏览器登到管理模块,启用SNMPv1

设置nodehm

#chtab node=ON001  nodehm.mgt=blade

[root@manager1 postscripts]#tabdump nodehm  查看nodehm

#node,power,mgt,cons,termserver,termport,conserver,serialport,serialspeed,serialflow,getmac,comments,disable

"BCE1","blade","blade",,,,,,,,,,

"BCE2","blade","blade",,,,,,,,,,

"BCE3","blade","blade",,,,,,,,,,

"BCE4","blade","blade",,,,,,,,,,

"BCE5","blade","blade",,,,,,,,,,

"BCE6","blade","blade",,,,,,,,,,

"BCE7","blade","blade",,,,,,,,,,

"BCE8","blade","blade",,,,,,,,,,

"BCE9","blade","blade",,,,,,,,,,

"BCE10","blade","blade",,,,,,,,,,

"ON001","blade","blade",,,,,,,,,,

"ON002","blade","blade",,,,,,,,,,

。。。。。。

"ON127","blade","blade",,,,,,,,,,

"ON128","blade","blade",,,,,,,,,,

"manager2","blade","blade",,,,,,,,,,

"manager1","blade","blade",,,,,,,,,,

"L001","blade","blade",,,,,,,,,,

"L002","blade","blade",,,,,,,,,,

设置mp表

[root@manager1 xcat_conf]#

#chtab node=ON005  mp.mpa=BCE1  mp.id=5

#tabdump mp  查看mp

#node,mpa,id,comments,disable

"BCE1","BCE1",,,

"BCE2","BCE2",,,

"BCE3","BCE3",,,

"BCE4","BCE4",,,

"BCE5","BCE5",,,

"BCE6","BCE6",,,

"BCE7","BCE7",,,

"BCE8","BCE8",,,

"BCE9","BCE9",,,

"BCE10","BCE10",,,

"ON001","BCE1","1",,

"ON002","BCE1","2",,

。。。。。。

"ON125","BCE9","13",,

"ON126","BCE9","14",,

"ON127","BCE10","2",,

"ON128","BCE10","3",,

"manager1","BCE10","14",,

"manager2","BCE10","12",,

"L001","BCE10","13",,

"L002","BCE10","4",,

设置mpa表

# chtab mpa.mpa=BCE1 mpa.username=USERID mpa.password=PASSW0RD

# tabdump mpa

#mpa,username,password,comments,disable

"BCE1","USERID","PASSW0RD",,

"BCE2","USERID","PASSW0RD",,

"BCE3","USERID","PASSW0RD",,

"BCE4","USERID","PASSW0RD",,

"BCE5","USERID","PASSW0RD",,

"BCE6","USERID","PASSW0RD",,

"BCE7","USERID","PASSW0RD",,

"BCE8","USERID","PASSW0RD",,

"BCE9","USERID","PASSW0RD",,

"BCE10","USERID","PASSW0RD",,

设置dns

#makedns -n

#makedns compute

#chkconfig named on

测试一下

#nslookup

注意:

管理节点的DNS服务一定要正常,否则白搭。

设置dhcp

#makedhcp -n

#makedhcp compute 或者makedhcp all

如果只设置某个节点,可以如下:

#makedhcp node01

修改kickstart文件

进入/opt/xcat/share/xcat/install/rh

#cp compute.rhels6.tmpl compute.rhels5u5.tmpl

#cp compute.rhels6.pkglist compute.rhels5u5.pkglist

(针对jss服务器为jss.rhels5u5.tmpl; jss.rhels5u5.pkglist.针对oracle服务器为oracle.rhels5u5.tmpl; oracle.rhels5u5.pkglist.)

其中.tmpl是kickstart文件,.pkglist是指定需要装哪些软件包

 

修改compute.rhels5u5.tmpl文件,

1)改分区划分,注意rhel5不支持ext4

[root@manager1 rh]# more compute.rhels5u5.tmpl

#[email protected]

#cmdline

lang en_US

#

# Where's the source?

# nfs --server hostname.of.server or IP --dir /path/to/RH/CD/image

#

#nfs --server#XCATVAR:INSTALL_NFS# --dir #XCATVAR:INSTALL_SRC_DIR#

url --url http://#TABLE:noderes:$NODE:nfsserver#/install/#TABLE:nodetype:$NODE:os#/#TABLE:nodetype:$NODE:arch#

#device ethernet e100

keyboard "us"

#

# Clear the MBR

#

zerombr yes

#

# Wipe out the disk

#

clearpart --all --initlabel

#clearpart --linux

key 0206063501adfc06

 

#

# Customize to fit your needs

#

#No RAID

#/boot really significant for this sort of setup nowadays?

#part /boot --size 50 --fstype ext3

part /    --size 40960 --fstype ext3 --asprimary

part /boot --size 1024 --fstype ext3

part /tmp --size 1024 --fstype ext3

part swap --size 24576

part /var --size 2048 --fstype ext3

part /wglogs --size 2048 --fstype ext3

part /local1/scr --size 174080 --fstype ext3

part /local2/scr --size 309600 --fstype ext3

#part / --size 1 --grow --fstype ext4

#RAID 0 /scr for performance

#part / --size 1024 --ondisk sda

#part swap --size 512 --ondisk sda

#part /var --size 1024 --ondisk sdb

#part swap --size 512 --ondisk sdb

#part raid.01 --size 1 --grow --ondisk sda

#part raid.02 --size 1 --grow --ondisk sdb

#raid /scr --level 0 --device md0 raid.01 raid.02

#Full RAID 1 Sample

#part raid.01 --size 50 --ondisk sda

#part raid.02 --size 50 --ondisk sdb

#raid /boot --level 1 --device md0 raid.01 raid.02

#

#part raid.11 --size 1024 --ondisk sda

#part raid.12 --size 1024 --ondisk sdb

#raid / --level 1 --device md1 raid.11 raid.12

#

#part raid.21 --size 1024 --ondisk sda

#part raid.22 --size 1024 --ondisk sdb

#raid /var --level 1 --device md2 raid.21 raid.22

#

#part raid.31 --size 1024 --ondisk sda

#part raid.32 --size 1024 --ondisk sdb

#raid swap --level 1 --device md3 raid.31 raid.32

#

#part raid.41 --size 1 --grow --ondisk sda

#part raid.42 --size 1 --grow --ondisk sdb

#raid /scr --level 1 --device md4 raid.41 raid.42

#

# bootloader config

# --append <args>

# --useLilo

# --md5pass <crypted MD5 password for GRUB>

#

bootloader

#

# install or upgrade

#

install

#

# text mode install (default is graphical)

#

text

#

# firewall

#

firewall --disabled

#

# Select a zone

# Add the --utc switch if your hardware clock is set to GMT

#

#timezone US/Hawaii

#timezone US/Pacific

#timezone US/Mountain

#timezone US/Central

#timezone US/Eastern

timezone --utc "#TABLE:site:key=timezone:value#"

#

# Don't do X

#

skipx

#

# To generate an encrypted root password use:

#

# perl -e 'print crypt("blah","Xa") . "\n";'p

# openssl passwd -apr1 -salt xxxxxxxx password

#

# where "blah" is your root password.

#

#rootpw --iscrypted XaLGAVe1C41x2

#rootpw XaLGAVe1C41x2 --iscrypted

rootpw --iscrypted #CRYPT:passwd:key=system,username=root:password#

#

# NIS setup: auth --enablenis --nisdomain sensenet

# --nisserver neptune --useshadow --enablemd5

#

# OR

auth --useshadow --enablemd5

#

# SE Linux

#

selinux --disabled

#

# Reboot after installation

#

reboot

#

#end of section

#

%packages

#INCLUDE_DEFAULT_PKGLIST#

%pre

#INCLUDE:#ENV:XCATROOT#/share/xcat/install/scripts/pre.rh#

%post

#INCLUDE:#ENV:XCATROOT#/share/xcat/install/scripts/post.rh#

 

2)把cmdline注释掉,这样节点安装时以传统的网络安装字符界面进行,比较顺眼,这不是必须的。

修改compute.rhels5u5.pkglist

如果需要安装所有的rpm包,这个文件只需要写两行,如下:

[root@manager1 rh]# more compute.rhels5u5.pkglist

kickstar_config/compute.rhels5u5.pkglist.201204174

@包名

注意:

1)在rhel5.5之后,不再支持everything这个词了,rhel6也不行。

2)如果选择安装所有包,默认节点会从虚拟化kernel启动。可以在postscripts里自己写一个脚本,让系统从标准kernel启动。

 

复制安装文件

cp *.iso /install

copycds *.iso

mv rhels5.5  rhels5u5 (因为nodetype定义的是rhels5u5; /install/rhels5u5 )

设置安装

#rbootseq ON001-ON128  n,h,c,f  设置启动顺序

#rbootseq ON001-ON128  stat

然后,启动计算节点,从网络启动即可。

rpower compute  off

rinstall ON001-ON126

nodestat  ON001-ON126 (查看各节点状态)

补充:

configRSH脚本

#!/bin/sh
#
# configRSH - xCAT postscript that enables password-less rlogin/rsh/rexec
# on the node being installed.
#
# Steps:
#   1. Turn on the rlogin, rsh and rexec services and allow root logins
#      through them by listing each one in /etc/securetty.
#   2. Restart xinetd.
#   3. Write the trusted-host lists /root/.rhosts and /etc/hosts.equiv.
#
# NOTE: the heredoc bodies below contain a placeholder; replace it with the
# real node names (one per line) before deploying the script.

for svc in rlogin rsh rexec
do
    chkconfig "$svc" on
    # Append only when the entry is missing, so that re-running the
    # postscript does not pile up duplicate lines in /etc/securetty.
    if ! grep -qx "$svc" /etc/securetty 2>/dev/null; then
        echo "$svc" >> /etc/securetty
    fi
done

service xinetd restart

# Both files are overwritten on every run.
cat > /root/.rhosts <<EOF
节点名称
EOF

cat > /etc/hosts.equiv <<EOF
节点名称
EOF

logger -t xCAT "configRSH:autostart RSH successfully"

exit 0

configService脚本

#!/bin/sh
#
# configService - xCAT postscript that disables system services which are
# not wanted on the node, and removes the daily logrotate cron job.
#
# Progress is reported through logger with the tag "xCAT".

# Space-separated list of services to switch off at boot.
services="avahi-daemon gpm hidd hplip isdn mcstrans pcscd restorecond rpcgssd rpcidmapd setroubleshoot sendmail smartd bluetooth cups firstboot iptables ip6tables iscsi iscsid rhnsd yum-updatesd"

logger -t xCAT "confService: starting config services"

# $services is deliberately left unquoted so the shell splits it into one
# word per service name.
for srv in $services
do
    # Best effort: a service that is not installed makes chkconfig fail,
    # which is expected, so output and status are ignored.
    chkconfig "$srv" off > /dev/null 2>&1
done

if [ -f /etc/cron.daily/logrotate ] ; then
    rm -f /etc/cron.daily/logrotate
    logger -t xCAT "confService: remove lorotate crontabdaily"
fi

logger -t xCAT "confService: config services successfully"

exit 0

 

xcat管理节点配置命令

Xcat服务启停

service xcatd (status 、stop、start、restart)

示例:

service xcatd status

构建dhcp服务

makedhcp

示例:

makedhcp compute (compute为节点组名)

构建dns服务

makedns

示例:

makedns compute (compute为节点组名)

查看xcat信息

tabdump

示例:

tabdump nodelist

编辑xcat数据库

tabedit

示例:

tabedit passwd

改变表项的值

chtab

示例:

chtab mpa.mpa=test mpa.username=admin mpa.password=123456

获得计算节点mac地址

getmacs

示例:

getmacs all

nodeadd

nodeadd noderange groups=groupnames [table.column=value] [...]

nodeadd ON001-ON128 groups=all,compute

lsdef  ON001

[root@manager1 ~]# lsdef ON001

Object name: ON001

    arch=x86_64

    groups=all,compute

    id=1

    installnic=eth1

    mac=34:40:B5:81:7F:5A

    mgt=blade

    monserver=172.16.1.1

    mpa=BCE1

    netboot=pxe

    nfsserver=172.16.1.1

    os=rhels5u5

    postbootscripts=otherpkgs

   postscripts=syslog,remoteshell,syncfiles,setupntp,hardeths,configRSH,configService

    power=blade

    primarynic=eth1

    profile=compute

    xcatmaster=172.16.1.1

计算节点并行命令

并行执行命令

psh

Usage: psh [-i <interface>] [-l <user>] <noderange> <command>

示例:

psh ON001-ON006  date

从节点范围中剔除某个节点可以如下使用

示例:

psh ON001-ON006,-ON001 date

并行ping

pping

示例:

pping ON001-ON126

并行拷贝文件

pscp

Usage: pscp [-i <SUFFIX>] [SCP OPTIONS...] FILE... <NODERANGE>:<DESTINATION>

示例:

pscp  example.txt  ON001-ON006,-ON005,-ON004:/root/

并行开关机,查看电源状态

rpower

Usage: rpower <noderange> [--nodeps] [on|onstandby|off|suspend|reset|stat|state|boot]

示例:

rpower ON001-ON126 stat

rpower ON001-ON126 off

并行修改引导顺序

rbootseq

rbootseq <noderange> [hd0|hd1|hd2|hd3|net|iscsi|usbflash|floppy|none],...

示例:

rbootseq ON001-ON126 h,n,c,f

并行安装命令

rinstall

示例:

rinstall ON001-ON126

查看节点状态

nodestat

nodestat [noderange] [-m|--usemon] [-p|--powerstat] [-u|--updatedb]

示例:

nodestat ON001-ON126

欢迎加入
database群:119224876(db china联盟) 虚拟化方面群:229845401(虚拟化-云计算-物联网)

 

你可能感兴趣的:(集群,manager,timezone,脚本,service,Comments)