2019独角兽企业重金招聘Python工程师标准>>>
一、基本介绍
xCAT是一个集群管理工具,xCAT能提供云、集群、HPC、系统网络、数据中心、在线游戏基础设施的管理。
xCAT使管理员能够:
- 1 发现硬件服务器
- 2 执行远程系统管理
- 3 在物理机或虚拟机上配置操作系统
- 4 以Diskful(有状态)和Diskless(无状态)方式部署机器
- 5 安装和配置用户应用程序
- 6 并行系统管理
- 7 将xCAT集成到云中

xCAT官方网站是 http://xcat.org ,更多详细介绍可以到官网仔细查看。
二、安装过程
官网现在最新版本是2.13,并且官网提供了go-xcat和在线的repo进行安装,但是我在centos7.2 版本中使用官方提供的源在线安装时候,总是报perl版本不对应的问题,故我下载了以前的2.8.5版本进行安装,具体步骤如下:
1、环境准备
| ip | hostname | 介绍 |
| -------- | -----: | :----: |
| 10.240.198.125 | master | 主节点 Centos7.2系统 |
| 10.240.198.94 | node01 | 从节点 Centos7.2系统 |
2、关闭selinux,iptables
[root@master ~]# setenforce 0
[root@master ~]# cat /etc/sysconfig/selinux
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - No SELinux policy is loaded.
SELINUX=disabled
# SELINUXTYPE= can take one of three two values:
# targeted - Targeted processes are protected,
# minimum - Modification of targeted policy. Only selected processes are protected.
# mls - Multi Level Security protection.
SELINUXTYPE=targeted
[root@master ~]# systemctl stop iptables
Failed to stop iptables.service: Unit iptables.service not loaded.
[root@master ~]#
3、下载源文件,进行安装
[root@master xcat]# cd /etc/yum.repos.d/
[root@master yum.repos.d]# wget http://sourceforge.net/projects/xcat/files/yum/2.8/xcat-core/xCAT-core.repo
[root@master yum.repos.d]# wget http://sourceforge.net/projects/xcat/files/yum/xcat-dep/rh7/x86_64/xCAT-dep.repo
[root@master yum.repos.d]# ls
bak CentOS-CR.repo CentOS-fasttrack.repo CentOS-Vault.repo xCAT-core.repo
CentOS-Base.repo CentOS-Debuginfo.repo CentOS-Media.repo myself.repo xCAT-dep.repo
[root@master yum.repos.d]#
4、创建镜像源文件
[root@master cdrom]# mkdir -p /mnt/cdrom
[root@master cdrom]# mkdir -p /mnt/iso
[root@master cdrom]# mv /root/redhat6u2.iso /mnt/iso/
[root@master ~]# ll /mnt/iso/redhat6u2.iso
-rwxr-xr-x. 1 root root 3916431360 Feb 28 10:55 /mnt/iso/redhat6u2.iso
# redhat6u2.iso 是centos6.8 DVD版本的镜像,改名成为redhat6u2.iso了
[root@master cdrom]# mount -o loop /mnt/iso/redhat6u2.iso /mnt/cdrom/
[root@master cdrom]# cd /etc/yum.repos.d/
[root@master cdrom]# mkdir bak
[root@master cdrom]# mv CentOS-Sources.repo bak/
[root@master cdrom]# touch myself.repo
[root@master cdrom]# vi myself.repo
[houqd]
name=RedHat
baseurl=file:///mnt/cdrom
enabled=1
gpgcheck=0
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-CentOS-6
5、安装xCAT,我的已经安装完毕
[root@master yum.repos.d]# yum clean all
Loaded plugins: fastestmirror
Cleaning repos: base extras houqd updates xcat-2-core xcat-dep
Cleaning up everything
Cleaning up list of fastest mirrors
[root@master yum.repos.d]# yum -y install xCAT
Loaded plugins: fastestmirror
base | 3.6 kB 00:00:00
extras | 3.4 kB 00:00:00
houqd | 4.0 kB 00:00:00
updates | 3.4 kB 00:00:00
xcat-2-core | 2.5 kB 00:00:00
xcat-dep | 2.5 kB 00:00:00
(1/8): houqd/group_gz | 226 kB 00:00:00
(2/8): houqd/primary_db | 4.7 MB 00:00:00
extras/7/x86_64/primary_db FAILED
http://mirrors.zju.edu.cn/centos/7.3.1611/extras/x86_64/repodata/edfe2a5db6b2276d712979d76b7e6a0fa87a020d39ee0e7dcc68b35d794fd752-primary.sqlite.bz2: [Errno 14] HTTP Error 503 - Service Unavailable
Trying other mirror.
(3/8): xcat-2-core/primary_db | 25 kB 00:00:00
(4/8): xcat-dep/primary_db | 30 kB 00:00:00
(5/8): base/7/x86_64/group_gz | 155 kB 00:00:00
(6/8): extras/7/x86_64/primary_db | 122 kB 00:00:00
(7/8): updates/7/x86_64/primary_db | 2.9 MB 00:00:00
(8/8): base/7/x86_64/primary_db | 5.6 MB 00:00:01
Determining fastest mirrors
* base: centos.ustc.edu.cn
* extras: mirrors.zju.edu.cn
* updates: centos.ustc.edu.cn
Package xCAT-2.8.5-snap201409010230.x86_64 already installed and latest version
Nothing to do
[root@master yum.repos.d]#
6、检查xCAT是否安装正确,由于我的master以前安装配置过,可能会多一些选项。
[root@master yum.repos.d]# tabdump site
#key,value,comments,disable
"blademaxp","64",,
"fsptimeout","0",,
"installdir","/install",,
"ipmimaxp","64",,
"ipmiretries","3",,
"ipmitimeout","2",,
"consoleondemand","no",,
"master","10.240.198.125",, # 应该为master的ip
"forwarders","8.8.8.8,10.240.196.10",,
"nameservers","10.240.198.125",, # 应该为master的ip
"maxssh","8",,
"ppcmaxp","64",,
"ppcretry","3",,
"ppctimeout","0",,
"powerinterval","0",,
"syspowerinterval","0",,
"sharedtftp","1",,
"SNsyncfiledir","/var/xcat/syncfiles",,
"nodesyncfiledir","/var/xcat/node/syncfiles",,
"tftpdir","/tftpboot",,
"xcatdport","3001",,
"xcatiport","3002",,
"xcatconfdir","/etc/xcat",,
"timezone","Asia/Chungking",,
"useNmapfromMN","no",,
"enableASMI","no",,
"db2installloc","/mntdb2",,
"databaseloc","/var/lib",,
"sshbetweennodes","ALLGROUPS",,
"dnshandler","ddns",,
"vsftp","n",,
"cleanupxcatpost","no",,
"dhcplease","43200",,
"ntpservers","10.240.198.125",,
"domain","cluster.com",,
"dhcpinterfaces","10.240.198.125|eth0",,
[root@master yum.repos.d]#
三、 部署系统
1、配置xcat的相关服务(集群服务包括:network服务、NTP服务、DNS服务、集群管理服务)
[root@master ~]# tabdump networks
#netname,net,mask,mgtifname,gateway,dhcpserver,tftpserver,nameservers,ntpservers,logservers,dynamicrange,staticrange,staticrangeincrement,nodehostname,ddnsdomain,vlanid,domain,comments,disable
"2002:97b:c2bb:830::/64","2002:97b:c2bb:830::/64","/64","eth0",,,,,,,,,,,,,,,
"10_240_196_0-255_255_252_0","10.240.196.0","255.255.252.0","eth0","10.240.196.1",,"10.240.198.125",,,,,,,,,,,,
# 注:直接用tabedit networks修改
2、 配置NTP(网络时间同步)
[root@master ~]#
[root@master ~]# mv /etc/ntp.conf /etc/ntp.conf.bak
[root@master ~]# vi /etc/ntp.conf
server 10.240.198.125 # Local clock
fudge 10.240.198.125 stratum 10
driftfile /var/lib/ntp/drift
[root@master ~]# systemctl restart ntpd
[root@master ~]# systemctl enable ntpd.service
将ntp服务器记录到site表里:
[root@master ~]# chtab key=ntpservers site.value=10.240.198.125
[root@master ~]# tabdump site
#key,value,comments,disable
"blademaxp","64",,
"fsptimeout","0",,
"installdir","/install",,
"ipmimaxp","64",,
"ipmiretries","3",,
"ipmitimeout","2",,
"consoleondemand","no",,
"master","10.240.198.125",,
"forwarders","8.8.8.8,10.240.196.10",,
"nameservers","NORESOLUTION",,
"maxssh","8",,
"ppcmaxp","64",,
"ppcretry","3",,
"ppctimeout","0",,
"powerinterval","0",,
"syspowerinterval","0",,
"sharedtftp","1",,
"SNsyncfiledir","/var/xcat/syncfiles",,
"nodesyncfiledir","/var/xcat/node/syncfiles",,
"tftpdir","/tftpboot",,
"xcatdport","3001",,
"xcatiport","3002",,
"xcatconfdir","/etc/xcat",,
"timezone","Asia/Chungking",,
"useNmapfromMN","no",,
"enableASMI","no",,
"db2installloc","/mntdb2",,
"databaseloc","/var/lib",,
"sshbetweennodes","ALLGROUPS",,
"dnshandler","ddns",,
"vsftp","n",,
"cleanupxcatpost","no",,
"dhcplease","43200",,
"ntpservers","10.240.198.125",, ### 这个是多出来的一行
"domain","cluster.com",,
"dhcpinterfaces","10.240.198.125|eth0",,
3、如果希望xcat安装的节点也自动设置好NTP服务,需要在postscripts表中加入setupntp的脚本:
[root@master ~]# tabdump postscripts
#node,postscripts,postbootscripts,comments,disable
"xcatdefaults","syslog,remoteshell,syncfiles","otherpkgs",,
"service","servicenode",,,
[root@master ~]# chtab node=xcatdefaults postscripts.postscripts=syslog,remoteshell,setupntp
[root@master ~]# tabdump postscripts
#node,postscripts,postbootscripts,comments,disable
"xcatdefaults","syslog,remoteshell,setupntp","otherpkgs",,
"service","servicenode",,,
4、设置DNS
如果系统中安装了bind-chroot的软件包,会与xCAT冲突,所以如果安装了bind-chroot需要把它卸载掉:
[root@master ~]# rpm -qa | grep bind-chroot
[root@master ~]#
[root@master ~]# rpm -e bind-chroot
在site表中定义DNS服务器Ip地址为10.240.198.125,域名为cluster.com
[root@master ~]# cat /etc/sysconfig/network
# Created by anaconda
NETWORKING=yes
HOSTNAME=master
DOMAINNAME=cluster.com
[root@master ~]# cat /etc/rc.local
#!/bin/bash
# THIS FILE IS ADDED FOR COMPATIBILITY PURPOSES
#
# It is highly advisable to create own systemd services or udev rules
# to run scripts during boot instead of using this file.
#
# In contrast to previous versions due to parallel execution during boot
# this script will NOT be run after all other services.
#
# Please note that you must run 'chmod +x /etc/rc.d/rc.local' to ensure
# that this script will be executed during boot.
touch /var/lock/subsys/local
/bin/domainname cluster.com
6、配置DNS解析,编辑/etc/hosts,把集群中所有节点都加进去
[root@master ~]# chtab key=domain site.value=cluster.com
[root@master ~]# vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.240.198.125 master master.cluster.com
10.240.198.94 node01
7、配置/etc/resolv.conf,把nameserver指向管理服务器自己
[root@master ~]# vi /etc/resolv.conf
search cluster.com
nameserver 10.240.198.125
[root@master ~]# systemctl restart named
[root@master ~]# chkconfig named on
Note: Forwarding request to 'systemctl enable named.service'.
# 运行nslookup进行解析
[root@master ~]# nslookup node01
Server: 10.240.198.125
Address: 10.240.198.125#53
Name: node01.cluster.com
Address: 10.240.198.94
8、定义计算节点
[root@master ~]# tabdump nodelist
#node,groups,status,statustime,appstatus,appstatustime,primarysn,hidden,comments,disable
[root@master ~]# chtab node=node01 nodelist.groups=compute,all
[root@master ~]# tabdump nodelist
#node,groups,status,statustime,appstatus,appstatustime,primarysn,hidden,comments,disable
"node01","compute,all",,,,,,,,
9、添加MAC表
[root@master ~]# tabdump mac
#node,interface,mac,comments,disable
[root@master ~]# nodeadd node01 groups=compute,all mac.interface=eth0 hosts.ip=10.240.198.94 mac.mac=42:9a:b9:e0:bc:1a nodehm.mgt=ipmi nodehm.power=ipmi
[root@master ~]# tabdump mac
#node,interface,mac,comments,disable
"node01","eth0","42:9a:b9:e0:bc:1a",,
10、配置
[root@master ~]# makehosts
[root@master ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.240.198.125 master master.cluster.com
10.240.198.94 node01 node01.cluster.com # 多加了这个
[root@master ~]# makehosts -n
[root@master ~]# vi /etc/hosts
127.0.0.1 localhost
10.240.198.94 node01 node01.cluster.com
[root@master ~]# makedhcp -n
Renamed existing dhcp configuration file to /etc/dhcp/dhcpd.conf.xcatbak
The dhcp server must be restarted for OMAPI function to work
Warning: No dynamic range specified for 222.31.64.0. If hardware discovery is being used, a dynamic range is required.
Warning: No dynamic range specified for 10.240.198.0. If hardware discovery is being used, a dynamic range is required.
[root@master ~]# chkconfig dhcpd on
Note: Forwarding request to 'systemctl enable dhcpd.service'.
11、设置noderes表,在noderes表中,定义计算节点node01的引导方式是pxe,管理节点是10.240.198.125,安装使用的网卡是eth0,系统的主网卡也是eth0,nfs服务器是10.240.198.125:
[root@master ~]# tabdump noderes
#node,servicenode,netboot,tftpserver,tftpdir,nfsserver,monserver,nfsdir,installnic,primarynic,discoverynics,cmdinterface,xcatmaster,current_osimage,next_osimage,nimserver,routenames,comments,disable
[root@master ~]# chtab node=node01 noderes.netboot=pxe noderes.xcatmaster=10.240.198.125 noderes.installnic=eth0 noderes.primarynic=eth0 noderes.nfsserver=10.240.198.125
[root@master ~]# tabdump noderes
#node,servicenode,netboot,tftpserver,tftpdir,nfsserver,monserver,nfsdir,installnic,primarynic,discoverynics,cmdinterface,xcatmaster,current_osimage,next_osimage,nimserver,routenames,nameservers,proxydhcp,comments,disable
"node01",,"pxe",,,"10.240.198.125",,,"eth0","eth0",,,"10.240.198.125",,,,,,,,
11、设置nodetype表,定义计算节点node01的操作系统是rhels6u2,架构是x86_64,使用的profile是compute
[root@master ~]# tabdump nodetype
#node,os,arch,profile,provmethod,supportedarchs,nodetype,comments,disable
[root@master ~]# chtab node=node01 nodetype.os=rhels6u2 nodetype.arch=x86_64 nodetype.profile=compute nodetype.nodetype=osi
[root@master ~]# tabdump nodetype
#node,os,arch,profile,provmethod,supportedarchs,nodetype,comments,disable
"node01","rhels6u2","x86_64","compute","rhels6u2-x86_64-install-compute",,"osi",
12、定义postscripts表
[root@master ~]# chtab node=xcatdefaults postscripts.postscripts="syslog,remoteshell,syncfiles,setupntp"
[root@master ~]# chtab node=compute postscripts.postscripts="hardeths,configRSH,configService,configNFS"
[root@master ~]# tabdump postscripts
#node,postscripts,postbootscripts,comments,disable
"xcatdefaults","syslog,remoteshell,syncfiles,setupntp","otherpkgs",,
"service","servicenode",,,
"compute","hardeths,configRSH,configService,configNFS",,,
[root@master ~]# chtab node=compute postscripts.postscripts="setupGang,reboot"
[root@master ~]# tabdump postscripts
#node,postscripts,postbootscripts,comments,disable
"xcatdefaults","syslog,remoteshell,syncfiles,setupntp","otherpkgs",,
"service","servicenode",,,
"compute","setupGang,reboot",,,
13、配置passwd表
[root@master ~]# chtab key=system passwd.username=root passwd.password=dell_456
[root@master ~]# chtab key=blade passwd.username=USERID passwd.password=PASSWORD
[root@master ~]# chtab key=ipmi passwd.username=USERID passwd.password=PASSWORD
注:可以不用,因为在虚拟机下没有管理模块,blade(刀片)、ipmi(智能平台管理接口),如果是IBM的机架和刀片服务器就可以用了
[root@master ~]# tabdump passwd
#key,username,password,cryptmethod,comments,disable
"omapi","xcat_key","VDR5OVpVaGdQcGhwbGd0Qko3eklDTGprb015eVRWNGI=",,,
"system","root","dell_456",,,
"blade","USERID","PASSWORD",,,
"ipmi","USERID","PASSWORD",,,
14、 配置DHCP
[root@master ~]# chtab key=dhcpinterfaces site.value='10.240.198.125|eth0'
[root@master ~]# makedhcp compute
[root@master ~]# systemctl restart dhcpd
15、复制安装文件
[root@master ~]# copycds *.iso # 中间会有拷贝进度条
Copying media to /install/rhels6.2/x86_64/
# 注:其实不是完全将iso中的东西拷贝到该目录下,中间还是改变了一些东西,文件大小有变化。
[root@master ~]# systemctl restart httpd
16、实现定制安装,修改/opt/xcat/share/xcat/install/rh/compute.tmpl,确定如下几项
[root@master ~]# cat /opt/xcat/share/xcat/install/rh/compute.tmpl
#[email protected]
#
#cmdline ###注释掉
lang en_US
#langsupport en_US ##注释掉
network --bootproto dhcp
#
# Where's the source?
# nfs --server hostname.of.server or IP --dir /path/to/RH/CD/image
#
#nfs --server #XCATVAR:INSTALL_NFS# --dir #XCATVAR:INSTALL_SRC_DIR#
#url --url http://#TABLE:noderes:$NODE:nfsserver#/install/#TABLE:nodetype:$NODE:os#/#TABLE:nodetype:$NODE:arch#
#INSTALL_SOURCES#
#device ethernet e100
keyboard "us"
#
# Clear the MBR
#
zerombr yes
#
# Wipe out the disk
#
clearpart --all --initlabel
#clearpart --linux
key --skip
#
# Customize to fit your needs
#
#XCAT_PARTITION_START#
#No RAID
#/boot really significant for this sort of setup nowadays?
#part /boot --size 50 --fstype ext3
part swap --size 1024
part / --size 1 --grow --fstype ext3
#XCAT_PARTITION_END#
#RAID 0 /scr for performance
#part / --size 1024 --ondisk sda
#part swap --size 512 --ondisk sda
#part /var --size 1024 --ondisk sdb
#part swap --size 512 --ondisk sdb
#part raid.01 --size 1 --grow --ondisk sda
#part raid.02 --size 1 --grow --ondisk sdb
#raid /scr --level 0 --device md0 raid.01 raid.02
#Full RAID 1 Sample
#part raid.01 --size 50 --ondisk sda
#part raid.02 --size 50 --ondisk sdb
#raid /boot --level 1 --device md0 raid.01 raid.02
#
#part raid.11 --size 1024 --ondisk sda
#part raid.12 --size 1024 --ondisk sdb
#raid / --level 1 --device md1 raid.11 raid.12
#
#part raid.21 --size 1024 --ondisk sda
#part raid.22 --size 1024 --ondisk sdb
#raid /var --level 1 --device md2 raid.21 raid.22
#
#part raid.31 --size 1024 --ondisk sda
#part raid.32 --size 1024 --ondisk sdb
#raid swap --level 1 --device md3 raid.31 raid.32
#
#part raid.41 --size 1 --grow --ondisk sda
#part raid.42 --size 1 --grow --ondisk sdb
#raid /scr --level 1 --device md4 raid.41 raid.42
#
# bootloader config
# --append
# --useLilo
# --md5pass
#
bootloader
#
# install or upgrade
#
install
#
# text mode install (default is graphical)
#
text
#
# firewall
#
firewall --disabled ###防火墙关闭
#
# mouse selection
#
#mouse genericps/2 --emulthree
#mouse none ### 注释掉
#
# Select a zone
# Add the --utc switch if your hardware clock is set to GMT
#
#timezone US/Hawaii
#timezone US/Pacific
#timezone US/Mountain
#timezone US/Central
#timezone US/Eastern
timezone --utc "#TABLE:site:key=timezone:value#"
#
# Don't do X
#
skipx
#
# To generate an encrypted root password use:
#
# perl -e 'print crypt("blah","Xa") . "\n";'p
# openssl passwd -apr1 -salt xxxxxxxx password
#
# where "blah" is your root password.
#
#rootpw --iscrypted XaLGAVe1C41x2
#rootpw XaLGAVe1C41x2 --iscrypted
rootpw --iscrypted #CRYPT:passwd:key=system,username=root:password#
#
# NIS setup: auth --enablenis --nisdomain sensenet
# --nisserver neptune --useshadow --enablemd5
#
# OR
auth --useshadow --enablemd5
#
# SE Linux
#
selinux --disabled ###selinux关闭
#
# Reboot after installation
#
reboot
#
#end of section
#
%packages ### 删掉 --resolvedeps
##INCLUDE_DEFAULT_PKGLIST#
%pre
#INCLUDE:#ENV:XCATROOT#/share/xcat/install/scripts/pre.rh#
%post
#INCLUDE:#ENV:XCATROOT#/share/xcat/install/scripts/post.rh#
17、修改kickstart文件
[root@master rh]# pwd
/opt/xcat/share/xcat/install/rh
[root@master rh]# cp compute.rhels6.tmpl compute.rhels6u2.tmpl
[root@master rh]# cp compute.rhels6.pkglist compute.rhels6u2.pkglist
18、开始分发
[root@master rh]# nodeset node01 osimage=rhels6u2-x86_64-install-compute
node01: install rhels6u2-x86_64-compute
#查看node节点的安装状态
[root@master ~]# nodestat node01
node01: sshd # 说明安装成功
19、client进行重启,并设置network为第一启动项,然后就可以install 系统了。
20、安装完成后,使用用户名root、密码dell_456登录。查看系统版本,发现系统已经发生改变
[root@node01 ~]# cat /etc/redhat-release
CentOS release 6.8 (Final)