http://www.educity.cn/net/1616864.html
GPFS文件系统能够横跨在所有主机上,分布在所有磁盘上,条带化读写,高性能。信令管理机制,并发性好。可配置fail组,可用性高。下面是GPFS集群的自动安装部署python代码......
注:该脚本只能自动识别到系统盘之外的物理磁盘,不会识别到分区。
1、gpfs文件包目录结构
脚本中都按照这个目录结构进行安装:脚本位于 scripts/ 目录下,与其同级的 dependency/、base/、update/、rpm/ 目录分别存放依赖包、GPFS 基础包、升级包和与内核版本对应的 gplbin 包。
GPFS通用并行文件系统之Python自动部署GPFS集群
2、脚本执行方法
输入的是拥有存储磁盘的主机名。按回车完成
[root@vuestor01 scripts]# python auto.py
Input node's info. Usage:hosta/192.168.0.101. Press Enter is complete.
Please input Node info: stor01/192.168.2.67
check ip address success!
Input node's info. Usage:hosta/192.168.0.101. Press Enter is complete.
Please input Node info: stor02/192.168.2.85
执行成功后,自动挂载到/vol_data目录下。
[root@vuestor01 scripts]# df -hT
Filesystem Type Size Used Avail Use% Mounted on
/dev/sda2 ext4 197G 1.6G 185G 1% /
tmpfs tmpfs 1.9G 0 1.9G 0% /dev/shm
/dev/sda1 ext4 504M 46M 433M 10% /boot
/dev/vol_data gpfs 1.2T 918M 1.2T 1% /vol_data
[root@stor02 ~]# df -hT
Filesystem Type Size Used Avail Use% Mounted on
/dev/sda2 ext4 197G 1.6G 185G 1% /
tmpfs tmpfs 1.9G 0 1.9G 0% /dev/shm
/dev/sda1 ext4 504M 46M 433M 10% /boot
/dev/vol_data gpfs 1.2T 918M 1.2T 1% /vol_data
3、gpfs自动安装python代码分析
#!/usr/bin/python
import datetime
import os,sys,time
import commands
def output_log(path='loginfo.txt'):
    """Append the current timestamp as one line to the log file.

    Args:
        path: Log file to write to. Defaults to ``loginfo.txt`` in the
            current working directory.

    The file is opened in append mode so that earlier entries survive
    repeated calls.
    """
    # Named ``now`` rather than ``time`` so the imported ``time`` module is
    # not shadowed inside this function.
    now = datetime.datetime.now()
    with open(path, 'a') as f:
        f.write('%s \n' % now)
def check_ip(ipaddr):
    """Validate a dotted-quad IPv4 address string.

    Args:
        ipaddr: Address text such as ``"192.168.0.101"``; surrounding
            whitespace is ignored.

    Returns:
        None. Prints ``check ip address success!`` when the address is made
        of exactly four dot-separated integers, each within 0-255.

    Raises:
        SystemExit: via ``sys.exit()`` after printing
            ``check ip address failed!`` when the address is malformed.
    """
    octets = ipaddr.strip().split('.')
    valid = len(octets) == 4
    if valid:
        for octet in octets:
            try:
                value = int(octet)
            except ValueError:
                # Empty or non-numeric octet: fall through to the common
                # failure message instead of exiting without any output.
                valid = False
                break
            if not 0 <= value <= 255:
                valid = False
                break
    if not valid:
        print("check ip address failed!")
        sys.exit()
    print("check ip address success!")
def install_rpm():
    """Install the GPFS packages and their dependencies on the local node.

    Package files are looked up relative to the current directory
    (``../dependency``, ``../base``, ``../update``, ``../rpm``) and rpm
    output is appended to ``/root/loginfo.txt``.

    Raises:
        SystemExit: via ``sys.exit()`` when the running kernel is neither a
            ``220`` nor a ``358`` build, or when fewer than five gpfs
            packages are installed afterwards.
    """
    log_file = "/root/loginfo.txt"
    print("Install rpm packages...")

    # Every install step logs to the same file.
    dependency_patterns = (
        "nss-softokn-freebl*.rpm",
        "rsh*.rpm",
        "glibc*.rpm",
        "libgcc*.rpm",
        "ksh*.rpm",
        "compat-libstdc++*.rpm",
    )
    for pattern in dependency_patterns:
        os.system("rpm -ivh ../dependency/%s >>%s" % (pattern, log_file))
    os.system("rpm -ivh ../base/gpfs.base*.rpm >>%s" % log_file)
    os.system("rpm -ivh ../base/*noarch.rpm >>%s" % log_file)
    os.system("rpm -Uvh ../update/*.rpm >>%s" % log_file)

    # Pick the prebuilt portability-layer package matching the running kernel.
    is_220, _ = commands.getstatusoutput("uname -r|grep 220")
    is_358, _ = commands.getstatusoutput("uname -r|grep 358")
    if is_220 == 0:
        commands.getstatusoutput("rpm -ivh ../rpm/gpfs.gplbin-2.6.32-220.el6.x86_64-3.4.0-21.x86_64.rpm")
    elif is_358 == 0:
        commands.getstatusoutput("rpm -ivh ../rpm/gpfs.gplbin-2.6.32-358.el6.x86_64-3.4.0-21.x86_64.rpm")
    else:
        print("can't support this kernel!")
        sys.exit()

    # Verify the installation. The command output is text, so it must be
    # converted before the numeric comparison; unparsable output counts as
    # zero installed packages.
    num_status, num_result = commands.getstatusoutput("rpm -qa |grep gpfs|wc -l")
    try:
        installed = int(num_result.strip())
    except ValueError:
        installed = 0
    if installed < 5:
        print("RPM packages check failed!")
        sys.exit()
    print("Done.")
# Node information entered interactively by the operator.
node_dict = {}  # currently unused; kept for compatibility
host_all = []   # hostnames in entry order; host_all[0] must be the local node


def get_nodes():
    """Prompt for ``hostname/ip`` entries until an empty line is entered.

    Each accepted hostname is appended to the module-level ``host_all``.
    The IP of the first entry must be one of the addresses reported by the
    local ``ifconfig``.

    Returns:
        2 once the operator presses Enter on an empty line.

    Raises:
        SystemExit: via ``sys.exit()`` when the hostname is empty, the IP
            part is missing or invalid, or the first IP is not local.
    """
    while True:
        node = raw_input("""Input node's info. Usage: hosta/192.168.0.101. Press Enter is complete.
Please input Node info: """)
        if len(node) == 0:
            return 2
        node_result = node.strip().split('/')

        # Validate hostname and IP before recording the node.
        if len(node_result[0]) == 0:
            print("Hostname is failed!")
            sys.exit()
        if len(node_result) < 2:
            # No "/ip" part was typed; report it like any other bad address
            # instead of failing on the missing list element.
            print("check ip address failed!")
            sys.exit()
        check_ip(node_result[1])
        host_all.append(node_result[0])

        # Only the first entry has to be a local address, so the local
        # interface list is queried for that entry alone.
        if len(host_all) == 1:
            local_ip_status, local_ip_result = commands.getstatusoutput(
                """ifconfig |grep 'inet addr'|awk -F '[: ]+' '{print $4}' """)
            local_ip = local_ip_result.split('\n')
            if node_result[1] not in local_ip:
                print("The first IP must be native IP.")
                sys.exit()
# Disks found on each host, excluding the system disk:
# {hostname: [device names without the /dev/ prefix]}.
avail_disk = {}


def get_disks():
    """Collect the non-system whole disks of every host in ``host_all``.

    For each host, ``/proc/partitions`` and ``df -h`` output are captured
    over ssh and copied to ``/root`` on ``host_all[0]``. Whole-disk names
    (entries without digits) are parsed from them, the disk holding ``/``
    is dropped, and the rest is stored in the module-level ``avail_disk``.
    Finally the result is printed per host.
    """
    print("Getting avail disks... ... list as fllow:")
    for host in host_all:
        # Dump the remote partition table and mounted file systems, then
        # copy both files to the first (local) node.
        os.system("""ssh %s " cat /proc/partitions >/tmp/part_%s;scp /tmp/part_%s %s:/root/" """%(host,host,host,host_all[0]))
        os.system("""ssh %s " df -h >/tmp/osdisk_%s;scp /tmp/osdisk_%s %s:/root/" """%(host,host,host,host_all[0]))

        disk_status, disk_result = commands.getstatusoutput(
            "cat /root/part_%s |awk '{print $4}'|grep -v name "
            "|grep -v [0-9]|grep -v ^$" % host)
        # Empty output would otherwise yield a bogus '' disk name.
        disks = [name for name in disk_result.split('\n') if name]

        # Remove the system disk (the one mounted on /) from the candidates.
        os_status, os_result = commands.getstatusoutput(
            "cat /root/osdisk_%s |grep '/$'|awk '{print $1}' "
            "|awk -F '/' '{print $3}'|awk -F [0-9] '{print $1}'" % host)
        # The root device may not appear in the whole-disk list at all, so
        # only remove it when it is present.
        if os_result in disks:
            disks.remove(os_result)
        avail_disk[host] = disks

    # Print every remaining disk, grouped by host.
    print(avail_disk.items())
    for host, disks in avail_disk.items():
        print('''On the host %s's disk:''' % host)
        for name in disks:
            print('/dev/%s' % name)
# Write the NSD descriptor file.
def config_file(path='/root/diskef', disks=None, hosts=None):
    """Write one disk descriptor line per data disk, split into two groups.

    Each line has the form ``/dev/<disk>:<host>:::<group>:`` where
    ``<group>`` is ``01`` or ``02``.

    * One host: its disks alternate between group 01 and 02.
    * Two or more hosts: hosts alternate between group 01 and 02 in
      ``hosts`` order, and all disks of a host share that host's group.

    Args:
        path: Descriptor file to (over)write. Defaults to ``/root/diskef``.
        disks: Mapping ``{hostname: [disk names]}``. Defaults to the
            module-level ``avail_disk``.
        hosts: Hostnames in cluster order. Defaults to the module-level
            ``host_all``.
    """
    if disks is None:
        disks = avail_disk
    if hosts is None:
        hosts = host_all

    print('Configuring diskef....')
    with open(path, 'w') as f:
        if len(disks) == 1:
            # Single host: alternate groups disk by disk.
            for host, names in disks.items():
                for index, name in enumerate(names):
                    group = '01' if index % 2 == 0 else '02'
                    f.write("/dev/%s:%s:::%s:\n" % (name, host, group))
        elif len(disks) >= 2:
            # Several hosts: alternate groups host by host.
            for index, host in enumerate(hosts):
                group = '01' if index % 2 == 0 else '02'
                for name in disks[host]:
                    f.write("/dev/%s:%s:::%s:\n" % (name, host, group))
    print('Done.')
#def sel_tiebreakerD():
# Configure the GPFS cluster.
def _last_line_as_int(output):
    """Return the last line of command *output* as an int, or None if it is not numeric."""
    lines = output.strip().split('\n')
    try:
        return int(lines[-1])
    except ValueError:
        return None


def config_gpfs():
    """Create the cluster, the NSDs and the ``vol_data`` file system, then mount it.

    Steps, in order:

    1. ``mmcrcluster`` with ``host_all[0]`` as quorum node and primary
       server (``-p``); with two or more hosts ``host_all[1]`` is passed
       as ``-s``.
    2. Check that the cluster name reported by ``mmlscluster`` equals
       ``host_all[0]``.
    3. ``mmchlicense server --accept`` for the entered hosts.
    4. ``mmcrnsd`` from ``/root/diskef``, then compare the NSD count with
       the line count of the descriptor file.
    5. ``mmstartup -a``, wait 20 seconds, and require as many ``active``
       nodes as there are entries in ``avail_disk``.
    6. ``mmcrfs vol_data`` and ``mmmount vol_data -a``, then confirm the
       mount shows up in ``df -h``.

    Raises:
        SystemExit: via ``sys.exit()`` as soon as any check fails or the
            host list is empty.
    """
    print('Configuring gpfs:\n')
    print('Check crcluster...')

    # Build the mmcrcluster command once so the echoed text and the executed
    # command can never drift apart.
    if len(host_all) == 1:
        # A single host has no secondary configuration server.
        crcluster_cmd = "mmcrcluster -N %s:quorum -p %s -r /usr/bin/ssh -R /usr/bin/scp " % (
            host_all[0], host_all[0])
    elif len(host_all) >= 2:
        crcluster_cmd = "mmcrcluster -N %s:quorum,%s -p %s -s %s -r /usr/bin/ssh -R /usr/bin/scp " % (
            host_all[0], ','.join(host_all[1:]), host_all[0], host_all[1])
    else:
        print("Host list is empty. exit...")
        sys.exit()
    print(crcluster_cmd)
    crcluster_status, crcluster_result = commands.getstatusoutput(crcluster_cmd)

    # The exit status of mmcrcluster is not trusted; the cluster is verified
    # through mmlscluster instead.
    print('Check cluster configuration...')
    cluster_status, cluster_output = commands.getstatusoutput(
        '''mmlscluster|grep 'GPFS cluster name'|awk '{print $4}' ''')
    cluster_name = cluster_output.split('\n')[-1].strip()
    if cluster_name == host_all[0]:
        print('Done.')
    else:
        print('Check cluster fail...')
        # Show what mmcrcluster reported so the failure can be diagnosed.
        print(crcluster_result)
        sys.exit()

    # Accept the server license for the entered hosts.
    if len(avail_disk) == 1:
        license_nodes = host_all[0]
    elif len(avail_disk) >= 2:
        license_nodes = ','.join(host_all)
    else:
        license_nodes = None
    if license_nodes is not None:
        license_cmd = 'mmchlicense server --accept -N %s' % license_nodes
        print(license_cmd)
        os.system(license_cmd)

    # Create the NSDs and make sure every descriptor produced one.
    print('Checking NSD configuration... ...')
    os.system('mmcrnsd -F /root/diskef -v no ')
    nsd_status, nsd_result = commands.getstatusoutput('mmlsnsd |grep nsd|wc -l')
    nsdfile_status, nsdfile_result = commands.getstatusoutput("cat /root/diskef |grep -v 'gpfs'|wc -l")
    nsd_num = _last_line_as_int(nsd_result)
    nsdfile_num = _last_line_as_int(nsdfile_result)
    # Unparsable output counts as a failed check rather than an exception.
    if nsd_num is not None and nsd_num == nsdfile_num:
        print('Done.')
    else:
        print('Checking NSD Failed.')
        sys.exit()

    # Start the GPFS daemons and give them time to come up.
    print('Starting gpfs service, needs about 20 seconds...')
    os.system('mmstartup -a')
    time.sleep(20)
    state_status, state_result = commands.getstatusoutput('mmgetstate -a|grep active|wc -l')
    active_nodes = _last_line_as_int(state_result)
    if active_nodes is not None and len(avail_disk) == active_nodes:
        print('Done.')
    else:
        print('fail... mmgetstate -a have down/unknown...')
        sys.exit()

    # Create the file system.
    crfs_status, crfs_result = commands.getstatusoutput(
        'mmcrfs vol_data -F /root/diskef -B 256k -r 2 -m 2 -j cluster -T /vol_data -v no ')
    if crfs_status == 0:
        print('Done.')
    else:
        print('mmcrfs is failed.')
        sys.exit()

    # Mount it on all nodes and confirm the mount is visible locally.
    os.system('mmmount vol_data -a')
    print('Checking mounting state......')
    mount_status, mount_result = commands.getstatusoutput('df -h |grep vol_data')
    if mount_status != 0:
        print('Fail.')
        sys.exit()
    else:
        print('Done.')
# Put the GPFS command directory on PATH; add it only where it is missing.
def set_profile():
    """Ensure ``/usr/lpp/mmfs/bin`` is on PATH for every host in ``host_all``.

    For each host, ``/etc/profile`` is checked over ssh; when the directory
    is not mentioned there, a ``PATH=$PATH:/usr/lpp/mmfs/bin`` line is
    appended on that host. The PATH of the running process is extended as
    well.
    """
    mmfs_bin = '/usr/lpp/mmfs/bin'
    for host in host_all:
        print('Check mmfs profile... ...')
        # Run the check on the target host itself (ssh, with the hostname
        # actually interpolated into the command).
        path_status, path_result = commands.getstatusoutput(
            """ssh %s "grep -q '%s' /etc/profile" """ % (host, mmfs_bin))
        if path_status == 0:
            print('Done.')
        else:
            print("Auto set PATH profile...")
            # \$ keeps the local shell from expanding PATH and the remote
            # single quotes keep it literal, so the profile receives the
            # text "PATH=$PATH:..." on a line of its own.
            os.system(r"""ssh %s "echo 'PATH=\$PATH:%s' >>/etc/profile" """ % (host, mmfs_bin))

    # Sourcing /etc/profile through os.system only affects a child shell.
    # Extending os.environ is what changes PATH for commands started later
    # by this process.
    current_path = os.environ.get('PATH', '')
    if mmfs_bin not in current_path.split(os.pathsep):
        os.environ['PATH'] = current_path + os.pathsep + mmfs_bin
# Install the GPFS packages on all nodes.
def set_another_node():
    """Copy the package tree to each host and install it there over ssh.

    Does nothing beyond printing a notice when ``host_all`` holds a single
    host. Otherwise, for every entry of ``host_all`` (the first one
    included), ``../*`` is copied to ``/tmp`` on that host and the
    dependency, base, update and gplbin packages are installed remotely.
    The gplbin file name is built from the local ``uname -r`` output, so
    the remote kernels are expected to match the local one.
    """
    kernel_status, kernel_release = commands.getstatusoutput('uname -r')
    print('Checking the another node state...')
    if len(host_all) == 1:
        print("Don't have another host... Done.")
        return

    # Command templates, built once; each takes the target host name.
    copy_cmd = 'scp -r ../* %s:/tmp'
    dependency_cmd = (
        '''ssh %s 'sleep 1; sh -c " rpm -ivh /tmp/dependency/nss-softokn-freebl*.rpm; '''
        '''rpm -ivh /tmp/dependency/rsh*.rpm; '''
        '''rpm -ivh /tmp/dependency/glibc*.rpm; '''
        '''rpm -ivh /tmp/dependency/libgcc*.rpm; '''
        '''rpm -ivh /tmp/dependency/ksh*.rpm; '''
        '''rpm -ivh /tmp/dependency/compat-libstdc++*.rpm;" ' '''
    )
    base_cmd = '''ssh %s 'sh -c "sleep 1;rpm -ivh /tmp/base/gpfs.base*.rpm;rpm -ivh /tmp/base/*noarch.rpm;rpm -Uvh /tmp/update/*.rpm" ' '''
    gplbin_cmd = '''ssh %s 'sh -c "sleep 1;rpm -ivh /tmp/rpm/gpfs.gplbin-%s-3.4.0-21.x86_64.rpm" >>/root/loginfo.txt' '''

    for target in host_all:
        os.system(copy_cmd % target)
        os.system(dependency_cmd % target)
        os.system(base_cmd % target)
        os.system(gplbin_cmd % (target, kernel_release))
# Run the deployment steps in order. A deliberate exit (sys.exit() from a
# failed check, or Ctrl-C) ends with the friendly message only; any other
# exception is a real error and its traceback is shown before the message.
import traceback

try:
    #install_rpm()
    get_nodes()         # read the node list
    get_disks()         # find the free disks
    set_profile()       # set up PATH
    config_file()       # write the disk descriptor file
    set_another_node()  # install the gpfs packages on all nodes
    config_gpfs()       # configure the cluster
except (KeyboardInterrupt, SystemExit):
    print('\n bye bye...')
except Exception:
    traceback.print_exc()
    print('\n bye bye...')
GPFS通用并行文件系统之CentOS上部署GPFS集群