GPFS文件系统能够横跨所有主机、分布在所有磁盘上,采用条带化读写,性能高;采用令牌(token)管理机制,并发性好;可配置故障组(failure group),可用性高。下面是GPFS集群自动安装部署的Python代码。
注:该脚本只能自动识别到系统盘之外的物理磁盘,不会识别到分区。
1、gpfs文件包目录结构
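脚本按照如下目录结构进行安装(目录名取自脚本中使用的相对路径):安装包根目录下包含 dependency/(依赖包)、base/(GPFS基础包)、update/(升级包)、rpm/(与内核版本对应的gpfs.gplbin包)以及 scripts/(存放auto.py,需在该目录下执行脚本)。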
2、脚本执行方法
运行脚本后,按“主机名/IP地址”的格式逐个输入拥有存储磁盘的节点,第一个输入的节点必须是本机;全部输入完成后,直接按回车结束输入。
[root@vuestor01 scripts]# python auto.py
Input node's info. Usage:hosta/192.168.0.101. Press Enter is complete.
Please input Node info: stor01/192.168.2.67
check ip address success!
Input node's info. Usage:hosta/192.168.0.101. Press Enter is complete.
Please input Node info: stor02/192.168.2.85
执行成功后,自动挂载到/vol_data目录下。
[root@vuestor01 scripts]# df -hT
Filesystem Type Size Used Avail Use% Mounted on
/dev/sda2 ext4 197G 1.6G 185G 1% /
tmpfs tmpfs 1.9G 0 1.9G 0% /dev/shm
/dev/sda1 ext4 504M 46M 433M 10% /boot
/dev/vol_data gpfs 1.2T 918M 1.2T 1% /vol_data
[root@stor02 ~]# df -hT
Filesystem Type Size Used Avail Use% Mounted on
/dev/sda2 ext4 197G 1.6G 185G 1% /
tmpfs tmpfs 1.9G 0 1.9G 0% /dev/shm
/dev/sda1 ext4 504M 46M 433M 10% /boot
/dev/vol_data gpfs 1.2T 918M 1.2T 1% /vol_data
3、gpfs自动安装python代码分析
#!/usr/bin/python
"""Automated installation and configuration of a GPFS cluster.

Run from the ``scripts`` directory of the GPFS package tree
(``python auto.py``).  The operator enters ``hostname/ip`` pairs, the first
of which must be the local machine.  The script then discovers the non-system
disks of every node, writes the NSD descriptor file, installs the GPFS RPMs on
all nodes, creates the cluster and file system, and mounts it on /vol_data.

Every validation failure prints a message and terminates with a non-zero
exit status.
"""
import datetime
import os
import re
import sys
import time

try:  # Python 2: the original script relied on the ``commands`` module.
    from commands import getstatusoutput
except ImportError:  # Python 3: same helper lives in ``subprocess``.
    from subprocess import getstatusoutput

try:
    read_line = raw_input
except NameError:
    read_line = input

MMFS_BIN = '/usr/lpp/mmfs/bin'
NSD_FILE = '/root/diskef'
LOG_FILE = '/root/loginfo.txt'
NSD_LINE = "/dev/%s:%s:::%s:\n"
FAILURE_GROUPS = ('01', '02')
DEPENDENCY_RPMS = (
    'nss-softokn-freebl*.rpm',
    'rsh*.rpm',
    'glibc*.rpm',
    'libgcc*.rpm',
    'ksh*.rpm',
    'compat-libstdc++*.rpm',
)
# Host names end up inside shell command lines, so only plain host name
# characters are accepted.
HOSTNAME_RE = re.compile(r'[A-Za-z0-9][A-Za-z0-9._-]*\Z')
OCTET_RE = re.compile(r'[0-9]{1,3}\Z')
PROMPT = ("Input node's info. Usage: hosta/192.168.0.101. "
          "Press Enter is complete. \nPlease input Node info: ")

# Shared state filled in by get_nodes() / get_disks().
node_dict = {}
host_all = []
avail_disk = {}


def fail(message):
    """Print *message* and terminate the script with exit status 1."""
    print(message)
    sys.exit(1)


def output_log():
    """Write the current timestamp to ``loginfo.txt`` (file is truncated)."""
    now = datetime.datetime.now()
    with open('loginfo.txt', 'w') as log:
        log.write('%s \n' % now)


def check_ip(ipaddr):
    """Validate a dotted-quad IPv4 address.

    Prints a success message and returns None when *ipaddr* consists of four
    decimal octets in the range 0-255; otherwise prints a failure message and
    exits via SystemExit.
    """
    octets = ipaddr.strip().split('.')
    if len(octets) != 4:
        fail("check ip address failed!")
    for octet in octets:
        if not OCTET_RE.match(octet) or int(octet) > 255:
            fail("check ip address failed!")
    print("check ip address success!")


def install_rpm():
    """Install the GPFS packages on the local machine and verify the result.

    Exits when the running kernel is not one of the two supported builds or
    when fewer than five gpfs packages are installed afterwards.
    """
    print("Install rpm packages...")
    for pattern in DEPENDENCY_RPMS:
        os.system("rpm -ivh ../dependency/%s >>%s" % (pattern, LOG_FILE))
    os.system("rpm -ivh ../base/gpfs.base*.rpm >>%s" % LOG_FILE)
    os.system("rpm -ivh ../base/*noarch.rpm >>%s" % LOG_FILE)
    os.system("rpm -Uvh ../update/*.rpm >>%s" % LOG_FILE)

    # Pick the pre-built portability layer matching the running kernel.
    kernel_status, kernel = getstatusoutput("uname -r")
    if '220' in kernel:
        getstatusoutput("rpm -ivh ../rpm/gpfs.gplbin-2.6.32-220.el6.x86_64-3.4.0-21.x86_64.rpm")
    elif '358' in kernel:
        getstatusoutput("rpm -ivh ../rpm/gpfs.gplbin-2.6.32-358.el6.x86_64-3.4.0-21.x86_64.rpm")
    else:
        fail("can't support this kernel!")

    # The command output is text; convert it before comparing with a number.
    count_status, count_output = getstatusoutput("rpm -qa |grep gpfs|wc -l")
    if int(count_output.strip().split('\n')[-1]) < 5:
        fail("RPM packages check failed!")
    print("Done.")


def parse_node(node):
    """Split a ``hostname/ip`` entry into a validated ``(hostname, ip)`` pair.

    Exits via SystemExit when the entry is malformed, the host name contains
    characters outside ``[A-Za-z0-9._-]`` or the IP address is invalid.
    """
    parts = node.strip().split('/')
    if len(parts) != 2:
        fail("Node info is failed! Usage: hosta/192.168.0.101")
    hostname = parts[0].strip()
    ipaddr = parts[1].strip()
    if not HOSTNAME_RE.match(hostname):
        fail("Hostname is failed!")
    check_ip(ipaddr)
    return hostname, ipaddr


def local_ips():
    """Return the IPv4 addresses configured on this machine (via ifconfig)."""
    status, output = getstatusoutput(
        """ifconfig |grep 'inet addr'|awk -F '[: ]+' '{print $4}' """)
    return output.split('\n')


def get_nodes():
    """Interactively collect node names into ``host_all``.

    Reads ``hostname/ip`` entries until an empty line is entered, then
    returns 2.  The first entry must carry an IP address of the local
    machine; duplicate host names are rejected.
    """
    while True:
        node = read_line(PROMPT)
        if not node.strip():
            return 2
        hostname, ipaddr = parse_node(node)
        if hostname in host_all:
            fail("Hostname %s is duplicated!" % hostname)
        # Cluster commands are issued from the first node, so it must be local.
        if not host_all and ipaddr not in local_ips():
            fail("The first IP must be native IP.")
        host_all.append(hostname)


def get_disks():
    """Discover the non-system whole disks of every node into ``avail_disk``.

    Each node dumps /proc/partitions and ``df -h`` to a file and copies it to
    /root on the first node, where the files are parsed.
    """
    print("Getting avail disks... ... list as fllow:")
    for host in host_all:
        collector = host_all[0]
        os.system("""ssh %s " cat /proc/partitions >/tmp/part_%s;scp /tmp/part_%s %s:/root/" """
                  % (host, host, host, collector))
        os.system("""ssh %s " df -h >/tmp/osdisk_%s;scp /tmp/osdisk_%s %s:/root/" """
                  % (host, host, host, collector))

        # Whole disks only: device names containing a digit are partitions.
        disk_status, disk_output = getstatusoutput(
            "cat /root/part_%s |awk '{print $4}'|grep -v name "
            "|grep -v '[0-9]'|grep -v '^$'" % host)
        # Name of the disk that holds the root file system.
        os_status, os_output = getstatusoutput(
            "cat /root/osdisk_%s |grep '/$'|awk '{print $1}' "
            "|awk -F '/' '{print $3}'|awk -F '[0-9]' '{print $1}'" % host)
        os_disk = os_output.strip()

        # Filtering (instead of list.remove) tolerates a root device that is
        # not listed as a whole disk, and drops empty lines.
        avail_disk[host] = [disk for disk in disk_output.split('\n')
                            if disk and disk != os_disk]

    print(list(avail_disk.items()))
    for host in host_all:
        print('''On the host %s's disk:''' % host)
        for disk in avail_disk.get(host, []):
            print('/dev/%s' % disk)


def build_nsd_lines(hosts, disks_by_host):
    """Return the NSD descriptor lines for *hosts*.

    With a single host its disks alternate between failure groups 01 and 02;
    with several hosts all disks of one host share a failure group and the
    groups alternate from host to host (first host gets 01).
    """
    lines = []
    if len(hosts) == 1:
        host = hosts[0]
        for index, disk in enumerate(disks_by_host.get(host, [])):
            lines.append(NSD_LINE % (disk, host, FAILURE_GROUPS[index % 2]))
    else:
        for index, host in enumerate(hosts):
            group = FAILURE_GROUPS[index % 2]
            for disk in disks_by_host.get(host, []):
                lines.append(NSD_LINE % (disk, host, group))
    return lines


def config_file():
    """Write the NSD descriptor file used by mmcrnsd and mmcrfs."""
    print('Configuring diskef....')
    with open(NSD_FILE, 'w') as descriptor:
        descriptor.writelines(build_nsd_lines(host_all, avail_disk))
    print('Done.')


def config_gpfs():
    """Create the cluster, NSDs and file system, then mount it everywhere.

    Each stage is verified; the script exits on the first failed check.
    """
    print('Configuring gpfs:\n')
    print('Check crcluster...')
    if not host_all:
        fail("Host list is empty. exit...")
    primary = host_all[0]
    if len(host_all) == 1:
        # A single node has no secondary configuration server.
        create_cmd = ("mmcrcluster -N %s:quorum -p %s -r /usr/bin/ssh -R /usr/bin/scp "
                      % (primary, primary))
    else:
        create_cmd = ("mmcrcluster -N %s:quorum,%s -p %s -s %s -r /usr/bin/ssh -R /usr/bin/scp "
                      % (primary, ','.join(host_all[1:]), primary, host_all[1]))
    print(create_cmd)
    crcluster_status, crcluster_result = getstatusoutput(create_cmd)
    if crcluster_status != 0:
        # Not fatal by itself; the cluster check below decides.
        print(crcluster_result)

    print('Check cluster configuration...')
    cluster_status, cluster_output = getstatusoutput(
        '''mmlscluster|grep 'GPFS cluster name'|awk '{print $4}' ''')
    if cluster_output.split('\n')[-1].strip() == primary:
        print('Done.')
    else:
        fail('Check cluster fail...')

    # License every node as a server so all of them may configure and mount.
    license_cmd = 'mmchlicense server --accept -N %s' % ','.join(host_all)
    print(license_cmd)
    os.system(license_cmd)

    # The number of NSDs created must match the descriptor file.
    print('Checking NSD configuration... ...')
    os.system('mmcrnsd -F %s -v no ' % NSD_FILE)
    nsd_status, nsd_output = getstatusoutput('mmlsnsd |grep nsd|wc -l')
    nsd_num = nsd_output.split('\n')[-1]
    nsdfile_status, nsdfile_num = getstatusoutput(
        "cat %s |grep -v 'gpfs'|wc -l" % NSD_FILE)
    if int(nsd_num) == int(nsdfile_num):
        print('Done.')
    else:
        fail('Checking NSD Failed.')

    print('Starting gpfs service, needs about 20 seconds...')
    os.system('mmstartup -a')
    time.sleep(20)
    # Every node entered by the operator has to report "active".
    state_status, active_nodes = getstatusoutput('mmgetstate -a|grep active|wc -l')
    if len(host_all) == int(active_nodes.split('\n')[-1]):
        print('Done.')
    else:
        fail('fail... mmgetstate -a have down/unknown...')

    crfs_status, crfs_result = getstatusoutput(
        'mmcrfs vol_data -F %s -B 256k -r 2 -m 2 -j cluster -T /vol_data -v no ' % NSD_FILE)
    if crfs_status == 0:
        print('Done.')
    else:
        fail('mmcrfs is failed.')

    os.system('mmmount vol_data -a')
    print('Checking mounting state......')
    mount_status, mount_result = getstatusoutput('df -h |grep vol_data')
    if mount_status != 0:
        fail('Fail.')
    print('Done.')


def ensure_local_path():
    """Add the GPFS binary directory to PATH of the running process.

    Sourcing /etc/profile in a child shell cannot affect this process, so the
    environment is updated directly for the mm* commands issued later.
    """
    entries = [entry for entry in os.environ.get('PATH', '').split(os.pathsep)
               if entry]
    if MMFS_BIN not in entries:
        entries.append(MMFS_BIN)
        os.environ['PATH'] = os.pathsep.join(entries)


def set_profile():
    """Make sure /etc/profile on every node puts the GPFS binaries on PATH.

    The line is appended on the node itself (over ssh) and only when
    /etc/profile does not mention the directory yet.
    """
    ensure_local_path()
    for host in host_all:
        print('Check mmfs profile... ...')
        path_status, path_result = getstatusoutput(
            """ssh %s "grep -q '%s' /etc/profile" """ % (host, MMFS_BIN))
        if path_status == 0:
            print('Done.')
            continue
        print("Auto set PATH profile...")
        # "\$" keeps $PATH literal for the local shell; the single quotes keep
        # it literal on the remote side, so the profile line is written as-is.
        os.system("""ssh %s "echo 'PATH=\\$PATH:%s' >> /etc/profile" """
                  % (host, MMFS_BIN))


def set_another_node():
    """Copy the package tree to every node and install the GPFS RPMs there.

    Nothing is done for a single-node setup.  All nodes are assumed to run
    the same kernel as the local machine, because the local ``uname -r``
    selects the gpfs.gplbin package.
    """
    uname_status, uname_result = getstatusoutput('uname -r')
    print('Checking the another node state...')
    if len(host_all) == 1:
        print("Don't have another host... Done.")
        return
    dependency_cmd = ' '.join('rpm -ivh /tmp/dependency/%s;' % pattern
                              for pattern in DEPENDENCY_RPMS)
    for host in host_all:
        os.system('scp -r ../* %s:/tmp' % host)
        os.system('''ssh %s 'sleep 1; sh -c " %s" ' ''' % (host, dependency_cmd))
        os.system('''ssh %s 'sh -c "sleep 1;rpm -ivh /tmp/base/gpfs.base*.rpm;rpm -ivh /tmp/base/*noarch.rpm;rpm -Uvh /tmp/update/*.rpm" ' '''
                  % host)
        os.system('''ssh %s 'sh -c "sleep 1;rpm -ivh /tmp/rpm/gpfs.gplbin-%s-3.4.0-21.x86_64.rpm" >>/root/loginfo.txt' '''
                  % (host, uname_result))


def main():
    """Run the deployment steps in order and report failures.

    install_rpm() is not part of the flow (it was disabled in the original
    driver); packages are installed through set_another_node().
    """
    try:
        get_nodes()          # collect the node list
        get_disks()          # discover free disks
        set_profile()        # PATH for the GPFS commands
        config_file()        # NSD descriptor file
        set_another_node()   # install the GPFS packages on all nodes
        config_gpfs()        # cluster, NSD, file system, mount
    except (KeyboardInterrupt, EOFError):
        print('\n bye bye...')
        sys.exit(1)
    except SystemExit:
        # A step already printed its reason; keep its exit status.
        print('\n bye bye...')
        raise
    except Exception as err:
        print('\n unexpected error: %s' % err)
        print('\n bye bye...')
        sys.exit(1)


if __name__ == '__main__':
    main()