Python多线程的具体应用——批量处理服务器列表

对于批量处理服务器的任务, 通常的解决方案是在服务器内网部署一台中转机: 中转机通过一些途径获得所有单服的信息, 再使用ssh、rsync等命令和具体服务器交互完成工作。


简略代码如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import sys

def SwitchOption(ipInfo, option, server, ext_info):
    """Simulate running one operation against a single server.

    Prints a "begin ..." line and then sleeps for one second as a stand-in
    for the real per-server work, which this simplified example elides.

    Args:
        ipInfo: Address of the target server; only used in the log line.
        option: Operation name, e.g. 'update' or 'restart'.
        server: Label describing the server; only used in the log line.
        ext_info: Extra information; unused by this simplified version.

    Raises:
        AssertionError: If the completion flag is false (it is hard-coded
            to True here, so this never fires in the example).
    """
    banner = 'begin %s %s server(%s)  ' % (option, server, ipInfo)
    # A single parenthesised string prints the same text as the bare
    # Python 2 print statement.
    print(banner)
    finished = True
    # ... the real work for this server would go here ...
    time.sleep(1)
    assert finished
    
class UpdateHandler():
    """Thin wrapper that forwards a server operation to SwitchOption."""

    def Option(self, ipInfo, option, name, ext_info):
        """Run ``option`` on the server at ``ipInfo``.

        ``name`` is handed to SwitchOption as its ``server`` argument.
        """
        SwitchOption(ipInfo=ipInfo, option=option, server=name,
                     ext_info=ext_info)
        
def GetGroupList():
    """Return the addresses of the five servers to operate on, as a tuple."""
    # Hosts 10.0.128.1 through 10.0.128.5.
    return tuple('10.0.128.%d' % host for host in range(1, 6))
            
def option_group_list(op, ext_info):
    """Apply ``op`` to every server from GetGroupList(), one after another.

    Each call returns before the next server is handled, so the total time
    is the sum of the per-server times.

    Args:
        op: Operation name forwarded to UpdateHandler.Option.
        ext_info: Extra information forwarded unchanged.
    """
    servers = GetGroupList()
    handler = UpdateHandler()
    for address in servers:
        # Every entry is handled under the fixed label 'group'.
        handler.Option(address, op, 'group', ext_info)
        
        
# Update every server first, then restart every server.
for operation in ('update', 'restart'):
    option_group_list(operation, '')

上述代码模拟更新并重启了5台服务器。假设每步操作需要1分钟, 由于所有操作串行执行, 总耗时就是 5 × 2 = 10 分钟。

而且每增加一台服务器, 总耗时就会再增加2分钟, 即耗时随服务器数量线性增长。


考虑到更新/重启多个服务器不是计算密集型任务,

且每次操作都属于独立的子任务,并不存在相互影响相互依赖关系, 可以使用不需要同步的多线程完成具体的子任务。

当然有几个细节值得注意:

1. 子线程中抛出的异常或中断不会传播到主线程, 也不会打断主线程, 需要在子线程内用 try/except 捕获异常, 并记录各个子线程任务的完成情况

2. 用 print 打印log并不是多线程安全的(Python 2 的 print 语句会把内容和换行符分开写出, 多线程下输出可能互相穿插), 可以用 sys.stdout.write 一次写出包含换行符的整行来代替

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import threading
import time
import sys

def SwitchOption(ipInfo, option, server, ext_info):
    """Simulate running ``option`` against the server at ``ipInfo``.

    The log line is emitted with one sys.stdout.write call whose string
    already ends with the newline. The real work is elided and replaced by
    a one-second sleep.

    Args:
        ipInfo: Address of the target server; only used in the log line.
        option: Operation name.
        server: Label describing the server; only used in the log line.
        ext_info: Extra information; unused by this simplified version.

    Raises:
        AssertionError: If the completion flag is false (hard-coded to True
            in this example).
    """
    message = 'begin %s %s server(%s) \n' % (option, server, ipInfo)
    sys.stdout.write(message)
    done = True
    # ... the real work for this server would go here ...
    time.sleep(1)
    assert done
    
    
class threadUpdateHandler(threading.Thread):
    """Thread that runs one SwitchOption call and records whether it failed.

    Call parepare_args() before start(). After join(), get_result() is 0 if
    SwitchOption returned normally and 1 if it raised an Exception.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        # 0 = no failure recorded, 1 = the task raised an Exception.
        self.result = 0

    def parepare_args(self, name, ipInfo, option, ext_info):
        """Store the arguments that run() will pass to SwitchOption.

        ``name`` is assigned to ``self.name``, which is also the attribute
        threading.Thread uses for the thread's name.
        """
        self.name = name
        self.ipInfo = ipInfo
        self.option = option
        self.ext_info = ext_info

    def run(self):
        """Execute the task; log and remember any Exception instead of raising."""
        try:
            SwitchOption(self.ipInfo, self.option, self.name, self.ext_info)
        except Exception as err:
            # Catch every Exception so the failure is recorded on this object
            # and can be read by the caller through get_result().
            report = "!!! error exception !!!!: %s server[%s] option %s \n" % (
                err, (self.ipInfo), self.option)
            sys.stdout.write(report)
            self.result = 1

    def get_result(self):
        """Return 0 if no failure was recorded, 1 otherwise."""
        return self.result
        
        
def GetGroupList():
    """Return the server list: the five base addresses repeated twice."""
    # Hosts 10.0.128.1 through 10.0.128.5.
    base = tuple('10.0.128.%d' % host for host in range(1, 6))
    return base + base
        
        
def option_group_list(op, ext_info):
    """Run ``op`` on every server returned by GetGroupList().

    For 'update', 'start', 'stop' and 'restart' one threadUpdateHandler
    thread is started per server, and all threads are joined before their
    results are inspected. Any other operation is run sequentially by
    calling SwitchOption directly.

    Args:
        op: Operation name forwarded to each task.
        ext_info: Extra information forwarded to each task.

    Raises:
        AssertionError: If at least one threaded task reported a failure.
    """
    groups = GetGroupList()

    if op not in ('update', 'start', 'stop', 'restart'):
        # Sequential path: this script defines no handler class for it, so
        # the operation is performed through SwitchOption itself, with the
        # arguments in SwitchOption's (ipInfo, option, server, ext_info) order.
        for ipTup in groups:
            SwitchOption(ipTup, op, 'group', ext_info)
        return

    # Start one worker thread per server.
    threadList = []
    for ipTup in groups:
        t = threadUpdateHandler()
        t.parepare_args('group', ipTup, op, ext_info)
        t.start()
        threadList.append(t)

    # The main thread blocks here until every worker has returned.
    for thread in threadList:
        thread.join()

    # Collect the servers whose worker recorded a failure.
    failed = [ipTup for ipTup, thread in zip(groups, threadList)
              if thread.get_result()]
    for ipTup in failed:
        # One write per line, newline included, like the workers' own logging.
        sys.stdout.write("%s server[%s] failed\n" % (op, ipTup))

    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if failed:
        raise AssertionError("%s failed on %d of %d servers"
                             % (op, len(failed), len(groups)))

# Update every server first, then restart every server.
for operation in ('update', 'restart'):
    option_group_list(operation, '')

上述多线程代码模拟更新并重启了10台服务器。如果每步操作需要1分钟, 由于同一步操作在各台服务器上并行执行, 更新和重启各耗时约1分钟, 总共用时约两分钟。由此可见多线程大大减少了批量处理服务器所花费的时间。


但上段代码还有一个很严重的问题, 即没有对并发执行的子线程进行数量上的限制。设想如下情况: 有100台服务器需要更新, 每个子线程需要启动一个相关进程(如rsync), 那么中转机整体的压力就会变得很大, 网络带宽也会被占满。

因此我们需要一个线程队列来控制同时执行的线程数量, 保证无论服务器数量如何增长, 中转机的压力都不会随之无限增大。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import Queue
import threading
import time
import sys

def SwitchOption(ipInfo, option, server, ext_info):
    """Stand-in for the real per-server operation.

    Writes one "begin ..." line to stdout (newline included in the same
    write call), sleeps for one second in place of the elided work, and
    then checks a completion flag.

    Args:
        ipInfo: Address of the target server; only used in the log line.
        option: Operation name.
        server: Label describing the server; only used in the log line.
        ext_info: Extra information; unused by this simplified version.

    Raises:
        AssertionError: If the completion flag is false (hard-coded to True
            in this example).
    """
    sys.stdout.write(
        'begin %s %s server(%s) \n' % (option, server, ipInfo))
    completed = True
    # ... the real work for this server would go here ...
    time.sleep(1)
    assert completed
    
class ThreadPool(object):
    """Bounded queue of unstarted threadUpdateHandler objects.

    The queue starts out holding ``max_num`` handlers. get_thread() removes
    one and add_thread() inserts a fresh one, so callers that take a handler
    before starting work and add one back when the work ends are limited by
    the number of handlers currently in the queue.
    """

    def __init__(self, max_num=15):
        # Use a thread-safe queue to emulate a thread pool.
        self.queue = Queue.Queue(max_num)
        for _ in range(max_num):
            self.add_thread()

    def get_thread(self):
        """Take one handler out of the queue (Queue.get with its defaults)."""
        handler = self.queue.get()
        return handler

    def add_thread(self):
        """Put a newly created threadUpdateHandler into the queue."""
        fresh = threadUpdateHandler()
        self.queue.put(fresh)
    
class threadUpdateHandler(threading.Thread):
    """Thread that runs one SwitchOption call and records whether it failed.

    Call parepare_args() before start(). After join(), get_result() is 0 if
    SwitchOption returned normally and 1 otherwise. When a pool is supplied,
    the pool is always notified once the task ends, whatever the outcome.
    """

    def __init__(self):
        super(threadUpdateHandler, self).__init__()
        # 0 = no failure recorded, 1 = the task did not finish normally.
        self.result = 0
        # Defined up front so run() can test it even when parepare_args()
        # was called without a pool or not called at all.
        self.pool = None

    def parepare_args(self, name, ipInfo, option, ext_info, pool = None):
        """Store the arguments for run().

        Args:
            name: Passed to SwitchOption as its ``server`` argument. It is
                stored in ``self.name``, which is also the attribute
                threading.Thread uses for the thread's name.
            ipInfo: Address of the target server.
            option: Operation name.
            ext_info: Extra information forwarded to SwitchOption.
            pool: Optional object with an ``add_thread()`` method that is
                called when this thread's task ends.
        """
        self.name,self.ipInfo,self.option,self.ext_info=name,ipInfo,option,ext_info
        self.pool = pool

    def run(self):
        """Execute the task, record its outcome, then notify the pool."""
        succeeded = False
        try:
            SwitchOption(self.ipInfo, self.option, self.name, self.ext_info)
            succeeded = True
        except Exception as e:
            sys.stdout.write("!!! error exception !!!!: %s server[%s] option %s \n"%(e, (self.ipInfo), self.option) )
        finally:
            # Runs on every exit path, including exceptions that are not
            # Exception subclasses and errors raised while logging above.
            # Without this, an abnormal exit would leave result at 0 and
            # never hand the slot back, and a caller waiting on the pool
            # for a free slot could block forever.
            if not succeeded:
                self.result = 1
            # The task has ended: tell the pool.
            if self.pool is not None:
                self.pool.add_thread()

    def get_result(self):
        """Return 0 if the task finished normally, 1 otherwise."""
        return self.result
        
def GetGroupList():
    """Return the server list: the five base addresses repeated 20 times."""
    # Hosts 10.0.128.1 through 10.0.128.5.
    base = tuple('10.0.128.%d' % host for host in range(1, 6))
    repeats = 20
    return base * repeats
        
def option_group_list(op, ext_info):
    """Run ``op`` on every server returned by GetGroupList().

    For 'update', 'start', 'stop' and 'restart' one threadUpdateHandler
    thread is used per server. For 'update' the handlers are taken from a
    ThreadPool(10), so a new handler is only obtained once the pool has one
    available; the other threaded operations start all handlers directly.
    Any operation outside that list is run sequentially by calling
    SwitchOption directly.

    Args:
        op: Operation name forwarded to each task.
        ext_info: Extra information forwarded to each task.

    Raises:
        AssertionError: If at least one threaded task reported a failure.
    """
    groups = GetGroupList()

    if op not in ('update', 'start', 'stop', 'restart'):
        # Sequential path: this script defines no handler class for it, so
        # the operation is performed through SwitchOption itself, with the
        # arguments in SwitchOption's (ipInfo, option, server, ext_info) order.
        for ipTup in groups:
            SwitchOption(ipTup, op, 'group', ext_info)
        return

    # Only 'update' (the rsync-heavy step in the article) is throttled.
    pool = ThreadPool(10) if op == 'update' else None

    threadList = []
    for ipTup in groups:
        # With a pool, get_thread() hands out a handler from the pool's queue;
        # each finished handler puts a new one back via pool.add_thread().
        t = pool.get_thread() if pool is not None else threadUpdateHandler()
        t.parepare_args('group', ipTup, op, ext_info, pool)
        t.start()
        threadList.append(t)

    # The main thread blocks here until every worker has returned.
    for thread in threadList:
        thread.join()

    # Collect the servers whose worker recorded a failure.
    failed = [ipTup for ipTup, thread in zip(groups, threadList)
              if thread.get_result()]
    for ipTup in failed:
        # One write per line, newline included, like the workers' own logging.
        sys.stdout.write("%s server[%s] failed\n" % (op, ipTup))

    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if failed:
        raise AssertionError("%s failed on %d of %d servers"
                             % (op, len(failed), len(groups)))

            
# Update every server first (throttled), then restart every server.
for operation in ('update', 'restart'):
    option_group_list(operation, '')




你可能感兴趣的:(python,多线程,server)