Python多线程和多进程运行程序,并获得函数返回值

Python多线程和多进程运行程序,并获得方法返回值

一、多线程运行程序,获得方法返回值

#多线程运行程序
import threadpool
import itertools
import paramiko
import time

def splitDict(pingo_table_dict, chunk_count=5, chunk_size=10):
    """Split a dict into a fixed number of consecutive sub-dicts.

    Items are taken in the dict's iteration order. Exactly ``chunk_count``
    dicts are always returned: trailing chunks are empty when the input has
    fewer than ``chunk_count * chunk_size`` items, and items beyond that
    total are left out.

    Args:
        pingo_table_dict: Mapping to split.
        chunk_count: Number of sub-dicts to produce (default 5).
        chunk_size: Maximum number of items per sub-dict (default 10).

    Returns:
        A list of ``chunk_count`` dicts.
    """
    remaining_items = iter(pingo_table_dict.items())
    # One shared iterator lets every islice() call continue where the
    # previous chunk stopped.
    return [
        dict(itertools.islice(remaining_items, chunk_size))
        for _ in range(chunk_count)
    ]

def exec_ssh_command(ssh_command,ssh):
    """Run a shell command through ``ssh`` and return its standard output.

    Args:
        ssh_command: Command line to execute.
        ssh: Object whose ``exec_command(command)`` returns a
            ``(stdin, stdout, stderr)`` triple.

    Returns:
        Everything read from stdout, decoded as UTF-8.
    """
    _stdin, remote_stdout, _stderr = ssh.exec_command(ssh_command)
    raw_output = remote_stdout.read()
    # Decode as UTF-8 so non-ASCII output (e.g. Chinese) displays correctly.
    return raw_output.decode('utf-8')


def del_value(result):
    """Extract the leading size text from the command output.

    Returns ``'0 G'`` for empty output or output starting with ``'0'``; the
    text before ``"  hdfs:"`` (or ``"  /pfs"``) when the output mentions
    ``hdfs`` (or ``pfs``); and a fixed fallback string otherwise.
    """
    if result == "" or result.startswith('0'):
        return '0 G'
    # Checked in this order: an output mentioning both keywords is handled
    # by the hdfs rule.
    for keyword, separator in (('hdfs', "  hdfs:"), ('pfs', "  /pfs")):
        if keyword in result:
            return result.split(separator)[0]
    return "其他结果情形"


def search_table_space(new_pingo_dict):
    """Run ``hdfs dfs -du -s`` over SSH for every value of ``new_pingo_dict``.

    One SSH connection is opened for the whole batch and is always closed
    again, even if a command or the result parsing fails.

    Args:
        new_pingo_dict: Mapping whose values are appended to the
            ``hdfs dfs -du -s`` command line (presumably table name -> HDFS
            path; verify against the caller).

    Returns:
        A dict mapping each key to ``[value, parsed_result]`` where
        ``parsed_result`` is what ``del_value`` returns for the command
        output.
    """
    ssh = paramiko.SSHClient()
    # Accept unknown host keys automatically instead of being asked
    # "Are you sure you want to continue connecting (yes/no)?".
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    new_dict = {}
    try:
        # NOTE(review): host and credentials are hard-coded; consider moving
        # them to configuration.
        ssh.connect(
            hostname='10.133.0.1', port=22, username='bigdata', password='bigdata.aac', timeout=50
        )
        for table_name, table_path in new_pingo_dict.items():
            ssh_command = 'hdfs dfs -du -s ' + " " + table_path
            result = exec_ssh_command(ssh_command, ssh)
            new_dict[table_name] = [table_path, del_value(result)]
    finally:
        # Release the connection on every exit path.
        ssh.close()
    return new_dict

#多线程回调函数,保存结果
def get_result(request,result):
    """Thread-pool callback that stores one request's return value.

    Args:
        request: The work request that produced ``result`` (unused).
        result: Value appended to the module-level ``results`` list.
    """
    # Appending mutates the existing list, so no ``global`` declaration is
    # required.
    results.append(result)


if __name__ == "__main__":

#字典中50个元素   
    pingo_table_dict = {
     
        'ods.acoustics.sfc_3080_tracedbamethyst.3080_amethyst_bk_t_testinfo_ae31': 'hdfs://bdos/minos/sqlserver/sfc_3080_tracedbamethyst/T_TestInfo_AE31/1591428980949',
      ......
        'ods.acoustics.sfc_3080_tracedbamethyst.3080_amethyst_bk_table_id': 'hdfs://bdos/minos/sqlserver/sfc_3080_tracedbamethyst/Table_ID/1591428673333'}

	#切分字典
    pingo_table_list = splitDict(pingo_table_dict)

    results = []
    start_time = time.time()

	#多线程运行方法
    pool = threadpool.ThreadPool(8)
    list_var0 = [pingo_table_list[0]]
    list_var1 = [pingo_table_list[1]]
    list_var2 = [pingo_table_list[2]]
    list_var3 = [pingo_table_list[3]]
    list_var4 = [pingo_table_list[4]]
    par_list = [(list_var0,None),(list_var1,None),(list_var2,None),(list_var3,None),(list_var4,None)]
    re = threadpool.makeRequests(search_table_space,par_list,get_result)
    res = [pool.putRequest(req) for req in re]
    pool.wait()

	#处理多线程运行方法得到的结果
    table_all_list = []

    for i in results:
        for key,value in i.items():
            value.insert(0,key)
            table_all_list.append(value)

    stop_time = time.time()
    print(stop_time)
    #27.932997226715088
    print(stop_time-start_time)

二、python多进程运行程序,获得函数返回值

进程方法一:

import multiprocessing
import itertools
import paramiko
import time
# 一个 Manager 对象是一个服务进程,推荐在多进程程序中统一用一个 manager 来管理共享数据。
from multiprocessing import Manager
def worker(procnum, return_dict):
    """Print a short message for ``procnum`` and store it in ``return_dict``.

    Args:
        procnum: Value used both as the key and as the stored value.
        return_dict: Mutable mapping that receives ``procnum -> procnum``.
    """
    print('%s represent!' % (procnum,))
    return_dict[procnum] = procnum


def splitDict(pingo_table_dict):
    """Split a dict into 5 consecutive sub-dicts of up to 10 items each.

    Always returns 5 dicts: trailing ones are empty when the input has fewer
    than 50 items, and items past the 50th are left out. The number of
    chunks and the size of the last chunk are printed before returning.
    """
    remaining_items = iter(pingo_table_dict.items())
    # The shared iterator makes each islice() call resume after the previous
    # chunk.
    pingo_table_list = [
        dict(itertools.islice(remaining_items, 10)) for _ in range(5)
    ]
    print(len(pingo_table_list))
    print(len(pingo_table_list[4]))
    return pingo_table_list

def exec_ssh_command(ssh_command,ssh):
    """Execute ``ssh_command`` via ``ssh`` and return its decoded stdout.

    Args:
        ssh_command: Command line to execute.
        ssh: Object whose ``exec_command(command)`` returns a
            ``(stdin, stdout, stderr)`` triple.

    Returns:
        The full stdout content decoded as UTF-8.
    """
    _unused_in, out_stream, _unused_err = ssh.exec_command(ssh_command)
    # UTF-8 decoding keeps non-ASCII output (e.g. Chinese) readable.
    return out_stream.read().decode('utf-8')

def del_value(result):
    """Parse the leading size number from ``hdfs dfs -du -s`` output.

    Args:
        result: Decoded stdout of the command; may be empty.

    Returns:
        ``0`` for empty output or output starting with ``'0'``; the leading
        size field as an ``int`` when the output mentions ``hdfs`` or
        ``pfs``; otherwise the fallback string ``"其他结果情形"``.

    Raises:
        ValueError: If the leading field is not an integer.
    """
    if not result or result.startswith('0'):
        return 0
    if 'hdfs' in result or 'pfs' in result:
        # Take the first whitespace-separated field rather than splitting on
        # a path prefix, so paths such as "/pfs/..." and output carrying an
        # extra size column still convert cleanly.
        return int(result.split()[0])
    return "其他结果情形"


def search_table_space(new_pingo_dict,return_dict):
    """Run ``hdfs dfs -du -s`` over SSH for every value of ``new_pingo_dict``.

    Results are written into ``return_dict`` rather than returned. One SSH
    connection is opened for the whole batch and is always closed again,
    even if a command or the result parsing fails.

    Args:
        new_pingo_dict: Mapping whose values are appended to the
            ``hdfs dfs -du -s`` command line (presumably table name -> HDFS
            path; verify against the caller).
        return_dict: Mutable mapping that receives
            ``key -> [value, parsed_result]`` for every entry.
    """
    ssh = paramiko.SSHClient()
    # Accept unknown host keys automatically instead of being asked
    # "Are you sure you want to continue connecting (yes/no)?".
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): host and credentials are hard-coded; consider moving
        # them to configuration.
        ssh.connect(
            hostname='10.133.0.1', port=22, username='bigdata', password='bigdata.aac', timeout=50
        )
        for table_name, table_path in new_pingo_dict.items():
            ssh_command = 'hdfs dfs -du -s ' + " " + table_path
            print(ssh_command)
            result = exec_ssh_command(ssh_command, ssh)
            return_dict[table_name] = [table_path, del_value(result)]
    finally:
        # Release the connection on every exit path.
        ssh.close()

if __name__ == '__main__':
    #多进程运行程序
    start_time = time.time()
    #字典包含50个元素
    pingo_table_dict = {
     
            'ods.acoustics.sfc_3080_tracedbamethyst.3080_amethyst_bk_t_testinfo_ae31': 'hdfs://bdos/minos/sqlserver/sfc_3080_tracedbamethyst/T_TestInfo_AE31/1591428980949',
           ......
            'ods.acoustics.sfc_3080_tracedbamethyst.3080_amethyst_bk_table_id': 'hdfs://bdos/minos/sqlserver/sfc_3080_tracedbamethyst/Table_ID/1591428673333'}

	#切分字典
    pingo_table_list = splitDict(pingo_table_dict)
    
    
    manager = Manager()
    #manager.list() 调用一次即产生一个新的数据池,而不是返回同一个数据池实例
    # return_list = manager.list() 也可以使用列表list
    return_dict = manager.dict()

	
	#对进程执行程序
    jobs = []
    for i in range(5):
        new_pingo_dict=pingo_table_list[i]
        p = multiprocessing.Process(target=search_table_space, args=(new_pingo_dict,return_dict))
        jobs.append(p)
        p.start()

    for proc in jobs:
        proc.join()
    
    print(len(return_dict))
    stop_tiem = time.time()
    t = stop_tiem - start_time
    #29.943978786468506
    print(t)

进程方法二:进程池

import multiprocessing
import itertools
import paramiko
import time
# 一个 Manager 对象是一个服务进程,推荐在多进程程序中统一用一个 manager 来管理共享数据。
from multiprocessing import Manager
def worker(procnum, return_dict):
    """Print a short message for ``procnum`` and store it in ``return_dict``.

    Args:
        procnum: Value used both as the key and as the stored value.
        return_dict: Mutable mapping that receives ``procnum -> procnum``.
    """
    message = "".join((str(procnum), ' represent!'))
    print(message)
    return_dict[procnum] = procnum


def splitDict(pingo_table_dict):
    """Split a dict into 5 consecutive sub-dicts of up to 10 items each.

    Always returns 5 dicts: trailing ones are empty when the input has fewer
    than 50 items, and items past the 50th are left out. The number of
    chunks and the size of the last chunk are printed before returning.
    """
    all_items = list(pingo_table_dict.items())
    pingo_table_list = []
    # Window starts are 0, 10, 20, 30, 40; slicing past the end simply
    # yields an empty chunk.
    for start in range(0, 50, 10):
        pingo_table_list.append(dict(all_items[start:start + 10]))
    print(len(pingo_table_list))
    print(len(pingo_table_list[4]))
    return pingo_table_list

def exec_ssh_command(ssh_command,ssh):
    """Execute ``ssh_command`` via ``ssh`` and return its stdout as text.

    Args:
        ssh_command: Command line to execute.
        ssh: Object whose ``exec_command(command)`` returns a
            ``(stdin, stdout, stderr)`` triple.

    Returns:
        The full stdout content decoded as UTF-8.
    """
    streams = ssh.exec_command(ssh_command)
    _stdin, stdout, _stderr = streams
    output_bytes = stdout.read()
    # UTF-8 decoding keeps non-ASCII output (e.g. Chinese) readable.
    result = output_bytes.decode('utf-8')
    return result

def del_value(result):
    """Parse the leading size number from ``hdfs dfs -du -s`` output.

    Args:
        result: Decoded stdout of the command; may be empty.

    Returns:
        ``0`` for empty output or output starting with ``'0'``; the leading
        size field as an ``int`` when the output mentions ``hdfs`` or
        ``pfs``; otherwise the fallback string ``"其他结果情形"``.

    Raises:
        ValueError: If the leading field is not an integer.
    """
    if result == "" or result.startswith('0'):
        return 0
    mentions_known_path = 'hdfs' in result or 'pfs' in result
    if not mentions_known_path:
        return "其他结果情形"
    # The size is the first whitespace-separated field. Reading it directly
    # (instead of splitting on a path prefix) also handles "/pfs/..." paths
    # and output that carries an extra size column.
    size_field = result.split()[0]
    return int(size_field)


def search_table_space(new_pingo_dict,return_dict):
    """Run ``hdfs dfs -du -s`` over SSH for every value of ``new_pingo_dict``.

    Results are written into ``return_dict`` rather than returned. One SSH
    connection is opened for the whole batch and is always closed again,
    even if a command or the result parsing fails.

    Args:
        new_pingo_dict: Mapping whose values are appended to the
            ``hdfs dfs -du -s`` command line (presumably table name -> HDFS
            path; verify against the caller).
        return_dict: Mutable mapping that receives
            ``key -> [value, parsed_result]`` for every entry.
    """
    ssh = paramiko.SSHClient()
    # Accept unknown host keys automatically instead of being asked
    # "Are you sure you want to continue connecting (yes/no)?".
    host_key_policy = paramiko.AutoAddPolicy()
    ssh.set_missing_host_key_policy(host_key_policy)
    try:
        # NOTE(review): host and credentials are hard-coded; consider moving
        # them to configuration.
        ssh.connect(
            hostname='10.133.0.1', port=22, username='bigdata', password='bigdata.aac', timeout=50
        )
        for key, value in new_pingo_dict.items():
            ssh_command = 'hdfs dfs -du -s ' + " " + value
            print(ssh_command)
            result = exec_ssh_command(ssh_command, ssh)
            last_result = del_value(result)
            return_dict[key] = [value, last_result]
    finally:
        # Release the connection on every exit path.
        ssh.close()

if __name__ == '__main__':
    #多进程运行程序
    start_time = time.time()
    pingo_table_dict = {
     
            'ods.acoustics.sfc_3080_tracedbamethyst.3080_amethyst_bk_t_testinfo_ae31': 'hdfs://bdos/minos/sqlserver/sfc_3080_tracedbamethyst/T_TestInfo_AE31/1591428980949',
                            ......
            'ods.acoustics.sfc_3080_tracedbamethyst.3080_amethyst_bk_table_id': 'hdfs://bdos/minos/sqlserver/sfc_3080_tracedbamethyst/Table_ID/1591428673333'}


    pingo_table_list = splitDict(pingo_table_dict)
    manager = Manager()
    # return_list = manager.list() 也可以使用列表list
    return_dict = manager.dict()

	#进程池
    p= multiprocessing.Pool(processes=5)

    for i in range(5):
        new_pingo_dict=pingo_table_list[i]
        p.apply_async(search_table_space, args=(new_pingo_dict,return_dict))
    p.close()
    p.join()

    stop_tiem = time.time()

    t = stop_tiem - start_time
    print(t)

你可能感兴趣的:(大数据平台搭建,Python多线程运行程序,Python多进程运行程序,获得函数返回值)