pool.map传入一个函数与一个列表,列表中包含字典

 1、即传入 pool.map 的函数(job1)从每个字典中取出键和值,再调用另一个函数(job)并返回其结果,数据在 job 中处理。

import time
from multiprocessing.dummy import Pool

import pandas as pd
import requests
from lxml import etree
# 自定义函数 job用于获取网页内容信息并保存到表格
def job(name, url):
    """Fetch a listing page and save the extracted rows to ``<name>.xlsx``.

    The page at *url* is downloaded and parsed as HTML.  For every ``<li>``
    under ``<ul class="listContent">`` two values are extracted: the first
    text node of the third ``<div>`` inside the item's first-level ``<div>``,
    and the ``href`` of the link inside the ``<div class="title">``.  All rows
    are written in a single pass, without header or index, starting at
    zero-based row 3 of the sheet.

    Args:
        name: Base name of the output workbook; ``'.xlsx'`` is appended.
        url: Address of the page to scrape.

    Returns:
        None.  The result is the workbook written to disk.
    """
    response = requests.get(url)  # download the page
    tree = etree.HTML(response.text)

    rows = []
    for item in tree.xpath('//ul[@class="listContent"]/li'):
        ld_age = item.xpath('./div/div[3]/text()')
        ld_url = item.xpath('./div/div[@class="title"]/a/@href')
        # xpath() returns a list; an item lacking a field yields an empty
        # list, so fall back to None instead of raising IndexError.
        rows.append({
            '哦哦哦': ld_age[0] if ld_age else None,
            '哦哦哦url': ld_url[0] if ld_url else None,
        })

    # Passing ``columns`` fixes the column order and keeps the frame valid
    # even when no items were found.
    frame = pd.DataFrame(rows, columns=['哦哦哦', '哦哦哦url'])
    frame = frame.fillna(' ')  # replace missing values with a single space

    # The context manager saves and closes the workbook exactly once, also
    # when writing fails.  Writing the whole frame at startrow=3 produces the
    # same layout as writing row ``index`` at ``index + 3`` one at a time.
    with pd.ExcelWriter(name + '.xlsx') as writer:
        frame.to_excel(writer, header=False, index=False, startrow=3)

def job1(object):
    """Unpack a single-entry ``{name: url}`` mapping and run ``job`` on it.

    Only the first key/value pair of the mapping is used.  An empty mapping
    results in ``None`` without calling ``job``.

    Args:
        object: Mapping of output name to page URL.  The parameter name
            shadows the builtin ``object``; it is kept for compatibility.

    Returns:
        Whatever ``job(name, url)`` returns for the first pair, or ``None``
        for an empty mapping.
    """
    first_entry = next(iter(object.items()), None)
    if first_entry is None:
        return None
    name, url = first_entry
    return job(name, url)

# Each entry maps one output name (job appends '.xlsx' to it) to the page to scrape.
url_list = [
    {'锦江': 'https://aa.aa.com/aaaaa/aa'},
    {'青羊': 'https://bb.bb.com/bbbbb/bbb/'},
    {'成华': 'https://cc.cc.com/ccccc/ccc/'},
]

time1 = time.time()
pool = Pool(4)  # multiprocessing.dummy.Pool: four worker threads
data_list = url_list
try:
    # map() blocks until every entry has been processed, so time2 is taken
    # after all the work is done.
    res = pool.map(job1, data_list)
    time2 = time.time()
    print(res)
finally:
    # Always release the worker threads, even when one of the jobs raised.
    pool.close()
    pool.join()
print('总共耗时:' + str(time2 - time1) + 's')

你可能感兴趣的:(python,开发语言)