# 1. The function passed in returns (the result of calling) another function, and the data is processed in that other function.
import time
from multiprocessing.dummy import Pool

import pandas as pd
import requests
def job(name, url, timeout=30):
    """Fetch a web page, extract its list entries and save them to a spreadsheet.

    Downloads ``url`` with ``requests``, parses the HTML with ``etree.HTML``
    and, for every ``<li>`` under ``<ul class="listContent">``, takes the
    first text node of ``./div/div[3]`` and the first ``href`` of the link
    inside the ``title`` div.  The rows are written to ``<name>.xlsx``
    without header or index, starting at row offset 3.

    NOTE(review): ``etree`` is not imported in the visible part of this file
    (presumably ``from lxml import etree``) -- confirm it is in scope.

    Args:
        name: Base name of the output workbook; ``'.xlsx'`` is appended.
        url: Address of the page to download.
        timeout: Seconds to wait for the HTTP response; without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        None. The result is the workbook written to disk.
    """
    columns = ['哦哦哦', '哦哦哦url']

    response = requests.get(url, timeout=timeout)
    tree = etree.HTML(response.text)

    rows = []
    for item in tree.xpath('//ul[@class="listContent"]/li'):
        ages = item.xpath('./div/div[3]/text()')
        links = item.xpath('./div/div[@class="title"]/a/@href')
        # xpath() returns a list that may be empty; indexing [0] blindly
        # raised IndexError.  Keep the row and leave the missing cell empty.
        rows.append({
            columns[0]: ages[0] if ages else None,
            columns[1]: links[0] if links else None,
        })

    # Passing ``columns`` fixes the column order and keeps the frame
    # well-formed even when the page yields no rows.
    frame = pd.DataFrame(rows, columns=columns)
    frame = frame.fillna(' ')  # missing values become a single space

    # The context manager saves and closes the workbook even on error.
    # ``ExcelWriter.save()`` and the ``encoding`` keyword of ``to_excel``
    # were deprecated in pandas 1.5 and removed in 2.0; ``encoding`` never
    # affected .xlsx output.
    with pd.ExcelWriter(name + '.xlsx') as writer:
        # One write at startrow=3 places row i at offset i + 3, the same
        # layout as writing each row separately at ``index + 3``.
        frame.to_excel(writer, header=False, index=False, startrow=3)
def job1(object):
    """Run ``job`` for every ``name -> url`` entry of a mapping.

    Single-argument adapter so that ``Pool.map`` can hand over one dict per
    task.  The previous version returned from inside the loop, so only the
    first entry of a multi-entry dict was ever processed.

    Args:
        object: Mapping of output workbook name to page URL.  The name
            shadows the builtin but is kept so existing callers that pass
            it by keyword keep working.

    Returns:
        The value returned by ``job`` for the last entry, or ``None`` when
        the mapping is empty.  For a single-entry mapping this is the same
        value as before.
    """
    result = None
    for name, url in object.items():
        result = job(name, url)
    return result
# Each dict maps an output workbook name to the page URL that job1/job
# will download and save.
url_list = [
    {'锦江': 'https://aa.aa.com/aaaaa/aa'},
    {'青羊': 'https://bb.bb.com/bbbbb/bbb/'},
    {'成华': 'https://cc.cc.com/ccccc/ccc/'},
]

time1 = time.time()
# multiprocessing.dummy.Pool is backed by threads, not processes.
pool = Pool(4)
data_list = url_list
try:
    # map() blocks until every task has finished, so time2 covers all jobs.
    res = pool.map(job1, data_list)
    time2 = time.time()
finally:
    # Always release the worker threads, even when a task raised.
    pool.close()
    pool.join()
print(res)
print('总共耗时:' + str(time2 - time1) + 's')