Basic usage
# -*- coding: utf-8 -*-
import asyncio
import aiohttp

# The async and await keywords make the function asynchronous: first the response is fetched asynchronously, then its body is read asynchronously.
async def fetch():
    # aiohttp uses ClientSession as the main interface for issuing requests. A session must be closed once you are done with it, and closing it is itself an asynchronous operation, so always use "async with": the with statement guarantees the session is closed correctly.
    async with aiohttp.ClientSession() as session:
        async with session.get("http://xxx") as response:
            body = await response.read()
            print(body)

# Create an asyncio event loop and run the task on it.
loop = asyncio.get_event_loop()
loop.run_until_complete(fetch())
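On Python 3.7 and later the event-loop boilerplate can be replaced with asyncio.run(), which creates, runs and closes the loop for you; a minimal sketch of the same request (the URL is the same placeholder as above):

# -*- coding: utf-8 -*-
import asyncio
import aiohttp

async def fetch():
    async with aiohttp.ClientSession() as session:
        async with session.get("http://xxx") as response:
            print(await response.read())

# asyncio.run() sets up the event loop, runs the coroutine to completion, and closes the loop (Python 3.7+).
asyncio.run(fetch())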
Specifying the session
# -*- coding: utf-8 -*-
import asyncio
import aiohttp

async def fetch(jsessionId):
    # Send the JSESSIONID cookie as a default header so every request made through this session is authenticated.
    async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        async with session.get("http://xxx") as response:
            body = await response.read()
            print(body)

loop = asyncio.get_event_loop()
loop.run_until_complete(fetch('xxxxxxxxxxxxxxxxxxxxxx'))
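ClientSession can also manage the cookie itself through its cookies argument instead of a hand-built Cookie header; a sketch with the same placeholder URL and session value:

import asyncio
import aiohttp

async def fetch(jsessionId):
    # ClientSession accepts a dict of cookies and sends the Cookie header for us.
    async with aiohttp.ClientSession(cookies={"JSESSIONID": jsessionId}) as session:
        async with session.get("http://xxx") as response:
            print(await response.read())

asyncio.get_event_loop().run_until_complete(fetch('xxxxxxxxxxxxxxxxxxxxxx'))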
Requesting multiple URLs
import asyncio
import aiohttp

async def fetch(url, jsessionId):
    async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        async with session.get(url) as response:
            return await response.read()

async def run(r):
    url = "http://xxx/{}"
    tasks = []
    for i in range(r):
        task = fetch(url.format(i), 'xxxxxxxxxxxxxxxxxxxxxx')
        tasks.append(task)
    # gather() waits for all the coroutines and returns their results in order.
    responses = await asyncio.gather(*tasks)
    # All response bodies are now available in this variable.
    print(responses)

loop = asyncio.get_event_loop()
loop.run_until_complete(run(4))
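By default asyncio.gather() raises the first exception it hits, so one failed request costs you the rest of the results; passing return_exceptions=True keeps every slot, holding either a response body or the exception object. A sketch reusing the fetch() coroutine above (which also provides the aiohttp import):

import asyncio

async def run_tolerant(r):
    url = "http://xxx/{}"
    tasks = [fetch(url.format(i), 'xxxxxxxxxxxxxxxxxxxxxx') for i in range(r)]
    # Failed requests show up as exception objects instead of aborting the whole batch.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print('{}: failed with {!r}'.format(i, result))
        else:
            print('{}: {} bytes'.format(i, len(result)))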
Reading the first column of a file as the URL parameter and requesting multiple URLs
# -*- coding: utf-8 -*-
import aiohttp
import asyncio
import sys, os
from datetime import datetime
import json

async def fetch(url, session, sem):
    async with sem:
        async with session.get(url) as resp:
            # Without the line below the requests did not run concurrently in my tests; asyncio.sleep(0) yields control back to the event loop.
            await asyncio.sleep(0)
            return await resp.read()

async def run():
    tasks = []
    numbers = []
    url = "http://xxx/msisdn={}"
    jsessionId = 'A3FB6AA9CE57ACB5CF105E9428CE0AB2'
    # Script name: sys.argv[0]; first argument: sys.argv[1]; second argument: sys.argv[2].
    file = sys.argv[1]
    # With a big batch (say 30,000 URLs) you get concurrent.futures._base.TimeoutError, because that many requests cannot physically finish in time given network/RAM/CPU capacity, so cap the number of in-flight coroutines with a semaphore.
    sem = asyncio.Semaphore(1000)
    # Pass the session into fetch() so all requests share a single session instead of creating one per request.
    async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        if os.path.isfile(file):
            with open(file, mode='r') as f:
                for line in f:
                    # split() splits on whitespace by default; take the first column.
                    phoneNumber = line.split()[0]
                    numbers.append(phoneNumber)
                    # asyncio.ensure_future is not used here; so far I have seen no difference with or without it.
                    task = fetch(url.format(phoneNumber), session, sem)
                    tasks.append(task)
            # gather() collects all the futures and waits for them to return.
            responses = await asyncio.gather(*tasks)
            for j in range(len(responses)):
                # The response body is UTF-8 encoded JSON; decode it so the Chinese text is readable.
                resJson = json.loads(responses[j].decode('utf-8'))
                if resJson["reDesc"] != "操作成功":  # "操作成功" means the lookup succeeded
                    print('{}:{}'.format(numbers[j], responses[j].decode('utf-8')))
        else:
            print("{} not found".format(file))

a = datetime.now()
loop = asyncio.get_event_loop()
loop.run_until_complete(run())
loop.close()
b = datetime.now()
# The difference between start and end time shows how long the whole batch took.
print('Cost {} seconds'.format((b - a).seconds))
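Instead of a Semaphore, aiohttp can cap concurrency itself via a TCPConnector (the commented-out limit=30 line in the next section hints at this), and the client timeout can be raised for large batches. A sketch, assuming aiohttp 3.x; the limit and timeout values are illustrative only:

import asyncio
import aiohttp

async def run_with_connector(urls, jsessionId):
    # limit=100 caps the number of simultaneous connections held by this session (illustrative value).
    conn = aiohttp.TCPConnector(limit=100)
    # Raise the total timeout above the default (5 minutes in aiohttp 3.x) for slow batches (illustrative value).
    timeout = aiohttp.ClientTimeout(total=600)
    async with aiohttp.ClientSession(connector=conn, timeout=timeout,
                                     headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        async def one(url):
            async with session.get(url) as resp:
                return await resp.read()
        return await asyncio.gather(*(one(u) for u in urls))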
Using docopt to take the session parameter, read the input file, and write the output file
# -*- coding: utf-8 -*-
"""xx查询

Usage:
    miguQuery <session> <inputfile> <outputfile>
"""
from docopt import docopt
import aiohttp
import asyncio
import sys, os
from datetime import datetime
import json

# The async and await keywords make the functions asynchronous. There are actually two asynchronous operations here: first the response is fetched, then its body is read.
async def fetchTest(url, jsessionId):
    async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        async with session.get(url) as resp:
            return await resp.read()

async def fetch(url, session, sem):
    #conn = aiohttp.TCPConnector(limit=30)
    #async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
    async with sem:
        async with session.get(url) as resp:
            # Without the line below the requests did not run concurrently in my tests; asyncio.sleep(0) yields control back to the event loop.
            await asyncio.sleep(0)
            # read() is asynchronous: it does not return the body immediately but an awaitable, hence the await.
            return await resp.read()

async def run(url, jsessionId, inputfile, outputfile):
    tasks = []
    numbers = []
    count = 0
    # Script name: sys.argv[0]; first argument: sys.argv[1]; second argument: sys.argv[2].
    #file = sys.argv[1]
    file = inputfile
    # With a big batch (say 30,000 URLs) you get concurrent.futures._base.TimeoutError, because that many requests cannot physically finish in time given network/RAM/CPU capacity, so cap the number of in-flight coroutines with a semaphore.
    sem = asyncio.Semaphore(1000)
    # Pass the session into fetch() so all requests share a single session instead of creating one per request.
    async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        with open(file, mode='r') as f:
            for line in f:
                # split() splits on whitespace by default; take the first column.
                phoneNumber = line.split()[0]
                numbers.append(phoneNumber)
                #print(url.format(phoneNumber))
                #task = asyncio.ensure_future(fetch(url.format(phoneNumber), jsessionId))
                # The coroutine could be wrapped in an asyncio Future and the list of futures handed to the event loop.
                task = fetch(url.format(phoneNumber), session, sem)
                tasks.append(task)
        # gather() collects all the futures and waits for them to return.
        responses = await asyncio.gather(*tasks)
        with open(outputfile, 'w') as out:
            for j in range(len(responses)):
                resJson = json.loads(responses[j].decode('utf-8'))
                if resJson["reDesc"] != "[FCMG]操作成功":  # i.e. the lookup did not succeed
                    #print('{}:{}'.format(numbers[j], responses[j].decode('utf-8')))
                    out.write('{}:{}\n'.format(numbers[j], responses[j].decode('utf-8')))
                    count += 1
        print("总计查询到包月号码:{}个".format(count))
        print("查询结果输出到:{}".format(outputfile))

if __name__ == '__main__':
    # Parse the command-line arguments declared in the docstring above.
    arguments = docopt(__doc__)
    session = arguments['<session>']
    inputfile = arguments['<inputfile>']
    outputfile = arguments['<outputfile>']
    url = "http://xxx?msisdn={}"
    if not os.path.isfile(inputfile):
        print("找不到此文件:{}".format(inputfile))
        exit(0)
    if os.path.isfile(outputfile):
        print("文件: {} 已经存在,请保存为另一个名字".format(outputfile))
        exit(0)
    # Create an asyncio event loop and run the tasks on it.
    loop = asyncio.get_event_loop()
    # Probe one known number first to verify that the session is still valid.
    resp = loop.run_until_complete(fetchTest(url.format('13666198249'), session)).decode('utf-8')
    if 'login.jsp' in resp:
        print("当前session:{} 不正确或已过期,请重新传入session参数".format(session))
        loop.close()
        exit(0)
    a = datetime.now()
    loop.run_until_complete(run(url, session, inputfile, outputfile))
    loop.close()
    b = datetime.now()
    print('Cost {} seconds'.format((b - a).seconds))
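Assuming the script is saved as miguQuery.py (the filename is an assumption, not given above), a run would look like the line below; the two file names are placeholders and the session value is the sample one used earlier. docopt maps the three positional arguments to arguments['<session>'], arguments['<inputfile>'] and arguments['<outputfile>'].

python miguQuery.py A3FB6AA9CE57ACB5CF105E9428CE0AB2 numbers.txt result.txt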