python asyncio与aiohttp

初步使用

# -*- coding: utf-8 -*-
import asyncio 
import aiohttp 


# `async def` / `await` turn the function into a coroutine. Two steps are
# awaited: first obtaining the response, then reading the response body.
async def fetch():
    """Request ``http://xxx`` and print the raw response body."""
    # ClientSession is aiohttp's main interface for issuing requests. A session
    # has to be closed once it is no longer needed, and closing is itself an
    # asynchronous operation, so the session is managed with `async with`,
    # which guarantees it is closed correctly.
    async with aiohttp.ClientSession() as client:
        async with client.get("http://xxx") as reply:
            print(await reply.read())

# Create a dedicated event loop and run the coroutine on it. The loop is
# created explicitly instead of relying on asyncio.get_event_loop() to create
# one implicitly (deprecated in recent Python versions), and it is always
# closed afterwards so its resources are released.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(fetch())
finally:
    loop.close()

指定session

# -*- coding: utf-8 -*-
import asyncio 
import aiohttp 



async def fetch(jsessionId):
    """Request ``http://xxx`` with a ``JSESSIONID`` cookie and print the body.

    Args:
        jsessionId: Value sent as ``JSESSIONID`` in the ``Cookie`` header that
            is configured on the client session.
    """
    cookie_header = {"Cookie": "JSESSIONID={}".format(jsessionId)}
    async with aiohttp.ClientSession(headers=cookie_header) as client:
        async with client.get("http://xxx") as reply:
            payload = await reply.read()
            print(payload)

# Run the coroutine on an explicitly created event loop and always close the
# loop afterwards; asyncio.get_event_loop() creating a loop implicitly is
# deprecated in recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(fetch('xxxxxxxxxxxxxxxxxxxxxx'))
finally:
    loop.close()

多条url请求

import asyncio 
import aiohttp  

async def fetch(url, jsessionId):
    """Fetch *url* with a ``JSESSIONID`` cookie and return the raw body.

    Args:
        url: Address to request.
        jsessionId: Value sent as ``JSESSIONID`` in the ``Cookie`` header.

    Returns:
        The result of ``response.read()`` for the request.
    """
    headers = {"Cookie": "JSESSIONID={}".format(jsessionId)}
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as response:
            return await response.read()

async def run(r):
    """Fetch ``http://xxx/0`` .. ``http://xxx/<r-1>`` concurrently and print the bodies.

    Args:
        r: Number of URLs to request; the index ``i`` in ``range(r)`` is
            substituted into the URL template.
    """
    url = "http://xxx/{}"
    tasks = [fetch(url.format(i), 'xxxxxxxxxxxxxxxxxxxxxx') for i in range(r)]
    # gather() is awaited exactly once, after every coroutine has been
    # created: awaiting it inside the loop would re-await coroutines that were
    # already consumed by the previous iteration.
    responses = await asyncio.gather(*tasks)
    # `responses` now holds every response body, in the order of `tasks`.
    print(responses)


# Run the coroutine on an explicitly created event loop and always close the
# loop afterwards; asyncio.get_event_loop() creating a loop implicitly is
# deprecated in recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(run(4))
finally:
    loop.close()

从文件中读取第一列作为url参数,进行多条url请求

# -*- coding: utf-8 -*-
import aiohttp
import asyncio
import sys, os
from datetime import datetime 
import json

async def fetch(url, session, sem):
    """GET *url* through a shared session while holding the semaphore.

    Args:
        url: Address to request.
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response (the caller passes an aiohttp session).
        sem: Async context manager entered for the duration of the request;
            the caller passes an ``asyncio.Semaphore`` to cap concurrency.

    Returns:
        The result of awaiting ``read()`` on the response.
    """
    async with sem, session.get(url) as reply:
        # Yield to the event loop once before reading the body. The original
        # author reports that the requests did not run concurrently without
        # this line. NOTE(review): the surrounding awaits already yield, so
        # confirm whether this is still required.
        await asyncio.sleep(0)
        body = await reply.read()
        return body


async def run():
    """Query every number listed in the input file and print the failures.

    The input file path is read from ``sys.argv[1]``. The first
    whitespace-separated column of each non-blank line is substituted into the
    URL template and requested through one shared session. Each response body
    is decoded as UTF-8 JSON; entries whose ``reDesc`` field differs from
    ``"操作成功"`` are printed as ``<number>:<body>``.
    """
    url = "http://xxx/msisdn={}"
    jsessionId = 'A3FB6AA9CE57ACB5CF105E9428CE0AB2'

    # sys.argv[0] is the script name, sys.argv[1] the first argument.
    if len(sys.argv) < 2:
        print("usage: {} <file>".format(sys.argv[0]))
        return
    file = sys.argv[1]
    if not os.path.isfile(file):
        print("{} not found".format(file))
        return

    numbers = []
    with open(file, mode='r') as f:
        for line in f:
            # split() without arguments splits on any whitespace; blank lines
            # produce no columns and are skipped instead of raising IndexError.
            columns = line.split()
            if not columns:
                continue
            numbers.append(columns[0])

    # Cap the number of requests in flight. The original author observed
    # concurrent.futures._base.TimeoutError with very large inputs (e.g.
    # 30,000 requests cannot physically complete within the timeout), so a
    # semaphore limits the concurrency.
    sem = asyncio.Semaphore(1000)
    # One session is shared by all requests and passed into fetch(), instead
    # of creating a new session per request.
    async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        # Bare coroutines are passed to gather(); asyncio.ensure_future is not
        # needed because gather() schedules them itself.
        tasks = [fetch(url.format(number), session, sem) for number in numbers]
        # gather() waits for all requests and returns results in task order,
        # so they line up with `numbers`.
        responses = await asyncio.gather(*tasks)

    for number, raw in zip(numbers, responses):
        # Decode once; the same text is both parsed and printed.
        text = raw.decode('utf-8')
        if json.loads(text)["reDesc"] != "操作成功":
            print('{}:{}'.format(number, text))


a = datetime.now()
# Run on an explicitly created event loop (asyncio.get_event_loop() creating
# one implicitly is deprecated in recent Python versions) and close it even
# if run() raises.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(run())
finally:
    loop.close()
b = datetime.now()
# The elapsed time is the difference between the start and end timestamps.
# total_seconds() covers the whole interval, whereas `.seconds` is only the
# seconds component of the timedelta and wraps around every 24 hours.
print('Cost {} seconds'.format(int((b - a).total_seconds())))

使用docopt获取session参数、读取文件、输出文件

# -*- coding: utf-8 -*-


"""xx查询

Usage:
miguQuery <session> <inputfile> <outputfile>


"""

from docopt import docopt
import aiohttp
import asyncio
import sys, os
from datetime import datetime
import json

# `async def` / `await` make this a coroutine. It performs two asynchronous
# steps: first obtaining the response, then reading the response body.
async def fetchTest(url, jsessionId):
    """Fetch *url* once with its own session and return the raw body.

    Args:
        url: Address to request.
        jsessionId: Value sent as ``JSESSIONID`` in the ``Cookie`` header.

    Returns:
        The result of awaiting ``read()`` on the response.
    """
    cookie_header = {"Cookie": "JSESSIONID={}".format(jsessionId)}
    async with aiohttp.ClientSession(headers=cookie_header) as client:
        async with client.get(url) as reply:
            body = await reply.read()
            return body

async def fetch(url, session, sem):
    """GET *url* through a shared session while holding the semaphore.

    Args:
        url: Address to request.
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response (the caller passes an aiohttp session).
        sem: Async context manager entered for the duration of the request;
            the caller passes an ``asyncio.Semaphore`` to cap concurrency.

    Returns:
        The result of awaiting ``read()`` on the response.
    """
    async with sem, session.get(url) as reply:
        # Yield to the event loop once before reading the body. The original
        # author reports that the requests did not run concurrently without
        # this line. NOTE(review): the surrounding awaits already yield, so
        # confirm whether this is still required.
        await asyncio.sleep(0)
        # read() is asynchronous: calling it does not produce the body
        # directly, so its result has to be awaited.
        payload = await reply.read()
        return payload

async def run(url, jsessionId, inputfile, outputfile):
    """Query every number in *inputfile* and write the matches to *outputfile*.

    The first whitespace-separated column of each non-blank input line is
    substituted into *url* and requested through one shared session. Each
    response body is decoded as UTF-8 JSON; entries whose ``reDesc`` field
    differs from ``"[FCMG]操作成功"`` are written as ``<number>:<body>`` lines
    and counted. A summary is printed at the end.

    Args:
        url: URL template containing one ``{}`` placeholder for the number.
        jsessionId: Value sent as ``JSESSIONID`` in the ``Cookie`` header.
        inputfile: Path of the text file to read numbers from.
        outputfile: Path of the file the matching entries are written to.
    """
    numbers = []
    with open(inputfile, mode='r') as source:
        for line in source:
            # split() without arguments splits on any whitespace; blank lines
            # produce no columns and are skipped instead of raising IndexError.
            columns = line.split()
            if not columns:
                continue
            numbers.append(columns[0])

    # Cap the number of requests in flight. The original author observed
    # concurrent.futures._base.TimeoutError with very large inputs (e.g.
    # 30,000 requests cannot physically complete within the timeout), so a
    # semaphore limits the concurrency.
    sem = asyncio.Semaphore(1000)
    # One session is shared by all requests and passed into fetch(), instead
    # of creating a new session per request.
    async with aiohttp.ClientSession(headers={"Cookie": "JSESSIONID={}".format(jsessionId)}) as session:
        tasks = [fetch(url.format(number), session, sem) for number in numbers]
        # gather() must be awaited while the session is still open; it waits
        # for all requests and returns results in task order, so they line up
        # with `numbers`.
        responses = await asyncio.gather(*tasks)

    count = 0
    # `with` closes the output file even if decoding or parsing fails.
    with open(outputfile, 'w') as sink:
        for number, raw in zip(numbers, responses):
            # Decode once; the same text is both parsed and written.
            text = raw.decode('utf-8')
            if json.loads(text)["reDesc"] != "[FCMG]操作成功":
                sink.write('{}:{}\n'.format(number, text))
                count += 1
    print("总计查询到包月号码:{}个".format(count))
    print("查询结果输出到:{}".format(outputfile))





if __name__ == '__main__':
    # Bind the command-line arguments. The usage text is declared next to the
    # keys that index the parsed result so the positional names used below are
    # guaranteed to exist in the dictionary docopt returns.
    usage = (
        "xx查询\n"
        "\n"
        "Usage:\n"
        "    miguQuery <session> <inputfile> <outputfile>\n"
    )
    arguments = docopt(usage)
    session = arguments['<session>']
    inputfile = arguments['<inputfile>']
    outputfile = arguments['<outputfile>']
    url = "http://xxx?msisdn={}"

    # Validate the paths before any network traffic; failures exit with a
    # non-zero status so callers can detect them.
    if not os.path.isfile(inputfile):
        print("找不到此文件:{}".format(inputfile))
        sys.exit(1)
    if os.path.isfile(outputfile):
        print("文件: {} 已经存在,请保存为另一个名字".format(outputfile))
        sys.exit(1)

    # Create the event loop explicitly (asyncio.get_event_loop() creating one
    # implicitly is deprecated in recent Python versions) and close it on
    # every exit path, including sys.exit() and exceptions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        # Probe one request first: a body mentioning login.jsp is treated as
        # an invalid or expired session.
        resp = loop.run_until_complete(fetchTest(url.format('13666198249'), session)).decode('utf-8')
        if 'login.jsp' in resp:
            print("当前session:{} 不正确或已过期,请重新传入session参数".format(session))
            sys.exit(1)

        a = datetime.now()
        loop.run_until_complete(run(url, session, inputfile, outputfile))
        b = datetime.now()
    finally:
        loop.close()
    # total_seconds() covers the whole interval, whereas `.seconds` is only
    # the seconds component of the timedelta and wraps around every 24 hours.
    print('Cost {} seconds'.format(int((b - a).total_seconds())))

你可能感兴趣的:(Python)