pyppeteer基于asyncio异步模块
导入模块
from pyppeteer import launch
实例化一个浏览器
browser = await launch() # 实例化一个浏览器对象
# browser = await launch({'headless':False,'dumpio':True,'ignoreDefaultArgs':['--enable-automation']})
实例化一个页面
page = await self.browser.newPage()
页面打开网站
await page.goto('https://www.baidu.com/',{'timeout':100*1000})
定位页面元素
ele = (await page.xpath('//div[text()="免密码登录"]'))[0]
await ele.click() # 点击页面元素
输入框中输入内容
await page.type('input[name=username]',uname,{'delay':100})
获取元素属性值
check = await page.Jeval('img[alt=图形验证码]','node => node.getAttribute("src")')
获取cookies
cookies = await page.evaluate('document.cookie',force_expr=True)
import asyncio
import random
import traceback

from pyppeteer import launch
class Spider:
    """Drive a Chromium browser through the Zhihu SMS sign-in flow with pyppeteer.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Browser handle; created lazily by ``registered`` or eagerly by ``run``.
        self.browser = None
        # Sign-in page that every user name is submitted to.
        self.regist_url = 'https://www.zhihu.com/signin?next=%2F'

    def input_time_random(self):
        """Return a random integer in ``[100, 151]`` used to derive the typing delay."""
        return random.randint(100, 151)

    async def _launch_browser(self):
        """Launch a non-headless browser with ``--enable-automation`` dropped from the default args."""
        return await launch({
            'headless': False,
            'dumpio': True,
            'ignoreDefaultArgs': ['--enable-automation'],
        })

    async def registered(self, unames: tuple):
        """Run the SMS sign-in flow once for each user name in ``unames``.

        A failure for one user name is printed with its traceback and does not
        stop the remaining names. Cancellation and interpreter-exit signals are
        NOT swallowed, so the task can still be cancelled.

        Args:
            unames: User names (typed into ``input[name=username]``).
        """
        if not self.browser:
            self.browser = await self._launch_browser()
        for uname in unames:
            page = None
            try:
                page = await self.browser.newPage()
                await page.goto(self.regist_url, {'timeout': 100 * 1000})
                # Choose the password-free login tab.
                await (await page.xpath('//div[text()="免密码登录"]'))[0].click()
                await asyncio.sleep(1)
                # Type the user name with a randomized per-key delay.
                await page.type(
                    'input[name=username]',
                    uname,
                    {'delay': self.input_time_random() - 50},
                )
                check = await page.Jeval(
                    'img[alt=图形验证码]',
                    'node => node.getAttribute("src")',
                )
                print('check', check)
                # ``check`` is None when the image has no ``src``; guard before
                # the substring test so a missing captcha does not abort the flow.
                if check and 'null' not in check:
                    pass  # TODO: hand the captcha image to a captcha-solving service
                # Click the "get SMS verification code" button.
                await (await page.xpath('//button[text()="获取短信验证码"]'))[0].click()
                await asyncio.sleep(60)
                cookies = await page.evaluate('document.cookie', force_expr=True)
                print('cookies', cookies)
            except Exception:
                # Best-effort per user name: report and move on to the next one.
                traceback.print_exc()
            finally:
                # Close the tab so pages do not pile up across user names.
                if page is not None:
                    try:
                        await page.close()
                    except Exception:
                        traceback.print_exc()

    async def run(self):
        """Launch a browser, run the registration task(s), then close the browser."""
        self.browser = await self._launch_browser()
        try:
            tasks = [
                asyncio.create_task(self.registered(('XXXXX',)))
                for _ in range(1)
            ]
            await asyncio.wait(tasks)
        finally:
            # Release the browser process and clear the stale handle so a later
            # ``registered`` call launches a fresh one.
            await self.browser.close()
            self.browser = None
# Script entry point: build a Spider and run its coroutine to completion.
if __name__ == "__main__":
    asyncio.run(Spider().run())