pyppeteer登录头条获取cookie爬坑。

首先需要用 pip 卸载已安装的 websockets,再安装指定版本:

pip uninstall websockets #卸载websockets
pip install websockets==6.0 #指定安装6.0版本

否则,在用 waitForSelector 判断元素是否出现、而页面又经过 js 跳转到新页面时,会出现下面的报错:

Runtime.callFunctionOn: Target closed.

参考链接:https://www.cnblogs.com/baihuitestsoftware/p/10531462.html

保存cookie:

cookies2 = await page.cookies()
    await save_cookie(cookies2)

设置Cookie:(pyppeteer 的 setCookie 接收的是一个个 dict 参数,而不是 cookie 列表,所以需要循环 cookie 列表逐个传入,或者用 page.setCookie(*cookies) 解包传入。)

await page.goto('https://www.toutiao.com/')
    for cookie in cookie1:
        await page.setCookie(cookie)
    await page.goto('https://www.toutiao.com/')
    print("登录成功")
    await page.waitForSelector('.logged', {'timeout': 300000})

整体代码:(首次登录输入账户,二次直接登录成功):

# -*- coding: utf-8 -*-
import asyncio
from pyppeteer import launch
import json


# Persist cookies to disk
async def save_cookie(cookie):
    """Serialize *cookie* as JSON into ``cookie.json`` in the working directory.

    The file is truncated and rewritten on every call.  Non-ASCII characters
    are written as-is (UTF-8) rather than as ``\\uXXXX`` escapes.

    Args:
        cookie: Any JSON-serializable object; callers in this file pass the
            list returned by ``page.cookies()``.
    """
    serialized = json.dumps(cookie, ensure_ascii=False)
    with open("cookie.json", 'w+', encoding="utf-8") as out:
        out.write(serialized)


# Read cookies back from disk
async def load_cookie():
    """Return the object stored as JSON in ``cookie.json``.

    Returns:
        The decoded JSON content of the file, read as UTF-8.

    Raises:
        FileNotFoundError: If ``cookie.json`` does not exist in the working
            directory.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open("cookie.json", 'r', encoding="utf-8") as src:
        return json.load(src)


# Interactive login
async def login(page):
    """Open the Toutiao SSO page, wait for login, then save the cookies.

    The coroutine configures the viewport and user agent, navigates to
    ``https://sso.toutiao.com/`` and blocks until an element matching
    ``.logged`` appears (presumably the marker of a logged-in page - TODO
    confirm).  The page's cookies are then written via ``save_cookie`` and
    printed.

    Args:
        page: A pyppeteer page object.
    """
    viewport = {'width': 1366, 'height': 768}
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299')
    await page.setViewport(viewport)
    await page.setUserAgent(user_agent)

    await page.goto('https://sso.toutiao.com/')
    # Generous timeout so the user has time to type credentials by hand.
    wait_options = {'timeout': 300000}
    await page.waitForSelector('.logged', wait_options)

    session_cookies = await page.cookies()
    await save_cookie(session_cookies)
    print(session_cookies)


# Load the home page with a previously saved session
async def index(page, cookie1):
    """Restore a saved session on toutiao.com and refresh the stored cookies.

    Steps: configure viewport and user agent, open the home page, install
    every cookie from *cookie1*, reload the page, wait for the ``.logged``
    element, report success, pause, then save and print the page's current
    cookies.

    Args:
        page: A pyppeteer page object.
        cookie1: Iterable of cookie dicts, e.g. the list returned by
            ``load_cookie()``.
    """
    await page.setViewport({'width': 1366, 'height': 768})
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                            'Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299')
    # Visit the site once before installing cookies - presumably so they are
    # bound to the toutiao.com page; TODO confirm this first load is required.
    await page.goto('https://www.toutiao.com/')
    for cookie in cookie1:
        await page.setCookie(cookie)
    # Reload so the request is sent with the restored cookies.
    await page.goto('https://www.toutiao.com/')
    # Only report success once the logged-in marker has actually appeared;
    # if the wait times out, no success message is printed.
    await page.waitForSelector('.logged', {'timeout': 300000})
    print("登录成功")
    # NOTE(review): the reason for this 60 s pause is not stated in the file;
    # presumably it lets the site finish updating cookies - confirm.
    await asyncio.sleep(60)
    cookies2 = await page.cookies()
    await save_cookie(cookies2)
    print(cookies2)


# Main coroutine
async def main():
    """Launch Chrome and sign in, reusing saved cookies when available.

    If ``cookie.json`` can be loaded, the saved session is restored through
    ``index``.  If the file is missing or is not valid JSON (e.g. on the very
    first run), the interactive ``login`` flow runs instead so that the file
    gets created.  The browser is always closed before returning.
    """
    # Start the browser
    browser = await launch({'headless': False,
                            'dumpio': True,
                            'args': [
                                # '--disable-extensions',
                                # '--disable-bundled-ppapi-flash',
                                # '--mute-audio',
                                # '--no-sandbox',
                                # '--disable-setuid-sandbox',
                                '--disable-dev-shm-usage',
                                # The comma after this flag matters: without
                                # it Python joins the two adjacent literals
                                # into one meaningless switch.
                                '--shm-size=1gb',
                                '--disable-gpu',
                            ],
                            # Raw string: same value as before, but without
                            # relying on invalid escape sequences such as \P.
                            'executablePath': r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
                            })
    try:
        # Open a new tab
        page = await browser.newPage()
        try:
            cookie = await load_cookie()
        except (FileNotFoundError, json.JSONDecodeError):
            # No usable saved session yet: log in by hand and save cookies.
            await login(page)
        else:
            # Saved session found: restore it on the home page.
            await index(page, cookie)
    finally:
        # Release the Chrome process even if a step above raised.
        await browser.close()


# Script entry point
if __name__ == '__main__':
    # Run main() to completion on the default event loop.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(main())

 

你可能感兴趣的:(python)