# 爬虫:使用 pyppeteer 模拟浏览器 (Web crawler: simulating a browser with pyppeteer)

from pyppeteer import launch
import asyncio
from loguru import logger

# Default viewport size in pixels; 1366 x 850 is a smaller alternative.
BROWSER_WIDTH = 1920
BROWSER_HEIGHT = 1080

# Keyword options handed to pyppeteer's launch().
# Optional: a 'userDataDir' entry (e.g. './userdata') can be added here.
OPTIONS = dict(
    headless=False,  # shows the browser window; True would run headless
    slowMo=1.3,
    # Chromium command-line switches. An explicit
    # f'--window-size={BROWSER_WIDTH},{BROWSER_HEIGHT}' switch is a possible
    # alternative to '--start-maximized'.
    args=[
        '--start-maximized',
        '--enable-automation',
        '--disable-extensions',
        '--hide-scrollbars',
        '--disable-bundled-ppapi-flash',
        '--mute-audio',
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-gpu',
        '--disable-infobars',
    ],
    dumpio=True,
)


async def page_evaluate(page, width=BROWSER_WIDTH, height=BROWSER_HEIGHT):
    """Configure a pyppeteer page: user agent, viewport and anti-detection JS.

    The anti-detection patches are registered with ``evaluateOnNewDocument``
    so they are re-applied to every document the page loads afterwards, and
    are also run once against the document that is currently loaded.

    Args:
        page: The pyppeteer page to configure (e.g. from ``browser.newPage()``).
        width: Viewport width in pixels.
        height: Viewport height in pixels.
    """
    # Present a regular desktop Chrome user agent.
    await page.setUserAgent(
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
    )

    # Set the viewport size.
    await page.setViewport({'width': width, 'height': height})

    # JavaScript can be switched per page if ever needed; with enabled=False
    # pages are not rendered by scripts:
    # await page.setJavaScriptEnabled(enabled=True)

    # Anti-bot-detection patches, kept in a single script so the same code
    # runs for the current document and for every future one.
    # - `configurable: true` lets the script run more than once against the
    #   same document without "Cannot redefine property" errors.
    # - navigator.webdriver reports `undefined`, matching the value that was
    #   previously registered for new documents.
    # NOTE(review): the `window.screen.width` assignment is carried over
    # unchanged; screen.width is normally read-only, so it likely has no
    # effect, and 1366 differs from the default viewport width -- confirm.
    stealth_script = '''() => {
        Object.defineProperty(navigator, 'webdriver', { get: () => undefined, configurable: true });
        window.screen.width = 1366;
        window.navigator.chrome = { runtime: {}, };
        Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'], configurable: true });
        Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5, 6], configurable: true });
    }'''

    # page.evaluate() alone only affects the document loaded right now, so the
    # patches would be gone after the next page.goto(); registering the script
    # for new documents makes them persist across navigations.
    await page.evaluateOnNewDocument(stealth_script)
    await page.evaluate(stealth_script)
    logger.success("浏览器初始化成功")


async def main():
    """Launch a browser, open the Baidu home page and print its HTML.

    The browser is closed on the way out even if one of the steps raises, so
    a failed run does not leave the launched browser process behind.
    """
    browser = await launch(OPTIONS)
    try:
        page = await browser.newPage()
        await page_evaluate(page)
        await page.goto("https://www.baidu.com/")
        # Presumably gives the page time to finish rendering before the dump.
        await asyncio.sleep(3)
        print(await page.content())

        # NOTE(review): 5000 seconds is roughly 83 minutes; presumably this
        # keeps the window open for manual inspection -- confirm it was not
        # meant to be 5 seconds.
        await asyncio.sleep(5000)
    finally:
        await browser.close()


if __name__ == '__main__':
    # Script entry point: drive main() to completion on the current event loop.
    asyncio.get_event_loop().run_until_complete(main())

# 你可能感兴趣的:(爬虫,python)