Python使用Chrome抓取页面中Ajax请求返回的数据

#-*-coding:utf-8-*-

from time import sleep
from selenium import webdriver
import json
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Pick the request headers of the matching request out of the performance log
def getHttpInfo(browser, urlKeyword='ajaxUrl'):
    """Return the request headers of the first logged response whose URL matches.

    Walks the entries returned by ``browser.get_log('performance')``, decodes
    the JSON string stored under each entry's ``message`` key and inspects
    ``message.params.response`` inside it.

    Args:
        browser: Object exposing ``get_log('performance')`` that yields
            entries carrying a JSON string under the ``message`` key.
        urlKeyword: Substring the response URL must contain. Defaults to
            ``'ajaxUrl'``, the value that used to be hard-coded.

    Returns:
        The ``requestHeaders`` value of the first matching response, or
        ``None`` when no entry matches.
    """
    for entry in browser.get_log('performance'):
        try:
            response = json.loads(entry['message'])['message']['params']['response']
            if urlKeyword in response['url']:
                return response['requestHeaders']
        except (KeyError, TypeError, ValueError):
            # Entries that are not valid JSON, carry no response payload, or
            # lack the expected fields are skipped on purpose. Only these
            # lookup/decoding errors are silenced, so interrupts and
            # unrelated failures still propagate.
            continue
    return None

# Load a page in headless Chrome and save the captured request headers to a file
def setHeaders(url='http://www.baidu.com', waitSeconds=20, outputPath='header.txt'):
    """Open ``url`` in headless Chrome and dump the captured headers as JSON.

    Starts Chrome with performance logging enabled, loads the page, waits so
    that background requests have time to show up in the log, extracts the
    headers with ``getHttpInfo`` and writes them to ``outputPath``.

    Args:
        url: Page to load. Defaults to the previously hard-coded address.
        waitSeconds: Seconds to sleep after the page load before reading the
            performance log.
        outputPath: File that receives the JSON-encoded headers. When no
            matching request was logged the file contains ``null``.
    """
    # Copy first: DesiredCapabilities.CHROME is a class-level dict shared by
    # every user of the class, so it must not be mutated in place.
    caps = DesiredCapabilities.CHROME.copy()
    loggingPrefs = {'performance': 'ALL'}
    caps['loggingPrefs'] = loggingPrefs
    # Newer ChromeDriver releases running in W3C mode expect the
    # vendor-prefixed capability name, so both spellings are provided.
    caps['goog:loggingPrefs'] = loggingPrefs

    options = webdriver.ChromeOptions()
    # Plain argument instead of the deprecated options.set_headless().
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')

    driver = webdriver.Chrome(desired_capabilities=caps, options=options)
    try:
        driver.get(url)
        sleep(waitSeconds)
        headers = getHttpInfo(driver)
    finally:
        # Always shut the browser down, even when loading or parsing fails,
        # so no orphaned Chrome/chromedriver process is left behind.
        driver.quit()

    # write header
    with open(outputPath, 'w') as hand:
        hand.write(json.dumps(headers))

# Script entry point: capture the headers only when this file is run directly,
# not when it is imported as a module.
if __name__ == '__main__':
    setHeaders()

转载于:https://www.cnblogs.com/ningmo/p/10695876.html

你可能感兴趣的:(python使用chrome抓取页面中ajax请求返回的数据)