selenium特殊无头模式

Xvfb 和 PyVirtualDisplay
使用这个方案可以兼容多种浏览器的 webdriver(例如 Chrome、Firefox 等)。
apt-get install xvfb(Debian/Ubuntu)
yum install xorg-x11-server-Xvfb(CentOS/RHEL)
pip install PyVirtualDisplay
检查DISPLAY设置,默认应该是 :1

env | grep DISPLAY

部分项目代码

import re
import time

import requests
from lxml import etree
from pyvirtualdisplay import Display
from selenium import webdriver

        # Crawl fragment: renders pages in Chrome on a virtual X display
        # (Xvfb via PyVirtualDisplay) and parses the rendered HTML with lxml.
        #
        # One virtual display is shared by every browser launched below and
        # is stopped when the crawl ends; each browser is always quit, even
        # if loading its page fails.

        def fetch_page_source(page_url, wait_seconds=3):
            """Render ``page_url`` in Chrome and return the page's HTML source.

            A fresh browser is started for every call and is always shut
            down again, including when loading the page raises.
            """
            opt = webdriver.ChromeOptions()
            opt.add_argument('--disable-extensions')
            opt.add_argument('--disable-gpu')
            opt.add_argument("--no-sandbox")
            # NOTE(review): `executable_path` / `chrome_options` are the
            # keyword names this code was written against; newer Selenium
            # releases use `service=` / `options=` — confirm the installed
            # version before changing them.
            driver = webdriver.Chrome(
                executable_path="/usr/local/bin/chromedriver",
                chrome_options=opt,
            )
            try:
                driver.get(page_url)
                # Fixed pause before reading the source; presumably gives the
                # page's scripts time to finish rendering.
                time.sleep(wait_seconds)
                return driver.page_source
            finally:
                driver.quit()

        # Only the text between the first and second occurrence of this
        # marker in the page source is parsed. `split(...)[1]` raises
        # IndexError when the marker is absent from the page.
        marker = '中间右边的部分'

        display = Display(visible=0, size=(800, 800))
        display.start()
        try:
            # NOTE(review): the selector built from this first page is
            # overwritten inside the loop below before it is ever read;
            # confirm whether this initial fetch is still needed.
            url = 'http://***'
            response = fetch_page_source(url)
            response = response.split(marker)[1]
            selector = etree.HTML(response)

            for i in range(1, 2):
                url = 'http://www.cbrc.gov.cn/zhuanti/xzcf/get2and3LevelXZCFDocListDividePage//2.html?current=' + str(i)
                response = fetch_page_source(url)
                response = response.split(marker)[1]
                selector = etree.HTML(response)

                node_list = selector.xpath('//tr')
                for node in node_list:
                    title = ''.join(node.xpath('./td[1]/a/@title'))
                    print(title)
                    # Skip rows with a missing/very short title and the
                    # entries whose title contains the excluded phrase.
                    if len(title) > 4 and '商业银行主要监管指标' not in title:
                        url_href = ''.join(node.xpath('./td[1]/a/@href'))
                        url_href = 'http://www.cbrc.gov.cn' + url_href

                        # Render the linked detail page and parse it.
                        url = url_href
                        node_lists = fetch_page_source(url)
                        selectors = etree.HTML(node_lists)
        finally:
            # Shut down the Xvfb process started above.
            display.stop()

你可能感兴趣的:(selenium)