SOCKS使用及Selenium过检测

urllib携带socks5代理

安装 PySocks:pip3 install PySocks

import socks
import socket
from urllib import request
from urllib.error import URLError

# Register the local SOCKS5 endpoint as PySocks' default proxy, then replace
# the standard socket class so that sockets created afterwards (including the
# ones urllib opens) are SOCKS-aware.
socks.set_default_proxy(socks.SOCKS5, '127.0.0.1', 9742)
socket.socket = socks.socksocket
try:
    # Fetch the echo endpoint and print the decoded response body.
    body = request.urlopen('http://httpbin.org/get').read()
    print(body.decode('utf-8'))
except URLError as err:
    # Only the failure reason is reported.
    print(err.reason)

requests携带socks5代理

import requests 
# Address of the local SOCKS5 proxy in "host:port" form.
proxy = '127.0.0.1:9742'
# Per-request proxy mapping: both plain HTTP and HTTPS traffic go through the
# same SOCKS5 endpoint.
proxies = {'http': 'socks5://' + proxy, 'https': 'socks5://' + proxy}
try:
    response = requests.get('http://httpbin.org/', proxies=proxies)
    # The request and the print must be separate statements; fused onto one
    # line they are a syntax error.
    print(response.text)
except requests.exceptions.ConnectionError as e:
    print('Error', e.args)

另外还有一种设置方式,和 urllib 中的方法相同:使用 socks 模块(同样需要像上文一样先安装 PySocks),设置方法如下:

import requests
import socks
import socket

# Global variant: set the PySocks default proxy and swap in its socket class,
# so no per-request proxies mapping is passed to requests below.
socks.set_default_proxy(socks.SOCKS5, '127.0.0.1', 9742)
socket.socket = socks.socksocket
try:
    page = requests.get('http://httpbin.org/get')
    print(page.text)
except requests.exceptions.ConnectionError as err:
    # Connection failures are reported with the exception's arguments.
    print('Error', err.args)

这样也可以设置SOCKS5代理,运行结果完全相同,相比第一种方法,此方法是全局设置,不同情况可以选用不同的方法。

Selenium 谷歌添加代理

from selenium import webdriver
from selenium.webdriver import ChromeOptions

from selenium import webdriver

# option = webdriver.ChromeOptions()
# option.add_argument('disable-infobars')
# option.add_argument('--headless')
#
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
#
# # 禁止图片和css加载
# # prefs = {"profile.managed_default_content_settings.images": 2, 'permissions.default.stylesheet': 2}
# # option.add_experimental_option("prefs", prefs)
option = ChromeOptions()
# Placeholder: replace with the real proxy address in "host:port" form.
ip = 'ip:port'
# Pass the switch exactly as "--proxy-server=<host:port>". Do not add extra
# quote characters inside the string: an argument that does not begin with
# "--" is not parsed by Chrome as a switch, so the proxy would not be applied.
option.add_argument('--proxy-server=' + ip)
driver = webdriver.Chrome(options=option)
driver.get(url='https://www.baidu.com')

Selenium 火狐添加代理


def login(self):
    """Build and return a headless Firefox driver with a proxy and a random UA.

    The driver is configured with a randomized Chrome-style User-Agent, a
    manual HTTP proxy, and preferences that disable image, stylesheet and
    Flash loading.

    NOTE(review): ``ip`` and ``port`` are not defined in this function; they
    are presumably module-level values (proxy host as a string, port as an
    int) -- confirm against the caller.

    Returns:
        The configured ``webdriver.Firefox`` instance.
    """
    # Function-scope import so the snippet does not depend on a module-level
    # ``import random`` being present.
    import random

    ua_template = (
        'Mozilla/5.0 (Windows NT {}.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/{}.{}.{}.{} Safari/537.36'
    )
    ua = ua_template.format(
        random.randint(7, 10),
        random.randint(35, 75),
        random.randint(0, 9),
        random.randint(1000, 9999),
        random.randint(100, 999),
    )

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    profile = webdriver.FirefoxProfile()

    profile.set_preference('general.useragent.override', ua)

    # 1 selects manual proxy configuration; only the HTTP proxy host/port are
    # set here.
    profile.set_preference('network.proxy.type', 1)
    profile.set_preference('network.proxy.http', ip)
    profile.set_preference('network.proxy.http_port', port)

    # Disable image loading (some Firefox versions only need this one).
    profile.set_preference('permissions.default.image', 2)
    # Some versions additionally need this preference.
    profile.set_preference('browser.migration.version', 9001)
    # Disable CSS.
    profile.set_preference('permissions.default.stylesheet', 2)

    # Disable Flash. This is a boolean preference, so pass a real boolean
    # rather than the string 'false'.
    profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)

    profile.update_preferences()

    # Attach the profile to the options object and pass only ``options``:
    # the ``firefox_options`` / ``firefox_profile`` keyword arguments are
    # deprecated and no longer accepted by newer Selenium releases.
    options.profile = profile
    driver = webdriver.Firefox(options=options)

    # Page-load and script timeouts, in seconds.
    driver.set_page_load_timeout(800)
    driver.set_script_timeout(800)

    return driver

Selenium 谷歌解决JS冲突检测

from selenium import webdriver
# JavaScript registered to run on every new document before page scripts:
# it redefines navigator.webdriver so that reading it yields undefined.
stealth_script = """
    Object.defineProperty(navigator, 'webdriver', {
      get: () => undefined
    })
  """

options = webdriver.ChromeOptions()
# Drop the "enable-automation" switch and turn off the automation extension.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)

cdp_params = {"source": stealth_script}
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", cdp_params)

# Sites typically detect automation by reading window.navigator.webdriver
# (e.g. via console.log in the page).
driver.get('http://exercise.kingname.info')

你可能感兴趣的:(2020,selenium,python,数学建模)