【Python - 爬虫】爬虫代理

设置

urllib 设置代理
# -*- coding:utf-8 -*-   
import urllib.request

# Route all HTTPS traffic through the given proxy endpoint.
proxy_handler = urllib.request.ProxyHandler({'https': 'http://123.56.75.209:3128'})
proxy_opener = urllib.request.build_opener(proxy_handler)
# Fetch through the proxy-aware opener and dump the raw body.
with proxy_opener.open('http://httpbin.org/get') as response:
    print(response.read())
Requests 设置代理
import requests

# Per-scheme proxy endpoints: requests picks the entry matching the URL scheme.
proxy_map = {
    "http": "http://10.10.1.10:3128",
    "https": "http://10.10.1.10:1080",
}
response = requests.get("http://httpbin.org/ip", proxies=proxy_map)
print(response.text)
selenium 设置代理
from selenium import webdriver

chromeOptions = webdriver.ChromeOptions()
# NOTE: no spaces around '=' in the argument string, or Chrome ignores it.
chromeOptions.add_argument("--proxy-server=http://202.20.16.82:10152")
# 'chrome_options=' is deprecated and was removed in Selenium 4; 'options='
# is the supported keyword (accepted since Selenium 3.8).
browser = webdriver.Chrome(options=chromeOptions)
try:
    browser.get("http://httpbin.org/ip")
    print(browser.page_source)
finally:
    # Always release the browser process, even if the request/print fails.
    browser.quit()
获取免费IP
#!/usr/bin/python
# -*- coding:utf8 -*-

import requests
from bs4 import BeautifulSoup

class Proxys:
    """Scrape free proxy-server listings from kuaidaili and xicidaili.

    Each scraper method returns a list of dicts with the keys:
    ip, port, anonymity, http, position, speed, lasttime.
    """

    def __init__(self):
        # Landing pages of the two free-proxy listing sites.
        self.kuaidaili = "https://www.kuaidaili.com/free/inha/1/"
        self.xicidaili = "http://www.xicidaili.com/nn"

    def kuaidailiIp(self):
        """Fetch and parse the kuaidaili proxy table.

        Returns:
            list[dict]: one dict per table row (see class docstring for keys).
        Raises:
            requests.RequestException: on network failure or timeout.
        """
        # timeout prevents the call from hanging forever on a dead host.
        response = requests.get(self.kuaidaili, timeout=10)
        soup = BeautifulSoup(response.text, "lxml")
        table = soup.find("table", class_="table-bordered")
        tbody = table.find("tbody")
        iplists = []
        for tr in tbody.find_all("tr"):
            tds = tr.find_all("td")
            # Column order on the page: ip, port, anonymity, type, position,
            # speed, last-verified time.
            iplists.append({
                "ip": tds[0].text,
                "port": tds[1].text,
                "anonymity": tds[2].text,
                "http": tds[3].text,
                "position": tds[4].text,
                "speed": tds[5].text,
                "lasttime": tds[6].text,
            })
        return iplists

    def xicidailiIp(self):
        """Fetch and parse the xicidaili proxy table.

        Sends browser-like headers (cookie/UA) because the site rejects
        bare clients. Skips the table's header row.

        Returns:
            list[dict]: one dict per table row (see class docstring for keys).
        Raises:
            requests.RequestException: on network failure or timeout.
        """
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,de;q=0.7",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Cookie": "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTJjMTJhMGFjZjU0NzhhYWU3MzI3NDk2Zjc2MDdiM2VkBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUdEWm01dzNhOW8xdU9VeEhuWnBEdzR6YXAxeWhRWGFSSUM4L3J1aE5uNDQ9BjsARg%3D%3D--6120a318303f8d901ac55c579f1b0a72a9781097; Hm_lvt_0cf76c77469e965d2957f0553e6ecf59=1525533120,1525534199,1525534396,1525534398; Hm_lpvt_0cf76c77469e965d2957f0553e6ecf59=1525535827",
            "Host": "www.xicidaili.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36"
        }
        # timeout prevents the call from hanging forever on a dead host.
        response = requests.get(self.xicidaili, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "lxml")
        # Distinct names: the original reused 'iplists' for both the table
        # element and the result list, which was confusing.
        table = soup.find("table", id="ip_list")
        iplists = []
        # [1:] skips the header <tr> (replaces the manual counter).
        for tr in table.find_all("tr")[1:]:
            tds = tr.find_all("td")
            # Column order on this page differs from kuaidaili: td[0] is a
            # country-flag cell, position comes before anonymity/type.
            iplists.append({
                "ip": tds[1].text,
                "port": tds[2].text,
                "anonymity": tds[4].text,
                "http": tds[5].text,
                "position": tds[3].text,
                "speed": tds[6].text,
                "lasttime": tds[9].text,
            })
        return iplists

参考链接

  • Python代理IP爬虫的简单使用
  • python爬虫设置代理

你可能感兴趣的:(【Python - 爬虫】爬虫代理)