Python 代理设置
urllib 设置代理
# -*- coding:utf-8 -*-
import urllib.request

# Route urllib requests through a forward proxy.
# BUG FIX: ProxyHandler keys are URL *schemes*. The original mapped only
# 'https', while the request below is plain http — so the proxy was never
# actually used. Register the proxy for both schemes.
proxy_url = 'http://123.56.75.209:3128'
handler = urllib.request.ProxyHandler({'http': proxy_url, 'https': proxy_url})
opener = urllib.request.build_opener(handler)
# httpbin echoes the request; the origin IP should be the proxy's address.
response = opener.open('http://httpbin.org/get')
print(response.read())
Requests 设置代理
import requests

# Per-scheme proxy mapping: http traffic goes through port 3128,
# https traffic through port 1080 on the same host.
proxy_map = {"http": "http://10.10.1.10:3128", "https": "http://10.10.1.10:1080"}

# httpbin.org/ip echoes the origin address, so the response body
# should show the proxy's IP rather than ours.
resp = requests.get("http://httpbin.org/ip", proxies=proxy_map)
print(resp.text)
selenium 设置代理
from selenium import webdriver

# Launch Chrome behind a forward proxy and check the exit IP.
options = webdriver.ChromeOptions()
# Note: no spaces are allowed around '=' in the --proxy-server flag.
options.add_argument("--proxy-server=http://202.20.16.82:10152")

browser = webdriver.Chrome(chrome_options=options)
try_url = "http://httpbin.org/ip"
browser.get(try_url)
print(browser.page_source)
browser.quit()
获取免费IP
#!/usr/bin/python
# -*- coding:utf8 -*-
import requests
from bs4 import BeautifulSoup
class Proxys:
    """Scrape free proxy lists from two Chinese free-proxy sites.

    Both scrapers return a list of dicts with the same keys:
    ip, port, anonymity, http, position, speed, lasttime.
    """

    def __init__(self):
        # Landing pages of the two free-proxy listing sites.
        self.kuaidaili = "https://www.kuaidaili.com/free/inha/1/"
        self.xicidaili = "http://www.xicidaili.com/nn"

    def kuaidailiIp(self):
        """Fetch and parse the kuaidaili free-proxy table.

        Returns:
            list[dict]: one dict per table row (see class docstring).
        Raises:
            requests.RequestException: on network failure or timeout.
        """
        # timeout so an unresponsive site cannot hang the caller forever
        response = requests.get(self.kuaidaili, timeout=10)
        soup = BeautifulSoup(response.text, "lxml")
        table = soup.find("table", class_="table-bordered")
        tbody = table.find("tbody")
        proxies = []
        # Column order on kuaidaili: ip, port, anonymity, protocol,
        # location, speed, last-checked time.
        for tr in tbody.find_all("tr"):
            tds = tr.find_all("td")
            proxies.append({
                "ip": tds[0].text,
                "port": tds[1].text,
                "anonymity": tds[2].text,
                "http": tds[3].text,
                "position": tds[4].text,
                "speed": tds[5].text,
                "lasttime": tds[6].text,
            })
        return proxies

    def xicidailiIp(self):
        """Fetch and parse the xicidaili free-proxy table.

        The site rejects bare requests, so browser-like headers
        (including a session cookie) are sent with the request.

        Returns:
            list[dict]: one dict per data row (see class docstring).
        Raises:
            requests.RequestException: on network failure or timeout.
        """
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,de;q=0.7",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Cookie": "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTJjMTJhMGFjZjU0NzhhYWU3MzI3NDk2Zjc2MDdiM2VkBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUdEWm01dzNhOW8xdU9VeEhuWnBEdzR6YXAxeWhRWGFSSUM4L3J1aE5uNDQ9BjsARg%3D%3D--6120a318303f8d901ac55c579f1b0a72a9781097; Hm_lvt_0cf76c77469e965d2957f0553e6ecf59=1525533120,1525534199,1525534396,1525534398; Hm_lpvt_0cf76c77469e965d2957f0553e6ecf59=1525535827",
            "Host": "www.xicidaili.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36"
        }
        # timeout so an unresponsive site cannot hang the caller forever
        response = requests.get(self.xicidaili, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "lxml")
        # FIX: the original reused the name `iplists` for both the <table>
        # tag and the result list; use distinct names for clarity.
        table = soup.find("table", id="ip_list")
        proxies = []
        # The first <tr> is the header row — skip it by slicing instead
        # of the original manual counter.
        for tr in table.find_all("tr")[1:]:
            tds = tr.find_all("td")
            # Column order on xicidaili differs from kuaidaili; indices
            # below map the site's layout onto the shared dict keys.
            proxies.append({
                "ip": tds[1].text,
                "port": tds[2].text,
                "anonymity": tds[4].text,
                "http": tds[5].text,
                "position": tds[3].text,
                "speed": tds[6].text,
                "lasttime": tds[9].text,
            })
        return proxies
参考链接
- Python代理IP爬虫的简单使用
- python爬虫设置代理