import requests
import re
import time
head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0"
}
li = []  # collects proxies that pass the availability check
# Fetch proxy IPs from one page of the proxy listing site
def get_proxy_IP(count):
    prox = {
        "http": "http://127.0.0.1:7890",
        "https": "http://127.0.0.1:7890"  # assuming a local HTTP forward proxy on port 7890
    }
    f = open(r"pro.txt", 'a')
    url = f"http://www.66ip.cn/{count}.html"
    r = requests.get(url, headers=head, proxies=prox)
    r.encoding = 'gb2312'
    req = r.text
    # Each table row holds IP, port, location, anonymity type and a "验证" (verified) timestamp
    obj = re.compile(r'<tr><td>(?P<ip>\d+\.\d+\.\d+\.\d+)</td><td>(?P<dk>\d+)</td>'
                     r'<td>.*?</td><td>.*?</td><td>\d{4}年\d{2}月\d{2}日\d{2}时.*?验证</td></tr>'
                     , re.S)
    resu = obj.finditer(req)
    for item in resu:
        ip = item.group("ip")
        dk = item.group("dk")
        if isIP(ip):  # make sure the captured string really is an IPv4 address
            s = f'{ip}:{dk}'
            print(s)
            flag = yankeyong(s)
            if flag:
                f.write(s + "\n")
    f.close()
# Check whether a proxy ip:port is actually usable
def yankeyong(proxy):
    pro = {
        "http": "http://" + proxy,
        "https": "http://" + proxy  # plain HTTP proxy, so both schemes point at http://
    }
    url = "http://baidu.com"
    try:
        a = requests.get(url, headers=head, proxies=pro, timeout=2)
        # print(f"proxy response: {a}")  # show the response code
        if a.status_code == 200:  # a normal status code means the proxy works
            li.append(proxy)  # remember the working proxy
            print(f"proxy works: {a}")  # show the response code
            return True  # usable
        return False
    except requests.exceptions.RequestException as e:
        # print(e)
        return False  # unusable (connection refused, timeout, etc.)
# Check whether a string is a valid IPv4 address
def isIP(s):
    p = re.compile(r'^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$')
    return p.match(s) is not None
if __name__ == '__main__':
    for i in range(5, 1000):
        get_proxy_IP(i)
It turned out these were all transparent proxies, so there was little point; I gave up.
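For anyone wanting to confirm that conclusion programmatically: a transparent proxy forwards your real IP to the target (typically in an X-Forwarded-For or Via header), so hiding behind it gains nothing. Below is a minimal sketch of such a check that could be appended to the script above; it reuses the script's head dict and requests import, and the check_anonymity helper plus the httpbin.org endpoints are my own additions, not part of the original code.

# Rough anonymity check: compare the origin IP httpbin.org sees with and
# without the proxy, and look for forwarding headers that leak the client IP.
def check_anonymity(proxy):
    pro = {"http": "http://" + proxy, "https": "http://" + proxy}
    try:
        real_ip = requests.get("http://httpbin.org/ip", headers=head, timeout=5).json()["origin"]
        seen_ip = requests.get("http://httpbin.org/ip", headers=head, proxies=pro, timeout=5).json()["origin"]
        hdrs = requests.get("http://httpbin.org/headers", headers=head, proxies=pro, timeout=5).json()["headers"]
    except requests.exceptions.RequestException:
        return "unreachable"
    if real_ip in seen_ip or "X-Forwarded-For" in hdrs or "Via" in hdrs:
        return "transparent"  # the target can still see the real client IP
    return "anonymous"  # exit IP differs and no forwarding headers observed

A proxy reported as "transparent" here is exactly the kind the note above complains about: it changes nothing about what the target site can see.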