获取代理IP

import requests
import re
import time
# Browser-like request headers sent with every HTTP request in this script.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 "
        "SE 2.X MetaSr 1.0"
    ),
}

# Collects the responses obtained through proxies that passed the check.
li = []
# Fetch proxy IPs from the proxy-listing website.

# One row of the listing table: the IP cell, the port cell, and (lazily
# skipped to) the cell holding the "YYYY年MM月DD日HH时 ... 验证" timestamp.
# NOTE(review): the named groups and the <td> markup were lost from the
# pasted pattern and are reconstructed here -- confirm against the live page.
_PROXY_ROW_RE = re.compile(
    r'<td>(?P<ip>\d+\.\d+\.\d+\.\d+)</td>\s*<td>(?P<dk>\d+)</td>'
    r'.*?\d{4}年\d{2}月\d{2}日\d{2}时.*?验证',
    re.S,
)


def get_proxy_IP(count):
    """Scrape listing page *count* and append usable proxies to ``pro.txt``.

    The page ``http://www.66ip.cn/<count>.html`` is requested through a local
    forwarding proxy, decoded as gb2312 and scanned for ``ip`` / ``dk`` (port)
    pairs.  Every pair whose IP passes ``isIP`` is printed, checked with
    ``yankeyong`` and, when usable, written to ``pro.txt`` as ``ip:port``.

    Args:
        count: Page number inserted into the listing URL.

    Returns:
        None.  Results are appended to ``pro.txt`` in the working directory.
    """
    # Local forwarding proxy used to reach the listing site itself.
    # The "https" entry previously read 127.0.1.0, a typo for 127.0.0.1.
    prox = {
        "http": "http://127.0.0.1:7890",
        "https": "https://127.0.0.1:7890",
    }
    url = f"http://www.66ip.cn/{count}.html"
    r = requests.post(url, headers=head, proxies=prox)
    r.encoding = 'gb2312'
    req = r.text

    # The context manager closes the file even if a check below raises.
    with open(r"pro.txt", 'a') as f:
        for item in _PROXY_ROW_RE.finditer(req):
            ip = item.group("ip")
            dk = item.group("dk")
            if not isIP(ip):  # skip anything that is not a valid IPv4 address
                continue
            s = f'{ip}:{dk}'
            print(s)
            if yankeyong(s):
                f.write(s + "\n")



# Check whether a proxy given as "ip:port" is usable.
def yankeyong(proxy):
    """Return True when ``http://baidu.com`` answers 200 through *proxy*.

    Args:
        proxy: Proxy address as an ``"ip:port"`` string.

    Returns:
        True if the request made through the proxy completed with status
        code 200, otherwise False.  A failed request (connection error,
        timeout, proxy error, ...) counts as "not usable" and never raises.
    """
    pro = {
        "http": "http://" + proxy,
        "https": "https://" + proxy,
    }
    url = "http://baidu.com"
    try:
        a = requests.get(url, headers=head, proxies=pro, timeout=2)
    except requests.exceptions.RequestException:
        # Catch the base class: a read timeout is not a ConnectionError and
        # would otherwise propagate and abort the whole crawl.
        return False

    if a.status_code != 200:
        # Explicit False instead of falling off the end and returning None.
        return False

    li.append(a)  # keep the successful response in the module-level list
    print(f"代理可用:{a}")  # prints the response object (shows the status code)
    return True

# Check whether a string is an IPv4 address.

# Dotted-quad IPv4 pattern: four fields, each in the range 0-255.
# Compiled once at import time; a raw string keeps "\d" and "\." from being
# treated as (invalid) string escape sequences.
_IPV4_RE = re.compile(
    r'((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)'
)


def isIP(str):
    """Return True if *str* is a dotted-quad IPv4 address, else False.

    The whole string must match: surrounding whitespace or a trailing
    newline makes the result False.

    Args:
        str: Candidate address text.  The parameter name shadows the builtin
            but is kept so existing keyword callers continue to work.

    Returns:
        bool: True when every one of the four fields is in 0-255.
    """
    # fullmatch, unlike match with "$", does not accept a trailing "\n".
    return _IPV4_RE.fullmatch(str) is not None


# Script entry point: crawl listing pages 5 through 999, one after another.
if __name__ == "__main__":
    first_page, stop_page = 5, 1000
    for page_no in range(first_page, stop_page):
        get_proxy_IP(page_no)

发现都是些透明代理，意义不大，放弃了。

你可能感兴趣的:(python)