从国内免费代理中筛选有效地址，制作代理地址池

#!C:\Python3.7
# -*- coding:utf-8 -*-
import requests
from lxml import etree
import os

import optparse
# Address of the free-proxy listing on kuaidaili.com.
# NOTE(review): no function visible in this file references this constant;
# presumably main() appends a page number to it — confirm against main().
base_url = "https://www.kuaidaili.com/free/inha/"

def get_proxy_IP_port(url):
    """Scrape one proxy listing page and keep the proxies that pass test_proxy.

    Args:
        url: Address of a listing page. Rows are read from
            ``//*[@id="list"]//tbody/tr`` and must carry
            ``td[@data-title="IP"]`` and ``td[@data-title="PORT"]`` cells.

    Returns:
        A list of ``"ip:port"`` strings, one per row whose proxy passed
        ``test_proxy``. The list is empty (never ``None``) when the page
        cannot be fetched or parsed, so callers can always iterate or
        extend with the result.
    """
    print("抓取网页:", url)
    ipAndPortList = []

    try:
        # A timeout keeps a stalled listing server from hanging the crawl.
        req = requests.get(url, timeout=10)
        html = etree.HTML(req.text)
    except Exception as e:
        print(e)
        return ipAndPortList

    # etree.HTML may yield None for an empty document.
    if html is None:
        return ipAndPortList

    for tr in html.xpath('//*[@id="list"]//tbody/tr'):
        ip_cells = tr.xpath('./td[@data-title="IP"]/text()')
        port_cells = tr.xpath('./td[@data-title="PORT"]/text()')
        # Skip malformed rows instead of aborting the whole page.
        if not ip_cells or not port_cells:
            continue
        ip = ip_cells[0].strip()
        port = port_cells[0].strip()

        # One failing proxy check must not discard the proxies already
        # collected from this page.
        try:
            usable = test_proxy(ip, port)
        except Exception as e:
            print(e)
            continue
        if usable:
            ipAndPortList.append(ip + ":" + port)

    return ipAndPortList


def test_proxy(ip, port, timeout=5):
    """Report whether a request routed through the given proxy succeeds.

    Args:
        ip: Proxy host; converted with ``str()``.
        port: Proxy port; converted with ``str()``.
        timeout: Seconds to wait for the probe request before treating the
            proxy as unusable.

    Returns:
        True when the probe request to https://www.baidu.com/ answers with
        status 200; False on any other status or on a request failure
        (connection error, proxy error, timeout).
    """
    proxy_url = 'http://' + str(ip) + ':' + str(port)
    # requests picks the proxy by the scheme of the target URL. The probe URL
    # is https, so without an "https" entry the request would bypass the
    # proxy entirely and every proxy would appear to work.
    proxies = {'http': proxy_url, 'https': proxy_url}
    try:
        req = requests.get(
            url="https://www.baidu.com/",
            proxies=proxies,
            timeout=timeout,
        )
    except requests.RequestException:
        # A dead or refusing proxy is simply "not usable", not a crash.
        return False
    return req.status_code == 200


def save_prox(filename,proxy_list):
    """Write every proxy to *filename*, one per line, and print the location.

    Args:
        filename: File to (over)write; this exact value is passed to open().
        proxy_list: Iterable of strings; each is written followed by a
            newline.

    Returns:
        None. The path printed is *filename* joined onto the current
        working directory.
    """
    # Only used for the report line below; the file is opened via *filename*.
    saved_at = os.path.join(os.getcwd(), filename)

    with open(filename, 'w+') as sink:
        sink.writelines(entry + '\n' for entry in proxy_list)

    print("proxy save file :", saved_at)



def main():
    num = input("输入抓取代理个数:")



    proxy_list=[]
    page =1

    while len(proxy_list) 
 

你可能感兴趣的:(语言,python)