获取免费代理IP库

#!/usr/local/bin/python3
# coding:utf-8

# ====================================================
# Author: chang - EMail:[email protected]
# Last modified: 2017-4-22
# Filename: iplibrary.py
# Description: get a free proxy IP library; based on urllib and re
# blog:http://www.cnblogs.com/changbo
# ====================================================

import urllib.request
import re


# Patterns are compiled once at import time. Raw strings keep the regex
# escapes (\s, \[, \]) from being interpreted as string escapes.
_RE_CDATA = re.compile(r'//<!\[CDATA\[[^>]*//\]\]>', re.I)  # //<![CDATA[ ... //]]>
_RE_SCRIPT = re.compile(r'<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I)  # <script>...</script>
_RE_STYLE = re.compile(r'<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I)  # <style>...</style>
_RE_COMMENT = re.compile(r'<!--[^>]*-->')  # <!-- ... -->
_RE_BLANK_LINES = re.compile(r'\n+')


def filter_tags(htmlstr):
    """Strip non-content markup from an HTML string.

    Removes, in order: script-style CDATA wrappers (``//<![CDATA[ ... //]]>``),
    ``<script>`` elements, ``<style>`` elements and HTML comments, then
    collapses every run of newlines into a single newline.

    Ordinary tags (``<br>``, ``<p>``, ...) are left in place; only the
    constructs listed above are removed.

    Limitations that follow from the patterns: a script or style body that
    contains ``<`` is not matched, and neither is a comment or CDATA section
    that contains ``>``.

    Args:
        htmlstr: HTML text to clean.

    Returns:
        The cleaned text.
    """
    s = _RE_CDATA.sub('', htmlstr)
    s = _RE_SCRIPT.sub('', s)
    s = _RE_STYLE.sub('', s)
    s = _RE_COMMENT.sub('', s)
    return _RE_BLANK_LINES.sub('\n', s)


# One "ip:port" entry: a dotted-quad IPv4 address, a colon, a numeric port.
_PROXY_ENTRY = re.compile(r'(\d{1,3}(?:\.\d{1,3}){3}):(\d{1,5})')


def getiplist(ipnumber):
    """Fetch free proxies from the 89ip.cn API and print them.

    Each proxy found in the response is printed on its own line as
    ``<ip> ---- <port>``.

    Args:
        ipnumber: Number of proxies to request; formatted with ``%d`` into
            the ``tqsl`` query parameter of the API URL.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    url = 'http://www.89ip.cn/api/?&tqsl=%d&sxa=&sxb=&tta=&ports=&ktip=&cf=1' % ipnumber
    request = urllib.request.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0')
    # The context manager closes the connection even if read/decode raises.
    with urllib.request.urlopen(request) as reply:
        response = reply.read().decode('gbk')
    ipinfo = filter_tags(response)
    # NOTE(review): the previous parsing split the body on line-break markers
    # and trimmed a fixed 46-character tail; those separators did not survive
    # in the source. Entries are matched directly as ip:port instead --
    # confirm against a live API response.
    for ip, port in _PROXY_ENTRY.findall(ipinfo):
        print(ip + ' ---- ' + port)


if __name__ == '__main__':
    getiplist(30)

END!

转载于:https://www.cnblogs.com/changbo/p/6747624.html

你可能感兴趣的:(获取免费代理IP库)