调用成品 API 构建自己的代理 IP 池

# coding=utf-8

import tushare as ts
import pandas as pd
import requests
import json
import re
import time

def get_pro():
    """Download a batch of proxies and cache them in ``pro.txt``.

    Requests the proxy endpoint (``num=10`` in the query string), extracts
    every value stored under an ``"http"`` key from the response text with a
    regular expression, writes the values to ``pro.txt`` one per line, and
    returns them.

    Returns:
        list: The extracted proxy strings in the order they appear in the
        response; empty when nothing matches.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout keeps the script from hanging forever on an unresponsive server.
    response = requests.get('http://lab.crossincode.com/proxy/get/?num=10',
                            timeout=10)
    # 'utf-8' rather than the misspelled 'etf-8', so the body is decoded
    # with the intended codec.
    response.encoding = 'utf-8'
    proxies = re.findall('"http": "(.*?)", "', response.text)

    with open("pro.txt", "w") as f:
        # One proxy per line; without a separator the entries run together
        # and the file cannot be split back into individual proxies.
        f.writelines(pro + "\n" for pro in proxies)

    return proxies



def crawl(list, timeout=5):
    """Probe each proxy by requesting http://httpbin.org/ip through it.

    For every entry the proxy address is printed first. If the request
    succeeds, ``finish`` and the response body are printed; if it fails,
    ``no proxies`` is printed and the loop moves on to the next entry.

    Args:
        list: Sequence of proxy strings, each used as the ``http`` proxy.
            The name shadows the builtin but is kept so that existing
            callers keep working.
        timeout: Seconds to wait for each request before the proxy is
            treated as unusable.

    Returns:
        None. Results are reported on stdout only.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)',
    }
    for proxy in list:
        proxies_l = {'http': proxy}
        print(proxies_l['http'])

        try:
            # Without a timeout a single dead proxy would stall the whole run.
            req = requests.get('http://httpbin.org/ip', headers=header,
                               proxies=proxies_l, timeout=timeout)
        except Exception:
            # Any failure just means "this proxy is unusable". Unlike a bare
            # ``except:``, this still lets KeyboardInterrupt and SystemExit
            # through, so the loop can be stopped with Ctrl-C.
            print('no proxies')
        else:
            print('finish')
            print(req.text)

# Run the pipeline: download the current proxy batch, then test every entry.
getpro = get_pro()
crawl(getpro)

你可能感兴趣的:(爬虫)