从烈火网下载HTTP代理的例子

'''
Created on 2013-4-3
 
@author: zdh
'''
#!/usr/bin/env python
# -*- coding: utf-8 -*-
 
import re
import urllib
 
# Each digit of a port number is published as one of these letters.
_LETTER_TO_DIGIT = {
    'd': '0', 'c': '1', 'k': '2', 'z': '3', 'm': '4',
    'b': '5', 'w': '6', 'i': '7', 'r': '8', 'l': '9',
}

# Compiled once instead of on every page.
_IP_PATTERN = re.compile(r'<td>(.*)<S')
_PORT_PATTERN = re.compile(r'\"(\+.*?\+.)\)')


def _fetch_page(url):
    """Download *url* with a browser-like User-Agent and return the raw bytes."""
    # Local imports keep this block self-contained; ``urllib.URLopener`` is
    # only available on Python 2, so fall back between the two layouts.
    from contextlib import closing
    try:
        from urllib.request import Request, urlopen  # Python 3
    except ImportError:
        from urllib2 import Request, urlopen  # Python 2

    request = Request(url, headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1',
    })
    # closing() releases the connection even if read() raises.
    with closing(urlopen(request)) as response:
        return response.read()


def _decode_entry(ip, port):
    """Build one output line from a scraped address and its letter-coded port."""
    port = port.replace('+', '')
    # 'e', 'x', 'a' and 'f' are not digit codes, so such a match is not a
    # port; an empty port means the page had no match for this row.
    if not port or any(letter in port for letter in 'exaf'):
        entry = ip
    else:
        entry = ip + ':' + port
    for letter, digit in _LETTER_TO_DIGIT.items():
        entry = entry.replace(letter, digit)
    return entry


def main(output_path=r'E:/ip.txt', pages=None, fetch=None):
    """Scrape proxy addresses from veryhuo.com and write them to a text file.

    For every page the HTML is decoded as GBK, addresses and letter-coded
    ports are extracted with regular expressions, and each decoded
    ``ip:port`` (or bare ``ip`` when no usable port was found) is written on
    its own line and echoed to stdout. The number of addresses found on the
    page is printed after each page.

    Args:
        output_path: File to (over)write with the results.
        pages: Iterable of page numbers to download; defaults to 1 through 10.
        fetch: Callable taking a URL and returning the page as bytes;
            defaults to an HTTP download with a Firefox User-Agent.
    """
    if pages is None:
        pages = range(1, 11)
    if fetch is None:
        fetch = _fetch_page

    # ``with`` guarantees the file is closed even when a download fails.
    with open(output_path, 'w') as out:
        for page in pages:
            url = "http://www.veryhuo.com/res/ip/page_" + str(page) + ".php"
            html = fetch(url).decode('GBK')
            ip_list = _IP_PATTERN.findall(html)
            port_list = _PORT_PATTERN.findall(html)
            for index, ip in enumerate(ip_list):
                # A page may yield fewer ports than addresses; keep the
                # address instead of raising IndexError.
                port = port_list[index] if index < len(port_list) else ''
                entry = _decode_entry(ip, port)
                out.write(entry + '\n')
                print(entry)
            print(len(ip_list))
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

你可能感兴趣的:(python,urllib)