多线程截取html中相应的数据

多线程截取html中相应的数据

#coding=gbk
import re,urllib,time
import linecache,threading
from bs4 import BeautifulSoup as soup
# Lock held by get_content() while it updates the shared list ``a`` and
# appends to the log file, so worker threads do not interleave those steps.
mlock = threading.Lock()
# Values already recorded by get_content(); consulted before appending so the
# same value is not logged twice.
a = []
def get_content(ip_content):
    """Search Bing for an IP address and log the result hosts not seen before.

    Downloads the Bing ``ip:<address>`` results page, reads the ``<cite>``
    text of every ``div`` with class ``sb_meta``, keeps the part before the
    first ``/`` and, for each value not yet in the module-level list ``a``,
    appends it to ``a`` and writes it as one line to the log file.

    Args:
        ip_content: IP address as a string. Surrounding whitespace (for
            example the trailing newline of a line read from a file) is
            stripped; a blank value returns immediately without a request.

    Returns:
        None. Results are recorded in ``a`` and in the log file.
    """
    ip = ip_content.strip()
    if not ip:
        return

    # The search term ``q`` used to be hard-coded to one address while the
    # argument only reached the malformed ``pq=ip%{0}`` parameter, so every
    # call queried the same IP. Both parameters now carry ``ip:<address>``
    # (``%3A`` is the URL-encoded colon).
    url = ("http://hk.bing.com/search?q=ip%3A{0}&qs=n&form=QBLH&filt=all"
           "&pq=ip%3A{0}&sc=0-2&sp=-1&sk=").format(ip)

    pythoner = urllib.urlopen(url)
    try:
        content = pythoner.read()
    finally:
        # Close the connection even when the read fails.
        pythoner.close()

    # Parse and extract outside the lock; only the shared state needs it.
    hosts = []
    for result in soup(content).find_all("div", {"class": "sb_meta"}):
        if result.cite is None:
            # A result block without <cite> has nothing to record.
            continue
        hosts.append(re.split('/', result.cite.text)[0])

    # ``with`` releases the lock even if recording fails, so one failing
    # worker cannot block all the others forever.
    with mlock:
        new_hosts = []
        for host in hosts:
            if host not in a:
                a.append(host)
                new_hosts.append(host)
        if new_hosts:
            # Open the log once per call and close it deterministically.
            with open(r'c:\mylog.txt', 'a') as log_file:
                for host in new_hosts:
                    log_file.write(host + '\n')
                
                
                
def thread_geturl(process, info):
    """Start one thread per entry of ``info``.

    Each thread runs ``process`` with that entry as its single positional
    argument. Threads are started in iteration order and are not joined.

    Args:
        process: Callable invoked as ``process(entry)`` in each thread.
        info: Iterable of entries, one thread per entry.
    """
    for entry in info:
        worker = threading.Thread(target=process, args=(entry,))
        worker.start()
                
                
if __name__ == '__main__':
    # Lines returned by linecache keep their trailing newline; strip each
    # entry and drop blank ones so no newline ends up in a request URL and
    # no thread is started for an empty line.
    ip_list = [line.strip()
               for line in linecache.getlines(r'c:\iplist.txt')
               if line.strip()]
    thread_geturl(get_content, ip_list)

IP_list

155641384.jpg

你可能感兴趣的:(多线程,import,content)