获取主站,并且去重

# Module-level accumulator: ceshi_url() appends one 'http://<domain>' entry
# per successfully parsed input line. Duplicates are NOT removed here.
a = []
#----------------------------------------------------------------------
def ceshi_url(url_file):
    """Read URLs from ``url_file`` and collect their main-site addresses.

    Every non-blank line is handed to the ``tld`` package. On success the
    string ``'http://' + <domain>`` is appended to the module-level list
    ``a``. A line that cannot be parsed is reported on stdout and skipped,
    so a single bad entry does not abort the whole run.

    Args:
        url_file: Path of a text file holding one URL per line.

    Returns:
        None. Results are accumulated in the module-level list ``a``.

    Raises:
        IOError/OSError: if ``url_file`` cannot be opened.
        ImportError: if the ``tld`` package is not installed.
    """
    # Imported locally because no module-level import of tld is visible in
    # this file; without it every line would die with a swallowed NameError.
    import tld

    # NOTE(review): newer tld releases return only the public suffix (e.g.
    # 'com') from get_tld() and expose the registered domain (e.g.
    # 'example.com') through get_fld(); older releases have no get_fld() and
    # return the registered domain from get_tld(). Prefer get_fld() when it
    # exists so the result is always the main site. Confirm against the
    # installed tld version.
    extract_domain = getattr(tld, 'get_fld', tld.get_tld)

    with open(url_file) as w:
        # Iterate the file lazily instead of loading every line up front.
        for line in w:
            line = line.strip()
            if not line:
                # Blank lines carry no URL; skip them quietly.
                continue
            try:
                a.append('http://' + extract_domain(line))
            except Exception as e:
                # Best-effort: report the unparsable entry and keep going.
                print(str(e))
    
# Script body: collect main-site URLs from formal_url.txt, de-duplicate them,
# print the count and each URL, and save the result to url.txt.
ceshi_url('formal_url.txt')
# set() drops duplicates; sorting makes the printed and written order
# reproducible (plain set iteration order is arbitrary).
a = sorted(set(a))
print(len(a))
# The context manager guarantees url.txt is flushed and closed even if a
# write fails part-way through.
with open('url.txt', 'w') as g:
    for i in a:
        print(i)
        g.write(i + '\n')

你可能感兴趣的:(获取主站,并且去重)