这里是对“茶叶”关键字进行自动采集，并保存和打印相关域名。
下面的例子搜索 Google 上与茶叶相关的网站，并采集前 2 页的结果。
# -*- coding: utf-8 -*-
"""Collect the domains of Google search results for a keyword.

The script drives a browser through ``pam.PAMIE``: it submits ``qtxt`` to
Google, harvests the external links from the first result page and from each
following page listed in the paging loop, then prints every distinct domain
and appends it to ``dns.txt``.
"""
import pam
import time
import urllib.parse as up

from bs4 import BeautifulSoup

qtxt = '茶叶'  # search keyword
namelist = []  # (url, domain) pairs gathered from every visited result page


def get():
    """Record the external links of the page currently shown in ``ie``.

    Parses the browser's current HTML and appends a ``(url, domain)`` tuple to
    the module-level ``namelist`` for every ``<a>`` whose ``href`` is an
    absolute HTTP(S) URL that does not mention ``google``.
    """
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid bs4's "no parser" warning).
    soup = BeautifulSoup(ie.outerHTML(), 'html.parser')
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        # Anchors without an href yield None; skip them explicitly instead of
        # relying on a blanket exception handler.
        if not isinstance(href, str):
            continue
        # Accept HTTPS as well as HTTP; an http-only filter silently drops
        # every result that is served over TLS.
        if not href.startswith(('http://', 'https://')) or 'google' in href:
            continue
        try:
            domain = up.urlparse(href).netloc
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
            continue
        if domain:
            namelist.append((href, domain))


ie = pam.PAMIE()
try:
    # NOTE(review): the "/ncr" URL presumably disables Google's country
    # redirect before the search page is loaded - confirm.
    ie.navigate('http://www.google.com/ncr')
    ie.navigate('https://www.google.com/')
    ie.setTextBox('q', qtxt)
    search_button = ie.findElement('button', 'name', 'btnK')
    ie.clickElement(search_button)
    get()  # first result page

    # Page numbers to visit after the first one; range(2, 3) means page 2 only.
    for page in range(2, 3):
        time.sleep(2)  # pause before clicking the paging link
        ie.clickLink(str(page))
        get()
finally:
    # Always shut the browser down, even when a navigation step fails.
    ie.quit()

namelist = list(set(namelist))

# Several result URLs can share one domain, so de-duplicate on the domain
# itself; sorting makes the output order deterministic.
domains = sorted({domain for _, domain in namelist})

# Open the output file once and close it reliably, rather than opening a new
# (never closed) handle for every line.
with open(r'dns.txt', 'at') as out:
    for domain in domains:
        print(domain)
        print(domain, file=out)