Python多线程爬取7160网站美女图片

直接上代码:下面的脚本用 threading 为每个列表页和每个图集分页各开一个线程,用 pyquery 解析出图片地址和标题,并把结果写入本地 MongoDB。

#coding=UTF-8
from urllib.request import urlopen
from pyquery import PyQuery as pq
import re
import pymongo
import threading
# MongoDB connection to a server on localhost:27017, opened at import time.
client=pymongo.MongoClient(host='localhost',port=27017)
# Collection `mutitry47160` in database `taobao`; every scraped record goes here.
table=client.taobao.mutitry47160
# Guards the shared list-page counter `i` below.
lock=threading.Lock()
# Shared list-page counter. father_link() increments it before building its
# URL, so the first list page requested is number 2.
i = 1

def save_to_mongo(result):
	"""Insert one scraped record into the module-level ``table`` collection.

	Args:
		result: dict to store; callers in this file pass
			``{'image': ..., 'title': ...}``.

	Returns:
		None. Failures are reported on stdout instead of being raised, so a
		rejected document or a database error does not kill the calling
		crawler thread.
	"""
	try:
		# Collection.insert() was deprecated in PyMongo 3.0 and removed in
		# 4.0; insert_one() is the supported single-document API.
		table.insert_one(result)
	except Exception as err:
		# Include the exception so the cause of the failure is visible.
		print('存储到Mongo失败',result,err)
	else:
		print('存储到Mongo成功')

def download_son(Son_link,l):
	"""Scrape page ``l`` of one album and store its image record.

	Args:
		Son_link: site-relative album path; it is concatenated directly
			between the site root and ``index_<l>.html``.
		l: page index within the album.
	"""
	page_url=''.join(('https://www.7160.com',Son_link,'index_',str(l),'.html'))
	page=pq(page_url,encoding='gbk')
	# Both attributes come from the same <img> node, so query it once.
	img=page('.picsbox.picsboxcenter p a img')
	record={'image':img.attr('src'),'title':img.attr('alt')}
	print(record)
	save_to_mongo(record)


def father_link():
	"""Crawl one list page: store each album's first image and fan out its pages.

	Claims the next list-page number from the shared counter ``i``, fetches
	that list page, and for every album linked from it saves the image shown
	on the album's landing page, then starts one ``download_son`` thread per
	remaining album page (2..page count).
	"""
	global i
	# Claim the page number while holding the lock and keep a private copy.
	# Reading the global after the lock is released lets another worker bump
	# it first, so two workers could crawl the same page and skip another.
	with lock:
		i += 1
		page_no=i
	url='https://www.7160.com/rentiyishu/list_1_'+str(page_no)+'.html'
	print('--------------------------------------'+str(page_no)+'--------------------------------------')
	doc=pq(url,encoding='gbk')
	for item in doc('.news_bom-left li').items():
		Son_link=item.find('a').attr('href')
		if not Son_link:
			# An entry without a link cannot be followed; skip it rather than
			# letting the concatenation below abort the rest of this page.
			continue
		doc2=pq('https://www.7160.com'+Son_link,encoding='gbk')
		cover=doc2('.picsbox.picsboxcenter p a img')
		product={'image':cover.attr('src'),'title':cover.attr('alt')}
		save_to_mongo(product)
		# Read the album's page count from the first pager link.
		pager_text=doc2('body > div > div.center > div.NEWS > div.picmainer > div.itempage > a:nth-child(1)').text()
		numbers=re.findall(r"\d+\.?\d*",pager_text)
		try:
			page_num=int(numbers[0])
		except (IndexError,ValueError):
			# No usable page count in the pager: nothing more to fetch for
			# this album, move on to the next one.
			continue
		print('共%d页,开始爬取'%page_num)
		for l in range(2,page_num+1):
			threading.Thread(target=download_son,args=(Son_link,l)).start()
def main():
	"""Start 107 ``father_link`` worker threads.

	Each worker picks its own list page from the shared counter, so the loop
	value is not passed along. The threads are started but not joined here.
	"""
	for _ in range(1,108):
		worker=threading.Thread(target=father_link)
		worker.start()

# Run the crawler only when executed as a script, not when imported.
if __name__ == '__main__':
	main()

你可能感兴趣的:(python,多线程,爬虫)