import requests
from requests.exceptions import RequestException
import re
import csv
import threadpool#需要先安装:pip install threadpool
# Where the scraped rows are saved. A raw string keeps the backslash a literal
# path separator instead of relying on the invalid escape sequence "\d".
path = r'E:\data.csv'
# URL to fetch; it ends with the `offset=` query parameter, so presumably the
# caller appends the page offset to it -- confirm against the fetching code.
baseUrl = 'https://maoyan.com/board/4?offset='
# Fetch `url` with requests.get and begin parsing the returned HTML.
# Prints a message and returns None when the request raises RequestException
# or when the response status code is not 200.
# NOTE(review): this definition is truncated in the visible source -- the
# regular-expression pattern and everything after it are missing, and the
# original indentation and quote characters have been lost. Restore the
# function from the original file before relying on it.
def parseOnePage(url):
try:
response = requests.get(url)
except RequestException:
print(url+‘url请求发生异常’)
return None
if response.status_code!=200:
print(‘状态码不为200,’+url+’ 响应异常’)
return None
else:# normal response
html = response.text
pattern = re.compile(# regular expression
‘
def init():
    """Create (or truncate) the CSV file at ``path`` and write its header row.

    The header columns are, in order: rank, movie title, starring actors,
    release date and rating (written in Chinese, as the program emits them).

    The file is opened with the platform default encoding on purpose, so it
    stays consistent with whatever code later appends data rows to it.
    """
    head = ['排名', '电影名字', '主演', '上映时间', '评分']
    # newline='' lets the csv module control line endings itself; the `with`
    # block closes the file, so no explicit close() is needed.
    with open(path, 'w', newline='') as f:
        csv_writer = csv.writer(f, dialect='excel')
        csv_writer.writerow(head)
def singelThread():
    """Scrape the data sequentially in the calling thread.

    Resets the output file via ``init()`` and then calls ``spider`` once for
    each offset 0, 10, ..., 90, one after another.
    """
    init()
    for offset in range(0, 10):
        spider(10 * offset)
def multiThread():
    """Scrape the data concurrently using a pool of 10 worker threads.

    Resets the output file via ``init()``, queues one ``spider`` call per
    offset and blocks until every queued call has finished.
    """
    init()
    pool = threadpool.ThreadPool(10)
    # Step by 10 so the same offsets (0, 10, ..., 90) are covered as in
    # singelThread(); a step of 30 would queue only four of the ten offsets.
    # The list is not named `requests`, to avoid shadowing the imported
    # `requests` module inside this function.
    jobs = threadpool.makeRequests(spider, range(0, 100, 10))
    for job in jobs:
        pool.putRequest(job)
    # Wait for all queued work to complete before returning.
    pool.wait()
if __name__ == '__main__':
    # Run the multi-threaded scraper when executed as a script; call
    # singelThread() here instead for a sequential run.
    multiThread()