import requests
from pyquery import PyQuery
from bs4 import BeautifulSoup
import openpyxl
# HTTP request headers sent with every page fetch: a desktop-browser
# User-Agent plus a captured Cookie string.
# NOTE(review): the cookie looks like a recorded browser session and
# presumably expires - confirm it is still accepted before relying on it.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.108 Safari/537.36'
    ),
    'Cookie': (
        '__mta=156144900.1636616654407.1636632158574.1636632889189.43; '
        'uuid_n_v=v1; '
        'uuid=FD45EE3042C211EC99BFC50282E54185B3C7DC56EE0D41BBB1F564A9125A1ACF; '
        '_csrf=2617d894c54088c5a9ea4d4ec82429d1651a7a26cc5c5b27e786c094416ad25e; '
        'Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1636616653; '
        '_lxsdk_cuid=17d0df449d8c8-05fa68edf298bf-376b4502-2a3000-17d0df449d8c8; '
        '_lxsdk=FD45EE3042C211EC99BFC50282E54185B3C7DC56EE0D41BBB1F564A9125A1ACF; '
        '__mta=156144900.1636616654407.1636616659285.1636616673893.3; '
        'Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1636632887; '
        '_lxsdk_s=17d0e1e4301-169-cb1-4f5%7C%7C64'
    ),
}
# Build the list of board page URLs to fetch: one URL per `offset` value
# 0, 10, ..., 90 (ten URLs in total).
# No request is made here; the pages are downloaded by the loop that
# consumes this list, so fetching at this point would only waste a
# network round-trip.
lst_1 = [
    'https://www.maoyan.com/board/4?requestCode=9daba57e99d92be6fe6549abd87e9f258ifwl&offset={}'.format(offset)
    for offset in range(0, 100, 10)
]
# Accumulators filled in page order. Each holds the text of every <p>
# element with the given CSS class; later code pairs them up by index.
lst0 = []  # text of <p class="name">
lst1 = []  # text of <p class="star">, surrounding whitespace stripped
lst2 = []  # text of <p class="releasetime">
lst3 = []  # text of <p class="score">

# Download and parse every page URL collected in lst_1.
for page_url in lst_1:
    # A timeout keeps a stalled connection from hanging the script forever.
    resp = requests.get(page_url, headers=headers, timeout=10)
    # Stop on an HTTP error instead of parsing an error page, which would
    # contain none of the expected tags and silently yield missing rows.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'lxml')

    lst0.extend(tag.text for tag in soup.find_all('p', class_='name'))
    lst1.extend(tag.text.strip() for tag in soup.find_all('p', class_='star'))
    lst2.extend(tag.text for tag in soup.find_all('p', class_='releasetime'))
    lst3.extend(tag.text for tag in soup.find_all('p', class_='score'))
# Re-use lst_1 for the output rows: one comma-separated line per entry of
# lst0, combined with the entries at the same index in lst1/lst2/lst3.
# Commas inside the lst1 value are replaced with '|' so they cannot be
# mistaken for the field separator.
lst_1 = []
for idx, title in enumerate(lst0):
    lst_1.append(
        '{},{},{}|评分:{}'.format(
            title,
            lst1[idx].replace(',', '|'),
            lst2[idx],
            lst3[idx],
        )
    )
# Dump the rows to a UTF-8 text file, one row per line.
with open('猫眼top100.txt', 'w', encoding='utf-8') as out_file:
    out_file.writelines(row + '\n' for row in lst_1)
# Export the same rows to an Excel workbook: each row string is split on
# ',' into cells, echoed to stdout, and appended to the active sheet.
wb = openpyxl.Workbook()
sheet = wb.active
for cells in (row.split(',') for row in lst_1):
    print(cells)
    sheet.append(cells)
wb.save('猫眼top100.xlsx')