# Simple Python web-scraping example: crawling the Maoyan Top 100

import requests
from pyquery import PyQuery
from bs4 import BeautifulSoup
import openpyxl

# HTTP headers sent with every page request.
# NOTE(review): the Cookie is a hard-coded browser session value; it is
# presumably short-lived and may need to be refreshed — confirm before use.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    'Cookie':'__mta=156144900.1636616654407.1636632158574.1636632889189.43; uuid_n_v=v1; uuid=FD45EE3042C211EC99BFC50282E54185B3C7DC56EE0D41BBB1F564A9125A1ACF; _csrf=2617d894c54088c5a9ea4d4ec82429d1651a7a26cc5c5b27e786c094416ad25e; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1636616653; _lxsdk_cuid=17d0df449d8c8-05fa68edf298bf-376b4502-2a3000-17d0df449d8c8; _lxsdk=FD45EE3042C211EC99BFC50282E54185B3C7DC56EE0D41BBB1F564A9125A1ACF; __mta=156144900.1636616654407.1636616659285.1636616673893.3; Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1636632887; _lxsdk_s=17d0e1e4301-169-cb1-4f5%7C%7C64'
}

# URLs of the ten board pages (offset = 0, 10, ..., 90).
# No request is made here: each page is downloaded exactly once by the loop
# that later iterates over lst_1, so fetching it here as well would only
# double the traffic.
lst_1 = [
    'https://www.maoyan.com/board/4?requestCode=9daba57e99d92be6fe6549abd87e9f258ifwl&offset={}'.format(i)
    for i in range(0, 100, 10)
]


# Parallel result lists, filled page by page. They are combined by index
# further down, so position k in each list is expected to belong to the same
# entry (this holds only if every entry has all four <p> elements).
lst0 = []  # text of each <p class="name">
lst1 = []  # text of each <p class="star">, stripped of surrounding whitespace
lst2 = []  # text of each <p class="releasetime">
lst3 = []  # text of each <p class="score">
for lst_2 in lst_1:
    # Without a timeout requests waits indefinitely, so a single stalled
    # connection would hang the whole run.
    resp = requests.get(lst_2, headers=headers, timeout=10)
    cateye1 = BeautifulSoup(resp.text, 'lxml')

    # A page that contains none of these elements simply contributes nothing.
    lst0.extend(tag.text for tag in cateye1.find_all('p', class_='name'))
    lst1.extend(tag.text.strip() for tag in cateye1.find_all('p', class_='star'))
    lst2.extend(tag.text for tag in cateye1.find_all('p', class_='releasetime'))
    lst3.extend(tag.text for tag in cateye1.find_all('p', class_='score'))

# Combine the scraped fields into a list of text lines, one per entry
# One record per entry: [name, star, "releasetime|评分:score"].
# The records are kept as lists and the text line is derived from them,
# instead of re-splitting the joined line on ',' for the spreadsheet: that
# way a comma inside a name or release string cannot shift the columns.
# Commas inside the star field are still replaced with '|' so the text line
# keeps the same format as before.
# zip() stops at the shortest list, so an incomplete trailing record is
# dropped rather than raising IndexError.
rows = [
    [title, stars.replace(',', '|'), released + '|' + '评分:' + rating]
    for title, stars, released, rating in zip(lst0, lst1, lst2, lst3)
]

# Text form of each record: the three fields joined with ','.
lst_1 = [','.join(row) for row in rows]

# Write the text file, one record per line.
with open('猫眼top100.txt', 'w', encoding='utf-8') as file:
    file.writelines(line + '\n' for line in lst_1)

# Write the same records to an Excel workbook, one row per record.
wb = openpyxl.Workbook()
sheet = wb.active
for row in rows:
    print(row)  # progress output: echo each row as it is added
    sheet.append(row)
wb.save('猫眼top100.xlsx')

# You may also be interested in: python, web scraping, data analysis