import requests
from bs4 import BeautifulSoup
import time
import csv
def getcontent(url):
    """Fetch one page of the Maoyan TOP100 board and persist the results.

    Parses the movie name, star, release time and score cells from the page,
    appends one row per movie to the module-level DATA list, then rewrites
    the full CSV snapshot to disk.

    Parameters
    ----------
    url : str
        A board page URL such as ``http://maoyan.com/board/4?offset=0``.

    Side effects
    ------------
    Mutates the global ``DATA`` list and overwrites
    ``E:/Python/Spider/MaoyanTOP100.csv`` with everything collected so far.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 OPR/49.0.2725.47'}
    # timeout: a stalled connection must not hang the whole scrape;
    # raise_for_status: fail loudly instead of silently parsing an error page.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')
    names = soup.find_all(class_='name')
    stars = soup.find_all(class_='star')
    release_times = soup.find_all(class_='releasetime')
    scores = soup.find_all(class_='score')
    global DATA
    # zip walks the four parallel cell lists in lockstep; unlike the indexed
    # loop it cannot raise IndexError when one list is shorter than another.
    for name, star, rtime, score in zip(names, stars, release_times, scores):
        DATA.append([name.get_text(), star.get_text(),
                     rtime.get_text(), score.get_text()])
    # Mode 'w' truncates, so each call writes a complete snapshot of DATA;
    # gb18030 keeps Chinese titles readable in Excel on Windows.
    with open('E:/Python/Spider/MaoyanTOP100.csv', 'w', newline='', encoding='gb18030') as f:
        csv.writer(f).writerows(DATA)
# Accumulator shared with getcontent(); must exist before the first fetch.
DATA = []
# The board paginates 10 movies per page via the `offset` query parameter,
# so offsets 0, 10, ..., 90 cover the full TOP100.
for offset in range(0, 100, 10):
    getcontent("http://maoyan.com/board/4?offset=" + str(offset))
    # Throttle between pages to avoid anti-scraping blocks; `time` is
    # imported at the top of the file but was previously never used.
    time.sleep(1)