Python 简单爬虫 豆瓣热门影评

第一次写 Python,备忘用,写得不完善,大家见笑了




# -*- coding:utf-8 -*-

import urllib
import urllib2
import re
import xlwt

# Output workbook with a single sheet; cells may be overwritten on re-write.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('movie_review', cell_overwrite_ok=True)

# Header row (row 0): one column per field written for each review.
for col, heading in enumerate(('标题', '影评人', '电影', '星级', '时间', '内容')):
    sheet.write(0, col, heading)

# Listing URL; the page offset is appended as the value of the `start` parameter.
baseurl='https://movie.douban.com/review/best/?start='
# Walk three listing pages: start=0, 20 and 40.
for i in range(0,3):
        url_list=baseurl+str(i*20)
        # Download the listing page and decode the response body as UTF-8.
        request_url = urllib2.Request(url_list)
        response_url = urllib2.urlopen(request_url)
        html_url = response_url.read().decode('utf-8')
        # NOTE(review): everything below is damaged and is not valid Python as shown.
        # - The regex literals are split across lines and their contents are mostly
        #   missing (presumably HTML tags stripped when the post was published).
        # - Several statements are collapsed onto single lines.
        # - `pattern_title`, `html` and `j` are used but never assigned in the visible
        #   code; presumably an inner per-review loop was lost - TODO recover from
        #   the author's original file.
        # - The row index is i*10+j+1 while the page offset steps by 20 - confirm the
        #   number of reviews per page.
        # - The save path literal contains a backslash followed by a space - verify
        #   the intended file name.
        pattern_url = re.compile('

.*?(.*?)',re.S) pattern_reviewer = re.compile('(.*?)',re.S) pattern_movie = re.compile('(.*?)',re.S) pattern_star = re.compile('(.*?)',re.S) pattern_time = re.compile('

(.*?)

',re.S) pattern_content = re.compile('
(.*?)
',re.S) title = re.findall(pattern_title,html) reviewer = re.findall(pattern_reviewer,html) movie = re.findall(pattern_movie,html) star = re.findall(pattern_star,html) time = re.findall(pattern_time,html) content = re.findall(pattern_content,html) k=i*10+j+1 sheet.write(k,0,title[0]) sheet.write(k,1,reviewer[0]) sheet.write(k,2,movie[0]) sheet.write(k,3,star[0]) sheet.write(k,4,time[0]) sheet.write(k,5,content[0]) book.save('d:\ test.xls') #print k



你可能感兴趣的:(爬虫)