[Python crawler] A scraper for Qiushibaike (糗事百科)


Posting the code first; I'll write up the process once I'm home for the holidays.

# -*- coding:utf-8 -*-
import re
import urllib2


# fetch page 1 of the Qiushibaike "hot" section, faking a browser User-Agent
page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = { 'User-Agent' : user_agent }
try:
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    # print response.read()
except urllib2.URLError, e:
    # print the HTTP status code or the failure reason, whichever is available
    if hasattr(e, "code"):
        print e.code
    if hasattr(e, "reason"):
        print e.reason
# pattern = re.compile('<div.*?author">.*?<a.*?<img.*?>(.*?)</a>.*?</div>', re.S)  # this expression matches just the author
# pattern based on the then-current Qiushibaike markup: group 1 = author,
# group 2 = joke text, group 3 = timestamp hidden in an HTML comment
pattern = re.compile('<div.*?author">.*?<a.*?<img.*?>(.*?)</a>.*?'
                     '<div.*?content">(.*?)<!--(.*?)-->.*?</div>', re.S)
content = response.read().decode('utf-8')
items = re.findall(pattern, content)
for i in items:
    print '<<<' + '-'*60 + '>>>'
    print 'author:' + i[0].strip()
    print 'content:' + i[1].strip()
    print 'time:' + i[2].strip()
    print '\n'
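
The code above is written for Python 2 (urllib2 and print statements). For anyone on Python 3, below is a rough sketch of the same fetch-and-parse flow using urllib.request; the regex assumes the old Qiushibaike markup (author block, content block, timestamp in an HTML comment), which may have changed since this was written.

# -*- coding:utf-8 -*-
# Python 3 sketch of the same scraper (assumes the old page markup)
import re
import urllib.request
import urllib.error

page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

try:
    request = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
except urllib.error.URLError as e:
    # print the HTTP status code or the failure reason, whichever is available
    if hasattr(e, 'code'):
        print(e.code)
    if hasattr(e, 'reason'):
        print(e.reason)
else:
    # group 1 = author, group 2 = joke text, group 3 = timestamp in an HTML comment
    pattern = re.compile(r'<div.*?author">.*?<a.*?<img.*?>(.*?)</a>.*?'
                         r'<div.*?content">(.*?)<!--(.*?)-->.*?</div>', re.S)
    for author, text, time in re.findall(pattern, content):
        print('<<<' + '-' * 60 + '>>>')
        print('author:', author.strip())
        print('content:', text.strip())
        print('time:', time.strip())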
