以下是面向过程版的代码:
# Procedural version: download one page of the qiushibaike "hot" listing and
# print every fragment captured by the pattern below.
# This is a Python 2 script (urllib2); the syntax used is valid on 2.6+.
import urllib
import urllib2
import re

page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)

# Send a browser-style User-Agent instead of urllib2's default one.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

try:
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        content = response.read().decode('utf-8')
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()

    # Raw string so the backslash escapes reach the regex engine untouched.
    # Each match captures everything between the end of the previous match
    # and the next closing </span> tag.
    pattern = re.compile(r'([\s\S]+?)<\/span>')
    items = re.findall(pattern, content)
    for item in items:
        print(item)
except urllib2.URLError as e:
    # HTTPError (a URLError subclass) carries the HTTP status in ``code``;
    # plain URLError only provides ``reason``.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
面向对象版
import urllib
import urllib2
import re
class QSBK(object):
    """Fetch the document at a URL and extract the text before each ``</span>``.

    The instance is configured with the URL to open and the request headers
    to send. ``print_data`` is the usual entry point; the other methods are
    the individual steps it is built from.
    """

    # Class-level placeholders; __init__ always overrides them per instance.
    url = ''
    headers = ''

    # Compiled once for all instances. Raw string so the backslash escapes
    # reach the regex engine untouched. Each match captures everything
    # between the end of the previous match and the next closing </span>.
    _ITEM_PATTERN = re.compile(r'([\s\S]+?)<\/span>')

    def __init__(self, url, headers):
        """Store the target ``url`` and the ``headers`` mapping to send."""
        self.url = url
        self.headers = headers

    def request(self):
        """Open ``self.url`` and return the response object.

        The caller is responsible for closing the returned response.
        Errors raised by ``urllib2.urlopen`` propagate unchanged.
        """
        # Use the URL stored on the instance, not a module-level ``url``
        # global, so each instance fetches the address it was given.
        req = urllib2.Request(self.url, headers=self.headers)
        return urllib2.urlopen(req)

    def decode(self):
        """Return the response body decoded as UTF-8 text."""
        response = self.request()
        try:
            return response.read().decode('utf-8')
        finally:
            # Release the connection even if reading or decoding fails.
            response.close()

    def solve_data(self):
        """Return the list of fragments captured by the item pattern."""
        return self._ITEM_PATTERN.findall(self.decode())

    def print_data(self):
        """Fetch the page and print each extracted fragment on its own line."""
        for item in self.solve_data():
            print(item)
# Driver for the object-oriented version: scrape page 1 of the "hot" listing.
page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)

# Send a browser-style User-Agent instead of urllib2's default one.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

test = QSBK(url, headers)
try:
    test.print_data()
except urllib2.URLError as e:
    # Report network/HTTP failures the same way the procedural version does
    # instead of ending with a traceback.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)