Python爬虫之抓取静态网页并保存到excel

分析

思路很简单:先获取网页的文本,再通过 re 正则表达式提取需要的内容,最后把这些内容保存到 Excel 文件中。

import urllib.request
import re
import xlwt

#获取网页数据
# NOTE(review): this pattern is reproduced from the visible source, where the
# string literal was broken across several lines; those line breaks are written
# here as "\n". It looks like HTML tags around the capture group were lost when
# the snippet was extracted -- confirm against the target page markup, or pass
# the intended expression through the ``pattern`` argument of GetData.
DEFAULT_PATTERN = r"\n.*?(.*?)\n"

# Common prefix of the case pages; the numeric page id is appended to it.
CASE_URL_PREFIX = 'http://www.risfond.com/case/fmcg/'

# Header cells written to the first spreadsheet row; the number of headers
# also determines how many values are placed on each data row.
HEADERS = ("职位名称", "工作地点", "案例日期", "所在行业", "职位周期", "上岗人数", "顾问团队")


def GetData(url, pattern=DEFAULT_PATTERN):
    """Download *url* and return every regex match found in its text.

    Args:
        url: Address of the page to fetch,
            e.g. ``http://www.risfond.com/case/fmcg/26700``.
        pattern: Regular expression handed to ``re.findall``. Defaults to
            ``DEFAULT_PATTERN``.

    Returns:
        The list produced by ``re.findall(pattern, page)``.

    Raises:
        urllib.error.URLError: If the page cannot be opened.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode("UTF-8")
    return re.findall(pattern, page)


def SearchAllPage(start=26700, stop=26720):
    """Collect the matches from the case pages with ids ``start`` .. ``stop - 1``.

    Args:
        start: First page id to fetch (inclusive).
        stop: Page id at which to stop (exclusive).

    Returns:
        One flat list holding the GetData results of every page, in page order.
    """
    contextall = []
    for case_id in range(start, stop):
        contextall.extend(GetData(CASE_URL_PREFIX + str(case_id)))
    return contextall


def ExcelRestore(contextall, filename="python.xls"):
    """Write *contextall* to a spreadsheet, ``len(HEADERS)`` values per row.

    Row 0 receives the bold header cells; the values follow from row 1 on,
    filling each row left to right before moving to the next one.

    Args:
        contextall: Flat sequence of cell values.
        filename: Path passed to ``workbook.save``.
    """
    workbook = xlwt.Workbook(encoding='UtF-8')
    worksheet = workbook.add_sheet("mysheet")

    # Header row, in bold.
    style = xlwt.XFStyle()
    style.font.bold = True
    for col, title in enumerate(HEADERS):
        worksheet.write(0, col, title, style)

    # Data rows: the flat index maps to (row, column) by integer division;
    # the +1 skips the header row.
    for index, value in enumerate(contextall):
        row, col = divmod(index, len(HEADERS))
        worksheet.write(row + 1, col, value)

    workbook.save(filename)


# Run the crawl only when executed as a script, so importing this module does
# not trigger network requests or write a file.
if __name__ == "__main__":
    ExcelRestore(SearchAllPage())

你可能感兴趣的:(python)