First, import the requests library and BeautifulSoup:
import requests
from bs4 import BeautifulSoup
Then call requests.get() to fetch the HTML at the target URL and hand the response text to BeautifulSoup for parsing:
url = 'http://top.hengyan.com/xuanhuan/'
html = requests.get(url)                        # requests.Response object
soup = BeautifulSoup(html.text, "html.parser")  # parse the returned HTML
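The target page is Chinese, so if the extracted strings come back garbled, the usual culprit is encoding detection. A minimal, optional sketch of one common safeguard (the name resp is illustrative and not part of the script above):

resp = requests.get(url)
resp.encoding = resp.apparent_encoding          # let requests re-guess the charset
soup = BeautifulSoup(resp.text, "html.parser")

Next, generate_info() walks the parsed soup and collects one row of ranking data per book: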
def generate_info(info):
    # every ranking table on the page is a <ul>; only those with 'num' cells hold book rows
    for ul in soup.find_all('ul'):
        if ul.find_all('li', class_='num'):
            ha = []
            # rank
            for li in ul.find_all('li', class_='num'):
                ha.append(li.string)
            # book name; when the <li> wraps extra tags, li.string is None,
            # so fall back to the <a target="_blank"> links (name + latest chapter)
            for li in ul.find_all('li', class_='bookname'):
                if li.string is None:
                    for a in ul.find_all('a', target='_blank'):
                        ha.append(a.string)
                else:
                    ha.append(li.string)
                    ha.append('最新章节')  # placeholder so the columns stay aligned
            # author
            for li in ul.find_all('li', class_='author'):
                ha.append(li.string)
            # word count
            for li in ul.find_all('li', class_='length'):
                ha.append(li.string)
            # clicks
            for li in ul.find_all('li', class_='click'):
                ha.append(li.string)
            # last update time
            for li in ul.find_all('li', class_='update'):
                ha.append(li.string)
            info.append(ha)
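To use it, pass in an empty list; each appended element is one ranked book. A quick sanity check (the exact column order depends on the placeholder branch above):

info = []
generate_info(info)
print(info[0])   # e.g. [rank, book name, latest chapter, author, word count, clicks, update time]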
Next, import the xlwt module:
import xlwt
Define an Excel cell style (font name, size, boldness, colour):
def set_style(name, height, bold=False):
    style = xlwt.XFStyle()   # initialise the style
    font = xlwt.Font()       # create a font for the style
    font.name = name
    font.bold = bold
    font.colour_index = 4    # note: xlwt spells it 'colour_index'; 'color_index' is silently ignored
    font.height = height     # in 1/20 of a point, so 220 = 11pt
    style.font = font
    return style
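set_style() is meant to be passed as the optional style argument of Sheet.write(), for example to render a bold header cell. A small sketch (the sheet name, cell value, and output filename are illustrative only):

wb = xlwt.Workbook(encoding='utf-8')
ws = wb.add_sheet('demo')
ws.write(0, 0, '排行', set_style('Times New Roman', 220, bold=True))  # bold 11pt header cell
wb.save('demo.xls')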
Finally, create the Excel workbook and write the extracted rows into it:
def write_excel(datas, filename):
    # create the workbook
    workbook = xlwt.Workbook(encoding='utf-8')
    # create a sheet
    data_sheet = workbook.add_sheet('novel')
    #row0 = ['排行', '书名', '作者', '字数', '点击', '更新时间']
    #for i in range(len(row0)):
    #    data_sheet.write(0, i, row0[i], set_style('Times New Roman', 220, True))
    nrows = len(datas)
    for i in range(nrows):
        for j in range(len(datas[i])):
            data_sheet.write(i, j, datas[i][j])
    # save the workbook to disk
    workbook.save(filename)
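Putting the two steps together; the output filename here is an assumption, and note that xlwt only produces the legacy .xls format, not .xlsx:

if __name__ == '__main__':
    rows = []
    generate_info(rows)
    write_excel(rows, 'hengyan_xuanhuan.xls')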