这个算是一个简单的flask项目,主要用于熟悉flask的各种操作并综合了前段时间学的一个简单的爬虫的方法。虽然目前这个项目还非常简单,但是在我看来确实趣味十足。
在这里,我主要展示后台的代码,以及完成之后的一个效果。不过由于这个小项目将涉及到数据库的一些操作,所以在最开始,我将接着上一篇介绍一些models数据模型层
pip install flask-sqlalchemy
PandaInit/settings.py
import os
from PandaInit.function import get_uri
# Project root: two directory levels above this settings file.
BASE_DIR = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)

# Directory holding the page templates.
TEMPLATES_DIR = os.path.join(BASE_DIR, 'templates')

# Directory holding the static assets.
STATIC_DIR = os.path.join(BASE_DIR, 'static')

# Database connection parameters; get_uri() turns them into a connection URI.
# NOTE(review): the credentials are hard-coded here - consider loading them
# from the environment before deploying anywhere shared.
DATABASE = dict(
    HOST='localhost',      # server address
    PORT='3306',           # server port
    USER='root',           # database user name
    PASSWORD='123456',     # database password
    DRIVER='pymysql',      # driver part of the URI
    DB='mysql',            # database type part of the URI
    NAME='pandatxtdb',     # name of the database (schema)
)

SQLALCHEMY_DATABASE_URI = get_uri(DATABASE)
PandaInit/function.py
from flask_sqlalchemy import SQLAlchemy
# Shared Flask-SQLAlchemy extension object. It is created here without an
# application and bound to one later through db.init_app(app) in init_app().
db = SQLAlchemy()
def get_uri(DATABASE):
    """
    Build the database connection URI from a settings dictionary.

    The result has the form ``db+driver://user:password@host:port/name``.
    The user name and password are percent-encoded so that characters with a
    special meaning inside a URL (``@``, ``:``, ``/`` ...) cannot corrupt the
    URI; values made only of letters and digits are left unchanged.

    :param DATABASE: dict providing the keys ``HOST``, ``PORT``, ``USER``,
        ``PASSWORD``, ``DRIVER``, ``NAME`` and ``DB``
    :return: the connection URI as a string
    :raises KeyError: if a required key is missing
    """
    from urllib.parse import quote

    # safe='' also escapes '/', which quote() leaves untouched by default.
    # str() keeps non-string values (e.g. a numeric password) working.
    user = quote(str(DATABASE['USER']), safe='')
    password = quote(str(DATABASE['PASSWORD']), safe='')

    return '{}+{}://{}:{}@{}:{}/{}'.format(
        DATABASE['DB'],
        DATABASE['DRIVER'],
        user,
        password,
        DATABASE['HOST'],
        DATABASE['PORT'],
        DATABASE['NAME'],
    )
def init_app(app):
    """
    Bind the shared ``db`` extension to a Flask application.

    :param app: the Flask application to initialise
    """
    db.init_app(app)
PandaInit/App.py
from flask import Flask
from PandaInit.settings import TEMPLATES_DIR, STATIC_DIR, SQLALCHEMY_DATABASE_URI
from Pandatxt.views import pandatxt
from PandaInit.function import init_app
def create_app():
    """
    Build and configure the Flask application.

    The application uses the template and static directories from the
    settings module, mounts the ``pandatxt`` blueprint under ``/pandatxt``
    and is handed to ``init_app`` for database initialisation.

    :return: the configured Flask application
    """
    app = Flask(
        __name__,
        template_folder=TEMPLATES_DIR,
        static_folder=STATIC_DIR,
    )

    # Database connection settings.
    app.config.update(
        SQLALCHEMY_DATABASE_URI=SQLALCHEMY_DATABASE_URI,
        SQLALCHEMY_TRACK_MODIFICATIONS=False,
    )

    # Register the blueprint.
    app.register_blueprint(blueprint=pandatxt, url_prefix='/pandatxt')

    init_app(app)
    return app
Pandatxt/models.py
from PandaInit.function import db
class XiaoShuoTitles(db.Model):
    """One novel: its title, description, source URL and cover image."""

    __tablename__ = 'xiaoshuotitle'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Title of the book (unique).
    t_title = db.Column(db.String(32), unique=True)
    # Description of the book.
    t_desc = db.Column(db.String(512))
    # URL of the book (currently scraped directly from the Biquge site).
    t_url = db.Column(db.String(64))
    # Cover image.
    t_img = db.Column(db.String(64))
    # Chapters of this book; each chapter gets a ``titles`` back-reference.
    everychapters = db.relationship(
        'XiaoShuoEveryChapter',
        backref='titles',
        lazy=True,
    )

    def __init__(self, title, url, desc=None, img=None):
        """Create a book row from its title and URL, plus optional details."""
        self.t_title, self.t_url = title, url
        self.t_desc, self.t_img = desc, img
class XiaoShuoEveryChapter(db.Model):
    """One chapter of a novel: its URL, its title and the owning book."""

    __tablename__ = 'xiaoshuoeverychapter'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # URL of the chapter.
    ec_url = db.Column(db.String(64))
    # Foreign key to the xiaoshuotitle table (the owning book).
    c_id = db.Column(db.Integer, db.ForeignKey('xiaoshuotitle.id'))
    # Title of the chapter.
    ec_title = db.Column(db.String(64))
    # Stored content rows; each gets an ``every_chapter`` back-reference.
    contents = db.relationship(
        'XiaoShuoContents',
        backref='every_chapter',
        lazy=True,
    )

    def __init__(self, url, c_id, title):
        """Create a chapter row from its URL, owning book id and title."""
        self.ec_url, self.c_id, self.ec_title = url, c_id, title
class XiaoShuoContents(db.Model):
    """Stored content of a single chapter."""

    __tablename__ = 'xiaoshuocontent'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Content of the chapter.
    c_content = db.Column(db.Text)
    # Foreign key to the xiaoshuoeverychapter table (the owning chapter).
    ec_id = db.Column(
        db.Integer,
        db.ForeignKey('xiaoshuoeverychapter.id'),
        nullable=True,
    )

    def __init__(self, content, t_id):
        """Create a content row; ``t_id`` is stored as the chapter id ``ec_id``."""
        self.c_content, self.ec_id = content, t_id
Pandatxt/views.py
from threading import Thread
from flask import Blueprint, render_template
import requests
from bs4 import BeautifulSoup as bs
from Pandatxt.models import db, XiaoShuoTitles, XiaoShuoEveryChapter, XiaoShuoContents
# Blueprint named 'pandatxt'; the view functions in this module attach
# themselves to it through the @pandatxt.route(...) decorators.
pandatxt = Blueprint('pandatxt', __name__)
@pandatxt.route('/createdb/')
def create_db():
    """
    Create the database tables for the models.

    :return: a short confirmation message
    """
    db.create_all()
    message = '创建成功!'
    return message
@pandatxt.route('/getname/')
def get_name():
    """
    Fetch novel titles from the listing page and store the new ones.

    Every link found inside ``span.s2`` elements of ``div.l`` blocks is
    treated as one novel (link text = title, ``href`` = URL). Titles that
    are already stored are skipped; all new rows are committed together.

    :return: the rendered index page listing every stored title
    """
    # A timeout keeps a stalled remote site from blocking this request forever.
    req = requests.get('https://www.biquge5200.cc/xuanhuanxiaoshuo/', timeout=10)
    after_bs = bs(req.text, 'html5lib')

    # One selector on the parsed tree replaces stringifying the intermediate
    # result sets and parsing them again.
    for a in after_bs.select('div.l span.s2 a'):
        t_title = a.text
        t_url = a.get('href')
        # A link without text or without a target cannot become a usable row.
        if not t_title or not t_url:
            continue
        # The lookup stays in the database so its own string comparison rules
        # decide what counts as an already stored title.
        already_stored = XiaoShuoTitles.query.filter(
            XiaoShuoTitles.t_title == t_title
        ).first()
        if already_stored is None:
            db.session.add(XiaoShuoTitles(t_title, t_url))
    db.session.commit()

    titles = XiaoShuoTitles.query.all()
    return render_template('index/index.html/', titles=titles)
@pandatxt.route('/getchapterurl/<int:id>/')
def get_every_chapter_url(id):
    """
    Collect the chapter links of one book and show its chapter list.

    If no chapter of the book is stored yet, the book page is downloaded and
    the links inside ``div#list`` are saved as chapters. Stored chapters are
    never downloaded again.

    :param id: primary key of the book, taken from the URL
    :return: the rendered chapter list page; responds with 404 when the book
        does not exist
    """
    # Number of leading links in the list that are not stored as chapters.
    # NOTE(review): presumably the page repeats the newest chapters ahead of
    # the full list - confirm against the site's markup.
    skipped_links = 9

    # Unknown ids end in a 404 instead of an AttributeError on None.
    title_list = XiaoShuoTitles.query.get_or_404(id)

    have_chapters = XiaoShuoEveryChapter.query.filter(
        XiaoShuoEveryChapter.c_id == id
    ).first()
    if have_chapters is None:
        # A timeout keeps a stalled remote site from blocking this request.
        req = requests.get(title_list.t_url, timeout=10)
        after_bs = bs(req.text, 'html5lib')
        list_div = after_bs.find('div', id='list')
        # A page without the list container simply yields no chapters.
        a_list = list_div.find_all('a') if list_div is not None else []
        for a in a_list[skipped_links:]:
            ec_url = a.get('href')
            # A link without a target could never be fetched later.
            if not ec_url:
                continue
            db.session.add(XiaoShuoEveryChapter(ec_url, title_list.id, a.text))
        db.session.commit()

    chapters = XiaoShuoEveryChapter.query.filter(XiaoShuoEveryChapter.c_id == id)
    return render_template('chapters/chapters.html/', chapters=chapters)
@pandatxt.route('/getcontent/<int:id>/')
def get_content(id):
    """
    Show the content of one chapter, downloading it on first access.

    If no content is stored for the chapter, its page is downloaded, the
    ``div#content`` element is saved as the content and then rendered.
    Stored content is served without another download.

    :param id: primary key of the chapter, taken from the URL
    :return: the rendered content page; responds with 404 when the chapter
        does not exist and with 502 when the downloaded page has no content
        element
    """
    from flask import abort

    content = XiaoShuoContents.query.filter(XiaoShuoContents.ec_id == id).first()
    if content is None:
        # Unknown ids end in a 404 instead of an AttributeError on None.
        ec = XiaoShuoEveryChapter.query.get_or_404(id)
        # A timeout keeps a stalled remote site from blocking this request.
        req = requests.get(ec.ec_url, timeout=10)
        # Name the parser explicitly, like the other views do.
        afrer_bs = bs(req.text, 'html5lib')
        div_content = afrer_bs.find('div', id='content')
        if div_content is None:
            # Without this guard the literal text 'None' would be stored as
            # the chapter content and served from then on.
            abort(502)
        content = XiaoShuoContents(str(div_content), id)
        db.session.add(content)
        db.session.commit()
    return render_template('content/content.html', content=content)
最后,目前这个小项目只能算一个小说网站的雏形,到后面我还会进一步优化这个项目,比如说数据库的设计更全面一点,具备小说类型分类,创建时间,更新时间,点击排名等。还会加入一些新的功能,比如小说下载功能等。