继续在之前的实例上进行操作。
本次主要介绍SQLite的使用。
首先简单介绍一下,SQLite是一个内置数据库,是以一种文件的方式存储的。
1.创建数据库
数据库创建过程如下代码(有基础的道友可以轻松阅读,没有基础的建议预习一下数据库语言,在此不做详细介绍):
import sqlite3
import os
db_file = 'maoyan.db'
# 创建表
def create_table(db_path=None):
    """Create the ``movie`` table in the SQLite database.

    Args:
        db_path: Optional path of the database file. When omitted, the
            module-level ``db_file`` is used.

    Raises:
        sqlite3.OperationalError: If the statement fails, for example
            because the ``movie`` table already exists.
    """
    # 1. Connect to the database.
    conn = sqlite3.connect(db_file if db_path is None else db_path)
    try:
        # 2. Create a cursor used to execute statements.
        cursor = conn.cursor()
        # 3. Execute the SQL statement.
        cursor.execute('''
            create table movie(
                id integer primary key autoincrement,
                title text,
                star text,
                reltime text,
                score float
            )
        ''')
        # 4. Commit: operations that modify the database must be committed.
        conn.commit()
    finally:
        # 5. Close the connection even when the statement above failed.
        conn.close()
def save(movie, db_path=None):
    """Insert one movie record into the ``movie`` table.

    Args:
        movie: Mapping read with ``.get`` for the keys ``'title'``,
            ``'star'``, ``'time'`` and ``'score'``; a missing key is stored
            as NULL.
        db_path: Optional path of the database file. When omitted, the
            module-level ``db_file`` is used.

    Raises:
        sqlite3.OperationalError: If the insert fails, for example because
            the ``movie`` table does not exist.
    """
    # 1. Connect.
    conn = sqlite3.connect(db_file if db_path is None else db_path)
    try:
        # 2. Create a cursor used to execute statements.
        cursor = conn.cursor()
        # 3. Execute the SQL statement. Only columns that exist in the movie
        #    table are listed: it has no ``country`` column, so naming one
        #    made every insert fail.
        cursor.execute('''
            insert into movie
            (title, star, reltime, score)
            values
            (?, ?, ?, ?)
        ''', (movie.get('title'), movie.get('star'), movie.get('time'),
              movie.get('score')))
        # 4. Commit.
        conn.commit()
    finally:
        # 5. Close the connection even when the insert failed.
        conn.close()
if __name__ == '__main__':
    # Create the table only while the database file does not exist yet.
    database_missing = not os.path.exists(db_file)
    if database_missing:
        create_table()
    # Sample record; the insert itself is left disabled.
    movie = {
        'title': '霸王别姬',
        'star': '主演:张国荣,张丰毅,巩俐',
        'time': '上映时间:1993-01-01',
        'score': 9.6,
    }
    # save(movie)
执行完代码后,文件夹中会生成maoyan.db,打开后发现无法查看内容。
按如下操作执行:1) PyCharm 右侧点击Database
2) 点击 + 弹出菜单中, 选择Data Source, 再选择sqlite
3) 如果有 Download missing driver files, 点击 Download
4) 选择 file 选择需要打开的数据库文件 (提前查询好创建的db文件路径)
2.向数据库中存放数据
代码如下:
from bs4 import BeautifulSoup
from urllib.request import urlopen
import sqlite3
import os
import re
db_file = 'maoyan.db'
def save(movie, db_path=None):
    """Insert one scraped movie record into the ``movie`` table.

    Args:
        movie: Mapping read with ``.get`` for the keys ``'name'``,
            ``'star'``, ``'releasetime'`` and ``'score'``; a missing key is
            stored as NULL.
        db_path: Optional path of the database file. When omitted, the
            module-level ``db_file`` is used.

    Raises:
        sqlite3.OperationalError: If the insert fails, for example because
            the ``movie`` table does not exist.
    """
    # 1. Connect.
    conn = sqlite3.connect(db_file if db_path is None else db_path)
    try:
        # 2. Create a cursor used to execute statements.
        cursor = conn.cursor()
        # 3. Execute the SQL statement. The movie table keeps the film name
        #    in its ``title`` column (there is no ``name`` column), so the
        #    dict's 'name' value is written to ``title``.
        cursor.execute('''
            insert into movie
            (title, star, reltime, score)
            values
            (?, ?, ?, ?)
        ''', (movie.get('name'), movie.get('star'), movie.get('releasetime'),
              movie.get('score')))
        # 4. Commit.
        conn.commit()
    finally:
        # 5. Close the connection even when the insert failed.
        conn.close()
def get_one_page(x):
    """Download one page of the Maoyan board and return its HTML text.

    Args:
        x: Page index; the request offset is ``x * 10``.

    Returns:
        The response body decoded with the default codec of
        ``bytes.decode()`` (UTF-8).
    """
    # str.format fills the {} placeholder with the computed offset.
    # Equivalent %-style form: 'https://maoyan.com/board/4?offset=%d' % (x * 10)
    url = 'https://maoyan.com/board/4?offset={}'.format(x * 10)
    # The context manager closes the HTTP response even if reading or
    # decoding fails.
    with urlopen(url) as response:
        return response.read().decode()
def get_film(html):
    """Extract movie records from one page of HTML.

    Elements are selected by the CSS classes ``.name``, ``.star``,
    ``.releasetime``, ``.integer`` and ``.fraction`` and combined
    position by position; pairing stops at the shortest selection.

    Args:
        html: HTML text of the page.

    Returns:
        A list of dicts with the keys ``'name'``, ``'star'``,
        ``'releasetime'`` and ``'score'``. The score is the text of the
        ``.integer`` element followed by the text of the ``.fraction``
        element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    names = soup.select('.name')
    stars = soup.select('.star')
    release_times = soup.select('.releasetime')
    integers = soup.select('.integer')
    fractions = soup.select('.fraction')

    films = []
    for name_tag, star_tag, time_tag, int_tag, frac_tag in zip(
            names, stars, release_times, integers, fractions):
        films.append({
            'name': name_tag.get_text(),
            'star': star_tag.get_text().strip(),
            'releasetime': time_tag.get_text(),
            # Join the integer and fractional parts into one score string.
            'score': int_tag.get_text() + frac_tag.get_text(),
        })
    return films
if __name__ == '__main__':
    # Collect the records of pages 0 through 9 into a single list.
    movie_list = []
    for page in range(10):
        movie_list.extend(get_film(get_one_page(page)))
    # Show what was scraped before storing it.
    print(movie_list)
    # Store every record in the SQLite database.
    for movie in movie_list:
        save(movie)
该代码在数据库文件及其中的movie表已经建立的情况下可以直接运行。
效果图如下:
3.数据库查询
编写查询模块代码:
def find_by_title(key, db_path=None):
    """Return the movies whose title contains ``key``.

    Args:
        key: Text to search for; it is wrapped in ``%`` wildcards and
            matched with SQL ``LIKE``.
        db_path: Optional path of the database file. When omitted, the
            module-level ``db_file`` is used.

    Returns:
        A list of dicts with the keys ``'id'``, ``'name'``, ``'star'``,
        ``'reltime'`` and ``'score'``, one per matching row.

    Raises:
        sqlite3.OperationalError: If the query fails, for example because
            the ``movie`` table does not exist.
    """
    # 1. Connect.
    conn = sqlite3.connect(db_file if db_path is None else db_path)
    try:
        # 2. Create a cursor used to execute statements.
        cursor = conn.cursor()
        # 3. Run the query. The movie table keeps the film name in its
        #    ``title`` column (there is no ``name`` column).
        result = cursor.execute('''
            select * from movie
            where title like ?
        ''', ('%' + key + '%',))
        # 4. A query does not need a commit. Rows are read before the
        #    connection is closed.
        ls = []
        for row in result:
            ls.append({
                'id': row[0],
                'name': row[1],
                'star': row[2],
                'reltime': row[3],
                'score': row[4],
            })
    finally:
        # 5. Close the connection even when the query failed.
        conn.close()
    return ls
完整代码如下:
from bs4 import BeautifulSoup
from urllib.request import urlopen
import sqlite3
import os
import re
db_file = 'maoyan.db'
def find_by_title(key, db_path=None):
    """Return the movies whose title contains ``key``.

    Args:
        key: Text to search for; it is wrapped in ``%`` wildcards and
            matched with SQL ``LIKE``.
        db_path: Optional path of the database file. When omitted, the
            module-level ``db_file`` is used.

    Returns:
        A list of dicts with the keys ``'id'``, ``'name'``, ``'star'``,
        ``'reltime'`` and ``'score'``, one per matching row.

    Raises:
        sqlite3.OperationalError: If the query fails, for example because
            the ``movie`` table does not exist.
    """
    # 1. Connect.
    conn = sqlite3.connect(db_file if db_path is None else db_path)
    try:
        # 2. Create a cursor used to execute statements.
        cursor = conn.cursor()
        # 3. Run the query. The movie table keeps the film name in its
        #    ``title`` column (there is no ``name`` column).
        result = cursor.execute('''
            select * from movie
            where title like ?
        ''', ('%' + key + '%',))
        # 4. A query does not need a commit. Rows are read before the
        #    connection is closed.
        ls = []
        for row in result:
            ls.append({
                'id': row[0],
                'name': row[1],
                'star': row[2],
                'reltime': row[3],
                'score': row[4],
            })
    finally:
        # 5. Close the connection even when the query failed.
        conn.close()
    return ls
def save(movie, db_path=None):
    """Insert one scraped movie record into the ``movie`` table.

    Args:
        movie: Mapping read with ``.get`` for the keys ``'name'``,
            ``'star'``, ``'releasetime'`` and ``'score'``; a missing key is
            stored as NULL.
        db_path: Optional path of the database file. When omitted, the
            module-level ``db_file`` is used.

    Raises:
        sqlite3.OperationalError: If the insert fails, for example because
            the ``movie`` table does not exist.
    """
    # 1. Connect.
    conn = sqlite3.connect(db_file if db_path is None else db_path)
    try:
        # 2. Create a cursor used to execute statements.
        cursor = conn.cursor()
        # 3. Execute the SQL statement. The movie table keeps the film name
        #    in its ``title`` column (there is no ``name`` column), so the
        #    dict's 'name' value is written to ``title``.
        cursor.execute('''
            insert into movie
            (title, star, reltime, score)
            values
            (?, ?, ?, ?)
        ''', (movie.get('name'), movie.get('star'), movie.get('releasetime'),
              movie.get('score')))
        # 4. Commit.
        conn.commit()
    finally:
        # 5. Close the connection even when the insert failed.
        conn.close()
def get_one_page(x):
    """Download one page of the Maoyan board and return its HTML text.

    Args:
        x: Page index; the request offset is ``x * 10``.

    Returns:
        The response body decoded with the default codec of
        ``bytes.decode()`` (UTF-8).
    """
    # str.format fills the {} placeholder with the computed offset.
    # Equivalent %-style form: 'https://maoyan.com/board/4?offset=%d' % (x * 10)
    url = 'https://maoyan.com/board/4?offset={}'.format(x * 10)
    # The context manager closes the HTTP response even if reading or
    # decoding fails.
    with urlopen(url) as response:
        return response.read().decode()
def get_film(html):
    """Extract movie records from one page of HTML.

    Elements are selected by the CSS classes ``.name``, ``.star``,
    ``.releasetime``, ``.integer`` and ``.fraction`` and combined
    position by position; pairing stops at the shortest selection.

    Args:
        html: HTML text of the page.

    Returns:
        A list of dicts with the keys ``'name'``, ``'star'``,
        ``'releasetime'`` and ``'score'``. The score is the text of the
        ``.integer`` element followed by the text of the ``.fraction``
        element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    names = soup.select('.name')
    stars = soup.select('.star')
    release_times = soup.select('.releasetime')
    integers = soup.select('.integer')
    fractions = soup.select('.fraction')

    films = []
    for name_tag, star_tag, time_tag, int_tag, frac_tag in zip(
            names, stars, release_times, integers, fractions):
        films.append({
            'name': name_tag.get_text(),
            'star': star_tag.get_text().strip(),
            'releasetime': time_tag.get_text(),
            # Join the integer and fractional parts into one score string.
            'score': int_tag.get_text() + frac_tag.get_text(),
        })
    return films
if __name__ == '__main__':
    # Collect the records of pages 0 through 9 into a single list.
    movie_list = []
    for page in range(10):
        movie_list.extend(get_film(get_one_page(page)))
    # Show what was scraped before storing it.
    print(movie_list)
    # Store every record in the SQLite database.
    for movie in movie_list:
        save(movie)
    # Query the stored records and print the matches.
    print(find_by_title('王'))