python期末大报告做了接近两天,结果却不是很好,写个报告总结一下
一.技术总结
我实现的是一个网络读书网站的书籍下载和书籍阅读系统
爬取目标是365读书人网站(www.readers365.com)上的“世界名著”模块
书籍的下载用的是多线程爬虫,书籍阅读界面是用GUI完成了一个记事本,登录界面连接数据库进行判断
下面是实现的代码:
import tkinter
import pymysql
import re
import threading
import requests
import tkinter.filedialog
import tkinter.messagebox
import os
import codecs
import chardet
import time
# NOTE: a module-level ``global`` statement does not create these names; each one
# only exists once a function has assigned it (e.g. login() sets user_et / psw_et).
global user_et, psw_et, rd_win, text, filename, lg_win, book_url, book_title
# Category folder names, indexed by the ``num`` argument of solve().
# Index 0 appears to be an unused placeholder (only index 1 is passed in this file).
roort_cate = [' ', '世界名著']
def wr():
    """Show the "unknown user or wrong password" error window.

    The window is a ``Toplevel`` served by the main loop that is already
    running, rather than a second ``Tk`` root with its own nested
    ``mainloop()``. A nested main loop does not return while another root
    window is still open, so the caller never got control back to finish
    its own cleanup.
    """
    wr_tk = tkinter.Toplevel()
    wr_tk.title("Error")
    wr_tk.geometry("250x125+750+400")
    wr_lb = tkinter.Label(wr_tk, text="不存在该用户或者密码错误", font=("宋体", 10))
    wr_lb.place(x=50, y=60, width=150, height=20)
def reg():
    """Register the account typed into the login form.

    Reads the user name and password from the module-level ``user_et`` and
    ``psw_et`` entry widgets and inserts them as a new row of the ``user``
    table.
    """
    us_nm = user_et.get()
    psw = psw_et.get()
    # The driver fills in the placeholders, so quotes typed by the user can no
    # longer change the statement (the old string formatting allowed SQL injection).
    sql = "insert into user(user_name, password) values(%s, %s)"
    print(sql)
    # NOTE(review): the password is stored exactly as typed (plain text).
    conn = pymysql.connect(host='127.0.0.1', user='root', password='123456', database='read', charset='utf8')
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, (us_nm, psw))
        conn.commit()
    finally:
        # Close the connection even when the insert fails
        conn.close()
def log():
    """Check the typed credentials and open the main menu on success.

    Reads the user name and password from the module-level ``user_et`` and
    ``psw_et`` entry widgets. Exactly one matching row in the ``user`` table
    opens the main menu via ``interface()``; anything else shows the error
    window via ``wr()``.
    """
    us_nm = user_et.get()
    psw = psw_et.get()
    # The driver fills in the placeholders, so quotes typed by the user can no
    # longer change the statement; the printed SQL also no longer contains the password.
    sql = "select * from user where user_name = %s and password = %s"
    print(sql)
    conn = pymysql.connect(host='127.0.0.1', user='root', password='123456', database='read', charset='utf8')
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, (us_nm, psw))
            count = cursor.rowcount
    finally:
        # Release the connection before any window is shown: interface() runs a
        # main loop and would otherwise keep it open for the whole session.
        conn.close()
    print(count)
    if count == 1:
        interface()
    else:
        wr()
# Matches every character outside U+4E00..U+9FA5; compiled once because
# find_chinese() is called for every chapter title.
_NON_CHINESE = re.compile(r'[^\u4e00-\u9fa5]')


def find_chinese(file):
    """Return *file* with every character outside U+4E00-U+9FA5 removed.

    Args:
        file: The text to filter (despite the name, a string, not a file object).

    Returns:
        The remaining characters in their original order; ``''`` when none are left.
    """
    return _NON_CHINESE.sub('', file)
# Held for the whole of wt(), so only one chapter is fetched and written at a time
l = threading.Lock()


def wt(url, fpath):
    """Download one chapter page and save its text to *fpath*.

    Args:
        url: Address of the chapter page.
        fpath: Path of the text file to create or overwrite.

    Raises:
        IndexError: If the content pattern matches nothing in the page.
    """
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"}
    # ``with`` releases the lock even when the request or the parsing raises;
    # the old acquire()/release() pair left it locked and stalled every other worker.
    with l:
        time.sleep(0.001)
        response = requests.get(url, headers=headers)
        response.encoding = 'gb2312'
        html = response.text
        # NOTE(review): the HTML tags that delimited this pattern (and the tag removed
        # by the first replace() below) seem to have been stripped when the code was
        # published; as shown, the group can only capture an empty string. Restore them
        # from the chapter page's markup.
        content = re.findall('\n(.*?)', html, re.S)[0]
        content = content.replace('\n', '')
        # NOTE(review): shown as a plain space; may originally have been an HTML entity - confirm
        content = content.replace(' ', '')
        # Close the file deterministically so the chapter is flushed to disk
        with open(fpath, 'w', encoding='gb2312', errors='ignore') as fb:
            fb.write(content)
def solve(id, num):
    """Download every chapter of one book into its own folder, one thread per chapter.

    Args:
        id: Index of the book in the module-level ``book_url`` / ``book_title`` lists.
        num: Index into ``roort_cate`` selecting the category folder.
    """
    fpath = "E:/python/read365/%s/%s/" % (roort_cate[num], book_title[id])
    os.makedirs(fpath, exist_ok=True)
    url = book_url[id]
    print(url)
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"}
    response = requests.get(url, headers=headers)
    response.encoding = 'gb2312'
    html = response.text
    # Chapter links are relative to the book's directory
    url = url.replace("index.htm", '')
    # NOTE(review): the HTML tags of this pattern seem to have been stripped when the
    # code was published. The code below reads two groups per match (relative URL,
    # title), so the original pattern must have had two capture groups - restore it
    # from the index page's markup.
    cate_tuple = re.findall('\n(.*?)', html, re.S)
    # The last match is skipped, as in the original loop bounds; presumably it is
    # not a chapter link - TODO confirm.
    chapters = [(entry[0], find_chinese(entry[1])) for entry in cate_tuple[:-1]]
    print(book_title[id])
    # One worker per chapter. The callable and its arguments are passed separately:
    # ``target=wt(...)`` ran the download right here and gave the thread nothing to do.
    workers = []
    for cate_url, cate_title in chapters:
        file_name = "%s.txt" % cate_title
        t = threading.Thread(target=wt, args=(url + cate_url, fpath + file_name))
        t.start()
        workers.append((t, file_name))
    # Wait for every worker, not just the last one started
    for t, file_name in workers:
        t.join()
        print("%s-finish" % file_name)
    print("%s下载完成" % book_title[id])
    sl_win = tkinter.Tk()
    sl_win.title("下载")
    sl_win.geometry("300x125+500+250")
    sl_lb = tkinter.Label(sl_win, text="%s下载完成" % book_title[id], font=("宋体", 15))
    sl_lb.pack()
def load1():
    """List the books of the "世界名著" section and download the one the user picks.

    Fetches the section index, fills the module-level ``book_url`` and
    ``book_title`` lists, and shows the titles in a list box. The bound
    event calls ``solve()`` for the selected row.
    """
    global book_url, book_title
    url = "http://www.readers365.com/World/index.htm"
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"}
    response = requests.get(url, headers=headers)
    response.encoding = 'gb2312'
    html = response.text
    book_url = re.findall(r'href="(.*?)">', html, re.S)
    # NOTE(review): the closing tag after the group seems to have been stripped when
    # the code was published; as shown, the group can only capture an empty string.
    book_title = re.findall(r'index.htm">(.*?)', html, re.S)
    tmp_url = "http://www.readers365.com/World/"
    len1 = len(book_url)
    # Entry 0 is left as found; only entries 1.. are turned into absolute URLs
    for i in range(1, len1):
        book_url[i] = tmp_url + book_url[i]

    ld1_win = tkinter.Tk()
    ld1_win.title("世界名著")
    ld1_win.geometry("300x400+500+300")
    # Attach the variable to this window's own root rather than the default root
    lbv = tkinter.StringVar(master=ld1_win)
    # SINGLE is like BROWSE, but dragging with the button held does not move the selection
    lb = tkinter.Listbox(ld1_win, selectmode=tkinter.SINGLE, listvariable=lbv)
    lb.place(width=300, height=400)
    # Row 0 is a header, so list row i lines up with book_title[i] / book_url[i]
    lb.insert(tkinter.END, "-------------------------图书列表-------------------------")
    # Slicing stops at the shorter list, so a short title list cannot raise IndexError
    for title in book_title[1:len1]:
        lb.insert(tkinter.END, title)

    def xia1(event):
        """Download the book on the selected row."""
        selection = lb.curselection()
        # Nothing selected, or the header row: there is no book to download
        if not selection or selection[0] == 0:
            return
        index = selection[0]
        print(lb.get(index))
        print(index)
        solve(index, 1)

    # NOTE(review): the event sequence was lost when the code was published (anything
    # in angle brackets was stripped); restore the intended "<...>" sequence here.
    lb.bind("\n", xia1)
    ld1_win.mainloop()
# def download():
# dl_win = tkinter.Tk()
# dl_win.title("图书下载")
# dl_win.geometry("300x200+470+200")
#
# but1 = tkinter.Button(dl_win, text = "世界名著", command = load1,font = ("黑体", 15))
# # but2 = tkinter.Button(dl_win, text = "四大名著", command = load2,font = ("黑体", 15))
# # but3 = tkinter.Button(dl_win, text = "白话古籍",command = load3,font = ("黑体", 15))
# # but4 = tkinter.Button(dl_win, text="名人传记",command = load4,font = ("黑体", 15))
# # but5 = tkinter.Button(dl_win, text="唐诗鉴赏",command = load5,font = ("黑体", 15))
# # but6 = tkinter.Button(dl_win, text="宋词鉴赏",command = load6,font = ("黑体", 15))
# # but7 = tkinter.Button(dl_win, text="二十四史",command = load7,font = ("黑体", 15))
# # but8 = tkinter.Button(dl_win, text="中外童话",command = load8,font = ("黑体", 15))
#
# but1.place(x = 100, y = 50, width = 100, height = 30)
# but2.place(x = 100, y = 125, width = 100, height = 30)
# # but3.place(x=70, y=100, width=100, height=30)
# # but4.place(x=200, y=100, width=100, height=30)
# # but5.place(x=70, y=150, width=100, height=30)
# # but6.place(x=200, y=150, width=100, height=30)
# # but7.place(x=70, y=200, width=100, height=30)
# # but8.place(x=200, y=200, width=100, height=30)
# dl_win.mainloop()
def myopen():
    """Ask for a text file and load it into the reader's text widget.

    Updates the module-level ``filename``. A cancelled dialog sets it to
    "未命名文件" and leaves the widget untouched.
    """
    global filename, rd_win
    chosen = tkinter.filedialog.askopenfilename(defaultextension='.txt', filetypes=[('Text', '.txt'), ('DOC', '.doc')])
    # A cancelled dialog yields an empty value ('' - on some platforms possibly an empty tuple)
    if not chosen:
        filename = "未命名文件"
        return
    filename = chosen
    # Read first, so the current text is only replaced once the file is known to be readable
    with open(filename, encoding='gb2312', errors='ignore') as f:
        contents = f.read()
    rd_win.title('FileName:' + os.path.basename(filename))
    text.delete(1.0, tkinter.END)
    text.insert(1.0, contents)
def mysave():
    """Write the text widget's contents to the current file, GB2312-encoded.

    If no file has been opened yet, the user is asked for a file name first;
    cancelling that dialog saves nothing.

    Raises:
        UnicodeEncodeError: If the text contains a character GB2312 cannot
            represent. The file on disk is left untouched in that case.
    """
    global filename
    try:
        target = filename
    except NameError:
        # Save was chosen before any file was opened, so there is no name yet
        target = tkinter.filedialog.asksaveasfilename(defaultextension='.txt', filetypes=[('Text', '.txt')])
        if not target:
            return
        filename = target
    # 'end-1c' leaves out the newline Tk always keeps after the last line;
    # otherwise every open/save round trip grows the file by one line.
    w = text.get(1.0, 'end-1c')
    # Opening with 'w' truncates the file, so fail on unencodable text before that
    w.encode('gb2312')
    with open(target, 'w', encoding='gb2312') as f:
        f.write(w)
def reads():
    """Open the reader window: a text area with a scrollbar and a File menu.

    Stores the window and the text widget in the module-level ``rd_win`` and
    ``text`` so that ``myopen()`` and ``mysave()`` can reach them, then runs
    the window's main loop.
    """
    global rd_win, text
    rd_win = tkinter.Tk()
    rd_win.title("图书阅读")
    rd_win.geometry("800x525+675+100")

    # File menu; ``accelerator`` only sets the hint shown next to the entry
    menu_bar = tkinter.Menu(rd_win)
    file_menu = tkinter.Menu(menu_bar, tearoff=False)
    entries = (
        ('打开(O)', 'Ctrl+O', myopen),
        ('保存(S)', 'Ctrl+S', mysave),
    )
    for caption, shortcut, handler in entries:
        file_menu.add_command(label=caption, accelerator=shortcut, command=handler)
    menu_bar.add_cascade(label='文件(F)', menu=file_menu)

    # Scrollbar on the right edge, text area on the left, both filling vertically
    scroll_bar = tkinter.Scrollbar(rd_win)
    text = tkinter.Text(rd_win, font=("宋体", 15))
    scroll_bar.pack(side=tkinter.RIGHT, fill=tkinter.Y)
    text.pack(side=tkinter.LEFT, fill=tkinter.Y)

    # Link the two so each follows the other
    scroll_bar.config(command=text.yview)
    text.config(yscrollcommand=scroll_bar.set)
    text.pack()

    rd_win.config(menu=menu_bar)
    rd_win.mainloop()
def interface():
    """Close the login window and show the main menu.

    The menu has two buttons: "图书下载" runs ``load1`` and "图书阅读"
    runs ``reads``.
    """
    global lg_win
    lg_win.destroy()

    inter_win = tkinter.Tk()
    inter_win.title("reader365")
    inter_win.geometry("350x200+250+200")

    # (caption, handler, vertical position) for each menu button
    button_specs = (
        ("图书下载", load1, 30),
        ("图书阅读", reads, 110),
    )
    for caption, handler, top in button_specs:
        button = tkinter.Button(inter_win, text=caption, font=("黑体", 15), command=handler)
        button.place(x=125, y=top, width=100, height=45)

    inter_win.mainloop()
def login():
    """Build and run the login window.

    Stores the window in the module-level ``lg_win`` and the two entry
    widgets in ``user_et`` / ``psw_et``, where ``reg()`` and ``log()`` read
    them.
    """
    global lg_win, user_et, psw_et
    lg_win = tkinter.Tk()
    lg_win.title("用户登录")
    lg_win.geometry("500x250+500+250")

    # Captions on the left
    caption_font = ("黑体", 15)
    user_lb = tkinter.Label(lg_win, text="用户账号:", font=caption_font)
    psw_lb = tkinter.Label(lg_win, text="密码:", font=caption_font)
    user_lb.place(x=100, y=50, width=100, height=30)
    psw_lb.place(x=100, y=100, width=100, height=30)

    # Input fields on the right; the password field masks what is typed
    user_et = tkinter.Entry(lg_win)
    psw_et = tkinter.Entry(lg_win, show='*')
    user_et.place(x=220, y=50, width=200, height=30)
    psw_et.place(x=220, y=100, width=200, height=30)

    # Register / log-in buttons
    reg_but = tkinter.Button(lg_win, text="注册", command=reg)
    log_but = tkinter.Button(lg_win, text="登录", command=log)
    reg_but.place(x=130, y=170, width=70, height=30)
    log_but.place(x=300, y=170, width=70, height=30)

    lg_win.mainloop()
# Script entry point: the program starts at the login window
if __name__ == "__main__":
    login()
技术上得到的收获:
1.对python的tkinter模块有了更加深入的理解,使用了listbox等多个控件
2.对网络爬虫的使用更加熟练
3.实现过程中遇到了编码的问题,了解了一些编码的知识
4.对多线程和python连接数据库的运用更加熟练
5.了解目录创建,更加熟悉了文件读写
Python 文件读写模式:

| 模式 | 可做操作 | 若文件不存在 | 是否覆盖 |
| --- | --- | --- | --- |
| r | 只能读 | 报错 | - |
| r+ | 可读可写 | 报错 | 是(从文件开头覆盖写入,不会先清空文件) |
| w | 只能写 | 创建 | 是(打开时清空文件) |
| w+ | 可读可写 | 创建 | 是(打开时清空文件) |
| a | 只能写 | 创建 | 否,追加写 |
| a+ | 可读可写 | 创建 | 否,追加写 |
二.反思总结
本次在完成项目和给老师检查的过程中有如下的失误:
1.目标不够清晰,本末倒置
一开始我的目标是完成包括世界名著在内的8个书籍模块,但是后面发现他们的底层网页结构并不相同,爬虫的实现存在困难,这个时候我表现的不够果断,反复尝试了尽可能使用一个爬虫爬取8个模块,浪费了大量时间。我对结果存在侥幸,没有经过认真思考。
开始我主要侧重于图形界面的设计,设计了关于8个模块的界面,浪费了大量时间
开始的侧重点就出现了问题,更应该侧重于爬虫的设计,后面爬虫的设计出现了大量问题,有点手足无措
这从侧面也体现出了我对python的掌握并不足够熟练
2.实现过程中的自我满足
实现过程中总是存在着我做的不错这样的想法,事实上视野狭窄,做出的东西虽然功能完成的不错,但是没有亮点,要时刻保持饥渴,探寻高点
3.检查过程中的问题
检查过程中感觉老师很着急,并没有细致的检查,有点沮丧
感觉有以下几个原因,以后要注意:
1).时间有限,前面的几个同学耗费了大量时间
2).我做的东西比较常规,没什么亮点
3).我的讲解也并没有能引起老师特别注意的地方
对最后的结果不是很满意,好不甘心!!
继续努力