闲话少说
直接撸代码
import tkinter as tk
from tkinter.filedialog import askdirectory
import requests
import re
import os
import time
class crawl_picture():
    """Small Tkinter GUI that saves the answers of one Zhihu question to a text file.

    Typical use is ``crawl_picture().start_interface()``: the user types a
    question id, picks an output folder and presses the start button.  Answers
    are requested page by page from the Zhihu v4 answers API, stripped of HTML
    tags and appended to ``<folder>/<qid>/知乎回答<qid>.txt``.

    Instance attributes:
        window:     the Tk root window.
        canvas:     background canvas packed into the root window.
        notice_str: StringVar backing the status label.
        path:       output folder chosen by the user ("" until one is chosen).
        num:        1-based index of the next answer to be written.
    """

    # Matches any HTML tag; compiled once because it is applied to every answer.
    _TAG_PATTERN = re.compile(r'<[^>]+>', re.S)
    # Answers requested per API page; also the step by which the offset grows.
    _PAGE_SIZE = 20

    def __init__(self):
        """Create the root window, the background canvas and the status variable."""
        # Built per instance (not in the class body) so that importing the
        # module does not open a window as a side effect.
        self.window = tk.Tk()
        self.window.title("【星云随风_倚码为诗】之自动爬取知乎回答")
        self.window.geometry("500x500")
        self.canvas = tk.Canvas(self.window,
                                width=800,     # canvas width
                                height=500,    # canvas height
                                bg='#E6E8FA')  # canvas background colour
        self.canvas.pack()
        # StringVar lets the status label change without rebuilding the widget.
        self.notice_str = tk.StringVar(master=self.window)
        # No folder selected yet; craw() refuses to run while this is empty.
        self.path = ""

    def start_interface(self):
        """Build all widgets and enter the Tk main loop (blocks until the window closes)."""
        self.notice = tk.Label(self.window, textvariable=self.notice_str)
        self.notice_str.set("尚未开始")
        self.notice.place(x=50, y=50)
        tk.Label(self.window, text="问题号:").place(x=50, y=100)
        tk.Label(self.window, text="保存路径:").place(x=50, y=150)
        tk.Label(self.window, bg='#E6E8FA', text="对这个程序有什么想法可以加我好友交流哦").place(x=50, y=250)
        tk.Label(self.window, bg='#E6E8FA', text="B站up主", font="Arial 20 bold", fg="#4169E1").place(x=50, y=270)
        tk.Label(self.window, bg='#E6E8FA', text="欢迎三连支持一下,谢谢", font="Arial 15 bold", fg="red").place(x=50, y=300)

        # Entry for the question id, pre-filled with a hint.
        self.keyWord = tk.StringVar()
        self.entry = tk.Entry(self.window, textvariable=self.keyWord)
        self.entry.place(x=150, y=100)
        self.keyWord.set("请输入知乎问题号")

        # Text box that displays the chosen output folder.
        self.text1 = tk.Text(self.window)
        self.text1.place(x=150, y=150, width=150, height=25)

        tk.Button(self.window, text="选择路径", command=self.select_path).place(x=320, y=152)
        tk.Button(self.window, text="开始爬取", command=self._on_start).place(x=200, y=200)
        self.window.mainloop()

    def _on_start(self):
        """Start-button callback: crawl the question currently typed in the entry."""
        self.craw(self.entry.get(), self.path, self.window)

    def text(self):
        """Blank the status line."""
        # Do not rebind self.notice here: StringVar.set() returns None and the
        # attribute must keep pointing at the Label widget.
        self.notice_str.set("")

    def select_path(self):
        """Ask the user for the output folder and show it in the path box."""
        chosen = askdirectory(title='选择文件')
        if not chosen:
            # Dialog was cancelled: keep the previous selection untouched.
            return
        self.path = chosen
        print(self.path)
        # Replace the text box content with the new folder.
        self.text1.delete(1.0, tk.END)
        self.text1.insert(tk.END, self.path, 'red')

    def getAnser(self, qid, offset, timeout=10):
        """Fetch one page of answers from the Zhihu answers API.

        Args:
            qid: Zhihu question id.
            offset: index of the first answer of the requested page.
            timeout: seconds to wait for the server before giving up, so a
                stalled connection cannot freeze the GUI indefinitely.

        Returns:
            The decoded JSON payload of the response.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
        }
        url = "https://www.zhihu.com/api/v4/questions/{}/answers?include=content&limit={}&offset={}&platform=desktop&sort_by=default".format(
            qid, self._PAGE_SIZE, offset)
        res = requests.get(url, headers=headers, timeout=timeout)
        res.encoding = 'utf-8'
        return res.json()

    def getAnswers(self, qid, save_path, delay=1):
        """Download every answer of a question and append it to a text file.

        Pages are requested until one comes back with an empty ``data`` list.
        Each answer has its HTML tags removed and is written after a
        ``【第N个回答】`` header; the status label is refreshed per answer.

        Args:
            qid: Zhihu question id.
            save_path: existing directory that receives ``知乎回答<qid>.txt``.
            delay: seconds to pause between two page requests.

        Raises:
            KeyError: if a payload has no ``data`` key or an answer has no
                ``content`` key.
        """
        offset = 0
        self.num = 1
        out_file = os.path.join(save_path, "知乎回答%s.txt" % qid)
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent characters found in answers.  The context manager closes
        # the file even when a request or a write fails.
        with open(out_file, "a", encoding="utf-8") as f:
            while True:
                print('Offset =', offset)
                data = self.getAnser(qid, offset)
                answers = data['data']
                if not answers:
                    break
                for line in answers:
                    result = self._TAG_PATTERN.sub('', line['content'])
                    f.write("\n【第%d个回答】" % self.num)
                    self.notice_str.set("正在爬取第%d个" % self.num)
                    # The crawl runs inside a Tk callback, so repaint by hand.
                    self.window.update()
                    self.num += 1
                    f.write(result)
                offset += self._PAGE_SIZE
                time.sleep(delay)

    def craw(self, keyword, path, enter_w):
        """Validate the inputs, crawl the question and report completion.

        Args:
            keyword: question id as typed by the user.
            path: folder under which a sub-folder named after the id is created.
            enter_w: parent window for the completion pop-up.
        """
        key = keyword.strip()
        if not key.isdigit():
            # Covers the untouched placeholder text and empty input.
            self.notice_str.set("请输入正确的知乎问题号")
            return
        if not path:
            self.notice_str.set("请先选择保存路径")
            return
        self.notice_str.set("开始爬取")
        # One sub-folder per question, named after the question id.
        save_dir = os.path.join(path, key)
        os.makedirs(save_dir, exist_ok=True)
        self.getAnswers(key, save_dir)  # download and store the answers
        self.new_window(enter_w)

    def new_window(self, enter_w):
        """Open a small pop-up on top of ``enter_w`` announcing success."""
        window_one = tk.Toplevel(enter_w)
        window_one.geometry('100x50')
        window_one.title('星云随风_倚码为诗')
        Lab = tk.Label(window_one, text='爬取成功', compound=tk.CENTER)
        Lab.pack()
if __name__ == '__main__':
    # Launch the GUI only when the file is executed as a script.
    cp = crawl_picture()
    cp.start_interface()
创作不易,点个赞吧!!
版权声明:如无特殊说明,文章均为本站原创,转载请注明出处
本文链接:https://blog.csdn.net/wsad861512140