#coding=utf-8
from tkinter import *
class movieFrame:
    """Main window of the Baidu netdisk resource search tool.

    Holds the Tk window it was given and, via ``setInitWindow``, builds the
    search box, the read-only result area with its scrollbar, and the search
    button.
    """

    def __init__(self, init_window_name):
        """Remember the window that all widgets will be attached to.

        Args:
            init_window_name: The Tk window (e.g. the ``Tk()`` root) used as
                the parent of every widget created by ``setInitWindow``.
        """
        self.init_window_name = init_window_name

    def setInitWindow(self):
        """Set the window title and size, then create and lay out the widgets."""
        window = self.init_window_name
        window.title("百度网盘资源搜索 by YoooKnight")
        # The method is ``geometry``; the previous spelling ("getmetry")
        # raised AttributeError as soon as the window was set up.
        window.geometry('500x400')

        # Search box.
        self.init_search_text = Text(window, width=30, height=2)
        self.init_search_text.grid(row=0, column=1, padx=20, pady=10)

        # Result area; kept DISABLED so the user cannot edit the output.
        self.init_result_data = Text(window, width=50, height=20)
        self.init_result_data.config(state=DISABLED)
        self.init_result_data.grid(
            row=1, column=1, columnspan=2, padx=20, pady=10, sticky=W
        )

        # Scrollbar for the result area. The parent is passed explicitly so
        # the scrollbar lands in this window rather than in the default root.
        scroll = Scrollbar(window, command=self.init_result_data.yview)
        self.init_result_data.config(yscrollcommand=scroll.set)
        scroll.grid(row=1, column=3, sticky=S + W + E + N)

        # Search button; clicking it calls ``searchMovie``.
        self.searchButton = Button(
            window, text="查询", bg='lightblue', command=self.searchMovie
        )
        self.searchButton.grid(row=0, column=2)

    def searchMovie(self):
        """Handle a click on the search button (placeholder, does nothing)."""
        pass
from bs4 import BeautifulSoup
from urllib.request import quote
import urllib.request
import string
import re
class Spider:
    """Scraper that looks up Baidu netdisk share links for a search term.

    It queries the site's search page, follows the first hit to its detail
    page, and pairs every ``pan.baidu.com`` link found there with an
    extraction code found in the page text.
    """

    search = ''
    indexUrl = 'http://www.pinghaoche.com.cn/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
    # Seconds to wait for each HTTP response, so a stalled server cannot
    # block the caller forever.
    timeout = 10
    # Captures the 4 characters following "提取码"/"密码", a colon and an
    # optional space. The original character class listed the ASCII colon
    # twice; presumably one of them was meant to be the full-width colon,
    # so both forms are accepted here.
    _CODE_PATTERN = re.compile(r'(?:提取码|密码)[::] ?(.{4})')

    def __init__(self, search):
        """Store the search term.

        Args:
            search: Text to search for (e.g. a movie title).
        """
        self.search = search

    def getlinkList(self):
        """Return the share links and extraction codes for ``self.search``.

        Only the first search hit is followed, i.e. the result is taken from
        the single best-matching entry.

        Returns:
            A list of ``{"link": <url>, "code": <4-char code>}`` dicts, in
            page order. The list is empty when the search term is blank,
            when nothing was found, or when a request or parse step failed
            (the error is printed).
        """
        query = (self.search or '').strip()
        if not query:
            # A blank query would just fetch the site's index page.
            return []

        try:
            # Quote only the term itself so spaces, '&', '#', newlines, etc.
            # cannot break the URL.
            searchHtml = self._fetch(self.indexUrl + '?s=' + quote(query, safe=''))
            detailUrl = self._firstResultUrl(searchHtml)
            if not detailUrl:
                return []
            return self._extractLinks(self._fetch(detailUrl))
        except Exception as e:
            # Best-effort scraper: any network or parsing failure is reported
            # and treated as "no result" rather than crashing the caller.
            print(e)
            return []

    def _fetch(self, url):
        """Download ``url`` with the spider's headers and return it as UTF-8 text."""
        req = urllib.request.Request(url, headers=self.headers)
        with urllib.request.urlopen(req, timeout=self.timeout) as res:
            return res.read().decode('utf-8')

    @staticmethod
    def _firstResultUrl(searchHtml):
        """Return the href of the first search hit, or None if there is none."""
        soup = BeautifulSoup(searchHtml, 'html.parser')
        mainLeft = soup.find('div', class_='mainleft')
        if mainLeft is None:
            return None
        thumbnail = mainLeft.find('div', class_='thumbnail')
        if thumbnail is None:
            return None
        anchor = thumbnail.find('a')
        if anchor is None:
            return None
        return anchor.get('href')

    def _extractLinks(self, detailHtml):
        """Pair each pan.baidu.com link in ``detailHtml`` with an extraction code."""
        soup = BeautifulSoup(detailHtml, 'html.parser')
        hrefs = (aTag.get('href') for aTag in soup.find_all('a'))
        linkUrlList = [href for href in hrefs if href and 'pan.baidu.com' in href]
        codeList = self._CODE_PATTERN.findall(detailHtml)
        # Links and codes are matched by position; zip() stops at the shorter
        # list, so a link without a corresponding code is dropped.
        return [
            {"link": link, "code": code}
            for link, code in zip(linkUrlList, codeList)
        ]
from sourceSpider.pinghaoche import spider as pingSpider
class movieFrame:
    """Search behaviour of the main window (only ``searchMovie`` is shown here)."""

    def searchMovie(self):
        """Search for the text in the search box and display the results.

        Reads the search box, runs the spider, and rewrites the result area
        with one "link / extraction code" pair per hit, or with an apology
        message when nothing was found.
        """
        # The widget is created as ``init_search_text`` (lower-case "t").
        # Text.get() up to END includes the widget's trailing newline, so the
        # term is stripped before it is sent to the spider.
        search = self.init_search_text.get("1.0", END).strip()
        ret = pingSpider.Spider(search).getlinkList()

        resultBox = self.init_result_data
        # The result area is normally read-only; enable it while rewriting.
        resultBox.config(state=NORMAL)
        try:
            resultBox.delete("1.0", END)
            if ret:
                for temp in ret:
                    resultBox.insert(END, "链接地址:" + temp['link'] + "\n")
                    resultBox.insert(END, "提取码:" + temp['code'] + "\n\n")
            else:
                resultBox.insert(END, "非常抱歉,没有找到你要的影片")
        finally:
            # Always return the widget to read-only, even if an insert failed.
            resultBox.config(state=DISABLED)
pip install pyinstaller
# F: 生成结果是一个exe文件,所有的第三方依赖、资源和代码均被打包进该exe内
# w: 不显示命令行窗口
pyinstaller -Fw xx.py
Tips:有兴趣的朋友可以加我 QQ:1592388194。这只是一个小工具,还存在很多问题,不介意的话可以找我,大家一起学习进步,哈哈哈。
ps:
该文章已经同步发到简书,链接地址:https://www.jianshu.com/p/9a53322a6d0c