爬虫部分
def crawls_home(film_name):
    """Search the site for ``film_name`` and return the result tables.

    The search URL is built from the keyword's GB2312 bytes in
    percent-encoded form, so the keyword is quoted with that encoding
    instead of the default UTF-8.

    Args:
        film_name: Keyword typed by the user.

    Returns:
        The ``<table>`` elements found inside the ``co_content8`` container,
        or a one-element list holding a "nothing found" message when that
        container is missing from the page.

    Raises:
        UnicodeEncodeError: If ``film_name`` contains characters that cannot
            be encoded as GB2312.
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import quote

    # quote() produces the %XX form for the GB2312 bytes and also escapes
    # spaces, '&', '#', ... which the previous repr()/eval() approach pasted
    # into the URL unescaped (and it upper-cased ASCII letters as well).
    keyword = quote(film_name, safe='', encoding='gb2312')
    url = "http://s.ygdy8.com/plus/so.php?typeid=1&keyword={}".format(keyword)
    html = request_url(url)
    soup = BeautifulSoup(html, 'lxml')
    div = soup.find('div', class_="co_content8")
    if div is None:
        return ['没有搜到相应电影']
    return div.find_all('table', border="0", width="100%")
像电影天堂这类网站的爬虫应该算是很简单的了,这里需要注意的是电影天堂对输入的关键字采用的是gb2312编码,所以需要对关键字做一些处理
可视化部分
主要是利用PyQt5进行简单粗糙的可视化
通过捕捉回车键或者点击确认弹出搜索电影的详细列表
可以点击超链接弹出详细介绍
通过点击超链接调用迅雷实现下载
完整代码如下:
crawlsdytt.py:
import requests
import re
from bs4 import BeautifulSoup
# Browser-like User-Agent header passed to requests.get() by request_url().
header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/57.0.2987.110 Safari/537.36"}
# Example search keyword: film_name = '猫和老鼠'
def request_url(url, max_retries=3, timeout=10):
    """Fetch ``url`` and return the response body decoded as GBK.

    Args:
        url: Address to request.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for each attempt.

    Returns:
        The response text of the first HTTP 200 answer, or an empty string
        when every attempt failed or returned another status code. An empty
        string parses to an empty document, so callers see "nothing found"
        instead of hanging forever as the unbounded retry loop used to.
    """
    for _ in range(max_retries):
        try:
            response = requests.get(url, headers=header, timeout=timeout)
        except requests.RequestException:
            # Network-level failure (DNS, connection, timeout): try again.
            continue
        if response.status_code == 200:
            # Override the detected charset before reading .text.
            response.encoding = 'gbk'
            return response.text
    return ''
def crawls_home(film_name):
    """Search the site for ``film_name`` and return the result tables.

    The search URL is built from the keyword's GB2312 bytes in
    percent-encoded form, so the keyword is quoted with that encoding
    instead of the default UTF-8.

    Args:
        film_name: Keyword typed by the user.

    Returns:
        The ``<table>`` elements found inside the ``co_content8`` container,
        or a one-element list holding a "nothing found" message when that
        container is missing from the page.

    Raises:
        UnicodeEncodeError: If ``film_name`` contains characters that cannot
            be encoded as GB2312.
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import quote

    # quote() produces the %XX form for the GB2312 bytes and also escapes
    # spaces, '&', '#', ... which the previous repr()/eval() approach pasted
    # into the URL unescaped (and it upper-cased ASCII letters as well).
    keyword = quote(film_name, safe='', encoding='gb2312')
    url = "http://s.ygdy8.com/plus/so.php?typeid=1&keyword={}".format(keyword)
    html = request_url(url)
    soup = BeautifulSoup(html, 'lxml')
    div = soup.find('div', class_="co_content8")
    if div is None:
        return ['没有搜到相应电影']
    return div.find_all('table', border="0", width="100%")
def crawls_single(url):
    """Fetch a film detail page and return its description plus first link.

    Args:
        url: Address of the detail page.

    Returns:
        The first ``<p>`` of the ``Zoom`` container rendered as an HTML
        string, followed by a blank line and the first ``href`` value found
        anywhere in that container. When the container has no ``href`` only
        the paragraph is returned; when the container itself is missing a
        short "not found" message is returned.
    """
    html = request_url(url)
    soup = BeautifulSoup(html, 'lxml')
    div = soup.find('div', id="Zoom")
    if div is None:
        # Page could not be fetched or has an unexpected layout.
        return '没有找到电影详情'
    paragraph = div.p
    # Avoid rendering the literal text "None" when there is no <p>.
    intro = '' if paragraph is None else str(paragraph)
    match = re.search(r'href=\"(.*?)\"', str(div))
    if match is None:
        return intro
    return '{0}\n\n{1}'.format(intro, match.group(1))
qqtt.py:
from PyQt5.QtWidgets import QApplication,QLabel,QWidget,QVBoxLayout,QGridLayout,QPushButton
from PyQt5.QtWidgets import QMainWindow,QScrollArea
from PyQt5.QtCore import *
from PyQt5.Qt import QLineEdit
from crawlsdytt import *
import sys
import re
import os
class WindowDemo(QWidget):
    """Search window with a film-name input box and a confirm button.

    Clicking the button or pressing Return/Enter passes the typed text to
    ``MainWindow.ui``, which displays the search results.
    """

    close_signal = pyqtSignal(object)

    def __init__(self):
        super().__init__()
        # Result window; it only becomes visible once ui() is called.
        self.m = MainWindow()
        self.title = QLabel('电影名称:')
        self.textbox = QLineEdit()
        self.textboxValue = ''
        self.button = QPushButton('确认', self)
        self.button.clicked.connect(self.on_click)
        # One row: label | input box | button.
        self.grid = QGridLayout()
        self.grid.setSpacing(10)
        self.grid.addWidget(self.title, 1, 0)
        self.grid.addWidget(self.textbox, 1, 1)
        self.grid.addWidget(self.button, 1, 2)
        self.setLayout(self.grid)
        self.setWindowTitle("电影天堂")

    @pyqtSlot()
    def on_click(self):
        """Run a search for the text currently in the input box."""
        film_name = self.textbox.text()
        self.m.ui(film_name)

    def keyPressEvent(self, event):
        """Start the search on Return or keypad Enter.

        Any other key is forwarded to the base class.
        """
        # Qt.Key_Return is 16777220 (the value previously hard-coded);
        # Qt.Key_Enter is the keypad Enter key.
        if event.key() in (Qt.Key_Return, Qt.Key_Enter):
            self.on_click()
        else:
            super().keyPressEvent(event)
class Content(QWidget):
    """Detail window showing one film's description and download link."""

    # Installation directory of the Thunder download client.
    THUNDER_DIR = r'D:\Program Files (x86)\Thunder Network\Thunder\Program'

    def __init__(self):
        super().__init__()
        # Build the label and layout once. Creating them inside show_label()
        # made every repeated call try to install a second layout on the
        # same widget and left stale labels behind.
        self.title = QLabel(self)
        self.title.setWordWrap(True)
        self.title.linkActivated.connect(self.link_clicked)
        vbox = QVBoxLayout()
        vbox.addWidget(self.title)
        self.setLayout(vbox)
        self.setWindowTitle("详细信息")

    def show_label(self, url):
        """Load the detail page at ``url`` and show it in this window.

        Args:
            url: Address of the film detail page.
        """
        self.title.setText(str(crawls_single(url)))
        self.show()

    def link_clicked(self):
        """Hand the first ``href`` found in the label text to Thunder."""
        # Local import: subprocess is not part of the file's import block.
        import subprocess

        match = re.search(r'href=\"(.*?)\"', self.title.text())
        if match is None:
            return
        link = match.group(1)
        # Launch Thunder.
        # The link comes from a scraped web page, so pass it as a separate
        # argument rather than interpolating it into a shell command line.
        # Popen does not block the GUI, and cwd= avoids changing the working
        # directory of the whole process.
        command = [
            os.path.join(self.THUNDER_DIR, 'Thunder.exe'),
            '-StartType:DesktopIcon',
            link,
        ]
        try:
            subprocess.Popen(command, cwd=self.THUNDER_DIR)
        except OSError as exc:
            # Thunder is not installed at THUNDER_DIR or cannot be started.
            print('Failed to launch Thunder: {}'.format(exc), file=sys.stderr)
class MainWindow(QMainWindow):
    """Result window listing every film returned by a search."""

    # Vertical distance in pixels between two result labels.
    ROW_HEIGHT = 300

    def __init__(self):
        # Was super(QMainWindow, self), which names the wrong class.
        super().__init__()

    def ui(self, film_name):
        """Search for ``film_name`` and show the results in a scroll area.

        Does nothing when ``film_name`` is ``None`` or blank.

        Args:
            film_name: Keyword to search for.
        """
        if film_name is None or not film_name.strip():
            return
        film_list = crawls_home(film_name)
        w = QWidget()
        self.setCentralWidget(w)
        self.topFiller = QWidget()
        # Grow the container with the number of results; a fixed height of
        # 2000 px clipped every label placed below that line.
        self.topFiller.setMinimumSize(
            800, max(2000, len(film_list) * self.ROW_HEIGHT))
        self.qlabel_dict = {}
        self.c = {}
        for i, film in enumerate(film_list):
            # One detail window and one clickable label per result.
            self.c[i] = Content()
            label = QLabel(self.topFiller)
            label.setText(str(film))
            label.setWordWrap(True)
            label.linkActivated.connect(self.link_clicked(i))
            label.move(10, i * self.ROW_HEIGHT)
            self.qlabel_dict[i] = label
        self.scroll = QScrollArea()
        self.scroll.setWidget(self.topFiller)
        self.vbox = QVBoxLayout()
        self.vbox.addWidget(self.scroll)
        w.setLayout(self.vbox)
        self.statusBar().showMessage("电影列表")
        self.setWindowTitle("详细列表")
        self.show()

    def link_clicked(self, i):
        """Return the click handler for result number ``i``.

        The handler builds the absolute detail-page URL from the first
        ``href`` in the label's text, opens it in the matching ``Content``
        window and returns that URL. It returns ``None`` when the label
        text contains no ``href``.
        """
        def link():
            text = self.qlabel_dict[i].text()
            match = re.search(r'href=\"(.*?)\"', str(text))
            if match is None:
                return None
            url = "http://www.ygdy8.com" + match.group(1)
            self.c[i].show_label(url)
            return url
        return link
if __name__ == '__main__':
    # Script entry point: create the Qt application, show the search window
    # and exit with the status returned by the event loop.
    application = QApplication(sys.argv)
    search_window = WindowDemo()
    search_window.show()
    exit_status = application.exec_()
    sys.exit(exit_status)