以采集某电影网站的最新电影为例:
import os # 目录结构处理模块
import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template
import webbrowser
app = Flask(__name__)


@app.route('/')
def index():
    """Render the home page with the freshly scraped movie list.

    Each request calls ``getMostNewMoview(indexUrl)`` and passes the
    result to the ``index.html`` template under the name ``url_list``.

    Returns:
        The rendered ``index.html`` page.
    """
    movie_list = getMostNewMoview(indexUrl)
    return render_template('index.html', url_list=movie_list)
# Initial configuration parameters
path = 'data/images/'  # directory where images are stored
indexUrl = 'https://www.dytt8.net'
limit_time = 60  # request timeout limit, in seconds

# Make sure the image directory exists before anything tries to use it.
if not os.path.exists(path):
    os.makedirs(path)
def getMostNewMoview(url):
    """Scrape the newest-movie listing and resolve each movie's download links.

    The page at ``url`` is fetched and decoded as gb2312. Inside the first
    element with class ``co_content8`` every ``<td>`` cell after the first
    two is inspected; the second ``<a>`` of a cell supplies the movie title
    and the relative link to its detail page. The detail page is then
    passed to ``getMovieDownUrl`` to obtain the download links.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        A list of dicts with the keys ``'title'``, ``'mag_url'`` and
        ``'ftp_url'``. The list is empty when the listing container is not
        present in the page.
    """
    # NOTE(review): the header value repeats the header name; kept unchanged
    # because the site's reaction to a different value is not known here.
    headers = {'User-Agent': 'User-Agent:Mozilla/5.0'}
    # Without a timeout a stalled server would block the caller forever;
    # limit_time is the module-level limit configured for exactly this.
    res = requests.get(url, headers=headers, timeout=limit_time)
    res.encoding = 'gb2312'
    soup = BeautifulSoup(res.text, 'html.parser')

    container = soup.find(attrs={'class': 'co_content8'})
    if container is None:
        return []

    movie_list = []
    # The first two cells are skipped, exactly as the original counter did.
    for cell in container.find_all('td')[2:]:
        anchors = cell.find_all('a')
        # The second anchor is the one that is read, so a cell needs at
        # least two anchors; fewer would raise IndexError.
        if len(anchors) < 2:
            continue
        movie_link = anchors[1]
        movie_new_title = movie_link.string
        # Detail links are joined onto the site root, not onto ``url``.
        movie_new_url = indexUrl + movie_link['href']
        print(movie_new_title)
        mag_url, ftp_url = getMovieDownUrl(movie_new_url)
        movie_list.append({
            'title': movie_new_title,
            'mag_url': mag_url,
            'ftp_url': ftp_url,
        })
    return movie_list
def getMovieDownUrl(url=None):
    """Fetch a movie detail page and extract its two download links.

    The page is decoded as gb2312 and searched for the first element with
    class ``co_content8``. The ``href`` of the first ``<a>`` inside it is
    returned as the magnet link and the ``href`` of the second ``<a>`` as
    the FTP link.

    Args:
        url: Address of the movie detail page, e.g.
            ``https://www.dytt8.net/html/gndy/dyzz/20181209/57919.html``.

    Returns:
        A ``(mag_url, ftp_url)`` tuple. Both items are ``None`` when the
        container is missing or holds fewer than two anchors, so that one
        malformed detail page does not abort the whole listing.
    """
    headers = {'User-Agent': 'User-Agent:Mozilla/5.0'}
    # Bound the request with the module-level limit so it cannot hang.
    res = requests.get(url, headers=headers, timeout=limit_time)
    res.encoding = 'gb2312'
    page = BeautifulSoup(res.text, 'html.parser')

    content = page.find(attrs={'class': 'co_content8'})
    if content is None:
        return None, None

    anchors = content.find_all('a')
    if len(anchors) < 2:
        return None, None

    # .get() yields None for an anchor without an href instead of raising.
    mag_url = anchors[0].get('href')
    ftp_url = anchors[1].get('href')
    return mag_url, ftp_url
if __name__ == '__main__':
    # Ask the default browser to open the local page (reuse an existing
    # window, do not raise it), then start the Flask server, which blocks.
    webbrowser.open('http://127.0.0.1:5000/', new=0, autoraise=False)
    app.run()
index.html
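<p>提示:推荐使用磁力下载方式,下载前先安装好迅雷!</p>
<ul>
{% for post in url_list %}
  <li>{{ post.title }}:<a href="{{ post.mag_url }}">磁力下载</a> | <a href="{{ post.ftp_url }}">FTP下载</a></li>
{% endfor %}
</ul>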
开始打包
1、安装pyinstaller
pip install pyinstaller
2、执行打包命令:pyinstaller -F G:\moviedown\mainPro.py
打包完后会有两个文件夹生成:dist、build,exe在dist文件夹下
如果要让flask对应的网页正常打开,需要将templates文件夹复制到dist目录下
然后点击exe文件运行