多线程小视频下载

原文作者为:

Donoy 

环境:

python2.7

ScrolledText

Tkinter

1.代码分析:

thread = threading.Thread(target=myThread)
thread.start()
创建一个线程,通过start方法启动
 
  
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# coding:utf-8
# @Date    : 6--:8:
# @Author  : Donoy ([email protected])
# @Link    : http://www.cnblogs.com/Donoy/
# @Version : $Id$

# 多线程 爬虫
# GUI 界面
import os
import requests
import re
import threading
from   Tkinter import *
from   ScrolledText import ScrolledText  # 文本滚动框
import urllib  # 这个模块中有下载的功能
import sys

# Python 2 only: force the interpreter-wide default string encoding to UTF-8.
# reload(sys) comes first because setdefaultencoding is not reachable on the
# sys module as it is initially imported.
# NOTE(review): this is a process-wide hack and has no Python 3 equivalent.
reload(sys)
sys.setdefaultencoding('utf-8')
# Shared module-level list of [name, url] entries: filled by getVideoUrl and
# consumed by myThread.
Video_Data = []


def creatWnd():
    """Build the DSpider main window and publish its widgets as globals.

    Creates three module-level names used by the other functions:
    ``root`` (the Tk window), ``text`` (scrolling log area) and ``varl``
    (the StringVar behind the red status label).

    The Tk event loop is not started here; presumably the caller runs
    ``root.mainloop()`` afterwards.
    """
    global root, varl, text

    # Top-level window and its caption.
    root = Tk()
    root.title('DSpider')

    # Row 1: scrolling text box that lists the videos being downloaded.
    text = ScrolledText(root, font=('微软雅黑'))
    text.grid()

    # Row 2: status label driven by a StringVar so it can be updated later.
    varl = StringVar()
    varl.set('Fight......')
    Label(root, textvariable=varl, fg='red', font=('微软雅黑')).grid()

    # Row 3: button that kicks off scraping and downloading.
    Button(root, command=begin_Thread, font='黑体', text='开始爬取').grid()


def getHtmlData(url, timeout=10):
    """Download *url* with a browser-like User-Agent and return its text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises a timeout exception. Without it a stalled server
            would block the caller (the GUI button callback) indefinitely.

    Returns:
        The decoded response body (``Response.text``).
    """
    # NOTE(review): this User-Agent looks like it lost digits when the
    # listing was published (e.g. "Mozilla/." / "Chrome/..883."); it is kept
    # byte-for-byte here -- confirm the intended value.
    RequestHeader = {
        'User-Agent': 'Mozilla/. (Windows NT.3; WOW6) AppleWebKit/3.36 (KHTML, like Gecko) Chrome/..883. Safari/3.36'
    }
    html = requests.get(url, headers=RequestHeader, timeout=timeout)
    return html.text


def getVideoUrl(html):
    """Collect [name, url] entries for every video found in *html*.

    Each chunk matched by the outer pattern is searched for ``data-mp4``
    attributes; when at least one is present, the names found in the same
    chunk are paired with the URLs and appended to the module-level
    ``Video_Data`` list.

    Args:
        html: Page source as returned by getHtmlData.
    """
    # re.S lets '.' match newlines as well.
    # NOTE(review): this pattern and Name_Parse below look like they lost
    # their HTML tags when the listing was published; they are kept as
    # visible -- restore them against the real page markup.
    Parse = re.compile(r'(\n.*?\n.*?\n)', re.S)
    context = re.findall(Parse, html)

    # Both patterns are loop-invariant, so compile them once up front.
    Parse = re.compile(r'data-mp4="(.*?)"')
    Name_Parse = re.compile(r'((.*?))', re.S)

    for item in context:
        VideoUrl = re.findall(Parse, item)
        if VideoUrl:
            VideoName = re.findall(Name_Parse, item)
            # zip pairs the names with the URLs position by position.
            for Name, url in zip(VideoName, VideoUrl):
                Video_Data.append([Name, url])


def begin_Thread():
    """Button callback: scrape the listing page, then download in a thread.

    Fetches the page, fills ``Video_Data``, shows the number of videos in
    the status label and starts ``myThread`` on a worker thread. Any
    exception from fetching or parsing propagates to the caller.
    """
    url = 'http://www.budejie.com/video/'
    html = getHtmlData(url)
    getVideoUrl(html)

    # Report the real number of queued videos (the old "/ 2" only matched
    # the download loop that skipped half of the list).
    varl.set('一共%s个小视频,现在开始下载......' % len(Video_Data))
    thread = threading.Thread(target=myThread)
    thread.start()


def myThread():
    """Download every queued video and log each one in the text box.

    Files are saved as ``<n>.mp4`` (n counting from 1) under the hard-coded
    save directory. The shared ``Video_Data`` queue is emptied up front so
    that a later run does not download the same entries again.
    """
    # Iterate over a snapshot: popping from the list while looping over it
    # ended the loop after only half of the entries.
    pending = Video_Data[:]
    del Video_Data[:]

    # Same runtime value as before, spelled without the invalid '\s' escape.
    save_dir = 'F:\\save\\vedio\\'

    # NOTE(review): text and varl are Tk objects used from a worker thread;
    # confirm this is safe on the target Tk build.
    for index, Data in enumerate(pending, 1):
        # Data[0] is a tuple of regex groups; its second element is shown.
        text.insert(END, str(index) + '.' + Data[0][1] + Data[1] + '\n')
        urllib.urlretrieve(Data[1], save_dir + str(index) + '.mp4')
    varl.set('所有的视频都下载完成')


你可能感兴趣的:(request)