python多线程下载文件

思路

  1. 先从http head中获取文件的大小
  2. 将大小分隔成若干份(一个线程下载一份)
  3. 通过seek将下载的块的内容写到文件的对应的位置,对每一个线程下载的数据块进行拼接

代码(下载百度首页的图片为例)

import requests
import threading

class downloader:
    def __init__(self):
        self.url = "https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo/bd_logo1_31bdc765.png"
        self.num = 8
        self.name = "baidu.png"
        r = requests.head(self.url)
        # 获取文件大小
        self.total = int(r.headers['Content-Length'])
        print self.total

    # 获取每个线程下载的区间
    def get_range(self):
        ranges = []
        offset = int(self.total/self.num)
        for i in range(self.num):
            if i == self.num-1:
                ranges.append((i*offset,''))
            else:
                ranges.append((i*offset,(i+1)*offset))
        return ranges  # [(0,100),(100,200),(200,"")]

    # 通过传入开始和结束位置来下载文件
    def download(self,start,end):
        headers = {'Range':'Bytes=%s-%s'%(start,end),'Accept-Encoding':'*'}
        res = requests.get(self.url,headers=headers)
        print "%s-%s download success"%(start,end)
        # 将文件指针移动到传入区间开始的位置
        self.fd.seek(start)
        self.fd.write(res.content)

    def run(self):
        self.fd = open(self.name,"wb")

        thread_list = []
        n = 0

        for ran in self.get_range():
            # 获取每个线程下载的数据块
            start,end = ran
            n += 1
            thread = threading.Thread(target=self.download,args=(start,end))
            thread.start()
            thread_list.append(thread)

        for i in thread_list:
            # 设置等待,避免上一个数据块还没写入,下一数据块对文件seek,会报错
            i.join()

        self.fd.close()

if __name__ == "__main__":
    downloader().run()

你可能感兴趣的:(python多线程下载文件)