python多线程下载实现

本文的代码案例参考了下面这篇文章中的代码,并在其基础上做了修改:

http://www.oschina.net/code/snippet_70229_2407

 

由于需要下载带 SSL 验证的 HTTPS 文件,而且单进程下载经常出现卡死的情况,因此考虑改用多线程下载。

 

# -*- coding:utf8 -*-
'''Multi-threaded HTTP(S) downloader modelled on axel.

The remote file is split into byte ranges, every range is fetched by its own
thread into a temporary part file, and the part files are concatenated into
the output file once all ranges have been downloaded.

Author: volans
E-mail: volansw [at] gmail.com
modify: gavin ma
date: 2011-04-12
'''
import getpass
import os
import shutil
import sys
import threading
import time

try:  # Python 2 module names (what the script was written against)
    import cookielib
    import thread
    import urllib2
except ImportError:  # the same APIs under their Python 3 names
    import _thread as thread
    import http.cookiejar as cookielib
    import urllib.request as urllib2

# Placeholders -- replace with the real HTTPS base URL and the credentials
# used for HTTP basic authentication.
url = '你的https://url'
username = '你的用户名'
password = '你的密码'


class TerminalPassword(urllib2.HTTPPasswordMgr):
    '''Password manager that falls back to the module-level credentials.'''

    def find_user_password(self, realm, authuri):
        '''Return the (user, password) pair for realm/authuri.

        The base manager is consulted first; when it has nothing stored
        (both entries are None) the module-level username/password are
        returned instead.
        '''
        # Explicit base-class call: HTTPPasswordMgr is an old-style class on
        # Python 2, so super() cannot be used there.
        retval = urllib2.HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if retval[0] is None and retval[1] is None:
            return (username, password)
        return retval


def Init():
    '''Install a global opener with cookie and basic-auth handlers.

    After installing the opener the module-level url is opened once.

    Returns:
        True when the module-level url could be opened, False otherwise
        (the failure is reported on stderr instead of being swallowed).
    '''
    try:
        cj = cookielib.CookieJar()
        opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(cj),
            urllib2.HTTPBasicAuthHandler(TerminalPassword()))
        urllib2.install_opener(opener)
        urllib2.urlopen(url).close()
        return True
    except Exception as e:
        sys.stderr.write('Init failed for %s: %s\n' % (url, e))
        return False


class AxelPython(threading.Thread):
    '''Thread that downloads one byte range of a file into a part file.

    run() resumes from whatever is already stored in the part file, so it
    can be called again after a failure to continue the same range.

    Attributes set by run():
        downloaded: number of bytes currently stored in the part file.
        flag: True once the whole range is on disk, False after a failure.
        error: the exception that made the last attempt fail, else None.
    '''

    def __init__(self, threadname, url, virus, filename, ranges=0):
        '''Store the download parameters.

        Args:
            threadname: name given to the thread.
            url: base URL; the request is made to url + virus.
            virus: suffix appended to url to form the request URL.
            filename: path of the part file this thread appends to.
            ranges: (first_byte, last_byte) pair, both offsets inclusive.
        '''
        threading.Thread.__init__(self, name=threadname)
        self.name = threadname
        self.url = url
        self.virus = virus
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0
        self.flag = False
        self.error = None

    def run(self):
        '''Download the not-yet-stored remainder of the range.'''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        # Resume right after the bytes that are already on disk.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is the last byte of the range (inclusive), so the part
        # is complete only once the resume point has moved past it.
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            # Mark success here too, otherwise a part that was already
            # complete on disk would never be counted as finished.
            self.flag = True
            return

        self.oneTimeSize = 8000  # bytes requested per read() call
        try:
            Init()
            req = urllib2.Request(self.url + self.virus)
            req.add_header(
                "Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
            urlhandle = urllib2.urlopen(req)
            try:
                with open(self.filename, 'ab') as filehandle:
                    data = urlhandle.read(self.oneTimeSize)
                    while data:
                        filehandle.write(data)
                        self.downloaded += len(data)
                        data = urlhandle.read(self.oneTimeSize)
            finally:
                urlhandle.close()
            self.error = None
            self.flag = True
        except Exception as e:
            # Thread boundary: remember the failure so the caller can retry.
            self.error = e
            self.flag = False


def GetUrlFileSize(url, samplevirus):
    '''Return the Content-Length reported for url + samplevirus.

    Returns 0 when the response carries no Content-Length header.

    Raises:
        IOError: when Init() fails, because no size can be determined then.
    '''
    if not Init():
        raise IOError('cannot determine the size of %s%s: Init() failed'
                      % (url, samplevirus))
    req = urllib2.Request(url + samplevirus)
    urlHandler = urllib2.urlopen(req)
    try:
        length = urlHandler.info().get('Content-Length')
    finally:
        urlHandler.close()
    if length is None:
        return 0
    return int(length)


def SpliteBlocks(totalsize, blocknumber):
    '''Split totalsize bytes into blocknumber inclusive (start, end) ranges.

    The last range absorbs the remainder of the integer division.
    '''
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges


def islive(tasks):
    '''Return True when at least one thread in tasks is still running.'''
    return any(task.is_alive() for task in tasks)


def paxel(url, samplevirus, output, blocks=6):
    '''Download url + samplevirus into output using blocks threads.

    Part files named tmpfile_<i> are written next to output and removed
    after they have been merged. A part whose thread ended without success
    is retried in the calling thread until it succeeds; there is no retry
    limit.

    Args:
        url: base URL.
        samplevirus: suffix appended to url to form the request URL.
        output: path of the file the merged download is written to.
        blocks: number of ranges / download threads.
    '''
    size = GetUrlFileSize(url, samplevirus)
    ran = SpliteBlocks(size, blocks)
    # Keep the part files in the same directory as the output file.
    path = os.path.dirname(os.path.abspath(output))
    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = [path + os.sep + "tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        task = AxelPython(threadname[i], url, samplevirus, filename[i], ran[i])
        task.daemon = True
        task.start()
        tasks.append(task)

    # finish/count stay module globals, as in the original script.
    global finish, count
    finish = True
    count = 0
    while finish:
        # Iterate over a copy: finished tasks are removed from tasks below.
        for task in list(tasks):
            if task.is_alive():
                continue
            if not task.flag:
                # The thread ended without completing its range; run()
                # resumes from the bytes already stored in the part file.
                task.run()
                time.sleep(0.5)
            if task.flag:
                count += 1
                tasks.remove(task)
        if count == blocks:
            finish = False
        else:
            time.sleep(2)

    with open(output, 'wb') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                # Stream the part instead of loading it into memory at once.
                shutil.copyfileobj(f, filehandle)
            try:
                os.remove(part)
            except OSError:
                pass  # best effort: a leftover part file is harmless


def main():
    '''Script entry point; the arguments below are placeholders.'''
    # Placeholders: base URL, name of the file to download, output path.
    # paxel() opens its third argument as a file, so give it a file path.
    paxel('网址', '下载文件', '输出目录', blocks=6)


if __name__ == '__main__':
    main()


你可能感兴趣的:(python)