实习一个星期了,第一天主管说先学习两个星期的python,正好才买了一本python学习手册。一周内看完了,900页(很佩服自己的) 第二周本来是用python要做点东西的,可是临时来了个任务,更改一个shell上传脚本,原来的效率太低下。 LANG=en_US.UTF-8 work_dir="******" cd $work_dir export LANG ftp_home=$work_dir/tags/apache ftpServerName=url1 ftpPort=21 ftpUsername=username1 ftpPassword=password1 # cdn ftp upload cdn_fabu() { apache_cdn_folders="css css2 images js js2 iframe" cd ${work_dir}/build/apache for folder in $(find $apache_cdn_folders -type d) do cd ${work_dir}/build/apache parent_dir=$(echo $folder | awk -F'/' '{if(NF==1)print $1; else{ for(i=1;i<NF;i++)printf("%s/",$i)} }') #upload parent_dir files ftp -n $ftpServerName $ftpPort <<_EOF user $ftpUsername $ftpPassword prompt off bin mkdir $parent_dir cd $parent_dir lcd $parent_dir mput * bye _EOF sub_dir_num=$(echo $folder | awk -F'/' '{print NF}') if [ $sub_dir_num -gt 1 ] then sub_dir=$(echo $folder | awk -F'/' '{print $NF}') #upload sub_dir files cd ${work_dir}/build/apache ftp -n $ftpServerName $ftpPort <<_EOF user $ftpUsername $ftpPassword prompt off bin cd $parent_dir mkdir $sub_dir cd $sub_dir lcd $folder mput * bye _EOF fi done } 要我改为python,首先,我做的一样,全部上传。 #!/usr/bin/python # fabu script # Filename: cdn_fabu.py import locale import os from ftplib import FTP LANG = 'en_US.UTF-8' #work_dir = '/home/fabu/baoxian' locale.setlocale(locale.LC_ALL, LANG) #ftp_home = work_dir + '/tags/apache' ftpServerName = '192.168.52.206' ftpPort = 21 ftpUsername = 'ftptest' ftpPassword = 'test' ftpRootDir = '' f = '' def cdn_fabu(): global f f = FTP(ftpServerName) f.login(ftpUsername, ftpPassword) f.set_debuglevel(0) global ftpRootDir ftpRootDir = f.pwd() apache_cdn_folders = ['css', 'css2', 'images', 'js', 'js2', 'iframe'] rootDir = '/home/wangyang/apache/' for folderSearch in apache_cdn_folders: f.cwd(ftpRootDir) print folderSearch try: f.mkd(folderSearch) except: pass searchDir = rootDir + folderSearch recusWalkDir(searchDir, folderSearch) f.quit() def recusWalkDir(dir, ftpDir): for s in os.listdir(dir): newDir = dir + '/' + s 
if os.path.isdir(newDir): f.cwd(ftpRootDir + '/' + ftpDir) try: f.mkd(s) except: pass newfDir = ftpDir + s recusWalkDir(newDir, newfDir) else: if os.path.isfile(newDir): fileHandler = open(newDir, 'rb') f.storbinary('STOR s', fileHandler) fileHandler.close() if __name__ == '__main__': cdn_fabu() 这个版本把所有文件上传,不论服务器上是否已有相同的文件。这个时候,为了效率就要进行优化了。 头脑风暴: 考虑时间,文件大小等都不能保证100%的文件可靠性,只有md5,可是如何保存MD5呢? 解决方法:每次上传时都把md5值和相对路径名字存储在一个文件中,每次都把那个文件下载后,与本地文件的md5值对比,不同的才上传。源码 #!/usr/local/bin/python2.7 import os from ftplib import FTP import shelve import hashlib import time import socket import sys import ftplib work_home='/home/yu/netease/log' ftpServerName="localhost" ftpPort=21 ftpUsername='ftptest' ftpPassword='test' myout=sys.stdout sys.stdout=sys.stderr try: f=FTP(ftpServerName) f.login(ftpUsername,ftpPassword) except socket.error: print 'can\'t find host' sys.exit(0) except ftplib.error_perm: print 'username or password is wrong' sys.exit(0) finally: sys.out=myout #apache_cdn_folders=['css','css2','iframe','js','js2','images','mail'] ftp_home=f.pwd() basepath='' dbtmp='' tmpfile=open('/tmp/recordbs','wb') log='' try: f.retrbinary('RETR recordb',tmpfile.write) except: os.remove('/tmp/recordbs') else: tmpfile.close() class record: def __init__(self,a,b): self.path=a self.md5=b def up(s): filepath=ftp_home+'/'+s filename=filepath.split('/')[len(filepath.split('/'))-1] filedir=filepath[0:len(filepath)-len(filename)-1] f.cwd(filedir) file=open(basepath+'/'+s,'rb') f.storbinary('STOR '+filename,file) file.close() def update(s,sdb,cdb): if sdb[s].md5!=cdb[s].md5: log.write('update:'+s+'\n') delit(s) up(s) def delit(s): filepath=ftp_home+'/'+s filename=filepath.split('/')[len(filepath.split('/'))-1] filedir=filepath[0:len(filepath)-len(filename)-1] f.cwd(filedir) try: f.delete(filename) except:pass def cdn_fabu1(cpath,srelativePath=''): ''' 'cpath' is the dir you want to upload which could be relative or absolute path serlationPath is the dir on ftp server.It's relative to your main path of 
you ftp ''' global basepath global dbtmp global f global log global ftp_home if srelativePath!='': ftp_home=ftp_home+'/'+srelativePath logname=time.strftime("%Y-%m-%d",time.gmtime()) log=open(work_home+'/'+logname+'.log','a') log.write(time.strftime("---------------------%H:%M:%S--------------\n",time.gmtime())) basepath=cpath try: os.remove('/tmp/recordbc') except:pass dbtmp=shelve.open('/tmp/recordbc') travel(cpath) dbtmp.close() sdb=shelve.open('/tmp/recordbs') cdb=shelve.open('/tmp/recordbc') ser=set(sdb.keys()) cli=set(cdb.keys()) ftpFileList = [] f.cwd(ftp_home) travelFtp('',ftpFileList) #print 'ftpfilelist:',ftpFileList for s in ftpFileList: if not(s in cli): delit(s) if s!='recordb': log.write('delete:'+s+'\n') uplist=cli-ser dellist=ser-cli testlist=ser&cli #print "ser",ser #print 'cli:',cli #print 'uplist:',uplist #print 'dellist:',dellist #print 'testlist:',testlist updatelist=[s for s in testlist if sdb[s]!=cdb[s]] for s in updatelist: update(s,sdb,cdb) for s in uplist: up(s) log.write('upload:'+s+'\n') for s in dellist: delit(s) log.write('delete:'+s+'\n') updb() f.close() log.close() try: os.remove('/tmp/recordbs') except:pass try: os.remove('/tmp/recordbc') except:pass def travelFtp(path,filelist):#path:the of the dir you want travel on ftp relative the ftp_home dir=[] def callback(i): last=i.split(' ')[len(i.split(' '))-1] last=path+'/'+last if last[0]=='/': last=last[1:] if i[0]=='d': dir.append(last) if i[0]=='-': filelist.append(last) f.dir(path,callback) for s in dir: travelFtp(s,filelist) def updb(): f.cwd(ftp_home) file=open('/tmp/recordbc','rb') f.storbinary('STOR recordb',file) file.close() def createdir(path): tem=path.split('/') dirname=tem[len(tem)-1] f.cwd(ftp_home+'/'+path[:len(path)-len(dirname)]) try: f.mkd(dirname) except:pass def travel(path): global dbtmp for s in os.listdir(path): paths=(path+'/'+s)[len(basepath)+1:] if os.path.isdir(path+'/'+s): createdir(paths) travel(path+'/'+s) else: 
dbtmp[paths]=record(paths,proMd5(path+'/'+s)) def proMd5(path): m=hashlib.md5() for a in open(path,'r'): m.update(a) return m.hexdigest() if __name__=='__main__': cdn_fabu1('baoxian','') 可是后来发现,有时候有人直接进行ftp操作,这样会引起MD5文件与实际文件的不同步,而运营中的服务器可靠性是第一位的,因此,改换方案。(其实这是管理方面的问题,由此感觉,管理相对于开发的话有大大的影响,如果公司内部统一规定只用这个工具上传的话一点问题都没有。由于管理不同意,这么好的方案就流产了) 头脑风暴: 问题:把md5值与文件绑定,一个被删除,另一个也被删除。 方案1:能获取服务器端文件的所有md5值,如果能用telnet登录计算出md5值进行对比也行,有些ftp服务器可以进行telnet登录的,但是这种方法不通用,有些ftp服务器不允许。问题是我们的服务器也不允许,放弃。 方案2:曲线救国,文件能存储md5值的地方只有文件名字,可是由于文件名改后超级链接等又不能用,可以建立符号链接,把原来的文件名中加上md5值,新建立一个符号链接保持原名,不影响应用,而且把md5值与文件绑定在了一起,over,完美,我太聪明了。 实现过程中发现FTP根本不能上传下载符号链接,只能手工在服务器端建立。(如果有telnet就好了)这也是一种好的解决方案,不影响应用。 最后还是老老实实全部上传吧。 网速在7M以上的情况下传输1300+文件(70M)用时5min,在局域网内用时20ms。单个文件7M都不超过10s。局域网时延5ms,而实际系统中时延34ms,应该是交互次数太多,所以长时间时延影响效率,就用多线程吧,8线程情况下局域网传输时间减到了8s,实际环境中还没实现,代码 from ftplib import FTP import os import threading from Queue import Queue threads=[] basepath="" f=FTP('192.168.52.51') f.login('ftptest','test') ftp_home=f.pwd() def createdir(path): tem=path.split('/') dirname=tem[len(tem)-1] f.cwd(ftp_home+'/'+path[:len(path)-len(dirname)]) try: f.mkd(dirname) except:pass def upload(q): f=FTP('192.168.52.51') f.login('ftptest','test') while(True): if q.qsize()==0: return 0 s=q.get() filepath=ftp_home+'/'+s filename=filepath.split('/')[len(filepath.split('/'))-1] filedir=filepath[0:len(filepath)-len(filename)-1] # print filedir f.cwd(filedir) try: f.delete(filename) except: pass file=open(basepath+'/'+s,'rb') f.storbinary('STOR '+filename,file) file.close() print 'upload:'+filepath f.close() def travel(path,q): for s in os.listdir(path): paths=(path+'/'+s)[len(basepath)+1:] if os.path.isdir(path+'/'+s): travel(path+'/'+s,q) else: q.put(paths,1) def travel1(path): for s in os.listdir(path): paths=(path+'/'+s)[len(basepath)+1:] if os.path.isdir(path+'/'+s): createdir(paths) travel1(path+'/'+s) def cdn_fabu(path,srelativePath=''): global basepath q=Queue(1500) basepath=path travel1(path) travel(path,q) for i in 
range(20): t=threading.Thread(target=upload,args=(q,)) threads.append(t) t.start() for i in threads: i.join() print "over" if __name__=='__main__': cdn_fabu('baoxian') 最后一版比大多数ftp工具快得多。 以上代码都是实验代码,没有后期润色,只是进行快速实验用的,所以代码风格极乱。 over