FTP上传工具开发

实习一个星期了,第一天主管说先学习两个星期的python,正好才买了一本python学习手册。一周内看完了,900页(很佩服自己的)
第二周本来是要用python做点东西的,可是临时来了个任务:更改一个shell上传脚本,原来的脚本效率太低下。
# Locale exported for the commands run by this script.
LANG=en_US.UTF-8
# Working directory of the release tree (the real value is masked in this listing).
work_dir="******"
cd $work_dir
export LANG

# FTP connection settings (placeholder values in this listing).
# ftp_home is defined here but not referenced by cdn_fabu below.
ftp_home=$work_dir/tags/apache
ftpServerName=url1
ftpPort=21
ftpUsername=username1
ftpPassword=password1

# cdn ftp upload
# For every directory found under the listed folders of ${work_dir}/build/apache,
# upload its files to the FTP server. Each directory costs one ftp session for
# its parent path and, when it is nested, a second session for the directory itself.
cdn_fabu()
{
apache_cdn_folders="css css2 images  js js2 iframe"
cd ${work_dir}/build/apache
# find prints each listed folder and every directory below it as a relative path.
for folder in $(find $apache_cdn_folders -type d)
do
cd ${work_dir}/build/apache
# One path component: parent_dir is that component.
# Several components: parent_dir is every component except the last, each followed by "/".
parent_dir=$(echo $folder | awk -F'/' '{if(NF==1)print $1; else{ for(i=1;i<NF;i++)printf("%s/",$i)} }')
#upload parent_dir files
# The _EOF delimiter is unquoted, so the shell expands the $variables inside the here-document.
ftp -n $ftpServerName $ftpPort <<_EOF
user $ftpUsername $ftpPassword
prompt off
bin
mkdir $parent_dir
cd $parent_dir
lcd $parent_dir
mput *
bye
_EOF

# Number of path components in $folder; more than one means it is a nested directory.
sub_dir_num=$(echo $folder | awk -F'/' '{print NF}')
if [ $sub_dir_num -gt 1 ]
then
# Last path component: the directory to create inside parent_dir on the server.
sub_dir=$(echo $folder | awk -F'/' '{print $NF}')
#upload sub_dir files
cd ${work_dir}/build/apache
ftp -n $ftpServerName $ftpPort <<_EOF
user $ftpUsername $ftpPassword
prompt off
bin
cd $parent_dir
mkdir $sub_dir
cd $sub_dir
lcd $folder
mput *
bye
_EOF

fi

done
}
要我改为python。首先,我做了一个和原脚本功能一样的版本:全部上传。
#!/usr/bin/python
# fabu script
# Filename: cdn_fabu.py


import locale
import os
from ftplib import FTP


# Locale applied to the whole process via locale.setlocale below.
LANG = 'en_US.UTF-8'
#work_dir = '/home/fabu/baoxian'


locale.setlocale(locale.LC_ALL, LANG)


#ftp_home = work_dir + '/tags/apache'
# FTP connection settings. NOTE(review): cdn_fabu() calls FTP(ftpServerName)
# without passing ftpPort, so ftpPort is currently not used by the connection.
ftpServerName = '192.168.52.206'
ftpPort = 21
ftpUsername = 'ftptest'
ftpPassword = 'test'


# Placeholders; cdn_fabu() rebinds both: ftpRootDir to the server directory
# reported right after login, f to the connected FTP object.
ftpRootDir = ''
f = ''


def cdn_fabu():
	"""Upload every CDN folder below the local root to the FTP server.

	Connects with the module-level settings (ftpServerName, ftpPort,
	ftpUsername, ftpPassword), stores the connection in the global ``f`` and
	the login directory in the global ``ftpRootDir``, then creates each
	folder on the server and hands it to recusWalkDir().

	The connection is always shut down, even when an upload fails.
	"""
	global f
	global ftpRootDir
	# Function-scope import: the file only imports ftplib.FTP at top level.
	from ftplib import error_perm

	f = FTP()
	# Honour the configured port instead of silently using the default.
	f.connect(ftpServerName, ftpPort)
	try:
		f.login(ftpUsername, ftpPassword)
		f.set_debuglevel(0)
		ftpRootDir = f.pwd()
		apache_cdn_folders = ['css', 'css2', 'images', 'js', 'js2', 'iframe']
		rootDir = '/home/wangyang/apache/'
		for folderSearch in apache_cdn_folders:
			f.cwd(ftpRootDir)
			print(folderSearch)
			try:
				f.mkd(folderSearch)
			except error_perm:
				# Best effort: the server refuses MKD when the folder
				# already exists, which is fine for a re-upload.
				pass
			searchDir = rootDir + folderSearch
			recusWalkDir(searchDir, folderSearch)
	finally:
		try:
			f.quit()
		except Exception:
			# The control connection is already broken; just drop it.
			f.close()
		
def recusWalkDir(dir, ftpDir):
	"""Recursively upload the local directory ``dir`` into ``ftpDir`` on the server.

	``dir`` is a local directory path. ``ftpDir`` is the matching server
	directory, relative to the global ``ftpRootDir``; it must already exist.
	Uses the global FTP connection ``f``.

	Files are stored under their own names in ``ftpDir``; sub-directories are
	created on the server and then walked the same way.
	"""
	# Function-scope import: the file only imports ftplib.FTP at top level.
	from ftplib import error_perm

	# Enter the target directory first so every STOR/MKD below lands in it.
	f.cwd(ftpRootDir + '/' + ftpDir)
	subDirs = []
	for s in os.listdir(dir):
		newDir = dir + '/' + s
		if os.path.isdir(newDir):
			try:
				f.mkd(s)
			except error_perm:
				# Best effort: MKD is refused when the directory exists.
				pass
			subDirs.append(s)
		elif os.path.isfile(newDir):
			fileHandler = open(newDir, 'rb')
			try:
				# Store under the real file name, not the literal "s".
				f.storbinary('STOR ' + s, fileHandler)
			finally:
				fileHandler.close()
	# Recurse only after this directory's files are sent: the recursion
	# changes the server's working directory.
	for s in subDirs:
		recusWalkDir(dir + '/' + s, ftpDir + '/' + s)


# Run the full upload only when the file is executed as a script.
if __name__ == '__main__':
	cdn_fabu()
这个版本把所有文件上传,不论是否服务器上有什么。这个时候,为了效率就要进行优化了。
头脑风暴:
考虑过用修改时间、文件大小等来判断文件是否有变化,但这些都不能100%保证可靠,只有md5可以;可问题是如何保存MD5值。
解决方法:每次上传时都把md5值和相对路径名字存储在一个文件中,下次上传前先把那个文件下载下来,与本地文件的md5值对比,只上传md5不同的文件。源码如下:
#!/usr/local/bin/python2.7
import os
from ftplib import FTP
import  shelve
import hashlib
import time
import socket
import sys
import ftplib


# Directory that receives the daily log files written by cdn_fabu1().
work_home='/home/yu/netease/log'
# FTP connection settings.
ftpServerName="localhost"
ftpPort=21
ftpUsername='ftptest'
ftpPassword='test'

# Send the connection error messages to stderr, then restore stdout.
myout=sys.stdout
sys.stdout=sys.stderr
try:
	f=FTP(ftpServerName)
	f.login(ftpUsername,ftpPassword)
except socket.error:
	print('can\'t find host')
	# Non-zero status so a calling shell can detect the failure.
	sys.exit(1)
except ftplib.error_perm:
	print('username or password is wrong')
	sys.exit(1)
finally:
	# Was "sys.out=myout", which never restored stdout.
	sys.stdout=myout


#apache_cdn_folders=['css','css2','iframe','js','js2','images','mail']
# Server directory right after login; remote paths are built relative to it.
ftp_home=f.pwd()
# Placeholders rebound by cdn_fabu1(): local root, client-side shelve, log file.
basepath=''
dbtmp=''
log=''


# Fetch the record file saved by the previous run into /tmp/recordbs.
# When the download fails, the empty local file is removed again.
tmpfile=open('/tmp/recordbs','wb')
try:
	f.retrbinary('RETR recordb',tmpfile.write)
except ftplib.all_errors:
	tmpfile.close()
	os.remove('/tmp/recordbs')
else:
	tmpfile.close()


class record:
	"""One entry of the upload record: a relative file path and its MD5 hex digest.

	Instances compare by value, so two records read from different shelves
	are equal exactly when both path and digest match.
	"""

	def	__init__(self,a,b):
		# a: path of the file relative to the upload root
		# b: MD5 hex digest of the file content
		self.path=a
		self.md5=b

	def __eq__(self,other):
		if not isinstance(other,record):
			return NotImplemented
		return (self.path,self.md5)==(other.path,other.md5)

	def __ne__(self,other):
		# Python 2 does not derive != from ==, so define it explicitly.
		result=self.__eq__(other)
		if result is NotImplemented:
			return result
		return not result

	def __hash__(self):
		# Keep hashing consistent with the value-based equality.
		return hash((self.path,self.md5))

	def __repr__(self):
		return 'record(%r, %r)' % (self.path,self.md5)


def up(s):
	"""Upload the local file ``basepath/s`` to ``ftp_home/s`` on the server.

	``s`` is a "/"-separated path relative to both roots. The server-side
	directory must already exist. Uses the global FTP connection ``f``.
	"""
	# Split the remote path into its directory and the bare file name.
	filedir,filename=(ftp_home+'/'+s).rsplit('/',1)
	f.cwd(filedir)
	# The with-block closes the file even when the transfer fails.
	with open(basepath+'/'+s,'rb') as localFile:
		f.storbinary('STOR '+filename,localFile)


def update(s,sdb,cdb):
	"""Replace the server copy of ``s`` when its recorded digest differs.

	``sdb`` and ``cdb`` map relative paths to entries with an ``md5``
	attribute (server-side record and local record). Nothing happens when
	the two digests are equal; otherwise the change is logged, the remote
	file is deleted and the local file is uploaded again.
	"""
	serverDigest=sdb[s].md5
	localDigest=cdb[s].md5
	if serverDigest==localDigest:
		return
	log.write('update:'+s+'\n')
	delit(s)
	up(s)


def delit(s):
	"""Delete ``ftp_home/s`` on the server, ignoring a failed DELE.

	``s`` is a "/"-separated path relative to ``ftp_home``. The change of
	directory is not guarded: a missing server directory still raises.
	Uses the global FTP connection ``f``.
	"""
	filedir,filename=(ftp_home+'/'+s).rsplit('/',1)
	f.cwd(filedir)
	try:
		f.delete(filename)
	except ftplib.all_errors:
		# Best effort: the file may already be gone. Unlike a bare except,
		# this lets KeyboardInterrupt and SystemExit through.
		pass


def cdn_fabu1(cpath,srelativePath=''):
	'''
	Synchronise the local directory tree 'cpath' with the FTP server.

	'cpath' is the directory you want to upload; it may be a relative or an
	absolute path.
	'srelativePath' is the target directory on the FTP server, relative to
	the directory the server reports right after login. An empty string
	means that login directory itself.

	Steps:
	  1. Walk 'cpath', create the directories on the server and record the
	     MD5 of every local file in the shelve /tmp/recordbc.
	  2. Delete every file listed on the server that is not present locally.
	  3. Compare against the record of the previous run (/tmp/recordbs):
	     re-upload changed files, upload new ones, delete vanished ones.
	  4. Upload the new record as 'recordb' and remove the temporary files.

	Every action is appended to the log file <work_home>/<YYYY-MM-DD>.log;
	the timestamps are UTC (time.gmtime).
	'''
	global basepath
	global dbtmp
	global f
	global log
	global ftp_home

	if srelativePath!='':
		ftp_home=ftp_home+'/'+srelativePath
	logname=time.strftime("%Y-%m-%d",time.gmtime())
	log=open(work_home+'/'+logname+'.log','a')
	log.write(time.strftime("---------------------%H:%M:%S--------------\n",time.gmtime()))
	basepath=cpath
	# Start from an empty client-side record.
	try:
		os.remove('/tmp/recordbc')
	except OSError:
		pass

	dbtmp=shelve.open('/tmp/recordbc')
	travel(cpath)
	dbtmp.close()
	sdb=shelve.open('/tmp/recordbs')
	cdb=shelve.open('/tmp/recordbc')
	ser=set(sdb.keys())
	cli=set(cdb.keys())

	# Remove server files that have no local counterpart. This also removes
	# the old 'recordb', which updb() uploads again below.
	ftpFileList = []
	f.cwd(ftp_home)
	travelFtp('',ftpFileList)
	for s in ftpFileList:
		if s not in cli:
			delit(s)
			if s!='recordb':
				log.write('delete:'+s+'\n')
	uplist=cli-ser
	dellist=ser-cli
	testlist=ser&cli

	# Compare the digests themselves, not the record objects.
	updatelist=[s for s in testlist if sdb[s].md5!=cdb[s].md5]

	for s in updatelist:
		update(s,sdb,cdb)
	for s in uplist:
		up(s)
		log.write('upload:'+s+'\n')
	for s in dellist:
		delit(s)
		log.write('delete:'+s+'\n')
	# Close both shelves before the record file is uploaded and removed.
	sdb.close()
	cdb.close()
	updb()
	f.close()
	log.close()
	for tmpname in ('/tmp/recordbs','/tmp/recordbc'):
		try:
			os.remove(tmpname)
		except OSError:
			pass


def travelFtp(path,filelist):
	"""Collect every file below ``path`` on the server into ``filelist``.

	``path`` is the directory to walk, relative to the server's current
	working directory ('' means that directory itself). Each file found is
	appended to ``filelist`` as a "/"-separated path without a leading "/".
	Uses the global FTP connection ``f``.

	NOTE(review): the name is taken as the text after the last space of each
	listing line, so names that contain spaces are not handled — confirm the
	server never holds such names.
	"""
	subdirs=[]
	def callback(line):
		# Skip blank listing lines instead of failing on line[0].
		if not line:
			return
		name=line.split(' ')[-1]
		# Skip self/parent entries; following them would never terminate.
		if name in ('.','..'):
			return
		entry=path+'/'+name
		if entry[0]=='/':
			entry=entry[1:]
		# First character of a listing line: 'd' directory, '-' regular file.
		if line[0]=='d':
			subdirs.append(entry)
		elif line[0]=='-':
			filelist.append(entry)
	f.dir(path,callback)
	for s in subdirs:
		travelFtp(s,filelist)
	


def updb():
	"""Upload the client-side record /tmp/recordbc to the server as 'recordb'.

	The file is stored in ``ftp_home`` through the global FTP connection ``f``.
	"""
	f.cwd(ftp_home)
	# The with-block closes the file even when the transfer fails.
	with open('/tmp/recordbc','rb') as recordFile:
		f.storbinary('STOR recordb',recordFile)
	
def createdir(path):
	"""Create the directory ``path`` (relative to ``ftp_home``) on the server.

	The parent directory must already exist on the server. A refused MKD,
	for example because the directory already exists, is ignored.
	Uses the global FTP connection ``f``.
	"""
	dirname=path.split('/')[-1]
	# Everything before the last component, including the trailing "/".
	parent=path[:len(path)-len(dirname)]
	f.cwd(ftp_home+'/'+parent)
	try:
		f.mkd(dirname)
	except ftplib.all_errors:
		# Best effort. Unlike a bare except, this lets KeyboardInterrupt
		# and SystemExit through.
		pass


def travel(path):
	"""Walk the local directory ``path`` and record every file it contains.

	Each sub-directory is created on the server through createdir() and then
	walked. Every other entry is stored in the global shelve ``dbtmp`` under
	its path relative to ``basepath``, together with its MD5 digest.
	"""
	for entry in os.listdir(path):
		full=path+'/'+entry
		# Strip "<basepath>/" to get the path relative to the upload root.
		relative=full[len(basepath)+1:]
		if not os.path.isdir(full):
			dbtmp[relative]=record(relative,proMd5(full))
			continue
		createdir(relative)
		travel(full)


def proMd5(path):
	"""Return the MD5 hex digest of the content of the file at ``path``."""
	m=hashlib.md5()
	# Binary mode hashes the exact bytes on disk. Fixed-size chunks avoid
	# loading a whole file that has no line breaks, and the with-block
	# closes the handle.
	with open(path,'rb') as fh:
		for chunk in iter(lambda: fh.read(65536),b''):
			m.update(chunk)
	return m.hexdigest()
		
# Script entry point: synchronise the local directory 'baoxian' with the
# server's login directory (empty srelativePath).
if __name__=='__main__':
	cdn_fabu1('baoxian','')	
可是后来发现,有时候有人直接进行ftp操作,这样会引起MD5文件与实际文件的不同步,而运营中的服务器可靠性是第一位的,因此,改换方案。(其实这是管理方面的问题,由此感觉,管理相对于开发的话有大大的影响,如果公司内部统一规定只用这个工具上传的话一点问题都没有。由于管理不同意,这么好的方案就流产了)
头脑风暴:
问题:把md5值与文件绑定,一个被删除,另一个也被删除。
方案1:能获取服务器端文件的所有md5值,如果能用telnet登录计算出md5值进行对比也行,有些ftp服务器可以进行telnet登录的,但是这种方法不通用,有些ftp服务器不允许。问题是我们的服务器也不允许,放弃
方案2:曲线救国,文件能存储md5值的地方只有文件名字,可是由于文件名改后超级链接等又不能用,可以建立符号链接,把原来的文件名中加上md5值,新建立一个符号链接保持原名,不影响应用,而且把md5值与文件绑定在了一起,over,完美,我太聪明了。
实现过程中发现FTP根本不能上传下载符号链接,只能手工在服务器端建立。(如果有telnet就好了)这也是一种好的解决方案,不影响应用。
最后还是老老实实全部上传吧。
网速在7M以上的情况下传输1300+文件(70M)用时5min,在局域网内用时20ms。单个文件7M都不超过10s。局域网时延5ms,而实际系统中时延34ms,应该是交互次数太多,所以长时延影响了效率,就用多线程吧,8线程情况下局域网传输时间减到了8s,实际环境中还没实现,代码如下:
from ftplib import FTP
import os
import threading
from Queue import Queue


# Worker threads started by cdn_fabu().
threads=[]
# Local root directory being uploaded; assigned in cdn_fabu().
basepath=""
# Connection used by createdir(); every upload() worker opens its own one.
f=FTP('192.168.52.51')
f.login('ftptest','test')
# Server directory right after login; remote paths are built relative to it.
ftp_home=f.pwd()


def createdir(path):
	"""Create the directory ``path`` (relative to ``ftp_home``) on the server.

	Changes into the parent directory on the global connection ``f`` and
	issues MKD for the last path component; any error from MKD is ignored.
	"""
	dirname=path.split('/')[-1]
	# Everything before the last component, including the trailing "/".
	parent=path[:len(path)-len(dirname)]
	f.cwd(ftp_home+'/'+parent)
	try:
		f.mkd(dirname)
	except:
		pass


def upload(q):
	"""Worker: upload every file named in the queue ``q`` over its own FTP connection.

	``q`` holds "/"-separated file paths relative to both ``basepath``
	(local) and ``ftp_home`` (server). For each path the remote file is
	deleted if possible and the local file is stored in its place.

	Returns 0 once the queue is empty. The connection is closed on every
	exit path.
	"""
	# Function-scope imports: the file only imports Queue.Queue and ftplib.FTP.
	from Queue import Empty
	from ftplib import all_errors

	f=FTP('192.168.52.51')
	try:
		f.login('ftptest','test')
		while True:
			# get_nowait() checks and takes in one step. Testing qsize() and
			# then calling get() lets another worker take the last item in
			# between, which leaves this worker blocked forever.
			try:
				s=q.get_nowait()
			except Empty:
				return 0
			filepath=ftp_home+'/'+s
			filedir,filename=filepath.rsplit('/',1)
			f.cwd(filedir)
			try:
				f.delete(filename)
			except all_errors:
				# Best effort: the file may not exist on the server yet.
				pass
			with open(basepath+'/'+s,'rb') as localFile:
				f.storbinary('STOR '+filename,localFile)
			print('upload:'+filepath)
	finally:
		f.close()


def travel(path,q):
	"""Put every non-directory entry below ``path`` into the queue ``q``.

	Entries are queued as paths relative to the global ``basepath``;
	sub-directories are walked recursively.
	"""
	for name in os.listdir(path):
		full=path+'/'+name
		# Strip "<basepath>/" to get the path relative to the upload root.
		relative=full[len(basepath)+1:]
		if os.path.isdir(full):
			travel(full,q)
			continue
		q.put(relative,1)


def travel1(path):
	"""Create every directory below the local ``path`` on the server.

	Each sub-directory is passed to createdir() as a path relative to the
	global ``basepath`` and then walked recursively; files are ignored.
	"""
	for name in os.listdir(path):
		full=path+'/'+name
		relative=full[len(basepath)+1:]
		if not os.path.isdir(full):
			continue
		createdir(relative)
		travel1(full)
			
def cdn_fabu(path,srelativePath=''):
	"""Upload the local directory tree ``path`` with 20 worker threads.

	The directory structure is first created on the server (travel1), then
	every file path is queued (travel), and finally the upload() workers
	drain the queue in parallel. Prints "over" when all workers are done.

	``srelativePath`` is accepted but not used by this version.
	"""
	global basepath
	basepath=path
	# Unbounded queue: it is filled completely before any worker starts, so
	# a size limit would block forever once the file count exceeded it.
	q=Queue()
	travel1(path)
	travel(path,q)
	for i in range(20):
		t=threading.Thread(target=upload,args=(q,))
		threads.append(t)
		t.start()

	for t in threads:
		t.join()
	print("over")


# Script entry point: upload the local directory 'baoxian'.
if __name__=='__main__':
	cdn_fabu('baoxian')
最后一版比大多数ftp工具快的多
以上代码都是实验代码,没有后期润色,只是进行快速实验用的,所以代码风格极乱。
over


你可能感兴趣的:(FTP上传工具开发)