import struct
import os
# Signature table: leading file bytes (as uppercase hex) -> file type label.
def typeList():
    """Return the mapping from magic-number hex strings to file type labels.

    Each key is the uppercase hexadecimal form of the first bytes of a file;
    each value is the type label reported for files starting with those bytes.

    The signature "D0CF11E0" was originally listed for both "doc" and "xls",
    and "504B0304" for both "docx" and "xlsx".  A dict can hold only one
    value per key, so the later entry always won; the literal below states
    that effective mapping explicitly instead of relying on silent
    overwriting.  Telling those formats apart would need more than the
    leading signature bytes.
    """
    return {
        "FFD8FFE0": "jpg",
        "89504E47": "png",
        "25504446": "pdf",
        # Also the signature of "doc" files; reported as "xls".
        "D0CF11E0": "xls",
        # Also the signature of "docx" files; reported as "xlsx".
        "504B0304": "xlsx",
    }
# Convert a sequence of byte values to an uppercase hexadecimal string.
def bytes2hex(bytes):
    """Return the uppercase hex representation of a sequence of integers.

    Args:
        bytes: An iterable of non-negative integers (for example the tuple
            produced by ``struct.unpack`` with "B" codes, or a bytearray).
            The parameter name shadows the builtin ``bytes``; it is kept so
            existing keyword callers continue to work.

    Returns:
        The concatenated hex digits of every value, in order, upper-cased.
        Each value is left-padded with one "0" when its hex form has an odd
        number of digits, so values in 0..255 always yield two characters.
        An empty input yields an empty string.
    """
    pieces = []
    for value in bytes:
        digits = u"%x" % value
        # Pad to an even number of digits so byte boundaries stay aligned.
        if len(digits) % 2:
            digits = u"0" + digits
        pieces.append(digits)
    # Join once instead of growing the string on every iteration.
    return u"".join(pieces).upper()
# Detect a file's type from its leading signature bytes.
def filetype(filePath):
    """Return the type label of the file at *filePath*, or 'unknown'.

    The first bytes of the file are compared, as uppercase hex, against each
    signature returned by ``typeList()``; the label of the first matching
    signature is returned.

    Args:
        filePath: Path of the file to inspect.

    Returns:
        The matching type label from ``typeList()``, or 'unknown' when the
        file is empty, is shorter than a signature, or matches none of them.

    Raises:
        OSError: If the file does not exist or cannot be read.
    """
    ftype = 'unknown'
    # An empty file has no signature to inspect, so skip opening it.
    if os.path.getsize(filePath) == 0:
        return ftype
    tl = typeList()
    # The context manager closes the file even if reading raises.
    with open(filePath, 'rb') as binfile:
        for hcode, label in tl.items():
            # Two hex digits per byte.  Floor division keeps this an int on
            # Python 3, where "/" would produce a float and break both the
            # read size and the struct format repetition below.
            numOfBytes = len(hcode) // 2
            binfile.seek(0)
            header = binfile.read(numOfBytes)
            # A file shorter than the signature cannot match it; unpacking a
            # short buffer would raise struct.error.
            if len(header) < numOfBytes:
                continue
            hbytes = struct.unpack_from("B" * numOfBytes, header)
            if bytes2hex(hbytes) == hcode:
                return label
    return ftype
if __name__ == '__main__':
    # Manual smoke test: expects the sample files below to exist in the
    # current working directory.
    # Check the file size first.
    if os.path.getsize('2.docx') == 0:
        # Call syntax so the script also parses on Python 3.
        print("ni wan le")
    # Run the detector against one sample of each file type.
    typeJPGName1 = filetype('1.jpg')
    print ("1.jpg-----------------------:",typeJPGName1)
    typePNGName1 = filetype('1.png')
    print ("1.png-----------------------:",typePNGName1)
    typePDFName1 = filetype('1.pdf')
    print ("1.pdf-----------------------:",typePDFName1)
    typeDOCName1 = filetype('1.doc')
    print ("1.doc-----------------------:",typeDOCName1)
    typeDOCXName1 = filetype('1.docx')
    print ("1.docx-----------------------:",typeDOCXName1)
    typeXLSName1 = filetype('1.xls')
    print ("1.xls-----------------------:",typeXLSName1)
    typeXLSXName1 = filetype('1.xlsx')
    print ("1.xlsx-----------------------:",typeXLSXName1)