照片有点多,准备按拍摄时间重新整理一下。用 Python 写了一个只读取自己感兴趣的 EXIF 信息的脚本,看看速度怎么样。
以前用.net写过一个,用的现成的exif类库:大约开启1~5个线程来分析所有目标图片文件,读取exif信息,放入一个队列中;大约开启20个左右的线程来进行重命名、拷贝和删除原始文件。
这个程序运行起来,速度会越来越慢,一直没找到关键原因在哪,觉得可能有的原因:
刚试了一下,没有用多线程,速度还挺快的:测试文件大约 2 GB,2 秒以内就能完成;全部 80 多 GB 的照片整理下来大约不到 4 分钟。
# http://www.codeproject.com/Articles/43665/ExifLibrary-for-NET
# http://www.exiv2.org/tags.html
# http://www.awaresystems.be/imaging/tiff/tifftags.html
import os
import struct
import random
import datetime
import sys
import traceback
class exiftags:
    """Numeric ids of the TIFF/EXIF tags used by this script."""

    # Pointer (offset) to the Exif sub-IFD; stored in the 0th IFD.
    exifpointer = 0x8769

    # Date/time tags, each holding a 'YYYY:MM:DD HH:MM:SS' style string.
    datetime = 0x0132
    datetime_original = 0x9003
    datetime_digited = 0x9004
class log:
    """Holder for run-wide counters (used as a plain namespace, never instantiated here)."""

    # Number of candidate files looked at so far.
    visited = 0
class jpg:
    """Minimal EXIF reader for JPEG files.

    Only the tags the caller asks for are extracted, which keeps the amount
    of I/O per file small.  Values are returned as the raw bytes stored in
    the file (ASCII values therefore keep their trailing NUL byte).

    Usage::

        tags = {0x9003: None}
        reader = jpg(path)
        reader.getEXIF(tags)      # fills ``tags`` in place
        value = reader.exif[0x9003]
    """

    # Tag id of the pointer to the Exif sub-IFD (same value as
    # exiftags.exifpointer).
    __EXIF_IFD_POINTER = 0x8769

    # Size in bytes of one element of each TIFF field type.
    __TYPE_SIZES = {
        1: 1,   # BYTE
        2: 1,   # ASCII
        3: 2,   # SHORT
        4: 4,   # LONG
        5: 8,   # RATIONAL
        6: 1,   # SBYTE
        7: 1,   # UNDEFINED
        8: 2,   # SSHORT
        9: 4,   # SLONG
        10: 8,  # SRATIONAL
        11: 4,  # FLOAT
        12: 8,  # DOUBLE
    }

    def __init__(self, file_path):
        """Remember ``file_path``; the file is not opened until getEXIF()."""
        self.__file_path = file_path
        self.__endian = '>'
        self.__baseoffset = None
        self.exif = {}

    def __read_exact(self, fo, size):
        """Read exactly ``size`` bytes from ``fo`` or raise IOError."""
        data = fo.read(size)
        if len(data) != size:
            raise IOError("unexpected end of EXIF data in " + self.__file_path)
        return data

    def __find_exif_segment(self, fo):
        """Advance ``fo`` to just after the ``Exif\\0\\0`` header of APP1.

        ``fo`` must be positioned right after the SOI marker.  Segments that
        precede the EXIF segment (APP0/JFIF, comments, ...) are skipped.
        Returns True when the EXIF segment was found, False otherwise.
        """
        while True:
            header = fo.read(4)
            if len(header) < 4 or header[:1] != b'\xff':
                return False
            marker = header[1:2]
            if marker == b'\xff':
                # Fill byte before a marker: step forward one byte and retry.
                fo.seek(-3, 1)
                continue
            if marker in (b'\xda', b'\xd9'):
                # Start of scan / end of image: no metadata beyond this point.
                return False
            size = struct.unpack('>H', header[2:4])[0]  # always big-endian
            if size < 2:
                return False
            # The length field counts itself, hence the "- 2".
            segment_end = fo.tell() + size - 2
            if marker == b'\xe1' and fo.read(6) == b'Exif\x00\x00':
                return True
            fo.seek(segment_end, 0)

    def __read_IFD(self, fo, offset, wanted):
        """Read one IFD and return ``{tag id: raw value}`` for ``wanted`` tags.

        ``offset`` is relative to the start of the TIFF header.  Fields whose
        type is unknown are skipped.  Values that fit into the 4-byte value
        slot are returned as those 4 raw bytes; larger values are fetched from
        the offset stored in the slot.
        """
        fo.seek(self.__baseoffset + offset, 0)
        raw_count = self.__read_exact(fo, 2)
        fieldcount = struct.unpack(self.__endian + 'H', raw_count)[0]
        found = {}
        for _ in range(fieldcount):
            entry = self.__read_exact(fo, 12)
            tagid, fieldtype, count = struct.unpack(
                self.__endian + 'HHL', entry[:8])
            if tagid not in wanted:
                continue
            unit = self.__TYPE_SIZES.get(fieldtype)
            if unit is None:
                continue
            nbytes = unit * count
            value = entry[8:12]
            if nbytes > 4:
                # The slot holds an offset to the data, not the data itself.
                resume = fo.tell()
                data_offset = struct.unpack(self.__endian + 'L', value)[0]
                fo.seek(self.__baseoffset + data_offset, 0)
                value = self.__read_exact(fo, nbytes)
                fo.seek(resume, 0)
            found[tagid] = value
        return found

    def __store(self, found):
        """Copy values for requested tags into the caller's dict."""
        for tagid, value in found.items():
            if tagid in self.exif:
                self.exif[tagid] = value

    def __read_tiff(self, fo):
        """Parse the TIFF structure starting at the current position of ``fo``."""
        self.__baseoffset = fo.tell()
        order = self.__read_exact(fo, 2)
        if order == b'II':
            self.__endian = '<'
        elif order == b'MM':
            self.__endian = '>'
        else:
            print("Failed to get big-/little-endian")
            raise IOError("unknown TIFF byte order in " + self.__file_path)
        # 2 bytes TIFF magic (0x002A, not checked) + 4 bytes offset of IFD 0.
        header = self.__read_exact(fo, 6)
        ifd0_offset = struct.unpack(self.__endian + 'L', header[2:6])[0]
        if ifd0_offset == 0:
            print("Read 0th ifd failed...")
            return
        # IFD 0 carries the Exif pointer and may itself hold requested tags.
        wanted = set(self.exif)
        wanted.add(self.__EXIF_IFD_POINTER)
        found = self.__read_IFD(fo, ifd0_offset, wanted)
        self.__store(found)
        pointer = found.get(self.__EXIF_IFD_POINTER)
        if pointer is None:
            print("Read EXIF IFD offset failed...")
            return
        exif_offset = struct.unpack(self.__endian + 'L', pointer[:4])[0]
        if exif_offset == 0:
            print("exif pointer is 0")
            return
        self.__store(self.__read_IFD(fo, exif_offset, set(self.exif)))

    def getEXIF(self, tags):
        """Fill ``tags`` with the raw values of the requested EXIF tags.

        ``tags`` maps tag ids to default values.  It is updated in place and
        is also available afterwards as ``self.exif``.  Tags that are not
        present in the file keep their default value.  Both IFD 0 and the
        Exif sub-IFD are searched.

        Raises IOError when the file cannot be opened or when the EXIF data
        is truncated or has an unknown byte order.  A file that is not a
        JPEG, or has no EXIF segment, leaves ``tags`` unchanged.
        """
        self.exif = tags
        with open(self.__file_path, 'rb') as fo:
            if fo.read(2) != b'\xff\xd8':
                print("file " + self.__file_path + " is not jpg file")
                return
            if self.__find_exif_segment(fo):
                self.__read_tiff(fo)
def testjpg1():
    """Smoke test: read the DateTime tag of /tmp/1.jpg and print ``<hex tag>=<value>``."""
    wanted = {exiftags.datetime: ''}
    reader = jpg('/tmp/1.jpg')
    reader.getEXIF(wanted)
    # getEXIF exposes the dict it was given as reader.exif, so the lookups
    # below read the values it stored there.
    for tag_id in wanted:
        line = hex(tag_id) + "=" + reader.exif[tag_id]
        print(line)
    del reader
def visitjpg(destdir, dirname, names):
    """Move the JPEG files of one directory into per-day folders.

    Every entry of ``names`` (file names inside ``dirname``) with a ``.jpg``
    extension (case-insensitive) is renamed to
    ``<destdir>/<YYYY-MM-DD>/<YYYY-MM-DD>_<HH-MM-SS>.jpg`` according to its
    EXIF DateTimeOriginal value.  When the target name is already taken a
    numeric suffix (``_1``, ``_2``, ...) is appended.

    Files without a usable timestamp are reported and left untouched.
    I/O errors are reported and the file is skipped; any other exception is
    reported and re-raised.  ``log.visited`` is incremented for every
    candidate file.
    """
    for name in names:
        # Match on the real extension, not on '.jpg' anywhere in the name.
        if os.path.splitext(name)[1].lower() != '.jpg':
            continue
        tags = {exiftags.datetime_original: None}
        try:
            log.visited += 1
            origpath = os.path.join(dirname, name)
            j = jpg(origpath)
            j.getEXIF(tags)
            raw = j.exif[exiftags.datetime_original]
            del j
            if raw is None:
                print("failed to get exif of: " + dirname + "/" + name)
                continue
            # The reader hands back raw bytes; strptime needs text.
            if isinstance(raw, bytes):
                raw = raw.decode('ascii', 'replace')
            # The value read from the file is NUL-terminated; trim it.
            strdt = raw.rstrip("\0")
            try:
                dt = datetime.datetime.strptime(strdt, '%Y:%m:%d %H:%M:%S')
            except ValueError:
                # E.g. cameras with an unset clock; skip this file only.
                print("invalid exif datetime of: " + dirname + "/" + name)
                continue
            # Per-day folder below destdir.
            day = dt.date().isoformat()
            dtdir = os.path.join(destdir, day)
            if not os.path.exists(dtdir):
                os.mkdir(dtdir)
            if not os.path.isdir(dtdir):
                print("failed to initialize dir: " + dtdir)
                continue
            stem = os.path.join(
                dtdir, day + "_" + dt.time().isoformat().replace(":", "-"))
            newpath = stem + ".jpg"
            suffix = 0
            while os.path.exists(newpath):
                suffix += 1
                newpath = stem + "_" + str(suffix) + ".jpg"
            # os.rename only works within one file system; use shutil.move
            # when source and destination are on different file systems.
            os.rename(origpath, newpath)
        except (IOError, OSError):
            print("IOError: failed to rename: " + dirname + "/" + name)
            traceback.print_exc(file=sys.stdout)
        except Exception:
            print("Filed while rename: " + dirname + "/" + name)
            traceback.print_exc(file=sys.stdout)
            raise
def mgmjpg(srcdir, destdir):
    """Reorganize every JPEG below ``srcdir`` into per-day folders in ``destdir``.

    ``srcdir`` must be an existing directory.  ``destdir`` is created when it
    does not exist and must be a directory otherwise; in either failure case
    a message is printed and nothing is done.  The whole tree below
    ``srcdir`` is walked and each directory's files are handed to
    ``visitjpg``.  A summary with the number of visited files and the elapsed
    time is printed at the end.
    """
    if not os.path.isdir(srcdir):
        print("src: " + str(srcdir) + " is not a directory")
        return
    start = datetime.datetime.now()
    log.visited = 0
    if not os.path.exists(destdir):
        os.mkdir(destdir)
    elif not os.path.isdir(destdir):
        print("dest: " + str(destdir) + " is not a directory")
        return
    # os.walk is available on Python 2 and 3 (os.path.walk is Python 2 only).
    for dirname, _subdirs, filenames in os.walk(srcdir):
        visitjpg(destdir, dirname, filenames)
    finished = datetime.datetime.now()
    print("visited %s files" % log.visited)
    print("started when: %s, finished when: %s, cost: %s" % (str(start), str(finished), str(finished - start)))
def testexif():
    """Smoke test: print the three date/time tags stored for one sample photo.

    Each line shows whatever the reader left in the dict for that tag, which
    is None when the tag was not filled in.
    """
    wanted = dict.fromkeys(
        (exiftags.datetime, exiftags.datetime_original, exiftags.datetime_digited))
    reader = jpg('/Volumes/DATA/Pictures/Dudu1/2013-01-03/2013-01-03_19-53-22_47.jpg')
    reader.getEXIF(wanted)
    found = reader.exif
    print("datetime: %s" % found[exiftags.datetime])
    print("original: %s" % found[exiftags.datetime_original])
    print("digited: %s" % found[exiftags.datetime_digited])
if __name__ == '__main__':
    # Script entry point: reorganize the author's photo folder.
    # Alternative invocation kept for reference: mgmjpg('/root/src', '/tmp')
    mgmjpg(srcdir='/Volumes/DATA/Pictures/Dudu1',
           destdir='/Volumes/DATA/Pictures/Dudu')