Python: Monitoring a Directory

Basic

mtime + checksum + directory traversal

Code

#!/usr/bin/env python

'''
monitor a directory
print the changed files in the directory
strategy: mtime + checksum
'''

import sys
import os
import stat
import cPickle as pickle

def print_usage():
    '''
    print the command-line synopsis of monitor.py to stdout
    one line per supported action, plus the default for [dir]
    '''
    usage_lines = (
        'monitor.py init [dir]',
        'monitor.py check [dir]',
        'monitor.py refresh [dir]',
        '[dir] defaults to .',
    )
    # a single parenthesized expression prints identically as a statement
    print('\n'.join(usage_lines))

def main(argv=None):
    '''
    command-line entry point

    argv: list of arguments WITHOUT the program name, i.e.
          [action] or [action, dir]; when None, sys.argv[1:] is used
    action is one of 'init', 'check', 'refresh'; dir defaults to '.'

    returns 0 after running the action, 1 (after printing the usage)
    when the argument count or the action is invalid
    '''
    if argv is None:
        # honour the declared default instead of failing on len(None)
        argv = sys.argv[1:]

    if len(argv) == 1:
        action, directory = argv[0], '.'
    elif len(argv) == 2:
        action, directory = argv
    else:
        print_usage()
        return 1

    if action not in ('init', 'check', 'refresh'):
        print_usage()
        return 1

    monitor = Monitor(os.path.abspath(directory))
    # action was validated above, so exactly one of these branches runs
    if action == 'init':
        monitor.init_dir()
    elif action == 'check':
        monitor.check_dir()
    else:
        monitor.refresh_dir()
    return 0
    

class Monitor(object):
    '''
    detect added, changed and removed files under one directory tree

    a snapshot (one pickled FileCache per file) is stored in a file named
    .cache directly under the monitored directory; check_dir() compares
    the current tree against that snapshot
    '''

    def __init__(self, directory):
        # FileCache stores absolute paths, so normalize here once; otherwise
        # a relative or non-normalized directory would never match the
        # cached keys and every file would show up as both ADD and REMOVE
        self.directory = os.path.abspath(directory)

    def _cache_path(self):
        '''return the path of the snapshot file of this directory'''
        return os.path.join(self.directory, '.cache')

    def _walk_files(self):
        '''
        yield the path of every file under the directory, recursively,
        skipping any file named .cache
        '''
        for dirpath, dirnames, filenames in os.walk(self.directory):
            for name in filenames:
                if name == '.cache':
                    continue    # exclude .cache file
                yield os.path.join(dirpath, name)

    def construct_cache(self):
        '''
        snapshot every file (path, mtime, checksum) and write the
        snapshots to .cache, one pickled FileCache after another
        '''
        fileCacheList = []
        for path in self._walk_files():
            print('dealing with %s' % path)
            fileCacheList.append(FileCache(path))
        # .cache is only opened (and truncated) after the whole tree has
        # been read, so a failure while scanning keeps the old snapshot
        with open(self._cache_path(), 'wb') as cacheFile:
            pickler = pickle.Pickler(cacheFile)
            for filecache in fileCacheList:
                pickler.dump(filecache)

    def init_dir(self):
        '''
        init directory
        cache the mtime and checksum of all files in the directory
        dump the cache to .cache in the directory
        '''
        print('init_dir')
        self.construct_cache()
        print('init %s success' % self.directory)

    def _load_cache(self):
        '''
        read .cache back into a dict {filepath: (mtime, checksum)}

        NOTE: unpickling can run arbitrary code chosen by whoever wrote
        the file, so only check directories whose .cache you trust
        '''
        dictFiles = {}
        with open(self._cache_path(), 'rb') as cache:
            unpickler = pickle.Unpickler(cache)
            while True:
                try:
                    filecache = unpickler.load()
                except EOFError:
                    break       # normal end of the snapshot stream
                except Exception as err:
                    # damaged snapshot: keep what was read so far, but say
                    # so instead of silently treating it as end of file
                    print('[WARN]: %s is damaged (%s), run refresh'
                          % (self._cache_path(), err))
                    break
                if isinstance(filecache, FileCache):
                    dictFiles[filecache.filepath] = (filecache.mtime,
                                                     filecache.checksum)
        return dictFiles

    def check_dir(self):
        '''
        check directory to determine which files have changed

        prints one line per difference against the snapshot:
        [ADD] for files not in the snapshot, [CHANGED] for files whose
        mtime AND checksum both differ, [REMOVE] for snapshot entries
        that no longer exist; prints a notice and returns when the
        directory has no .cache file yet
        '''
        # make sure the directory has been initialized
        # i.e. there's a .cache file under this directory
        if not os.path.isfile(self._cache_path()):
            print('%s has not been initialized yet' % self.directory)
            return

        dictFiles = self._load_cache()

        # traverse the target directory and determine which files have changed
        for path in self._walk_files():
            if path not in dictFiles:
                print('[ADD]: %s' % path)
                continue
            # pop the entry: whatever is left afterwards has been removed
            smtime, schecksum = dictFiles.pop(path)
            if os.stat(path)[stat.ST_MTIME] == smtime:
                continue        # same mtime: skip the costly checksum
            if md5_file(path) != schecksum:
                print('[CHANGED]: %s' % path)

        # traverse ended; sorted so the report order is deterministic
        for path in sorted(dictFiles):
            print('[REMOVE]: %s' % path)

    def refresh_dir(self):
        '''
        rebuild .cache from the current state of the directory
        '''
        print('refresh_dir')
        self.construct_cache()
        print('refresh %s success' % self.directory)


def md5_file(filename):
    '''
    return the hexadecimal MD5 digest of the contents of filename

    the file is read in binary mode, in fixed-size chunks, so the digest
    covers the exact bytes on disk and large or binary files (which may
    contain no newline at all) are never loaded in one piece
    raises IOError/OSError when the file cannot be opened or read
    '''
    try:
        import hashlib
        m = hashlib.md5()
    except ImportError:
        # very old interpreters without hashlib
        import md5
        m = md5.new()

    # 'with' closes the handle even when a read fails
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            m.update(chunk)
    return m.hexdigest()

class FileCache(object):
    '''
    snapshot of a single file
    filepath: absolute path of the file
    mtime:    modification time taken from os.stat (ST_MTIME field)
    checksum: MD5 hex digest as returned by md5_file
    '''

    def __init__(self, f):
        self.filepath = os.path.abspath(f)
        stat_result = os.stat(f)
        self.mtime = stat_result[stat.ST_MTIME]
        self.checksum = md5_file(f)

    def __str__(self):
        # path, mtime and checksum glued together without separators
        pieces = (self.filepath, str(self.mtime), str(self.checksum))
        return ''.join(pieces)

if __name__ == '__main__':
    # hand main()'s status (0 = ok, 1 = usage error) to the shell;
    # without sys.exit the script would always exit with status 0
    sys.exit(main(sys.argv[1:]))

Test

chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py init
init_dir
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py~
dealing with /home/chenqi/mypro/python/monitorDir/test1/1.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/2.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/5.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/6.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/4.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/3.txt
init /home/chenqi/mypro/python/monitorDir success
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ touch monitor.py
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py check
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ touch 1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py check
[ADD]: /home/chenqi/mypro/python/monitorDir/1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py refresh
refresh_dir
dealing with /home/chenqi/mypro/python/monitorDir/1.txt
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py
dealing with /home/chenqi/mypro/python/monitorDir/monitor.py~
dealing with /home/chenqi/mypro/python/monitorDir/test1/1.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/2.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/5.txt
dealing with /home/chenqi/mypro/python/monitorDir/test1/test_sub1/6.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/4.txt
dealing with /home/chenqi/mypro/python/monitorDir/test2/3.txt
refresh /home/chenqi/mypro/python/monitorDir success
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ rm 1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$ ./monitor.py check
[REMOVE]: /home/chenqi/mypro/python/monitorDir/1.txt
chenqi@chenqi-OptiPlex-760:~/mypro/python/monitorDir$

你可能感兴趣的:(python,Monitor,Directory,pickle,checksum)