在 Word 文件中查找字符串

# -*- coding: utf-8 -*-

import sys,os
import os, sys
import win32com.client
import fnmatch


def work(path, search_str, pattern="*.doc"):
    """Find Word documents under ``path`` that contain ``search_str``.

    Walks ``path`` and all of its sub-directories, opens every file whose
    name matches ``pattern`` in Word and collects the ones in which the
    text is found.

    Args:
        path: Directory to search. It is made absolute first, because Word
            runs as a separate process and does not resolve relative file
            names against this script's working directory.
        search_str: Text to look for. On Python 2 pass a ``unicode`` string
            when searching for non-ASCII text.
        pattern: ``fnmatch`` pattern selecting the files to inspect.
            Defaults to ``"*.doc"``.

    Returns:
        A list with the full path of every matching document, in the order
        ``os.walk`` visited them.
    """
    root = os.path.abspath(path)

    # Starting and quitting Word is expensive, so one instance is started
    # before the search and shut down once after it.
    wordapp = win32com.client.gencache.EnsureDispatch("Word.Application")
    find_list = []
    try:
        for thispath, _subdirs, filenames in os.walk(root):
            for name in fnmatch.filter(filenames, pattern):
                # "~$name.doc" entries are the owner/lock files Word drops
                # next to a document that is open; they are not documents.
                if name.startswith("~$"):
                    continue
                fn = os.path.join(thispath, name)
                if find(wordapp, fn, search_str):
                    find_list.append(fn)
    finally:
        # Always shut Word down, even if the search fails part-way.
        wordapp.Quit()

    return find_list
    

def find(wordapp, fn, search_str):
    """Return True if the document ``fn`` contains ``search_str``.

    Args:
        wordapp: A running ``Word.Application`` COM object.
        fn: Path of the document to open.
        search_str: Text to look for.

    Returns:
        ``True`` when Word's Find reports a match, ``False`` otherwise.
    """
    # The document is only searched, never edited, so open it read-only and
    # without a conversion prompt or an entry in the recent-files list.
    document = wordapp.Documents.Open(
        fn, ConfirmConversions=False, ReadOnly=True, AddToRecentFiles=False)
    try:
        document.Activate()
        finder = wordapp.Selection.Find
        # Clear formatting from previous searches
        finder.ClearFormatting()
        return bool(finder.Execute(FindText=search_str))
    finally:
        # Close the document opened above (not whichever one happens to be
        # active) and never prompt to save: 0 == wdDoNotSaveChanges.
        document.Close(SaveChanges=0)

        

if __name__ == "__main__":
    # Search the directory that contains this script.
    path = os.path.dirname(os.path.abspath(__file__))

    # Look for documents containing the string "1".
    # NOTE: on Python 2, use a unicode literal (u"...") as search_str when
    # searching for non-ASCII text such as Chinese.
    search_str = "1"
    find_list = work(path, search_str)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    # "\n" (not "/n") is the newline escape: blank lines around the report
    # and one matching file per line.
    print("\n" * 2)
    print("=================search_str is %(search_str)s====================="
          % {"search_str": search_str})
    print("=================there are follow files found which the search str in=====================")
    print("\n".join(find_list) or "None")
    print("\n" * 2)

你可能感兴趣的:(python)