word文档批量转换为html格式

有时需要将doc/docx格式的文档批量转换为html格式的网页文件,可以使用以下VBA脚本执行批量转换的操作,需要在安装了宏功能的Word中执行。

以下脚本会搜索指定目录中的doc文档,并逐个进行处理,执行以下操作:

  • 将doc文档的标题属性修改为文件名,以使生成的网页文件显示的标题为文件名;
  • 对图片大小进行统一按比例缩放,防止图片过大影响展示;
  • 将生成的html文件保存在指定的目录中。
Private Declare Function GetTickCount Lib "kernel32" () As Long

'''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Private num_file As Integer
Private FileType_htm As String
Private FileType_doc As String
Private path_src_start As String '记录源路径
Private path_des_start As String '记录目标路径
Private LogFile As String '日志文件路径

Private Sub CommandButton1_Click()
    '从指定目录开始处理,下面的路径最后不带“\”
    path_src_start = "D:\word" '保存.doc文件的目录
    path_des_start = path_src_start '保存需要生成.htm文件的目录
    
    '开始执行
    start
End Sub

Sub start()
    num_file = 0
    FileType_htm = ".htm"
    FileType_doc = ".doc"
    Dim time_start, time_end, second_spend, minute_spend, minute_tail As Long
    
    MsgBox ("需要处理的目录路径:" & path_src_start & vbCrLf _
        & "保存处理结果的目录路径:" & path_des_start) 'vbCrLf为回车换行
    
    '记录开始时间
    time_start = GetTickCount()
    '开始搜索
    LogFile = Chr(34) & path_des_start & "\转换记录.txt" & Chr(34) 'chr(34)",echo >>时,若文件路径中存在空格,需要用""包含

    Open path_des_start & "\转换记录.txt" For Output As #1
        
    search path_src_start, path_des_start
    '完成搜索
    time_end = GetTickCount()

    writeLog ("处理文件数量:" & num_file)
    
    '获得花费时间
    time_spend = time_end - time_start
    second_spend = time_spend \ 1000
    minute_spend = second_spend \ 60
    minute_tail = second_spend - minute_spend * 60
    output = "处理用时:" & minute_spend & "分" & minute_tail & "秒"
    writeLog (output)
    
    Close #1
    
    MsgBox ("处理完毕" & vbCrLf _
        & "处理文件数量:" & num_file & vbCrLf _
        & output)
End Sub

'搜索文件,path_src为源文件路径,path_des为需要保存.doc文件的路径
Sub search(path_src, path_des)
    Dim fs, fold, fls, fl
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set fold = fs.getfolder(path_src)
    '首先对当前目录的指定文件进行处理
    If fold.Files.Count <> 0 Then
        Set fls = fold.Files
        For Each fl In fls
            '判断是否为.htm文件,需判断文件名中是否出现了指定的后缀,且处于最后(防止.html文件匹配.htm)
            judge = InStrRev(fl.Name, FileType_doc)
            If judge <> 0 And judge + Len(FileType_doc) - 1 = Len(fl.Name) Then
                '处理.doc文件
                operDocFile path_src, fl.Name, path_src
            End If
        Next
    End If
    
    '再对子目录进行递归
    If fold.SubFolders.Count <> 0 Then
        Set fls = fold.SubFolders
        For Each fl In fls
            search path_src & "\" & fl.Name, path_des & "\" & fl.Name
        Next
    End If
End Sub

'处理.doc文件,path_src为.doc文件所在目录路径,FileName为.doc文件名,path_des为.htm文件需要保存的目录路径
Sub operDocFile(path_src, FileName, path_des)
    filePath_doc = path_src & "\" & FileName
    FileName_Only = Left(FileName, InStrRev(FileName, ".") - 1) '去掉后缀的文件名,开头没有'\'
    filePath_htm = path_des & "\" & FileName_Only & FileType_htm '对应.htm文件的路径
    '判断对应的.htm是否已生成
    If Dir(filePath_htm) = "" Then
        writeLog ("--开始处理文件:" & filePath_doc)

        Documents.Open (filePath_doc) '打开答案.mht文件
        modifyTitle (FileName_Only) '修改.doc文档的标题属性为文件名
        operPics '修改图片大小
        saveDoc2Htm (filePath_htm) '保存为.htm文件
        ActiveDocument.Close '关闭打开的文件
        writeLog ("--处理文件成功:" & filePath_htm)
          
        num_file = num_file + 1 '已处理文件数加1
    End If
End Sub

'将.doc文件通过word保存为.htm文件,destPath为.htm文件完整路径
Sub saveDoc2Htm(destPath)
    '另存为.htm文件
    ActiveDocument.SaveAs FileName:=destPath, FileFormat:= _
        wdFormatFilteredHTML, LockComments:=False, Password:="", AddToRecentFiles:= _
        False, WritePassword:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:= _
        False, SaveNativePictureFormat:=False, SaveFormsData:=False, _
        SaveAsAOCELetter:=False
End Sub

'修改.doc文档的标题属性
Sub modifyTitle(title)
    ActiveDocument.BuiltInDocumentProperties("Title") = title '修改.doc文档标题属性
    ActiveDocument.Save '保存
End Sub

'对图片进行处理
Sub operPics()
    Selection.WholeStory '全选
    pic_num = Selection.InlineShapes.Count '获得图片个数

    For i = 1 To pic_num
        '对图片大小进行处理
        operPicSize i
    Next i
    
    '不保存原始doc-ActiveDocument.Save
End Sub

'对图片大小进行处理,将未完全显示出来的图片放大至固定宽
Sub operPicSize(currentPic)
    
    fixed_width = 800 '设置固定宽度
    fixed_width_min = 500 '设置固定宽度的最小值
    minus_width = 50 '设置每次减少的宽度
    
    '计算图片原始宽度
    pic_width_raw = (Selection.InlineShapes(currentPic).Width * 100) / Selection.InlineShapes(currentPic).ScaleWidth
    writeLog ("图片显示比例:" & Selection.InlineShapes(currentPic).ScaleWidth & " 图片显示宽度:" & Selection.InlineShapes(currentPic).Width & " 图片原始宽度:" & pic_width_raw)
    
    If Selection.InlineShapes(currentPic).ScaleWidth < 100# Then
        '图片显示比例小于100%,需要设置固定宽度
            
        '尝试从800500,与图片显示宽度进行比较,
        try = 0
        Do
            fixed_width_current = fixed_width - minus_width * try
        
            '循环到最小固定宽度时结束
            If fixed_width_current < fixed_width_min Then
                Exit Do
            End If
        
            try = try + 1
        
            If fixed_width_current < pic_width_raw Then
                '若图片原始宽度比当前尝试的固定宽度大,则使用当前的固定宽度
                
                '计算长宽比保持不变的图片新高度
                height_new = (fixed_width_current / Selection.InlineShapes(currentPic).Width) * Selection.InlineShapes(currentPic).Height

                Selection.InlineShapes(currentPic).Width = fixed_width_current
                Selection.InlineShapes(currentPic).Height = height_new
                
                writeLog ("修改后图片显示宽度:" & fixed_width_current)
                
                Exit Do
            End If
        Loop
    End If
End Sub

'写日志
Sub writeLog(data)
    Print #1, data
End Sub

你可能感兴趣的:(Windows,vba)