EXCEL VBA调用adobe的api识别电子PDF发票里内容并登记台账

EXCEL VBA调用adobe的api识别电子PDF发票里内容并登记台账

EXCEL VBA调用adobe的api识别电子PDF发票里内容并登记台账_第1张图片
代码如下


使用须知:
1、工具--引用里勾选[Adobe Acrobat 10.0 Type Library]
2、安装Adobe Acrobat pro软件

Dim sht As Worksheet
Function BrowseFolders() As String  '浏览目录
    Dim objshell As Object
    Dim objFolder As Object
    Set objshell = CreateObject("Shell.Application")
    Set objFolder = objshell.BrowseForFolder(0, "请指定发票文件所在的文件夹", 0, 0)
    BrowseFolders = ""
    If Not objFolder Is Nothing Then
        BrowseFolders = objFolder.Self.Path
    End If
    Set objFolder = Nothing
    Set objshell = Nothing
End Function

Sub cmd_getpdf_Click()
    Dim Pth As String '文件路径
    Dim PDFName As String, Wapp As Object, Mstr As String
    Application.ScreenUpdating = False
    '============================================
    Pth = BrowseFolders
    If Pth = "" Then
        Pth = Sheet1.Range("A9").Text
    End If
    If Pth = "" Then
        Pth = ThisWorkbook.Path
    End If
    If Right(Pth, 1) <> "\" Then Pth = Pth & "\"
    Sheet1.Range("A8") = "上次路径:"
    Sheet1.Range("A9") = Pth
    Sheet1.Range("a15:a10000") = ""
    If Dir(Pth & "*.pdf") = "" Then
        MsgBox "指定目录没有找到发票PDF文件!"
        Sheet1.Range("A9") = ""
        Exit Sub
    End If
    'Debug.Print Pth
    '============================================
    For Each sht In ThisWorkbook.Sheets
        Application.DisplayAlerts = False
        If sht.Name = "发票资料读取到Excel" Then sht.Delete
        Application.DisplayAlerts = True
    Next
    Set sht = Worksheets.Add(, Worksheets(Sheets.Count))
    sht.Name = "发票资料读取到Excel"
    sht.Range("A1:J1") = Array("发票号码", "发票日期", "货物或*名称", "规格型号", "单位", "数量", "单价", "金额", "税率", "税额")
    '============================================定义表头字段
    PDFName = Dir(Pth & "*.pdf")
    Do While PDFName <> ""
          Call Imp_Into_XL(Pth & PDFName)
          PDFName = Dir
    Loop
    sht.Columns.AutoFit
    MsgBox "操作完成!"
    '============================================
    Application.ScreenUpdating = True
End Sub

Sub Imp_Into_XL(PDF_File As String)
    Dim AC_PD As Acrobat.AcroPDDoc
    Dim AC_Hi As Acrobat.AcroHiliteList
    Dim AC_PG As Acrobat.AcroPDPage
    Dim AC_PGTxt As Acrobat.AcroPDTextSelect
    Dim Yes_Fir As Boolean
    Dim Ct_Page As Long
    Dim i As Long, j As Long, k As Long, m As Integer
    Dim T_Str As String
    Dim Hld, XL, Brr(), RowNo%, Arr As Variant, sss%
    Dim Hld_Txt As Variant
    Dim FPHM As String   '发票号码
    Dim FPRQ As String   '发票日期
    Dim GGXH As String   '规格型号
    Dim HWMC As String   '货物名称
    Dim SL_SV As String  '数量-税率
    Dim SL_SV_Temp As String  '数量-税率的临时存变量
    Dim HWDW As String   '货物单位
    Dim SL As String     '数量
    Dim DW As String     '单位
    Dim XH As String     '型号
    '====================================================定义字段类型
    Set AC_PD = New Acrobat.AcroPDDoc
    Set AC_Hi = New Acrobat.AcroHiliteList
    AC_Hi.Add 0, 32767
    With AC_PD
        .Open PDF_File
        Ct_Page = .GetNumPages
        If Ct_Page = -1 Then
            MsgBox "请确认发票文件 '" & PDF_File & "'"
            .Close
            GoTo h_end
        End If
        For i = 1 To 1 ' Ct_Page    '只考虑一个文档有一张发票的情形
            T_Str = ""
            Set AC_PG = .AcquirePage(i - 1)
            Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
            If Not AC_PGTxt Is Nothing Then
                With AC_PGTxt
                    For j = 0 To .GetNumText - 1
                        T_Str = T_Str & .GetText(j)
                    Next j
                End With
            End If
            '==========================================================
            If T_Str <> "" Then
                    Hld_Txt = Split(T_Str, vbCrLf)
                    FPHM = "": FPRQ = "":: GGXH = "": HWMC = ""
                    For j = 0 To UBound(Hld_Txt)
                          If InStr(Hld_Txt(j), "年月日") = 0 Then
                          If InStr(Hld_Txt(j), "年") > 0 And InStr(Hld_Txt(j), "月") > 0 And InStr(Hld_Txt(j), "日") > 0 Then   '当字符串里含有年月日时
                             Hld_Txt(j) = Repce2(Hld_Txt(j))
                             Hld_Txt(j) = Trim(Replace(Hld_Txt(j), "开票日期:", ""))            '如果有"开票日期:"几个字,将其替换掉
                             FPRQ = Left(Hld_Txt(j), 4) & "-" & Mid(Hld_Txt(j), 6, 2) & "-" & Mid(Hld_Txt(j), 9, 2)
                             Exit For
                          End If
                          End If
                    Next j
                    For j = 0 To UBound(Hld_Txt)
                      If TestNumber(Hld_Txt(j)) Then   '测试是否含有数字并以数字结尾的类型,加以判断
                        If Len(Hld_Txt(j)) = 10 And TestCH(Hld_Txt(j)) = False Then '当字符串里没有年月日,但是以"2023 06 30"有空格,共有10个字符串位置形式存在时取得发票日期
                          If InStr(Hld_Txt(j), " ") > 0 And UBound(Split(Hld_Txt(j), " ")) > 0 Then
                             FPRQ = "'" & RegR(Hld_Txt(j))    '取得发票日期
                             Exit For
                          End If
                        End If
                      End If
                    Next j
                    For j = 0 To UBound(Hld_Txt)
                       If TestNumber(Hld_Txt(j)) Then   '测试是否含有数字并以数字结尾的类型,加以判断
                        Hld_Txt(j) = Trim(Replace(Hld_Txt(j), "发票号码:", ""))            '如果有"发票号码:"几个字,将其替换掉
                        If Len(Hld_Txt(j)) = 8 Or Len(Hld_Txt(j)) = 20 Then          '//***限定要取出的发票号码为8位或者20位数字,否则发票号码取不出来
                          If IsNumeric(Hld_Txt(j)) Then
                             If InStr(Hld_Txt(j), ".") = 0 And InStr(Hld_Txt(j), ChrW(165)) = 0 Then
                               FPHM = Regs(Hld_Txt(j)) '取得8位或者20位的发票号码
                               Exit For
                             End If
                          End If
                        End If
                       End If
                   Next j
                   k = 0
                   For j = 0 To UBound(Hld_Txt)
                        If Len(Trim(Hld_Txt(j))) > 2 Then        '//***当字符数大于2,有的只有一个*,这种情形需要排除
                        If Left(Trim(Hld_Txt(j)), 1) = "*" Or InStr(Hld_Txt(j), "详见") > 0 Then  '////当货物名称前面第一个字符是*号或者含有(详见)时
                        Arr = Array("+", "<", ">")   '/***密码区有许多有这几个符号,遇到了就避开它
                        sss = 0
                        For m = LBound(Arr) To UBound(Arr)               '//***避免遇到密码区以*号开头,并且有Arr数组里符号的情形
                          If InStr(Hld_Txt(j), Arr(m)) > 0 Then sss = sss + 1
                        Next m
                        If sss = 0 Then
                           Hld_Txt(j) = Trim(Hld_Txt(j))                '清除前后空格
                           Hld_Txt(j) = StrConv(Hld_Txt(j), vbNarrow)   '全角转为半角
                           Hld_Txt(j) = Repce(Hld_Txt(j))               '将字符串中多个空格变成一个
                           If InStr(Hld_Txt(j), "%") > 0 Or Right(Trim(Hld_Txt(j)), 1) = "*" Then
                              For m = UBound(Split(Hld_Txt(j), " ")) To 0 Step -1
                                  If TestCHNum(Split(Hld_Txt(j), " ")(m)) = False Or Trim(Split(Hld_Txt(j), " ")(m)) = "*" Then   '循环判定,取出有数字的数量-税额部分//有部分的金额和税额是*号
                                     If TestCH(Split(Hld_Txt(j), " ")(m)) = True And InStr(Hld_Txt(j), "不征税") = 0 Then Exit For
                                     SL_SV = Split(Hld_Txt(j), " ")(m) & " " & SL_SV
                                     SL_SV_Temp = Split(Hld_Txt(j), " ")(m) & " " & SL_SV_Temp       '增加这个变量,存下原始的数量金额部分
                                     If InStr(SL_SV, "不征税") > 0 And Len(SL_SV) > 3 Then SL_SV = Left(SL_SV, InStr(SL_SV, "税")) & " " & Right(SL_SV, Len(SL_SV) - InStr(SL_SV, "税"))
                                     SL_SV = Trim(SL_SV)
                                     SL_SV_Temp = Trim(SL_SV_Temp)
                                     If m < UBound(Split(Hld_Txt(j), " ")) And Split(Hld_Txt(j), " ")(m) < 0 Then Exit For
                                  ElseIf TestCHNum(Split(Hld_Txt(j), " ")(m)) = True Then    '循环判定,如含有中文+数字,则需拆分
                                     SL_SV = RegSL(Split(Hld_Txt(j), " ")(m)) & " " & SL_SV
                                     SL_SV_Temp = RegSL(Split(Hld_Txt(j), " ")(m)) & " " & SL_SV_Temp
                                     SL_SV = Trim(SL_SV)
                                     SL_SV_Temp = Trim(SL_SV_Temp)
                                     Exit For
                                  End If
                              Next m
                              SL_SV = Repce(SL_SV): SL_SV_Temp = Repce(SL_SV_Temp)  '用原始的数量金额部分来方便取出GGXH
                              GGXH = Trim(Replace(Hld_Txt(j), SL_SV_Temp, ""))  '去掉数量-税额部分,下余的是规格型号   ////***前面做过变动后,这里用replace取不出余下的规格型号
                              SL_SV_Temp = ""
                              SL_SV = SL_JE(SL_SV)                         '数量-税额部分,不能用trim去掉前面空格
                              If InStr(GGXH, "费") > 0 Then
                                 HWMC = Left(GGXH, InStr(GGXH, "费"))         '货物名称,有费字的取费字前面字符(含费字)作为货物名称
                                 GGXH = Trim(Replace(GGXH, HWMC, ""))         '费字后面的是规格型号+单位
                              Else
                                 If InStr(GGXH, " ") = 0 Then
                                    HWMC = GGXH: GGXH = ""                   '规格型号没有包含空格时,货物名称就取ggxh,将原来的ggxh置空
                                 Else
                                    HWMC = Split(GGXH, " ")(0)               '规格型号有包含空格时,货物名称取ggxh的第一个空格前的字符
                                    GGXH = Trim(Replace(GGXH, HWMC, ""))     '规格型号取除了货物名称后的余下的值
                                 End If
                              End If
                              If InStr(GGXH, " ") = 0 Then          '当规格型号没有空格时********
                                 Select Case Len(GGXH)
                                     Case Is = 0                    '当费后面的字符数量为0时
                                        If Split(SL_SV, " ")(0) = "" Then     '当数据部分第一个字符为空时,货物名称就只为货物名称
                                           HWMC = HWMC & " " & " "
                                        Else                                '当数据部分第一个字符不为空时,货物名称取最后一个值为单位,次一个值为规格型号
                                           If Mid(HWMC, Len(HWMC) - 1, 2) = "服务" Or InStr(HWMC, "费") > 0 Then
                                              HWMC = HWMC & " " & " "                '当货物名称最后两个字是"服务"时或含有"费",已经不能拆开了.
                                           ElseIf InStr(HWMC, "费") = 0 Then
                                              DW = Right(HWMC, 1)                      '取右边一位做单位*****
                                              XH = Mid(HWMC, Len(HWMC) - 1, 1)
                                              HWMC = Left(HWMC, Len(HWMC) - 2)
                                              If InStr(HWMC, XH & DW) > 0 Or InStr(HWMC, XH) > 0 Or InStr(HWMC, DW) > 0 Then
                                                 HWMC = HWMC & XH & DW & " " & " "
                                              Else
                                                 HWMC = HWMC & " " & XH & " " & DW
                                              End If
                                           End If
                                        End If
                                     Case Is >= 1                   '当费后面的字符数量为1或者大于1时
                                        DW = Right(GGXH, 1)                         '取右边一位做单位
                                        XH = Replace(GGXH, DW, "")               '余下的是型号
                                        If Split(SL_SV, " ")(0) = "" Then
                                           HWMC = HWMC & " " & " "
                                        Else
                                           If XH <> "" Then
                                              HWMC = HWMC & " " & XH & " " & DW
                                           Else
                                              HWMC = HWMC & " " & " " & DW
                                           End If
                                        End If
                                 End Select
                              ElseIf InStr(GGXH, " ") > 0 Then       '当规格型号有空格时
                                 If Split(SL_SV, " ")(0) <> "" Then
                                    HWDW = Split(GGXH, " ")(UBound(Split(GGXH, " ")))     '单位
                                    If Len(HWDW) > 1 Then
                                       HWDW = Right(HWDW, 1)
                                       GGXH = Replace(GGXH, HWDW, "")
                                       GGXH = Replace(GGXH, " ", "_")
                                       HWMC = HWMC & " " & GGXH & " " & HWDW
                                    Else
                                       XH = Trim(Replace(GGXH, HWDW, ""))             '规格型号
                                       If XH = "" Then
                                          If Len(HWDW) > 1 Then
                                             DW = Right(HWDW, 1)
                                             XH = Replace(HWDW, DW, "")
                                             HWMC = HWMC & " " & XH & " " & DW
                                          ElseIf Len(HWDW) = 1 Then
                                             HWMC = HWMC & " " & " " & DW
                                          End If
                                       Else
                                          DW = HWDW
                                          XH = Trim(Replace(XH, " ", "_"))                 '去掉规格型号中的空格,用下横线连接
                                          HWMC = HWMC & " " & XH & " " & DW
                                       End If
                                     End If
                                 ElseIf Split(SL_SV, " ")(0) = "" Then
                                     XH = Replace(GGXH, " ", "_")                 '去掉规格型号中的空格,用下横线连接
                                     HWMC = HWMC & " " & XH & " "                 '没有单位,要加上表示单位的空格
                                 End If
                              End If
                           ElseIf UBound(Split(Hld_Txt(j), " ")) <= 2 And InStr(Hld_Txt(j), "%") = 0 Then           '当品名与数量金额等不在同一行时
                              HWMC = Hld_Txt(j)
                              For m = j To UBound(Hld_Txt)
                                  If InStr(Hld_Txt(m), "%") > 0 Then SL_SV_Temp = Hld_Txt(m): Exit For
                              Next m
                              For m = UBound(Split(SL_SV_Temp, " ")) To 0 Step -1
                                  If TestCHNum(Split(SL_SV_Temp, " ")(m)) = False Or Trim(Split(SL_SV_Temp, " ")(m)) = "*" Then   '循环判定,取出有数字的数量-税额部分//有部分的金额和税额是*号
                                     If TestCH(Split(SL_SV_Temp, " ")(m)) = True And InStr(SL_SV_Temp, "不征税") = 0 Then Exit For
                                     SL_SV = Split(SL_SV_Temp, " ")(m) & " " & SL_SV       '增加这个变量,存下原始的数量金额部分
                                     If InStr(SL_SV, "不征税") > 0 And Len(SL_SV) > 3 Then SL_SV = Left(SL_SV, InStr(SL_SV, "税")) & " " & Right(SL_SV, Len(SL_SV) - InStr(SL_SV, "税"))
                                     SL_SV = Trim(SL_SV)
                                     If m < UBound(Split(SL_SV_Temp, " ")) And Split(SL_SV_Temp, " ")(m) < 0 Then Exit For
                                  ElseIf TestCHNum(Split(SL_SV_Temp, " ")(m)) = True Then    '循环判定,如含有中文+数字,则需拆分
                                     SL_SV = RegSL(Split(SL_SV_Temp, " ")(m)) & " " & SL_SV
                                     SL_SV = Trim(SL_SV)
                                     Exit For
                                  End If
                              Next m
                              SL_SV_Temp = Replace(SL_SV_Temp, SL_SV, "")
                              SL_SV = Repce(SL_SV) '用原始的数量金额部分来方便留下GGXH
                              GGXH = Trim(Replace(SL_SV_Temp, SL_SV, ""))  '去掉数量-税额部分,下余的是规格型号   ////***前面做过变动后,这里用replace取不出余下的规格型号
                              If Len(GGXH) = 0 Then         '当规格型号为空时
                                 DW = Split(HWMC, " ")(UBound(Split(HWMC, " ")))
                                 HWMC = Trim(Replace(HWMC, DW, ""))
                                 XH = Trim(Replace(HWMC, Split(HWMC, " ")(0), " "))
                                 HWMC = Trim(Replace(HWMC, XH, ""))
                                 If InStr(HWMC, " ") > 0 Then HWMC = Replace(HWMC, " ", "_")    '将货物名称里原有的空格用下划线代替
                                 If InStr(XH, " ") > 0 Then XH = Replace(XH, " ", "_")          '将型号里原有的空格用下划线代替
                                 If Len(XH) > 0 Then
                                    HWMC = HWMC & " " & XH & " " & DW
                                 Else
                                    HWMC = HWMC & " " & " " & DW
                                 End If
                              ElseIf Len(GGXH) > 0 Then                        '当规格型号不为空时
                                 If InStr(HWMC, " ") > 0 Then HWMC = Replace(HWMC, " ", "_")    '将货物名称里原有的空格用下划线代替
                                 If InStr(GGXH, " ") > 0 Then
                                    DW = Split(GGXH, " ")(UBound(Split(GGXH, " ")))   '单位
                                    XH = Trim(Replace(SL_SV_Temp, DW, ""))                     '型号
                                    If InStr(XH, " ") > 0 Then XH = Replace(XH, " ", "_")          '将型号里原有的空格用下划线代替
                                    If Len(XH) > 0 Then
                                       HWMC = HWMC & " " & XH & " " & DW
                                    Else
                                       HWMC = HWMC & " " & " " & DW
                                    End If
                                 Else
                                    DW = Right(GGXH, 1)                      '取右边一位做单位*****
                                    XH = Replace(GGXH, DW, "")
                                    HWMC = HWMC & " " & XH & " " & DW
                                 End If
                              End If
                           End If
                           If Split(SL_SV, " ")(0) = "" Then     '///*****************
                              Hld_Txt(j) = HWMC & SL_SV
                           Else
                              Hld_Txt(j) = HWMC & " " & SL_SV
                           End If
                           HWMC = "": SL_SV = "":   SL = "": DW = "": XH = "": GGXH = "": HWDW = "": SL_SV_Temp = ""
                           If UBound(Split(Hld_Txt(j), " ")) = 7 Then
                              k = k + 1
                              ReDim Preserve Brr(1 To 10, 1 To k)
                              Brr(1, k) = "'" & FPHM: Brr(2, k) = FPRQ       '编号及日期
                              For m = 0 To UBound(Split(Hld_Txt(j), " "))
                                  Brr(3 + m, k) = Split(Hld_Txt(j), " ")(m)
                              Next m
                           Else
                              GoTo 0
                           End If
                        End If
                        End If
                        End If
                   Next j
                   With sht
                     If k = 0 Then GoTo 0
                     RowNo = .Cells(65536, 1).End(3).Row + 1
                     .Cells(RowNo, 1).Resize(UBound(Brr, 2), UBound(Brr)) = Application.Transpose(Brr)
                  '  .Cells(RowNo, 11) = PDF_File              '将文件名称放在最后一列
                     Erase Brr
                   End With
           ElseIf T_Str = "" Then
0
              MsgBox PDF_File & "文件没有取到数据,请检查!", vbOKOnly, "ExcelHome"
              Sheet1.Cells(Sheet1.Cells(65536, 1).End(3).Row + 1, 1) = PDF_File    '将有问题的文件名称放在sheet1表中,方便查验
              Exit For
           End If
            '===========================================================
        Next i
            .Close
    End With

h_end:
    Set AC_PGTxt = Nothing
    Set AC_PG = Nothing
    Set AC_Hi = Nothing
    Set AC_PD = Nothing
End Sub
Function Regs(STR)        '取发票号码
Dim reg As Object, mh As Variant
 Set reg = CreateObject("VBScript.RegExp")
 With reg
   .Global = True
   .Pattern = "(^\d{8}$|^\d{20}$)"    '是8位或者是20位
    Set mh = .Execute(STR)
    Regs = mh.Item(0).SubMatches.Item(0)
 End With
End Function
Function RegR(STR)        '取发票日期
Dim reg As Object, mh As Variant
 Set reg = CreateObject("VBScript.RegExp")
 With reg
   .Global = True
   .Pattern = "(^\d{4} \d{2} \d{2}$)"    '前四位年,中两位月,后两位日
    Set mh = .Execute(STR)
    RegR = Replace(mh.Item(0).SubMatches.Item(0), " ", "-")
 End With
End Function
Function RegSL(STR)        '取数量
Dim reg As Object, mh As Variant
 Set reg = CreateObject("VBScript.RegExp")
 With reg
   .Global = True
   '.Pattern = "[\u4e00-\u9fff](\d+\.\d+|\d+)"    '中文后面跟的数量为小数或整数
   .Pattern = "[\u4e00-\u9fff]([-]?\d+\.\d+$|[-]?\d+$)"    '中文后面跟的数量为小数或整数
    Set mh = .Execute(STR)
    RegSL = mh.Item(0).SubMatches.Item(0)
 End With
End Function
Function TestNumber(STR)        '测试是否最后是数字
Dim reg As Object
 Set reg = CreateObject("VBScript.RegExp")
 With reg
   .Global = True
   .Pattern = "^\d+\.\d+$|\d+$"
    TestNumber = .test(STR)
 End With
End Function
Function TestCH(STR)        '测试是否以中文开始
Dim reg As Object
 Set reg = CreateObject("VBScript.RegExp")
 With reg
   .Global = True
   .Pattern = "[\u4e00-\u9fff]+"
    TestCH = .test(STR)
 End With
End Function
Function TestCHNum(STR)        '测试是否以中文后跟随数字
Dim reg As Object
 Set reg = CreateObject("VBScript.RegExp")
 With reg
   .Global = True
   .Pattern = "[\u4e00-\u9fff]([-]?\d+\.\d+$|[-]?\d+$)"
    TestCHNum = .test(STR)
 End With
End Function
Public Function Repce(STR)        '多个空格变成一个
With CreateObject("VBSCRIPT.REGEXP")
  .Global = True
  .Pattern = "\s+"
  Repce = .Replace(STR, " ")
End With
End Function
Public Function Repce2(STR)        '去掉中间空格
With CreateObject("VBSCRIPT.REGEXP")
  .Global = True
  .Pattern = "\s+"
  Repce2 = .Replace(STR, "")
End With
End Function
Public Function SL_JE(STR)        '处理数量金额这部分
   Dim i%, str_temp
   Select Case UBound(Split(STR, " "))
     Case Is >= 5
       For i = UBound(Split(STR, " ")) To UBound(Split(STR, " ")) - 4 Step -1
           str_temp = Split(STR, " ")(i) & " " & str_temp
       Next i
           SL_JE = Trim(str_temp)
     Case 4
        SL_JE = STR
     Case 2
        SL_JE = "   " & STR
   End Select
End Function

你可能感兴趣的:(excelVBA专栏,excel,adobe,pdf)