【VBA研究】用XMLHTTP的Post功能抓取数据

作者:iamlaosong

我前一阵子用VBA做了个工具,用XMLHTTP的Get功能抓取城市间距离。现在我想用用XMLHTTP的Post功能抓取邮件轨迹。抓取数据是用Get还是Post,取决于网站提交参数的方法。

1、通过分析(用fiddler),邮件轨迹查询网站是用post提交参数的。如下图:

【VBA研究】用XMLHTTP的Post功能抓取数据_第1张图片

上图中“Entity”内容用于设置包头,点击“TextView”可以看到传输的参数内容,邮件号码,如下图:

抓取数据的代码如下:

Sub tt()
    Dim HttpReq As Object
    Dim pdata, http As String
    
    Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
    '轨迹头部数据,网址用xxx屏蔽
    http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
    pdata = "trace_no=1044905987232"
    
    HttpReq.Open "Post", http, False
    
    HttpReq.setRequestHeader "Content-Length", Len(pdata)
    'HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
    HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded; charset=utf-8"

    HttpReq.send pdata    '"trace_nos=1194359346482"
    
    Do Until HttpReq.readyState = 4
        DoEvents
    Loop
    
    If HttpReq.Status = 200 Then
        Debug.Print HttpReq.responseText
    End If

End Sub

2、返回内容是个json结构的数据,可以用fiddler的查看返回内容,点击“TextView”:

【VBA研究】用XMLHTTP的Post功能抓取数据_第2张图片

点击“JSON”可以看到数据解析结果:

【VBA研究】用XMLHTTP的Post功能抓取数据_第3张图片

但vba解析的时候内容为空,对比以前返回json结构的数据,发现这个数据少了个名称,因为定义JS函数时,指定了一个json结构数据的名称,即jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"中的traces名称。

加上后就可以解析了,即:

            '返回数据要成为标准的json结构,还需要在外面加一层数据名称
            buf = "{""traces"":" & HttpReq.responseText & "}"
            kk = get_trace(buf)
get_trace函数就是用来解析json数据的,代码如下:

Function get_trace(mystring As String) As Integer
    Dim objJSx As Object, objJSy As Object
    Dim m1, m2, n, j As Integer
    Dim source, level, kind, sm As String
    
    On Error Resume Next
    Set objJSx = CreateObject("ScriptControl")        '调用MSScriptControl.ScriptControl对象将提取的变量文本运算形成对象集合
    objJSx.Language = "JavaScript"                    '测试发现JavaScript、javascript、JScript都可以表示JavaScript语言
    
    '定义一个JS函数,通过计算表达式的方式引入JSON数据并解析
    jscode = "function json(s,i) { return eval('(' + s + ').traces[' + i + ']'); }"
    objJSx.AddCode jscode
    TT = "否"
    For n = 1 To 100
        If objJSx.Run("json", mystring, n - 1) = "" Then Exit For
        Set objJSy = objJSx.Run("json", mystring, n - 1)
        For j = 1 To 11
            TraceInfo(n, j) = ""
        Next j
        
        TraceInfo(n, 1) = objJSy.traceNo
        TraceInfo(n, 2) = objJSy.opCode
        TraceInfo(n, 3) = objJSy.opTime
        TraceInfo(n, 4) = objJSy.opName
        TraceInfo(n, 6) = objJSy.opOrgCode
        TraceInfo(n, 7) = objJSy.opOrgSimpleName
        TraceInfo(n, 8) = objJSy.operatorNo
        TraceInfo(n, 9) = objJSy.operatorName
        TraceInfo(n, 10) = objJSy.level
        TraceInfo(n, 11) = objJSy.source
        sm = objJSy.desc
        '剔除数据中的HTML部分
        Do While InStr(sm, "<") > 0
            m1 = InStr(sm, "<")
            m2 = InStr(sm, ">")
            If m2 > 0 Then
                If Mid(sm, m1, 3) = "/br" Then
                    sm = Left(sm, m1 - 1) & " " & Right(sm, Len(sm) - m2)
                Else
                    sm = Left(sm, m1 - 1) & Right(sm, Len(sm) - m2)
                End If
            Else
                Exit Do
            End If
        Loop
        TraceInfo(n, 5) = sm
        If objJSy.opCode = "704" Then TT = "是"
    Next n
    
    get_trace = n - 1
End Function

3、实际使用的代码

Public Sub get_data()
    '根据工作表中的查询语句读取数据
    Dim HttpReq As Object
    Dim i, k, kk, lineno, row1 As Long
    Dim Mail, pdata, tbhead As String, buf As String
    Dim arr_head
    
    lineno = [A65536].End(xlUp).Row      '行数,也是邮件号码数量
    Range("B2:B" & lineno).ClearContents
    'lineno = ActiveSheet.UsedRange.Rows.Count
    Set HttpReq = CreateObject("MSXML2.XMLHTTP.6.0")
    '轨迹头部数据
    'http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryCurrentTraceByTrace_nos/"
    'pdata = "trace_nos="
    '轨迹数据
    http = "http://10.xxx.xxx.xxx/querypush-traln/qps/qpswaybilltraceinternal/queryTraceByTrace_no/"
    'pdata = "trace_no="
    
    row1 = 2
    maxrow = Sheets("查询结果").UsedRange.Rows.Count
    If maxrow >= 1 Then
        Sheets("查询结果").Range("A1:L" & maxrow).ClearContents
    End If
    tbhead = "邮件号码 操作码 操作时间 处理动作 详细说明 机构代码 机构名称 操作员代码 操作员姓名 级别 来源"
    arr_head = Split(tbhead, " ")    '下标从0开始
    Sheets("查询结果").Cells(1, 1).Resize(1, UBound(arr_head) + 1) = arr_head
    
    For i = 2 To lineno
        Mail = Trim(Sheets("邮件号码").Cells(i, 1))
        If Mail = "" Then Exit For
        If Len(Mail) = 13 Then
            
            HttpReq.Open "Post", http, False
            
            pdata = "trace_no=" & Mail
            HttpReq.setRequestHeader "Content-Length", Len(pdata)
            HttpReq.setRequestHeader "CONTENT-TYPE", "application/x-www-form-urlencoded"
            
            HttpReq.send pdata    'pdata = "trace_no=1194359346482"
            
            Do Until HttpReq.readyState = 4
                DoEvents
            Loop
            'MsgBox HttpReq.getAllResponseHeaders
            'Debug.Print HttpReq.responseText
            
            '返回数据要成为标准的json结构,还需要在外面加一层数据名称
            buf = "{""traces"":" & HttpReq.responseText & "}"

            kk = get_trace(buf)
            Sheets("邮件号码").Cells(i, 2) = TT
            
            If kk > 0 Then
                For k = kk To 1 Step -1
                    If CInt(TraceInfo(k, 10)) <= Range("E1") Then
                        For j = 1 To 11
                            Sheets("查询结果").Cells(row1, j) = TraceInfo(k, j)
                        Next j
                        row1 = row1 + 1
                    End If
                Next k
            Else
                Sheets("邮件号码").Cells(i, 2) = "Err"
                
                Sheets("查询结果").Cells(row1, 1) = Mail
                Sheets("查询结果").Cells(row1, 2) = "Err"
                Sheets("查询结果").Cells(row1, 4) = HttpReq.responseText
                row1 = row1 + 1
                delay (9 * Rnd + 1)   '出错了,说明你还是干快了,随机后延时1-10秒,看运气了。
            End If
            'If CInt(i / 10) * 10 = i Then
                Application.StatusBar = "完成:" & Round(i * 100 / lineno, 2) & "%"
                DoEvents
            'End If
            delay (Rnd + 0.25)  '总部领导说了,接口是大家用的,你一个人不能用太多,此处延时0.5秒,降降速度。
        Else
            Sheets("邮件号码").Cells(i, 2) = "异常"
        End If
    
    Next i
    
    Application.StatusBar = "就绪"
    Sheets("查询结果").Activate
    msg = MsgBox("邮件批量查询完毕,共查询" & i - 2 & "个邮件!", vbOKOnly, "AHEMS:iamlaosong")

End Sub

 

你可能感兴趣的:(VBA_Excel,Web应用)