vba 读取 html 数据类型,如何在vba中用html dom处理网络采集的数据?

Sub QQ1722187970()

Dim sVerb As String

Dim sUrl As String

Dim sCharset As String

Dim sPostData As String

sVerb = "GET"

sUrl = "http://www.77tj.org/tencent/"

Dim sToken As String

sResult = HtmlBasic("GET", sUrl)

Set oHtmlDom = CreateObject("htmlfile")

With oHtmlDom

.body.innerHTML = sResult

sToken = .getElementsByTagName("input")(1).Value

End With

For n = 1 To 10

sPostData = "PageIndex=" & n & "&__RequestVerificationToken=" & sToken

sResult = HtmlBasic("POST", sUrl, , sPostData)

Call HtmlTable(sResult)

Next n

End Sub

Function HtmlBasic(ByVal sVerb As String, ByVal sUrl As String, Optional ByVal sCharset As String = "utf-8", Optional ByVal sPostData As String = "")

'sVerb为发送的Html请求的方法,sUrl为具体的网址,sCharset为网址对应的字符集编码,sPostData为Post方法对应的发送body

Dim oHTML As Object

'https://msdn.microsoft.com/en-us/library/windows/desktop/aa384106(v=vs.85).aspx

Set oHTML = VBA.CreateObject("WinHttp.WinHttpRequest.5.1")

With oHTML

Select Case sVerb

Case "GET"

.Open "GET", sUrl, False

Case "POST"

.Open "POST", sUrl, False

' .SetRequestHeader "Referer", "http://www.77tj.org/tencent"

.SetRequestHeader "Content-Type", "application/x-www-form-urlencoded"

End Select

.send (sPostData)

'获取返回的字节数组

bResult = .ResponseBody

'按照指定的字符编码显示

sResult = Byte2String(bResult, sCharset)

Debug.Print sResult

HtmlBasic = sResult

End With

Set oHTML = Nothing

End Function

Function Byte2String(bContent, ByVal sCharset As String)

Const adTypeBinary = 1

Const adTypeText = 2

Const adModeRead = 1

Const adModeWrite = 2

Const adModeReadWrite = 3

Dim oStream As Object

'创建流对象

Set oStream = CreateObject("ADODB.Stream")

With oStream

'打开流

.Open

'设置为字节模式

.Type = adTypeBinary

'写入字节

.write bContent

'将位置定位在第一个字节

.Position = 0

'设置为文本模式

.Type = adTypeText

'设置编码的字符集

.Charset = sCharset

'读取编码后的文本

Byte2String = .ReadText

'关闭流对象

.Close

End With

End Function

'提取网页表格的代码

Sub HtmlTable(ByVal sHtml As String)

'网页html文档对象

Dim oHtmlDom As Object

'网页表格对象

Dim oTable As Object

'网页表格行对象

Dim oRows As Object

'网页表格单元格对象

Dim oCells As Object

'抓取的数据存放的excel表格对象

Dim oWK As Worksheet

Set oWK = Sheet1

iRow = oWK.Range("a65536").End(xlUp).Row + 1

Set oHtmlDom = CreateObject("htmlfile")

With oHtmlDom

.body.innerHTML = sHtml

Set oTable = .getElementsByTagName("table")(0)

'Set obj = getElementById("id")

With oTable

Set oRows = .Rows

For i = 1 To oRows.Length - 1

Set oCells = oRows(i).Cells

For j = 0 To oCells.Length - 1

oWK.Cells(iRow, j + 1) = oCells(j).innertext

Next j

iRow = iRow + 1

Next i

End With

End With

Set oHtmlDom = Nothing

Set oTable = Nothing

Set oRows = Nothing

Set oCells = Nothing

End Sub

你可能感兴趣的:(vba,读取,html,数据类型)