vb.net 多线程爬虫抓取免费代理IP

''' <summary>
''' Console program that downloads listing pages 1 to 15 from
''' http://www.xicidaili.com/wt/ on thread-pool threads, parses the first table
''' on each page with HtmlAgilityPack and collects every row whose speed value
''' is greater than 90 into <see cref="masterPorxyList"/>.
''' </summary>
Class Program
    ''' <summary>
    ''' Proxies collected so far. Worker threads in this class only append to it
    ''' while holding <c>listLock</c>.
    ''' </summary>
    Public Shared masterPorxyList As List(Of proxy) = New List(Of proxy)()

    ' List(Of T) does not support concurrent writers, and Main queues several
    ' work items that all append to masterPorxyList, so every access made by the
    ' workers is serialized through this private lock object.
    Private Shared ReadOnly listLock As Object = New Object()

    ' Positions of the interesting cells inside a row's ChildNodes collection.
    ' ChildNodes also contains the nodes between the <td> elements, which is
    ' presumably why the indexes are odd numbers -- verify against the live markup.
    Private Const IpCellIndex As Integer = 3
    Private Const PortCellIndex As Integer = 5
    Private Const SpeedCellIndex As Integer = 13

    ' Rows are kept only when their parsed speed value is strictly above this.
    Private Const MinSpeed As Integer = 90

    ''' <summary>One proxy entry scraped from a table row.</summary>
    Public Class proxy
        ''' <summary>Trimmed text of the row's IP cell.</summary>
        Public ip As String
        ''' <summary>Trimmed text of the row's port cell.</summary>
        Public port As String
        ''' <summary>Integer found between ':' and '%' in the row's speed cell markup.</summary>
        Public speed As Integer

        ''' <summary>Creates an entry from already-extracted values.</summary>
        ''' <param name="pip">Value stored in <see cref="ip"/>.</param>
        ''' <param name="pport">Value stored in <see cref="port"/>.</param>
        ''' <param name="pspeed">Value stored in <see cref="speed"/>.</param>
        Public Sub New(ByVal pip As String, ByVal pport As String, ByVal pspeed As Integer)
            Me.ip = pip
            Me.port = pport
            Me.speed = pspeed
        End Sub
    End Class

    ''' <summary>
    ''' Thread-pool callback: downloads one listing page and appends every
    ''' qualifying row to <see cref="masterPorxyList"/>.
    ''' </summary>
    ''' <param name="pageIndex">
    ''' Page number; its <c>ToString()</c> value is appended to the listing URL.
    ''' </param>
    ''' <remarks>
    ''' Pages that cannot be downloaded, pages without the expected table and rows
    ''' without the expected cells are skipped instead of throwing, because an
    ''' unhandled exception on a thread-pool thread terminates the process.
    ''' </remarks>
    Private Shared Sub getProxyList(ByVal pageIndex As Object)
        If pageIndex Is Nothing Then
            Return
        End If

        Dim urlCombin As String = "http://www.xicidaili.com/wt/" & pageIndex.ToString()
        Dim catchHtml As String = catchProxIpMethord(urlCombin, "UTF8")

        ' catchProxIpMethord returns "" when the download failed.
        If String.IsNullOrEmpty(catchHtml) Then
            Return
        End If

        Dim doc As HtmlAgilityPack.HtmlDocument = New HtmlAgilityPack.HtmlDocument()
        doc.LoadHtml(catchHtml)

        ' Both XPath queries yield Nothing when no node matches.
        Dim table As HtmlNode = doc.DocumentNode.SelectSingleNode("//div[@id='wrapper']//div[@id='body']/table[1]")
        If table Is Nothing Then
            Return
        End If

        Dim collectiontrs As HtmlNodeCollection = table.SelectNodes("./tr")
        If collectiontrs Is Nothing Then
            Return
        End If

        ' Row 0 is skipped, as in the original loop (presumably the header row).
        For i As Integer = 1 To collectiontrs.Count - 1
            Dim collectiontds As HtmlNodeCollection = collectiontrs(i).ChildNodes

            ' Ignore rows that are too short to contain the speed cell.
            If collectiontds Is Nothing OrElse collectiontds.Count <= SpeedCellIndex Then
                Continue For
            End If

            Dim subSpeed As Integer
            If Not tryParseSpeed(collectiontds(SpeedCellIndex).InnerHtml, subSpeed) Then
                Continue For
            End If

            If subSpeed > MinSpeed Then
                Dim ip As String = collectiontds(IpCellIndex).InnerText.Trim()
                Dim port As String = collectiontds(PortCellIndex).InnerText.Trim()
                Dim temp As proxy = New proxy(ip, port, subSpeed)
                Dim total As Integer

                SyncLock listLock
                    masterPorxyList.Add(temp)
                    total = masterPorxyList.Count
                End SyncLock

                Console.WriteLine("当前是第:" & total.ToString() & "个代理IP")
            End If
        Next
    End Sub

    ''' <summary>
    ''' Extracts the integer located between the first ':' and the next '%' of a
    ''' speed cell's inner HTML.
    ''' </summary>
    ''' <param name="speedHtml">Inner HTML of the speed cell.</param>
    ''' <param name="result">Receives the parsed value, or 0 on failure.</param>
    ''' <returns>True when both delimiters exist and the text between them is an integer.</returns>
    Private Shared Function tryParseSpeed(ByVal speedHtml As String, ByRef result As Integer) As Boolean
        result = 0

        If String.IsNullOrEmpty(speedHtml) Then
            Return False
        End If

        Dim beginIndex As Integer = speedHtml.IndexOf(":"c)
        If beginIndex < 0 Then
            Return False
        End If

        Dim endIndex As Integer = speedHtml.IndexOf("%"c, beginIndex + 1)
        If endIndex < 0 Then
            Return False
        End If

        Return Integer.TryParse(speedHtml.Substring(beginIndex + 1, endIndex - beginIndex - 1), result)
    End Function

    ''' <summary>
    ''' Downloads <paramref name="url"/> and returns the response body as text.
    ''' </summary>
    ''' <param name="url">Address to request; a null or empty value yields "".</param>
    ''' <param name="encoding">
    ''' "UTF8" decodes the body as UTF-8; "Default" or any other value uses the
    ''' system default encoding.
    ''' </param>
    ''' <returns>The response text, or "" when the URL is empty or the request fails.</returns>
    ''' <remarks>
    ''' Never throws: failures are reported on the standard error stream and turned
    ''' into an empty result, which callers treat as "nothing to parse".
    ''' </remarks>
    Private Shared Function catchProxIpMethord(ByVal url As String, ByVal encoding As String) As String
        If String.IsNullOrEmpty(url) Then
            Return ""
        End If

        ' The type is fully qualified because VB identifiers are case-insensitive:
        ' a bare "Encoding" in an expression would bind to the String parameter
        ' named "encoding" rather than to System.Text.Encoding.
        Dim ec As System.Text.Encoding = System.Text.Encoding.[Default]
        If encoding = "UTF8" Then
            ec = System.Text.Encoding.UTF8
        End If

        Try
            Dim request As WebRequest = WebRequest.Create(url)

            ' Using blocks release the response, stream and reader even when
            ' reading fails part-way through.
            Using response As WebResponse = request.GetResponse()
                Using datastream As Stream = response.GetResponseStream()
                    Using reader As StreamReader = New StreamReader(datastream, ec)
                        Return reader.ReadToEnd()
                    End Using
                End Using
            End Using
        Catch ex As Exception
            ' Best effort by design: one unreachable page must not stop the
            ' other workers, but the failure should not be invisible either.
            Console.Error.WriteLine("Failed to download " & url & ": " & ex.Message)
            Return ""
        End Try
    End Function

    ''' <summary>
    ''' Entry point: queues one work item per listing page (1 to 15) and then
    ''' blocks on console input so the process outlives the background workers.
    ''' </summary>
    ''' <param name="args">Command-line arguments (unused).</param>
    Private Shared Sub Main(ByVal args As String())
        For i As Integer = 1 To 15
            ThreadPool.QueueUserWorkItem(AddressOf getProxyList, i)
        Next

        Console.Read()
    End Sub
End Class

你可能感兴趣的:(vb.net 多线程爬虫抓取免费代理IP)