Author:
水如烟
这里所说的网上数据,是基于:
一、有固定网址发布最新数据的链接;
二、数据格式固定。
在去年的10月,曾写了个《全国县及县以上行政区划代码信息类 》
见:
http://www.cnblogs.com/LzmTW/archive/2005/10/22/260066.html
现在仍以行政区划代码数据为例。
行政区划代码数据由国家统计局发布,网址为
http://www.stats.gov.cn/tjbz/xzqhdm/index.htm
数据格式是固定的:
如最新的为2005年12月31日
http://www.stats.gov.cn/tjbz/xzqhdm/t20041022_402301029.htm
最旧的为2001年10月的,
http://www.stats.gov.cn/tjbz/xzqhdm/t20021125_46781.htm
但是有例外,这在代码中说。
方案组织:
效果:
以下为代码:
NetConst.vb
Namespace
NET
Public
Class
NetConst
Private
Sub
New
()
End Sub
Public
Const
GOV_DEFAULT
As
String
=
"
www.stats.gov.cn
"
Public
Const
GOV_ADDRESS
As
String
=
"
http://www.stats.gov.cn/tjbz/xzqhdm/
"
Public
Const
WEBTABLE_INDEX
As
String
=
"
9
"
End Class
End Namespace
NetInformation.vb
Imports
System.Net
Imports
System.IO
Imports
System.Text.RegularExpressions
Namespace
NET
Public
Class
NetInformation
Private
gNetUpdateInformations(
-
1
)
As
NetUpdateInformationItem
Public
ReadOnly
Property
UpdateInformationsTable()
As
DataTable
Get
Return
GetUpdateInformationsTable()
End
Get
End Property
Private
Function
GetUpdateInformationsTable()
As
DataTable
Dim
mDataTable
As
New
DataTable(
"
UpdateInformations
"
)
With
mDataTable
.Columns.Add(
"
Address
"
)
.Columns.Add(
"
LastDate
"
)
For
Each
item
As
NetUpdateInformationItem
In
gNetUpdateInformations
.Rows.Add(
New
String
() {item.Address, item.LastDate})
Next
.AcceptChanges()
End
With
Return
mDataTable
End Function
Public
Sub
DownloadInformationsFromNet()
Dim
mRegex
As
New
Regex(
"
(?<date>2.*日)
"
)
Dim
mNetUpdateItems
As
NetUpdateItem()
=
GetNetUpdateItems()
Dim
mNetUpdateInformationItem
As
NetUpdateInformationItem
Dim
tmp
As
NetUpdateItem
'
由于后两个不合规则,舍去不用。最后一个没有日期,倒数第二个提供的是附件数据。
For
i
As
Integer
=
0
To
mNetUpdateItems.Length
-
1
-
2
tmp
=
mNetUpdateItems(i)
mNetUpdateInformationItem
=
New
NetUpdateInformationItem
With
mNetUpdateInformationItem
.Address
=
tmp.Address
.LastDate
=
CType
(mRegex.Match(tmp.Content).Value,
Date
).ToString(
"
yyyyMMdd
"
)
End
With
AppendItem(Of NetUpdateInformationItem)(mNetUpdateInformationItem, gNetUpdateInformations)
Next
End Sub
Private
Function
GetNetUpdateItems()
As
NetUpdateItem()
Dim
mResult(
-
1
)
As
NetUpdateItem
Dim
mRegex
As
New
Regex(
"
<a href='(?<href>.*)' target='_blank' >(?<content>.*行政区划代码.*)</a>
"
)
Dim
mCollection
As
MatchCollection
Dim
mClient
As
New
WebClient()
Dim
mStream
As
Stream
=
mClient.OpenRead(NetConst.GOV_ADDRESS)
Dim
mReader
As
New
StreamReader(mStream, System.Text.Encoding.Default)
Dim
mText
As
String
=
mReader.ReadToEnd
mReader.Close()
mStream.Close()
mClient.Dispose()
mCollection
=
mRegex.Matches(mText)
Dim
tmpItem
As
NetUpdateItem
For
Each
m
As
Match
In
mCollection
tmpItem
=
New
NetUpdateItem
With
tmpItem
.Address
=
NetConst.GOV_ADDRESS
&
m.Groups(
1
).Value
.Content
=
m.Groups(
2
).Value
End
With
AppendItem(Of NetUpdateItem)(tmpItem, mResult)
Next
Return
mResult
End Function
Private
Structure
NetUpdateItem
Public
Address
As
String
Public
Content
As
String
End Structure
Private
Structure
NetUpdateInformationItem
Public
Address
As
String
Public
LastDate
As
String
End Structure
Private
Sub
AppendItem(Of T)(
ByVal
value
As
T,
ByRef
array
As
T())
ReDim
Preserve
array(array.Length)
array(array.Length
-
1
)
=
value
End Sub
End Class
End Namespace
ExcelQueryTable.vb
Option
Strict
Off
Namespace
NET
Public
Class
ExcelQueryTable
Private
gExcelApplication
As
Object
Private
gWorkbook
As
Object
Private
gWorksheet
As
Object
Private
gQueryTable
As
Object
Sub
New
()
Initialize()
End Sub
Private
Sub
Initialize()
gExcelApplication
=
CreateObject
(
"
Excel.Application
"
)
gExcelApplication.DisplayAlerts
=
False
'
使退出时不询问是否存盘
gWorkbook
=
gExcelApplication.Workbooks.Add
gWorksheet
=
gWorkbook.Worksheets.Add
End Sub
'
这里只作简单处理,详细处理在我的BLOG上有相关“文章”作过介绍
Public
Sub
Close()
gWorkbook.Close()
gWorksheet
=
Nothing
gWorkbook
=
Nothing
gExcelApplication.DisplayAlerts
=
True
gExcelApplication.Quit()
gExcelApplication
=
Nothing
End Sub
Public
Function
Query(
ByVal
address
As
String
)
As
DataTable
Dim
mDataTable
As
DataTable
=
GetDataTable()
gWorksheet.Cells.Clear()
gQueryTable
=
gWorksheet.QueryTables.Add( _
Connection:
=
String
.Format(
"
URL;{0}
"
, address), _
Destination:
=
gWorksheet.Range(
"
A1
"
))
With
gQueryTable
.WebTables
=
NetConst.WEBTABLE_INDEX
'
这是固定的
.Refresh(BackgroundQuery:
=
False
)
End
With
Dim
mCell
As
Object
Dim
mMaxRowIndex
As
Integer
Dim
line
As
Object
mMaxRowIndex
=
gWorksheet.Cells.SpecialCells(
11
).Row
'
Excel.XlCellType.xlCellTypeLastCell=11
mCell
=
gWorksheet.Range(
"
A1
"
)
For
i
As
Integer
=
0
To
mMaxRowIndex
line
=
mCell.Offset(i,
0
).Value
If
line IsNot
Nothing
Then
AddRow(mDataTable, line.ToString)
End
If
Next
gQueryTable.Delete()
gQueryTable
=
Nothing
Return
mDataTable
End Function
Private
Sub
AddRow(
ByVal
table
As
DataTable,
ByVal
line
As
String
)
line
=
line.Trim
If
line.Length
<
7
Then
Exit Sub
Dim
tmpCode
As
String
Dim
tmpName
As
String
tmpCode
=
line.Substring(
0
,
6
)
tmpName
=
line.Substring(
6
).Trim
If
Not
IsNumeric
(tmpCode)
Then
Exit Sub
'
前六位需是数字
table.Rows.Add(
New
String
() {tmpCode, tmpName})
End Sub
Private
Function
GetDataTable()
As
DataTable
'
表的列名意义为:代码、名称
Dim
mDataTable
As
New
DataTable(
"
RegionalCode
"
)
With
mDataTable.Columns
.Add(
"
Code
"
)
.Add(
"
Name
"
)
End
With
Return
mDataTable
End Function
End Class
End Namespace
测试代码:
MainForm.vb(界面部分省,在最后有整个方案供下载)
Public
Class
MainForm
Private
gNetInformation
As
New
RegionalCodeLibrary.NET.NetInformation
Private
gQueryTable
As
RegionalCodeLibrary.NET.ExcelQueryTable
Private
Sub
Button1_Click(
ByVal
sender
As
System.Object,
ByVal
e
As
System.EventArgs)
Handles
Button1.Click
If
Not
CheckNetworkIsAvailable()
Then
Exit Sub
ShowMessage(
"
正在下载数据信息
"
)
gNetInformation.DownloadInformationsFromNet()
With
Me
.ComboBox1
.DataSource
=
gNetInformation.UpdateInformationsTable
.DisplayMember
=
"
LastDate
"
End
With
ShowMessage(
""
)
End Sub
Private
Sub
Button2_Click(
ByVal
sender
As
System.Object,
ByVal
e
As
System.EventArgs)
Handles
Button2.Click
If
String
.IsNullOrEmpty(
Me
.ComboBox1.Text)
Then
Exit Sub
If
Not
CheckNetworkIsAvailable()
Then
Exit Sub
If
gQueryTable
Is
Nothing
Then
ShowMessage(
"
正在启动Excel
"
)
gQueryTable
=
New
RegionalCodeLibrary.NET.ExcelQueryTable
End
If
Dim
mAddress
As
String
=
CType
(
Me
.ComboBox1.SelectedItem, DataRowView).Row.Item(
"
Address
"
).ToString
ShowMessage(
String
.Format(
"
正在下载{0}数据
"
,
Me
.ComboBox1.Text))
Me
.DataGridView1.DataSource
=
gQueryTable.Query(mAddress)
ShowMessage(
String
.Format(
"
{0}共有数据{1}项
"
,
Me
.ComboBox1.Text,
Me
.DataGridView1.RowCount))
End Sub
Private
Sub
Button3_Click(
ByVal
sender
As
System.Object,
ByVal
e
As
System.EventArgs)
Handles
Button3.Click
ClearEnvironment()
End Sub
Private
Function
CheckNetworkIsAvailable()
As
Boolean
Dim
mResult
As
Boolean
=
False
mResult
=
My.Computer.Network.IsAvailable
If
Not
mResult
Then
ShowMessage(
"
本地连接无效
"
)
Else
Try
mResult
=
My.Computer.Network.Ping(RegionalCodeLibrary.NET.NetConst.GOV_DEFAULT)
Catch
ex
As
Exception
mResult
=
False
End
Try
If
Not
mResult
Then
ShowMessage(
String
.Format(
"
本机没有连接Internet或发布网址{0}无效
"
, RegionalCodeLibrary.NET.NetConst.GOV_ADDRESS))
End
If
End
If
Return
mResult
End Function
Private
Sub
ShowMessage(
ByVal
msg
As
String
)
If
msg
=
""
Then
msg
=
"
待命
"
Me
.Label1.Text
=
String
.Format(
"
消息:{0}
"
, msg)
Me
.Label1.Refresh()
End Sub
Private
Sub
MainForm_FormClosing(
ByVal
sender
As
Object
,
ByVal
e
As
System.Windows.Forms.FormClosingEventArgs)
Handles
Me
.FormClosing
ClearEnvironment()
End Sub
Private
Sub
ClearEnvironment()
If
gQueryTable
Is
Nothing
Then
Exit Sub
gQueryTable.Close()
gQueryTable
=
Nothing
End Sub
End Class
方案下载:
代码