HOW TO:利用Excel的QueryTable下载网上数据

Author: 水如烟

这里所说的网上数据,是基于:
一、有固定网址发布最新数据的链接;
二、数据格式固定。

在去年的10月,曾写了个《全国县及县以上行政区划代码信息类 》
见: http://www.cnblogs.com/LzmTW/archive/2005/10/22/260066.html

现在仍以行政区划代码数据为例。

行政区划代码数据由国家统计局发布,网址为
http://www.stats.gov.cn/tjbz/xzqhdm/index.htm
数据格式是固定的:
如最新的为2005年12月31日
http://www.stats.gov.cn/tjbz/xzqhdm/t20041022_402301029.htm
最旧的为2001年10月的,
http://www.stats.gov.cn/tjbz/xzqhdm/t20021125_46781.htm

但是有例外,这在代码中说。

方案组织: HOW TO:利用Excel的QueryTable下载网上数据
效果:
HOW TO:利用Excel的QueryTable下载网上数据

以下为代码:
NetConst.vb
Namespace  NET
    
Public   Class  NetConst
        
Private   Sub   New ()
        
End Sub

        
Public   Const  GOV_DEFAULT  As   String   =   " www.stats.gov.cn "
        
Public   Const  GOV_ADDRESS  As   String   =   " http://www.stats.gov.cn/tjbz/xzqhdm/ "
        
Public   Const  WEBTABLE_INDEX  As   String   =   " 9 "
    
End Class
End Namespace
NetInformation.vb
Imports  System.Net
Imports  System.IO
Imports  System.Text.RegularExpressions

Namespace  NET
    
Public   Class  NetInformation
        
Private  gNetUpdateInformations( - 1 As  NetUpdateInformationItem


        
Public   ReadOnly   Property  UpdateInformationsTable()  As  DataTable
            
Get
                
Return  GetUpdateInformationsTable()
            
End   Get
        
End Property

        
Private   Function  GetUpdateInformationsTable()  As  DataTable
            
Dim  mDataTable  As   New  DataTable( " UpdateInformations " )
            
With  mDataTable
                .Columns.Add(
" Address " )
                .Columns.Add(
" LastDate " )
                
For   Each  item  As  NetUpdateInformationItem  In  gNetUpdateInformations
                    .Rows.Add(
New   String () {item.Address, item.LastDate})
                
Next
                .AcceptChanges()
            
End   With
            
Return  mDataTable
        
End Function

        
Public   Sub  DownloadInformationsFromNet()

            
Dim  mRegex  As   New  Regex( " (?<date>2.*日) " )

            
Dim  mNetUpdateItems  As  NetUpdateItem()  =  GetNetUpdateItems()
            
Dim  mNetUpdateInformationItem  As  NetUpdateInformationItem

            
Dim  tmp  As  NetUpdateItem
            
' 由于后两个不合规则,舍去不用。最后一个没有日期,倒数第二个提供的是附件数据。
             For  i  As   Integer   =   0   To  mNetUpdateItems.Length  -   1   -   2
                tmp 
=  mNetUpdateItems(i)

                mNetUpdateInformationItem 
=   New  NetUpdateInformationItem
                
With  mNetUpdateInformationItem
                    .Address 
=  tmp.Address
                    .LastDate 
=   CType (mRegex.Match(tmp.Content).Value,  Date ).ToString( " yyyyMMdd " )
                
End   With

                AppendItem(Of NetUpdateInformationItem)(mNetUpdateInformationItem, gNetUpdateInformations)
            
Next
        
End Sub

        
Private   Function  GetNetUpdateItems()  As  NetUpdateItem()

            
Dim  mResult( - 1 As  NetUpdateItem

            
Dim  mRegex  As   New  Regex( " <a href='(?<href>.*)' target='_blank' >(?<content>.*行政区划代码.*)</a> " )
            
Dim  mCollection  As  MatchCollection

            
Dim  mClient  As   New  WebClient()

            
Dim  mStream  As  Stream  =  mClient.OpenRead(NetConst.GOV_ADDRESS)
            
Dim  mReader  As   New  StreamReader(mStream, System.Text.Encoding.Default)
            
Dim  mText  As   String   =  mReader.ReadToEnd

            mReader.Close()
            mStream.Close()
            mClient.Dispose()

            mCollection 
=  mRegex.Matches(mText)

            
Dim  tmpItem  As  NetUpdateItem
            
For   Each  m  As  Match  In  mCollection
                tmpItem 
=   New  NetUpdateItem
                
With  tmpItem
                    .Address 
=  NetConst.GOV_ADDRESS  &  m.Groups( 1 ).Value
                    .Content 
=  m.Groups( 2 ).Value
                
End   With

                AppendItem(Of NetUpdateItem)(tmpItem, mResult)
            
Next

            
Return  mResult
        
End Function

        
Private   Structure  NetUpdateItem
            
Public  Address  As   String
            
Public  Content  As   String
        
End Structure

        
Private   Structure  NetUpdateInformationItem
            
Public  Address  As   String
            
Public  LastDate  As   String
        
End Structure

        
Private   Sub  AppendItem(Of T)( ByVal  value  As  T,  ByRef  array  As  T())
            
ReDim   Preserve  array(array.Length)
            array(array.Length 
-   1 =  value
        
End Sub

    
End Class

End Namespace

ExcelQueryTable.vb
Option   Strict   Off

Namespace  NET
    
Public   Class  ExcelQueryTable
        
Private  gExcelApplication  As   Object
        
Private  gWorkbook  As   Object
        
Private  gWorksheet  As   Object
        
Private  gQueryTable  As   Object

        
Sub   New ()
            Initialize()
        
End Sub

        
Private   Sub  Initialize()
            gExcelApplication 
=   CreateObject ( " Excel.Application " )
            gExcelApplication.DisplayAlerts 
=   False   ' 使退出时不询问是否存盘
            gWorkbook  =  gExcelApplication.Workbooks.Add
            gWorksheet 
=  gWorkbook.Worksheets.Add
        
End Sub

        
' 这里只作简单处理,详细处理在我的BLOG上有相关“文章”作过介绍
         Public   Sub  Close()
            gWorkbook.Close()
            gWorksheet 
=   Nothing
            gWorkbook 
=   Nothing
            gExcelApplication.DisplayAlerts 
=   True
            gExcelApplication.Quit()
            gExcelApplication 
=   Nothing
        
End Sub

        
Public   Function  Query( ByVal  address  As   String As  DataTable
            
Dim  mDataTable  As  DataTable  =  GetDataTable()

            gWorksheet.Cells.Clear()

            gQueryTable 
=  gWorksheet.QueryTables.Add( _
                Connection:
= String .Format( " URL;{0} " , address), _
                Destination:
= gWorksheet.Range( " A1 " ))

            
With  gQueryTable
                .WebTables 
=  NetConst.WEBTABLE_INDEX   ' 这是固定的
                .Refresh(BackgroundQuery: = False )
            
End   With


            
Dim  mCell  As   Object
            
Dim  mMaxRowIndex  As   Integer
            
Dim  line  As   Object

            mMaxRowIndex 
=  gWorksheet.Cells.SpecialCells( 11 ).Row  ' Excel.XlCellType.xlCellTypeLastCell=11
            mCell  =  gWorksheet.Range( " A1 " )

            
For  i  As   Integer   =   0   To  mMaxRowIndex
                line 
=  mCell.Offset(i,  0 ).Value
                
If  line IsNot  Nothing   Then
                    AddRow(mDataTable, line.ToString)
                
End   If
            
Next

            gQueryTable.Delete()
            gQueryTable 
=   Nothing

            
Return  mDataTable
        
End Function

        
Private   Sub  AddRow( ByVal  table  As  DataTable,  ByVal  line  As   String )
            line 
=  line.Trim
            
If  line.Length  <   7   Then   Exit Sub

            
Dim  tmpCode  As   String
            
Dim  tmpName  As   String

            tmpCode 
=  line.Substring( 0 6 )
            tmpName 
=  line.Substring( 6 ).Trim

            
If   Not   IsNumeric (tmpCode)  Then   Exit Sub   ' 前六位需是数字

            table.Rows.Add(
New   String () {tmpCode, tmpName})
        
End Sub

        
Private   Function  GetDataTable()  As  DataTable
            
' 表的列名意义为:代码、名称
             Dim  mDataTable  As   New  DataTable( " RegionalCode " )
            
With  mDataTable.Columns
                .Add(
" Code " )
                .Add(
" Name " )
            
End   With
            
Return  mDataTable
        
End Function

    
End Class
End Namespace

测试代码:
MainForm.vb(界面部分省,在最后有整个方案供下载)
Public   Class  MainForm
    
Private  gNetInformation  As   New  RegionalCodeLibrary.NET.NetInformation
    
Private  gQueryTable  As  RegionalCodeLibrary.NET.ExcelQueryTable


    
Private   Sub  Button1_Click( ByVal  sender  As  System.Object,  ByVal  e  As  System.EventArgs)  Handles  Button1.Click

        
If   Not  CheckNetworkIsAvailable()  Then   Exit Sub

        ShowMessage(
" 正在下载数据信息 " )

        gNetInformation.DownloadInformationsFromNet()
        
With   Me .ComboBox1
            .DataSource 
=  gNetInformation.UpdateInformationsTable
            .DisplayMember 
=   " LastDate "
        
End   With

        ShowMessage(
"" )
    
End Sub

    
Private   Sub  Button2_Click( ByVal  sender  As  System.Object,  ByVal  e  As  System.EventArgs)  Handles  Button2.Click
        
If   String .IsNullOrEmpty( Me .ComboBox1.Text)  Then   Exit Sub

        
If   Not  CheckNetworkIsAvailable()  Then   Exit Sub

        
If  gQueryTable  Is   Nothing   Then

            ShowMessage(
" 正在启动Excel " )
            gQueryTable 
=   New  RegionalCodeLibrary.NET.ExcelQueryTable

        
End   If

        
Dim  mAddress  As   String   =   CType ( Me .ComboBox1.SelectedItem, DataRowView).Row.Item( " Address " ).ToString

        ShowMessage(
String .Format( " 正在下载{0}数据 " Me .ComboBox1.Text))
        
Me .DataGridView1.DataSource  =  gQueryTable.Query(mAddress)

        ShowMessage(
String .Format( " {0}共有数据{1}项 " Me .ComboBox1.Text,  Me .DataGridView1.RowCount))
    
End Sub

    
Private   Sub  Button3_Click( ByVal  sender  As  System.Object,  ByVal  e  As  System.EventArgs)  Handles  Button3.Click
        ClearEnvironment()
    
End Sub

    
Private   Function  CheckNetworkIsAvailable()  As   Boolean
        
Dim  mResult  As   Boolean   =   False
        mResult 
=  My.Computer.Network.IsAvailable

        
If   Not  mResult  Then
            ShowMessage(
" 本地连接无效 " )

        
Else
            
Try
                mResult 
=  My.Computer.Network.Ping(RegionalCodeLibrary.NET.NetConst.GOV_DEFAULT)
            
Catch  ex  As  Exception
                mResult 
=   False
            
End   Try

            
If   Not  mResult  Then
                ShowMessage(
String .Format( " 本机没有连接Internet或发布网址{0}无效 " , RegionalCodeLibrary.NET.NetConst.GOV_ADDRESS))
            
End   If
        
End   If

        
Return  mResult
    
End Function

    
Private   Sub  ShowMessage( ByVal  msg  As   String )
        
If  msg  =   ""   Then  msg  =   " 待命 "
        
Me .Label1.Text  =   String .Format( " 消息:{0} " , msg)
        
Me .Label1.Refresh()
    
End Sub

    
Private   Sub  MainForm_FormClosing( ByVal  sender  As   Object ByVal  e  As  System.Windows.Forms.FormClosingEventArgs)  Handles   Me .FormClosing
        ClearEnvironment()
    
End Sub

    
Private   Sub  ClearEnvironment()
        
If  gQueryTable  Is   Nothing   Then   Exit Sub
        gQueryTable.Close()
        gQueryTable 
=   Nothing
    
End Sub
End Class

方案下载: 代码

你可能感兴趣的:(Excel)