1 Option Explicit 2 '----------------读Csv文件 类--------------------- 3 4 Private Declare Function WideCharToMultiByte Lib "kernel32" _ 5 (ByVal CodePage As Long, _ 6 ByVal dwFlags As Long, _ 7 ByVal lpWideCharStr As Long, _ 8 ByVal cchWideChar As Long, _ 9 ByRef lpMultiByteStr As Any, _ 10 ByVal cchMultiByte As Long, _ 11 ByVal lpDefaultChar As String, _ 12 ByVal lpUsedDefaultChar As Long) As Long 13 14 Private Declare Function MultiByteToWideChar Lib "kernel32" _ 15 (ByVal CodePage As Long, _ 16 ByVal dwFlags As Long, _ 17 ByRef lpMultiByteStr As Any, _ 18 ByVal cchMultiByte As Long, _ 19 ByVal lpWideCharStr As Long, _ 20 ByVal cchWideChar As Long) As Long 21 22 Private Type BuffType '一个缓冲区 23 StartPosAbso As Long '该缓冲区在文件中的绝对位置 24 BufLen As Long '缓冲区总长 25 PtrInBuf As Long '缓冲区内部指针 26 ptrNextStrStartInBuf As Long '下一行内容开始位置(从此处算到下一个cr/lf为下一行) 27 IgnoreFirstLf As Boolean '是否忽略本缓冲区的第一个 vblf 28 bufBytes() As Byte '缓冲区内容(字节数组) 29 End Type 30 31 32 Dim State As StateType 33 Private Enum StateType 34 NewFieldStart 35 NonQuotesField 36 QuotesField 37 FieldSeparator 38 QuoteInQuotesField 39 RowSeparator 40 ErrorS 41 End Enum 42 43 Dim af_Buff As BuffType '一个缓冲区 44 Dim af_lngFileLength As Long 45 46 Dim lFileName As String 47 Dim lFileNum As Integer 48 Dim lStatus As Integer '-1=已关闭;1=已打开;2=已经开始读取;0=未设 49 Dim lIsEndRead As Boolean '=true表示或者读完文件或者出错,即不能再继续读了,主程序应退出读取 50 Dim lErrOccured As Boolean '是否上次 GetNextLine 发生了一个错误 51 Dim lAutoOpen As Boolean '是否设置 FileName 属性时自动打开文件,默认为true(类初始化时设为true) 52 Dim lAutoClose As Boolean '是否 读取行读完文件或出错时 自动关闭文件,默认为true(类初始化时设为true) 53 54 55 56 57 Dim lEncode As Long '编码设置 58 Dim EncodeErr As Boolean '编码转换时出错Flag 59 Public Enum EncodeEnum 60 Default = 0 61 ShifJis = 932 62 JIS = 50220 63 Utf8 = 65001 64 GB2312 = 936 65 End Enum 66 67 68 Dim ch As Long 69 '以上仅为GetNextLine函数用,为了不每次调用GetNextLine时候都重新定义,故将之做为全局的了,其实应是局部的 70 '_______________________________________ 71 Dim lineArr As New Collection 72 Dim strArr() As Byte 73 Dim strArrlBuff As Long 74 Private Const mcInitBuffSize As Long = 100 '初始分配空间大小,10K 75 76 Public Function GetNextLine(ByRef col As Collection) As Integer 77 '读取文件的下一行文本,支持 vbCrLf、vbLf、vbCr 的多种分行符 78 '返回1表示正常读取了 79 '返回-1也表示正常,但读完了文件 80 '返回0表示出错或非法 81 '1. 一般出错返回0,并设置 lErrOccured=True 82 '2. 如果上次读完了文件,则允许再额外调用一次 GetNextLine (返回 0 并 _ 83 不提示出错,lErrOccured 仍为 false,此算非法);如果再调用就出错了 _ 84 (函数仍返回0,但 lErrOccured 为 true 此算出错) 85 86 87 '设置反映错误的标志变量 88 lErrOccured = False '表示尚未发生错误;如后续程序中发生了错误再改为 True 89 '判断和设置状态 90 If lStatus = 0 Then 91 'lStatus = 0:当前状态非法,尚未打开文件,无法读取 92 GoTo errExit 93 ElseIf lStatus < 0 Then 94 GoTo errExit '不允许额外调用了,出错 95 End If 96 97 '正常读取的情况:此时 lStatus 要么为1要么为2,即要么文件已经打开, _ 98 '要么已经进入读取状态了,总之读取下一行是没有问题的 99 lStatus = 2 '设置为2表示已经进入读取状态 100 101 102 '//////////////// 读取文件,以找到“一行”的内容 //////////////// 103 On Error GoTo errExit '有任何错误发生时都转到errExit标签处执行 104 105 With af_Buff 106 '缓冲区逐渐沿文件前进,直到缓冲区起始位置超过文件总长读完文件 107 Do Until .StartPosAbso > af_lngFileLength 108 109 '============ (1)根据需要读取文件的下一个缓冲区内容 ============ 110 '若 .PtrInBuf=-1 表示要读取下一个缓冲区,否则不读取下一个,仍使用 _ 111 当前缓冲区和 .PtrInBuf 指针 112 If .PtrInBuf < 0 Then 113 '----从 .StartPosAbso 开始读取一些字节存入缓冲区 .bufBytes() 114 .BufLen = FileGetBytesLocal(.StartPosAbso, .bufBytes()) 115 If .BufLen <= 0 Then GoTo errExit '读取出错 116 117 '----初始化缓冲区指针 118 .PtrInBuf = 1 119 '看是否需要忽略第一个 vbLf 120 If .IgnoreFirstLf Then 121 If .bufBytes(.PtrInBuf) = 10 Then '第1个字节确是 vbLf 122 '忽略第一个 vbLf 123 .PtrInBuf = .PtrInBuf + 1 124 End If 'If .bufBytes(.PtrInBuf) = 10 Then 125 126 .IgnoreFirstLf = False '恢复标志,不忽略第一个 vbLf 127 End If 'If .IgnoreFirstLf Then 128 129 '初始化下一行起始位置 ptrNextStrStartInBuf (下一行内容包含该字节) 130 .ptrNextStrStartInBuf = .PtrInBuf 131 End If 'If .PtrInBuf < 0 Then 132 133 '============ (2)逐个扫描缓冲区中的字节,查找分行符 ============ 134 '扫描缓冲区中的字节,直到找到 vbCr或vbLf 或扫描完缓冲区 135 For .PtrInBuf = .PtrInBuf To .BufLen 136 ch = .bufBytes(.PtrInBuf) 137 Select Case State '34代表双引号 44代表逗号 138 Case NewFieldStart 139 If ch = 34 Then 140 State = QuotesField 141 ElseIf ch = 44 Then 142 lineArr.Add "" 143 State = FieldSeparator 144 ElseIf ch = 13 Or ch = 10 Then 145 State = NewFieldStart 146 Exit For 147 Else 148 149 strArrlBuff = strArrlBuff + 1 150 If strArrlBuff Mod mcInitBuffSize = 0 Then 151 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 152 End If 153 'ReDim Preserve strArr(1 To strArrlBuff) 154 strArr(strArrlBuff) = ch 155 'strArr.Add ch 156 State = NonQuotesField 157 End If 158 Case NonQuotesField 159 If ch = 44 Then 160 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 161 Erase strArr 162 ReDim strArr(1 To mcInitBuffSize) 163 strArrlBuff = 0 164 'Set strArr = New Collection 165 State = FieldSeparator 166 ElseIf ch = 13 Then 167 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 168 State = RowSeparator 169 Else 170 strArrlBuff = strArrlBuff + 1 171 If strArrlBuff Mod mcInitBuffSize = 0 Then 172 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 173 End If 174 'ReDim Preserve strArr(1 To strArrlBuff) 175 strArr(strArrlBuff) = ch 176 'strArr.Add ch 177 End If 178 Case QuotesField 179 If ch = 34 Then 180 State = QuoteInQuotesField 181 Else 182 strArrlBuff = strArrlBuff + 1 183 If strArrlBuff Mod mcInitBuffSize = 0 Then 184 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 185 End If 186 'ReDim Preserve strArr(1 To strArrlBuff) 187 strArr(strArrlBuff) = ch 188 'strArr.Add ch 189 End If 190 Case FieldSeparator 191 If ch = 44 Then 192 lineArr.Add "" 193 ElseIf ch = 34 Then 194 Erase strArr 195 ReDim strArr(1 To mcInitBuffSize) 196 strArrlBuff = 0 197 'Set strArr = New Collection 198 State = QuotesField 199 ElseIf ch = 13 Then 200 lineArr.Add "" 201 State = RowSeparator 202 Else 203 strArrlBuff = strArrlBuff + 1 204 If strArrlBuff Mod mcInitBuffSize = 0 Then 205 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 206 End If 207 'ReDim Preserve strArr(1 To strArrlBuff) 208 strArr(strArrlBuff) = ch 209 'strArr.Add ch 210 State = NonQuotesField 211 End If 212 Case QuoteInQuotesField 213 If ch = 44 Then 214 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 215 Erase strArr 216 ReDim strArr(1 To mcInitBuffSize) 217 strArrlBuff = 0 218 'Set strArr = New Collection 219 State = FieldSeparator 220 ElseIf ch = 13 Then 221 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 222 State = RowSeparator 223 ElseIf ch = 34 Then 224 strArrlBuff = strArrlBuff + 1 225 If strArrlBuff Mod mcInitBuffSize = 0 Then 226 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 227 End If 228 'ReDim Preserve strArr(1 To strArrlBuff) 229 strArr(strArrlBuff) = ch 230 'strArr.Add ch 231 State = QuotesField 232 Else 233 State = ErrorS '"语法错误: 转义字符 \" 不能完成转义 或 引号字段结尾引号没有紧贴字段分隔符"; 234 End If 235 Case RowSeparator 236 If ch = 10 Then 237 Erase strArr 238 ReDim strArr(1 To mcInitBuffSize) 239 strArrlBuff = 0 240 'Set strArr = New Collection 241 State = NewFieldStart 242 Exit For 243 Else 244 State = ErrorS '"语法错误: 行分隔用了回车 \\r。但未使用回车换行 \\r\\n "; 245 End If 246 Case ErrorS 247 GoTo errExit 248 249 End Select 250 251 ' If .bufBytes(.PtrInBuf) = 13 Or _ 252 ' .bufBytes(.PtrInBuf) = 10 Then Exit For 253 Next .PtrInBuf 254 255 '退出 For 后,判断是否找到了分行符 vbCr或vbLf 256 If .PtrInBuf <= .BufLen Then '是否找到了 vbCr或vbLf 257 If .PtrInBuf + 1 > .BufLen And _ 258 .StartPosAbso + .BufLen > af_lngFileLength Then 259 '已经读完文件 260 lIsEndRead = True 261 If lAutoClose Then CloseFile 262 Else 263 '还未读完文件,再判断是否文件只剩一个字节;若只剩一个字节并且 _ 264 '剩下的正好是 vbLf,并且下次要忽略掉 vbLf,则仍是已经读完文件 265 If .StartPosAbso + .BufLen = af_lngFileLength And .IgnoreFirstLf Then 266 '读取文件中的最后一个字节,只测试一下 267 Dim tByt() As Byte, tRet As Integer 268 tRet = FileGetBytesLocal(.StartPosAbso + .BufLen, tByt()) 269 If tRet <= 0 Then GoTo errExit '出错处理 270 If tByt(1) = 10 Then 271 '已经读完文件 272 lIsEndRead = True 273 If lAutoClose Then CloseFile 274 End If 275 End If 276 End If 277 .PtrInBuf = .PtrInBuf + 1 278 279 If lIsEndRead Then 280 '已经读完文件,一定 Exit Function 281 282 Set col = lineArr 283 Set lineArr = New Collection 284 strArrlBuff = 0 285 GetNextLine = 0 286 287 Exit Function '已经读完文件,一定 Exit Function 288 Else 'If lIsEndRead Then 289 '没有读完文件(忽略空行不退出,否则退出) 290 If GetNextLine = 0 Then 291 '不需要忽略空行或最后不是空行,退出 292 Else 293 Set col = lineArr 294 Set lineArr = New Collection 295 strArrlBuff = 0 296 GetNextLine = 1 297 Exit Function 298 End If 299 End If 'If lIsEndRead Then 300 301 Else 'If .PtrInBuf <= .BufLen Then '是否找到了 vbCr或vbLf 302 303 .PtrInBuf = -1 304 '==== 准备继续读下一个缓冲区 ==== 305 .StartPosAbso = .StartPosAbso + .BufLen 306 End If 'If .PtrInBuf <= .BufLen Then '是否找到了 vbCr或vbLf 307 Loop 308 End With 309 310 311 '//////////// 全部读完文件,看还有无剩余的 //////////// 312 313 314 Select Case State 315 Case NonQuotesField 316 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 317 Erase strArr 318 ReDim strArr(1 To mcInitBuffSize) 319 strArrlBuff = 0 320 'lineArr.Add strArr 321 'Set strArr = New Collection 322 Case QuotesField 323 GoTo errExit '"语法错误: 引号字段未闭合"; 324 Case FieldSeparator 325 lineArr.Add "" 326 Case QuoteInQuotesField 327 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 328 329 End Select 330 331 332 Set col = lineArr 333 Set lineArr = New Collection 334 strArrlBuff = 0 335 336 GetNextLine = 0 337 338 339 If lAutoClose Then CloseFile 340 lIsEndRead = True 341 '此时读完文件,必须返回 342 Exit Function 343 344 345 346 errExit: 347 lErrOccured = True 348 GetNextLine = 0 349 '为一般错误,不设置 lIsEndRead = True 350 If lAutoClose Then CloseFile 351 End Function 352 353 Private Function EncodeStr(ByRef bytIn() As Byte, hasError As Boolean, Optional byteSize As Long = -1) As String 354 355 Select Case Encode 356 Case Default 357 Dim tempStr As String 358 tempStr = bytIn 359 EncodeStr = StrConv(tempStr, vbUnicode) 360 361 Case ShifJis 362 EncodeStr = WCMB_Decode(ShifJis, bytIn, hasError, byteSize) 363 Case JIS 364 EncodeStr = WCMB_Decode(JIS, bytIn, hasError, byteSize) 365 Case Utf8 366 EncodeStr = WCMB_Decode(Utf8, bytIn, hasError, byteSize) 367 Case GB2312 368 EncodeStr = WCMB_Decode(GB2312, bytIn, hasError, byteSize) 369 End Select 370 371 End Function 372 373 374 ' 関数名 : WCMB_Decode 375 ' 返り値 : UNICODE文字列 376 ' 引き数 : cp : 入力文字データのコードページ番号 377 ' : bytIn : 入力文字データ 378 ' 機能説明 : 入力文字データをUNICODEに変換する 379 ' 備考 : MultiByteToWideCharによる文字コード変換 380 Private Function WCMB_Decode(ByVal cp As Long, ByRef bytIn() As Byte, ByRef hasError As Boolean, Optional byteSize As Long = -1) As String 381 On Error GoTo ErrHandler 382 383 Dim lngInSize As Long 384 Dim strBuf As String 385 Dim lngBufLen As Long 386 Dim lngRtn As Long 387 If byteSize > 0 Then 388 lngInSize = byteSize 389 Else 390 If bytIn(UBound(bytIn)) = 13 Then 391 lngInSize = UBound(bytIn) - 1 392 Else 393 lngInSize = UBound(bytIn) 394 End If 395 End If 396 lngBufLen = (lngInSize + 1) * 5 397 strBuf = String$(lngBufLen, vbNullChar) 398 lngRtn = MultiByteToWideChar _ 399 (cp, 0, bytIn(1), lngInSize, StrPtr(strBuf), lngBufLen) 400 If lngRtn Then 401 WCMB_Decode = Left$(strBuf, lngRtn) 402 End If 403 hasError = False 404 Exit Function 405 ErrHandler: 406 WCMB_Decode = "" 407 hasError = True 408 End Function 409 410 Public Sub Init() 411 412 ReDim strArr(1 To mcInitBuffSize) 'CSV 各个单元 缓冲区 413 strArrlBuff = 0 414 415 Erase af_Buff.bufBytes '缓冲区 416 417 418 419 af_lngFileLength = 0 420 af_Buff.StartPosAbso = 1 '当前缓冲区的起始处所在的文件位置 421 af_Buff.ptrNextStrStartInBuf = 1 422 423 '此作为标志,=-1表示下次运行 GetNextLine 要重新读取新的缓冲区 _ 424 '否则不重新读取,仍使用当前缓冲区和 .PtrInBuf 指针 425 af_Buff.PtrInBuf = -1 426 427 lErrOccured = False 428 429 430 af_Buff.IgnoreFirstLf = False '初始化标志:当前缓冲区不需要忽略第一个字节(若是vblf) 431 432 lIsEndRead = False 433 End Sub 434 435 Public Function GetPercent(Optional DotNum As Integer = 2) As Single 436 'DotNum保留几位小数,<0或>7为不保留小数 437 Dim sngPerc As Single 438 439 If af_lngFileLength > 0 Then 440 If af_Buff.PtrInBuf < 0 Then 441 sngPerc = (af_Buff.StartPosAbso - 1) / af_lngFileLength 442 Else 443 sngPerc = (af_Buff.StartPosAbso + af_Buff.PtrInBuf - 2) / af_lngFileLength 444 End If 445 End If 446 447 If DotNum >= 0 Or DotNum <= 7 Then 448 Dim Temp As Long 449 Temp = 10 ^ DotNum 450 sngPerc = Int(Temp * sngPerc + 0.5) / Temp 451 End If 452 453 GetPercent = sngPerc 454 End Function 455 456 Public Sub CloseFile() 457 If lFileNum > 0 Then Close lFileNum: lFileNum = 0 458 lStatus = -1 '表示文件已关闭 459 '不Init,防止读取行后自动关闭文件时状态变量被初始化;在OpenFile时会Init 460 End Sub 461 462 Public Function OpenFile() As Boolean 463 If lFileNum > 0 Then CloseFile '如果已打开了文件,则先关闭它 464 lFileNum = FreeFile '获得一个可用的文件号(同时属性 FileNum 的值也自动改变) 465 On Error GoTo errH '如果一下程序发生任何错误,就转到 errH 标签处执行 466 If Dir(lFileName, 31) = "" Then GoTo errH '如果文件不存在,就转到 errH 标签处执行 467 Open lFileName For Binary Access Read As #lFileNum '以二进制方式打开文件 468 lStatus = 1 '表示文件已打开 469 Init '初始化操作 470 af_lngFileLength = LOF(lFileNum) '设置文件总大小 471 OpenFile = True 472 Exit Function 473 errH: 474 If lFileNum > 0 Then CloseFile 475 OpenFile = False 476 End Function 477 478 479 480 Private Function FileGetBytesLocal(ByVal ReadPos As Long, _ 481 ArrBytes() As Byte, _ 482 Optional ByVal EndingBorder As Long = 0, _ 483 Optional ByVal ReadMax As Long = 16384) As Long 484 'Optional ByVal ReadMax As Long = 16384, _ 485 '从文件号 lFileNum 中的 ReadPos 位置开始读取一批字节 486 '从参数ArrBytes()返回读取的字节内容,会重新定义ArrBytes()数组 487 '所读取的字节数不确定,如果文件中有足够的内容,就读取ReadMax个字节, _ 488 '否则就读到文件尾(当EndingBorder参数<=0时)或读到EndingBorder _ 489 '为止(当EndingBorder参数>0时) 490 'ShowResume 指定如果读取出错,是否弹出对话框提示 491 '若ShowResume=1,提示框中有"重试"和"取消"两个按钮; 492 '若ShowResume=2,出错时提示框中有"终止"、"重试"和"忽略"三个按钮; 493 '若ShowResume=0,出错时不弹出提示框,不弹出提示框就不能在发生错误时重试 494 '返回读取的字节数,若失败返回<=0,若用户“忽略”则返回=0;_ 495 '若用户终止或取消或无提示框,则返回<0 496 497 Dim lngUBound As Long 498 499 If EndingBorder <= 0 Then EndingBorder = LOF(lFileNum) 500 If EndingBorder < ReadPos Then 501 FileGetBytesLocal = -1 502 Exit Function 503 End If 504 505 On Error GoTo errH 506 If EndingBorder - ReadPos + 1 >= ReadMax Then lngUBound = ReadMax Else _ 507 lngUBound = EndingBorder - ReadPos + 1 508 509 ReDim ArrBytes(1 To lngUBound) As Byte 510 511 Get #FileNum, ReadPos, ArrBytes 512 513 FileGetBytesLocal = lngUBound 514 Exit Function 515 errH: 516 FileGetBytesLocal = -1 517 End Function 518 519 520 521 Private Sub Class_Initialize() 522 lAutoOpen = True '设置 FileName 属性时自动打开文件 523 lAutoClose = True '读取行读完文件或出错时 自动关闭文件 524 End Sub 525 526 Private Sub Class_Terminate() 527 CloseFile 528 Erase af_Buff.bufBytes 529 530 End Sub 531 532 533 Public Property Get FileName() As String 534 FileName = lFileName 535 End Property 536 537 Public Property Let FileName(ByVal vNewValue As String) 538 If lFileNum > 0 Then CloseFile 539 lFileName = vNewValue 540 If lAutoOpen Then OpenFile 541 End Property 542 543 Public Property Get FileNum() As Integer 544 FileNum = lFileNum 545 End Property 546 547 Public Property Get Status() As Integer 548 Status = lStatus 549 End Property 550 551 Public Property Get IsEndRead() As Boolean 552 IsEndRead = lIsEndRead 553 End Property 554 555 Public Property Get AutoOpen() As Boolean 556 AutoOpen = lAutoOpen 557 End Property 558 559 Public Property Let AutoOpen(ByVal vNewValue As Boolean) 560 lAutoOpen = vNewValue 561 End Property 562 563 Public Property Get AutoClose() As Boolean 564 AutoClose = lAutoClose 565 End Property 566 567 Public Property Let AutoClose(ByVal vNewValue As Boolean) 568 lAutoClose = vNewValue 569 End Property 570 571 572 Public Property Get ErrOccured() As Boolean 573 ErrOccured = lErrOccured 574 End Property 575 576 Public Property Let ErrOccured(ByVal vNewValue As Boolean) 577 lErrOccured = vNewValue 578 End Property 579 580 Public Property Get Encode() As EncodeEnum 581 Encode = lEncode 582 End Property 583 584 Public Property Let Encode(ByVal vNewValue As EncodeEnum) 585 lEncode = vNewValue 586 End Property 587 588 Public Property Get IsEncodeErr() As Boolean 589 IsEncodeErr = EncodeErr 590 End Property
1 Option Explicit 2 '----------------读Csv文件 类--------------------- 3 4 Private Declare Function WideCharToMultiByte Lib "kernel32" _ 5 (ByVal CodePage As Long, _ 6 ByVal dwFlags As Long, _ 7 ByVal lpWideCharStr As Long, _ 8 ByVal cchWideChar As Long, _ 9 ByRef lpMultiByteStr As Any, _ 10 ByVal cchMultiByte As Long, _ 11 ByVal lpDefaultChar As String, _ 12 ByVal lpUsedDefaultChar As Long) As Long 13 14 Private Declare Function MultiByteToWideChar Lib "kernel32" _ 15 (ByVal CodePage As Long, _ 16 ByVal dwFlags As Long, _ 17 ByRef lpMultiByteStr As Any, _ 18 ByVal cchMultiByte As Long, _ 19 ByVal lpWideCharStr As Long, _ 20 ByVal cchWideChar As Long) As Long 21 22 Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (Destination As Any, Source As Any, ByVal Length As Long) 23 Private Declare Function VarPtrArray Lib "msvbvm60.dll" Alias "VarPtr" (ptr() As Any) As Long 24 25 Private Type BuffType '一个缓冲区 26 StartPosAbso As Long '该缓冲区在文件中的绝对位置 27 BufLen As Long '缓冲区总长 28 PtrInBuf As Long '缓冲区内部指针 29 ptrNextStrStartInBuf As Long '下一行内容开始位置(从此处算到下一个cr/lf为下一行) 30 IgnoreFirstLf As Boolean '是否忽略本缓冲区的第一个 vblf 31 bufBytes() As Byte '缓冲区内容(字节数组) 32 End Type 33 34 35 Dim State As StateType 36 Private Enum StateType 37 NewFieldStart 38 NonQuotesField 39 QuotesField 40 FieldSeparator 41 QuoteInQuotesField 42 RowSeparator 43 ErrorS 44 End Enum 45 46 Dim af_Buff As BuffType '一个缓冲区 47 Dim af_lngFileLength As Long 48 49 Dim lFileName As String 50 Dim lFileNum As Integer 51 Dim lStatus As Integer '-1=已关闭;1=已打开;2=已经开始读取;0=未设 52 Dim lIsEndRead As Boolean '=true表示或者读完文件或者出错,即不能再继续读了,主程序应退出读取 53 Dim lErrOccured As Boolean '是否上次 GetNextLine 发生了一个错误 54 Dim lAutoOpen As Boolean '是否设置 FileName 属性时自动打开文件,默认为true(类初始化时设为true) 55 Dim lAutoClose As Boolean '是否 读取行读完文件或出错时 自动关闭文件,默认为true(类初始化时设为true) 56 57 58 59 60 Dim lEncode As Long '编码设置 61 Dim EncodeErr As Boolean '编码转换时出错Flag 62 Public Enum EncodeEnum 63 Default = 0 64 ShifJis = 932 65 JIS = 50220 66 Utf8 = 65001 67 GB2312 = 936 68 End Enum 69 70 71 Dim ch As Long 72 '以上仅为GetNextLine函数用,为了不每次调用GetNextLine时候都重新定义,故将之做为全局的了,其实应是局部的 73 '_______________________________________ 74 Dim lineArr As New Collection 75 Dim strArr() As Byte 76 Dim strArrlBuff As Long 77 Private Const mcInitBuffSize As Long = 100 '初始分配空间大小,10K 78 Dim mIgnoreQuotes As Boolean 79 80 Public Function GetNextLine(ByRef col As Collection) As Integer 81 '读取文件的下一行文本,支持 vbCrLf、vbLf、vbCr 的多种分行符 82 '返回1表示正常读取了 83 '返回-1也表示正常,但读完了文件 84 '返回0表示出错或非法 85 '1. 一般出错返回0,并设置 lErrOccured=True 86 '2. 如果上次读完了文件,则允许再额外调用一次 GetNextLine (返回 0 并 _ 87 不提示出错,lErrOccured 仍为 false,此算非法);如果再调用就出错了 _ 88 (函数仍返回0,但 lErrOccured 为 true 此算出错) 89 90 91 '设置反映错误的标志变量 92 lErrOccured = False '表示尚未发生错误;如后续程序中发生了错误再改为 True 93 '判断和设置状态 94 If lStatus = 0 Then 95 'lStatus = 0:当前状态非法,尚未打开文件,无法读取 96 GoTo errExit 97 ElseIf lStatus < 0 Then 98 GoTo errExit '不允许额外调用了,出错 99 End If 100 101 '正常读取的情况:此时 lStatus 要么为1要么为2,即要么文件已经打开, _ 102 '要么已经进入读取状态了,总之读取下一行是没有问题的 103 lStatus = 2 '设置为2表示已经进入读取状态 104 105 106 '//////////////// 读取文件,以找到“一行”的内容 //////////////// 107 On Error GoTo errExit '有任何错误发生时都转到errExit标签处执行 108 109 With af_Buff 110 '缓冲区逐渐沿文件前进,直到缓冲区起始位置超过文件总长读完文件 111 Do Until .StartPosAbso > af_lngFileLength 112 113 '============ (1)根据需要读取文件的下一个缓冲区内容 ============ 114 '若 .PtrInBuf=-1 表示要读取下一个缓冲区,否则不读取下一个,仍使用 _ 115 当前缓冲区和 .PtrInBuf 指针 116 If .PtrInBuf < 0 Then 117 '----从 .StartPosAbso 开始读取一些字节存入缓冲区 .bufBytes() 118 .BufLen = FileGetBytesLocal(.StartPosAbso, .bufBytes()) 119 If .BufLen <= 0 Then GoTo errExit '读取出错 120 121 '----初始化缓冲区指针 122 .PtrInBuf = 1 123 '看是否需要忽略第一个 vbLf 124 If .IgnoreFirstLf Then 125 If .bufBytes(.PtrInBuf) = 10 Then '第1个字节确是 vbLf 126 '忽略第一个 vbLf 127 .PtrInBuf = .PtrInBuf + 1 128 End If 'If .bufBytes(.PtrInBuf) = 10 Then 129 130 .IgnoreFirstLf = False '恢复标志,不忽略第一个 vbLf 131 End If 'If .IgnoreFirstLf Then 132 133 '初始化下一行起始位置 ptrNextStrStartInBuf (下一行内容包含该字节) 134 .ptrNextStrStartInBuf = .PtrInBuf 135 End If 'If .PtrInBuf < 0 Then 136 137 '============ (2)逐个扫描缓冲区中的字节,查找分行符 ============ 138 '扫描缓冲区中的字节,直到找到 vbCr或vbLf 或扫描完缓冲区 139 For .PtrInBuf = .PtrInBuf To .BufLen 140 ch = .bufBytes(.PtrInBuf) 141 Select Case State '34代表双引号 44代表逗号 142 Case NewFieldStart 143 If ch = 34 Then 144 If mIgnoreQuotes Then 145 strArrlBuff = strArrlBuff + 1 146 If strArrlBuff Mod mcInitBuffSize = 0 Then 147 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 148 End If 149 strArr(strArrlBuff) = ch 150 End If 151 State = QuotesField 152 ElseIf ch = 44 Then 153 lineArr.Add "" 154 State = FieldSeparator 155 ElseIf ch = 13 Or ch = 10 Then 156 State = NewFieldStart 157 Exit For 158 Else 159 strArrlBuff = strArrlBuff + 1 160 If strArrlBuff Mod mcInitBuffSize = 0 Then 161 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 162 End If 163 strArr(strArrlBuff) = ch 164 State = NonQuotesField 165 End If 166 Case NonQuotesField 167 If ch = 44 Then 168 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 169 Erase strArr 170 ReDim strArr(1 To mcInitBuffSize) 171 strArrlBuff = 0 172 State = FieldSeparator 173 ElseIf ch = 13 Then 174 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 175 State = RowSeparator 176 Else 177 strArrlBuff = strArrlBuff + 1 178 If strArrlBuff Mod mcInitBuffSize = 0 Then 179 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 180 End If 181 strArr(strArrlBuff) = ch 182 End If 183 Case QuotesField 184 If ch = 34 Then 185 If mIgnoreQuotes Then 186 strArrlBuff = strArrlBuff + 1 187 If strArrlBuff Mod mcInitBuffSize = 0 Then 188 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 189 End If 190 strArr(strArrlBuff) = ch 191 End If 192 State = QuoteInQuotesField 193 Else 194 strArrlBuff = strArrlBuff + 1 195 If strArrlBuff Mod mcInitBuffSize = 0 Then 196 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 197 End If 198 strArr(strArrlBuff) = ch 199 End If 200 Case FieldSeparator 201 If ch = 44 Then 202 lineArr.Add "" 203 ElseIf ch = 34 Then 204 Erase strArr 205 ReDim strArr(1 To mcInitBuffSize) 206 strArrlBuff = 0 207 If mIgnoreQuotes Then strArrlBuff = 1: strArr(strArrlBuff) = ch 208 State = QuotesField 209 ElseIf ch = 13 Then 210 lineArr.Add "" 211 State = RowSeparator 212 Else 213 strArrlBuff = strArrlBuff + 1 214 If strArrlBuff Mod mcInitBuffSize = 0 Then 215 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 216 End If 217 strArr(strArrlBuff) = ch 218 State = NonQuotesField 219 End If 220 Case QuoteInQuotesField 221 If ch = 44 Then 222 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 223 Erase strArr 224 ReDim strArr(1 To mcInitBuffSize) 225 strArrlBuff = 0 226 State = FieldSeparator 227 ElseIf ch = 13 Then 228 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 229 State = RowSeparator 230 ElseIf ch = 34 Then 231 strArrlBuff = strArrlBuff + 1 232 If strArrlBuff Mod mcInitBuffSize = 0 Then 233 ReDim Preserve strArr(1 To strArrlBuff + mcInitBuffSize) 234 End If 235 strArr(strArrlBuff) = ch 236 State = QuotesField 237 Else 238 State = ErrorS '"语法错误: 转义字符 \" 不能完成转义 或 引号字段结尾引号没有紧贴字段分隔符"; 239 End If 240 Case RowSeparator 241 If ch = 10 Then 242 Erase strArr 243 ReDim strArr(1 To mcInitBuffSize) 244 strArrlBuff = 0 245 State = NewFieldStart 246 Exit For 247 Else 248 State = ErrorS '"语法错误: 行分隔用了回车 \\r。但未使用回车换行 \\r\\n "; 249 End If 250 Case ErrorS 251 GoTo errExit 252 253 End Select 254 Next .PtrInBuf 255 256 '退出 For 后,判断是否找到了分行符 vbCr或vbLf 257 If .PtrInBuf <= .BufLen Then '是否找到了 vbCr或vbLf 258 If .PtrInBuf + 1 > .BufLen And _ 259 .StartPosAbso + .BufLen > af_lngFileLength Then 260 '已经读完文件 261 lIsEndRead = True 262 If lAutoClose Then CloseFile 263 Else 264 '还未读完文件,再判断是否文件只剩一个字节;若只剩一个字节并且 _ 265 '剩下的正好是 vbLf,并且下次要忽略掉 vbLf,则仍是已经读完文件 266 If .StartPosAbso + .BufLen = af_lngFileLength And .IgnoreFirstLf Then 267 '读取文件中的最后一个字节,只测试一下 268 Dim tByt() As Byte, tRet As Integer 269 tRet = FileGetBytesLocal(.StartPosAbso + .BufLen, tByt()) 270 If tRet <= 0 Then GoTo errExit '出错处理 271 If tByt(1) = 10 Then 272 '已经读完文件 273 lIsEndRead = True 274 If lAutoClose Then CloseFile 275 End If 276 End If 277 End If 278 .PtrInBuf = .PtrInBuf + 1 279 280 If lIsEndRead Then 281 '已经读完文件,一定 Exit Function 282 283 Set col = lineArr 284 Set lineArr = New Collection 285 strArrlBuff = 0 286 GetNextLine = 0 287 288 Exit Function '已经读完文件,一定 Exit Function 289 Else 'If lIsEndRead Then 290 If lineArr.Count <> 0 Then 291 Set col = lineArr 292 Set lineArr = New Collection 293 strArrlBuff = 0 294 GetNextLine = 1 295 Exit Function 296 End If 297 End If 'If lIsEndRead Then 298 299 Else 'If .PtrInBuf <= .BufLen Then '是否找到了 vbCr或vbLf 300 301 .PtrInBuf = -1 302 '==== 准备继续读下一个缓冲区 ==== 303 .StartPosAbso = .StartPosAbso + .BufLen 304 End If 'If .PtrInBuf <= .BufLen Then '是否找到了 vbCr或vbLf 305 Loop 306 End With 307 308 309 '//////////// 全部读完文件,看还有无剩余的 //////////// 310 311 312 Select Case State 313 Case NonQuotesField 314 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 315 Erase strArr 316 ReDim strArr(1 To mcInitBuffSize) 317 strArrlBuff = 0 318 Case QuotesField 319 GoTo errExit '"语法错误: 引号字段未闭合"; 320 Case FieldSeparator 321 lineArr.Add "" 322 Case QuoteInQuotesField 323 lineArr.Add EncodeStr(strArr, EncodeErr, strArrlBuff) '代码转换 strArr 324 325 End Select 326 327 328 Set col = lineArr 329 Set lineArr = New Collection 330 strArrlBuff = 0 331 GetNextLine = 0 332 333 334 If lAutoClose Then CloseFile 335 lIsEndRead = True 336 '此时读完文件,必须返回 337 Exit Function 338 339 340 341 errExit: 342 lErrOccured = True 343 GetNextLine = 0 344 '为一般错误,不设置 lIsEndRead = True 345 If lAutoClose Then CloseFile 346 End Function 347 348 Private Function EncodeStr(ByRef bytIn() As Byte, hasError As Boolean, Optional byteSize As Long = -1) As String 349 On Error GoTo errH: 350 Select Case Encode 351 Case Default 352 If byteSize > 0 Then 353 ReDim Preserve bytIn(1 To byteSize) 354 End If 355 EncodeStr = bytIn 356 EncodeStr = StrConv(EncodeStr, vbUnicode) 357 Case ShifJis 358 EncodeStr = WCMB_Decode(ShifJis, bytIn, hasError, byteSize) 359 Case JIS 360 EncodeStr = WCMB_Decode(JIS, bytIn, hasError, byteSize) 361 Case Utf8 362 EncodeStr = WCMB_Decode(Utf8, bytIn, hasError, byteSize) 363 Case GB2312 364 EncodeStr = WCMB_Decode(GB2312, bytIn, hasError, byteSize) 365 End Select 366 hasError = False 367 Exit Function 368 errH: 369 hasError = True 370 End Function 371 372 373 ' 関数名 : WCMB_Decode 374 ' 返り値 : UNICODE文字列 375 ' 引き数 : cp : 入力文字データのコードページ番号 376 ' : bytIn : 入力文字データ 377 ' 機能説明 : 入力文字データをUNICODEに変換する 378 ' 備考 : MultiByteToWideCharによる文字コード変換 379 Private Function WCMB_Decode(ByVal cp As Long, ByRef bytIn() As Byte, ByRef hasError As Boolean, Optional byteSize As Long = -1) As String 380 On Error GoTo ErrHandler 381 382 Dim lngInSize As Long 383 Dim strBuf As String 384 Dim lngBufLen As Long 385 Dim lngRtn As Long 386 If byteSize > 0 Then 387 lngInSize = byteSize 388 Else 389 If bytIn(UBound(bytIn)) = 13 Then 390 lngInSize = UBound(bytIn) - 1 391 Else 392 lngInSize = UBound(bytIn) 393 End If 394 End If 395 lngBufLen = (lngInSize + 1) * 5 396 strBuf = String$(lngBufLen, vbNullChar) 397 lngRtn = MultiByteToWideChar _ 398 (cp, 0, bytIn(1), lngInSize, StrPtr(strBuf), lngBufLen) 399 If lngRtn Then 400 WCMB_Decode = Left$(strBuf, lngRtn) 401 End If 402 hasError = False 403 Exit Function 404 ErrHandler: 405 WCMB_Decode = "" 406 hasError = True 407 End Function 408 409 Public Sub Init() 410 411 ReDim strArr(1 To mcInitBuffSize) 'CSV 各个单元 缓冲区 412 strArrlBuff = 0 413 414 Erase af_Buff.bufBytes '缓冲区 415 416 417 418 af_lngFileLength = 0 419 af_Buff.StartPosAbso = 1 '当前缓冲区的起始处所在的文件位置 420 af_Buff.ptrNextStrStartInBuf = 1 421 422 '此作为标志,=-1表示下次运行 GetNextLine 要重新读取新的缓冲区 _ 423 '否则不重新读取,仍使用当前缓冲区和 .PtrInBuf 指针 424 af_Buff.PtrInBuf = -1 425 426 lErrOccured = False 427 428 429 af_Buff.IgnoreFirstLf = False '初始化标志:当前缓冲区不需要忽略第一个字节(若是vblf) 430 431 lIsEndRead = False 432 End Sub 433 434 Public Function GetPercent(Optional DotNum As Integer = 2) As Single 435 'DotNum保留几位小数,<0或>7为不保留小数 436 Dim sngPerc As Single 437 438 If af_lngFileLength > 0 Then 439 If af_Buff.PtrInBuf < 0 Then 440 sngPerc = (af_Buff.StartPosAbso - 1) / af_lngFileLength 441 Else 442 sngPerc = (af_Buff.StartPosAbso + af_Buff.PtrInBuf - 2) / af_lngFileLength 443 End If 444 End If 445 446 If DotNum >= 0 Or DotNum <= 7 Then 447 Dim Temp As Long 448 Temp = 10 ^ DotNum 449 sngPerc = Int(Temp * sngPerc + 0.5) / Temp 450 End If 451 452 GetPercent = sngPerc 453 End Function 454 455 Public Sub CloseFile() 456 If lFileNum > 0 Then Close lFileNum: lFileNum = 0 457 lStatus = -1 '表示文件已关闭 458 '不Init,防止读取行后自动关闭文件时状态变量被初始化;在OpenFile时会Init 459 End Sub 460 461 Public Function OpenFile() As Boolean 462 If lFileNum > 0 Then CloseFile '如果已打开了文件,则先关闭它 463 lFileNum = FreeFile '获得一个可用的文件号(同时属性 FileNum 的值也自动改变) 464 On Error GoTo errH '如果一下程序发生任何错误,就转到 errH 标签处执行 465 If Dir(lFileName, 31) = "" Then GoTo errH '如果文件不存在,就转到 errH 标签处执行 466 Open lFileName For Binary Access Read As #lFileNum '以二进制方式打开文件 467 lStatus = 1 '表示文件已打开 468 Init '初始化操作 469 af_lngFileLength = LOF(lFileNum) '设置文件总大小 470 OpenFile = True 471 Exit Function 472 errH: 473 If lFileNum > 0 Then CloseFile 474 OpenFile = False 475 End Function 476 477 478 479 Private Function FileGetBytesLocal(ByVal ReadPos As Long, _ 480 ArrBytes() As Byte, _ 481 Optional ByVal EndingBorder As Long = 0, _ 482 Optional ByVal ReadMax As Long = 16384) As Long 483 'Optional ByVal ReadMax As Long = 16384, _ 484 '从文件号 lFileNum 中的 ReadPos 位置开始读取一批字节 485 '从参数ArrBytes()返回读取的字节内容,会重新定义ArrBytes()数组 486 '所读取的字节数不确定,如果文件中有足够的内容,就读取ReadMax个字节, _ 487 '否则就读到文件尾(当EndingBorder参数<=0时)或读到EndingBorder _ 488 '为止(当EndingBorder参数>0时) 489 'ShowResume 指定如果读取出错,是否弹出对话框提示 490 '若ShowResume=1,提示框中有"重试"和"取消"两个按钮; 491 '若ShowResume=2,出错时提示框中有"终止"、"重试"和"忽略"三个按钮; 492 '若ShowResume=0,出错时不弹出提示框,不弹出提示框就不能在发生错误时重试 493 '返回读取的字节数,若失败返回<=0,若用户“忽略”则返回=0;_ 494 '若用户终止或取消或无提示框,则返回<0 495 496 Dim lngUBound As Long 497 498 If EndingBorder <= 0 Then EndingBorder = LOF(lFileNum) 499 If EndingBorder < ReadPos Then 500 FileGetBytesLocal = -1 501 Exit Function 502 End If 503 504 On Error GoTo errH 505 If EndingBorder - ReadPos + 1 >= ReadMax Then lngUBound = ReadMax Else _ 506 lngUBound = EndingBorder - ReadPos + 1 507 508 ReDim ArrBytes(1 To lngUBound) As Byte 509 510 Get #FileNum, ReadPos, ArrBytes 511 512 FileGetBytesLocal = lngUBound 513 Exit Function 514 errH: 515 FileGetBytesLocal = -1 516 End Function 517 518 519 520 Private Sub Class_Initialize() 521 lAutoOpen = True '设置 FileName 属性时自动打开文件 522 lAutoClose = True '读取行读完文件或出错时 自动关闭文件 523 mIgnoreQuotes = False 524 End Sub 525 526 Private Sub Class_Terminate() 527 CloseFile 528 Erase af_Buff.bufBytes 529 530 End Sub 531 532 533 Public Property Get FileName() As String 534 FileName = lFileName 535 End Property 536 537 Public Property Let FileName(ByVal vNewValue As String) 538 If lFileNum > 0 Then CloseFile 539 lFileName = vNewValue 540 If lAutoOpen Then OpenFile 541 End Property 542 543 Public Property Get FileNum() As Integer 544 FileNum = lFileNum 545 End Property 546 547 Public Property Get Status() As Integer 548 Status = lStatus 549 End Property 550 551 Public Property Get IsEndRead() As Boolean 552 IsEndRead = lIsEndRead 553 End Property 554 555 Public Property Get AutoOpen() As Boolean 556 AutoOpen = lAutoOpen 557 End Property 558 559 Public Property Let AutoOpen(ByVal vNewValue As Boolean) 560 lAutoOpen = vNewValue 561 End Property 562 563 Public Property Get AutoClose() As Boolean 564 AutoClose = lAutoClose 565 End Property 566 567 Public Property Let AutoClose(ByVal vNewValue As Boolean) 568 lAutoClose = vNewValue 569 End Property 570 571 572 Public Property Get ErrOccured() As Boolean 573 ErrOccured = lErrOccured 574 End Property 575 576 Public Property Let ErrOccured(ByVal vNewValue As Boolean) 577 lErrOccured = vNewValue 578 End Property 579 580 Public Property Get Encode() As EncodeEnum 581 Encode = lEncode 582 End Property 583 584 Public Property Let Encode(ByVal vNewValue As EncodeEnum) 585 lEncode = vNewValue 586 End Property 587 588 Public Property Get IsEncodeErr() As Boolean 589 IsEncodeErr = EncodeErr 590 End Property 591 592 Public Property Let IgnoreQuotes(ByVal vNewValue As Boolean) 593 mIgnoreQuotes = vNewValue 594 End Property 595 596 Public Property Get IgnoreQuotes() As Boolean 597 IgnoreQuotes = mIgnoreQuotes 598 End Property
1 Dim aFile As clsCsv 2 3 Dim strCol As Collection 4 5 Set aFile = New clsCsv 6 7 aFile.FileName = "C:\Users\Administrator\Desktop\Àϱøд«³ÌÐòÔ´´úÂë\µÚ6ÕÂ\Îı¾Îļþ°´ÐжÁÈ¡\ʾÀýÎļþ(»»Ðзû·ÖÐÐ).csv" 8 9 aFile.Encode = Utf8 10 11 Do Until aFile.IsEndRead 12 aFile.GetNextLine strCol 13 If aFile.ErrOccured Then 14 Exit Do 15 Else 16 i = i + 1 17 ' Debug.Print strLine 18 Label1.Caption = aFile.GetPercent * 100 & "%" 19 If i Mod 500 = 1 Then DoEvents 20 End If 21 Loop