[UMU WSH 教程](44) WII 应用实例 - 查找重复文件

[UMU WSH 教程](43) WIA 应用实例 - 批量转换图片格式

[UMU WSH 教程](42) FSO 应用实例 - 批量删除文件

UMU WSH 教程代码下载:http://sdrv.ms/ZpPPaS

UMU WSH Git:http://git.oschina.net/umu618/umu-wsh

  数据经常 in-in-out-out 难免产生重复,比如上次 UMU 在 Surface RT 上导入 iPhone4S 和 iPhone5 的照片时发生的悲剧:

把 iPhone 4S 的照片导到 #Surface#,然后刷机去越狱,后来又把 iPhone 5 的照片也导进来,没料到两个 iPhone 的文件夹是一样的名字,于是文件混到了一起,尼玛!里面有不少内容不一样文件名一样的照片,所以选择不覆盖,重新复制 5 的照片到另一个文件夹,但 4S 的文件夹里已经混入了好多 5 的照片!还好 #Surface# 支持 VBScript,哥的去重复文件脚本可以发挥作用了!生产力第一名的平板——其实就是给没有生产力的 iOS 设备打工的,哈!悲剧~

  本篇要介绍的是使用 Surface RT 支持的 WSH 脚本实现查找重复文件这个任务,其中使用到的三个主要对象:FSO、SD 大家应该知道,WII 是 WindowsInstaller.Installer,参考:《[UMU WSH 教程](40) 利用 WindowsInstaller.Installer 对象计算文件 MD5 hash 值》一文,http://hi.baidu.com/umu618/item/0769e3cecd216e3298b4980a

  先把思路介绍清楚,以便读者使用其它语言实现:1、计算 Hash 的代价是比较高的,而文件大小是很容易获取的,所以应该先利用文件大小来比较文件,只有大小重复的需要计算 Hash;2、空文件都是重复的,不用 Hash 了……3、由于要做成通用程序,所以只做了查找,您可以根据代码,自行添加删除带某些特征的重复文件。代码如下:

' 44_FindDuplicates.VBS
' UMU @ 10:23 2013/05/26
' [UMU WSH 教程](44) 查找重复文件
Option Explicit

Const APP_TITLE = "UMU.Script.Tools.FindDuplicates"

Dim fso
Dim wi
Dim file_name_dictionary, file_size_dictionary, file_size_and_hash_dictionary
Dim file_index
Dim group

Call Main()

Private Sub Main()
    Set fso = CreateObject("Scripting.FileSystemObject")
    Dim args
    Set args = WScript.Arguments
    If args.Count = 0 Then
        Dim wsh
        Dim send_to, copy_to

        MsgBox "本程序用来查找重复文件(根据文件内容,非文件名)。" & vbCrLf & _
            "请把要处理的文件或文件夹拖放到本程序的图标上!", _
            vbInformation, APP_TITLE

        Set wsh = CreateObject( "WScript.Shell" )
        send_to = wsh.SpecialFolders("SendTo")
        copy_to = send_to & "\FindDuplicates.VBE"

        If Not fso.FileExists(copy_to) Then
            If vbOK = MsgBox("本程序用来查找重复文件。" & vbCrLf & _
                "请把要处理的文件或文件夹拖放到本程序的图标上!" & vbCrLf & vbCrLf & _
                "提示:您可以把此文件放在 Sendto 目录里,然后使用右键菜单的“发送到”。" & vbCrLf & _
                "您的 Sendto 目录是 " & send_to & vbCrLf & "按“确定”执行复制操作。", _
                vbOKCancel + vbInformation, APP_TITLE) Then

                fso.CopyFile WScript.ScriptFullName, copy_to

                If vbYes = MsgBox("是否查看 Sendto 目录?", vbQuestion + vbYesNo, APP_TITLE) Then
                    wsh.Run "%SystemRoot%\explorer.exe /n, /select," & copy_to
                End If
            End If
        End If

        Set wsh = Nothing
        Set args = Nothing
        Set fso = Nothing
        Exit Sub
    End If

    Set file_name_dictionary = CreateObject("Scripting.Dictionary")
    Set file_size_dictionary = CreateObject("Scripting.Dictionary")
    file_index = 0

    Dim ar
    For Each ar In args
        If fso.FolderExists(ar) Then
            Call AddFolder(ar)
        ElseIf fso.FileExists(ar) Then
            Call AddFile(ar)
        End If
    Next
    Set ar = Nothing
    Set args = Nothing

    If file_name_dictionary.Count = file_size_dictionary.Count Then
        MsgBox "文件总数:" & file_name_dictionary.Count & vbCrLf & _
            "没有重复的文件", vbInformation, "整个世界清静了!"
    ElseIf MsgBox("文件总数:" & file_name_dictionary.Count & vbCrLf & _
        "文件大小不重复数:" & file_size_dictionary.Count, vbInformation + vbOkCancel, "按确定继续") = vbOK Then
        Call FindDuplicates()
    End If
    Set fso = Nothing
    Set file_name_dictionary = Nothing
    Set file_size_dictionary = Nothing

    MsgBox "重复组数:" & group, vbInformation, "整个世界清静了!"
End Sub

Private Sub AddFolder(ByVal folder_path)
    'On Error Resume Next

    Dim rfd, fs, f, fds, fd

    Set rfd = fso.GetFolder(folder_path)
    Set fs = rfd.Files
    For Each f In fs
        Call AddFileWithSize(f.Path, f.Size)
    Next

    Set fds = rfd.SubFolders
    For Each fd In fds
        Call AddFolder(fd.Path)
    Next
End Sub

Private Sub AddFile(file_path)
    'On Error Resume Next
    Dim file

    Set file = fso.GetFile(file_path)
    Call AddFileWithSize(file.Path, file.Size)
    Set file = Nothing
End Sub

Private Sub AddFileWithSize(file_path, file_size)
    'On Error Resume Next
    file_name_dictionary.Add file_index, file_path
    If file_size_dictionary.Exists(file_size) Then
        file_size_dictionary.Item(file_size) = file_size_dictionary.Item(file_size) & ";" & file_index
    Else
        file_size_dictionary.Add file_size, file_index
    End If
    file_index = file_index + 1
End Sub

Private Function BigEndianHex(int)
    Dim result
    Dim b1, b2, b3, b4

    result = Right("0000000" & Hex(int), 8)
    b1 = Mid(result, 7, 2)
    b2 = Mid(result, 5, 2)
    b3 = Mid(result, 3, 2)
    b4 = Mid(result, 1, 2)

    BigEndianHex = b1 & b2 & b3 & b4
End Function

Private Function GetFileHash(file_name)
    Dim file_hash
    Dim hash_value
    Dim i

    Set file_hash = wi.FileHash(file_name, 0)
    hash_value = ""
    For i = 1 To file_hash.FieldCount
        hash_value = hash_value & BigEndianHex(file_hash.IntegerData(i))
    Next
    Set file_hash = Nothing
    GetFileHash = hash_value
End Function

Private Sub FindDuplicates()
    Set wi = CreateObject("WindowsInstaller.Installer")
    Set file_size_and_hash_dictionary = CreateObject("Scripting.Dictionary")

    Dim file_size_array
    Dim file_name_index_array
    Dim ubound_of_file_size_dictionary

    ' UMU: dictionary -> 2 arraies, for quick finding
    file_size_array = file_size_dictionary.Keys
    file_name_index_array = file_size_dictionary.Items
    ubound_of_file_size_dictionary = file_size_dictionary.Count - 1
    file_size_dictionary.RemoveAll

    Dim i
    For i = 0 To ubound_of_file_size_dictionary
        If InStr(file_name_index_array(i), ";") <> 0 Then
            If file_size_array(i) = 0 Then
                ' UMU: empty files are all the same
                file_size_and_hash_dictionary.Add 0, file_name_index_array(i)
            Else
                Dim file_index_array
                Dim index

                file_index_array = Split(file_name_index_array(i), ";")
                For Each index In file_index_array
                    Dim hash
                    Dim key
                    ' UMU: CLng() is important
                    hash = GetFileHash(file_name_dictionary.Item(CLng(index)))
                    key = file_size_array(i) & ":" & hash
                    If file_size_and_hash_dictionary.Exists(key) Then
                        ' UMU: we've got it!
                        file_size_and_hash_dictionary.Item(key) = file_size_and_hash_dictionary.Item(key) & ";" & index
                    Else
                        file_size_and_hash_dictionary.Add key, index
                    End If
                Next
            End If
        End If
    Next
    Erase file_size_array
    Erase file_name_index_array

    ' UMU: dictionary -> 2 arraies, for quick finding
    Dim file_and_hash_array
    Dim ubound_of_file_size_and_hash_dictionary
    file_and_hash_array = file_size_and_hash_dictionary.Keys
    file_name_index_array = file_size_and_hash_dictionary.Items
    ubound_of_file_size_and_hash_dictionary = file_size_and_hash_dictionary.Count - 1
    file_size_and_hash_dictionary.RemoveAll

    Dim file
    Dim cd

    cd = WScript.ScriptFullName
    cd = Left(cd, InStrRev(cd, "\"))
    Set file = fso.CreateTextFile(cd & "Duplicates.txt")
    group = 0
    For i = 0 To ubound_of_file_size_and_hash_dictionary
        If InStr(file_name_index_array(i), ";") <> 0 Then
            group = group + 1
            file.WriteLine "// Group " & group & ", " & file_and_hash_array(i)
            file_index_array = Split(file_name_index_array(i), ";")
            For Each index In file_index_array
                file.WriteLine file_name_dictionary.Item(CLng(index))
            Next
            file.WriteLine ""
        End If
    Next
    file.Close
    Set file = Nothing
    Erase file_and_hash_array
    Erase file_name_index_array

    Set wi = Nothing
    Set file_size_and_hash_dictionary = Nothing
End Sub

 

你可能感兴趣的:(hash,vbs,WSH,UMU)