[UMU WSH 教程](43) WIA 应用实例 - 批量转换图片格式
[UMU WSH 教程](42) FSO 应用实例 - 批量删除文件
UMU WSH 教程代码下载:http://sdrv.ms/ZpPPaS
UMU WSH Git:http://git.oschina.net/umu618/umu-wsh
数据经常 in-in-out-out 难免产生重复,比如上次 UMU 在 Surface RT 上导入 iPhone4S 和 iPhone5 的照片时发生的悲剧:
把 iPhone 4S 的照片导到 #Surface#,然后刷机去越狱,后来又把 iPhone 5 的照片也导进来,没料到两个 iPhone 的文件夹是一样的名字,于是文件混到了一起,尼玛!里面有不少内容不一样文件名一样的照片,所以选择不覆盖,重新复制 5 的照片到另一个文件夹,但 4S 的文件夹里已经混入了好多 5 的照片!还好 #Surface# 支持 VBScript,哥的去重复文件脚本可以发挥作用了!生产力第一名的平板——其实就是给没有生产力的 iOS 设备打工的,哈!悲剧~
本篇要介绍的是使用 Surface RT 支持的 WSH 脚本实现查找重复文件这个任务,其中使用到三个主要对象:FSO(Scripting.FileSystemObject)和 SD(Scripting.Dictionary)大家应该已经熟悉;WII 是 WindowsInstaller.Installer,用来计算文件的 MD5 hash 值,参考:《[UMU WSH 教程](40) 利用 WindowsInstaller.Installer 对象计算文件 MD5 hash 值》一文,http://hi.baidu.com/umu618/item/0769e3cecd216e3298b4980a
先把思路介绍清楚,以便读者使用其它语言实现:1、计算 Hash 的代价比较高,而文件大小很容易获取,所以应该先按文件大小分组,只有大小相同的文件才需要计算 Hash;2、空文件都是重复的,不用计算 Hash;3、由于要做成通用程序,所以只做了查找,您可以根据代码,自行添加删除带某些特征的重复文件的功能。代码如下:
' 44_FindDuplicates.VBS
' UMU @ 10:23 2013/05/26
' [UMU WSH Tutorial](44) Find duplicate files
'
' Usage: drop files and/or folders onto this script (or use it from the
' "Send to" menu). Files are first grouped by size; only files that share a
' size with another file are hashed. Groups of files with the same size and
' hash are written to "Duplicates.txt" next to this script.
Option Explicit

Const APP_TITLE = "UMU.Script.Tools.FindDuplicates"

' Shared state used by all procedures below.
Dim fso                             ' Scripting.FileSystemObject
Dim wi                              ' WindowsInstaller.Installer (created in FindDuplicates)
Dim file_name_dictionary            ' file index -> full path
Dim file_size_dictionary            ' file size  -> ";"-joined list of file indexes
Dim file_size_and_hash_dictionary   ' "size:hash" (or 0 for empty files) -> ";"-joined file indexes
Dim file_index                      ' next index to hand out in AddFileWithSize
Dim group                           ' number of duplicate groups found

Call Main()

' Entry point: shows usage/installs into SendTo when started without
' arguments, otherwise collects the dropped files and runs the search.
Private Sub Main()
  Set fso = CreateObject("Scripting.FileSystemObject")

  Dim args
  Set args = WScript.Arguments

  If args.Count = 0 Then
    Dim wsh
    Dim send_to, copy_to

    MsgBox "本程序用来查找重复文件(根据文件内容,非文件名)。" & vbCrLf & _
      "请把要处理的文件或文件夹拖放到本程序的图标上!", _
      vbInformation, APP_TITLE

    Set wsh = CreateObject("WScript.Shell")
    send_to = wsh.SpecialFolders("SendTo")
    copy_to = send_to & "\FindDuplicates.VBE"

    ' Offer to copy this script into the SendTo folder if it is not there yet.
    If Not fso.FileExists(copy_to) Then
      If vbOK = MsgBox("本程序用来查找重复文件。" & vbCrLf & _
        "请把要处理的文件或文件夹拖放到本程序的图标上!" & vbCrLf & vbCrLf & _
        "提示:您可以把此文件放在 Sendto 目录里,然后使用右键菜单的“发送到”。" & vbCrLf & _
        "您的 Sendto 目录是 " & send_to & vbCrLf & "按“确定”执行复制操作。", _
        vbOKCancel + vbInformation, APP_TITLE) Then
        fso.CopyFile WScript.ScriptFullName, copy_to

        If vbYes = MsgBox("是否查看 Sendto 目录?", vbQuestion + vbYesNo, APP_TITLE) Then
          ' Quote the path so that a SendTo folder containing spaces
          ' (e.g. in the user name) is passed to Explorer as one argument.
          wsh.Run "%SystemRoot%\explorer.exe /n, /select,""" & copy_to & """"
        End If
      End If
    End If

    Set wsh = Nothing
    Set args = Nothing
    Set fso = Nothing
    Exit Sub
  End If

  Set file_name_dictionary = CreateObject("Scripting.Dictionary")
  Set file_size_dictionary = CreateObject("Scripting.Dictionary")
  file_index = 0
  group = 0

  ' Each argument may be a folder (scanned recursively) or a single file;
  ' anything else is ignored.
  Dim ar
  For Each ar In args
    If fso.FolderExists(ar) Then
      Call AddFolder(ar)
    ElseIf fso.FileExists(ar) Then
      Call AddFile(ar)
    End If
  Next
  Set ar = Nothing
  Set args = Nothing

  If file_name_dictionary.Count = file_size_dictionary.Count Then
    ' Every file has a unique size, so no two files can be identical.
    MsgBox "文件总数:" & file_name_dictionary.Count & vbCrLf & _
      "没有重复的文件", vbInformation, "整个世界清静了!"
  ElseIf MsgBox("文件总数:" & file_name_dictionary.Count & vbCrLf & _
    "文件大小不重复数:" & file_size_dictionary.Count, _
    vbInformation + vbOkCancel, "按确定继续") = vbOK Then
    Call FindDuplicates()
    ' Report the group count only when a scan actually ran; otherwise
    ' "group" was never computed and the message would show no number.
    MsgBox "重复组数:" & group, vbInformation, "整个世界清静了!"
  End If

  Set fso = Nothing
  Set file_name_dictionary = Nothing
  Set file_size_dictionary = Nothing
End Sub

' Registers every file in folder_path and, recursively, in its subfolders.
Private Sub AddFolder(ByVal folder_path)
  'On Error Resume Next
  Dim rfd, fs, f, fds, fd

  Set rfd = fso.GetFolder(folder_path)

  Set fs = rfd.Files
  For Each f In fs
    Call AddFileWithSize(f.Path, f.Size)
  Next

  Set fds = rfd.SubFolders
  For Each fd In fds
    Call AddFolder(fd.Path)
  Next
End Sub

' Registers a single file given by path.
Private Sub AddFile(file_path)
  'On Error Resume Next
  Dim file
  Set file = fso.GetFile(file_path)
  Call AddFileWithSize(file.Path, file.Size)
  Set file = Nothing
End Sub

' Assigns the next index to file_path and appends that index to the list
' of files having the same file_size.
Private Sub AddFileWithSize(file_path, file_size)
  'On Error Resume Next
  file_name_dictionary.Add file_index, file_path

  If file_size_dictionary.Exists(file_size) Then
    file_size_dictionary.Item(file_size) = _
      file_size_dictionary.Item(file_size) & ";" & file_index
  Else
    file_size_dictionary.Add file_size, file_index
  End If

  file_index = file_index + 1
End Sub

' Returns the 8-digit hexadecimal form of a 32-bit integer with its four
' bytes in reversed order (e.g. &H12345678 -> "78563412").
Private Function BigEndianHex(int)
  Dim result
  Dim b1, b2, b3, b4

  ' Left-pad to exactly 8 hex digits before slicing into bytes.
  result = Right("0000000" & Hex(int), 8)
  b1 = Mid(result, 7, 2)
  b2 = Mid(result, 5, 2)
  b3 = Mid(result, 3, 2)
  b4 = Mid(result, 1, 2)

  BigEndianHex = b1 & b2 & b3 & b4
End Function

' Returns the hash of file_name as a hex string, built by concatenating
' the byte-swapped hex form of every integer field of the record returned
' by WindowsInstaller.Installer.FileHash.
' NOTE(review): errors raised by FileHash (e.g. presumably for a file that
' cannot be opened) are not handled here and will stop the script - confirm
' whether unreadable files should be skipped instead.
Private Function GetFileHash(file_name)
  Dim file_hash
  Dim hash_value
  Dim i

  Set file_hash = wi.FileHash(file_name, 0)

  hash_value = ""
  For i = 1 To file_hash.FieldCount
    hash_value = hash_value & BigEndianHex(file_hash.IntegerData(i))
  Next
  Set file_hash = Nothing

  GetFileHash = hash_value
End Function

' Hashes every file whose size is shared with at least one other file,
' groups files by "size:hash", and writes each group with more than one
' member to Duplicates.txt in the script's folder. Sets the global "group"
' to the number of groups written.
Private Sub FindDuplicates()
  Set wi = CreateObject("WindowsInstaller.Installer")
  Set file_size_and_hash_dictionary = CreateObject("Scripting.Dictionary")

  Dim file_size_array
  Dim file_name_index_array
  Dim ubound_of_file_size_dictionary

  ' UMU: dictionary -> 2 arrays, for quick finding
  file_size_array = file_size_dictionary.Keys
  file_name_index_array = file_size_dictionary.Items
  ubound_of_file_size_dictionary = file_size_dictionary.Count - 1
  file_size_dictionary.RemoveAll

  Dim i
  Dim file_index_array
  Dim index
  Dim hash
  Dim key

  For i = 0 To ubound_of_file_size_dictionary
    ' A ";" in the index list means more than one file has this size.
    If InStr(file_name_index_array(i), ";") <> 0 Then
      If file_size_array(i) = 0 Then
        ' UMU: empty files are all the same, no need to hash them
        file_size_and_hash_dictionary.Add 0, file_name_index_array(i)
      Else
        file_index_array = Split(file_name_index_array(i), ";")
        For Each index In file_index_array
          ' UMU: CLng() is important - Split yields strings, but the
          ' dictionary keys are numeric.
          hash = GetFileHash(file_name_dictionary.Item(CLng(index)))
          key = file_size_array(i) & ":" & hash

          If file_size_and_hash_dictionary.Exists(key) Then
            ' UMU: we've got it!
            file_size_and_hash_dictionary.Item(key) = _
              file_size_and_hash_dictionary.Item(key) & ";" & index
          Else
            file_size_and_hash_dictionary.Add key, index
          End If
        Next
      End If
    End If
  Next

  Erase file_size_array
  Erase file_name_index_array

  ' UMU: dictionary -> 2 arrays, for quick finding
  Dim file_and_hash_array
  Dim ubound_of_file_size_and_hash_dictionary

  file_and_hash_array = file_size_and_hash_dictionary.Keys
  file_name_index_array = file_size_and_hash_dictionary.Items
  ubound_of_file_size_and_hash_dictionary = file_size_and_hash_dictionary.Count - 1
  file_size_and_hash_dictionary.RemoveAll

  Dim file
  Dim cd

  ' The report is written next to the script itself.
  cd = WScript.ScriptFullName
  cd = Left(cd, InStrRev(cd, "\"))

  ' Overwrite any previous report and write it as Unicode: with the default
  ' ANSI mode, WriteLine fails on file names containing characters that
  ' are not representable in the system code page.
  Set file = fso.CreateTextFile(cd & "Duplicates.txt", True, True)

  group = 0
  For i = 0 To ubound_of_file_size_and_hash_dictionary
    If InStr(file_name_index_array(i), ";") <> 0 Then
      group = group + 1
      file.WriteLine "// Group " & group & ", " & file_and_hash_array(i)

      file_index_array = Split(file_name_index_array(i), ";")
      For Each index In file_index_array
        file.WriteLine file_name_dictionary.Item(CLng(index))
      Next

      file.WriteLine ""
    End If
  Next

  file.Close
  Set file = Nothing

  Erase file_and_hash_array
  Erase file_name_index_array

  Set wi = Nothing
  Set file_size_and_hash_dictionary = Nothing
End Sub