微信缓存的图片dat格式转码为jpg等图片文件

微信中经常会收到各种图片，想要批量处理，但微信缓存使用的是 dat 格式，无法直接预览；如果图片较多，全部删除又怕误删有用的图片。使用以下代码可以直接读取微信图片的 dat 文件并转换成可以预览的图片，便于批量操作，也可以用于清理微信中的垃圾图片，非常好用。不仅可以提取图片格式，还可以识别其它文件格式（为其添加对应的后缀名）。

使用的是进程+线程的方式加快转换速度。

# 系统库
import sys
import os
import stat
# 数据类型库
from enum import Enum
import math
from time import perf_counter
 
#线程库
import threading
from multiprocessing import Pool,Process,cpu_count
import multiprocessing
 
# Acquired by WechatConvert.convert around its whole body, so at most 1000
# conversions hold this semaphore at the same time. It does not limit how many
# threads get created; extra threads simply block until a slot frees up.
sem = threading.Semaphore(1000)
 
class ImgType(Enum):
    """Two-byte file signatures, named after the file extension they indicate.

    Each value holds the first two bytes of the format's file header (the full
    header is quoted in the comment next to each member).

    WechatConvert.find_img_type iterates the members in definition order and
    returns the first one that matches, so the order of the members matters.
    ``mpeg`` has the same value as ``mpg``; Enum therefore treats it as an
    alias of ``mpg`` and skips it during iteration.
    """
    gif = 0x4749 # GIF (gif), file header: 47494638
    jpg = 0xFFD8 # JPEG (jpg), file header: FFD8FF
    png = 0x8950 # PNG (png), file header: 89504E47
    tif = 0x4949 # TIFF (tif), file header: 49492A00
    bmp = 0x424D # Windows Bitmap (bmp), file header: 424D
    dwg = 0x4143 # CAD (dwg), file header: 41433130
    psd = 0x3842 # Adobe Photoshop (psd), file header: 38425053
    rtf = 0x7B5C # Rich Text Format (rtf), file header: 7B5C727466
    xml = 0x3C3F # XML (xml), file header: 3C3F786D6C
    html = 0x6874 # HTML (html), file header: 68746D6C3E
    eml = 0x4465 # Email [thorough only] (eml), file header: 44656C69766572792D646174653A
    dbx = 0xCFAD # Outlook Express (dbx), file header: CFAD12FEC5FD746F
    pst = 0x2142 # Outlook (pst), file header: 2142444E
    xlsOrdoc = 0xD0CF # MS Word/Excel (xls.or.doc), file header: D0CF11E0
    mdb = 0x5374 # MS Access (mdb), file header: 5374616E64617264204A
    wpd = 0xFF57 # WordPerfect (wpd), file header: FF575043
    pdf = 0x2550 # Adobe Acrobat (pdf), file header: 255044462D312E
    qdf = 0xAC9E # Quicken (qdf), file header: AC9EBD8F
    pwl = 0xE382 # Windows Password (pwl), file header: E3828596
    zip = 0x504B # ZIP Archive (zip), file header: 504B0304
    rar = 0x5261 # RAR Archive (rar), file header: 52617221
    wav = 0x5741 # Wave (wav), file header: 57415645
    avi = 0x4156 # AVI (avi), file header: 41564920
    ram = 0x2E72 # Real Audio (ram), file header: 2E7261FD
    rm = 0x2E52 # Real Media (rm), file header: 2E524D46
    mpg = 0x0000 # MPEG (mpg), file header: 000001BA
    mpeg = 0x0000 # MPEG (mpg), file header: 000001B3
    mov = 0x6D6F # Quicktime (mov), file header: 6D6F6F76
    asf = 0x3026 # Windows Media (asf), file header: 3026B2758E66CF11
    mid = 0x4D54 # MIDI (mid), file header: 4D546864
 
 
class WechatConvert(object):
    """Detect and decode ``.dat`` files whose bytes are XOR-ed with one key byte.

    The key is recovered by comparing the first two bytes of the file with
    known two-byte file signatures: for the right signature, XOR-ing each
    signature byte with the corresponding file byte yields the same value,
    and that value is the key.
    """

    @staticmethod
    def find_img_type(file_path, signatures=None):
        """Guess the original file type and the XOR key of an encoded file.

        Args:
            file_path: Path of the encoded file.
            signatures: Optional iterable of Enum-like members (``.name`` is
                the type name, ``.value`` the two-byte signature as an int).
                Defaults to ``ImgType``. Members are tried in iteration order
                and the first match wins.

        Returns:
            ``(type_name, xor_key)`` on a match, where ``xor_key`` is an int
            in ``0..255``; otherwise ``('none', '0xHHHH')`` where ``HHHH`` is
            the hex of the leading bytes that were read (fewer than four
            digits when the file is shorter than two bytes).

        Note:
            A signature whose two bytes are equal (``tif`` = 0x4949 and
            ``mpg`` = 0x0000 in ``ImgType``) matches every file whose first
            two bytes are equal, so such results can be false positives.
        """
        if signatures is None:
            signatures = ImgType
        # Clear the read-only attribute. The write bit is OR-ed into the
        # current mode so the remaining permission bits (notably read access
        # on POSIX) are kept instead of being wiped.
        os.chmod(file_path, os.stat(file_path).st_mode | stat.S_IWRITE)
        with open(file_path, 'rb') as f:
            header = f.read(2)
        header_hex = '0x' + ''.join('{:02X}'.format(b) for b in header)
        if len(header) < 2:
            # Too short to carry a signature; treating the missing bytes as
            # zero would wrongly match the equal-byte signatures.
            return 'none', header_hex
        byte1, byte2 = header
        for img_enum in signatures:
            high, low = WechatConvert.hex_to_tuple(img_enum.value)
            if high ^ byte1 == low ^ byte2:
                return img_enum.name, high ^ byte1
        return 'none', header_hex  # unknown type is reported as 'none'

    @staticmethod
    def hex_to_tuple(img_type):
        """Split a 16-bit signature into its ``(high_byte, low_byte)`` pair."""
        return img_type >> 8, img_type & 0xFF

    @staticmethod
    def _xor_table(key):
        """Return a 256-byte ``bytes.translate`` table mapping b to b ^ key."""
        return bytes(b ^ key for b in range(256))

    def convert(self, file_path, output_path=".", number='1', total='1'):
        """Decode one encoded file, or tag it with its detected extension.

        Files detected as gif/jpg/png/tif/bmp are XOR-decoded into
        ``<output_path>/<file_name>.<type>``. Files detected as any other
        known type are not decoded; the source file is renamed to that same
        target path. Files of unknown type are left untouched.

        Args:
            file_path: Path of the encoded file.
            output_path: Directory that receives the result.
            number: Position of this file in the batch (progress output only).
            total: Size of the batch (progress output only).
        """
        with sem:  # bounds how many conversions run at the same time
            # Accept both "\\" and "/" as separators regardless of platform.
            file_name = file_path.replace("\\", "/").rsplit("/", 1)[-1]
            img_type, img_xor = WechatConvert.find_img_type(file_path)
            if img_type == 'none':  # unsupported type: nothing to do
                return
            target = os.path.join(output_path, file_name + "." + img_type)
            if img_type in ('gif', 'jpg', 'png', 'tif', 'bmp'):
                print('正在转换图片' + str(number) + '/' + str(total) +':' + file_name)
                table = WechatConvert._xor_table(img_xor)
                with open(file_path, 'rb') as fd, open(target, 'wb') as w:
                    # Decode in chunks; translate() applies the XOR to a
                    # whole chunk at once instead of byte by byte.
                    while True:
                        chunk = fd.read(65536)
                        if not chunk:
                            break
                        w.write(chunk.translate(table))
                print('图片转换完成:'+ str(number) + '/' + str(total) + ':'  + file_name)
            else:
                # Only add the detected extension. The source is the file
                # that was inspected, not a path rebuilt from output_path.
                os.rename(file_path, target)
 
def thread_it(func, *args):
    """Run ``func`` once per file, each call in its own thread, and wait.

    Args:
        func: Callable invoked as ``func(file_name, output_path, number, total)``.
        *args: ``(file_path_list, output_path, number, total)`` where
            ``file_path_list`` is the list of files to process, ``number`` is
            the count of files that precede this list in the overall batch,
            and ``total`` is the overall batch size. The i-th file (1-based)
            is passed ``number + i`` as its position.

    The function returns only after every started thread has finished.
    """
    file_path_list = args[0]
    output_path = args[1]
    number = args[2]
    total = args[3]
    threads = []
    # enumerate gives each entry its own position; list.index() would be
    # quadratic and would give duplicate paths the same number.
    for offset, file_name in enumerate(file_path_list, start=1):
        t = threading.Thread(target=func, args=(file_name, output_path, number + offset, total))
        t.start()
        threads.append(t)
    # Wait for the work so the caller sees completion, not just thread launch.
    for t in threads:
        t.join()
 
def format_elapsed(seconds):
    """Format a duration in seconds as hours, minutes or seconds.

    Durations above one hour are reported in hours, above one minute in
    minutes, otherwise in seconds. The largest unit is tested first so the
    hours case is actually reachable.
    """
    if seconds > 60 * 60:
        return str(seconds / 60 / 60) + '小时'
    if seconds > 60:
        return str(seconds / 60) + '分'
    return str(seconds) + '秒'


if __name__ == '__main__':
    multiprocessing.freeze_support()
    start = perf_counter()  # start of the timed section
    file_name_list = os.listdir(os.getcwd())
    # Kept under a separate name so the imported cpu_count() is not shadowed.
    worker_count = cpu_count()
    p = Pool(worker_count)

    # Only the .dat files in the current working directory are converted.
    convert_file_list = [
        file_name for file_name in file_name_list
        if os.path.splitext(file_name)[-1] == '.dat'
    ]

    # Split the files into one contiguous slice per worker process; each
    # process then handles its slice with one thread per file.
    chunk_size = math.ceil(len(convert_file_list) / worker_count)
    for n in range(worker_count):
        begin = chunk_size * n
        if begin < len(convert_file_list):
            # NOTE: the AsyncResult is discarded, so an exception raised in a
            # worker task is not reported here.
            p.apply_async(func=thread_it, args=(WechatConvert().convert, convert_file_list[begin:begin + chunk_size], ".", begin, len(convert_file_list)))

    p.close()
    p.join()
    end = perf_counter()  # end of the timed section
    process_time_str = format_elapsed(end - start)
    print('=================图片全部转换完成===========')
    print('一共转换图片数:' + str(len(convert_file_list)))
    print('一共用时:' + process_time_str)

微信缓存的图片dat格式转码为jpg等图片文件_第1张图片

你可能感兴趣的:(python,微信,data转jpg)