python 批量转换文件的编码方式

利用 Python 批量转换一个目录下所有指定类型文件的编码方式,使用前需要先设置好待转换的路径、目标编码方式和文件类型。建议同时安装 PyCharm 和 Anaconda:PyCharm 用于编写和调试代码,因为它的调试功能十分强大;Anaconda 用于安装 Python 及其运行库。
下面是源码:



# -*- coding: utf-8 -*-
"""
批量编码转换,用于批量转换文件的编码,支持utf-8,utf-8-bom,gb2312,gbk编码之间的转化,其他编码暂时没有测试
使用方法:修改main中文件的路径、文件的类型和需要转换的编码
    src_dir = "d:\\test" -->  指定要转换的文件夹路径
    tag_type = "utf-8-sig" --> 指定目标转换格式
    type_filter = [".cpp", ".h"] --> 指定需要转换的文件类型
注意:utf-8-bom请写成utf-8-sig
"""

import sys
import os
import codecs
import chardet


def utf8_conversion(f_context, code_type):
    """Re-encode UTF-8 bytes into the requested target encoding.

    Args:
        f_context: File content as UTF-8 encoded bytes. The content is not
            inspected for an existing byte order mark.
        code_type: Name of the target encoding. "utf-8" returns the input
            unchanged; "utf-8-bom" and "utf-8-sig" prepend the UTF-8 byte
            order mark (all three are matched case-insensitively); any
            other name is handed to the codec machinery exactly as given.

    Returns:
        The content encoded in the target encoding.
    """
    target = code_type.lower()

    if target == "utf-8":
        # Already in the requested form; nothing to do.
        return f_context

    if target in ("utf-8-bom", "utf-8-sig"):
        return codecs.BOM_UTF8 + f_context

    # Generic path: decode to text, then encode with the requested codec.
    text = f_context.decode("utf-8")
    return text.encode(code_type)


def conversion_to_utf8(f_context, code_type):
    """Convert encoded bytes into UTF-8 bytes without a byte order mark.

    Args:
        f_context: Raw file content as bytes.
        code_type: Name of the encoding f_context is currently in. "utf-8"
            returns the input unchanged; "utf-8-bom" and "utf-8-sig" remove
            a leading UTF-8 byte order mark (all three are matched
            case-insensitively); any other name is handed to the codec
            machinery exactly as given.

    Returns:
        The content as UTF-8 encoded bytes.
    """
    source = code_type.lower()

    if source == "utf-8":
        return f_context

    if source in ("utf-8-bom", "utf-8-sig"):
        # Only strip the BOM when it is really there; slicing off the first
        # three bytes unconditionally would destroy content of BOM-less data.
        if f_context.startswith(codecs.BOM_UTF8):
            return f_context[len(codecs.BOM_UTF8):]
        return f_context

    return f_context.decode(code_type).encode("utf-8")


def _canonical_code_type(name):
    """Lower-case an encoding name and map the "utf-8-bom" alias to "utf-8-sig"."""
    name = name.lower()
    return "utf-8-sig" if name == "utf-8-bom" else name


def file_code_conversion(file_name, tag_code_type):
    """Convert a single file in place to the target encoding.

    The current encoding is guessed with chardet. The file is left untouched
    when it is empty, when no encoding can be detected, when it already has
    the target encoding, or when the conversion itself fails. A one-line
    status message is printed in every case.

    Args:
        file_name: Path of the file to convert; it is overwritten in place.
        tag_code_type: Name of the target encoding. "utf-8-bom" is treated
            as an alias of "utf-8-sig".
    """
    with open(file_name, "rb") as src_file:
        file_context = src_file.read()

    # Truthiness works for both Python 2 str and Python 3 bytes, whereas
    # comparing bytes with "" is never true on Python 3.
    if not file_context:
        print(file_name + "  --  empty file")
        return

    # chardet reports None as the encoding when it cannot make a guess.
    code_type = chardet.detect(file_context)["encoding"]
    if code_type is None:
        print(file_name + "  --  unknown encoding, skipped")
        return

    # Compare canonical names so that a file detected as "UTF-8-SIG" is
    # recognised as already matching a requested "utf-8-bom".
    if _canonical_code_type(code_type) == _canonical_code_type(tag_code_type):
        print(file_name + "  --  escape file")
        return

    # Convert fully in memory first: the file is only reopened for writing
    # once the new content exists, so a failed conversion cannot truncate it.
    try:
        tag_utf8_context = conversion_to_utf8(file_context, code_type)
        tag_context = utf8_conversion(tag_utf8_context, tag_code_type)
    except (UnicodeError, LookupError) as err:
        # A wrong guess by chardet or an unknown codec name should not abort
        # the whole batch; report it and move on to the next file.
        print(file_name + ("  --  conversion from %s into %s failed: %s"
                           % (code_type, tag_code_type, err)))
        return

    print(file_name + ("  --  converted from %s into %s" % (code_type, tag_code_type)))
    with open(file_name, "wb") as tag_file:
        tag_file.write(tag_context)


if __name__ == "__main__":
    # User settings: edit these three values before running the script.
    src_dir = "D:\\project\\myproject"  # root folder, walked recursively
    tag_type = "utf-8-bom"  # encoding every matching file is converted to
    type_filter = [".cpp", ".h"]  # file extensions (with the dot) to convert

    for folder, _subfolders, file_names in os.walk(src_dir):
        for file_name in file_names:
            extension = os.path.splitext(file_name)[1]
            # The extension must match an entry of type_filter exactly.
            if extension not in type_filter:
                continue
            file_code_conversion(os.path.join(folder, file_name), tag_type)

你可能感兴趣的:(python 批量转换文件的编码方式)