python批量查看修改文件编码

使用 Python 批量查看文件编码,或者批量修改文件编码(依赖第三方库 chardet,需先执行 `pip install chardet`)。

代码(2022-11-22更新)

# -*- coding: UTF-8 -*-
import codecs
import os
import chardet

# File-name suffixes selecting which files get converted; names are
# lower-cased before being compared against these.
contenttypes = [
    ".c",
    ".cpp",
    ".h",
    ".hpp",
]

def convert_encoding(filename, target_encoding):
    """Re-encode ``filename`` in place from its detected encoding to ``target_encoding``.

    The source encoding is guessed by ``chardet`` from the file's raw bytes and
    printed. If chardet reports no encoding (``None``), the file is left
    untouched instead of failing inside ``bytes.decode``.

    Args:
        filename: Path of the file to convert.
        target_encoding: Codec name the file is rewritten with, e.g. ``'UTF-8'``.
    """
    # Read the raw bytes; the context manager closes the handle immediately.
    with open(filename, 'rb') as src:
        raw = src.read()

    # Guess the encoding the file is currently stored in.
    source_encoding = chardet.detect(raw)['encoding']
    print(source_encoding)
    if source_encoding is None:
        # Nothing sensible to decode with, so do not touch the file.
        return

    # Best effort: bytes that are invalid in the guessed encoding are dropped.
    text = raw.decode(source_encoding, 'ignore')

    # Encode *before* reopening the file for writing, so an encoding error
    # cannot leave a truncated file behind. Writing bytes also keeps the
    # original line endings untouched.
    encoded = text.encode(target_encoding)
    with open(filename, 'wb') as dst:
        dst.write(encoded)

def main(path="需要修改的文件路径", target_encoding='UTF-8'):
    """Convert every matching file below ``path`` to ``target_encoding``.

    A file matches when its lower-cased name ends with one of the suffixes
    listed in ``contenttypes``. Each matching path is printed and then passed
    to ``convert_encoding`` exactly once.

    Args:
        path: Root directory to walk. The default is a placeholder that is
            meant to be replaced with a real directory.
        target_encoding: Encoding handed to ``convert_encoding``.
    """
    # str.endswith accepts a tuple, so one call checks all suffixes and a
    # file can never be converted more than once.
    suffixes = tuple(contenttypes)
    for root, _dirs, files in os.walk(path):
        for name in files:
            if not name.lower().endswith(suffixes):
                continue
            filename = os.path.join(root, name)
            print(filename)
            convert_encoding(filename, target_encoding)


if __name__ == '__main__':
    main()

代码(老代码)

#!/usr/bin/python
import os
from typing import Iterator, Optional

import chardet

# Collect the paths of all files with a given suffix below a root directory
def find_all_file(path: str, suffix: str = '.txt') -> Iterator[str]:
  """Yield the full path of every file under ``path`` whose name ends with ``suffix``.

  The directory tree is walked with ``os.walk``, so sub-directories are
  included. The suffix comparison is case-sensitive.

  Args:
    path: Root directory to search.
    suffix: File-name ending to match; defaults to ``'.txt'``.

  Yields:
    The directory joined with the file name, for each matching file.
  """
  for root, _dirs, files in os.walk(path):
    for f in files:
      if f.endswith(suffix):
        yield os.path.join(root, f)

# Decide whether a file needs to be converted to UTF-8
def judge_coding(path: str) -> Optional[dict]:
  """Return chardet's detection result if the file needs converting, else ``None``.

  ``None`` is returned when the file is detected as UTF-8 or ASCII (ASCII
  bytes are already valid UTF-8), and also when chardet reports no encoding,
  because there is then no codec to decode the file with.

  Args:
    path: File to inspect; it is read in binary mode.

  Returns:
    The dict produced by ``chardet.detect`` for files that need converting,
    otherwise ``None``.
  """
  with open(path, 'rb') as f:
    c = chardet.detect(f.read())
  encoding = c['encoding']
  if encoding is None:
    return None
  # Compare case-insensitively so the spelling of the codec name does not matter.
  if encoding.lower() in ('utf-8', 'ascii'):
    return None
  return c
    
# Convert the files found below a directory to UTF-8
def change_to_utf_file(path: str):
  """Rewrite as UTF-8 every file under ``path`` that ``judge_coding`` flags.

  For each converted file a line naming the file and its previous encoding
  is printed.
  """
  for file_path in find_all_file(path):
    detected = judge_coding(file_path)
    if not detected:
      # Nothing to do for this file.
      continue
    source_encoding = detected['encoding']
    change(file_path, source_encoding)
    print("{} 编码方式已从{}改为 utf-8".format(file_path, source_encoding))

# Perform the actual re-encoding of one file
def change(path: str, coding: str):
  """Rewrite ``path`` in place as UTF-8, decoding its content with ``coding``.

  The file is read completely before it is reopened for writing, so a decode
  failure leaves the original file untouched.

  Args:
    path: File to convert.
    coding: Name of the encoding the file is currently stored in.
  """
  # newline='' switches off newline translation on both sides, so the file's
  # original line endings (e.g. CRLF) are preserved byte for byte.
  with open(path, 'r', encoding=coding, newline='') as f:
    text = f.read()
  with open(path, 'w', encoding='utf-8', newline='') as f:
    f.write(text)
    
# Report the detected encoding of every matching file
def check(path: str):
  """Print the encoding chardet detects for each file found under ``path``.

  One line is printed per file: the detected encoding, a separator and the
  file path.
  """
  for file_path in find_all_file(path):
    with open(file_path, 'rb') as stream:
      raw = stream.read()
    guess = chardet.detect(raw)
    print(guess['encoding'], ': ', file_path)

def main():
  """Entry point; enable one of the commented-out calls below to do any work."""
  # Directory handed to whichever operation below is enabled
  my_path = 'D:/Project/python/Chardet/test'
  # Uncomment the call that matches what you want to do
  #change_to_utf_file(my_path)  # convert file encodings; uncomment to convert
  #check(my_path)  # report file encodings; uncomment to inspect
  
# Run main() only when this file is executed as a script
if __name__ == '__main__':
  main()

结果

查看文件编码
image.png
执行编码转换
image.png
再次查看转换后的编码
image.png

你可能感兴趣的:(python批量查看修改文件编码)