Python处理文件编码及修改为Unix格式

Python处理文件编码及修改为Unix格式

  • 上代码

上代码

# -*- coding: utf-8 -*-
import chardet
import os

# Detect a file's character encoding.
def get_encoding(file):
    """Return chardet's detection result for the raw bytes of *file*.

    The file is opened in binary mode, read in full, and passed to
    ``chardet.detect``; whatever that call returns is handed back
    unchanged. Callers in this file index the result with ``['encoding']``.
    """
    with open(file, 'rb') as stream:
        raw = stream.read()
    return chardet.detect(raw)

# Re-encode a file as UTF-8.
def change_encode_to_utf8(path: str, coding: str):
    """Rewrite the file at *path* as UTF-8, decoding it with *coding*.

    Args:
        path: File to convert; it is overwritten in place.
        coding: Name of the codec the file is currently stored in.

    Raises:
        UnicodeDecodeError: If the file content is not valid *coding*.
        LookupError: If *coding* is not a codec Python knows.
    """
    # newline='' disables newline translation on both sides, so this function
    # changes only the character encoding and leaves every line ending
    # (CRLF, LF or CR) exactly as it was.
    with open(path, 'r', encoding=coding, newline='') as f:
        text = f.read()
    with open(path, 'w', encoding='utf-8', newline='') as f:
        f.write(text)

def replace_windows_to_unix(file):
    """Convert Windows (CRLF) line endings in *file* to Unix (LF), in place.

    The file is processed as raw bytes, so no text codec is involved and
    nothing other than the ``\\r\\n`` byte pairs is altered.

    NOTE(review): matching the two-byte sequence ``0D 0A`` assumes an
    ASCII-compatible encoding (UTF-8, GB2312, Latin-1, ...); UTF-16/32
    files would need decoding first.

    Args:
        file: Path of the file to convert.
    """
    with open(file, 'rb') as f:
        data = f.read()
    converted = data.replace(b'\r\n', b'\n')
    # Only rewrite when something actually changed, so files that are
    # already LF-only are never truncated or touched.
    if converted != data:
        # Binary mode is required: text mode would re-apply the platform's
        # default newline translation on write.
        with open(file, 'wb') as f:
            f.write(converted)

# Collect every file under a directory tree.
def file_name(file_dir):
    """Return the paths of all files found below *file_dir*.

    The tree is traversed with ``os.walk`` and each file name is joined to
    the directory it was found in. Directories themselves are not listed.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(file_dir)
        for name in names
    ]

# Decide whether a file needs converting to Unix (LF) line endings.
def need_to_UnixLF(file):
    """Return True when *file* contains at least one CRLF line ending.

    The file is decoded with the encoding reported by ``get_encoding``.

    Args:
        file: Path of the file to inspect.

    Returns:
        True if the text contains ``\\r\\n`` anywhere, including at the very
        start of the file; False otherwise.
    """
    detected = get_encoding(file)
    # newline='' keeps line endings untranslated so '\r\n' is still visible.
    with open(file, 'r', newline='', encoding=detected['encoding']) as f:
        # A membership test, unlike `find(...) > 0`, also catches a CRLF
        # located at offset 0.
        return '\r\n' in f.read()

# Line-ending conversion.
def to_lf(path,encoding = 'utf-8'):
    """Rewrite *path* in place with LF line endings and report success.

    The file is read with universal-newline translation (``newline=None``),
    so every line ending arrives as ``\\n``; it is then written back with
    ``newline='\\n'`` using the same *encoding*, and a confirmation line is
    printed.
    """
    with open(path, newline=None, encoding=encoding) as src:
        content = src.read()
    with open(path, 'w', newline='\n', encoding=encoding) as dst:
        dst.write(content)
        print("  文件转换成功,格式:{0} ;编码:{1} ;路径:{2}".format('Unix(LF)', encoding, path))
# Main routine.
def main_def(file_dir):
    """Normalize every file under *file_dir* to UTF-8 and LF line endings.

    For each file returned by ``file_name``: print its path and detected
    encoding, re-encode it to UTF-8 when the detected encoding is
    ``'GB2312'``, then convert CRLF line endings to LF if any are present.

    Args:
        file_dir: Root directory whose files are processed in place.
    """
    for path in file_name(file_dir):
        print(path)
        # Detect once per file; every detection re-reads the whole file.
        encoding = get_encoding(path)['encoding']
        print("  原编码为:{}".format(encoding), end='')
        if encoding is None:
            # No encoding could be determined (presumably an empty or binary
            # file -- confirm against chardet's behavior); skip it rather
            # than guess a codec and risk corrupting the content.
            print()
            continue
        if encoding == 'GB2312':
            print('  需要修改编码',end='')
            change_encode_to_utf8(path, encoding)
            # The file was just written as UTF-8, so that is what later
            # steps must decode with; re-detection is for reporting only.
            encoding = 'utf-8'
            print('  修改后编码为:{}'.format(get_encoding(path)['encoding']), end='')
        print()
        if need_to_UnixLF(path):
            # Decode with the file's actual encoding instead of always
            # assuming UTF-8.
            to_lf(path, encoding)

# Script entry point: these statements run whenever the module is executed
# or imported (there is no `if __name__ == "__main__":` guard).
# NOTE(review): the first assignment is immediately overwritten by the
# second, so only the second path is ever processed.
file_dir="d:\\test\\test4"
file_dir=r"D:\soft\eclipse\workspace\log\src"
# Process every file under the selected directory in place.
main_def(file_dir) 

你可能感兴趣的:(Python,python)