# 上代码 (the code follows)
import chardet
import os
def get_encoding(file):
    """Guess the character encoding of a file.

    The whole file is read as raw bytes and handed to ``chardet.detect``.

    Args:
        file: Path of the file to inspect.

    Returns:
        Whatever ``chardet.detect`` returns for the file's bytes; callers in
        this file index that result with ``['encoding']``.
    """
    with open(file, 'rb') as raw_file:
        payload = raw_file.read()
    return chardet.detect(payload)
def change_encode_to_utf8(path: str, coding: str) -> None:
    """Re-encode the text file at *path* from *coding* to UTF-8, in place.

    The file is fully decoded before it is reopened for writing, so a
    decoding error leaves the original file untouched.

    Args:
        path: File to rewrite.
        coding: Name of the encoding the file is currently stored in.

    Raises:
        UnicodeDecodeError: If the file content is not valid *coding*.
        LookupError: If *coding* is not a codec name Python knows.
    """
    # newline='' switches off newline translation on both the read and the
    # write side; without it this function would silently change the file's
    # line endings (CRLF -> LF on POSIX, LF -> CRLF on Windows) in addition
    # to its encoding.
    with open(path, 'r', encoding=coding, newline='') as f:
        text = f.read()
    with open(path, 'w', encoding='utf-8', newline='') as f:
        f.write(text)
def replace_windows_to_unix(file):
    """Convert Windows (CRLF) line endings in *file* to Unix (LF), in place.

    The file is processed as raw bytes, so the conversion does not depend on
    the file's text encoding and no newline translation is applied by the
    I/O layer.

    Args:
        file: Path of the file to rewrite.
    """
    with open(file, 'rb') as f:
        data = f.read()
    # Real CR LF byte pairs are replaced; raw-string patterns such as
    # r'\r\n' would only match a literal backslash-r-backslash-n sequence.
    converted = data.replace(b'\r\n', b'\n')
    with open(file, 'wb') as f:
        f.write(converted)
def file_name(file_dir):
    """Collect the paths of all files found beneath a directory.

    Args:
        file_dir: Directory that is walked recursively with ``os.walk``.

    Returns:
        A list with one entry per file, each built by joining the directory
        the file was found in with the file's name, in ``os.walk`` order.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(file_dir)
        for entry in entries
    ]
def need_to_UnixLF(file):
    """Report whether *file* contains at least one Windows (CRLF) line ending.

    The file is decoded with the encoding reported by ``get_encoding``.

    Args:
        file: Path of the file to inspect.

    Returns:
        True if a CR LF pair occurs anywhere in the decoded text, else False.
    """
    edcoding = get_encoding(file)
    # newline='' disables newline translation so CR LF pairs survive the read.
    with open(file, 'r', newline='', encoding=edcoding['encoding']) as f:
        text = f.read()
    # A membership test also catches a CRLF at offset 0; a `find(...) > 0`
    # check treats that position (index 0) as "not found".
    return '\r\n' in text
def to_lf(path, encoding='utf-8'):
    """Rewrite the text file at *path* with LF line endings, in place.

    The file is read with universal-newline translation and written back
    with ``newline='\\n'``, using the same *encoding* for both steps. A
    confirmation line is printed afterwards.

    Args:
        path: File to rewrite.
        encoding: Encoding used to decode and re-encode the file.
    """
    with open(path, newline=None, encoding=encoding) as source:
        content = source.read()
    with open(path, 'w', newline='\n', encoding=encoding) as target:
        target.write(content)
    print(" 文件转换成功,格式:{0} ;编码:{1} ;路径:{2}".format('Unix(LF)', encoding, path))
def main_def(file_dir):
    """Normalise every file beneath *file_dir*.

    For each file returned by ``file_name``:

    * the detected encoding is printed;
    * files detected as ``GB2312`` are re-encoded to UTF-8 in place;
    * files that contain CRLF line endings are rewritten with LF endings.

    Args:
        file_dir: Root directory whose files are processed recursively.
    """
    for x in file_name(file_dir):
        print(x)
        # Detect once per file: every get_encoding() call re-reads the whole
        # file and re-runs detection.
        encoding = get_encoding(x)['encoding']
        # str() keeps the report working when no encoding was detected
        # (chardet may report None, e.g. for an empty file -- TODO confirm).
        print(" 原编码为:" + str(encoding), end='')
        if encoding is None:
            # Without a known encoding the file cannot be decoded safely;
            # leave it untouched.
            print()
            continue
        if encoding == 'GB2312':
            print(' 需要修改编码', end='')
            change_encode_to_utf8(x, encoding)
            print(' 修改后编码为:' + str(get_encoding(x)['encoding']), end='')
            # The file has just been written as UTF-8, whatever re-detection
            # reports for it.
            encoding = 'utf-8'
        print()
        if need_to_UnixLF(x):
            # Pass the file's actual encoding: decoding a non-UTF-8 file with
            # to_lf's UTF-8 default would fail or corrupt the text.
            to_lf(x, encoding)
# Earlier target directory; this value is overwritten by the next assignment
# and is never used.
file_dir="d:\\test\\test4"
# Directory tree that is actually processed.
file_dir=r"D:\soft\eclipse\workspace\log\src"
# Runs at module top level, so the conversion starts as soon as this file is
# executed (or imported).
main_def(file_dir)