python中文件不知道编码格式时的读写

# -*- coding: utf-8 -*-
import codecs

1、python2.7中打开文件时open()不能指定编码方式,可用codecs.open()代替

2、windows下注册表的编码格式为UCS-2 LE BOM,对应python中的utf-16

3、如果不确定编码格式,可以在打开文件时指定errors='ignore',将可以解码的部分解出

def read(path, encoding):
    """Return the whole text content of ``path`` decoded with ``encoding``.

    Bytes that cannot be decoded are silently dropped (``errors='ignore'``),
    so a file whose exact encoding is unknown still yields its readable part.

    Args:
        path: Path of the file to read.
        encoding: Codec name such as ``'utf-8'`` or ``'utf-16'``. An empty
            value falls back to UTF-8.

    Returns:
        The decoded file content as a single string.

    Raises:
        LookupError: If ``encoding`` is not a known codec.
        OSError: If the file cannot be opened.
    """
    if not encoding:
        # The fallback used to be spelled 'uft-8', which is not a codec name
        # and made every call with an empty encoding fail with LookupError.
        encoding = 'utf-8'
    # UCS-2 files can be read with the 'utf-16' codec.
    with open(path, 'r', encoding=encoding, errors='ignore') as file:
        return file.read()
    # python3
    # 以读入文件为例:
    # f = open(path, "rb")  # 二进制格式读文件
    # i = 0
    # while True:
    #     i += 1
    #     print(i)
    #     line = f.readline()
    #     if not line:
    #         break
    #     else:
    #         try:
    #             #             print(line)
    #             #             print(line.decode('utf8'))
    #             line.decode('utf8',errors='ignore')
    #             print(line)
    #             # 为了暴露出错误,最好此处不print
    #         except:
    #             print(str(line))
def write(path, str, encoding):
    """Write text to ``path`` using ``encoding``, replacing previous content.

    Args:
        path: Path of the file to write; it is created if it does not exist.
        str: Text to write. The name shadows the builtin but is kept so that
            existing keyword callers keep working.
        encoding: Codec name such as ``'utf-8'`` or ``'utf-16'``. An empty
            value falls back to UTF-8.

    Raises:
        LookupError: If ``encoding`` is not a known codec.
        UnicodeEncodeError: If the text cannot be represented in ``encoding``.
        OSError: If the file cannot be opened for writing.
    """
    if not encoding:
        # The fallback used to be spelled 'uft-8', which is not a codec name
        # and made every call with an empty encoding fail with LookupError.
        encoding = 'utf-8'
    # 'w' instead of 'r+': 'r+' fails when the file does not exist yet and
    # leaves the tail of the old content behind when the new text is shorter.
    with open(path, 'w', encoding=encoding) as file:
        file.write(str)

你可能感兴趣的:(python)