Python 操作 CSV 的编码问题:繁体字乱码

# -*- coding: utf-8 -*-#

import os
import pandas as pd

def get_file(work_dir='./人工可以区分除霜与非除霜_99/', output_path='./lj_99.csv'):
    """
    List the entries of a directory and save their names to a CSV file.

    The CSV has a single ``file_name`` column and no index column.

    :param work_dir: directory whose entry names are collected
    :param output_path: path of the CSV file to write
    :return: None
    :raises OSError: if ``work_dir`` cannot be listed or ``output_path``
        cannot be written
    """
    file_list = os.listdir(work_dir)
    lk = pd.DataFrame({'file_name': file_list})
    # Write with the UTF-8-with-BOM codec: the leading byte-order mark lets
    # spreadsheet tools such as Excel detect UTF-8, so Chinese file names are
    # not shown as garbled text.
    lk.to_csv(output_path, index=False, encoding='utf-8_sig')

# Script entry point: generate the file-name listing only when run directly.
if __name__ == "__main__":
    get_file()
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import struct


# Core settings: raise the maximum number of displayed columns, the total
# display width, and the per-column width so that printed DataFrames are not
# truncated with an ellipsis in the middle.
pd.options.display.max_columns = 1000
pd.options.display.width = 1000
pd.options.display.max_colwidth = 1000

# Read a CSV file and decode its contents into readable text.
def read_data(file_path=None, encoding='big5'):
    """
    Load a CSV file into a DataFrame, print it, and return it.

    :param file_path: path of the CSV file to read; when omitted,
        'Y5_Channel_List_Revised.csv' in the current directory is used
    :param encoding: codec used to decode the file; defaults to 'big5',
        the Traditional Chinese encoding used in Taiwan
    :return: the parsed data as a pandas DataFrame
    :raises OSError: if the file cannot be opened
    :raises UnicodeDecodeError: if the file is not valid in ``encoding``
    """
    if file_path is None:
        file_dir = './'
        file_name = 'Y5_Channel_List_Revised.csv'
        file_path = os.path.join(file_dir, file_name)
    # Decoding with the file's real encoding is what prevents garbled
    # Traditional Chinese characters. The returned frame can be re-saved
    # with to_csv(..., encoding='utf-8') to convert the file.
    df_data = pd.read_csv(file_path, encoding=encoding, delimiter=',')

    print(df_data)
    return df_data



# Script entry point: load the Big5-encoded CSV only when run directly.
if __name__ == "__main__":
    read_data()

你可能感兴趣的:(数据预处理)