# Python——处理CSV文件

import pandas as pd
import csv

# Column rename map. Add one entry per column to rename,
# in the form  'original name': 'new name',
colName = dict()
# Select columns
def save_columns_to(src_file, mid_file, columns=None):
    """Copy the selected columns of a CSV file into a new CSV file.

    Args:
        src_file: Path of the CSV file to read.
        mid_file: Path of the CSV file to write. The row index is not
            written.
        columns: Column labels to keep, in output order. ``None`` (the
            default) keeps every column.

    Raises:
        KeyError: If a requested label is not a column of ``src_file``.
    """
    df = pd.read_csv(src_file)
    # ``df[None]`` raises KeyError, so the default must bypass the selection
    # instead of being handed to the indexer.
    selected = df if columns is None else df[columns]
    selected.to_csv(mid_file, index=False)

# Rename columns
def changeColName(mid_file, columns, col_map=None, encoding='ANSI'):
    """Rename the header columns of a CSV file, rewriting it in place.

    Args:
        mid_file: Path of the CSV file; it is read and then overwritten.
        columns: Unused. Kept only so existing callers keep working.
        col_map: Mapping of ``{original name: new name}``. Columns without
            an entry keep their name. ``None`` (the default) uses the
            module-level ``colName`` mapping.
        encoding: Text encoding used for both reading and writing. The
            original author notes the CSV file is ANSI-encoded and may be
            garbled when treated as UTF-8, hence the ``'ANSI'`` default.
    """
    import codecs
    import locale

    if encoding.lower() == 'ansi':
        try:
            codecs.lookup(encoding)
        except LookupError:
            # 'ansi' is only registered as a codec alias (for mbcs) on
            # Windows; elsewhere fall back to the locale's preferred
            # encoding instead of failing.
            encoding = locale.getpreferredencoding(False)

    mapping = colName if col_map is None else col_map
    all_data = pd.read_csv(mid_file, encoding=encoding)
    renamed = all_data.rename(columns=lambda name: mapping.get(name, name))
    # index=False: otherwise pandas prepends the row index as an extra,
    # unnamed first column, shifting every column position in the file.
    renamed.to_csv(mid_file, index=False, encoding=encoding)

# Filter rows by the value of one column
def selectRow(mid_file, dst_file, col_index=3, value='', encoding=None):
    """Copy the header and the matching rows of a CSV file to another file.

    A row matches when its field at ``col_index``, with surrounding
    whitespace stripped, equals ``value``. Matching rows are written
    unchanged.

    Args:
        mid_file: Path of the CSV file to read. Its first row is treated as
            the header and is always copied.
        dst_file: Path of the CSV file to write (created or overwritten).
        col_index: Zero-based position of the field to test; 0 is the first
            column, 3 (the default) the fourth.
        value: Text the stripped field must equal. Defaults to the empty
            string, i.e. rows whose field is blank.
        encoding: Encoding passed to ``open`` for both files. ``None`` (the
            default) uses the platform default.
    """
    with open(mid_file, 'r', newline='', encoding=encoding) as csv_in_file, \
            open(dst_file, 'w', newline='', encoding=encoding) as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)

        header = next(filereader, None)  # first row holds the column names
        if header is None:
            # Empty input: nothing to copy, leave the output empty.
            return
        filewriter.writerow(header)

        for row_list in filereader:
            try:
                field = row_list[col_index]
            except IndexError:
                # Blank lines (read as []) and rows too short to contain
                # the tested column cannot match; skip them.
                continue
            if field.strip() == value:
                filewriter.writerow(row_list)

if __name__ == '__main__':
    # The paths and the column list are left empty here as placeholders.
    src_file = r''  # file to process
    mid_file = r''  # receives the columns extracted from src_file
    dst_file = r''  # receives the rows of mid_file that pass the row filter
    columns = []  # labels of the columns to extract

    # Pipeline: pick columns -> rename the header -> filter rows.
    save_columns_to(src_file, mid_file, columns)
    changeColName(mid_file, columns)
    selectRow(mid_file, dst_file)

# 你可能感兴趣的:(Python,python)