使用 Python 提取 CSV 文件中的指定列内容，生成新的文件

场景说明

  1. 适用于多个文件。
  2. 可自定义新生成文件的列内容。
import os
import pandas as pd


class ProcessCSV:
    """Merge user-selected columns from several CSV files into one CSV file.

    Every row taken from an input file is tagged with a ``File Name`` column
    holding that file's base name (directory and extension stripped).
    """

    def __init__(self, file_names):
        """Store the CSV paths to process.

        Args:
            file_names: Sequence of CSV file paths. The first entry supplies
                the column menu shown by ``save_to_new_csv``.
        """
        self.file_names = file_names

    @staticmethod
    def _parse_column_selection(raw_selection, column_names):
        """Convert a comma-separated string of 1-based numbers to column names."""
        # The prompt is in Chinese, and CJK input methods type the full-width
        # comma by default, so treat it the same as the ASCII comma.
        tokens = raw_selection.replace('，', ',').split(',')

        selected = []
        for token in tokens:
            number = int(token)  # non-numeric input raises ValueError
            # Reject 0 and negatives explicitly: as list indices they would
            # wrap around and silently pick a column from the end.
            if not 1 <= number <= len(column_names):
                raise IndexError(
                    f"column number {number} is out of range "
                    f"(expected 1-{len(column_names)})"
                )
            selected.append(column_names[number - 1])
        return selected

    def extract_data(self, selected_columns):
        """Read every input file and stack the requested columns.

        Args:
            selected_columns: Names of the columns to keep from each file.

        Returns:
            A DataFrame whose first column is ``File Name`` followed by
            ``selected_columns``, with the rows of all files concatenated in
            ``self.file_names`` order. Each file's original row index is kept.
            If there are no input files, an empty DataFrame with those
            columns is returned.

        Raises:
            KeyError: If a file lacks one of ``selected_columns``.
        """
        # Same for every file, so build it once outside the loop.
        output_columns = ['File Name', *selected_columns]

        # pd.concat([]) raises an opaque ValueError; an empty frame with the
        # expected columns is a more useful result for "nothing to read".
        if not self.file_names:
            return pd.DataFrame(columns=output_columns)

        data_frames = []
        for file_name in self.file_names:
            base_name = os.path.splitext(os.path.basename(file_name))[0]
            df = pd.read_csv(file_name, delimiter=',', encoding='utf_8_sig')
            df['File Name'] = base_name  # tag each row with its source file
            data_frames.append(df[output_columns])

        return pd.concat(data_frames)

    def save_to_new_csv(self, output_file_name):
        """Ask the user which columns to export and write them to ``out/``.

        The column menu is taken from the header of the first input file. The
        user answers with 1-based column numbers separated by commas; the
        chosen columns are extracted from every input file and written to
        ``out/<output_file_name>`` (UTF-8 with BOM, no index column).

        Args:
            output_file_name: File name of the CSV created inside ``out``.

        Raises:
            IndexError: If there are no input files, or a column number is
                outside ``1..number of columns``.
            ValueError: If the answer contains a non-numeric entry.
        """
        if not self.file_names:
            raise IndexError("no CSV files to process")

        # nrows=0 parses only the header line instead of loading the whole
        # file just to learn its column names.
        first_file_name = self.file_names[0]
        column_names = pd.read_csv(
            first_file_name, delimiter=',', encoding='utf_8_sig', nrows=0
        ).columns.tolist()

        print("列名列表:")
        for i, column in enumerate(column_names, start=1):
            print(f"{i}. {column}")

        raw_selection = input("请输入要导出的列的编号(用逗号分隔): ")
        selected_columns = self._parse_column_selection(raw_selection, column_names)

        extracted_data = self.extract_data(selected_columns)

        # Create the output directory only once there is something to write,
        # so a bad answer does not leave an empty folder behind.
        output_folder = 'out'
        os.makedirs(output_folder, exist_ok=True)
        output_path = os.path.join(output_folder, output_file_name)
        extracted_data.to_csv(output_path, index=False, encoding='utf_8_sig')


# Collect every CSV file in the current working directory.
# os.listdir() returns entries in arbitrary order, so sort them: the first file
# supplies the column menu and the list order is the row order of the output,
# and both should be reproducible between runs. Directories that happen to be
# named "*.csv" are skipped because they cannot be read as CSV.
file_names = sorted(
    name for name in os.listdir()
    if name.endswith('.csv') and os.path.isfile(name)
)
processor = ProcessCSV(file_names)
# Name of the output file
processor.save_to_new_csv('new_file.csv')

你可能感兴趣的:(python,开发语言)