场景说明
- 适用于多个文件。
- 自定义新生成文件的列内容。
import os
import pandas as pd
class ProcessCSV:
    """Merge selected columns from several CSV files into one CSV file.

    Every input file is read as comma-separated text with the ``utf_8_sig``
    encoding. A ``File Name`` column holding the source file's base name
    (no directory, no extension) is placed in front of the chosen columns.
    """

    def __init__(self, file_names):
        """Store the paths of the CSV files to process.

        Args:
            file_names: Sequence of CSV file paths. The first entry supplies
                the column menu shown by ``save_to_new_csv``.
        """
        self.file_names = file_names

    def extract_data(self, selected_columns):
        """Return the selected columns of every input file, stacked row-wise.

        Args:
            selected_columns: Names of the columns to keep from each file.

        Returns:
            A DataFrame whose columns are ``File Name`` followed by
            ``selected_columns``. Row labels are kept as read from each file
            (they are not renumbered). When there are no input files, an
            empty DataFrame with those columns is returned.

        Raises:
            KeyError: If a file lacks one of the selected columns.
        """
        # Loop-invariant, so build the output column order once.
        wanted = ['File Name'] + list(selected_columns)

        frames = []
        for file_name in self.file_names:
            base_name = os.path.splitext(os.path.basename(file_name))[0]
            df = pd.read_csv(file_name, delimiter=',', encoding='utf_8_sig')
            # Tag every row with its origin. This overwrites a column of the
            # same name if the file already has one.
            df['File Name'] = base_name
            frames.append(df[wanted])

        if not frames:
            # pd.concat raises ValueError on an empty list; an empty frame
            # with the expected header is the natural result for zero files.
            return pd.DataFrame(columns=wanted)
        return pd.concat(frames)

    @staticmethod
    def _parse_column_selection(raw, column_names):
        """Translate 1-based, comma-separated column numbers into names.

        Args:
            raw: Text typed by the user, e.g. ``"1, 3"``. Blank entries
                (such as the one after a trailing comma) are ignored.
            column_names: List of available column names, in menu order.

        Returns:
            The column names matching the entered numbers, in entry order.

        Raises:
            ValueError: If an entry is not an integer or nothing was entered.
            IndexError: If a number is outside ``1..len(column_names)``.
        """
        tokens = (token.strip() for token in raw.split(','))
        numbers = [int(token) for token in tokens if token]
        if not numbers:
            raise ValueError("no column numbers were entered")

        selected = []
        for number in numbers:
            # Reject 0 and negatives explicitly: after the "- 1" shift they
            # would silently pick columns from the end of the list.
            if not 1 <= number <= len(column_names):
                raise IndexError(
                    f"column number {number} is outside 1..{len(column_names)}"
                )
            selected.append(column_names[number - 1])
        return selected

    def save_to_new_csv(self, output_file_name):
        """Ask which columns to export and write the merged CSV under ``out/``.

        Prints the numbered column names of the first input file, reads a
        comma-separated list of those numbers from standard input, and writes
        the merged data to ``out/<output_file_name>`` (``utf_8_sig`` encoding,
        without the index column). The ``out`` folder is created if missing.

        Args:
            output_file_name: File name (not a full path) of the CSV to write.

        Raises:
            IndexError: If there are no input files, or an entered number is
                outside the printed menu.
            ValueError: If the input contains a non-integer entry or no
                numbers at all.
        """
        if not self.file_names:
            # IndexError is what indexing the empty list raised before; keep
            # the type but make the cause explicit.
            raise IndexError("no input CSV files to process")

        # Only the header is needed for the menu, so skip parsing the rows.
        first_file_name = self.file_names[0]
        column_names = pd.read_csv(
            first_file_name, delimiter=',', encoding='utf_8_sig', nrows=0
        ).columns.tolist()

        print("列名列表:")
        for i, column in enumerate(column_names, start=1):
            print(f"{i}. {column}")

        answer = input("请输入要导出的列的编号(用逗号分隔): ")
        selected_columns = self._parse_column_selection(answer, column_names)
        extracted_data = self.extract_data(selected_columns)

        output_folder = 'out'
        os.makedirs(output_folder, exist_ok=True)
        output_path = os.path.join(output_folder, output_file_name)
        extracted_data.to_csv(output_path, index=False, encoding='utf_8_sig')
# Collect the CSV files of the current working directory. os.listdir() returns
# entries in arbitrary order, so sort them to make the "first file" (whose
# header drives the column menu) predictable from run to run.
file_names = sorted(file for file in os.listdir() if file.endswith('.csv'))
if not file_names:
    # Stop with a readable message instead of an IndexError further down.
    raise SystemExit("当前目录下没有找到 CSV 文件。")
processor = ProcessCSV(file_names)
processor.save_to_new_csv('new_file.csv')