用 Python 读取某文件夹下的所有 Excel 文件

import os
import pandas as pd
from sklearn import linear_model


# Folder whose entries are loaded as Excel workbooks further down the script.
path = r'D:\新数据\每日收益率'
filenames = os.listdir(path)

# Echo the directory listing, one entry per line, before any processing starts.
if filenames:
    print('\n'.join(filenames))
    
    
# Market-model pass over every workbook listed in `filenames`.
#
# For each workbook: the first ESTIMATION_ROWS rows are the estimation window,
# the remaining rows are the event window. Every column other than 'time' and
# the '000300' index column is regressed on '000300' over the estimation
# window; the fitted line is then used to produce expected returns for the
# event window, and excess return = actual return - expected return.
# Two workbooks with the same file name are written, one per output folder.

# Number of leading rows used as the estimation window.
ESTIMATION_ROWS = 110

# Raw strings keep the backslashes literal instead of relying on unrecognised
# escape sequences such as '\新' or '\日'.
expected_dir = r'D:\新数据\日预期收益率'   # expected returns
excess_dir = r'D:\新数据\日超额收益率'     # excess returns
for out_dir in (expected_dir, excess_dir):
    # Make sure the destination exists so the batch does not fail at the
    # first save.
    os.makedirs(out_dir, exist_ok=True)

for i in filenames:
    excel_path = os.path.join(path, i)
    # The context manager closes the handle as soon as the sheet is loaded;
    # previously one handle per workbook stayed open for the whole run.
    with open(excel_path, 'rb') as f:
        data = pd.read_excel(f)

    # Preserve the row labels produced by read_excel in a 'time' column, then
    # fall back to a plain positional index.
    # NOTE(review): with a default read_excel call these labels are just row
    # numbers — confirm whether a date column was meant to be the index.
    data['time'] = data.index
    data = data.reset_index(drop=True)

    data1 = data.iloc[:ESTIMATION_ROWS]  # estimation window: actual returns
    data2 = data.iloc[ESTIMATION_ROWS:]  # event window: actual returns

    feature = data.columns.tolist()
    feature.remove('time')
    feature.remove('000300')  # CSI 300 index, used as the regressor

    # The regressor is the same for every security, so shape it once here
    # rather than inside the loop below.
    market_est = data1['000300'].values.reshape(-1, 1)
    market_evt = data2['000300'].values.reshape(-1, 1)

    dfR = pd.DataFrame(data2['time'])
    dfAR = pd.DataFrame(data2['time'])
    for m in feature:
        regr = linear_model.LinearRegression()
        # A 1-D target makes predict() return a 1-D vector, which is assigned
        # directly as a column (the values are the same as with an (n, 1)
        # target).
        regr.fit(market_est, data1[m].values)
        y_pred1 = regr.predict(market_evt)  # expected return, event window
        dfR[m] = y_pred1
        dfAR[m] = data2[m].values - y_pred1  # actual - expected = excess

    dfR.to_excel(os.path.join(expected_dir, i), index=False)
    dfAR.to_excel(os.path.join(excess_dir, i), index=False)

  

转载于:https://www.cnblogs.com/Christina-Notebook/p/10867602.html

你可能感兴趣的:(读取某文件夹下所有excel文件 python)