"""
Created on Sun Nov 13 15:43:12 2022
@author: Lenovo
"""
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import random
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_squared_error
# Directory whose entries are read one by one by the processing loop below.
path = r'C:\Users\Lenovo\Desktop\四大类\REALTRY'

# Per-file result accumulators: one value is appended for each processed file
# and the three lists are combined into the summary table at the end.
TEST_NUMBER, rmse_list, rmse1_list = [], [], []

# Placeholders. Most of these names are rebound for every file inside the
# loop; xticks, ori_index_jianyi and test1 are not referenced again in the
# visible part of the script.
ori, ori_lx, filled = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
test, test1 = pd.DataFrame(), pd.DataFrame()
xticks, suijishu, suijishujianyi, sj = [], [], [], []
ori_index, ori_lx_index, ori_index_jianyi = [], [], []
# Columns removed from the frame before it is handed to the regressor: the
# target itself, its QC flag, both timestamps, NEE_VUT_REF and 'Unnamed: 0'.
_NON_DRIVER_COLUMNS = ['LE_F_MDS', 'LE_F_MDS_QC', 'TIMESTAMP_START',
                       'TIMESTAMP_END', 'NEE_VUT_REF', 'Unnamed: 0']

# For every CSV file in `path`: hide a sample of the rows whose LE_F_MDS has
# QC flag 0, predict them once with a random forest trained on the remaining
# QC == 0 rows and once by linear interpolation of the LE_F_MDS column, and
# record the RMSE of both approaches.
for i in os.listdir(path):
    # Anything that is not a CSV file would make read_csv or the column
    # look-ups below fail, so it is skipped.
    if not i.lower().endswith('.csv'):
        print('skipping non-CSV entry:', i)
        continue
    print(i)
    s = pd.read_csv(os.path.join(path, i))

    # Rows with QC flag 0 and with any other QC flag.  `filled` is not used
    # again in this loop; it is kept so the name stays available.
    ori = s.loc[s['LE_F_MDS_QC'] == 0, ['TIMESTAMP_START', 'LE_F_MDS']]
    filled = s.loc[s['LE_F_MDS_QC'] != 0, ['TIMESTAMP_START', 'LE_F_MDS']]

    # NOTE(review): if TIMESTAMP_START is stored as an integer such as
    # YYYYMMDDHHMM, pd.to_datetime needs an explicit format= to parse it
    # correctly -- confirm.  The column is dropped before modelling.
    s['TIMESTAMP_START'] = pd.to_datetime(s['TIMESTAMP_START'])

    # .copy() makes `ori` an independent frame before columns are added.
    ori = ori.dropna().copy()
    ori['TIMESTAMP_START'] = pd.to_datetime(ori['TIMESTAMP_START'])
    ori['index'] = ori.index
    # 'cha' is the gap to the previous QC == 0 row; a value of 1 means the
    # row directly before it in `s` is also a QC == 0 row.
    ori['cha'] = ori['index'].diff()
    ori_lx = ori.loc[ori['cha'] == 1]

    # Draw 30 % of the integers in range(len(ori)) with a fixed seed, keep
    # only the drawn values whose successor was not drawn, then keep at most
    # 15 % of len(ori) of them.
    # NOTE(review): these integers come from range(len(ori)) but are
    # intersected with row labels of `s` below, so only labels smaller than
    # len(ori) can ever be selected -- confirm this is intended.
    # NOTE(review): slicing the sorted list keeps the smallest values rather
    # than a random subset -- confirm this is intended.
    random.seed(0)
    suijishu = sorted(random.sample(range(0, len(ori)), int(len(ori) * 0.3)))
    suijishujianyi = [x - 1 for x in suijishu]
    sj = sorted(set(suijishu).difference(suijishujianyi))
    sj = sj[:int(len(ori) * 0.15)]

    ori_lx_index = ori_lx['index'].to_list()
    ori_index = ori['index'].to_list()

    # Row labels of the hidden (test) rows, computed once and reused.
    test_index = sorted(set(ori_lx_index) & set(sj))

    # These are index labels, so they are looked up with .loc, the same
    # accessor used for the interpolation step further down.
    test_ori = s.loc[test_index]
    train = s.loc[sorted(set(ori_index) - set(sj))]
    train = train.dropna()
    test = test_ori.dropna()

    # Without training or test rows neither model can be scored.  Record the
    # file with NaN scores instead of letting fit()/mean_squared_error raise
    # and abort the whole run.
    if train.empty or test.empty:
        print('no usable train/test rows, scores set to NaN:', i)
        rmse = rmse1 = np.nan
        TEST_NUMBER.append(test.shape[0])
        rmse_list.append(rmse)
        rmse1_list.append(rmse1)
        continue

    # Random forest trained on the remaining QC == 0 rows.
    rf = RandomForestRegressor(n_estimators=1100,
                               max_depth=80,
                               oob_score=True,
                               random_state=0)
    LE = train['LE_F_MDS']
    Drivers = train.drop(_NON_DRIVER_COLUMNS, axis=1)
    rf.fit(Drivers, LE)

    y_test = test['LE_F_MDS']
    x_test = test.drop(_NON_DRIVER_COLUMNS, axis=1)
    TEST_NUMBER.append(test.shape[0])
    rmse = np.sqrt(mean_squared_error(y_test, rf.predict(x_test)))
    rmse_list.append(rmse)

    # Interpolation baseline: blank the hidden rows in a copy of the data,
    # interpolate the target column, and score exactly the rows that were
    # scored for the random forest (aligned on test.index).
    s0 = s.copy()
    s0.loc[test_index, 'LE_F_MDS'] = np.nan
    s0['LE_F_MDS'] = s0['LE_F_MDS'].interpolate()
    s1 = s0.loc[test.index]
    rmse1 = np.sqrt(mean_squared_error(y_test, s1['LE_F_MDS']))
    rmse1_list.append(rmse1)
# Summary table: one row per processed file with the number of test rows and
# the RMSE of the random forest and of the interpolation baseline.
DF = pd.DataFrame({'TEST_NUMBER': TEST_NUMBER,
                   'RF': rmse_list,
                   'Interpolate': rmse1_list})
print(DF)

# Create the output directory first so the results are not lost to a missing
# folder after all models have been fitted.
out_dir = r'D:\Fluxnet\OUTCOME'
os.makedirs(out_dir, exist_ok=True)
DF.to_csv(os.path.join(out_dir, 'IN VS. RF' + '.csv'), index=False)