iforest

features_list_tb = []
times = 41
# step-over-step lag features: value observed d steps before the current point
for d in range(1, times):
    feature_name = 't' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] + d * ONE_STEP_MS
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)
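
All of the lag features in this post are built with the same trick: copy `ts` and `value`, shift the copied timestamps forward, and left-merge back on `ts`, so each row picks up the value observed some fixed offset earlier. The constants ONE_STEP_MS and ONE_DAY_MS are defined outside this excerpt; a minimal, self-contained sketch of the trick, assuming one-minute sampling (ONE_STEP_MS = 60_000):

import pandas as pd

ONE_STEP_MS = 60 * 1000  # assumed sampling interval: one minute
ts = pd.date_range("2024-01-01", periods=5, freq="min").astype("int64") // 10**6
demo = pd.DataFrame({"ts": ts, "value": [1.0, 2.0, 3.0, 4.0, 5.0]})

# Shift the copy's timestamps forward one step and left-merge on ts:
# each row then carries the value observed one step earlier as 't1'.
shifted = demo[["ts", "value"]].rename(columns={"value": "t1"})
shifted["ts"] = shifted["ts"] + 1 * ONE_STEP_MS
demo = demo.merge(shifted, on=["ts"], how="left")
print(demo)  # the first row gets NaN for 't1': no earlier observation exists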
    
# values leading up to the same time one day earlier (d steps before)
for d in range(1, times):
    feature_name = 't_yday_f' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] + d * ONE_STEP_MS + ONE_DAY_MS
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)
    
# values following the same time one day earlier (d steps after, including d=0)
for d in range(0, times):
    feature_name = 't_yday_l' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] - d * ONE_STEP_MS + ONE_DAY_MS
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)

# values leading up to the same time one week earlier (d steps before)
for d in range(1, times):
    feature_name = 't_wday_f' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] + d * ONE_STEP_MS + ONE_DAY_MS*7
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)
    
# values following the same time one week earlier (d steps after, including d=0)
for d in range(0, times):
    feature_name = 't_wday_l' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] - d * ONE_STEP_MS + ONE_DAY_MS*7
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)

times = 14
# day-level lag features: the value at the same time d days earlier ('h'),
# plus its neighbours one step before ('lh') and one step after ('rh') that time
features_list_hb = []
features_list_hb.append('time_space')
for d in range(1, times):
    feature_name = 'h' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] + d * ONE_DAY_MS
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)
    features_list_hb.append(feature_name)

for d in range(1, times):
    feature_name = 'lh' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] + d * ONE_DAY_MS + ONE_STEP_MS
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)
    features_list_hb.append(feature_name)

for d in range(1, times):
    feature_name = 'rh' + str(d)
    data_tb = data[['ts', 'value']].rename(columns={'value': feature_name})
    data_tb['ts'] = data_tb['ts'] + d * ONE_DAY_MS - ONE_STEP_MS
    data = data.merge(data_tb, on=['ts'], how='left')
    features_list_tb.append(feature_name)
    features_list_hb.append(feature_name)
    
# time-of-day feature: milliseconds elapsed since midnight
feature_name = 'time_space'
data[feature_name] = data['ts'].apply(unix2time_ymd)
data[feature_name] = data[feature_name].apply(time_ymd2unix)
data[feature_name] = data['ts'] - data[feature_name] 
features_list_tb.append(feature_name)
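
unix2time_ymd and time_ymd2unix are helpers defined outside this excerpt; from how they are used, they apparently round a millisecond timestamp down to that day's midnight, so time_space ends up as milliseconds since midnight. A hedged, equivalent way to compute the same feature without those helpers:

# Equivalent computation without the helpers, assuming 'ts' holds UTC epoch
# milliseconds (the original helpers may work in local time, which would add
# a fixed timezone offset to this value).
ONE_DAY_MS = 24 * 60 * 60 * 1000
time_space_alt = data['ts'] % ONE_DAY_MS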

data.head()

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp

dftrain = data[data.ts < t7]
dftrain = dftrain[dftrain.ts >= t1]
dftrain = dftrain.reset_index(drop=True)

dftest = data[data.ts >= t5]
dftest = dftest.reset_index(drop=True)
dftrain.head()
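
The boundaries t1, t5 and t7 are defined earlier in the original post and are not shown here; from the filters above they are epoch-millisecond timestamps, with training on [t1, t7) and the test set covering everything from t5 onward (the two windows overlap whenever t5 < t7, and the t = 1440*3 offset used further down suggests the overlap is roughly three days at one-minute resolution). Purely as an illustration, hypothetical boundaries could look like this:

# Hypothetical boundaries (not the post's actual values): the test window is
# assumed to start three days before the end of the training window, matching
# the t = 1440*3 offset used later at one-minute resolution.
ONE_DAY_MS = 24 * 60 * 60 * 1000
t1 = int(data['ts'].min())                     # start of the training window
t7 = int(data['ts'].max()) - 2 * ONE_DAY_MS    # end of the training window
t5 = t7 - 3 * ONE_DAY_MS                       # start of the test window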

X_train = dftrain[features_list_tb]
X_test = dftest[features_list_tb]
y_train = dftrain['value']
y_test = dftest['value']

from xgboost import XGBRegressor
regr = XGBRegressor(max_depth=10)
regr.fit(X_train, y_train)
yt_hat = regr.predict(X_test)
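
Before inspecting the curves visually, it can help to quantify how closely the regressor tracks the series. This step is not part of the original post; a small sketch using sklearn.metrics on the variables defined above:

from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Not in the original post: summary statistics of the prediction error.
mae = mean_absolute_error(y_test, yt_hat)
rmse = np.sqrt(mean_squared_error(y_test, yt_hat))
print(f"MAE:  {mae:.4f}")
print(f"RMSE: {rmse:.4f}")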

y = dftest['value']
fig=plt.figure(figsize=(20, 15))
plt.plot(y, 'b')
plt.plot(np.array(yt_hat),'r')
plt.show()

dftest['yt_hat'] = yt_hat
# 'cha' (差) is the absolute residual between the prediction and the observed value
dftest['cha'] = abs(dftest['yt_hat'] - dftest['value'])
features_list_tb = []
# features_list_tb.append('yt_hat')
# features_list_tb.append('value')
features_list_tb.append('cha')
# features_list_tb.append('time_space')
fig=plt.figure(figsize=(20, 15))
plt.plot(np.array(dftest['value']), 'b')
plt.plot(np.array(dftest['cha']),'r')
plt.show()


df1 = dftest[features_list_tb]
df1.to_csv(joinhome('iforest_1.csv'))
df1.shape
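
joinhome is another helper defined outside this excerpt; it presumably resolves a file name against the project's working directory. A hypothetical stand-in (the directory name here is made up):

import os

# Hypothetical replacement for the post's joinhome() helper; point HOME_DIR
# at wherever the CSV files should live.
HOME_DIR = os.path.expanduser("~/iforest_data")

def joinhome(filename):
    return os.path.join(HOME_DIR, filename)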


t = 1440*3  # offset into dftest: 1440 points per day (assuming one-minute sampling) * 3 days

# externally produced labels for iforest_1.csv: column 'a' marks anomalous rows with 1
label = pd.read_csv(joinhome("iforest_1_label.txt"), sep=",")
label.head()

# collect the points flagged as anomalous in the label file ("yichang" = 异常, anomaly)
yichang_x = []
yichang_y = []
for i in range(t,len(dftest)):
    if label['a'][i-t] == 1:
        yichang_x.append(i)
        yichang_y.append(dftest.value[i])
fig=plt.figure(figsize=(20, 15))
x = dftest['value']
plt.plot(x[t:], 'b')
plt.plot(yichang_x, yichang_y,'r.')
plt.show()


features = []
location = []
for i in yichang_x:
    features.append(df1['cha'][i])
    location.append(i)

from sklearn.ensemble import IsolationForest
features = np.array(features).reshape(-1, 1)
clf = IsolationForest(contamination = 0.05)
clf.fit(features)
y_pred_train = clf.predict(features)
y_pred_train
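
predict only returns hard labels (+1 for inliers, -1 for outliers, with the cutoff set by the contamination parameter). If a ranked list of suspicious points is more useful than a fixed cutoff, the continuous anomaly score is available too; a short sketch, not part of the original post:

# score_samples returns the negated anomaly score: lower means more abnormal.
scores = clf.score_samples(features)
ranked = np.argsort(scores)                 # indices from most to least anomalous
top10 = [location[i] for i in ranked[:10]]  # map back to positions in dftest
print(top10)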

abnomal_x = []
abnomal = []
for i in range(len(y_pred_train)):
    if y_pred_train[i] == -1:
        abnomal_x.append(location[i])
        abnomal.append(dftest['value'][location[i]])
fig=plt.figure(figsize=(20, 15))
plt.plot(x[t:], 'b')
plt.plot(abnomal_x, abnomal,'r.')
plt.show()
