def _add_shifted_value(frame, feature_name, shift_ms):
    """Return `frame` left-joined with a time-shifted copy of its `value` column.

    For every row, the new column `feature_name` holds the `value` of the row
    whose `ts` equals this row's `ts - shift_ms`; rows without such a match get
    NaN (left join). `frame` itself is not modified.
    """
    shifted = frame[['ts', 'value']].rename(columns={'value': feature_name})
    shifted['ts'] = shifted['ts'] + shift_ms
    return frame.merge(shifted, on=['ts'], how='left')


features_list_tb = []
times = 41

# Step-level lag features. Each entry is
# (column prefix, first d, base offset in ms, sign applied to d * ONE_STEP_MS).
# With times = 41 the "f" groups produce 40 columns (d = 1..40) and the
# "l" groups produce 41 columns (d = 0..40).
step_specs = [
    # step-over-step: value d steps before the current row
    ('t', 1, 0, 1),
    # one day back, then d further steps back
    ('t_yday_f', 1, ONE_DAY_MS, 1),
    # one day back, then d steps forward from that point
    ('t_yday_l', 0, ONE_DAY_MS, -1),
    # one week back, then d further steps back
    ('t_wday_f', 1, ONE_DAY_MS * 7, 1),
    # one week back, then d steps forward from that point
    ('t_wday_l', 0, ONE_DAY_MS * 7, -1),
]
for prefix, first_d, base_ms, sign in step_specs:
    for d in range(first_d, times):
        feature_name = prefix + str(d)
        data = _add_shifted_value(
            data, feature_name, base_ms + sign * d * ONE_STEP_MS)
        features_list_tb.append(feature_name)

times = 14
# 'time_space' is only registered by name here; the column is not created
# in this section.
features_list_hb = ['time_space']

# Day-level lag features for d = 1..13 days back. Each entry is
# (column prefix, extra offset in ms added to d * ONE_DAY_MS):
#   h<d>  - same time of day, d days back
#   lh<d> - one step earlier than h<d>
#   rh<d> - one step later than h<d>
# NOTE: some of these repeat columns built above with the same shift
# (h1 == t_yday_l0, lh1 == t_yday_f1, rh1 == t_yday_l1, and likewise
# h7 / lh7 / rh7 versus the t_wday_* columns).
day_specs = [
    ('h', 0),
    ('lh', ONE_STEP_MS),
    ('rh', -ONE_STEP_MS),
]
for prefix, extra_ms in day_specs:
    for d in range(1, times):
        feature_name = prefix + str(d)
        data = _add_shifted_value(
            data, feature_name, d * ONE_DAY_MS + extra_ms)
        features_list_tb.append(feature_name)
        features_list_hb.append(feature_name)
# Time feature: ts minus time_ymd2unix(unix2time_ymd(ts)).
# NOTE(review): presumably the elapsed time since the start of the row's
# calendar day - confirm against the two helper functions.
feature_name = 'time_space'
day_anchor = data['ts'].apply(unix2time_ymd).apply(time_ymd2unix)
data[feature_name] = data['ts'] - day_anchor
features_list_tb.append(feature_name)
# Notebook-style preview; the result is discarded when run as a script.
data.head()
import matplotlib.pyplot as plt
import scipy as sp
# Training window: t1 <= ts < t7.
in_train_window = (data['ts'] >= t1) & (data['ts'] < t7)
# `drop` takes a boolean. The previous string argument "True" only worked
# because any non-empty string is truthy.
dftrain = data[in_train_window].reset_index(drop=True)

# Test window: ts >= t5.
# NOTE(review): if t5 < t7 the test rows overlap the training rows -
# confirm that this is intended.
dftest = data[data['ts'] >= t5].reset_index(drop=True)

# Notebook-style preview; the result is discarded when run as a script.
dftrain.head()

# Feature matrices and targets for the regressor.
X_train = dftrain[features_list_tb]
X_test = dftest[features_list_tb]
y_train = dftrain['value']
y_test = dftest['value']
from xgboost import XGBRegressor
# Fit the regressor on the training window and predict the test window.
regr = XGBRegressor(max_depth=10)
regr.fit(X_train, y_train)
yt_hat = regr.predict(X_test)

# Store the prediction and its absolute error ('cha') next to the actual value.
dftest['yt_hat'] = yt_hat
dftest['cha'] = abs(dftest['yt_hat'] - dftest['value'])

# Overlay actual values (blue) and predictions (red).
y = dftest['value']
fig = plt.figure(figsize=(20, 15))
plt.plot(y, 'b')
plt.plot(np.array(yt_hat), 'r')
plt.show()
# From here on only the absolute prediction error is kept as a feature.
features_list_tb = ['cha']

# Overlay actual values (blue) and absolute prediction error (red).
fig = plt.figure(figsize=(20, 15))
actual_values = np.array(dftest['value'])
abs_errors = np.array(dftest['cha'])
plt.plot(actual_values, 'b')
plt.plot(abs_errors, 'r')
plt.show()

# Export the selected column(s); the row index is written as well.
df1 = dftest[features_list_tb]
out_path = joinhome('iforest_1.csv')
df1.to_csv(out_path)
# Notebook-style preview; the result is discarded when run as a script.
df1.shape
# Number of leading test rows that have no entry in the label file.
# NOTE(review): 1440 * 3 is presumably three days of per-minute samples - confirm.
t = 1440 * 3
label = pd.read_csv(joinhome("iforest_1_label.txt"), sep=",")
# Notebook-style preview; the result is discarded when run as a script.
label.head()

x = dftest['value']
label_flags = label['a']

# Row k of the label file describes test row k + t. Collect the positions
# and values of the test rows whose label is 1.
yichang_x = [i for i in range(t, len(dftest)) if label_flags[i - t] == 1]
yichang_y = [x[i] for i in yichang_x]

# Series from row t onwards (blue) with the labelled rows marked as red dots.
fig = plt.figure(figsize=(20, 15))
plt.plot(x[t:], 'b')
plt.plot(yichang_x, yichang_y, 'r.')
plt.show()
from sklearn.ensemble import IsolationForest

# Absolute prediction error at every labelled row, as an (n, 1) feature
# matrix; `location` keeps the matching test-row positions.
location = list(yichang_x)
cha_column = df1['cha']
features = np.array([cha_column[i] for i in location]).reshape(-1, 1)

# A fixed random_state makes the set of flagged points reproducible from
# run to run; without it the forest is rebuilt from a fresh random seed
# on every execution.
clf = IsolationForest(contamination=0.05, random_state=0)
clf.fit(features)
# predict() returns -1 for outliers and 1 for inliers.
y_pred_train = clf.predict(features)

# Positions and values of the labelled rows that the forest marks as outliers.
abnomal_x = [loc for loc, pred in zip(location, y_pred_train) if pred == -1]
value_column = dftest['value']
abnomal = [value_column[loc] for loc in abnomal_x]

# Series from row t onwards (blue) with the flagged rows marked as red dots.
fig = plt.figure(figsize=(20, 15))
plt.plot(x[t:], 'b')
plt.plot(abnomal_x, abnomal, 'r.')
plt.show()