from sklearn.cluster import KMeans
import featuretools as ft
import featuretools.variable_types as vtypes
from featuretools.primitives import make_agg_primitive
from tsfresh.feature_extraction.feature_calculators import (cid_ce, number_peaks,
last_location_of_maximum,
skewness, sample_entropy)
# Training EntitySet: raw observations ("obs") indexed by 'index' with
# 'time' as the time index, then normalized so each engine ('engine_no')
# becomes one row of a parent 'engines' entity.
# NOTE(review): assumes `train` is a DataFrame with 'index', 'time' and
# 'engine_no' columns — confirm upstream.
es = ft.EntitySet(id='engines')
es = es.entity_from_dataframe(
    dataframe=train,
    entity_id='obs',
    index='index',
    time_index='time',
)
es.normalize_entity(
    base_entity_id='obs',
    new_entity_id='engines',
    index='engine_no',
)
# Test EntitySet, mirroring the structure of the training one so the same
# feature definitions can be recomputed on held-out engines.
# NOTE(review): assumes `test` has the same 'index'/'time'/'engine_no'
# columns as `train` — confirm upstream.
test_es = ft.EntitySet(id='test_engines')
test_es = test_es.entity_from_dataframe(
    dataframe=test,
    entity_id='obs',
    index='index',
    time_index='time',
)
test_es.normalize_entity(
    base_entity_id='obs',
    new_entity_id='engines',
    index='engine_no',
)
def cid_ce_func(x):
    """CID-CE complexity estimate of series *x* (tsfresh), normalize=False."""
    return cid_ce(x, False)
def number_peaks_func(x):
    """Number of peaks of support n=5 in series *x* (tsfresh number_peaks)."""
    return number_peaks(x, 5)
def last_location_of_maximum_func(x):
    """Relative last position of the maximum of series *x* (tsfresh)."""
    return last_location_of_maximum(x)
def skewness_func(x):
    """Sample skewness of series *x* (tsfresh skewness)."""
    return skewness(x)
def sample_entropy_func(x):
    """Sample entropy of series *x* (tsfresh sample_entropy)."""
    return sample_entropy(x)
# Wrap the tsfresh calculators above as Featuretools aggregation primitives
# so DFS can apply them when rolling 'obs' rows up to 'engines'.
cid_ce_primitive = make_agg_primitive(
    cid_ce_func,
    input_types=[vtypes.Numeric],
    return_type=vtypes.Numeric,
    name='Complexity',
)
number_peaks_primitive = make_agg_primitive(
    number_peaks_func,
    input_types=[vtypes.Numeric],
    return_type=vtypes.Numeric,
    name='NumberPeaks',
)
# The misspelled identifier ("primititive") is kept because later code in
# this file refers to it; a correctly spelled alias is added just below.
last_location_of_maximum_primititive = make_agg_primitive(
    last_location_of_maximum_func,
    input_types=[vtypes.Numeric],
    return_type=vtypes.Numeric,
    name='LastLocationMax',
)
last_location_of_maximum_primitive = last_location_of_maximum_primititive
skewness_primitive = make_agg_primitive(
    skewness_func,
    input_types=[vtypes.Numeric],
    return_type=vtypes.Numeric,
    name='Skewness',
)
sample_entropy_primitive = make_agg_primitive(
    sample_entropy_func,
    input_types=[vtypes.Numeric],
    return_type=vtypes.Numeric,
    name='Entropy',
)
# --- Depth-1 run: one level of aggregation from 'obs' up to 'engines'. ---
depth1_agg_primitives = [
    'min', 'max', 'mean', 'count', 'sum', 'last', 'skew', 'std', 'trend',
    cid_ce_primitive, number_peaks_primitive,
    last_location_of_maximum_primititive, skewness_primitive,
    sample_entropy_primitive,
]
feature_matrix, feature_names = ft.dfs(
    entityset=es,
    target_entity='engines',
    agg_primitives=depth1_agg_primitives,
    trans_primitives=['cum_mean', 'cum_sum'],
    max_depth=1,
    n_jobs=-1,
    verbose=1,
    chunk_size=100,
)
# Recompute the exact same feature definitions on the held-out engines.
test_feature_matrix = ft.calculate_feature_matrix(
    entityset=test_es,
    features=feature_names,
    n_jobs=-1,
    verbose=1,
    chunk_size=100,
)
# Prune features on train, then align test to the surviving columns.
feature_matrix = feature_selection(feature_matrix)
test_feature_matrix = test_feature_matrix[feature_matrix.columns]
preds, fi = evaluate(feature_matrix, train_labels, test_feature_matrix, test_labels)
norm_fi = plot_feature_importances(fi)
# --- Depth-2 run: same primitives, but stacked two levels deep. ---
depth2_agg_primitives = [
    'min', 'max', 'mean', 'count', 'sum', 'last', 'skew', 'std', 'trend',
    cid_ce_primitive, number_peaks_primitive,
    last_location_of_maximum_primititive, skewness_primitive,
    sample_entropy_primitive,
]
feature_matrix, feature_names = ft.dfs(
    entityset=es,
    target_entity='engines',
    agg_primitives=depth2_agg_primitives,
    trans_primitives=['cum_mean', 'cum_sum'],
    max_depth=2,
    n_jobs=-1,
    verbose=1,
    chunk_size=100,
)
test_feature_matrix = ft.calculate_feature_matrix(
    entityset=test_es,
    features=feature_names,
    n_jobs=-1,
    verbose=1,
    chunk_size=100,
)
# NOTE(review): unlike the depth-1 run, no feature_selection is applied
# before evaluation here — presumably intentional for comparison; confirm.
preds, fi = evaluate(feature_matrix, train_labels, test_feature_matrix, test_labels)
norm_fi = plot_feature_importances(fi)