自动提取时序特征 tsfresh(2)

import matplotlib.pylab as plt
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
if __name__ == '__main__':
    N = 500
    df = pd.read_csv('UCI HAR Dataset/train/Inertial Signals/body_acc_x_train.txt', delim_whitespace=True, header=None)
    y = pd.read_csv('UCI HAR Dataset/train/y_train.txt', delim_whitespace=True, header=None, squeeze=True)[:N]   
    # plt.title('accelerometer reading')
    # plt.plot(df.ix[0, :])
    # plt.show()
    # 
    extraction_settings = ComprehensiveFCParameters()
    master_df = pd.DataFrame({'feature': df[:N].values.flatten(),
                              'id': np.arange(N).repeat(df.shape[1])})
    # 时间序列特征工程
    X = extract_features(timeseries_container=master_df, n_jobs=0, column_id='id', impute_function=impute,
                         default_fc_parameters=extraction_settings)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    cl = DecisionTreeClassifier()
    cl.fit(X_train, y_train)
    print(classification_report(y_test, cl.predict(X_test)))
    # 未进行时间序列特征工程
    X_1 = df.ix[:N - 1, :]
    X_train, X_test, y_train, y_test = train_test_split(X_1, y, test_size=.2)
    cl = DecisionTreeClassifier()
    cl.fit(X_train, y_train)
    print(classification_report(y_test, cl.predict(X_test)))
    relevant_features = set()
    for label in y.unique():
        y_train_binary = y_train == label
        X_train_filtered = select_features(X_train, y_train_binary)
        print("Number of relevant features for class {}: {}/{}".format(label, X_train_filtered.shape[1],
                                                                     X_train.shape[1]))
        relevant_features = relevant_features.union(set(X_train_filtered.columns))
    X_train_filtered = X_train[list(relevant_features)]
    X_test_filtered = X_test[list(relevant_features)]
    cl = DecisionTreeClassifier()
    cl.fit(X_train_filtered, y_train)
    print(classification_report(y_test, cl.predict(X_test_filtered)))

你可能感兴趣的:(自动提取时序特征 tsfresh(2))