tsfresh是一个开源的、用于提取时序数据特征的Python包,能够提取出超过4000种特征,堪称提取时序特征的瑞士军刀。笔者最近因为有相关需求才开始研究tsfresh,目前除了官方的英文文档以外,几乎没有中文博客对它做过详细的介绍。本文将介绍一个关于时间序列分类的demo,帮助大家尽快地上手tsfresh。
https://blog.csdn.net/xindoo/article/details/79177378 ,大家有兴趣可以看看这篇博客。它介绍得很好,不过只涉及其中比较重要的那部分特征提取。
# -*- coding: utf-8 -*-
"""Time-series classification demo built on tsfresh.

Downloads the robot execution failures data set, plots one successful and
one failed example, extracts features with tsfresh, and compares a decision
tree trained on all extracted features against one trained only on the
features returned by ``extract_relevant_features``.
"""
from tsfresh.examples.robot_execution_failures import (
    download_robot_execution_failures,
    load_robot_execution_failures,
)
import matplotlib.pylab as plt
import seaborn as sns
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction.settings import ComprehensiveFCParameters
from sklearn.tree import DecisionTreeClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

download_robot_execution_failures()
df, y = load_robot_execution_failures()
print(df.head())

# The time axis plus the six F_*/T_* columns drawn for each example.
plot_columns = ['time', 'F_x', 'F_y', 'F_z', 'T_x', 'T_y', 'T_z']
df[df.id == 3][plot_columns].plot(
    x='time', title='Success example(id 3)', figsize=(12, 6))
df[df.id == 20][plot_columns].plot(
    x='time', title='Failure example(id 20)', figsize=(12, 6))
plt.show()

# Feature extraction
extract_settings = ComprehensiveFCParameters()
# Aggregate by id: rows sharing an id form one series, ordered by 'time'.
X = extract_features(
    df,
    column_id='id',
    column_sort='time',
    default_fc_parameters=extract_settings,
    impute_function=impute,
)

# Extract only the most relevant features (three steps bundled in one call).
X_filtered = extract_relevant_features(
    df,
    y,
    column_id='id',
    column_sort='time',
    default_fc_parameters=extract_settings,
)
X_filtered.info()

# Hold out 40% of the samples for testing. A float test_size is a fraction;
# an integer would be an absolute sample count (4 would mean only 4 test rows).
X_train, X_test, X_filtered_train, X_filtered_test, y_train, y_test = train_test_split(
    X, X_filtered, y, test_size=0.4)

# Baseline: decision tree on every extracted feature.
cl = DecisionTreeClassifier()
cl.fit(X_train, y_train)
print(classification_report(y_test, cl.predict(X_test)))
# n_features_ was removed in scikit-learn 1.2 in favour of n_features_in_;
# printed explicitly because a bare expression shows nothing in a script.
print(cl.n_features_in_)

# Comparison: decision tree on the filtered (relevant) features only.
cl2 = DecisionTreeClassifier()
cl2.fit(X_filtered_train, y_train)
print(classification_report(y_test, cl2.predict(X_filtered_test)))