一类是从三轴加速度计和三轴陀螺仪获得的原始时间序列(数据集中共包含9路传感器信号);
另一类是经过特征工程之后得到的数据:每个特征向量包含561个特征。
数据集下载地址:UCI Machine Learning Repository: Human Activity Recognition Using Smartphones Data Set
首先导入数据:
# Load the data:
def load_X(X_signals_paths):
    """Load the raw inertial-signal files into one float32 array.

    Each file holds one signal type: one window per line, with the samples
    of that window separated by whitespace.

    Args:
        X_signals_paths: iterable of paths, one text file per signal type.

    Returns:
        Array of shape (n_windows, n_timesteps, n_signals); the last axis
        follows the order of ``X_signals_paths``.

    Raises:
        OSError: if a file cannot be opened.
        ValueError: if a token cannot be parsed as a float.
    """
    X_signals = []
    for signal_type_path in X_signals_paths:
        # ``with`` closes the file even when parsing a row raises.
        with open(signal_type_path, 'r') as file:
            # split() without an argument collapses runs of whitespace, so
            # values padded with several spaces never produce empty tokens.
            # Blank lines are skipped.
            X_signals.append([
                np.array(row.split(), dtype=np.float32)
                for row in file
                if row.strip()
            ])
    # (n_signals, n_windows, n_timesteps) -> (n_windows, n_timesteps, n_signals)
    return np.transpose(np.array(X_signals), (1, 2, 0))
def load_y(y_path):
    """Load the activity labels and shift them to start at zero.

    Args:
        y_path: path to a text file of whitespace-separated integer labels,
            one row per window.

    Returns:
        int32 array with one row per non-blank line of the file, holding
        the file's labels minus one.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a token cannot be parsed as an integer.
    """
    # ``with`` closes the file even when parsing raises; split() tolerates
    # any amount of surrounding or separating whitespace.
    with open(y_path, 'r') as file:
        y_ = np.array(
            [row.split() for row in file if row.strip()],
            dtype=np.int32,
        )
    # Subtract 1 so the smallest label in a 1-based file becomes 0.
    return y_ - 1
标签设置成one-hot:
# Build one-hot labels
def one_hot(y_, n_values=None):
    """Encode zero-based integer class labels as one-hot rows.

    Args:
        y_: labels of shape (n,) or (n, 1).
        n_values: width of the encoding (number of classes). Defaults to
            ``max(y_) + 1``. Pass it explicitly when a split may not
            contain the highest class, so that every split is encoded
            with the same width.

    Returns:
        float64 array of shape (n, n_values) with a single 1 per row.

    Raises:
        ValueError: if ``y_`` is empty and ``n_values`` is not given.
    """
    labels = np.asarray(y_)
    # Flatten a column vector (n, 1) to (n,) so it can index the identity.
    labels = labels.reshape(len(labels))
    if n_values is None:
        n_values = int(np.max(labels)) + 1
    # Row k of the identity matrix is the one-hot vector of class k.
    return np.eye(n_values)[np.array(labels, dtype=np.int32)]
导入的数据格式为[7352,128,9](训练集)和[2947,128,9](测试集)。
接下来就可将数据送入CNN中:
这里要注意送入的数据的格式:
model.add(tf.keras.layers.Conv2D(126, (4, 4), input_shape=(8, 16, 9), activation='relu'))#126个卷积核,大小为4*4 转换成的矩阵为8*16
将原始时间序列的128拆解为矩阵(8*16),可将数据集中9个传感器数据理解为9个通道。
构建2D CNN网络,以单层为例:
# Build the 2D CNN (excerpt: tf, Sequential, Dense, n_outputs, epochs,
# batch_size, verbose and the train/test arrays are defined elsewhere).
model = Sequential()
# 126 filters of size 4x4; the input is an 8x16 grid with 9 channels.
model.add(tf.keras.layers.Conv2D(126, (4, 4), input_shape=(8, 16, 9), activation='relu'))
# Global average pooling reduces every feature map to a single value.
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(Dense(200, activation='relu'))
# Softmax output with one unit per class.
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# The first value returned by evaluate() (presumably the loss) is discarded.
_, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
# Per-class scores for every test window.
predict=model.predict(testX,verbose=1)
试了很多次,发现卷积层数对准确率没什么影响。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
# Load the data:
def load_X(X_signals_paths):
    """Load the raw inertial-signal files into one float32 array.

    Each file holds one signal type: one window per line, with the samples
    of that window separated by whitespace.

    Args:
        X_signals_paths: iterable of paths, one text file per signal type.

    Returns:
        Array of shape (n_windows, n_timesteps, n_signals); the last axis
        follows the order of ``X_signals_paths``.

    Raises:
        OSError: if a file cannot be opened.
        ValueError: if a token cannot be parsed as a float.
    """
    X_signals = []
    for signal_type_path in X_signals_paths:
        # ``with`` closes the file even when parsing a row raises.
        with open(signal_type_path, 'r') as file:
            # split() without an argument collapses runs of whitespace, so
            # values padded with several spaces never produce empty tokens.
            # Blank lines are skipped.
            X_signals.append([
                np.array(row.split(), dtype=np.float32)
                for row in file
                if row.strip()
            ])
    # (n_signals, n_windows, n_timesteps) -> (n_windows, n_timesteps, n_signals)
    return np.transpose(np.array(X_signals), (1, 2, 0))
def load_y(y_path):
    """Load the activity labels and shift them to start at zero.

    Args:
        y_path: path to a text file of whitespace-separated integer labels,
            one row per window.

    Returns:
        int32 array with one row per non-blank line of the file, holding
        the file's labels minus one.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a token cannot be parsed as an integer.
    """
    # ``with`` closes the file even when parsing raises; split() tolerates
    # any amount of surrounding or separating whitespace.
    with open(y_path, 'r') as file:
        y_ = np.array(
            [row.split() for row in file if row.strip()],
            dtype=np.int32,
        )
    # Subtract 1 so the smallest label in a 1-based file becomes 0.
    return y_ - 1
# Build one-hot labels
def one_hot(y_, n_values=None):
    """Encode zero-based integer class labels as one-hot rows.

    Args:
        y_: labels of shape (n,) or (n, 1).
        n_values: width of the encoding (number of classes). Defaults to
            ``max(y_) + 1``. Pass it explicitly when a split may not
            contain the highest class, so that every split is encoded
            with the same width.

    Returns:
        float64 array of shape (n, n_values) with a single 1 per row.

    Raises:
        ValueError: if ``y_`` is empty and ``n_values`` is not given.
    """
    labels = np.asarray(y_)
    # Flatten a column vector (n, 1) to (n,) so it can index the identity.
    labels = labels.reshape(len(labels))
    if n_values is None:
        n_values = int(np.max(labels)) + 1
    # Row k of the identity matrix is the one-hot vector of class k.
    return np.eye(n_values)[np.array(labels, dtype=np.int32)]
# File-name prefixes of the nine raw signals; their order fixes the order
# of the last axis of the arrays built from them.
INPUT_SIGNAL_TYPES = [
    'body_acc_x_', 'body_acc_y_', 'body_acc_z_',
    'body_gyro_x_', 'body_gyro_y_', 'body_gyro_z_',
    'total_acc_x_', 'total_acc_y_', 'total_acc_z_',
]

# The six activity labels (presumably listed in class-index order --
# confirm against the dataset's activity_labels.txt).
LABELS = [
    'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS',
    'SITTING', 'STANDING', 'LAYING',
]

# Location of the dataset on disk
DATASET_PATH = 'D:/xiangmu/UCI HAR Dataset/'
TRAIN = 'train/'
TEST = 'test/'

# One raw-signal file per entry of INPUT_SIGNAL_TYPES, for each split.
X_train_signals_paths = [
    f'{DATASET_PATH}{TRAIN}Inertial Signals/{prefix}train.txt'
    for prefix in INPUT_SIGNAL_TYPES
]
X_test_signals_paths = [
    f'{DATASET_PATH}{TEST}Inertial Signals/{prefix}test.txt'
    for prefix in INPUT_SIGNAL_TYPES
]

# Label files for each split.
y_train_path = f'{DATASET_PATH}{TRAIN}y_train.txt'
y_test_path = f'{DATASET_PATH}{TEST}y_test.txt'
def evaluate(trainX, trainy, testX, testy):
    """Train a single-convolution 2D CNN and score it on the test set.

    Every window is folded into an ``n_l`` x ``n_h`` grid (8 x 16) so it
    can be fed to Conv2D, with the 9 signals used as channels.

    Args:
        trainX: training windows; must be reshapeable to (n, 8, 16, 9).
        trainy: one-hot training labels, one row per training window.
        testX: test windows; must be reshapeable to (m, 8, 16, 9).
        testy: one-hot test labels, one row per test window.

    Returns:
        Tuple ``(accuracy, predict)``: the accuracy metric reported by
        ``model.evaluate`` on the test set, and the ``model.predict``
        output for ``testX``.
    """
    verbose, epochs, batch_size = 1, 25, 64
    # 9 sensor signals (used as the CNN input channels) and 6 activity classes.
    n_features, n_outputs = 9, 6
    # Height and width of the grid each 1-D window is folded into.
    n_l, n_h = 8, 16
    # Reshape the arguments themselves (not module-level globals) and let
    # NumPy infer the sample count, so any number of windows is accepted.
    trainX = np.asarray(trainX).reshape((-1, n_l, n_h, n_features))
    testX = np.asarray(testX).reshape((-1, n_l, n_h, n_features))
    # Build the 2D CNN
    model = Sequential()
    # 126 filters of size 4x4 over the n_l x n_h x n_features input.
    model.add(tf.keras.layers.Conv2D(
        126, (4, 4), input_shape=(n_l, n_h, n_features), activation='relu'))
    # Global average pooling reduces every feature map to a single value.
    model.add(tf.keras.layers.GlobalAveragePooling2D())
    model.add(Dense(200, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # The first value returned by evaluate() (presumably the loss) is discarded.
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    predict = model.predict(testX, verbose=1)
    return accuracy, predict
# Report the accuracy
def Accuracy(accuracy):
    """Print the accuracy as a percentage, with its spread across runs.

    Args:
        accuracy: a single accuracy given as a fraction (it is multiplied
            by 100 for display), or a sequence of such values from
            repeated runs. For a single value the reported spread is 0.

    Returns:
        None. The report is printed as ``Accuracy: <mean>% (+/-<std>)``.
    """
    # atleast_1d lets a bare scalar and a list of runs share one code path.
    scores = np.atleast_1d(np.asarray(accuracy, dtype=float)) * 100.0
    mean, std = np.mean(scores), np.std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (mean, std))
# Confusion matrix of the classification
def plot_confusion_matrix(y_true, y_pred):
    """Draw the confusion matrix for class indices 0-5 and show it.

    Args:
        y_true: true class indices.
        y_pred: predicted class indices.
    """
    C = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3, 4, 5])
    plt.matshow(C, cmap=plt.cm.Reds)
    # plt.colorbar()
    # Walk the matrix column by column: x is the column (predicted) index
    # and y is the row (true) index of each annotated cell.
    for col, column in enumerate(C.T):
        for row, count in enumerate(column):
            plt.annotate(
                count,
                xy=(col, row),
                horizontalalignment='center',
                verticalalignment='center',
            )
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
if __name__ == '__main__':
    X_train = load_X(X_train_signals_paths)
    X_test = load_X(X_test_signals_paths)
    # Read the test labels once; they feed both the one-hot targets and
    # the confusion matrix (re-reading the file per sample is wasted I/O).
    test_labels = load_y(y_test_path)
    y_train = one_hot(load_y(y_train_path))
    y_test = one_hot(test_labels)
    accuracy, x = evaluate(X_train, y_train, X_test, y_test)
    # Accuracy() prints its own report and returns None, so wrapping it in
    # print() would only add a stray "None" line.
    Accuracy(accuracy)
    # Plot the confusion matrix: the predicted class is the index of the
    # highest score in each prediction row.
    y_pred = np.argmax(x, axis=1)
    y_true = test_labels.reshape(-1)
    plot_confusion_matrix(y_true, y_pred)
最终分类结果:
准确率大约92.094%,即使增加卷积层或者改变卷积核准确率也仅有92%左右。