该专栏将较为详细地介绍如何利用深度学习进行故障诊断方面的学术研究,主要以轴承为例,包括深度学习常用框架Tensorflow的搭建以及使用,并会记录完整的搭建过程,同时以卷积神经网络与循环神经网络为例进行代码编写和实际运行。相信经过本次学习,你能够入门并开始着手研究。
完成该专栏的学习,你将会收获以下知识:
1.Anaconda的安装以及使用,深度学习框架Tensorflow2的安装以及使用
2.学会如何利用卷积神经网络与循环神经网络进行轴承故障诊断-以凯斯西储大学(CWRU)轴承数据集为例
3.学会一些常用调参技巧
4.入门利用深度学习进行故障诊断的学术研究
说明:
1.专栏所涉及代码会全部公开在本人的github上,欢迎交流以及star。
https://github.com/boating-in-autumn-rain?tab=repositories
2.该专栏涉及数据集以及相关安装包在公众号《秋雨行舟》回复轴承即可领取。
3.对于该项目有疑问的可以公众号留言,看到了就会回复。
4.该专栏对应的视频可在B站搜索《秋雨行舟》进行观看学习。
项目结构:
0.数据预处理代码
#coding:utf-8
# 专栏地址:
# https://blog.csdn.net/qq_38918049/article/details/124948664?spm=1001.2014.3001.5501
#
# 专栏所涉及代码会全部公开在github上,欢迎交流以及star
# https://github.com/boating-in-autumn-rain?tab=repositories
#
# 该专栏涉及数据集以及相关安装包在公众号《秋雨行舟》回复轴承即可领取。
#
# 对于该项目有疑问的可以公众号留言,看到了就会回复。
#
# 该专栏对应的视频可在B站搜索《秋雨行舟》进行观看学习。
from scipy.io import loadmat
import numpy as np
import os
from sklearn import preprocessing # 0-1编码
from sklearn.model_selection import StratifiedShuffleSplit # 随机划分,保证每一类比例相同
def prepro(d_path, length=0, number=0, normal=True, rate=(0, 0, 0), enc=False, enc_step=28):
    """Load CWRU bearing .mat files and split them into train/valid/test sets.

    Each .mat file in ``d_path`` is treated as one fault class; only the
    drive-end ('DE') channel of each file is used.

    Args:
        d_path: directory containing the .mat files (one class per file).
        length: number of points per sample window.
        number: total samples drawn per class (train + test pool combined).
        normal: if True, standardize the data (see ``scalar_stand``).
        rate: (train, valid, test) proportions; ``rate[1] + rate[2]`` must be
            positive, otherwise the valid/test split divides by zero.
        enc: unused; kept for backward compatibility with existing callers.
        enc_step: unused; kept for backward compatibility.

    Returns:
        Train_X, Train_Y, Valid_X, Valid_Y, Test_X, Test_Y
    """
    # All file names in the folder; their order defines the class labels.
    filenames = os.listdir(d_path)

    # Fixed stride (in points) between consecutive sliding windows.
    step = 150

    def capture(original_path):
        # Read every .mat file and keep only the drive-end ('DE') signal,
        # flattened to 1-D. If a file has several 'DE' keys, the last wins.
        files = {}
        for name in filenames:
            mat = loadmat(os.path.join(d_path, name))
            for key in mat.keys():
                if 'DE' in key:
                    files[name] = mat[key].ravel()
        return files

    def slice_enc(data, slice_rate=rate[1] + rate[2]):
        # Slide a window of `length` points over each signal: the first
        # windows become training samples, the rest form the test pool.
        Train_Samples = {}
        Test_Samples = {}
        for name, signal in data.items():
            # e.g. number=1000, slice_rate=0.3 -> 700 training samples.
            samp_train = int(number * (1 - slice_rate))
            train_part = [signal[j * step: j * step + length]
                          for j in range(samp_train)]
            # Test windows start after the last training window ends, so
            # the two sets never overlap.
            offset = samp_train * step + length
            test_part = [signal[offset + h * step: offset + h * step + length]
                         for h in range(number - samp_train)]
            Train_Samples[name] = train_part
            Test_Samples[name] = test_part
        return Train_Samples, Test_Samples

    def add_labels(train_test):
        # Flatten the per-file dict into sample list X and integer labels Y;
        # a sample's label is its file's position in `filenames`.
        X, Y = [], []
        for label, name in enumerate(filenames):
            samples = train_test[name]
            X += samples
            Y += [label] * len(samples)
        return X, Y

    def scalar_stand(Train_X, Test_X):
        # Standardize both sets with statistics fitted on their union.
        # NOTE(review): fitting on train+test leaks test statistics into
        # training; fitting on Train_X alone would be methodologically stricter.
        data_all = np.vstack((Train_X, Test_X))
        scaler = preprocessing.StandardScaler().fit(data_all)
        return scaler.transform(Train_X), scaler.transform(Test_X)

    def valid_test_slice(Test_X, Test_Y):
        # Stratified split of the test pool into validation and test sets
        # so every class keeps the same proportion in both.
        test_size = rate[2] / (rate[1] + rate[2])
        ss = StratifiedShuffleSplit(n_splits=1, test_size=test_size)
        Test_Y = np.asarray(Test_Y, dtype=np.int32)
        for train_index, test_index in ss.split(Test_X, Test_Y):
            X_valid, X_test = Test_X[train_index], Test_X[test_index]
            Y_valid, Y_test = Test_Y[train_index], Test_Y[test_index]
            return X_valid, Y_valid, X_test, Y_test

    # Read the raw signals, window them, and attach labels.
    data = capture(original_path=d_path)
    train, test = slice_enc(data)
    Train_X, Train_Y = add_labels(train)
    Test_X, Test_Y = add_labels(test)
    # Optionally standardize train/test together.
    if normal:
        Train_X, Test_X = scalar_stand(Train_X, Test_X)
    Train_X = np.asarray(Train_X)
    Test_X = np.asarray(Test_X)
    # Split the test pool into validation and test sets.
    Valid_X, Valid_Y, Test_X, Test_Y = valid_test_slice(Test_X, Test_Y)
    return Train_X, Train_Y, Valid_X, Valid_Y, Test_X, Test_Y
1.直接运行cnn.py
# coding:utf-8
from time import sleep
from sklearn.manifold import TSNE
from tensorflow import keras
import preprocess
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import random
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
from datetime import datetime
from tensorflow_core.python.keras import layers
import numpy as np
import tensorflow as tf
# 专栏地址:
# https://blog.csdn.net/qq_38918049/article/details/124948664?spm=1001.2014.3001.5501
#
# 专栏所涉及代码会全部公开在github上,欢迎交流以及star
# https://github.com/boating-in-autumn-rain?tab=repositories
#
# 该专栏涉及数据集以及相关安装包在公众号《秋雨行舟》回复轴承即可领取。
#
# 对于该项目有疑问的可以公众号留言,看到了就会回复。
#
# 该专栏对应的视频可在B站搜索《秋雨行舟》进行观看学习。
#如果是GPU,需要去掉注释,如果是CPU,则注释
# gpu = tf.config.experimental.list_physical_devices(device_type='GPU')
# assert len(gpu) == 1
# tf.config.experimental.set_memory_growth(gpu[0], True)
def subtime(date1, date2):
    """Return the elapsed time between two timestamps (date2 - date1)."""
    elapsed = date2 - date1
    return elapsed
num_classes = 10  # number of fault classes
length = 784  # points per sample window (later reshaped to (784, 1))
number = 300  # samples per class
normal = True  # standardize inside preprocess.prepro
rate = [0.5, 0.25, 0.25]  # train / validation / test split proportions
path = r'data\0HP'
# Load the raw .mat signals and split them into the three sets.
x_train, y_train, x_valid, y_valid, x_test, y_test = preprocess.prepro(
    d_path=path,
    length=length,
    number=number,
    normal=normal,
    rate=rate,
    enc=False, enc_step=28)
x_train = np.array(x_train)
y_train = np.array(y_train)
x_valid = np.array(x_valid)
y_valid = np.array(y_valid)
x_test = np.array(x_test)
y_test = np.array(y_test)
print(x_train.shape)
print(x_valid.shape)
print(x_test.shape)
print(y_train.shape)
print(y_valid.shape)
print(y_test.shape)
# Labels must be plain ints for sparse_categorical_crossentropy.
y_train = [int(i) for i in y_train]
y_valid = [int(i) for i in y_valid]
y_test = [int(i) for i in y_test]
# Shuffle each split; the fixed seed makes runs reproducible.
index = [i for i in range(len(x_train))]
random.seed(1)
random.shuffle(index)
x_train = np.array(x_train)[index]
y_train = np.array(y_train)[index]
index1 = [i for i in range(len(x_valid))]
random.shuffle(index1)
x_valid = np.array(x_valid)[index1]
y_valid = np.array(y_valid)[index1]
index2 = [i for i in range(len(x_test))]
random.shuffle(index2)
x_test = np.array(x_test)[index2]
y_test = np.array(y_test)[index2]
print(x_train.shape)
print(x_valid.shape)
print(x_test.shape)
print(y_train)
print(y_valid)
print(y_test)
print("x_train的最大值和最小值:", x_train.max(), x_train.min())
print("x_test的最大值和最小值:", x_test.max(), x_test.min())
# Add a channel dimension: (N, 784) -> (N, 784, 1) as Conv1D expects.
x_train = tf.reshape(x_train, (len(x_train), 784, 1))
x_valid = tf.reshape(x_valid, (len(x_valid), 784, 1))
x_test = tf.reshape(x_test, (len(x_test), 784, 1))
# 保存最佳模型
# Save the best model weights (lowest validation loss) during training.
class CustomModelCheckpoint(keras.callbacks.Callback):
    """Keras callback that saves weights every time val_loss improves.

    Args:
        model: the model whose weights are written out.
        path: file path for the saved weights.
    """

    def __init__(self, model, path):
        # Initialize the base Callback so Keras can attach its own state.
        super().__init__()
        self.model = model
        self.path = path
        self.best_loss = np.inf

    def on_epoch_end(self, epoch, logs=None):
        # logs.get avoids a KeyError when val_loss is absent
        # (e.g. fit() called without validation_data).
        val_loss = (logs or {}).get('val_loss')
        if val_loss is not None and val_loss < self.best_loss:
            print("\nValidation loss decreased from {} to {}, saving model".format(self.best_loss, val_loss))
            self.model.save_weights(self.path, overwrite=True)
            self.best_loss = val_loss
# 模型定义
# Model definition.
def mymodel():
    """Build and return an uncompiled 1-D CNN for 10-class classification."""
    inputs = keras.Input(shape=(x_train.shape[1], x_train.shape[2]))
    # Two convolution + max-pooling stages extract local features.
    x = layers.Conv1D(filters=8, kernel_size=3, strides=1, padding='same', activation='relu')(inputs)
    x = layers.MaxPool1D(pool_size=2, strides=2, padding='same')(x)
    x = layers.Conv1D(filters=16, kernel_size=3, strides=1, padding='same', activation='relu')(x)
    x = layers.MaxPool1D(pool_size=2, strides=2, padding='same')(x)
    # Flatten, regularize with dropout, then classify into 10 classes.
    x = layers.Flatten()(x)
    x = layers.Dropout(0.6)(x)
    x = layers.Dense(32, activation='relu')(x)
    outputs = layers.Dense(10, activation='softmax')(x)
    return keras.Model(inputs, outputs, name="cnn")
model = mymodel()
model.summary()
startdate = datetime.utcnow()  # record the training start time
# Compile the model; integer labels pair with sparse_categorical_crossentropy.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])
# Train; the callback checkpoints the lowest-val_loss weights to bestcnn.h5.
history = model.fit(x_train, y_train,
                    batch_size=256, epochs=50, verbose=1,
                    validation_data=(x_valid, y_valid),
                    callbacks=[CustomModelCheckpoint(
                        model, r'bestcnn.h5')])
# Reload the best checkpoint so evaluation uses the best epoch, not the last.
model.load_weights(filepath= 'bestcnn.h5')
# Re-compile after loading the weights.
model.compile(loss='sparse_categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
# Evaluate on the held-out test set.
scores = model.evaluate(x_test, y_test, verbose=1)
print('%s: %.2f%%' % (model.metrics_names[1], scores[1] * 100))
y_predict = model.predict(x_test)
y_pred_int = np.argmax(y_predict, axis=1)  # class index with highest probability
print(y_pred_int[0:5])
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_int, digits=4))
# Plot the training-history curves.
def acc_line():
    """Plot training/validation accuracy and loss over the epochs."""
    hist = history.history
    acc, val_acc = hist['accuracy'], hist['val_accuracy']
    loss, val_loss = hist['loss'], hist['val_loss']
    epochs = range(len(acc))  # one point per epoch

    # Accuracy curves.
    plt.plot(epochs, acc, 'r', linestyle='-.')
    plt.plot(epochs, val_acc, 'b', linestyle='dashdot')
    plt.title('Training and validation accuracy')
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend(["Accuracy", "Validation Accuracy"])

    # Loss curves, on a second figure.
    plt.figure()
    plt.plot(epochs, loss, 'r', linestyle='-.')
    plt.plot(epochs, val_loss, 'b', linestyle='dashdot')
    plt.title('Training and validation loss')
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend(["Loss", "Validation Loss"])
    plt.show()


acc_line()
# 绘制混淆矩阵
# Plot the confusion matrix of predictions on the test set.
def confusion():
    """Predict on the test set and display the annotated confusion matrix."""
    y_pred_gailv = model.predict(x_test, verbose=1)
    y_pred_int = np.argmax(y_pred_gailv, axis=1)
    print(len(y_pred_int))
    # Compare integer labels directly: the old .astype(str) ordered classes
    # lexicographically ('10' < '2'), which would scramble the matrix as
    # soon as there are 10 or more classes.
    con_mat = confusion_matrix(y_test, y_pred_int)
    print(con_mat)
    classes = sorted(set(y_train))
    plt.imshow(con_mat, cmap=plt.cm.Blues)
    indices = range(len(con_mat))
    plt.xticks(indices, classes)
    plt.yticks(indices, classes)
    plt.colorbar()
    plt.xlabel('guess')
    plt.ylabel('true')
    # Annotate each cell; imshow puts the row index on the y axis, so the
    # value con_mat[row][col] is drawn at x=col, y=row.
    for first_index in range(len(con_mat)):
        for second_index in range(len(con_mat[first_index])):
            plt.text(first_index, second_index, con_mat[second_index][first_index], va='center', ha='center')
    plt.show()


confusion()
模型训练截图
2.训练集与测试集训练曲线截图
3.混淆矩阵截图
整个专栏大致是这样,如果有疑问的话,欢迎留言交流,共同进步。