数据预处理(十分类)
文件名:data_10
Fault location | Loads (hp) | Defect diameters (inches) | Class |
---|---|---|---|
Normal | 0/1/2/3 | 0 | 0 |
Inner race | 0/1/2/3 | 0.007 0.014 0.021 | 1 2 3 |
Ball | 0/1/2/3 | 0.007 0.014 0.021 | 4 5 6 |
Outer race | 0/1/2/3 | 0.007 0.014 0.021 | 7 8 9 |
import random
import numpy as np
import scipy.io as scio
from sklearn import preprocessing
def open_data(bath_path, key_num):
    """Load the drive-end time signal stored in one ``.mat`` file.

    The file ``<bath_path>/<key_num>.mat`` is read with ``scipy.io.loadmat``
    and the variable named ``X<key_num, zero-padded to 3 digits>_DE_time``
    is returned.

    Example:
        open_data('/Users/apple/Desktop/cwru/12k Drive End Bearing Fault Data/', 105)

    Args:
        bath_path: Directory containing the ``.mat`` files. A trailing path
            separator is accepted but no longer required.
        key_num: Integer file number; used both for the file name and for
            the variable name inside the file.

    Returns:
        The array stored under the ``X###_DE_time`` variable.

    Raises:
        KeyError: If the file does not contain the expected variable.
    """
    import os  # local import: the file's import header does not provide it

    # os.path.join yields the same path as plain concatenation when the
    # directory already ends with a separator, and a valid one when it
    # does not.
    path = os.path.join(bath_path, str(key_num) + ".mat")
    key = "X" + "%03d" % key_num + "_DE_time"
    mat = scio.loadmat(path)
    if key not in mat:
        raise KeyError("variable %r not found in %s" % (key, path))
    return mat[key]
def deal_data(data, length, label):
    """Cut a signal into fixed-length samples, scale them and attach a label.

    Args:
        data: Array holding the raw signal; it is flattened before use.
        length: Number of points per sample.
        label: Numeric class label written into the last column of every row.

    Returns:
        Array of shape ``(num, length + 1)`` where ``num = len(signal) // length``.
        The first ``length`` columns are the scaled sample, the last column
        is the label.
    """
    flat = np.reshape(data, (-1))
    num = len(flat) // length
    # Drop the tail that does not fill a complete sample.
    samples = np.reshape(flat[0:num * length], (num, length))

    # MinMaxScaler scales column by column, so the samples are transposed
    # into columns first: each sample is scaled by its own min and max.
    scaler = preprocessing.MinMaxScaler()
    scaled = scaler.fit_transform(samples.T).T

    labels = np.ones((num, 1)) * label
    return np.column_stack((scaled, labels))
def split_data(data, split_rate):
    """Randomly split the rows of ``data`` into train, validation and test sets.

    Args:
        data: NumPy array whose first axis indexes the samples.
        split_rate: Sequence whose first two entries are the train and
            validation fractions. Any further entry is not read: the test
            set is whatever remains after the first two sets are taken.

    Returns:
        Tuple ``(train, valid, test)``. ``train`` and ``valid`` are in random
        order; ``test`` keeps the original relative row order.
    """
    length = len(data)
    num1 = int(length * split_rate[0])
    num2 = int(length * split_rate[1])

    # Draw a permutation of ALL row indices. The previous code sampled from
    # range(num1) / range(num2), which only shuffled the leading rows and
    # never randomised which rows ended up in each set.
    order = np.array(random.sample(range(length), length), dtype=np.intp)

    train = data[order[:num1]]
    valid = data[order[num1:num1 + num2]]
    test = data[np.sort(order[num1 + num2:])]
    return train, valid, test
def _make_onehot_encoder():
    """Build a dense-output OneHotEncoder on both old and new scikit-learn."""
    try:
        return preprocessing.OneHotEncoder(sparse_output=False)
    except TypeError:
        # Older scikit-learn releases only know the ``sparse`` keyword.
        return preprocessing.OneHotEncoder(sparse=False)


def _stack_and_shuffle(parts, length):
    """Stack per-file splits, shuffle the rows, return (samples, label column)."""
    stacked = np.concatenate(parts, axis=0)
    stacked = stacked[random.sample(range(len(stacked)), len(stacked))]
    return stacked[:, 0:length], stacked[:, length].reshape(-1, 1)


def load_data(num, length, hp, fault_diameter, split_rate):
    """Build shuffled train/validation/test sets with one-hot labels.

    Args:
        num: Requested number of samples taken from each data file. It is
            capped at the smallest number of samples any loaded file yields.
        length: Number of points in each sample.
        hp: Iterable of load indices; each selects one normal file
            (``97 + index``) and one column triple of the fault file table.
        fault_diameter: Iterable of defect diameters; supported values are
            0.007, 0.014 and 0.021.
        split_rate: train:valid:test fractions, forwarded to ``split_data``.

    Returns:
        ``(train_data, train_label, valid_data, valid_label, test_data,
        test_label)``. Data arrays have ``length`` columns; label arrays are
        one-hot encoded.

    Raises:
        ValueError: If a value in ``fault_diameter`` is not supported.
    """
    # Placeholders: replace with the real directories (keeping a trailing
    # path separator is the safe choice).
    bath_path1 = 'path of Normal Baseline Data'
    bath_path2 = 'path of 12k Drive End Bearing Fault Data/'
    data_list = []
    # One row per diameter; within a row the columns come in
    # (inner, ball, outer) triples, one triple per load index.
    file_list = np.array([[105,118,130,106,119,131,107,120,132,108,121,133], #0.007
                          [169,185,197,170,186,198,171,187,199,172,188,200], #0.014
                          [209,222,234,210,223,235,211,224,236,212,225,237]]) #0.021
    label = 0

    # normal data
    for i in hp:
        normal_data = open_data(bath_path1, 97 + i)
        data_list.append(deal_data(normal_data, length, label=label))

    # abnormal data
    for i in fault_diameter:
        # Round instead of truncating: the float division can land just
        # below the intended integer and would then select the wrong row.
        row = int(round(i / 0.007)) - 1
        if not 0 <= row < len(file_list) or abs(i - 0.007 * (row + 1)) > 1e-6:
            raise ValueError("unsupported fault diameter: %r" % (i,))
        for j in hp:
            # Label offsets: inner race +1, ball +4, outer race +7.
            for offset, base in ((0, 1), (1, 4), (2, 7)):
                file_num = file_list[row, 3 * j + offset]
                fault_data = open_data(bath_path2, file_num)
                data_list.append(deal_data(fault_data, length, label + base))
        # Next diameter -> next label within each fault location.
        label = label + 1

    # keep each class same number of data
    min_num = min(len(part) for part in data_list)
    if num > min_num:
        print("The number of each class overflow, the maximum number is:%d" %min_num)
    min_num = min(num, min_num)

    # Divide the train, validation, test sets and shuffle
    train = []
    valid = []
    test = []
    for data in data_list:
        a, b, c = split_data(data[0:min_num, :], split_rate)
        train.append(a)
        valid.append(b)
        test.append(c)

    train_data, train_label = _stack_and_shuffle(train, length)
    valid_data, valid_label = _stack_and_shuffle(valid, length)
    test_data, test_label = _stack_and_shuffle(test, length)

    # Fit the encoder once on the training labels and reuse it, so all three
    # label matrices share the same column layout even if a split happens to
    # miss a class.
    onehot_encoder = _make_onehot_encoder()
    train_label = onehot_encoder.fit_transform(train_label)
    valid_label = onehot_encoder.transform(valid_label)
    test_label = onehot_encoder.transform(test_label)
    return train_data, train_label, valid_data, valid_label, test_data, test_label
WDCNN训练与测试
from data_10 import load_data
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.utils.data import DataLoader, TensorDataset
import torchvision
from torchvision import datasets, transforms
class Net(nn.Module):
    """1-D convolutional classifier: five conv blocks followed by two linear layers.

    Every block is Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(2). The first
    block uses a wide kernel (64) with stride 16; the remaining blocks use
    kernel size 3.

    The first linear layer expects exactly 64 flattened features, so the
    signal must shrink to a single time step after the fifth block. With
    these layer settings a 1024-point input does exactly that.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output values (classes).
    """

    def __init__(self, in_channel=1, out_channel=10):
        # The original called super(TeacherNet, self), a name that does not
        # exist here and raised NameError on construction.
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(in_channel, 16, kernel_size=64, stride=16, padding=24),
            nn.BatchNorm1d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv1d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )
        self.layer3 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )
        self.layer4 = nn.Sequential(
            nn.Conv1d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )
        # No padding in the last block: the length drops by 2 before pooling.
        self.layer5 = nn.Sequential(
            nn.Conv1d(64, 64, kernel_size=3),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )
        self.fc = nn.Sequential(
            nn.Linear(64, 100),
            nn.ReLU(inplace=True),
            nn.Linear(100, out_channel),
        )

    def forward(self, x):
        """Return the raw output scores for a batch shaped (batch, in_channel, length)."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        # Flatten (batch, channels, steps) to (batch, channels * steps).
        x = x.view(x.size(0), -1)
        output = self.fc(x)
        return output
def train_Model(model, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader`` using MSE loss.

    Progress is printed after every batch, and the training accuracy
    (argmax of the output vs. argmax of the target) once the pass is done.

    Args:
        model: Module to train; switched to training mode.
        train_loader: Iterable of ``(data, target)`` batches that also
            exposes ``.dataset`` for the total sample count. Targets need a
            class dimension at axis 1 (e.g. one-hot rows).
        optimizer: Optimizer bound to the model's parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    # Mean-reduced MSE, built once instead of once per batch.
    loss_fn = nn.MSELoss(reduction='mean')
    trained_samples = 0
    correct = 0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output.float(), target.float())
        # Plain backward on the scalar loss. Passing the loss value as the
        # ``gradient`` argument multiplied every gradient by the loss itself.
        loss.backward()
        optimizer.step()
        trained_samples += len(data)
        print("\rTrain epoch %d: %d/%d, " %
              (epoch, trained_samples, len(train_loader.dataset),), end='')
        pred = output.argmax(dim=1, keepdim=True)
        real = target.argmax(dim=1, keepdim=True)
        correct += pred.eq(real.view_as(pred)).sum().item()
    train_acc = correct / len(train_loader.dataset)
    print("Train acc: " , train_acc)
def test_Model(model,test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
output = model(data) #logits
print(output)
loss_fn = nn.MSELoss(reduce=True, size_average=False)
test_loss += loss_fn(output.float(), target.float()).item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
# print((pred==4).sum())
target = target.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest: average loss: {:.4f}, accuracy: {}/{} ({:.2f}%)'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
return test_loss, correct / len(test_loader.dataset)
def main():
    """Load the data, train a ``Net`` for 300 epochs and evaluate after each one.

    Returns:
        Tuple ``(model, model_history)`` where ``model_history`` holds one
        ``(loss, accuracy)`` pair from ``test_Model`` per epoch.
    """
    epochs = 300
    batch_size = 32
    torch.manual_seed(0)

    # The validation split is loaded but not used here.
    train_x, train_y, _, _, test_x, test_y = load_data(
        num = 100,
        length = 1024,
        hp = [0,1,2,3],
        fault_diameter = [0.007,0.014,0.021],
        split_rate = [0.7,0.1,0.2],
    )

    def to_input(samples):
        # Insert a channel axis at position 1 and cast to float32.
        return torch.tensor(samples).unsqueeze(1).to(torch.float32)

    train_set = TensorDataset(to_input(train_x), torch.tensor(train_y))
    test_set = TensorDataset(to_input(test_x), torch.tensor(test_y))
    train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

    model = Net()
    optimizer = torch.optim.Adadelta(model.parameters())

    model_history = []
    for epoch in range(1, epochs + 1):
        train_Model(model, train_loader, optimizer, epoch)
        loss, acc = test_Model(model, test_loader)
        model_history.append((loss, acc))
    # NOTE: the trained weights are not saved to disk.
    return model, model_history
# Runs the whole training/evaluation loop when this module is executed or
# imported, and keeps the results as module-level names.
model, model_history = main()