对于数据量大、维度高,且难以找到数据之间内在关系的数据集,可以尝试用卷积神经网络(CNN)对数据进行分类。
以下为基于 PyTorch 的卷积神经网络数据分类的大致结构,采用 Adam 作为优化器,交叉熵作为损失函数。
import torch
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
class CipvCNN(nn.Module):
    """Two-layer CNN that maps a single-channel 2-D feature map to 20 class scores.

    Both convolutions use a 3x3 kernel with stride 1 and padding 1, so they
    keep the spatial size of the input. The 2x2 max-pool then halves each
    spatial dimension (rounding down). The final linear layer expects
    ``32 * 10 * 2 = 640`` flattened features, so the input must pool down to
    a 10x2 map -- e.g. a 20x4 or a 20x5 input.
    """

    def __init__(self):
        super().__init__()
        # (N, 1, H, W) -> (N, 16, H, W); padding = (kernel_size - 1) / 2
        # preserves H and W.
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.ReLU(),
        )
        # (N, 16, H, W) -> (N, 32, H, W) -> pooled to (N, 32, H // 2, W // 2).
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Flattened 32 x 10 x 2 feature map -> 20 output scores (logits).
        self.out = nn.Linear(32 * 10 * 2, 20)

    def forward(self, x):
        """Return raw class scores of shape ``(N, 20)`` for a batch ``x``.

        Args:
            x: float tensor of shape ``(N, 1, H, W)``.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        return self.out(x)

    def predict(self, features):
        """Run inference on a NumPy batch and return the scores as a NumPy array.

        Switches the module to evaluation mode (and leaves it there).

        Args:
            features: ``numpy.ndarray`` of shape ``(N, 1, H, W)``; it is
                converted to ``float32`` before the forward pass.

        Returns:
            ``numpy.ndarray`` of shape ``(N, 20)`` with the raw class scores.
        """
        self.eval()
        inputs = torch.from_numpy(features).float()
        # No gradients are needed for inference; skipping graph construction
        # saves memory and time. Calling the module (instead of ``forward``
        # directly) keeps registered hooks working.
        with torch.no_grad():
            return self(inputs).numpy()
class CipvDataset(Dataset):
    """Map-style dataset pairing each feature sample with its label.

    ``labels`` and ``features`` are stored as given and indexed in parallel,
    so they must be index-aligned. Any sequence supporting ``len()`` and
    integer indexing works (NumPy arrays, tensors, lists).
    """

    def __init__(self, labels, features):
        """Store the label and feature containers (note the argument order)."""
        super().__init__()
        self.labels = labels
        self.features = features

    def __len__(self):
        """Return the number of samples, i.e. the length of ``features``."""
        # len() equals shape[0] for arrays/tensors and also supports lists.
        return len(self.features)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as ``{'feature': ..., 'label': ...}``."""
        return {'feature': self.features[idx], 'label': self.labels[idx]}
class CipvTrain(object):
    """Training / inference wrapper around ``CipvCNN``.

    Owns the network, an Adam optimizer and a cross-entropy loss, and exposes
    ``train`` for fitting and ``get_action`` for single-sample prediction.
    """

    def __init__(self):
        self.network = CipvCNN()
        self.learning_rate = 0.001
        self.optimizer = torch.optim.Adam(self.network.parameters(), lr=self.learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.num_epochs = 150
        self.batchsize = 300
        # Shuffle the samples each epoch; disable if sample order matters.
        self.shuffle = True

    def train(self, feature, label):
        """Fit the network for ``self.num_epochs`` epochs.

        Prints the mean batch loss after every epoch.

        Args:
            feature: indexable collection of samples, each shaped so that a
                batch becomes ``(N, 1, H, W)``.
            label: indexable collection of integer class indices aligned with
                ``feature``; per-sample shape may be scalar or ``(1,)``.
        """
        self.network.train()
        dataset = CipvDataset(label, feature)
        loader = DataLoader(dataset, shuffle=self.shuffle, batch_size=self.batchsize)
        for _ in range(self.num_epochs):
            total_loss = 0.0
            num_batches = 0
            for data in loader:
                features = data['feature'].float()
                # Flatten to shape (N,). Unlike squeeze(-1), this keeps a
                # one-sample batch of 1-D labels from collapsing to a 0-d
                # tensor that no longer matches the (N, C) predictions.
                labels = data['label'].long().reshape(-1)
                self.optimizer.zero_grad()
                predictions = self.network(features)
                loss = self.criterion(predictions, labels)
                loss.backward()
                total_loss += loss.item()
                self.optimizer.step()
                num_batches += 1
            # Guard against a loader that yielded no batches.
            if num_batches:
                print('loss', total_loss / num_batches)

    def get_action(self, feature):
        """Return the predicted class index for a single sample.

        Args:
            feature: ``numpy.ndarray`` with exactly 100 elements; it is
                reshaped to one batch of one 20x5 single-channel map.

        Returns:
            NumPy integer index of the highest-scoring class.
        """
        self.network.eval()
        # Add the batch and channel dimensions the network expects.
        # NOTE(review): the 20x5 layout is hard-coded here; confirm it matches
        # the layout of the data the network was trained on.
        feature = feature.reshape(1, 1, 20, 5)
        scores = self.network.predict(feature)
        # ndarray.argmax avoids depending on a module-level ``np`` name, which
        # this file never imports.
        return scores.argmax()
在训练好之后,如何知道自己模型的预测效果呢?这里提供一个简单的评估函数,用于计算模型在给定数据上的预测准确率:
def test_model(policy, features, labels):
    """Return the fraction of samples that ``policy`` classifies correctly.

    Args:
        policy: object exposing ``get_action(feature)`` that returns a
            predicted label (e.g. a ``CipvTrain`` instance).
        features: sized, iterable collection of samples.
        labels: collection indexable by position, aligned with ``features``.

    Returns:
        Accuracy in ``[0, 1]``; ``0.0`` when ``features`` is empty.
    """
    total = len(features)
    if total == 0:
        # Nothing to evaluate; avoid dividing by zero.
        return 0.0
    hits = 0
    for i, feature in enumerate(features):
        if policy.get_action(feature) == labels[i]:
            hits += 1
    return hits / total


# The name starts with "test_", so pytest would otherwise try to collect this
# helper as a test case and fail on its required arguments.
test_model.__test__ = False