【神经网络-数据分类】利用卷积神经网络(CNN)对数据进行分类

对于数据量大,维度高,且难以找到数据之间内在关系的数据集,可以尝试用卷积神经网络对数据进行分类。

以下为卷积神经网络数据分类的大致结构,采用Adam作为优化器,交叉熵作为损失函数。

import torch
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader

class CipvCNN(nn.Module):
    """Small two-stage CNN classifier for single-channel 2-D feature maps.

    Both convolutions use a 3x3 kernel with stride 1 and padding 1, so they
    keep the spatial size unchanged. The single 2x2 max-pool halves each
    spatial dimension (rounding down). The final linear layer expects
    ``32 * 10 * 2`` flattened values, so the pooled map must be 10 x 2 --
    e.g. inputs of shape ``(N, 1, 20, 4)`` or ``(N, 1, 20, 5)``.

    Args:
        num_classes: Number of output logits (defaults to 20).
    """

    def __init__(self, num_classes=20):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=3,
                stride=1,
                padding=1,  # padding = (kernel_size - 1) / 2 keeps H x W unchanged
            ),  # (N, 1, H, W) -> (N, 16, H, W)
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 1, 1),  # (N, 16, H, W) -> (N, 32, H, W)
            nn.ReLU(),
            nn.MaxPool2d(2),  # (N, 32, H, W) -> (N, 32, H // 2, W // 2)
        )
        self.out = nn.Linear(32 * 10 * 2, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)`` for batch ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
        return self.out(x)

    def predict(self, features):
        """Run inference on a NumPy batch and return the logits as a NumPy array.

        Switches the module to eval mode (it is not switched back afterwards).

        Args:
            features: NumPy array holding a batch shaped like the input of
                :meth:`forward`; it is converted to a float32 tensor.

        Returns:
            NumPy array of logits with shape ``(N, num_classes)``.
        """
        self.eval()
        inputs = torch.from_numpy(features).float()
        # No gradients are needed for inference; skipping autograd avoids
        # building a graph that would only be thrown away.
        with torch.no_grad():
            return self(inputs).numpy()

class CipvDataset(Dataset):
    """Pairs a feature container with a label container, sample by sample.

    Note the argument order: labels first, then features. The sample count
    is taken from ``features.shape[0]``, so ``features`` must expose a
    ``shape`` attribute (for example a NumPy array or a tensor).
    """

    def __init__(self, labels, features):
        super().__init__()
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples (size of the first feature axis)."""
        shape = self.features.shape
        return shape[0]

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict with 'feature' and 'label'."""
        return dict(feature=self.features[idx], label=self.labels[idx])

class CipvTrain(object):
    """Bundles a ``CipvCNN`` with its optimizer, loss and training settings.

    Uses Adam as the optimizer and cross-entropy as the loss.
    """

    def __init__(self):
        self.network = CipvCNN()
        self.learning_rate = 0.001
        self.optimizer = torch.optim.Adam(self.network.parameters(), lr=self.learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.num_epochs = 150
        self.batchsize = 300
        self.shuffle = True  # shuffle the samples each epoch; disable if order matters
        # (height, width) of one sample as expected by get_action().
        self.input_shape = (20, 5)

    def train(self, feature, label):
        """Train the network for ``self.num_epochs`` epochs, printing the mean loss.

        Args:
            feature: Indexable container of samples exposing ``shape``.
                Assumed to be shaped ``(N, 1, H, W)`` so batches can be fed
                straight to the network -- confirm against the caller.
            label: Integer class labels aligned with ``feature``. A trailing
                axis of size 1 is squeezed away before computing the loss.

        Raises:
            ValueError: If the dataset contains no samples.
        """
        self.network.train()
        dataset = CipvDataset(label, feature)
        if len(dataset) == 0:
            # Without this guard an empty loader would leave the per-epoch
            # average undefined (there are no batches to average over).
            raise ValueError("cannot train on an empty dataset")
        loader = DataLoader(dataset, shuffle=self.shuffle, batch_size=self.batchsize)
        num_batches = len(loader)

        for _ in range(self.num_epochs):
            total_loss = 0.0
            for data in loader:
                features = data['feature'].float()
                labels = data['label'].long()
                self.optimizer.zero_grad()
                predictions = self.network(features)
                loss = self.criterion(predictions, labels.squeeze(-1))
                loss.backward()
                total_loss += loss.item()
                self.optimizer.step()
            print('loss', total_loss / num_batches)

    def get_action(self, feature):
        """Return the predicted class index for a single sample.

        Args:
            feature: NumPy array holding one sample; it is reshaped to
                ``(1, 1, *self.input_shape)`` to add the batch and channel
                axes the network expects.

        Returns:
            The index of the largest logit, as a Python ``int``.
        """
        self.network.eval()

        feature = feature.reshape(1, 1, *self.input_shape)
        p = self.network.predict(feature)
        # Use the array's own argmax so this module needs no numpy import.
        return int(p.argmax())

在训练好之后,如何知道自己模型的预测效果呢?这里提供一个简单的测试函数,用于计算模型在带标签数据上的预测准确率:

def test_model(policy, features, labels):
    """Return the fraction of samples whose predicted label matches the truth.

    Args:
        policy: Object exposing ``get_action(feature)`` that returns a
            predicted label (e.g. a trained ``CipvTrain`` instance).
        features: Sized, iterable collection of samples.
        labels: Ground-truth labels, indexable in step with ``features``.

    Returns:
        Accuracy in ``[0, 1]``; ``0.0`` when ``features`` is empty.
    """
    total = len(features)
    if total == 0:
        # Nothing to evaluate; avoid dividing by zero.
        return 0.0

    success_time = 0
    for i, feature in enumerate(features):
        if policy.get_action(feature) == labels[i]:
            success_time += 1
    return success_time / total


# The name starts with "test_", so pytest would otherwise try to collect this
# helper as a test case whenever it is imported into a test module.
test_model.__test__ = False

你可能感兴趣的:(数据挖掘,神经网络,神经网络,数据挖掘)