Converting grayscale images into an MNIST-format dataset and training on it

Step 1:

First, use the project below to convert the grayscale images into an MNIST-format dataset:

GitHub - gskielian/JPG-PNG-to-MNIST-NN-Format: Python/Bash scripts for creating custom Neural Net Training Data -- this repo is for the MNIST format
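The repository's own scripts handle the conversion; see its README for usage. For reference, the output is simply a pair of gzip-compressed IDX files per split, so the packing step can also be sketched by hand. The snippet below is an illustrative sketch, not the repository's code: it assumes grayscale images sorted into one sub-directory per class, with each directory named by its numeric label.

import gzip
import os
import struct

import numpy as np
from PIL import Image


def write_mnist(image_dir, image_file, label_file):
    """Pack grayscale images (one sub-folder per numeric class label) into gzipped IDX files."""
    images, labels = [], []
    for label in sorted(os.listdir(image_dir)):
        class_dir = os.path.join(image_dir, label)
        if not os.path.isdir(class_dir):
            continue
        for name in sorted(os.listdir(class_dir)):
            img = Image.open(os.path.join(class_dir, name)).convert('L').resize((28, 28))
            images.append(np.asarray(img, dtype=np.uint8))
            labels.append(int(label))
    images = np.stack(images)
    labels = np.asarray(labels, dtype=np.uint8)

    with gzip.open(image_file, 'wb') as f:
        f.write(struct.pack('>IIII', 2051, len(images), 28, 28))  # IDX image header: magic, count, rows, cols
        f.write(images.tobytes())
    with gzip.open(label_file, 'wb') as f:
        f.write(struct.pack('>II', 2049, len(labels)))  # IDX label header: magic, count
        f.write(labels.tobytes())


# Hypothetical folder layout: 4_Png/Train/<label>/<image>.png
# write_mnist('4_Png/Train', 'train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz')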

Step 2:

Load the local MNIST-format dataset and train a CNN on it; when training finishes, the corresponding .pth weight files are written to disk.

import gzip
import os

import torch
from torchvision import transforms  # commonly used image transform utilities
from torch.utils.data import DataLoader, Dataset
import numpy as np

batch_size = 8
transform = transforms.Compose(
    [
        transforms.ToTensor(),                      # convert the data to a tensor
        transforms.Normalize((0.1307,), (0.3081,))  # 0.1307 is the mean, 0.3081 the standard deviation
    ]
)


"""path:数据集的路径
kind:值为train,代表读取训练集"""
# 加载MNIST数据集

# #************************************a2torchloadlocalminist*********************************************************
class DealDataset(Dataset):
    """
        读取数据、初始化数据
    """

    def __init__(self, folder, data_name, label_name, transform=None):
        (train_set, train_labels) = load_data(folder, data_name,
                                              label_name)  # read the gzipped IDX files into numpy arrays
        self.train_set = train_set
        self.train_labels = train_labels
        self.transform = transform

    def __getitem__(self, index):
        img, target = self.train_set[index], int(self.train_labels[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, target

    def __len__(self):
        return len(self.train_set)


def load_data(data_folder, data_name, label_name):
    """Read images and labels from gzipped IDX (MNIST-format) files."""
    with gzip.open(os.path.join(data_folder, label_name), 'rb') as lbpath:  # 'rb' reads binary data
        # offset=8 skips the IDX label header (magic number + item count)
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(os.path.join(data_folder, data_name), 'rb') as imgpath:
        # offset=16 skips the IDX image header (magic number, image count, rows, cols)
        x_train = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)
    return (x_train, y_train)



train_dataset = DealDataset(r'D:\traffic_experiment\USTC-TK2016\5_Mnist', "train-images-idx3-ubyte.gz",
                           "train-labels-idx1-ubyte.gz", transform=transforms.ToTensor())
test_dataset = DealDataset(r'D:\traffic_experiment\USTC-TK2016\5_Mnist', "test-images-idx3-ubyte.gz",
                           "test-labels-idx1-ubyte.gz", transform=transforms.ToTensor())


train_loader=DataLoader(train_dataset,
                        shuffle=True,
                        batch_size=batch_size)
test_loader=DataLoader(test_dataset,
                       shuffle=True,
                       batch_size=batch_size)

print(train_dataset)


class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 25, kernel_size=3),
            torch.nn.BatchNorm2d(25),
            torch.nn.ReLU(inplace=True)
        )

        self.layer2 = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(25, 50, kernel_size=3),
            torch.nn.BatchNorm2d(50),
            torch.nn.ReLU(inplace=True)
        )

        self.layer4 = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.fc = torch.nn.Sequential(
            torch.nn.Linear(50 * 5 * 5, 1024),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(1024, 128),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(128, 11)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten before the fully connected layers
        x = self.fc(x)
        return x


model=CNN()
criterion = torch.nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.5)  # momentum helps push past local minima
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        optimizer.zero_grad()
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:  # report every 300 mini-batches instead of every iteration
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0
    torch.save(model, 'model_{}.pth'.format(epoch))

def test():
    model.eval()  # switch BatchNorm to inference mode for evaluation
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are computed inside this block
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)  # _ is each row's maximum, predicted is its column index
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))
    model.train()  # back to training mode for the next epoch
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
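Note that the transform defined at the top of this script (ToTensor followed by Normalize) is never actually used: DealDataset is constructed with a bare transforms.ToTensor(), so the inputs are only scaled to [0, 1]. If you want the normalization to take effect, a minimal sketch (same paths as above) is to pass that Compose object instead:

train_dataset = DealDataset(r'D:\traffic_experiment\USTC-TK2016\5_Mnist', "train-images-idx3-ubyte.gz",
                            "train-labels-idx1-ubyte.gz", transform=transform)  # ToTensor + Normalize
test_dataset = DealDataset(r'D:\traffic_experiment\USTC-TK2016\5_Mnist', "test-images-idx3-ubyte.gz",
                           "test-labels-idx1-ubyte.gz", transform=transform)

If you do this, the inference transform in step 3 should end with the same transforms.Normalize((0.1307,), (0.3081,)) so that training and test inputs are on the same scale.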

Step 3:

Test with the trained weight file.

import torch
import torchvision
from PIL import Image
from torchvision import transforms  # commonly used image transform utilities

# transform = torchvision.transforms.Compose([torchvision.transforms.Resize((1,784)),
#  torchvision.transforms.ToTensor()])
image_path = "4_Png/Test/3/Gmail.pcap.TCP_1-1-23-255_39055_1-2-171-212_443.png"
image = Image.open(image_path)
print(image)

transform = torchvision.transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.Resize((28, 28)),
    torchvision.transforms.ToTensor()
])
image = transform(image)    # a 3-D tensor of shape (C, H, W) at this point
image = image.unsqueeze(0)  # add a batch dimension: (1, C, H, W)
print(image.shape)

# Load the model (torch.save stored the whole module, so the CNN class definition is needed again here)

class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 25, kernel_size=3),
            torch.nn.BatchNorm2d(25),
            torch.nn.ReLU(inplace=True)
        )

        self.layer2 = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(25, 50, kernel_size=3),
            torch.nn.BatchNorm2d(50),
            torch.nn.ReLU(inplace=True)
        )

        self.layer4 = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.fc = torch.nn.Sequential(
            torch.nn.Linear(50 * 5 * 5, 1024),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(1024, 128),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(128, 11)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten before the fully connected layers
        x = self.fc(x)
        return x

net = torch.load("1model_4.pth", map_location=torch.device("cpu"))
net.eval()  # inference mode so BatchNorm uses its running statistics
with torch.no_grad():
    output = net(image)


traffic_class = {0: 'BitTorrent', 1: 'Facetime', 2: 'FTP', 3: 'Gmail', 4: 'Miuref', 5: 'MySQL',
                 6: 'Outlook', 7: 'Skype', 8: 'Tinba', 9: 'WorldOfWarcraft', 10: 'Zeus'}
print(output)
print(torch.argmax(output))
print(int(torch.argmax(output)))
print("经过检测,流量类别为:"+traffic_class[int(torch.argmax(output))])
