First, use the project below to convert the grayscale images into an MNIST-format dataset:
GitHub - gskielian/JPG-PNG-to-MNIST-NN-Format: Python/Bash scripts for creating custom Neural Net Training Data -- this repo is for the MNIST format
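The repo's own scripts handle the packing, but for reference the conversion step can also be sketched directly in Python: read each grayscale PNG, resize it to 28x28, and write the pixels and labels into the gzip-compressed IDX files that the loader below expects. The folder layout (one sub-directory per class, e.g. 4_Png/Train/0, 4_Png/Train/1, ...) and the helper name pack_idx are assumptions for illustration; adjust them to your own paths. A minimal sketch:

import gzip, os, struct
import numpy as np
from PIL import Image

def pack_idx(png_root, image_out, label_out):
    # png_root contains one sub-folder per class, named 0, 1, 2, ... (assumed layout)
    images, labels = [], []
    for label in sorted(os.listdir(png_root)):
        class_dir = os.path.join(png_root, label)
        for name in os.listdir(class_dir):
            img = Image.open(os.path.join(class_dir, name)).convert('L').resize((28, 28))
            images.append(np.asarray(img, dtype=np.uint8))
            labels.append(int(label))
    images = np.stack(images)
    labels = np.asarray(labels, dtype=np.uint8)
    # IDX headers are big-endian 32-bit integers: magic number, item count (+ rows, cols for images)
    with gzip.open(image_out, 'wb') as f:
        f.write(struct.pack('>IIII', 2051, len(images), 28, 28) + images.tobytes())
    with gzip.open(label_out, 'wb') as f:
        f.write(struct.pack('>II', 2049, len(labels)) + labels.tobytes())

pack_idx('4_Png/Train', 'train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz')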
Load the local MNIST-format dataset and train a CNN on it; when training finishes, the corresponding .pth weight file is written out.
import gzip
import os
import struct
import torch
import torchvision
from torchvision import transforms  # commonly used image transformation utilities
from torchvision import datasets
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import numpy as np
batch_size=8
transform=transforms.Compose(
[
        transforms.ToTensor(),  # convert the data to a tensor
        transforms.Normalize((0.1307,), (0.3081,))  # 0.1307 is the mean, 0.3081 the standard deviation
]
)
"""path:数据集的路径
kind:值为train,代表读取训练集"""
# ************************* Load the local MNIST-format dataset *************************
class DealDataset(Dataset):
"""
读取数据、初始化数据
"""
def __init__(self, folder, data_name, label_name, transform=None):
        (train_set, train_labels) = load_data(folder, data_name,
                                              label_name)  # torch.load() could also be used directly; the result is then already a torch.Tensor
self.train_set = train_set
self.train_labels = train_labels
self.transform = transform
def __getitem__(self, index):
img, target = self.train_set[index], int(self.train_labels[index])
if self.transform is not None:
img = self.transform(img)
return img, target
def __len__(self):
return len(self.train_set)
def load_data(data_folder, data_name, label_name):
    with gzip.open(os.path.join(data_folder, label_name), 'rb') as lbpath:  # 'rb' means read as binary data
y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)
with gzip.open(os.path.join(data_folder, data_name), 'rb') as imgpath:
x_train = np.frombuffer(
imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)
return (x_train, y_train)
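# The IDX files start with a big-endian header of 32-bit integers: the label file has a
# magic number and an item count (hence offset=8 above), while the image file additionally
# stores the row and column counts (hence offset=16). An illustrative way to inspect the
# image-file header with the struct module imported above:
#   with gzip.open(os.path.join(data_folder, data_name), 'rb') as f:
#       magic, num, rows, cols = struct.unpack('>IIII', f.read(16))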
train_dataset = DealDataset(r'D:\traffic_experiment\USTC-TK2016\5_Mnist', "train-images-idx3-ubyte.gz",
"train-labels-idx1-ubyte.gz", transform=transforms.ToTensor())
test_dataset = DealDataset(r'D:\traffic_experiment\USTC-TK2016\5_Mnist', "test-images-idx3-ubyte.gz",
"test-labels-idx1-ubyte.gz", transform=transforms.ToTensor())
train_loader=DataLoader(train_dataset,
shuffle=True,
batch_size=batch_size)
test_loader=DataLoader(test_dataset,
shuffle=True,
batch_size=batch_size)
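# Each batch yielded by the loaders is a pair (images, labels), where images has shape
# (batch_size, 1, 28, 28) and labels has shape (batch_size,).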
print(train_dataset)
class CNN(torch.nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.layer1 = torch.nn.Sequential(
torch.nn.Conv2d(1, 25, kernel_size=3),
torch.nn.BatchNorm2d(25),
torch.nn.ReLU(inplace=True)
)
self.layer2 = torch.nn.Sequential(
torch.nn.MaxPool2d(kernel_size=2, stride=2)
)
self.layer3 = torch.nn.Sequential(
torch.nn.Conv2d(25, 50, kernel_size=3),
torch.nn.BatchNorm2d(50),
torch.nn.ReLU(inplace=True)
)
self.layer4 = torch.nn.Sequential(
torch.nn.MaxPool2d(kernel_size=2, stride=2)
)
self.fc = torch.nn.Sequential(
torch.nn.Linear(50 * 5 * 5, 1024),
torch.nn.ReLU(inplace=True),
torch.nn.Linear(1024, 128),
torch.nn.ReLU(inplace=True),
torch.nn.Linear(128, 11)
)
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten the feature maps before the fully connected layers
x = self.fc(x)
return x
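# How the flattened size 50 * 5 * 5 comes about for 28x28 inputs:
#   28 -> Conv2d(k=3): 26 -> MaxPool(2): 13 -> Conv2d(k=3): 11 -> MaxPool(2): 5
# so layer4 outputs 50 feature maps of size 5x5, i.e. 1250 features per image,
# and the final Linear layer has 11 outputs, one per traffic class.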
model=CNN()
criterion = torch.nn.CrossEntropyLoss()  # use cross-entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.5)  # momentum helps escape local minima
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        optimizer.zero_grad()
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:  # print every 300 mini-batches instead of every iteration
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0
    torch.save(model, 'model_{}.pth'.format(epoch))
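# Note: torch.save(model, ...) pickles the whole module, so the CNN class definition must be
# available again when the .pth file is loaded. Saving model.state_dict() and calling
# model.load_state_dict(...) at load time is the more portable pattern.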
def test():
correct=0
total=0
    with torch.no_grad():  # no gradients are computed inside this block
for data in test_loader:
images,labels=data
outputs=model(images)
            _, predicted = torch.max(outputs.data, dim=1)  # _ is the max value per row; predicted is its column index, i.e. the predicted class
total+=labels.size(0)
correct+=(predicted==labels).sum().item()
print('Accuracy on test set:%d %%'%(100*correct/total))
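# Note: calling model.eval() before evaluation (and model.train() before training) would make
# the BatchNorm layers use their running statistics instead of per-batch statistics.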
if __name__ == '__main__':
for epoch in range(10):
train(epoch)
test()
Test using the trained weight file
import torch
import torchvision
from PIL import Image
from torchvision import transforms  # commonly used image transformation utilities
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
# Load the data
from torch import nn
import numpy as np
import pandas as pd
from torchvision.transforms import InterpolationMode
# transform = torchvision.transforms.Compose([torchvision.transforms.Resize((1,784)),
# torchvision.transforms.ToTensor()])
image_path = "4_Png/Test/3/Gmail.pcap.TCP_1-1-23-255_39055_1-2-171-212_443.png"
image = Image.open(image_path)
print(image)
transform = torchvision.transforms.Compose([
transforms.Grayscale(num_output_channels=1),
torchvision.transforms.Resize((28, 28)),
torchvision.transforms.ToTensor()],
)
image = transform(image)    # at this point the image is a 3-D tensor of shape (1, 28, 28)
image = image.unsqueeze(0)  # add a batch dimension -> (1, 1, 28, 28)
print(image.shape)
# Load the model (the CNN class must be redefined so the pickled model can be restored)
class CNN(torch.nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.layer1 = torch.nn.Sequential(
torch.nn.Conv2d(1, 25, kernel_size=3),
torch.nn.BatchNorm2d(25),
torch.nn.ReLU(inplace=True)
)
self.layer2 = torch.nn.Sequential(
torch.nn.MaxPool2d(kernel_size=2, stride=2)
)
self.layer3 = torch.nn.Sequential(
torch.nn.Conv2d(25, 50, kernel_size=3),
torch.nn.BatchNorm2d(50),
torch.nn.ReLU(inplace=True)
)
self.layer4 = torch.nn.Sequential(
torch.nn.MaxPool2d(kernel_size=2, stride=2)
)
self.fc = torch.nn.Sequential(
torch.nn.Linear(50 * 5 * 5, 1024),
torch.nn.ReLU(inplace=True),
torch.nn.Linear(1024, 128),
torch.nn.ReLU(inplace=True),
torch.nn.Linear(128, 11)
)
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
        x = x.view(x.size(0), -1)  # flatten the feature maps before the fully connected layers
x = self.fc(x)
return x
net = torch.load("1model_4.pth", map_location=torch.device("cpu"))
net.eval()  # use the BatchNorm running statistics; important for single-image inference
with torch.no_grad():
    output = net(image)
traffic_class={0:'BitTorrent',1:'Facetime',2:'FTP',3:'Gmail',4:'Miuref',5:'MySQL',6:'Outlook',7:'Skype',8:'Tinba',9:'WorldOfWarcraft',10:'Zeus'}
print(output)
print(torch.argmax(output))
print(int(torch.argmax(output)))
print("经过检测,流量类别为:"+traffic_class[int(torch.argmax(output))])