# PyTorch-MLP 垃圾邮件分类 (spam e-mail classification with a PyTorch MLP)

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split#划分训练集和测试集
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report #模型评估
from sklearn.manifold import TSNE#数据降维 可视化
import torch
import torch.nn as nn
from torch.optim import SGD,Adam
import torch.utils.data as Data
import matplotlib.pyplot as plt
import seaborn as sns
import hiddenlayer as hl
# Spam e-mail classification: load the data, split it, and scale the features.
spam = pd.read_csv("Data/spambase.csv")
# The first 57 columns are used as features; the `label` column is the target.
# print(spam.head())
# print(spam.describe())
# Class balance of the label column. Output recorded by the original author:
#   0    2788
#   1    1813
#   Name: label, dtype: int64
print(spam.label.value_counts())

# Hold out 25% of the rows as a test set (fixed seed for reproducibility).
X = spam.iloc[:, 0:57].values
y = spam.label.values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=123
)

# Min-max scale every feature into [0, 1].
# The scaler is fitted on the training split ONLY and then re-used for the
# test split: fitting it again on the test data would scale the two splits
# with different min/max values and leak test statistics into preprocessing.
scales = MinMaxScaler(feature_range=(0, 1))
X_train_s = scales.fit_transform(X_train)
X_test_s = scales.transform(X_test)
# # Box plots of every scaled training feature, grouped by label (disabled)
# colname=spam.columns.values[:-1]
# plt.figure(figsize=(20,14))
# for ii in range(len(colname)):
#     plt.subplot(7,9,ii+1)
#     sns.boxplot(x=y_train,y=X_train_s[:,ii])
#     plt.title(colname[ii])
# plt.subplots_adjust(hspace=0.5)
# plt.show()
# Fully connected neural network (multilayer perceptron)
class MLPclassifica(nn.Module):
    """MLP classifier with two ReLU hidden layers and a linear output head.

    Args:
        in_features: Number of input features per sample (default 57).
        hidden1_units: Width of the first hidden layer (default 30).
        hidden2_units: Width of the second hidden layer (default 10).
        n_classes: Number of output classes (default 2).

    The output head returns raw, unnormalised class scores (logits). This is
    what ``nn.CrossEntropyLoss`` expects, because that loss applies
    log-softmax internally; putting a Sigmoid in front of it squashes the
    scores into (0, 1) and weakens the training signal. Use
    ``torch.softmax(output, dim=1)`` if class probabilities are needed;
    ``argmax`` over the logits gives the predicted class.
    """

    def __init__(self, in_features=57, hidden1_units=30, hidden2_units=10,
                 n_classes=2):
        super(MLPclassifica, self).__init__()
        # First hidden layer: in_features -> hidden1_units, followed by ReLU.
        self.hidden1 = nn.Sequential(
            nn.Linear(
                in_features=in_features,
                out_features=hidden1_units,
                bias=True,
            ),
            nn.ReLU(),
        )
        # Second hidden layer: hidden1_units -> hidden2_units, followed by ReLU.
        self.hidden2 = nn.Sequential(
            nn.Linear(hidden1_units, hidden2_units),
            nn.ReLU(),
        )
        # Classification head: one raw score per class. Kept inside a
        # Sequential so the parameter names (classifica.0.weight / .bias)
        # stay the same as before.
        self.classifica = nn.Sequential(
            nn.Linear(hidden2_units, n_classes),
        )

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Float tensor whose last dimension is ``in_features``.

        Returns:
            Tuple ``(fc1, fc2, output)``: the activations of the first and
            second hidden layers and the class logits.
        """
        fc1 = self.hidden1(x)
        fc2 = self.hidden2(fc1)
        output = self.classifica(fc2)
        return fc1, fc2, output
# Build the network and print its layer structure
MyConvnet = MLPclassifica()
print(MyConvnet)
# Draw the network graph with hiddenlayer
import  hiddenlayer as hl
from graphviz import Digraph
# A single all-zero sample with 57 features is enough to trace the graph.
_probe_input = torch.zeros(1, 57)
hl_graph = hl.build_graph(MyConvnet, _probe_input)
_blue_theme = hl.graph.THEMES["blue"].copy()
hl_graph.theme = _blue_theme
# Write the rendered graph to disk as a PNG image
hl_graph.save("Data/MLPclassifica_hl.png", format="png")
# Model training
# Convert the scaled features to float32 tensors and the labels to int64
# class indices (the target format nn.CrossEntropyLoss takes).
X_train_t = torch.from_numpy(X_train_s.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.int64))
X_test_t = torch.from_numpy(X_test_s.astype(np.float32))
y_test_t = torch.from_numpy(y_test.astype(np.int64))
# Bundle training features and labels so they can be served in mini-batches.
train_data = Data.TensorDataset(X_train_t, y_train_t)
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
    num_workers=0,
)
# Optimizer and loss. CrossEntropyLoss combines log-softmax and negative
# log-likelihood, so it is fed the model's third output directly.
optimizer = torch.optim.Adam(MyConvnet.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()
history1 = hl.History()  # stores the metrics logged during training
canvas1 = hl.Canvas()  # draws the logged metrics
print_step = 25  # evaluate on the test set every `print_step` iterations


def _predict_test(model, features):
    """Return ``(output, predicted_labels)`` for `features` without autograd.

    The model is put in eval mode for the forward pass and restored to its
    previous mode afterwards, and gradient tracking is disabled so that
    evaluation does not build a computation graph.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            _, _, scores = model(features)
    finally:
        model.train(was_training)
    _, labels = torch.max(scores, 1)
    return scores, labels


for epoch in range(25):
    for step, (b_x, b_y) in enumerate(train_loader):
        # One optimisation step on the current mini-batch.
        _, _, output = MyConvnet(b_x)
        train_loss = loss_func(output, b_y)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        # Global iteration counter (1-based) across all epochs.
        niter = epoch * len(train_loader) + step + 1
        if niter % print_step == 0:
            output, pre_lab = _predict_test(MyConvnet, X_test_t)
            test_accuracy = accuracy_score(y_test_t.numpy(), pre_lab.numpy())
            # Log a plain float so the history does not hold on to the
            # autograd graph of the loss tensor.
            history1.log(
                niter,
                train_loss=train_loss.item(),
                test_accuracy=test_accuracy,
            )
            with canvas1:
                canvas1.draw_plot(history1["train_loss"])
                canvas1.draw_plot(history1["test_accuracy"])
# Final evaluation on the held-out test set.
output, pre_lab = _predict_test(MyConvnet, X_test_t)
test_accuracy = accuracy_score(y_test_t.numpy(), pre_lab.numpy())
print("test_accuracy:", test_accuracy)

# PyTorch-MLP垃圾邮件分类_第1张图片
# PyTorch-MLP垃圾邮件分类_第2张图片

# 你可能感兴趣的:(神经网络,神经网络)