PyTorch Deep Learning: Introduction and Practice, Part 2: Convolutional Neural Networks

1. Basic Building Blocks of Convolutional Neural Networks

  • Dilated convolution
    Inserts gaps (zero elements) into the convolution kernel, which enlarges the receptive field so that each output element gathers information from a wider region of the input.
    Receptive field: the region of the input that determines a single element of a layer's output, i.e., the area of the input image that one element of a feature map corresponds to.
  • Transposed convolution
    Purpose: upsample a feature map back to its original size.
    Transposed convolution is the reverse of convolution: the input of a convolution plays the role of the transposed convolution's output, and vice versa. (Both operations are sketched in the code after this list.)
  • Two-dimensional convolution
    Applying 2-D convolution to a sentence's word embeddings is the key step in classifying natural language with a convolutional network.
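A minimal sketch of the dilated and transposed operations (the channel counts and kernel sizes below are illustrative assumptions):

import torch
import torch.nn as nn

x = torch.rand(1, 1, 28, 28)

# dilation=2 spreads a 3x3 kernel over a 5x5 window, enlarging the receptive field
dilated = nn.Conv2d(1, 8, kernel_size=3, dilation=2)
print(dilated(x).shape)                 # torch.Size([1, 8, 24, 24])

# a transposed convolution with stride=2 upsamples the feature map again
upsample = nn.ConvTranspose2d(8, 1, kernel_size=2, stride=2)
print(upsample(dilated(x)).shape)       # torch.Size([1, 1, 48, 48])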

2. Classic Convolutional Neural Networks

  • LeNet-5
    Designed mainly for handwritten digit recognition. (A minimal sketch follows.)
    Input: 32x32 grayscale image
    Structure: 2 convolutional layers + 2 pooling layers + 2 fully connected layers
    Output: scores for the 10 digit classes
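A minimal sketch of this layout, assuming the 5x5 kernels and average pooling of the original LeNet-5:

import torch
import torch.nn as nn

lenet5 = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5), nn.Tanh(), nn.AvgPool2d(2, 2),   # 32x32 -> 28x28 -> 14x14
    nn.Conv2d(6, 16, kernel_size=5), nn.Tanh(), nn.AvgPool2d(2, 2),  # 14x14 -> 10x10 -> 5x5
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Tanh(),
    nn.Linear(120, 10),                                              # scores for 10 classes
)
print(lenet5(torch.rand(1, 1, 32, 32)).shape)  # torch.Size([1, 10])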
  • AlexNet
import torchvision.models as models
alexnet=models.alexnet()
  • GoogLeNet
import torchvision.models as models
googlenet=models.googlenet()
  • VGG
    VGG replaces a single convolutional layer with a large kernel by a stack of layers with small (3x3) kernels.
    Small kernels reduce the parameter count, and the extra layers perform more feature mappings (each followed by a nonlinearity), which further increases the network's fitting capacity. (See the comparison after the code below.)
import torchvision.models as models
vgg16=models.vgg16()
vgg19=models.vgg19()
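A rough parameter comparison (a sketch; the channel count 64 is an illustrative assumption): two stacked 3x3 convolutions cover the same 5x5 receptive field as one 5x5 convolution, with fewer weights and one extra nonlinearity.

import torch.nn as nn

c = 64
two_3x3 = nn.Sequential(nn.Conv2d(c, c, 3, padding=1), nn.ReLU(),
                        nn.Conv2d(c, c, 3, padding=1), nn.ReLU())
one_5x5 = nn.Conv2d(c, c, 5, padding=2)

def count(m):
    return sum(p.numel() for p in m.parameters())

print(count(two_3x3), "vs", count(one_5x5))  # ~18*c*c vs ~25*c*c weights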
  • TextCNN
    A single convolutional layer applied over a sentence's word-embedding matrix extracts the text features. (The idea is sketched below; a full implementation appears as CNN_Text in Section 5.)
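A minimal sketch of the idea (the sentence length 50 and embedding size 100 are assumed for illustration): the kernel spans the full embedding width and slides over word positions only.

import torch
import torch.nn as nn
import torch.nn.functional as F

emb = torch.rand(1, 1, 50, 100)                # (batch, channel, words, embedding_dim)
conv = nn.Conv2d(1, 16, kernel_size=(3, 100))  # a 3-word window over 100-d embeddings
out = F.relu(conv(emb)).squeeze(3)             # -> (1, 16, 48): one value per window
pooled = F.max_pool1d(out, out.shape[2]).squeeze(2)  # max over positions -> (1, 16)
print(pooled.shape)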

3. Recognizing Fashion-MNIST with a Convolutional Neural Network

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import time
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.utils.data as Data
from torchvision import transforms
from torchvision.datasets import FashionMNIST

train_data=FashionMNIST(root="./data/FashionMNIST",
                        train=True,
                        transform= transforms.ToTensor(),
                        download=False)
train_loader=Data.DataLoader(dataset=train_data,
                             batch_size=64,
                             shuffle=False,  # fixed batch order keeps the later 80/20 batch split deterministic
                             num_workers=2)
print("batch num of train_loader is :  ",len(train_loader))
for step,(b_x,b_y) in enumerate(train_loader):
    if step>0:
        break
    batch_x=b_x.squeeze().numpy()
    batch_y=b_y.numpy()
    class_label=train_data.classes
    class_label[0]="T-shirt"
    plt.figure(figsize=(12,5))
    for ii in np.arange(len(batch_y)):
        plt.subplot(4,16,ii+1)
        plt.imshow(batch_x[ii,:,:],cmap=plt.cm.gray)
        plt.title(class_label[batch_y[ii]],size=9)
        plt.axis("off")
        plt.subplots_adjust(wspace=0.05)

test_data=FashionMNIST(root="./data/FashionMNIST",
                       train=False,
                       download=False)
test_data_x=test_data.data.type(torch.FloatTensor)/255.0
test_data_x=torch.unsqueeze(test_data_x,dim=1)
test_data_y=test_data.targets
print("test_data_x.shape:",test_data_x.shape)
print("test_data_y.shape:",test_data_y.shape)

class MyConvNet(nn.Module):
    def __init__(self):
        super(MyConvNet,self).__init__()
        self.conv1=nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.ReLU(),
            nn.AvgPool2d(
                kernel_size=2,
                stride=2,
            )
        )
        self.conv2=nn.Sequential(
            nn.Conv2d(16,32,3,1,0),
            nn.ReLU(),
            nn.AvgPool2d(2,2)
        )
        self.classifier=nn.Sequential(
            nn.Linear(32*6*6,256),
            nn.ReLU(),
            nn.Linear(256,128),
            nn.ReLU(),
            nn.Linear(128,10)
        )
    def forward(self,x):
        x=self.conv1(x)
        x=self.conv2(x)
        x=x.view(x.size(0),-1)   # flatten to (batch, 32*6*6)
        output=self.classifier(x)
        return output
myconvnet=MyConvNet()
print(myconvnet)
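A quick sanity check (a sketch) of where the classifier's 32*6*6 input size comes from: a 28x28 Fashion-MNIST image becomes 14x14 after conv1 (padded convolution + pooling) and 6x6 after conv2 (unpadded convolution + pooling).

with torch.no_grad():
    feat = myconvnet.conv2(myconvnet.conv1(torch.rand(1, 1, 28, 28)))
print(feat.shape)  # torch.Size([1, 32, 6, 6])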


def train_model(model,traindataloader,train_rate,criterion,optimizer,num_epochs=25):
    # train_rate: fraction of each epoch's batches used for training;
    # the remaining batches of the same loader are used for validation
    batch_num=len(traindataloader)
    train_batch_num=round(batch_num*train_rate)
    best_model_wts=copy.deepcopy(model.state_dict())
    best_acc=0.0
    train_loss_all=[]
    train_acc_all=[]
    val_loss_all=[]
    val_acc_all=[]
    since=time.time()
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch,num_epochs-1))
        print('-'*10)
        train_loss=0.0
        train_corrects=0
        train_num=0
        val_loss=0.0
        val_corrects=0
        val_num=0
        for step,(b_x,b_y) in enumerate(traindataloader):
            if step<train_batch_num:
                model.train()
                output=model(b_x)
                pre_lab=torch.argmax(output,1)
                loss=criterion(output,b_y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss +=loss.item()*b_x.size(0)
                train_corrects+=torch.sum(pre_lab==b_y.data)
                train_num+=b_x.size(0)
            else:
                model.eval()
                output=model(b_x)
                pre_lab=torch.argmax(output,1)
                loss=criterion(output,b_y)
                val_loss+=loss.item()*b_x.size(0)
                val_corrects+=torch.sum(pre_lab==b_y.data)
                val_num+=b_x.size(0)
        train_loss_all.append(train_loss/train_num)
        train_acc_all.append(train_corrects.double().item()/train_num)
        val_loss_all.append(val_loss/val_num)
        val_acc_all.append(val_corrects.double().item()/val_num)
        print('{} Train Loss :{:.4f} Train Acc:{:.4f}'.format(epoch,train_loss_all[-1],train_acc_all[-1]))
        print('{} Val Loss :{:.4f} Val Acc:{:.4f}'.format(epoch,val_loss_all[-1],val_acc_all[-1]))
        if val_acc_all[-1]>best_acc:
            best_acc=val_acc_all[-1]
            best_model_wts=copy.deepcopy(model.state_dict())
        time_use=time.time()-since
        print("Train and val complete in {:.0f}m {:.0f}s".format(time_use//60,time_use%60))
    model.load_state_dict(best_model_wts)
    train_process=pd.DataFrame(
        data={"epoch":range(num_epochs),
              "train_loss_all":train_loss_all,
              "val_loss_all":val_loss_all,
              "train_acc_all":train_acc_all,
              "val_acc_all":val_acc_all}
    )
    return model,train_process

optimizer=torch.optim.Adam(myconvnet.parameters(),lr=0.0003)
criterion=nn.CrossEntropyLoss()
myconvnet,train_process=train_model(
    myconvnet,train_loader,0.8,
    criterion,optimizer,num_epochs=25
)

plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(train_process.epoch,train_process.train_loss_all,"ro-",label="Train loss")
plt.plot(train_process.epoch,train_process.val_loss_all,"bs-",label="Val loss")
plt.legend()
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.subplot(1,2,2)
plt.plot(train_process.epoch,train_process.train_acc_all,"ro-",label="Train acc")
plt.plot(train_process.epoch,train_process.val_acc_all,"bs-",label="Val acc")
plt.xlabel("epoch")
plt.ylabel("acc")
plt.legend()
plt.show()

myconvnet.eval()
output=myconvnet(test_data_x)
pre_lab=torch.argmax(output,1)
acc=accuracy_score(test_data_y,pre_lab)
print("the accuracy of test data is ",acc)

conf_mat=confusion_matrix(test_data_y,pre_lab)
df_cm=pd.DataFrame(conf_mat,index=class_label,columns=class_label)
heatmap=sns.heatmap(df_cm,annot=True,fmt="d",cmap="YlGnBu")
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),rotation=0,ha='right')
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),rotation=45,ha='right')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
  • Dilated convolutional neural network (the same pipeline, now with dilated convolutions)
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import time
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.utils.data as Data
from torchvision import transforms
from torchvision.datasets import FashionMNIST

train_data=FashionMNIST(root="./data/FashionMNIST",
                        train=True,
                        transform= transforms.ToTensor(),
                        download=False)
train_loader=Data.DataLoader(dataset=train_data,
                             batch_size=64,
                             shuffle=False,
                             num_workers=2)
print("batch num of train_loader is :  ",len(train_loader))
for step,(b_x,b_y) in enumerate(train_loader):
    if step>0:
        break
    batch_x=b_x.squeeze().numpy()
    batch_y=b_y.numpy()
    class_label=train_data.classes
    class_label[0]="T-shirt"
    plt.figure(figsize=(12,5))
    for ii in np.arange(len(batch_y)):
        plt.subplot(4,16,ii+1)
        plt.imshow(batch_x[ii,:,:],cmap=plt.cm.gray)
        plt.title(class_label[batch_y[ii]],size=9)
        plt.axis("off")
        plt.subplots_adjust(wspace=0.05)

test_data=FashionMNIST(root="./data/FashionMNIST",
                       train=False,
                       download=False)
test_data_x=test_data.data.type(torch.FloatTensor)/255.0
test_data_x=torch.unsqueeze(test_data_x,dim=1)
test_data_y=test_data.targets
print("test_data_x.shape:",test_data_x.shape)
print("test_data_y.shape:",test_data_y.shape)

class MyConvdilaNet(nn.Module):
    def __init__(self):
        super(MyConvdilaNet,self).__init__()
        self.conv1=nn.Sequential(nn.Conv2d(1,16,3,1,1,dilation=2),
                                 nn.ReLU(),
                                 nn.AvgPool2d(2,2),
                                 )
        self.conv2=nn.Sequential(nn.Conv2d(16,32,3,1,0,dilation=2),
                                 nn.ReLU(),
                                 nn.AvgPool2d(2,2),
                                 )
        self.classifier=nn.Sequential(nn.Linear(32*4*4,256),
                                      nn.ReLU(),
                                      nn.Linear(256,128),
                                      nn.ReLU(),
                                      nn.Linear(128,10)
                                      )
    def forward(self,x):
        x=self.conv1(x)
        x=self.conv2(x)
        x=x.view(x.size(0),-1)
        output=self.classifier(x)
        return output
myconvdilanet=MyConvdilaNet()
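A similar sanity check (a sketch) for the 32*4*4 classifier input: with dilation=2 each 3x3 kernel covers a 5x5 window, so the feature map shrinks 28 -> 26 -> 13 -> 9 -> 4.

with torch.no_grad():
    feat = myconvdilanet.conv2(myconvdilanet.conv1(torch.rand(1, 1, 28, 28)))
print(feat.shape)  # torch.Size([1, 32, 4, 4])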


def train_model(model,traindataloader,train_rate,criterion,optimizer,num_epochs=25):
    batch_num=len(traindataloader)
    train_batch_num=round(batch_num*train_rate)
    best_model_wts=copy.deepcopy(model.state_dict())
    best_acc=0.0
    train_loss_all=[]
    train_acc_all=[]
    val_loss_all=[]
    val_acc_all=[]
    since=time.time()
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch,num_epochs-1))
        print('-'*10)
        train_loss=0.0
        train_corrects=0
        train_num=0
        val_loss=0.0
        val_corrects=0
        val_num=0
        for step,(b_x,b_y) in enumerate(traindataloader):
            if step<train_batch_num:
                model.train()
                output=model(b_x)
                pre_lab=torch.argmax(output,1)
                loss=criterion(output,b_y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss +=loss.item()*b_x.size(0)
                train_corrects+=torch.sum(pre_lab==b_y.data)
                train_num+=b_x.size(0)
            else:
                model.eval()
                output=model(b_x)
                pre_lab=torch.argmax(output,1)
                loss=criterion(output,b_y)
                val_loss+=loss.item()*b_x.size(0)
                val_corrects+=torch.sum(pre_lab==b_y.data)
                val_num+=b_x.size(0)
        train_loss_all.append(train_loss/train_num)
        train_acc_all.append(train_corrects.double().item()/train_num)
        val_loss_all.append(val_loss/val_num)
        val_acc_all.append(val_corrects.double().item()/val_num)
        print('{} Train Loss :{:.4f} Train Acc:{:.4f}'.format(epoch,train_loss_all[-1],train_acc_all[-1]))
        print('{} Val Loss :{:.4f} Val Acc:{:.4f}'.format(epoch,val_loss_all[-1],val_acc_all[-1]))
        if val_acc_all[-1]>best_acc:
            best_acc=val_acc_all[-1]
            best_model_wts=copy.deepcopy(model.state_dict())
        time_use=time.time()-since
        print("Train and val complete in {:.0f}m {:.0f}s".format(time_use//60,time_use%60))
    model.load_state_dict(best_model_wts)
    train_process=pd.DataFrame(
        data={"epoch":range(num_epochs),
              "train_loss_all":train_loss_all,
              "val_loss_all":val_loss_all,
              "train_acc_all":train_acc_all,
              "val_acc_all":val_acc_all}
    )
    return model,train_process

optimizer=torch.optim.Adam(myconvdilanet.parameters(),lr=0.0003)
criterion=nn.CrossEntropyLoss()
myconvdilanet,train_process=train_model(
    myconvdilanet,train_loader,0.8,
    criterion,optimizer,num_epochs=25
)

plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(train_process.epoch,train_process.train_loss_all,"ro-",label="Train loss")
plt.plot(train_process.epoch,train_process.val_loss_all,"bs-",label="Val loss")
plt.legend()
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.subplot(1,2,2)
plt.plot(train_process.epoch,train_process.train_acc_all,"ro-",label="Train acc")
plt.plot(train_process.epoch,train_process.val_acc_all,"bs-",label="Val acc")
plt.xlabel("epoch")
plt.ylabel("acc")
plt.legend()
plt.show()

myconvdilanet.eval()
output=myconvdilanet(test_data_x)
pre_lab=torch.argmax(output,1)
acc=accuracy_score(test_data_y,pre_lab)
print("the accuracy of test data is ",acc)

conf_mat=confusion_matrix(test_data_y,pre_lab)
df_cm=pd.DataFrame(conf_mat,index=class_label,columns=class_label)
heatmap=sns.heatmap(df_cm,annot=True,fmt="d",cmap="YlGnBu")
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),rotation=0,ha='right')
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),rotation=45,ha='right')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

4. Fine-Tuning a Pretrained Convolutional Network

We start from the pretrained VGG16 network and fine-tune its architecture.
The dataset is the 10 Monkey Species dataset from Kaggle: https://www.kaggle.com/slothkong/10-monkey-species
The pretrained weights are used to extract image features; a new set of fully connected layers is then defined on top to classify the images.

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import hiddenlayer as hl
import torch
import torch.nn as nn
from torch.optim import Adam,SGD
import torch.utils.data as Data
from torchvision import transforms
from torchvision import models
from torchvision.datasets import ImageFolder

vgg16=models.vgg16(pretrained=True)
vgg=vgg16.features
# freeze the pretrained feature extractor so only the new classifier is trained
for param in vgg.parameters():
    param.requires_grad_(False)

class MyVggModel(nn.Module):
    def __init__(self):
        super(MyVggModel,self).__init__()
        self.vgg=vgg
        self.classifier=nn.Sequential(
            nn.Linear(25088,512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(256,10),
            nn.Softmax(dim=1)   # note: redundant with CrossEntropyLoss, which applies log-softmax internally
        )
    def forward(self,x):
        x=self.vgg(x)
        x=x.view(x.size(0),-1)
        output=self.classifier(x)
        return output
Myvggc=MyVggModel()
print(Myvggc)
train_data_transforms=transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
val_data_transforms=transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
train_data_dir="data/chap6/10-monkey-species/training"
train_data=ImageFolder(train_data_dir,transform=train_data_transforms)
train_data_loader=Data.DataLoader(train_data,batch_size=32,shuffle=True,num_workers=2)

val_data_dir="data/chap6/10-monkey-species/validation"
val_data=ImageFolder(val_data_dir,transform=val_data_transforms)
val_data_loader=Data.DataLoader(val_data,batch_size=32,shuffle=True,num_workers=2)
print("train sample data num:",len(train_data.targets))
print("test sample data num : ",len(val_data.targets))

for step,(b_x,b_y) in enumerate(train_data_loader):
    if step>0:
        break
    mean=np.array([0.485,0.456,0.406])
    std=np.array([0.229,0.224,0.225])
    plt.figure(figsize=(12,6))
    for ii in np.arange(len(b_y)):
        plt.subplot(4,8,ii+1)
        image=b_x[ii,:,:,:].numpy().transpose((1,2,0))
        image=std*image+mean
        image=np.clip(image,0,1)
        plt.imshow(image)
        plt.title(b_y[ii].data.numpy())
        plt.axis("off")
    plt.subplots_adjust(hspace=0.3)
    
optimizer=torch.optim.Adam(Myvggc.parameters(),lr=0.003)
loss_func=nn.CrossEntropyLoss()
history1=hl.History()
canvas1=hl.Canvas()
for epoch in range(10):
    train_loss_epoch=0
    val_loss_epoch=0
    train_corrects=0
    val_corrects=0
    Myvggc.train()
    for step,(b_x,b_y) in enumerate(train_data_loader):
        output=Myvggc(b_x)
        loss=loss_func(output,b_y)
        pre_lab=torch.argmax(output,1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss_epoch+=loss.item()*b_x.size(0)
        train_corrects+=torch.sum(pre_lab==b_y.data)
    train_loss=train_loss_epoch/len(train_data.targets)
    train_acc=train_corrects.double()/len(train_data.targets)
    Myvggc.eval()
    for step,(val_x,val_y) in enumerate(val_data_loader):
        output=Myvggc(val_x)
        loss=loss_func(output,val_y)
        pre_lab=torch.argmax(output,1)
        val_loss_epoch+=loss.item()*val_x.size(0)
        val_corrects+=torch.sum(pre_lab==val_y.data)
    val_loss=val_loss_epoch/len(val_data.targets)
    val_acc=val_corrects.double()/len(val_data.targets)
    
    history1.log(epoch,train_loss=train_loss,
                 val_loss=val_loss,
                 train_acc=train_acc.item(),
                 val_acc=val_acc.item())
    with canvas1:
        canvas1.draw_plot([history1["train_loss"],history1["val_loss"]])
        canvas1.draw_plot([history1["train_acc"],history1["val_acc"]])

5. Sentiment Classification with a Convolutional Neural Network

The movie review data are the IMDB reviews from https://www.kaggle.com/iarunava/imdb-movie-reviews-dataset.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import seaborn as sns
from wordcloud import WordCloud
import time
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import data
from torchvision import transforms
from torchtext.vocab import Vectors,GloVe

def load_text_data(path):
    text_data=[]
    label=[]
    for dset in ["pos","neg"]:
        path_dset=os.path.join(path,dset)
        path_list=os.listdir(path_dset)
        for fname in path_list:
            if fname.endswith(".txt"):
                filename=os.path.join(path_dset,fname)
                with open(filename) as f:
                    text_data.append(f.read())
                if dset=="pos":
                    label.append(1)
                else:
                    label.append(0)
    return np.array(text_data),np.array(label)
train_path="data/chap6/imdb/train"
train_text,train_label=load_text_data(train_path)
test_path="data/chap6/imdb/test"
test_text,test_label=load_text_data(test_path)
print(len(train_text),len(train_label))
print(len(test_text),len(test_label))

def text_preprocess(text_data):
    text_pre=[]
    for text1 in text_data:
        text1=re.sub("

"
," ",text1) text1=text1.lower() text1=re.sub("\d+","",text1) text1=text1.translate(str.maketrans("","",string.punctuation.replace("'",""))) text1=text1.strip() text_pre.appen(text1) return np.array(text_pre) train_text_pre=text_preprocess(train_text) test_text_pre=text_preprocess(test_text) def stop_stem_word(datalist,stop_words): datalist_pre=[] for text in datalist: text_words=word_tokenize(text) text_words=[word for word in text_words if not word in stop_words] text_words=[word for word in text_words if len(re.findall("'",word))==0] datalist_pre.append(text_words) return np.array(datalist_pre) stop_words=stopwords.words("english") stop_words=set(stop_words) train_text_pre2=stop_stem_word(train_text_pre,stop_words) test_text_pre2=stop_stem_word(test_text_pre,stop_words) print(train_text_pre[10000]) print("="*10) print(train_text_pre2[10000]) texts=[" ".join(words) for words in train_text_pre2] traindatasave=pd.DataFrame({ "text":texts, "label":train_label}) texts=[" ".join(words) for words in test_text_pre2] testdatasave=pd.DataFrame({ "text":texts, "label":test_label}) traindatasave.to_csv("data/chap6/imdb_train.csv",index=False) testdatasave.to_csv("data/chap6/imdb_test.csv",index=False) traindata=pd.DataFrame({ "train_text":train_text,"train_word":train_text_pre2,"trian_label":train_label}) train_word_num=[len(text) for text in train_text_pre2] traindata["train_word_num"]=train_word_num plt.figure(figsize=(8,5)) _=plt.hist(train_word_num,bins=100) plt.xlabel("word number") plt.ylabel("Freq") plt.show() plt.figure(figsize=(16,10)) for ii in np.unique(train_label): text=np.array(traindata.train_word[traindata.train_label == ii]) text=" ".join(np.concatenate(text)) plt.subplot(1,2,ii+1) wordcod.generate_from_text(text) plt.imshow(wordcod) plt.axis("off") if ii==1: plt.title("Positive") else: plt.title("Negative") plt.subplots_adjust(wspace=0.05) plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import seaborn as sns
from wordcloud import WordCloud
import time
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import data
from torchvision import transforms
from torchtext.vocab import Vectors,GloVe

# note: data.Field / TabularDataset are the legacy torchtext API (torchtext < 0.9)
mytokenize=lambda x:x.split()
TEXT=data.Field(sequential=True,tokenize=mytokenize,include_lengths=True,use_vocab=True,batch_first=True,fix_length=200)
LABEL=data.Field(sequential=False,use_vocab=False,pad_token=None,unk_token=None)
train_test_fields=[
    ("label",LABEL),
    ("text",TEXT)
]
traindata,testdata=data.TabularDataset.splits(
    path="./data/chap6",format="csv",
    train="imdb_train.csv",fields=train_test_fields,
    test="imdb_test.csv",skip_header=True
)
print(len(traindata),len(testdata))
train_data,val_data=traindata.split(split_ratio=0.7)
print(len(train_data),len(val_data))

vec=Vectors("glove.6B.100d.txt","./data")
TEXT.build_vocab(train_data,max_size=20000,vectors=vec)
LABEL.build_vocab(train_data)

print(TEXT.vocab.freqs.most_common(n=10))
print("词典的词数 : ",len(TEXT.vocab.itos))
print("前 10 个单词: \n",TEXT.vocab.itos[0:10])
print("类别标签情况: ",LABEL.vocab.freqs)

BATCH_SIZE=32
train_iter=data.BucketIterator(train_data,batch_size=BATCH_SIZE)
val_iter=data.BucketIterator(val_data,batch_size=BATCH_SIZE)
test_iter=data.BucketIterator(testdata,batch_size=BATCH_SIZE)

for step,batch in enumerate(train_iter):
    if step>0:
        break
print("数据的尺寸: ",batch.text[0].shape)
print("数据的类别标签: \n"batch.lable)
print("数据样本数 : ",len(batch.text[1]))

class CNN_Text(nn.Module):
    def __init__(self,vocab_size,embedding_dim,n_filters,filter_sizes,output_dim,dropout,pad_idx):
        super().__init__()
        self.embedding=nn.Embedding(vocab_size,embedding_dim,padding_idx=pad_idx)
        self.convs=nn.ModuleList([
            nn.Conv2d(in_channels=1,out_channels=n_filters,kernel_size=(fs,embedding_dim)) for fs in filter_sizes
        ])
        self.fc=nn.Linear(len(filter_sizes)*n_filters,output_dim)
        self.dropout=nn.Dropout(dropout)
    def forward(self,text):
        embedded=self.embedding(text)       # (batch, seq_len, embedding_dim)
        embedded=embedded.unsqueeze(1)      # add a channel dim for Conv2d
        conved=[F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        pooled=[F.max_pool1d(conv,conv.shape[2]).squeeze(2) for conv in conved]  # max over word positions
        cat=self.dropout(torch.cat(pooled,dim=1))   # (batch, len(filter_sizes)*n_filters)
        return self.fc(cat)

INPUT_DIM=len(TEXT.vocab)
EMBEDDING_DIM=100
N_FILTERS=100
FILTER_SIZES=[3,4,5]
OUTPUT_DIM=1
DROPOUT=0.5
PAD_IDX=TEXT.vocab.stoi[TEXT.pad_token]
model=CNN_Text(INPUT_DIM,EMBEDDING_DIM,N_FILTERS,FILTER_SIZES,OUTPUT_DIM,DROPOUT,PAD_IDX)
print(model)

pretrained_embeddings=TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX=TEXT.vocab.stoi[TEXT.unk_token]
model.embedding.weight.data[UNK_IDX]=torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX]=torch.zeros(EMBEDDING_DIM)

optimizer=optim.Adam(model.parameters())
criterion=nn.BCEWithLogitsLoss()

def train_epoch(model,iterator,optimizer,criterion):
    epoch_loss=0
    train_corrects=0
    train_num=0
    model.train()
    for batch in iterator:
        optimizer.zero_grad()
        pre=model(batch.text[0]).squeeze(1)
        loss=criterion(pre,batch.label.type(torch.FloatTensor))
        pre_lab=torch.round(torch.sigmoid(pre))
        train_corrects+=torch.sum(pre_lab.long()==batch.label)
        train_num+=len(batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss=epoch_loss/train_num
    epoch_acc=train_corrects.double().item()/train_num
    return epoch_loss,epoch_acc

def evaluate(model,iterator,criterion):
    epoch_loss=0
    val_corrects=0
    val_num=0
    model.eval()
    with torch.no_grad():
        for batch in iterator:
            pre=model(batch.text[0]).squeeze(1)
            loss=criterion(pre,batch.label.type(torch.FloatTensor))
            pre_lab=torch.round(torch.sigmoid(pre))
            val_corrects+=torch.sum(pre_lab.long() == batch.label)
            val_num+=len(batch.label)
            epoch_loss+=loss.item()
        epoch_loss=epoch_loss/val_num
        epoch_acc=val_corrects.double().item()/val_num
    return epoch_loss,epoch_acc

EPOCHS=10
best_val_loss=float("inf")
best_acc=float(0)
for epoch in range(EPOCHS):
    start_time=time.time()
    train_loss,train_acc=train_epoch(model,train_iter,optimizer,criterion)
    val_loss,val_acc=evaluate(model,val_iter,criterion)
    end_time=time.time()
    print("Epoch: ",epoch+1,"|","EPoch TIme: ",end_time-start_time,"s")
    print("Train Loss: "train_loss,"|","Train acc: ",train_acc)
    print("Val.Loss: ",val_loss,"|","Val.Acc: ",val_acc)
    if(val_loss<best_val_loss) & (val_acc>best_acc):
        best_model_wts=copy.deepcopy(model.state_dict())
        best_val_loss=val_loss
        best_acc=val_acc
    model.load_state_dict(best_model_wts)
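The restored best model can then be scored on the held-out test split with the same evaluate helper:

test_loss,test_acc=evaluate(model,test_iter,criterion)
print("Test Loss: ",test_loss,"|","Test Acc: ",test_acc)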

6. Using a Pretrained Convolutional Network

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torchvision import transforms
from PIL import Image

vgg16=models.vgg16(pretrained=True)
im=Image.open("data/chess.jpg")
imarray=np.asarray(im)/255.0
plt.figure()
plt.imshow(imarray)
plt.show()

data_transforms=transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
input_im=data_transforms(im).unsqueeze(0)   # add the batch dimension
print("input_im.shape:",input_im.shape)


activation={}  # dict: layer name -> captured feature map
def get_activation(name):
    def hook(model,input,output):
        activation[name]=output.detach()
    return hook   # return the hook function itself, not its result
vgg16.features[4].register_forward_hook(get_activation("maxpool1"))
_=vgg16(input_im)
maxpool1=activation["maxpool1"]
print("extracted feature shape: ",maxpool1.shape)

plt.figure(figsize=(11,6))
for ii in range(maxpool1.shape[1]):
    plt.subplot(6,11,ii+1)
    plt.imshow(maxpool1.data.numpy()[0,ii,:,:],cmap="gray")
    plt.axis("off")
plt.subplots_adjust(wspace=0.1,hspace=0.1)
plt.show()

vgg16.eval()
vgg16.features[21].register_forward_hook(get_activation("layer21_conv"))
_=vgg16(input_im)
layer21_conv=activation["layer21_conv"]
print("获取特征的尺寸为:" layer21_conv.shape)

plt.figure(figsize=(12,6))
for ii in range(72):
    plt.subplot(6,12,ii+1)
    plt.imshow(layer21_conv.data.numpy()[0,ii,:,:],cmap="gray")
    plt.axis("off")
plt.subplots_adjust(wspace=0.1,hspace=0.1)
plt.show()

LABELS_URL="https://s3.amazonaws.com/outcome-blog/imagenet/labels.json"
response=requests.get(LABELS_URL)
labels={int(key):value for key,value in response.json().items()}

vgg16.eval()
im_pre=vgg16(input_im)
softmax=nn.Softmax(dim=1)
im_pre_prob=softmax(im_pre)
prob,prelab=torch.topk(im_pre_prob,5)
prob=prob.data.numpy().flatten()
prelab=prelab.numpy().flatten()
for ii,lab in enumerate(prelab):
    print("index: ",lab," label: ",labels[lab]," ||",prob[ii])
#### Grad-CAM heatmap
class MyVgg16(nn.Module):
    def __init__(self):
        super(MyVgg16, self).__init__()
        self.vgg=models.vgg16(pretrained=True)
        self.features_conv=self.vgg.features[:30]
        self.max_pool=self.vgg.features[30]
        self.avgpool=self.vgg.avgpool
        self.classifier=self.vgg.classifier
        self.gradients=None
    def activations_hook(self,grad):
        # capture the gradients flowing back into the last conv feature map (for Grad-CAM)
        self.gradients=grad
    def forward(self,x):
        x=self.features_conv(x)
        h=x.register_hook(self.activations_hook)
        x=self.max_pool(x)
        x=self.avgpool(x)
        x=x.view(1,-1)
        x=self.classifier(x)
        return x
    def get_activations_gradient(self):
        return self.gradients
    def get_activations(self,x):
        return self.features_conv(x)
vggcam=MyVgg16()
vggcam.eval()
im_pre=vggcam(input_im)
softmax=nn.Softmax(dim=1)
im_pre_prob=softmax(im_pre)
prob,prelab=torch.topk(im_pre_prob,5)
prob=prob.data.numpy().flatten()
prelab=prelab.numpy().flatten()
for ii,lab in enumerate(prelab):
    print("index: ",lab,"label: ",labels[lab]," || ",prob[ii])

im_pre[:,prelab[0]].backward()                      # backprop the top-class score
gradients=vggcam.get_activations_gradient()
mean_gradients=torch.mean(gradients,dim=[0,2,3])    # one weight per channel
activations=vggcam.get_activations(input_im).detach()
for i in range(len(mean_gradients)):
    activations[:,i,:,:]*=mean_gradients[i]         # weight each channel by its mean gradient
heatmap=torch.mean(activations,dim=1).squeeze()
heatmap=F.relu(heatmap)                             # keep only positive influence
heatmap/=torch.max(heatmap)                         # normalize to [0,1]
heatmap=heatmap.numpy()
plt.matshow(heatmap)

img=cv2.imread("data/chess.jpg")   # overlay on the same image the network saw
heatmap=cv2.resize(heatmap,(img.shape[1],img.shape[0]))
heatmap=np.uint8(255*heatmap)
heatmap=cv2.applyColorMap(heatmap,cv2.COLORMAP_JET)
Grad_cam_img=heatmap*0.4+img
Grad_cam_img=Grad_cam_img/Grad_cam_img.max()

b,g,r=cv2.split(Grad_cam_img)
Grad_cam_img=cv2.merge([r,g,b])
plt.figure()
plt.imshow(Grad_cam_img)
plt.show()
