[NLP] Chinese Text Classification with textCNN (PyTorch)

textCNN model explained

https://www.cnblogs.com/bymo/p/9675654.html

https://zhuanlan.zhihu.com/p/77634533?from_voters_page=true

Implementation reference

https://blog.csdn.net/u013832707/article/details/88634197

Core code

https://github.com/g1kyne/textCNN

Model implemented with Conv1d: model.py

Shape flow

embedding: (batch_size, seq_length) --> (batch_size, seq_length, embedding_size)   (seq_length = sentence length, embedding_size = word-vector dimension)

                        (128, 20) --> (128, 20, 60)

permute(): reorder the dimensions so the tensor is ready for the subsequent convolution

                    (batch_size, seq_length, embedding_size)   -->  (batch_size, embedding_size, seq_length)

                    (128, 20, 60) --> (128, 60, 20)

conv1D: taking kernel_size = 3 as an example, (batch_size, embedding_size, seq_length)  -->  (batch_size, kernel_num, L_out)

                (128, 60, 20)  -->  (128, 16, 18)   kernel_num = 16, L_out = 20-3+1 = 18

                PS: kernel_num is the number of output channels of the convolution

relu: the activation function leaves the shape unchanged

max_pool1d: (batch_size, kernel_num, L_out)  -->  (batch_size, kernel_num, 1)

                       (128, 16, 18)  -->  (128, 16, 1)

squeeze(2): drop the dimension of size 1

                    (batch_size, kernel_num, 1)  -->  (batch_size, kernel_num)

                    (128, 16, 1)  -->  (128, 16)

cat(): concatenate the vectors produced by convolution + pooling with the different kernel sizes

          (batch_size, kernel_num)  -->  (batch_size, kernel_num * number of kernel sizes)

          (128, 16)  -->  (128, 16*3)

          PS: each kernel size yields a vector of shape (batch_size, kernel_num), i.e. (128, 16), after the steps above

linear: (batch_size, kernel_num * number of kernel sizes)  -->  (batch_size, class_num)   (class_num = number of classes)

          (128, 16*3)  -->  (128, 5)
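
A minimal standalone sketch (not part of model.py) that traces these shapes for a single kernel_size = 3, assuming the example values batch_size=128, seq_length=20, embed_dim=60, kernel_num=16 from the walkthrough and using random data:

import torch
import torch.nn as nn
from torch.nn import functional as F

# Sketch only: trace the Conv1d shapes for one kernel size with dummy data.
batch_size, seq_length, embed_dim = 128, 20, 60
kernel_num, kernel_size, vocab_size = 16, 3, 22906

x = torch.randint(0, vocab_size, (batch_size, seq_length))   # (128, 20) token ids
emb = nn.Embedding(vocab_size, embed_dim, padding_idx=1)
conv = nn.Conv1d(embed_dim, kernel_num, kernel_size)

x = emb(x)                      # (128, 20, 60)
x = x.permute(0, 2, 1)          # (128, 60, 20)
x = F.relu(conv(x))             # (128, 16, 18)   L_out = 20 - 3 + 1 = 18
x = F.max_pool1d(x, x.size(2))  # (128, 16, 1)    global max pooling over the sequence
x = x.squeeze(2)                # (128, 16)
print(x.shape)                  # torch.Size([128, 16])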

import torch
import torch.nn as nn
from torch.nn import functional as F
import math
import torch.nn.init as init

class textCNN(nn.Module):
    def __init__(self, param):
        super(textCNN, self).__init__()
        kernel_num = param['kernel_num']  # output channel size, 16
        kernel_size = param['kernel_size']  # 3, 4, 5
        vocab_size = param['vocab_size']  # 22906
        embed_dim = param['embed_dim']  # 60
        dropout = param['dropout']  # 0.5
        class_num = param['class_num']  # 5
        self.param = param
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=1)  # Embedding(22906, 60, padding_idx=1)  # nn.Embedding(num_embeddings: vocabulary size, embedding_dim: word-vector dimension)
        self.conv11 = nn.Conv1d(embed_dim, kernel_num, kernel_size[0])  # nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
        self.conv12 = nn.Conv1d(embed_dim, kernel_num, kernel_size[1])  # in_channels equals the word-vector dimension, 60
        self.conv13 = nn.Conv1d(embed_dim, kernel_num, kernel_size[2])
        self.dropout = nn.Dropout(dropout)  # Dropout(p=0.5, inplace=False)
        self.fc1 = nn.Linear(len(kernel_size) * kernel_num, class_num)  # Linear(in_features=48, out_features=5, bias=True): maps (128, 48) -> (128, 5)  # applies a linear transformation y = Ax + b
        # Arguments:
        # in_features  - size of each input sample
        # out_features - size of each output sample
        # bias         - if set to False, the layer learns no bias. Default: True

        # Shape:
        # input:  (N, in_features)
        # output: (N, out_features)

        # Variables:
        # weight - learnable weights of shape (out_features x in_features)
        # bias   - learnable bias of shape (out_features)

    def init_embed(self, embed_matrix):
        self.embed.weight = nn.Parameter(torch.Tensor(embed_matrix))

    @staticmethod
    def conv_and_pool(x, conv):
        # x: (batch, embed_dim, seq_length)  torch.Size([128, 60, 20])
        x = conv(x)
        # after the 1-D convolution: torch.Size([128, 16, 18])
        x = F.relu(x)
        # after the activation: torch.Size([128, 16, 18])
        x = F.max_pool1d(x, x.size(2))  # (128, 16, 1)  # F.max_pool1d(input [128, 16, 18], kernel_size=18); x.size(2) is L_out, so this is global max pooling over the sequence
        x = x.squeeze(2)
        # (batch, kernel_num)  torch.Size([128, 16]):  (128, 16, 1).squeeze(2) --> (128, 16)
        return x

    def forward(self, x):
        # x: (batch, sentence_length)  (128, 20)
        x = self.embed(x)
        # x: (batch, sentence_length, embed_dim)   after the embedding layer: (128, 20, 60)
        # TODO init embed matrix with pre-trained
        x = x.permute(0, 2, 1)  # swap dims: (128, 20, 60) -> (128, 60, 20)

        x1 = self.conv_and_pool(x, self.conv11)  # (batch, kernel_num)  torch.Size([128, 16])
        x2 = self.conv_and_pool(x, self.conv12)  # (batch, kernel_num)  torch.Size([128, 16])
        x3 = self.conv_and_pool(x, self.conv13)  # (batch, kernel_num)
        x = torch.cat((x1, x2, x3), 1)  # (batch, 3 * kernel_num) (128,3*16) = (128,48)   torch.cat(inputs, dimension=0) → Tensor
        x = self.dropout(x)  # torch.Size([128, 48])
        logit = F.log_softmax(self.fc1(x), dim=1)
        return logit

    def init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
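
A minimal usage sketch (not from the linked repo), assuming the hyperparameter values noted in the comments above; since forward returns log_softmax, nn.NLLLoss is the natural training loss:

# Sketch only: instantiate the Conv1d model with the example hyperparameters and run one dummy batch.
param = {'kernel_num': 16, 'kernel_size': [3, 4, 5], 'vocab_size': 22906,
         'embed_dim': 60, 'dropout': 0.5, 'class_num': 5}
model = textCNN(param)
model.init_weight()

x = torch.randint(0, param['vocab_size'], (128, 20))       # dummy batch of token ids, (128, 20)
logit = model(x)                                            # (128, 5) log-probabilities
loss = nn.NLLLoss()(logit, torch.randint(0, 5, (128,)))    # log_softmax output pairs with NLLLoss
loss.backward()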

Model implemented with Conv2d: model_oo.py

Shape flow

embedding: (batch_size, seq_length) --> (batch_size, seq_length, embedding_size)   (seq_length = sentence length, embedding_size = word-vector dimension)

                        (128, 20) --> (128, 20, 60)

unsqueeze(): add a dimension of size 1 so conv2D can be applied

                    (batch_size, seq_length, embedding_size)   -->  (batch_size, 1, seq_length, embedding_size)

                    (128, 20, 60) --> (128, 1, 20, 60)

conv2D: taking kernel_size = 3 as an example, (batch_size, 1, seq_length, embedding_size)  -->  (batch_size, kernel_num, H_out, 1)

                (128, 1, 20, 60)  -->  (128, 16, 18, 1)   kernel_num = 16, H_out = 20-3+1 = 18

                PS: kernel_num is the number of output channels of the convolution

squeeze: drop the dimension of size 1

                (batch_size, kernel_num, H_out, 1)  --> (batch_size, kernel_num, H_out)

                (128, 16, 18, 1)  --> (128, 16, 18)

relu: the activation function leaves the shape unchanged

max_pool1d: (batch_size, kernel_num, H_out)  -->  (batch_size, kernel_num, 1)

                       (128, 16, 18)  -->  (128, 16, 1)

squeeze(2): drop the dimension of size 1

                    (batch_size, kernel_num, 1)  -->  (batch_size, kernel_num)

                    (128, 16, 1)  -->  (128, 16)

cat(): concatenate the vectors produced by convolution + pooling with the different kernel sizes

          PS: each kernel size yields a vector of shape (batch_size, kernel_num), i.e. (128, 16), after the steps above

          (batch_size, kernel_num)  -->  (batch_size, kernel_num * number of kernel sizes)

          (128, 16)  -->  (128, 16*3)

linear: (batch_size, kernel_num * number of kernel sizes)  -->  (batch_size, class_num)   (class_num = number of classes)

          (128, 16*3)  -->  (128, 5)
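
Similarly, a minimal standalone sketch (not part of model_oo.py) tracing the Conv2d shapes for kernel_size = 3, with the same assumed example values:

import torch
import torch.nn as nn
from torch.nn import functional as F

# Sketch only: trace the Conv2d shapes for one kernel size with dummy data.
batch_size, seq_length, embed_dim = 128, 20, 60
kernel_num, kernel_size, vocab_size = 16, 3, 22906

x = torch.randint(0, vocab_size, (batch_size, seq_length))     # (128, 20) token ids
emb = nn.Embedding(vocab_size, embed_dim, padding_idx=1)
conv = nn.Conv2d(1, kernel_num, (kernel_size, embed_dim))

x = emb(x)                                  # (128, 20, 60)
x = x.unsqueeze(1)                          # (128, 1, 20, 60)
x = conv(x)                                 # (128, 16, 18, 1)   H_out = 20 - 3 + 1 = 18
x = F.relu(x.squeeze(3))                    # (128, 16, 18)
x = F.max_pool1d(x, x.size(2)).squeeze(2)   # (128, 16)
print(x.shape)                              # torch.Size([128, 16])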

import torch
import torch.nn as nn
from torch.nn import functional as F
import math

class textCNN(nn.Module):
    def __init__(self, param):
        super(textCNN, self).__init__()
        ci = 1  # input channel size
        kernel_num = param['kernel_num']  # output channel size, 16
        kernel_size = param['kernel_size']  # 3, 4, 5
        vocab_size = param['vocab_size']  # 22906
        embed_dim = param['embed_dim']  # 60
        dropout = param['dropout']  # 0.5
        class_num = param['class_num']  # 5
        self.param = param
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=1)  # Embedding(22906, 60, padding_idx=1)  # nn.Embedding(num_embeddings: vocabulary size, embedding_dim: word-vector dimension)
        self.conv11 = nn.Conv2d(ci, kernel_num, (kernel_size[0], embed_dim))  # Conv2d(1, 16, kernel_size=(3, 60), stride=(1, 1))  # nn.Conv2d(in_channels: int, out_channels: int, kernel_size: _size_2_t)
        self.conv12 = nn.Conv2d(ci, kernel_num, (kernel_size[1], embed_dim))  # Conv2d(1, 16, kernel_size=(4, 60), stride=(1, 1))  # the kernel's second dimension equals the word-vector dimension, 60
        self.conv13 = nn.Conv2d(ci, kernel_num, (kernel_size[2], embed_dim))  # Conv2d(1, 16, kernel_size=(5, 60), stride=(1, 1))
        self.dropout = nn.Dropout(dropout)  # Dropout(p=0.5, inplace=False)
        self.fc1 = nn.Linear(len(kernel_size) * kernel_num, class_num)  # Linear(in_features=48, out_features=5, bias=True): maps (128, 48) -> (128, 5)  # applies a linear transformation y = Ax + b
        # Arguments:
        # in_features  - size of each input sample
        # out_features - size of each output sample
        # bias         - if set to False, the layer learns no bias. Default: True

        # Shape:
        # input:  (N, in_features)
        # output: (N, out_features)

        # Variables:
        # weight - learnable weights of shape (out_features x in_features)
        # bias   - learnable bias of shape (out_features)

    def init_embed(self, embed_matrix):
        self.embed.weight = nn.Parameter(torch.Tensor(embed_matrix))

    @staticmethod
    def conv_and_pool(x, conv):
        # x: (batch, 1, sentence_length, embed_dim)  torch.Size([128, 1, 20, 60])
        x = conv(x)   # x: (batch, kernel_num, H_out, 1)  after the 2-D convolution: torch.Size([128, 16, 18, 1]);  18 = 20 - 3 + 1 = seq_length - kernel_size + 1
        x = F.relu(x.squeeze(3))  # a.squeeze(N) drops dimension N of a when it has size 1  # x: (batch, kernel_num, H_out)  after the activation: torch.Size([128, 16, 18])
        x = F.max_pool1d(x, x.size(2)).squeeze(2)   # (batch, kernel_num)  torch.Size([128, 16]):  (128, 16, 1).squeeze(2) --> (128, 16)
        # F.max_pool1d(input [128, 16, 18], kernel_size=18); x.size(2) is H_out, so this is global max pooling over the sequence
        return x

    def forward(self, x):
        # x: (batch, sentence_length)  (128, 20)
        
        x = self.embed(x)  # x: (batch, sentence_length, embed_dim)   after the embedding layer: (128, 20, 60)
        x = x.unsqueeze(1)  # x: (batch, 1, sentence_length, embed_dim)  (128, 1, 20, 60)   add a dimension of size 1 at dim 1 so x is a valid Conv2d input; the 1 corresponds to Conv2d's input channels
        
        x1 = self.conv_and_pool(x, self.conv11)  # (batch, kernel_num)  torch.Size([128, 16])
        x2 = self.conv_and_pool(x, self.conv12)  # (batch, kernel_num)  torch.Size([128, 16])
        x3 = self.conv_and_pool(x, self.conv13)  # (batch, kernel_num)
        x = torch.cat((x1, x2, x3), 1)  # (batch, 3 * kernel_num) (128,3*16) = (128,48)   torch.cat(inputs, dimension=0) → Tensor
        x = self.dropout(x)  # torch.Size([128, 48])
        logit = F.log_softmax(self.fc1(x), dim=1)
        return logit

    def init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
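
The TODO in forward mentions initializing the embedding matrix with pre-trained vectors; a minimal sketch of how init_embed could be used, assuming embed_matrix is a (vocab_size, embed_dim) array prepared elsewhere (the random array here is only a placeholder):

import numpy as np

# Sketch only: embed_matrix stands in for a pre-built (22906, 60) array of word vectors.
embed_matrix = np.random.rand(22906, 60).astype('float32')  # placeholder, not real vectors

param = {'kernel_num': 16, 'kernel_size': [3, 4, 5], 'vocab_size': 22906,
         'embed_dim': 60, 'dropout': 0.5, 'class_num': 5}
model = textCNN(param)
model.init_weight()
model.init_embed(embed_matrix)   # wraps the matrix as the new nn.Embedding weight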

 
