tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)

文本处理的前提
文本预处理:

  1. 分词
  2. 词语 -> id,将词语转化成id表示
  3. id矩阵: matrix -> [|V|, embed_size]
  4. 词语A -> id(5)
  5. 生成一个字典型的词表
  6. label -> id来表示
    数据来源:
    搜狐新闻数据集
    提取码 fech
    训练集大概为23000条,一共13个分类
    若读取文件时头部出现 \ufeff(BOM),则把编码改成 encoding='utf-8-sig'

分词的构建:

由于训练集和测试集的格式不一样,
训练集为
tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第1张图片
测试集为
tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第2张图片
训练集用','来分割
测试集用'\t'来分割,代码如下:
train:

def generate_seg_file(input_file, out_seg_file):
    """Segment the comma-separated training file with jieba.

    Reads ``input_file`` as UTF-8, skips its first line, and writes one
    ``label<TAB>space-separated words`` line per record to ``out_seg_file``.

    Args:
        input_file: Path of the raw training file. Every record is expected
            to look like ``index,label,content,count``.
        out_seg_file: Path of the segmented file to create.

    Raises:
        ValueError: If a record has fewer than three commas.
    """
    with open(input_file, 'r', encoding='utf8') as f:
        lines = f.readlines()

    # The output keeps the platform default encoding, as before; the code
    # later in this file that reads the segmented training file also opens
    # it without an explicit encoding.
    with open(out_seg_file, 'w') as f:
        # Skip the first line of the file.
        for line in lines[1:]:
            # Only the first two commas and the last one act as separators,
            # so commas inside the content no longer raise ValueError.
            # NOTE(review): assumes index, label and count never contain
            # a comma themselves - confirm against the data set.
            _, label, rest = line.strip('\n').split(',', 2)
            content, _ = rest.rsplit(',', 1)
            stripped = (word.strip(' ') for word in jieba.cut(content))
            word_content = ' '.join(word for word in stripped if word != '')
            f.write('%s\t%s\n' % (label, word_content))
            
# Segment the training set.
# NOTE(review): seg_train_file is not defined in the visible part of these
# notes, and train_file is only assigned further down - confirm where both
# are set before this call runs.
generate_seg_file(train_file, seg_train_file)

测试集:


def generate_seg_file(input_file, out_seg_file):
    """Segment the tab-separated test file with jieba.

    Every input line must be ``label<TAB>content``. The content is cut into
    words, blank words are dropped, and ``label<TAB>word word ...`` is
    written to ``out_seg_file``. Both files are handled as UTF-8.

    Args:
        input_file: Path of the raw test file.
        out_seg_file: Path of the segmented file to create.
    """
    with open(input_file, 'r', encoding='utf8') as src:
        records = src.readlines()

    with open(out_seg_file, 'w', encoding='utf8') as dst:
        for record in records:
            label, content = record.strip('\n').split('\t')
            tokens = []
            for token in jieba.cut(content):
                token = token.strip(' ')
                if token:
                    tokens.append(token)
            dst.write('{}\t{}\n'.format(label, ' '.join(tokens)))
            
# Segment the test set.
# NOTE(review): seg_test_file is not defined in the visible part of these
# notes - confirm where it is set.
generate_seg_file(test_file, seg_test_file)

词分类的构建:

def generare_category_dcit(input_file, categegory_file):
    """Collect the distinct labels of the raw training file.

    Reads ``input_file`` as UTF-8, skips its first line, and counts how often
    each label (the second comma-separated field) occurs. For every label, in
    order of first appearance, ``label<TAB>count`` is printed and the label
    alone is written as one line of ``categegory_file``.

    Args:
        input_file: Path of the raw training file.
        categegory_file: Path of the category list to create.

    Raises:
        ValueError: If a record has fewer than two commas.
    """
    with open(input_file, 'r', encoding='utf8') as f:
        lines = f.readlines()[1:]

    categegory_dict = {}
    for line in lines:
        # Only the label is needed, so split on the first two commas only;
        # commas further right (e.g. inside the content) are harmless.
        _, label, _ = line.strip('\r\n').split(',', 2)
        categegory_dict[label] = categegory_dict.get(label, 0) + 1

    with open(categegory_file, 'w') as f:
        for category, count in categegory_dict.items():
            print('{0}\t{1}'.format(category, count))
            f.write('%s\n' % category)
            
# Write the list of categories found in the raw training file.
generare_category_dcit(train_file, category_file)

tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第3张图片

词表的构建:

利用已有的分词后的训练集进行构建词表
词表只记录训练集中出现过的词;只在测试集中出现、不在词表里的词,会被统一映射为未知词(UNK)的 id


def generate_vocab_file(input_seg_file, output_vocab_file):
    """Build a frequency-sorted vocabulary file from a segmented file.

    Every input line must be ``label<TAB>content``; the content is split on
    whitespace and each word is counted. The output starts with a line whose
    word field is empty and whose frequency is 1000000, followed by one
    ``word<TAB>count`` line per word in descending count order (ties keep
    first-seen order).

    Args:
        input_seg_file: Path of the segmented input file.
        output_vocab_file: Path of the vocabulary file to create.
    """
    with open(input_seg_file, 'r') as seg:
        rows = seg.readlines()

    counts = {}
    for row in rows:
        _, text = row.strip('\r\n').split('\t')
        for token in text.split():
            counts[token] = counts.get(token, 0) + 1

    ranked = list(counts.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    with open(output_vocab_file, 'w') as out:
        # Entry with an empty word and a very large count; Vocab treats the
        # empty word as the unknown-word entry.
        out.write('\t1000000\n')
        for token, freq in ranked:
            out.write('%s\t%d\n' % (token, freq))

# Build the vocabulary from the segmented training set.
# NOTE(review): seg_train_file is not defined in the visible part of these
# notes - confirm where it is set.
generate_vocab_file(seg_train_file, vocab_file)

在构建神经网络之前,先逐步检查生成的各个文件格式是否正确,以防止后面出错

下一步操作

tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第4张图片
定义常用变量

import tensorflow as tf
import os
import sys
import numpy as np
import math

# Set the TensorFlow logger to INFO so the tf.logging.info calls below are shown.
tf.logging.set_verbosity(tf.logging.INFO)


def get_default_params():
    """Return the default hyper-parameters as a ``tf.contrib.training.HParams``."""
    defaults = dict(
        num_embedding_size=16,
        num_timesteps=50,
        num_lstm_nodes=[32, 32],
        num_lstm_laysers=2,
        num_fc_nodes=32,
        batch_size=100,
        clip_lstm_grads=1.0,
        learning_rate=0.001,
        num_word_threshold=10,
    )
    return tf.contrib.training.HParams(**defaults)


hps = get_default_params()

# Raw strings keep every backslash literal. The previous plain literals relied
# on "\d" and "\s" being unrecognised escape sequences, which newer Python
# versions warn about. The resulting path values are unchanged.
train_file = r'.\deep_learn\sohu_train.txt'
test_file = r'.\deep_learn\sohu_test.txt'

vocab_file = r'.\deep_learn\sohu_vocab.txt'
category_file = r'.\deep_learn\sohu_category.txt'
output_file = r'.\deep_learn\sohu_run_text_run'

# output_file is used as a directory; create it on first use.
if not os.path.exists(output_file):
    os.mkdir(output_file)

词表封装api

class Vocab:
    """Word-to-id table loaded from a ``word<TAB>frequency`` file.

    Entries whose frequency is below ``num_word_threshold`` are skipped. Ids
    are handed out in file order, starting at 0. The entry whose word field
    is the empty string provides the id returned for unknown words; if no
    such entry is loaded that id stays at -1.
    """

    def __init__(self, filename, num_word_threshold):
        self._word_to_id = {}
        self._unk = -1
        self._num_word_threshold = num_word_threshold
        self._read_dict(filename)

    def _read_dict(self, filename):
        """Fill the table from ``filename``."""
        with open(filename, 'r') as fin:
            rows = fin.readlines()
        for row in rows:
            token, count = row.strip('\r\n').split('\t')
            if int(count) >= self._num_word_threshold:
                next_id = len(self._word_to_id)
                if token == '':
                    self._unk = next_id
                self._word_to_id[token] = next_id

    def word_to_id(self, word):
        """Return the id of ``word``, or the unknown-word id."""
        try:
            return self._word_to_id[word]
        except KeyError:
            return self._unk

    @property
    def unk(self):
        """Id used for unknown words."""
        return self._unk

    def size(self):
        """Return the number of entries in the table."""
        return len(self._word_to_id)

    def sentence_to_id(self, sentence):
        """Map a whitespace-separated sentence to a list of word ids."""
        return list(map(self.word_to_id, sentence.split()))
# Smoke test: load the vocabulary and log how many entries it holds.
vocab = Vocab(vocab_file, hps.num_word_threshold)
tf.logging.info('vocab_size: {}'.format(vocab.size()))

类别的封装

class CategoryDict:
    """Category-name-to-id table loaded from a file with one name per line.

    Each line receives the id ``len(table)`` at the moment it is read.
    """

    def __init__(self, filename):
        self._category_to_id = {}
        with open(filename, 'r') as fin:
            rows = fin.readlines()
        for row in rows:
            name = row.strip('\r\n')
            self._category_to_id[name] = len(self._category_to_id)

    def category_to_id(self, category):
        """Return the id of ``category``.

        Raises:
            Exception: If ``category`` is not in the table.
        """
        if category in self._category_to_id:
            return self._category_to_id[category]
        raise Exception("{} is not in our category".format(category))

# Smoke test: load the category table and log the id of one category name.
category_vocab = CategoryDict(category_file)
test_str = '女人'
tf.logging.info('id:{}'.format(category_vocab.category_to_id(test_str)))
# api sentence_to_id的实现
class Vocab:
    """Vocabulary read from a ``word<TAB>frequency`` file.

    Only words with a frequency of at least ``num_word_threshold`` are kept,
    numbered from 0 in the order they are read. The entry with an empty word
    field defines the id used for out-of-vocabulary words (-1 if that entry
    is never loaded).
    """

    def __init__(self, filename, num_word_threshold):
        self._word_to_id = {}
        self._unk = -1
        self._num_word_threshold = num_word_threshold
        self._read_dict(filename)

    def _read_dict(self, filename):
        """Load every sufficiently frequent word of ``filename``."""
        with open(filename, 'r') as handle:
            for raw in handle:
                entry, freq_text = raw.strip('\r\n').split('\t')
                freq = int(freq_text)
                if freq < self._num_word_threshold:
                    continue
                assigned = len(self._word_to_id)
                self._word_to_id[entry] = assigned
                if entry == '':
                    self._unk = assigned

    def word_to_id(self, word):
        """Look up ``word``; unknown words yield the unknown-word id."""
        if word in self._word_to_id:
            return self._word_to_id[word]
        return self._unk

    @property
    def unk(self):
        """The unknown-word id."""
        return self._unk

    def size(self):
        """Number of words held by the vocabulary."""
        return len(self._word_to_id)

    def sentence_to_id(self, sentence):
        """Convert a whitespace-separated sentence into word ids."""
        ids = []
        for piece in sentence.split():
            ids.append(self.word_to_id(piece))
        return ids

category_to_id的实现和test


class CategoryDict:
    """Lookup table from category name to integer id.

    The table is read from a file holding one category name per line; a line
    is given the current table size as its id.
    """

    def __init__(self, filename):
        self._category_to_id = {}
        with open(filename, 'r') as handle:
            for raw in handle:
                label = raw.strip('\r\n')
                position = len(self._category_to_id)
                self._category_to_id[label] = position

    def category_to_id(self, category):
        """Return the id stored for ``category``.

        Raises:
            Exception: If ``category`` is unknown; the whole table is printed
                first to help debugging.
        """
        if category in self._category_to_id:
            return self._category_to_id[category]
        print(self._category_to_id)
        raise Exception("{} is not in our category".format(category))
          
# Load the vocabulary and log its size.
vocab = Vocab(vocab_file, hps.num_word_threshold)
tf.logging.info('vocab_size: {}'.format(vocab.size()))

# Load the category table and log the id of one category name.
category_vocab = CategoryDict(category_file)
test_str = '女人'
tf.logging.info('id:{}'.format(category_vocab.category_to_id(test_str)))

数据集的 next batch 的实现
由于 train_seg_file 和 test_seg_file 写入时使用的编码不一样,所以读取时要分别指定编码

class TextDataSet:
    """In-memory text classification data set with shuffled mini-batches.

    Each line of the source file must be ``label<TAB>content`` with the
    content already split into whitespace-separated words. Sentences are
    truncated or padded (with the vocabulary's unknown-word id) to exactly
    ``num_timesteps`` ids.

    Args:
        filename: Path of the segmented file. The name must contain ``train``
            (read with the platform default encoding) or ``test`` (read as
            ``utf-8-sig``).
        vocab: Object providing ``sentence_to_id(str)`` and ``unk``.
        category_vocab: Object providing ``category_to_id(str)``.
        num_timesteps: Number of word ids kept per sentence.
    """

    def __init__(self, filename, vocab, category_vocab, num_timesteps):
        self._vocab = vocab
        self._category_vocab = category_vocab
        self._num_timesteps = num_timesteps

        # _inputs: one row of word ids per sample; _outputs: one label id
        # per sample. next_batch returns them in this order.
        self._inputs = []
        self._outputs = []

        self._indicator = 0
        self._parse_file(filename)

    def _parse_file(self, filename):
        """Load, encode, pad and shuffle every sample of ``filename``.

        Raises:
            ValueError: If the file name contains neither ``train`` nor
                ``test``, so the encoding cannot be chosen.
        """
        tf.logging.info('Loading data from {}'.format(filename))
        # The two segmented files are read with different encodings; the
        # choice is keyed on the file name.
        if 'train' in filename:
            encoding = None
        elif 'test' in filename:
            encoding = 'utf-8-sig'
        else:
            raise ValueError(
                "cannot tell whether {} is a train or test file".format(
                    filename))
        with open(filename, 'r', encoding=encoding) as f:
            lines = f.readlines()

        for line in lines:
            label, content = line.strip('\r\n').split('\t')
            id_label = self._category_vocab.category_to_id(label)
            id_words = self._vocab.sentence_to_id(content)
            id_words = id_words[0: self._num_timesteps]
            padding_num = self._num_timesteps - len(id_words)
            id_words = id_words + [self._vocab.unk] * padding_num
            # Word ids are the model input and the label id is the target.
            # These two were previously stored the other way round, so
            # next_batch handed the labels to callers as "inputs".
            self._inputs.append(id_words)
            self._outputs.append(id_label)

        self._inputs = np.asarray(self._inputs, dtype=np.int32)
        self._outputs = np.asarray(self._outputs, dtype=np.int32)
        self._random_shuffle()

    def _random_shuffle(self):
        """Shuffle inputs and outputs with one shared permutation."""
        p = np.random.permutation(len(self._inputs))
        self._inputs = self._inputs[p]
        self._outputs = self._outputs[p]

    def next_batch(self, batch_size):
        """Return the next ``(batch_inputs, batch_outputs)`` pair.

        When fewer than ``batch_size`` samples remain, the data is reshuffled
        and reading restarts from the beginning (the leftover samples of the
        current pass are dropped).

        Raises:
            Exception: If ``batch_size`` exceeds the number of samples.
        """
        end_indicator = self._indicator + batch_size
        if end_indicator > len(self._inputs):
            self._random_shuffle()
            self._indicator = 0
            end_indicator = batch_size
        if end_indicator > len(self._inputs):
            raise Exception("batch size: {} is too large".format(batch_size))

        batch_inputs = self._inputs[self._indicator: end_indicator]
        batch_output = self._outputs[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_inputs, batch_output

# Build both data sets and print one small batch from each as a smoke test.
# NOTE(review): TextDataSet splits every line on a tab into label and
# content, which looks like the segmented files rather than the raw ones -
# confirm that train_file / test_file point at segmented data here.
train_dataset = TextDataSet(train_file, vocab, category_vocab, hps.num_timesteps)
test_dataset = TextDataSet(test_file, vocab, category_vocab, hps.num_timesteps)

print(train_dataset.next_batch(2))
print(test_dataset.next_batch(2))

LSTM模型

def create_model(hps, vocab_size, num_classes):
    """Build the TF1 graph of the multi-layer LSTM text classifier.

    Args:
        hps: Hyper-parameters; this function reads num_timesteps, batch_size,
            num_embedding_size, num_lstm_nodes, num_lstm_laysers,
            num_fc_nodes, clip_lstm_grads and learning_rate.
        vocab_size: Number of rows of the embedding matrix.
        num_classes: Number of units of the final dense layer.

    Returns:
        A 3-tuple ``((inputs, outputs, keep_prob), (loss, accuracy),
        (train_op, global_step))`` of placeholders, metric tensors and
        training ops.
    """
    # Number of word ids per sentence.
    num_timesteps = hps.num_timesteps
    # Mini-batch size; it is baked into the placeholder shapes below.
    batch_size = hps.batch_size
    # Word ids, shape [batch_size, num_timesteps].
    inputs = tf.placeholder(tf.int32, (batch_size, num_timesteps))
    # Label ids, shape [batch_size].
    outputs = tf.placeholder(tf.int32, (batch_size, ))
    # Keep probability used by the dropout layers.
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    # Counts the training steps applied so far.
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False)
    # The embedding matrix is initialised uniformly in [-1, 1].
    embedding_initializer = tf.random_uniform_initializer(-1.0, 1.0)
    with tf.variable_scope(
        'embedding', initializer=embedding_initializer):
        embeddings = tf.get_variable(
            'embedding',
            [vocab_size, hps.num_embedding_size],
            tf.float32)
        # Replace every word id by its embedding row, e.g.
        # [1, 10, 7] -> [embeddings[1], embeddings[10], embeddings[7]]
        embed_inputs = tf.nn.embedding_lookup(embeddings, inputs)
    # Uniform initialiser whose range shrinks with the embedding size plus
    # the width of the last LSTM layer.
    scale = 1.0 / math.sqrt(hps.num_embedding_size + hps.num_lstm_nodes[-1]) / 3.0
    lstm_init = tf.random_uniform_initializer(-scale, scale)
    # Build the stacked LSTM.
    with tf.variable_scope('lstm_nn', initializer=lstm_init):
        cells = []
        for i in range(hps.num_lstm_laysers):
            # One LSTM cell per layer.
            cell = tf.contrib.rnn.BasicLSTMCell(
                hps.num_lstm_nodes[i],
                state_is_tuple = True
            )
            # Dropout on the output of each layer.
            cell = tf.contrib.rnn.DropoutWrapper(
                cell,
                output_keep_prob = keep_prob
            )
            cells.append(cell)
        # Stack the per-layer cells into one multi-layer cell.
        cell = tf.contrib.rnn.MultiRNNCell(cells)
        # Start every sequence from an all-zero state.
        initial_state = cell.zero_state(batch_size, tf.float32)
        # run_outpus: [batch_size, num_timesteps, num_lstm_nodes[-1]]
        run_outpus, _ = tf.nn.dynamic_rnn(
            cell, embed_inputs, initial_state=initial_state
        )
        print(run_outpus)
        # Keep only the output of the last time step.
        last = run_outpus[:, -1, :]
    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    # Fully connected layers on top of the last LSTM output.
    with tf.variable_scope('fc', initializer=fc_init):
        fc1 = tf.layers.dense(last,
                              hps.num_fc_nodes,
                              activation=tf.nn.relu,
                              name='fc1')
        # Dropout between the two dense layers.
        fc1_dropout = tf.contrib.layers.dropout(fc1, keep_prob)
        logits = tf.layers.dense(fc1_dropout,
                                 num_classes,
                                 name='fc2')
    # Loss and accuracy.
    with tf.name_scope('metrics'):
        sofmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels= outputs
        )
        loss = tf.reduce_mean(sofmax_loss)
        y_pred = tf.arg_max(tf.nn.softmax(logits=logits),
                            1,
                            output_type= tf.int32)
        correct_pred = tf.equal(outputs, y_pred)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Build train_op.
    with tf.name_scope('train_op'):
        tvars = tf.trainable_variables()
        for var in tvars:
            tf.logging.info('variable name: {}'.format(var.name))
        # Clip the global gradient norm to guard against exploding gradients.
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, tvars), hps.clip_lstm_grads
        )
        # Apply the clipped gradients to the variables.
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step= global_step
        )

    return ((inputs, outputs, keep_prob),
            (loss, accuracy),
            (train_op, global_step))

# Build the graph and unpack the handles used by the training loop.
# NOTE(review): vocab_size and num_classes are not defined in the visible
# part of these notes - confirm where they are set.
placeholders, metrics, others = create_model(
    hps, vocab_size, num_classes
)
# The keep-probability placeholder is bound to the name keep_prod here; the
# training loop below feeds it under that name.
inputs, outputs, keep_prod = placeholders
loss, accuracy = metrics
train_op, global_step = others

训练

init_op = tf.global_variables_initializer()
# Dropout keep probabilities: dropout is active only while training.
train_keep_prob_value = 0.8
test_keep_prob_value = 1.0

# Number of test batches averaged per evaluation, and total training steps.
test_steps = 100
num_train_steps = 10000

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(num_train_steps):
        batch_inputs, batch_labels = train_dataset.next_batch(
            hps.batch_size
        )
        outputs_val = sess.run([loss, accuracy, train_op, global_step],
                               feed_dict={
                                   inputs: batch_inputs,
                                   outputs: batch_labels,
                                   keep_prod: train_keep_prob_value,
                               })
        loss_val, accuracy_val, _, global_step_val = outputs_val
        if (i + 1) % 20 == 0:
            tf.logging.info("Train Step: {}, loss: {}, accuracy: {}".format(global_step_val, loss_val, accuracy_val))

        if (i + 1) % 100 == 0:
            all_test_loss_val = []
            all_test_acc_cal = []
            for j in range(test_steps):
                test_inputs, test_labels = test_dataset.next_batch(hps.batch_size)
                # Evaluation fetches the metrics only. Fetching train_op
                # here would update the weights on test data and advance
                # global_step, contaminating the reported test accuracy.
                test_loss_val, test_accuarcy_val = sess.run(
                    [loss, accuracy],
                    feed_dict={
                        inputs: test_inputs,
                        outputs: test_labels,
                        keep_prod: test_keep_prob_value,
                    })
                all_test_loss_val.append(test_loss_val)
                all_test_acc_cal.append(test_accuarcy_val)
            # Report means over all evaluated batches, not the last batch.
            test_loss = np.mean(all_test_loss_val)
            test_acc = np.mean(all_test_acc_cal)
            tf.logging.info("Test Step: {}, loss: {}, accuracy: {}".format(global_step_val, test_loss, test_acc))

使用以下参数

        # num_embedding_size = 16,
        num_embedding_size = 32,
        # 一个句子取前50个分词
        # num_timesteps = 50,
        num_timesteps = 600,
        # num_lstm_nodes = [32, 32],
        num_lstm_nodes = [64, 64],
        num_lstm_laysers = 2,
        # num_fc_nodes = 32,
        num_fc_nodes = 64,
        batch_size = 100,
        clip_lstm_grads = 1.0,
        learning_rate = 0.001,
        num_word_threshold = 10,

训练10K次得到:
train 集和test集大概都有98%的准确度

tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第5张图片

使用CNN

def create_model(hps, vocab_size, num_classes):
    """Build the TF1 graph of the 1-D convolutional text classifier.

    Args:
        hps: Hyper-parameters; this function reads num_timesteps, batch_size,
            num_embedding_size, num_filters, num_kernel_size, num_fc_nodes
            and learning_rate.
        vocab_size: Number of rows of the embedding matrix.
        num_classes: Number of units of the final dense layer.

    Returns:
        A 3-tuple ``((inputs, outputs, keep_prob), (loss, accuracy),
        (train_op, global_step))`` of placeholders, metric tensors and
        training ops.
    """
    # Number of word ids per sentence.
    num_timesteps = hps.num_timesteps
    # Mini-batch size; it is baked into the placeholder shapes below.
    batch_size = hps.batch_size
    # Word ids, shape [batch_size, num_timesteps].
    inputs = tf.placeholder(tf.int32, (batch_size, num_timesteps))
    # Label ids, shape [batch_size].
    outputs = tf.placeholder(tf.int32, (batch_size, ))
    # Keep probability used by the dropout layer.
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    # Counts the training steps applied so far.
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False)
    # The embedding matrix is initialised uniformly in [-1, 1].
    embedding_initializer = tf.random_uniform_initializer(-1.0, 1.0)
    with tf.variable_scope(
        'embedding', initializer=embedding_initializer):
        embeddings = tf.get_variable(
            'embedding',
            [vocab_size, hps.num_embedding_size],
            tf.float32)
        # Replace every word id by its embedding row, e.g.
        # [1, 10, 7] -> [embeddings[1], embeddings[10], embeddings[7]]
        embed_inputs = tf.nn.embedding_lookup(embeddings, inputs)
    # Convolution part.
    scale = 1.0 / math.sqrt(hps.num_embedding_size + hps.num_filters) / 3.0
    cnn_init = tf.random_uniform_initializer(-scale, scale)
    with tf.variable_scope('cnn', initializer= cnn_init):
        # embed_inputs : [batch_size, timesteps, embed_size]
        # conv1d : [batch_size, output length, num_filters]
        # NOTE(review): padding is left at the tf.layers.conv1d default, so
        # the output length may be shorter than timesteps - confirm.
        conv1d = tf.layers.conv1d(embed_inputs,
                                  hps.num_filters,
                                  hps.num_kernel_size,
                                  activation=tf.nn.relu,
                                  )
        # Global max pooling over axis 1.
        global_maxpooling = tf.reduce_max(conv1d, axis=[1])


    # The string literal below holds the LSTM variant of this model; it is
    # never executed.
    """
    # 网络initializer的一种方法
    scale = 1.0 / math.sqrt(hps.num_embedding_size + hps.num_lstm_nodes[-1]) / 3.0
    lstm_init = tf.random_uniform_initializer(-scale, scale)
    # 构建lstm
    with tf.variable_scope('lstm_nn', initializer=lstm_init):
        cells = []
        for i in range(hps.num_lstm_laysers):
            # 循环初始化lstm
            cell = tf.contrib.rnn.BasicLSTMCell(
                hps.num_lstm_nodes[i],
                state_is_tuple = True
            )
            # 使用dropout方法
            cell = tf.contrib.rnn.DropoutWrapper(
                cell,
                output_keep_prob = keep_prob
            )
            cells.append(cell)
        # 合并两个cell
        cell = tf.contrib.rnn.MultiRNNCell(cells)
        # 初始化cell内的值
        initial_state = cell.zero_state(batch_size, tf.float32)
        # run_outputs: [batch_size, num_timesteps, lstm_outpus[-1]]
        run_outpus, _ = tf.nn.dynamic_rnn(
            cell, embed_inputs, initial_state=initial_state
        )
        print(run_outpus)
        last = run_outpus[:, -1, :]
    """
    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    # Fully connected layers on top of the pooled convolution output.
    with tf.variable_scope('fc', initializer=fc_init):
        fc1 = tf.layers.dense(global_maxpooling,
                              hps.num_fc_nodes,
                              activation=tf.nn.relu,
                              name='fc1')
        # Dropout between the two dense layers.
        fc1_dropout = tf.contrib.layers.dropout(fc1, keep_prob)
        logits = tf.layers.dense(fc1_dropout,
                                 num_classes,
                                 name='fc2')
    # Loss and accuracy.
    with tf.name_scope('metrics'):
        sofmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels= outputs
        )
        loss = tf.reduce_mean(sofmax_loss)
        y_pred = tf.arg_max(tf.nn.softmax(logits=logits),
                            1,
                            output_type= tf.int32)
        correct_pred = tf.equal(outputs, y_pred)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Build train_op: plain Adam minimisation, without gradient clipping.
    with tf.name_scope('train_op'):
        train_op = tf.train.AdamOptimizer(hps.learning_rate).minimize(loss,
                                                                      global_step=global_step)
        # The string literal below holds the gradient-clipping variant; it
        # is never executed.
        """
        tvars = tf.trainable_variables()
        for var in tvars:
            tf.logging.info('variable name: {}'.format(var.name))
        # 限制训练时的梯度大小,使得不会出现梯度爆炸
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, tvars), hps.clip_lstm_grads
        )
        # 梯度应用到变量中去
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step= global_step
        )
        """
    return ((inputs, outputs, keep_prob),
            (loss, accuracy),
            (train_op, global_step))

# Build the graph and unpack the handles used by the training loop.
# NOTE(review): vocab_size and num_classes are not defined in the visible
# part of these notes - confirm where they are set.
placeholders, metrics, others = create_model(
    hps, vocab_size, num_classes
)
# The keep-probability placeholder is bound to the name keep_prod here; the
# training loop below feeds it under that name.
inputs, outputs, keep_prod = placeholders
loss, accuracy = metrics
train_op, global_step = others

#%%

# train:
init_op = tf.global_variables_initializer()
# Dropout keep probabilities: dropout is active only while training.
train_keep_prob_value = 0.8
test_keep_prob_value = 1.0

# Number of test batches averaged per evaluation, and total training steps.
test_steps = 100
num_train_steps = 10000

# Results reported by the author: Train 100%, Test 95.3%.
# NOTE(review): those figures were obtained while the evaluation step still
# fetched train_op on test batches; re-measure.

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(num_train_steps):
        batch_inputs, batch_labels = train_dataset.next_batch(
            hps.batch_size
        )
        outputs_val = sess.run([loss, accuracy, train_op, global_step],
                               feed_dict={
                                   inputs: batch_inputs,
                                   outputs: batch_labels,
                                   keep_prod: train_keep_prob_value,
                               })
        loss_val, accuracy_val, _, global_step_val = outputs_val
        if global_step_val % 100 == 0:
            tf.logging.info("Train Step: {}, loss: {}, accuracy: {}".format(global_step_val, loss_val, accuracy_val))

        if global_step_val % 1000 == 0:
            all_test_loss_val = []
            all_test_acc_cal = []
            for j in range(test_steps):
                test_inputs, test_labels = test_dataset.next_batch(hps.batch_size)
                # Evaluation fetches the metrics only. Fetching train_op
                # here would update the weights on test data and advance
                # global_step, contaminating the reported test accuracy.
                test_loss_val, test_accuarcy_val = sess.run(
                    [loss, accuracy],
                    feed_dict={
                        inputs: test_inputs,
                        outputs: test_labels,
                        keep_prod: test_keep_prob_value,
                    })
                all_test_loss_val.append(test_loss_val)
                all_test_acc_cal.append(test_accuarcy_val)
            # Report means over all evaluated batches, not the last batch.
            test_loss = np.mean(all_test_loss_val)
            test_acc = np.mean(all_test_acc_cal)
            tf.logging.info("------Test Step: {}, loss: {}, accuracy: {}".format(global_step_val, test_loss, test_acc))

使用CNN会比lstm收敛地快很多
参数如下:

def get_default_params():
    """Return the hyper-parameters of the CNN model as an ``HParams`` object."""
    settings = {
        'num_embedding_size': 32,   # earlier value: 16
        'num_timesteps': 600,       # word ids kept per sentence; earlier value: 50
        'num_filters': 256,         # earlier value: 128
        'num_kernel_size': 3,
        'num_fc_nodes': 32,         # alternative tried: 64
        'batch_size': 100,
        'learning_rate': 0.001,
        'num_word_threshold': 10,
    }
    return tf.contrib.training.HParams(**settings)

tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第6张图片
在后面使用(Bi-)和attention会比单单LSTM收敛的更快。参考tf-rnn-attention。
代码为

def create_model(hps, vocab_size, num_classes):
    """Build the TF1 graph of the bidirectional GRU + attention classifier.

    NOTE(review): ``bi_rnn``, ``GRUCell`` and ``attention`` are not defined
    or imported in the visible part of these notes - confirm where they come
    from.

    Args:
        hps: Hyper-parameters; this function reads num_timesteps, batch_size,
            num_embedding_size, num_fc_nodes, clip_lstm_grads and
            learning_rate.
        vocab_size: Vocabulary size; the embedding matrix gets
            ``vocab_size + 2`` rows.
        num_classes: Number of units of the final dense layer.

    Returns:
        A 3-tuple ``((inputs, outputs, keep_prob, seq_len_ph),
        (loss, accuracy), (train_op, global_step))`` of placeholders, metric
        tensors and training ops.
    """
    # Number of word ids per sentence.
    num_timesteps = hps.num_timesteps
    # Mini-batch size; it is baked into the placeholder shapes below.
    batch_size = hps.batch_size
    # Word ids, shape [batch_size, num_timesteps].
    inputs = tf.placeholder(tf.int32, (batch_size, num_timesteps))
    # Label ids, shape [batch_size].
    outputs = tf.placeholder(tf.int32, (batch_size, ))
    # Keep probability used by the dropout layers.
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    # Counts the training steps applied so far.
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name='global_step', trainable=False)

    # Per-sample sequence lengths handed to the RNN.
    seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')

    # The embedding matrix is initialised uniformly in [-1, 1].
    embedding_initializer = tf.random_uniform_initializer(-1.0, 1.0)
    with tf.variable_scope(
        'embedding', initializer=embedding_initializer):
        embeddings = tf.get_variable(
            'embedding',
            [vocab_size+2, hps.num_embedding_size],
            tf.float32)
        print('embeddings', embeddings)  # author's run printed (85430, 32)
        # Replace every word id by its embedding row, e.g.
        # [1, 10, 7] -> [embeddings[1], embeddings[10], embeddings[7]]
        embed_inputs = tf.nn.embedding_lookup(embeddings, inputs)  # author's run: (100, 200, 32)
        print('embed_inputs', embed_inputs)

    # (Bi-)RNN layer.
    rnn_outputs, _ = bi_rnn(GRUCell(64), GRUCell(64),
                            inputs=embed_inputs, sequence_length=seq_len_ph, dtype=tf.float32)
    print('rnn_outputs', rnn_outputs)  # author's note: shape=(100, 200, 64)

    # Attention layer.
    with tf.name_scope('Attention_layer'):
        attention_output, alphas = attention(rnn_outputs, 50, return_alphas=True)
    drop = tf.nn.dropout(attention_output, keep_prob)
    print(drop.shape)  # author's run printed (100, 128)

    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    # Fully connected layers on top of the attention output.
    with tf.variable_scope('fc', initializer=fc_init):
        fc1 = tf.layers.dense(drop,
                              hps.num_fc_nodes,
                              activation=tf.nn.relu,
                              name='fc1')
        # tf.layers.dropout takes the *drop* rate as its second argument and
        # is inactive unless training=True, so passing keep_prob to it never
        # applied dropout. tf.nn.dropout takes the keep probability, matching
        # the placeholder and the dropout applied to the attention output.
        fc1_dropout = tf.nn.dropout(fc1, keep_prob)
        logits = tf.layers.dense(fc1_dropout,
                                 num_classes,
                                 name='fc2')

    # Loss and accuracy.
    with tf.name_scope('metrics'):
        sofmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels= outputs
        )
        loss = tf.reduce_mean(sofmax_loss)

        y_pred = tf.arg_max(tf.nn.softmax(logits=logits),
                            1,
                            output_type= tf.int32)
        correct_pred = tf.equal(outputs, y_pred)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Build train_op.
    with tf.name_scope('train_op'):
        tvars = tf.trainable_variables()
        for var in tvars:
            tf.logging.info('variable name: {}'.format(var.name))
        # Clip the global gradient norm to guard against exploding gradients.
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, tvars), hps.clip_lstm_grads
        )
        # Apply the clipped gradients to the variables.
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step= global_step
        )

    return ((inputs, outputs, keep_prob, seq_len_ph),
            (loss, accuracy),
            (train_op, global_step)
            )

训练如下:
早早在2K多次的时候,训练就达到了1.0
tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第7张图片
tensorflow(神经网络)学习笔记(四)之基于LSTM和CNN的新闻分类模型(笔记)_第8张图片

你可能感兴趣的:(机器学习,笔记,python,LSTM)