【Keras】A Transformer Implementation with TF 2.0+ and tf.keras

The code implements the Transformer from the paper Attention Is All You Need (Vaswani et al., 2017).

The code consists of train.py, data_loader.py, model.py, utils.py, and test.py. Each Python file plays the following role:

  • train.py: initializes the DataLoader (from data_loader.py) and calls its load() method to build the training and validation sets; builds the Transformer model (from model.py); defines the learning rate, optimizer, and loss function; initializes a Trainer (from utils.py) and calls its single_gpu_train() method to train the model on a single GPU.
  • data_loader.py: builds the training, validation, and test sets and performs some preprocessing.
  • model.py: builds the Transformer model, including the EncoderLayer, DecoderLayer, PositionWiseFeedForwardLayer, MultiHeadAttention, ScaledDotProductAttention, and Embeddinglayer classes.
  • utils.py: a utility module that implements the Trainer class used for training, the CustomSchedule class for learning-rate warmup, the Mask class for the masking operations, and several helper functions such as label_smoothing (label smoothing) and calculate_bleu_score (BLEU score computation).
  • test.py: initializes the DataLoader (from data_loader.py) and calls its load_test() method to build the test set, defines the Transformer model structure (from model.py) so that the trained weights can be restored, loads the checkpoint, and runs the evaluation.

1. train.py

Initializes the DataLoader (from data_loader.py) and calls its load() method to build the training and validation sets; builds the Transformer model (from model.py); defines the learning rate, optimizer, and loss function; initializes a Trainer (from utils.py) and calls its single_gpu_train() method to train the model on a single GPU.

from __future__ import (absolute_import, division, print_function, unicode_literals)

import os
import tensorflow as tf
from data_loader import DataLoader
from model import Transformer
from utils import CustomSchedule, Trainer

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


# hyperparameters
TRAIN_RATIO = 0.9
D_POINT_WISE_FF = 2048
D_MODEL = 512
ENCODER_COUNT = DECODER_COUNT = 6
EPOCHS = 20
ATTENTION_HEAD_COUNT = 8
DROPOUT_PROB = 0.1
BATCH_SIZE = 32
SEQ_MAX_LEN_SOURCE = 100
SEQ_MAX_LEN_TARGET = 100
BPE_VOCAB_SIZE = 32000

# hyperparameters for the overfitting test
# BATCH_SIZE = 32
# EPOCHS = 100
DATA_LIMIT = None

GLOBAL_BATCH_SIZE = (BATCH_SIZE * 1)  # multiply by the number of replicas when training on multiple GPUs
print('GLOBAL_BATCH_SIZE ', GLOBAL_BATCH_SIZE)

data_loader = DataLoader(
    dataset_name='wmt14/en-de',
    data_dir='./datasets',
    batch_size=GLOBAL_BATCH_SIZE,
    bpe_vocab_size=BPE_VOCAB_SIZE,
    seq_max_len_source=SEQ_MAX_LEN_SOURCE,
    seq_max_len_target=SEQ_MAX_LEN_TARGET,
    data_limit=DATA_LIMIT,
    train_ratio=TRAIN_RATIO
)

dataset, val_dataset = data_loader.load()

transformer = Transformer(
    inputs_vocab_size=BPE_VOCAB_SIZE,
    target_vocab_size=BPE_VOCAB_SIZE,
    encoder_count=ENCODER_COUNT,
    decoder_count=DECODER_COUNT,
    attention_head_count=ATTENTION_HEAD_COUNT,
    d_model=D_MODEL,
    d_point_wise_ff=D_POINT_WISE_FF,
    dropout_prob=DROPOUT_PROB
)

learning_rate = CustomSchedule(D_MODEL)
optimizer = tf.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
loss_object = tf.losses.CategoricalCrossentropy(from_logits=True, reduction='none')

trainer = Trainer(
    model=transformer,
    dataset=dataset,
    loss_object=loss_object,
    optimizer=optimizer,
    batch_size=GLOBAL_BATCH_SIZE,
    vocab_size=BPE_VOCAB_SIZE,
    epoch=EPOCHS,
)

trainer.single_gpu_train()
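
The warmup schedule follows the formula from the Transformer paper: lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5), i.e. the learning rate grows linearly during warmup and then decays with the inverse square root of the step. A quick sanity check of CustomSchedule (a sketch; the step values are arbitrary):

import tensorflow as tf
from utils import CustomSchedule

schedule = CustomSchedule(d_model=512, warmup_steps=4000)
for step in [1.0, 1000.0, 4000.0, 10000.0, 100000.0]:
    # rises linearly until warmup_steps, then decays as step^-0.5
    print(int(step), float(schedule(tf.constant(step))))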

2. data_loader.py

Builds the training, validation, and test sets and performs some preprocessing.

import os
from urllib.request import urlretrieve

import sentencepiece
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tqdm import tqdm


class DataLoader:
    DIR = None
    PATHS = {}
    BPE_VOCAB_SIZE = 0
    MODES = ['source', 'target']
    dictionary = {
        'source': {
            'token2idx': None,
            'idx2token': None,
        },
        'target': {
            'token2idx': None,
            'idx2token': None,
        }
    }
    CONFIG = {
        'wmt14/en-de': {
            'source_lang': 'en',
            'target_lang': 'de',
            'base_url': 'https://nlp.stanford.edu/projects/nmt/data/wmt14.en-de/',
            'train_files': ['train.en', 'train.de'],
            'vocab_files': ['vocab.50K.en', 'vocab.50K.de'],
            'dictionary_files': ['dict.en-de'],
            'test_files': [
                'newstest2012.en', 'newstest2012.de',
                'newstest2013.en', 'newstest2013.de',
                'newstest2014.en', 'newstest2014.de',
                'newstest2015.en', 'newstest2015.de',
            ]
        }
    }
    BPE_MODEL_SUFFIX = '.model'
    BPE_VOCAB_SUFFIX = '.vocab'
    BPE_RESULT_SUFFIX = '.sequences'
    SEQ_MAX_LEN = {
        'source': 100,
        'target': 100
    }
    DATA_LIMIT = None
    TRAIN_RATIO = 0.9
    BATCH_SIZE = 16

    source_sp = None
    target_sp = None

    def __init__(self, dataset_name, data_dir, batch_size=16, bpe_vocab_size=32000, seq_max_len_source=100,
                 seq_max_len_target=100, data_limit=None, train_ratio=0.9):
        if dataset_name is None or data_dir is None:
            raise ValueError('dataset_name and data_dir must be defined')
        self.DIR = data_dir
        self.DATASET = dataset_name
        self.BPE_VOCAB_SIZE = bpe_vocab_size
        self.SEQ_MAX_LEN['source'] = seq_max_len_source
        self.SEQ_MAX_LEN['target'] = seq_max_len_target
        self.DATA_LIMIT = data_limit
        self.TRAIN_RATIO = train_ratio
        self.BATCH_SIZE = batch_size

        self.PATHS['source_data'] = os.path.join(self.DIR, self.CONFIG[self.DATASET]['train_files'][0])
        self.PATHS['source_bpe_prefix'] = self.PATHS['source_data'] + '.segmented'

        self.PATHS['target_data'] = os.path.join(self.DIR, self.CONFIG[self.DATASET]['train_files'][1])
        self.PATHS['target_bpe_prefix'] = self.PATHS['target_data'] + '.segmented'

    def load(self, custom_dataset=False):
        if custom_dataset:
            print('#1 use custom dataset. please implement custom download_dataset function.')
        else:            
            print('#1 download data')
            self.download_dataset()

        print('#2 parse data')
        source_data = self.parse_data_and_save(self.PATHS['source_data'])
        target_data = self.parse_data_and_save(self.PATHS['target_data'])

        print('#3 train bpe')

        self.train_bpe(self.PATHS['source_data'], self.PATHS['source_bpe_prefix'])
        self.train_bpe(self.PATHS['target_data'], self.PATHS['target_bpe_prefix'])

        print('#4 load bpe vocab')

        self.dictionary['source']['token2idx'], self.dictionary['source']['idx2token'] = self.load_bpe_vocab(
            self.PATHS['source_bpe_prefix'] + self.BPE_VOCAB_SUFFIX)
        self.dictionary['target']['token2idx'], self.dictionary['target']['idx2token'] = self.load_bpe_vocab(
            self.PATHS['target_bpe_prefix'] + self.BPE_VOCAB_SUFFIX)

        print('#5 encode data with bpe')
        source_sequences = self.texts_to_sequences(
            self.sentence_piece(
                source_data,
                self.PATHS['source_bpe_prefix'] + self.BPE_MODEL_SUFFIX,
                self.PATHS['source_bpe_prefix'] + self.BPE_RESULT_SUFFIX
            ),
            mode="source"
        )
        target_sequences = self.texts_to_sequences(
            self.sentence_piece(
                target_data,
                self.PATHS['target_bpe_prefix'] + self.BPE_MODEL_SUFFIX,
                self.PATHS['target_bpe_prefix'] + self.BPE_RESULT_SUFFIX
            ),
            mode="target"
        )

        print('source sequence example:', source_sequences[0])
        print('target sequence example:', target_sequences[0])

        if self.TRAIN_RATIO == 1.0:
            source_sequences_train = source_sequences
            source_sequences_val = []
            target_sequences_train = target_sequences
            target_sequences_val = []
        else:
            (source_sequences_train,
             source_sequences_val,
             target_sequences_train,
             target_sequences_val) = train_test_split(
                source_sequences, target_sequences, train_size=self.TRAIN_RATIO
            )

        if self.DATA_LIMIT is not None:
            print('data size limit ON. limit size:', self.DATA_LIMIT)
            source_sequences_train = source_sequences_train[:self.DATA_LIMIT]
            target_sequences_train = target_sequences_train[:self.DATA_LIMIT]

        print('source_sequences_train', len(source_sequences_train))
        print('source_sequences_val', len(source_sequences_val))
        print('target_sequences_train', len(target_sequences_train))
        print('target_sequences_val', len(target_sequences_val))

        print('train set size: ', len(source_sequences_train))
        print('validation set size: ', len(source_sequences_val))

        train_dataset = self.create_dataset(
            source_sequences_train,
            target_sequences_train
        )
        if self.TRAIN_RATIO == 1.0:
            val_dataset = None
        else:
            val_dataset = self.create_dataset(
                source_sequences_val,
                target_sequences_val
            )

        return train_dataset, val_dataset

    def load_test(self, index=0, custom_dataset=False):
        
        if index < 0 or index >= len(self.CONFIG[self.DATASET]['test_files']) // 2:
            raise ValueError('test file index out of range. min: 0, max: {}'.format(
                len(self.CONFIG[self.DATASET]['test_files']) // 2 - 1)
            )
        if custom_dataset:
            print('#1 use custom dataset. please implement custom download_dataset function.')
        else:
            print('#1 download data')
            self.download_dataset()

        print('#2 parse data')

        source_test_data_path, target_test_data_path = self.get_test_data_path(index)

        source_data = self.parse_data_and_save(source_test_data_path)
        target_data = self.parse_data_and_save(target_test_data_path)

        print('#3 load bpe vocab')

        self.dictionary['source']['token2idx'], self.dictionary['source']['idx2token'] = self.load_bpe_vocab(
            self.PATHS['source_bpe_prefix'] + self.BPE_VOCAB_SUFFIX)
        self.dictionary['target']['token2idx'], self.dictionary['target']['idx2token'] = self.load_bpe_vocab(
            self.PATHS['target_bpe_prefix'] + self.BPE_VOCAB_SUFFIX)

        return source_data, target_data

    def get_test_data_path(self, index):
        source_test_data_path = os.path.join(self.DIR, self.CONFIG[self.DATASET]['test_files'][index * 2])
        target_test_data_path = os.path.join(self.DIR, self.CONFIG[self.DATASET]['test_files'][index * 2 + 1])
        return source_test_data_path, target_test_data_path

    def download_dataset(self):
        for file in (self.CONFIG[self.DATASET]['train_files']
                     + self.CONFIG[self.DATASET]['vocab_files']
                     + self.CONFIG[self.DATASET]['dictionary_files']
                     + self.CONFIG[self.DATASET]['test_files']):
            self._download("{}{}".format(self.CONFIG[self.DATASET]['base_url'], file))

    def _download(self, url):
        path = os.path.join(self.DIR, url.split('/')[-1])
        if not os.path.exists(path):
            with TqdmCustom(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, desc=url) as t:
                urlretrieve(url, path, t.update_to)

    def parse_data_and_save(self, path):
        print('load data from {}'.format(path))
        with open(path, encoding='utf-8') as f:
            lines = f.read().strip().split('\n')

        if lines is None:
            raise ValueError('Vocab file is invalid')

        with open(path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))

        return lines

    def train_bpe(self, data_path, model_prefix):
        model_path = model_prefix + self.BPE_MODEL_SUFFIX
        vocab_path = model_prefix + self.BPE_VOCAB_SUFFIX

        if not (os.path.exists(model_path) and os.path.exists(vocab_path)):
            print('bpe model does not exist. train bpe. model path:', model_path, ' vocab path:', vocab_path)
            train_source_params = "--inputs={} \
                --pad_id=0 \
                --unk_id=1 \
                --bos_id=2 \
                --eos_id=3 \
                --model_prefix={} \
                --vocab_size={} \
                --model_type=bpe ".format(
                data_path,
                model_prefix,
                self.BPE_VOCAB_SIZE
            )
            sentencepiece.SentencePieceTrainer.Train(train_source_params)
        else:
            print('bpe model exist. load bpe. model path:', model_path, ' vocab path:', vocab_path)

    def load_bpe_encoder(self):
        self.dictionary['source']['token2idx'], self.dictionary['source']['idx2token'] = self.load_bpe_vocab(
            self.PATHS['source_bpe_prefix'] + self.BPE_VOCAB_SUFFIX
        )
        self.dictionary['target']['token2idx'], self.dictionary['target']['idx2token'] = self.load_bpe_vocab(
            self.PATHS['target_bpe_prefix'] + self.BPE_VOCAB_SUFFIX
        )

    def sentence_piece(self, source_data, source_bpe_model_path, result_data_path):
        sp = sentencepiece.SentencePieceProcessor()
        sp.load(source_bpe_model_path)

        if os.path.exists(result_data_path):
            print('encoded data exist. load data. path:', result_data_path)
            with open(result_data_path, 'r', encoding='utf-8') as f:
                sequences = f.read().strip().split('\n')
                return sequences

        print('encoded data does not exist. encode data. path:', result_data_path)
        sequences = []
        with open(result_data_path, 'w', encoding='utf-8') as f:
            for sentence in tqdm(source_data):
                pieces = sp.EncodeAsPieces(sentence)
                sequence = " ".join(pieces)
                sequences.append(sequence)
                f.write(sequence + "\n")
        return sequences

    def encode_data(self, inputs, mode='source'):
        if mode not in self.MODES:
            raise ValueError('not allowed mode.')

        if mode == 'source':
            if self.source_sp is None:
                self.source_sp = sentencepiece.SentencePieceProcessor()
                self.source_sp.load(self.PATHS['source_bpe_prefix'] + self.BPE_MODEL_SUFFIX)

            pieces = self.source_sp.EncodeAsPieces(inputs)
            sequence = " ".join(pieces)

        elif mode == 'target':
            if self.target_sp is None:
                self.target_sp = sentencepiece.SentencePieceProcessor()
                self.target_sp.load(self.PATHS['target_bpe_prefix'] + self.BPE_MODEL_SUFFIX)

            pieces = self.target_sp.EncodeAsPieces(inputs)
            sequence = " ".join(pieces)

        else:
            raise ValueError('not allowed mode.')

        return sequence

    def load_bpe_vocab(self, bpe_vocab_path):
        with open(bpe_vocab_path, 'r', encoding='utf-8') as f:
            vocab = [line.split()[0] for line in f.read().splitlines()]

        token2idx = {}
        idx2token = {}

        for idx, token in enumerate(vocab):
            token2idx[token] = idx
            idx2token[idx] = token
        return token2idx, idx2token

    def texts_to_sequences(self, texts, mode='source'):
        if mode not in self.MODES:
            raise ValueError('not allowed mode.')

        sequences = []
        for text in texts:
            text_list = [""] + text.split() + [""]

            sequence = [
                self.dictionary[mode]['token2idx'].get(
                    token, self.dictionary[mode]['token2idx']['<unk>']
                )
                for token in text_list
            ]
            sequences.append(sequence)
        return sequences

    def sequences_to_texts(self, sequences, mode='source'):
        if mode not in self.MODES:
            raise ValueError('not allowed mode.')

        texts = []
        for sequence in sequences:
            if mode == 'source':
                if self.source_sp is None:
                    self.source_sp = sentencepiece.SentencePieceProcessor()
                    self.source_sp.load(self.PATHS['source_bpe_prefix'] + self.BPE_MODEL_SUFFIX)
                text = self.source_sp.DecodeIds(sequence)
            else:
                if self.target_sp is None:
                    self.target_sp = sentencepiece.SentencePieceProcessor()
                    self.target_sp.load(self.PATHS['target_bpe_prefix'] + self.BPE_MODEL_SUFFIX)
                text = self.target_sp.DecodeIds(sequence)
            texts.append(text)
        return texts

    def create_dataset(self, source_sequences, target_sequences):
        new_source_sequences = []
        new_target_sequences = []
        for source, target in zip(source_sequences, target_sequences):
            if len(source) > self.SEQ_MAX_LEN['source']:
                continue
            if len(target) > self.SEQ_MAX_LEN['target']:
                continue
            new_source_sequences.append(source)
            new_target_sequences.append(target)

        source_sequences = tf.keras.preprocessing.sequence.pad_sequences(
            sequences=new_source_sequences, maxlen=self.SEQ_MAX_LEN['source'], padding='post'
        )
        target_sequences = tf.keras.preprocessing.sequence.pad_sequences(
            sequences=new_target_sequences, maxlen=self.SEQ_MAX_LEN['target'], padding='post'
        )
        buffer_size = int(source_sequences.shape[0] * 0.3)
        dataset = tf.data.Dataset.from_tensor_slices(
            (source_sequences, target_sequences)
        ).shuffle(buffer_size)
        dataset = dataset.batch(self.BATCH_SIZE)
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

        return dataset


class TqdmCustom(tqdm):

    def update_to(self, b=1, bsize=1, tsize=None):
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)
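
Note that create_dataset() drops any sentence pair longer than SEQ_MAX_LEN and right-pads the remaining sequences with 0 (the <pad> id). A toy illustration of the padding step (a sketch with made-up token ids):

import tensorflow as tf

# Two made-up token-id sequences of unequal length.
toy_sequences = [[2, 11, 12, 3], [2, 21, 3]]

# Right-pad with 0 up to a fixed length, exactly as create_dataset() does.
padded = tf.keras.preprocessing.sequence.pad_sequences(
    sequences=toy_sequences, maxlen=6, padding='post'
)
print(padded)
# [[ 2 11 12  3  0  0]
#  [ 2 21  3  0  0  0]]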

3. model.py

Builds the Transformer model, including the EncoderLayer, DecoderLayer, PositionWiseFeedForwardLayer, MultiHeadAttention, ScaledDotProductAttention, and Embeddinglayer classes.
The Transformer class itself inherits from tf.keras.Model. A Model organizes and connects the individual layers, wraps them into a single unit, and describes how the input data flows through the layers and operations to produce the output.

Keras models are expressed as classes: we define our own model by subclassing the Python class tf.keras.Model. In the subclass we need to override the __init__() method (the constructor, for initialization) and the call(input) method (the forward pass), and we can add further custom methods as needed.

The EncoderLayer, DecoderLayer, PositionWiseFeedForwardLayer, MultiHeadAttention, ScaledDotProductAttention, and Embeddinglayer classes inherit from tf.keras.layers.Layer. A Layer encapsulates a computation and its variables (for example a fully connected layer, or the convolution and pooling layers of a CNN).

We define our own layer by subclassing the Python class tf.keras.layers.Layer. As with a model, we need to override __init__() (the constructor, for initialization) and call(input) (the forward computation), and we can add custom methods as needed; see the minimal sketch below.
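
As an example, here is a minimal toy layer (a sketch, not part of model.py) that scales its input by a single trainable scalar:

import tensorflow as tf

class ScaleLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(ScaleLayer, self).__init__()
        # one trainable variable created in the constructor
        self.scale = tf.Variable(1.0, trainable=True)

    def call(self, inputs):
        # the forward computation
        return inputs * self.scale

layer = ScaleLayer()
print(layer(tf.constant([1.0, 2.0])))  # [1. 2.] while scale == 1.0

The full model.py is as follows: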

import os

import numpy as np
import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


class Transformer(tf.keras.Model):
    def __init__(self,
                 inputs_vocab_size,
                 target_vocab_size,
                 encoder_count,
                 decoder_count,
                 attention_head_count,
                 d_model,
                 d_point_wise_ff,
                 dropout_prob):
        super(Transformer, self).__init__()

        # model hyper parameter variables
        self.encoder_count = encoder_count
        self.decoder_count = decoder_count
        self.attention_head_count = attention_head_count
        self.d_model = d_model
        self.d_point_wise_ff = d_point_wise_ff
        self.dropout_prob = dropout_prob

        self.encoder_embedding_layer = Embeddinglayer(inputs_vocab_size, d_model)
        self.encoder_embedding_dropout = tf.keras.layers.Dropout(dropout_prob)
        self.decoder_embedding_layer = Embeddinglayer(target_vocab_size, d_model)
        self.decoder_embedding_dropout = tf.keras.layers.Dropout(dropout_prob)

        self.encoder_layers = [
            EncoderLayer(
                attention_head_count,
                d_model,
                d_point_wise_ff,
                dropout_prob
            ) for _ in range(encoder_count)
        ]

        self.decoder_layers = [
            DecoderLayer(
                attention_head_count,
                d_model,
                d_point_wise_ff,
                dropout_prob
            ) for _ in range(decoder_count)
        ]

        self.linear = tf.keras.layers.Dense(target_vocab_size)

    def call(self,
             inputs,
             target,
             inputs_padding_mask,
             look_ahead_mask,
             target_padding_mask,
             training
             ):
        encoder_tensor = self.encoder_embedding_layer(inputs)
        encoder_tensor = self.encoder_embedding_dropout(encoder_tensor, training=training)

        for i in range(self.encoder_count):
            encoder_tensor, _ = self.encoder_layers[i](encoder_tensor, inputs_padding_mask, training=training)
        target = self.decoder_embedding_layer(target)
        decoder_tensor = self.decoder_embedding_dropout(target, training=training)
        for i in range(self.decoder_count):
            decoder_tensor, _, _ = self.decoder_layers[i](
                decoder_tensor,
                encoder_tensor,
                look_ahead_mask,
                target_padding_mask,
                training=training
            )
        return self.linear(decoder_tensor)


class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, attention_head_count, d_model, d_point_wise_ff, dropout_prob):
        super(EncoderLayer, self).__init__()

        # model hyper parameter variables
        self.attention_head_count = attention_head_count
        self.d_model = d_model
        self.d_point_wise_ff = d_point_wise_ff
        self.dropout_prob = dropout_prob

        self.multi_head_attention = MultiHeadAttention(attention_head_count, d_model)
        self.dropout_1 = tf.keras.layers.Dropout(dropout_prob)
        self.layer_norm_1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.position_wise_feed_forward_layer = PositionWiseFeedForwardLayer(
            d_point_wise_ff,
            d_model
        )
        self.dropout_2 = tf.keras.layers.Dropout(dropout_prob)
        self.layer_norm_2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs, mask, training):
        output, attention = self.multi_head_attention(inputs, inputs, inputs, mask)
        output = self.dropout_1(output, training=training)
        output = self.layer_norm_1(tf.add(inputs, output))  # residual network
        output_temp = output

        output = self.position_wise_feed_forward_layer(output)
        output = self.dropout_2(output, training=training)
        output = self.layer_norm_2(tf.add(output_temp, output))  # residual network

        return output, attention


class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, attention_head_count, d_model, d_point_wise_ff, dropout_prob):
        super(DecoderLayer, self).__init__()

        # model hyper parameter variables
        self.attention_head_count = attention_head_count
        self.d_model = d_model
        self.d_point_wise_ff = d_point_wise_ff
        self.dropout_prob = dropout_prob

        self.masked_multi_head_attention = MultiHeadAttention(attention_head_count, d_model)
        self.dropout_1 = tf.keras.layers.Dropout(dropout_prob)
        self.layer_norm_1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.encoder_decoder_attention = MultiHeadAttention(attention_head_count, d_model)
        self.dropout_2 = tf.keras.layers.Dropout(dropout_prob)
        self.layer_norm_2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.position_wise_feed_forward_layer = PositionWiseFeedForwardLayer(
            d_point_wise_ff,
            d_model
        )
        self.dropout_3 = tf.keras.layers.Dropout(dropout_prob)
        self.layer_norm_3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    def call(self, decoder_inputs, encoder_output, look_ahead_mask, padding_mask, training):
        output, attention_1 = self.masked_multi_head_attention(
            decoder_inputs,
            decoder_inputs,
            decoder_inputs,
            look_ahead_mask
        )
        output = self.dropout_1(output, training=training)
        query = self.layer_norm_1(tf.add(decoder_inputs, output))  # residual network
        output, attention_2 = self.encoder_decoder_attention(
            query,
            encoder_output,
            encoder_output,
            padding_mask
        )
        output = self.dropout_2(output, training=training)
        encoder_decoder_attention_output = self.layer_norm_2(tf.add(output, query))

        output = self.position_wise_feed_forward_layer(encoder_decoder_attention_output)
        output = self.dropout_3(output, training=training)
        output = self.layer_norm_3(tf.add(encoder_decoder_attention_output, output))  # residual network

        return output, attention_1, attention_2


class PositionWiseFeedForwardLayer(tf.keras.layers.Layer):
    def __init__(self, d_point_wise_ff, d_model):
        super(PositionWiseFeedForwardLayer, self).__init__()
        self.w_1 = tf.keras.layers.Dense(d_point_wise_ff)
        self.w_2 = tf.keras.layers.Dense(d_model)

    def call(self, inputs):
        inputs = self.w_1(inputs)
        inputs = tf.nn.relu(inputs)
        return self.w_2(inputs)


class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, attention_head_count, d_model):
        super(MultiHeadAttention, self).__init__()

        # model hyper parameter variables
        self.attention_head_count = attention_head_count
        self.d_model = d_model

        if d_model % attention_head_count != 0:
            raise ValueError(
                "d_model({}) % attention_head_count({}) is not zero.d_model must be multiple of attention_head_count.".format(
                    d_model, attention_head_count
                )
            )

        self.d_h = d_model // attention_head_count

        self.w_query = tf.keras.layers.Dense(d_model)
        self.w_key = tf.keras.layers.Dense(d_model)
        self.w_value = tf.keras.layers.Dense(d_model)

        self.scaled_dot_product = ScaledDotProductAttention(self.d_h)

        self.ff = tf.keras.layers.Dense(d_model)

    def call(self, query, key, value, mask=None):
        batch_size = tf.shape(query)[0]

        query = self.w_query(query)
        key = self.w_key(key)
        value = self.w_value(value)

        query = self.split_head(query, batch_size)
        key = self.split_head(key, batch_size)
        value = self.split_head(value, batch_size)

        output, attention = self.scaled_dot_product(query, key, value, mask)
        output = self.concat_head(output, batch_size)

        return self.ff(output), attention

    def split_head(self, tensor, batch_size):
        # inputs tensor: (batch_size, seq_len, d_model)
        return tf.transpose(
            tf.reshape(
                tensor,
                (batch_size, -1, self.attention_head_count, self.d_h)
                # tensor: (batch_size, seq_len, attention_head_count, d_h)
            ),
            [0, 2, 1, 3]
            # tensor: (batch_size, attention_head_count, seq_len, d_h)
        )

    def concat_head(self, tensor, batch_size):
        return tf.reshape(
            tf.transpose(tensor, [0, 2, 1, 3]),
            (batch_size, -1, self.attention_head_count * self.d_h)
        )


class ScaledDotProductAttention(tf.keras.layers.Layer):
    def __init__(self, d_h):
        super(ScaledDotProductAttention, self).__init__()
        self.d_h = d_h

    def call(self, query, key, value, mask=None):
        matmul_q_and_transposed_k = tf.matmul(query, key, transpose_b=True)
        scale = tf.sqrt(tf.cast(self.d_h, dtype=tf.float32))
        scaled_attention_score = matmul_q_and_transposed_k / scale
        if mask is not None:
            scaled_attention_score += (mask * -1e9)

        attention_weight = tf.nn.softmax(scaled_attention_score, axis=-1)

        return tf.matmul(attention_weight, value), attention_weight


class Embeddinglayer(tf.keras.layers.Layer):
    def __init__(self, vocab_size, d_model):
        # model hyper parameter variables
        super(Embeddinglayer, self).__init__()
        self.vocab_size = vocab_size
        self.d_model = d_model

        self.embedding = tf.keras.layers.Embedding(vocab_size, d_model)

    def call(self, sequences):
        max_sequence_len = sequences.shape[1]
        output = self.embedding(sequences) * tf.sqrt(tf.cast(self.d_model, dtype=tf.float32))
        output += self.positional_encoding(max_sequence_len)

        return output

    def positional_encoding(self, max_len):
        pos = np.expand_dims(np.arange(0, max_len), axis=1)
        index = np.expand_dims(np.arange(0, self.d_model), axis=0)

        pe = self.angle(pos, index)

        pe[:, 0::2] = np.sin(pe[:, 0::2])
        pe[:, 1::2] = np.cos(pe[:, 1::2])

        pe = np.expand_dims(pe, axis=0)
        return tf.cast(pe, dtype=tf.float32)

    def angle(self, pos, index):
        return pos / np.power(10000, (index - index % 2) / np.float32(self.d_model))
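
As a quick shape check of the model above (a sketch with random token ids, tiny dimensions, and no masks), a single forward pass looks like this:

import tensorflow as tf
from model import Transformer

# Dummy batch: 2 sentences, 10 source and 12 target token ids each.
dummy_inputs = tf.random.uniform((2, 10), minval=1, maxval=100, dtype=tf.int32)
dummy_target = tf.random.uniform((2, 12), minval=1, maxval=100, dtype=tf.int32)

model = Transformer(
    inputs_vocab_size=100, target_vocab_size=100,
    encoder_count=2, decoder_count=2,
    attention_head_count=4, d_model=64,
    d_point_wise_ff=128, dropout_prob=0.1
)
out = model.call(
    inputs=dummy_inputs, target=dummy_target,
    inputs_padding_mask=None, look_ahead_mask=None,
    target_padding_mask=None, training=False
)
print(out.shape)  # (2, 12, 100): one logit per target position per vocab entry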

4. utils.py

A utility module that implements the Trainer class used for training, the CustomSchedule class for learning-rate warmup, the Mask class for the masking operations, and several helper functions such as label_smoothing (label smoothing) and calculate_bleu_score (BLEU score computation).

The full code is as follows:

import datetime
import os
import re
import time

import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
CURRENT_DIR_PATH = os.path.dirname(os.path.realpath(__file__))
BLEU_CALCULATOR_PATH = os.path.join(CURRENT_DIR_PATH, 'multi-bleu.perl')


class Mask:
    @classmethod
    def create_padding_mask(cls, sequences):
        sequences = tf.cast(tf.math.equal(sequences, 0), dtype=tf.float32)
        return sequences[:, tf.newaxis, tf.newaxis, :]

    @classmethod
    def create_look_ahead_mask(cls, seq_len):
        return 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)

    @classmethod
    def create_masks(cls, inputs, target):
        encoder_padding_mask = Mask.create_padding_mask(inputs)
        decoder_padding_mask = Mask.create_padding_mask(inputs)

        look_ahead_mask = tf.maximum(
            Mask.create_look_ahead_mask(tf.shape(target)[1]),
            Mask.create_padding_mask(target)
            )

        return encoder_padding_mask, look_ahead_mask, decoder_padding_mask


class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)


def label_smoothing(target_data, depth, epsilon=0.1):
    target_data_one_hot = tf.one_hot(target_data, depth=depth)
    n = target_data_one_hot.get_shape().as_list()[-1]
    return ((1 - epsilon) * target_data_one_hot) + (epsilon / n)


class Trainer:
    def __init__(
            self,
            model,
            dataset,
            loss_object=None,
            optimizer=None,
            checkpoint_dir='./checkpoints',
            batch_size=None,
            distribute_strategy=None,
            vocab_size=32000,
            epoch=20,
            ):
        self.batch_size = batch_size
        self.distribute_strategy = distribute_strategy
        self.model = model
        self.loss_object = loss_object
        self.optimizer = optimizer
        self.checkpoint_dir = checkpoint_dir
        self.vocab_size = vocab_size
        self.epoch = epoch
        self.dataset = dataset

        os.makedirs(self.checkpoint_dir, exist_ok=True)
        if self.optimizer is None:
            self.checkpoint = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        else:
            self.checkpoint = tf.train.Checkpoint(step=tf.Variable(1), optimizer=self.optimizer, model=self.model)
        self.checkpoint_manager = tf.train.CheckpointManager(self.checkpoint, self.checkpoint_dir, max_to_keep=3)

        # metrics
        self.train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')
        self.validation_loss = tf.keras.metrics.Mean('validation_loss', dtype=tf.float32)
        self.validation_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('validation_accuracy')

    def multi_gpu_train(self, reset_checkpoint=False):
        with self.distribute_strategy.scope():
            self.dataset = self.distribute_strategy.experimental_distribute_dataset(self.dataset)
            self.trainer(reset_checkpoint=reset_checkpoint, is_distributed=True)

    def single_gpu_train(self, reset_checkpoint=False):
        self.trainer(reset_checkpoint=reset_checkpoint, is_distributed=False)

    def trainer(self, reset_checkpoint, is_distributed=False):
        current_day = datetime.datetime.now().strftime("%Y%m%d")
        train_log_dir = './logs/gradient_tape/' + current_day + '/train'
        os.makedirs(train_log_dir, exist_ok=True)
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)

        if not reset_checkpoint:
            if self.checkpoint_manager.latest_checkpoint:
                print("Restored from {}".format(self.checkpoint_manager.latest_checkpoint))
            else:
                print("Initializing from scratch.")

            self.checkpoint.restore(
                self.checkpoint_manager.latest_checkpoint
            )
        else:
            print("reset and initializing from scratch.")

        for epoch in range(self.epoch):
            start = time.time()
            print('start learning')

            for (batch, (inputs, target)) in enumerate(self.dataset):
                if is_distributed:
                    self.distributed_train_step(inputs, target)
                else:
                    self.train_step(inputs, target)

                self.checkpoint.step.assign_add(1)
                if batch % 50 == 0:
                    print(
                        "Epoch: {}, Batch: {}, Loss:{}, Accuracy: {}".format(epoch, batch, self.train_loss.result(),
                                                                             self.train_accuracy.result()))
                if batch % 10000 == 0 and batch != 0:
                    self.checkpoint_manager.save()
            print("{} | Epoch: {} Loss:{}, Accuracy: {}, time: {} sec".format(
                datetime.datetime.now(), epoch, self.train_loss.result(), self.train_accuracy.result(),
                time.time() - start
            ))
            with train_summary_writer.as_default():
                tf.summary.scalar('train_loss', self.train_loss.result(), step=epoch)
                tf.summary.scalar('train_accuracy', self.train_accuracy.result(), step=epoch)

            self.checkpoint_manager.save()

            self.train_loss.reset_states()
            self.train_accuracy.reset_states()
            self.validation_loss.reset_states()
            self.validation_accuracy.reset_states()
        self.checkpoint_manager.save()

    def basic_train_step(self, inputs, target):
        target_include_start = target[:, :-1]
        target_include_end = target[:, 1:]
        encoder_padding_mask, look_ahead_mask, decoder_padding_mask = Mask.create_masks(
            inputs, target_include_start
        )

        with tf.GradientTape() as tape:
            pred = self.model.call(
                inputs=inputs,
                target=target_include_start,
                inputs_padding_mask=encoder_padding_mask,
                look_ahead_mask=look_ahead_mask,
                target_padding_mask=decoder_padding_mask,
                training=True
            )

            loss = self.loss_function(target_include_end, pred)

        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

        self.train_loss(loss)
        self.train_accuracy(target_include_end, pred)

        if self.distribute_strategy is None:
            return tf.reduce_mean(loss)

        return loss

    def loss_function(self, real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        real_one_hot = label_smoothing(real, depth=self.vocab_size)
        loss = self.loss_object(real_one_hot, pred)

        mask = tf.cast(mask, dtype=loss.dtype)

        loss *= mask
        return tf.reduce_mean(loss)

    @tf.function
    def train_step(self, inputs, target):
        return self.basic_train_step(inputs, target)

    @tf.function
    def distributed_train_step(self, inputs, target):
        loss = self.distribute_strategy.experimental_run_v2(self.basic_train_step, args=(inputs, target))  # renamed to strategy.run since TF 2.2
        loss_value = self.distribute_strategy.reduce(tf.distribute.ReduceOp.MEAN, loss, axis=None)
        return tf.reduce_mean(loss_value)


def translate(inputs, data_loader, trainer, seq_max_len_target=100):
    if data_loader is None:
        raise ValueError('data loader is None')

    if trainer is None:
        raise ValueError('trainer is None')

    if trainer.model is None:
        raise ValueError('model is None')

    if not isinstance(seq_max_len_target, int):
        raise ValueError('seq_max_len_target is not int')

    encoded_data = data_loader.encode_data(inputs, mode='source')
    encoded_data = data_loader.texts_to_sequences([encoded_data])
    encoder_inputs = tf.convert_to_tensor(
        encoded_data,
        dtype=tf.int32
    )
    decoder_inputs = [data_loader.dictionary['target']['token2idx']['<s>']]
    decoder_inputs = tf.expand_dims(decoder_inputs, 0)
    decoder_end_token = data_loader.dictionary['target']['token2idx']['</s>']

    for _ in range(seq_max_len_target):
        encoder_padding_mask, look_ahead_mask, decoder_padding_mask = Mask.create_masks(
            encoder_inputs, decoder_inputs
        )
        pred = trainer.model.call(
            inputs=encoder_inputs,
            target=decoder_inputs,
            inputs_padding_mask=encoder_padding_mask,
            look_ahead_mask=look_ahead_mask,
            target_padding_mask=decoder_padding_mask,
            training=False
        )
        pred = pred[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(pred, axis=-1), dtype=tf.int32)

        if predicted_id == decoder_end_token:
            break
        decoder_inputs = tf.concat([decoder_inputs, predicted_id], axis=-1)

    total_output = tf.squeeze(decoder_inputs, axis=0)
    return data_loader.sequences_to_texts([total_output.numpy().tolist()], mode='target')


def calculate_bleu_score(target_path, ref_path):

    get_bleu_score = f"perl {BLEU_CALCULATOR_PATH} {ref_path} < {target_path} > temp"
    os.system(get_bleu_score)
    with open("temp", "r") as f:
        bleu_score_report = f.read()
    score = re.findall("BLEU = ([^,]+)", bleu_score_report)[0]

    return score, bleu_score_report
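
To make the Mask helpers above concrete, here is a toy demonstration (a sketch with a hand-made batch; Mask and label_smoothing are the class and function defined in utils.py above):

import tensorflow as tf
from utils import Mask, label_smoothing

# One sentence of 5 tokens whose last two positions are <pad> (id 0).
batch = tf.constant([[7, 8, 9, 0, 0]])
print(Mask.create_padding_mask(batch))
# shape (1, 1, 1, 5); 1.0 marks the padded positions: [0. 0. 0. 1. 1.]

print(Mask.create_look_ahead_mask(3))
# [[0. 1. 1.]
#  [0. 0. 1.]
#  [0. 0. 0.]]  1.0 blocks attention to future positions

# label_smoothing spreads epsilon of the probability mass over all classes:
print(label_smoothing(tf.constant([1]), depth=4))
# [[0.025 0.925 0.025 0.025]]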

4+1. The Trainer implementation in utils.py

In TensorFlow 2.0 and above, the Session-based graph execution of TensorFlow 1 is no longer the default; eager execution is used instead. This changes how the training loop is written, so this section focuses on the training procedure under TF 2.0+.

1. The basic_train_step() method of the Trainer class

basic_train_step() is called by train_step() (train_step() is covered in the next subsection):

@tf.function
def train_step(self, inputs, target):
    return self.basic_train_step(inputs, target)

In machine learning we frequently need to compute derivatives, and TensorFlow provides a powerful automatic differentiation mechanism for this. Under eager execution, TensorFlow uses tf.GradientTape(), a "gradient recorder", to implement automatic differentiation.

tf.GradientTape() is a recorder for automatic differentiation. Once execution enters the with tf.GradientTape() as tape context, every computation performed inside it is recorded automatically, as shown below:

# Inside the with tf.GradientTape() as tape context we call the model's
# call method, i.e. feed the input through the model step by step to
# obtain pred, and then compute the loss. The entire computation from
# input to loss is recorded automatically and used later to compute the
# gradients for the parameter update.
with tf.GradientTape() as tape:
    pred = self.model.call(
        inputs=inputs,
        target=target_include_start,
        inputs_padding_mask=encoder_padding_mask,
        look_ahead_mask=look_ahead_mask,
        target_padding_mask=decoder_padding_mask,
        training=True
    )

    loss = self.loss_function(target_include_end, pred)

# TensorFlow automatically computes the gradients of the loss with respect to the variables (the model parameters)
gradients = tape.gradient(loss, self.model.trainable_variables)

# TensorFlow automatically updates the parameters from the gradients
self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

In the example above, inside the with tf.GradientTape() as tape context we call the model's call method, i.e. feed the input through the model to obtain pred and then compute the loss; the whole computation from input to loss is recorded and used afterwards to compute the gradients for the parameter update.

After leaving the with tf.GradientTape() as tape context, recording stops, but the recorder tape remains usable, so gradients = tape.gradient(loss, self.model.trainable_variables) computes the derivative of the tensor loss with respect to the variables in self.model.trainable_variables.

TensorFlow's eager execution mode provides fast computation (with GPU support), automatic differentiation, optimizers, and other features that matter for deep learning. The linear-regression sketch after the list below shows how these pieces fit together; here, TensorFlow does two important jobs for us:

  • tape.gradient(loss, self.model.trainable_variables) computes the gradients automatically;
  • optimizer.apply_gradients(grads_and_vars) updates the model parameters automatically, i.e. self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables)).
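
A minimal end-to-end illustration of both steps (a toy linear-regression sketch, unrelated to the Transformer code):

import tensorflow as tf

# Fit y = 3x + 2 with gradient descent.
X = tf.constant([[1.0], [2.0], [3.0], [4.0]])
y = 3.0 * X + 2.0

w = tf.Variable(0.0)
b = tf.Variable(0.0)
optimizer = tf.optimizers.SGD(learning_rate=0.05)

for _ in range(1000):
    with tf.GradientTape() as tape:
        pred = w * X + b
        loss = tf.reduce_mean(tf.square(pred - y))
    # 1) compute the gradients of the loss w.r.t. the variables ...
    gradients = tape.gradient(loss, [w, b])
    # 2) ... and let the optimizer apply the update.
    optimizer.apply_gradients(zip(gradients, [w, b]))

print(float(w), float(b))  # converges towards 3.0 and 2.0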

The line gradients = tape.gradient(loss, self.model.trainable_variables) is where TensorFlow automatically computes the gradients of the loss with respect to the model parameters;

and self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables)) is where TensorFlow automatically updates the parameters from those gradients.

2. The train_step() method of the Trainer class

train_step() is called by trainer(), which is covered in the next subsection.

This subsection looks at train_step() itself; it simply wraps the basic_train_step() method introduced in the previous subsection:

@tf.function
def train_step(self, inputs, target):
    return self.basic_train_step(inputs, target)

Note that train_step() carries the @tf.function decorator, which can speed up execution to some extent; for details see the post 【Keras】tf.function :图执行模式.

In TensorFlow 2, tf.function (rather than tf.Session from 1.x) is the recommended way to get graph execution and turn a model into a deployable, high-performance TensorFlow graph. All that is needed is to wrap the code that should run in graph mode inside a function and decorate that function with @tf.function.

In a test over 400 batches, the program with @tf.function took 35.5 seconds, while the pure eager version without it took 43.8 seconds, so @tf.function brings a measurable speedup. In general the gain is larger when the model consists of many small operations, and smaller when the model has only a few operations that are each individually expensive.
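
A minimal way to reproduce this kind of comparison (a sketch; the exact timings depend on your hardware):

import time
import tensorflow as tf

def step(x):
    # many small ops: the case where @tf.function helps most
    for _ in range(100):
        x = x + tf.reduce_mean(x)
    return x

graph_step = tf.function(step)

x = tf.random.uniform((64, 64))
graph_step(x)  # the first call traces the graph; keep it out of the timing

for fn, name in [(step, 'eager'), (graph_step, 'tf.function')]:
    start = time.time()
    for _ in range(400):
        fn(x)
    print(name, time.time() - start, 'sec')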


3. The trainer() method of the Trainer class

trainer() is called by single_gpu_train(), and single_gpu_train() is invoked from train.py; that is how training starts. This subsection walks through the implementation of trainer().

def trainer(self, reset_checkpoint, is_distributed=False):
    current_day = datetime.datetime.now().strftime("%Y%m%d")
    train_log_dir = './logs/gradient_tape/' + current_day + '/train'
    os.makedirs(train_log_dir, exist_ok=True)
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)

    if not reset_checkpoint:
        if self.checkpoint_manager.latest_checkpoint:
            print("Restored from {}".format(self.checkpoint_manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

        self.checkpoint.restore(
            self.checkpoint_manager.latest_checkpoint
        )
    else:
        print("reset and initializing from scratch.")

    for epoch in range(self.epoch):
        start = time.time()
        print('start learning')

        for (batch, (inputs, target)) in enumerate(self.dataset):
            if is_distributed:
                self.distributed_train_step(inputs, target)
            else:
                self.train_step(inputs, target)

            self.checkpoint.step.assign_add(1)
            if batch % 50 == 0:
                print(
                    "Epoch: {}, Batch: {}, Loss:{}, Accuracy: {}".format(epoch, batch, self.train_loss.result(),
                                                                         self.train_accuracy.result()))
            if batch % 10000 == 0 and batch != 0:
                self.checkpoint_manager.save()
        print("{} | Epoch: {} Loss:{}, Accuracy: {}, time: {} sec".format(
            datetime.datetime.now(), epoch, self.train_loss.result(), self.train_accuracy.result(),
            time.time() - start
        ))
        with train_summary_writer.as_default():
            tf.summary.scalar('train_loss', self.train_loss.result(), step=epoch)
            tf.summary.scalar('train_accuracy', self.train_accuracy.result(), step=epoch)

        self.checkpoint_manager.save()

        self.train_loss.reset_states()
        self.train_accuracy.reset_states()
        self.validation_loss.reset_states()
        self.validation_accuracy.reset_states()
    self.checkpoint_manager.save()

5. test.py

Initializes the DataLoader (from data_loader.py) and calls its load_test() method to build the test set, defines the Transformer model structure (from model.py) so that the trained weights can be restored, loads the checkpoint, and runs the evaluation.

from __future__ import (absolute_import, division, print_function, unicode_literals)

import os

from data_loader import DataLoader
from model import Transformer
from utils import Trainer, calculate_bleu_score, translate

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# hyperparameters
TRAIN_RATIO = 0.9
D_POINT_WISE_FF = 2048
D_MODEL = 512
ENCODER_COUNT = DECODER_COUNT = 6
EPOCHS = 20
ATTENTION_HEAD_COUNT = 8
DROPOUT_PROB = 0.1
BATCH_SIZE = 32
SEQ_MAX_LEN_SOURCE = 100
SEQ_MAX_LEN_TARGET = 100
BPE_VOCAB_SIZE = 32000

data_loader = DataLoader(
    dataset_name='wmt14/en-de',
    data_dir='./datasets'
)
data_loader.load_bpe_encoder()

source_data, target_data = data_loader.load_test(index=3)
_, target_data_path = data_loader.get_test_data_path(index=3)

data = enumerate(zip(source_data, target_data))  # do_translate expects (index, (source, target)) pairs

transformer = Transformer(
    inputs_vocab_size=BPE_VOCAB_SIZE,
    target_vocab_size=BPE_VOCAB_SIZE,
    encoder_count=ENCODER_COUNT,
    decoder_count=DECODER_COUNT,
    attention_head_count=ATTENTION_HEAD_COUNT,
    d_model=D_MODEL,
    d_point_wise_ff=D_POINT_WISE_FF,
    dropout_prob=DROPOUT_PROB
)

trainer = Trainer(
    model=transformer,
    dataset=None,
    loss_object=None,
    optimizer=None,
    checkpoint_dir='./checkpoints'
)
if trainer.checkpoint_manager.latest_checkpoint:
    print("Restored from {}".format(trainer.checkpoint_manager.latest_checkpoint))
else:
    print("Initializing from scratch.")

trainer.checkpoint.restore(
    trainer.checkpoint_manager.latest_checkpoint
)


def do_translate(input_data):
    index = input_data[0]
    source = input_data[1][0]
    target = input_data[1][1]
    print(index)
    output = translate(source, data_loader, trainer, SEQ_MAX_LEN_TARGET)[0]  # translate() returns a list of texts
    return {
        'source': source,
        'target': target,
        'output': output
    }


translated_data = []

for test_data in data:
    res = do_translate(test_data)
    translated_data.append(res['output'])

with open('translated_data', 'w') as f:
    f.write('\n'.join(translated_data))

score, report = calculate_bleu_score(target_path='translated_data', ref_path=target_data_path)

6. Distributed training

The code also offers a distributed-training option (the Trainer's multi_gpu_train() method), which is not covered in this post; see 【Keras】TensorFlow分布式训练.
