import numpy as np
import torch
import os
# shared global variables to be imported from model also
UNK = "$UNK$"
NUM = "$NUM$"
NONE = "O"
# special error message
class MyIOError(Exception):
def __init__(self, filename):
# custom error message
message = """
ERROR: Unable to locate file {}.
FIX: Have you tried running python build_data.py first?
This will build the vocab files from your train, test and dev sets and
trim your word vectors.
""".format(filename)
super(MyIOError, self).__init__(message)
class CoNLLDataset(object):
"""Class that iterates over CoNLL Dataset
__iter__ method yields a tuple (words, tags)
words: list of raw words
tags: list of raw tags
If processing_word and processing_tag are not None,
optional preprocessing is applied
Example:
```python
data = CoNLLDataset(filename)
for sentence, tags in data:
pass
```
"""
def __init__(self, filename, processing_word=None, processing_tag=None,
max_iter=None, use_crf=True):
"""
Args:
filename: path to the file
processing_word: (optional) function that takes a word as input
processing_tag: (optional) function that takes a tag as input
max_iter: (optional) max number of sentences to yield
use_crf: (optional) if False, tags are yielded raw instead of being
passed through processing_tag
"""
self.filename = filename
self.processing_word = processing_word
self.processing_tag = processing_tag
self.max_iter = max_iter
self.use_crf = use_crf
self.length = None
def __iter__(self):
niter = 0
with open(self.filename) as f:
words, tags = [], []
for line in f:
line = line.strip()
if (len(line) == 0 or line.startswith("-DOCSTART-")):
if len(words) != 0:
niter += 1
if self.max_iter is not None and niter > self.max_iter:
break
yield words, tags
words, tags = [], []
else:
ls = line.split(' ')
word, tag = ls[0],ls[-1]
if self.processing_word is not None:
word = self.processing_word(word)
if self.processing_tag is not None:
if self.use_crf:
tag = self.processing_tag(tag)
words += [word]
tags += [tag]
def __len__(self):
"""Iterates once over the corpus to set and store length"""
if self.length is None:
self.length = 0
for _ in self:
self.length += 1
return self.length
def get_vocabs(datasets):
"""Build vocabulary from an iterable of datasets objects
Args:
datasets: a list of dataset objects
Returns:
a set of all the words in the dataset
"""
print("Building vocab...")
vocab_words = set()
vocab_tags = set()
for dataset in datasets:
for words, tags in dataset:
vocab_words.update(words)
vocab_tags.update(tags)
print("- done. {} tokens".format(len(vocab_words)))
return vocab_words, vocab_tags
def get_char_vocab(dataset):
"""Build char vocabulary from an iterable of datasets objects
Args:
dataset: a iterator yielding tuples (sentence, tags)
Returns:
a set of all the characters in the dataset
"""
print("Building char vocab...")
vocab_char = set()
for words, _ in dataset:
for word in words:
vocab_char.update(word)
print("- done. {} tokens)")
return vocab_char
def get_glove_vocab(filename):
"""Load vocab from file
Args:
filename: path to the glove vectors
Returns:
vocab: set() of strings
"""
print("Building vocab...")
vocab = set()
with open(filename, encoding="utf8") as f:
for line in f:
word = line.strip().split(' ')[0]
vocab.add(word)
# print("- done. {} tokens".format(len(vocab)))
return vocab
def write_vocab(vocab, filename):
"""Writes a vocab to a file
Writes one word per line.
Args:
vocab: iterable that yields words
filename: path to vocab file
Returns:
None; each word's id is implicitly its line number
"""
print("Writing vocab...")
with open(filename, "w") as f:
print(filename)
# print("vocab", vocab)
for i, word in enumerate(vocab):
if i != len(vocab) - 1:
f.write("{}\n".format(word))
else:
f.write(word)
# print("- done. {} tokens".format(len(vocab)))
def load_vocab(filename):
"""Loads vocab from a file
Args:
filename: (string) the format of the file must be one word per line.
Returns:
d: dict[word] = index
"""
try:
d = dict()
with open(filename) as f:
for idx, word in enumerate(f):
word = word.strip()
d[word] = idx
except IOError:
raise MyIOError(filename)
return d
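# Illustrative sketch (not part of the original pipeline, never called):
# write_vocab assigns the line number as the id of each word, and load_vocab
# recovers exactly that mapping. The temp path below is hypothetical.
def _example_vocab_roundtrip():
    import tempfile
    path = os.path.join(tempfile.mkdtemp(), "words.txt")
    write_vocab(["cat", "dog", UNK], path)
    assert load_vocab(path) == {"cat": 0, "dog": 1, UNK: 2}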
def export_trimmed_glove_vectors(vocab, glove_filename, trimmed_filename, dim):
"""Saves glove vectors in numpy array
Args:
vocab: dictionary vocab[word] = index
glove_filename: a path to a glove file
trimmed_filename: a path where to store the matrix in npz format
dim: (int) dimension of embeddings
"""
embeddings = np.zeros([len(vocab), dim])
with open(glove_filename, encoding="utf8") as f:
for line in f:
line = line.strip().split(' ')
word = line[0]
embedding = [float(x) for x in line[1:]]
if word in vocab:
word_idx = vocab[word]
embeddings[word_idx] = np.asarray(embedding)
np.savez_compressed(trimmed_filename, embeddings=embeddings)
def get_trimmed_glove_vectors(filename):
"""
Args:
filename: path to the npz file
Returns:
matrix of embeddings (np array)
"""
try:
with np.load(filename) as data:
return data["embeddings"]
except IOError:
raise MyIOError(filename)
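# Illustrative sketch (not called anywhere): trims a toy, hypothetical GloVe file
# down to a two-word vocab and reads the dense matrix back from the .npz archive.
def _example_trimmed_vectors():
    import tempfile
    tmp_dir = tempfile.mkdtemp()
    glove_path = os.path.join(tmp_dir, "toy_glove.txt")
    trimmed_path = os.path.join(tmp_dir, "toy_glove.trimmed.npz")
    with open(glove_path, "w", encoding="utf8") as f:
        f.write("cat 0.1 0.2\ndog 0.3 0.4\nbird 0.5 0.6\n")
    vocab = {"cat": 0, "dog": 1}
    export_trimmed_glove_vectors(vocab, glove_path, trimmed_path, dim=2)
    embeddings = get_trimmed_glove_vectors(trimmed_path)
    assert embeddings.shape == (2, 2)            # one row per vocab word
    assert embeddings[1].tolist() == [0.3, 0.4]  # row 1 is the "dog" vector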
def get_processing_word(vocab_words=None, vocab_chars=None,
lowercase=False, chars=False, allow_unk=True):
"""Return lambda function that transform a word (string) into list,
or tuple of (list, id) of int corresponding to the ids of the word and
its corresponding characters.
Args:
vocab: dict[word] = idx
Returns:
f("cat") = ([12, 4, 32], 12345)
= (list of char ids, word id)
"""
def f(word):
# 0. get chars of words
if vocab_chars is not None and chars == True:
char_ids = []
for char in word:
# ignore chars out of vocabulary
if char in vocab_chars:
char_ids += [vocab_chars[char]]
# 1. preprocess word
if lowercase:
word = word.lower()
if word.isdigit():
word = NUM
# 2. get id of word
if vocab_words is not None:
if word in vocab_words:
word = vocab_words[word]
else:
if allow_unk:
word = vocab_words[UNK]
else:
raise Exception("Unknow key is not allowed. Check that "\
"your vocab (tags?) is correct")
# 3. return tuple char ids, word id
if vocab_chars is not None and chars == True:
return char_ids, word
else:
return word
return f
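# Illustrative sketch (not called anywhere): the closures returned by
# get_processing_word map raw strings to word ids, or to (char ids, word id)
# tuples when char features are enabled. The tiny vocabs are hypothetical.
def _example_processing_word():
    vocab_words = {"cat": 0, "dog": 1, UNK: 2, NUM: 3}
    vocab_chars = {"c": 0, "a": 1, "t": 2}
    f = get_processing_word(vocab_words, vocab_chars, lowercase=True, chars=True)
    assert f("cat") == ([0, 1, 2], 0)   # (char ids of "cat", word id of "cat")
    g = get_processing_word(vocab_words, lowercase=True)
    assert g("42") == 3                 # digits collapse to the NUM token
    assert g("bird") == 2               # OOV words map to UNK when allow_unk=True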
def _pad_sequences(sequences, pad_tok, max_length):
"""
Args:
sequences: a generator of list or tuple
pad_tok: the token to pad with
max_length: the length to pad or truncate every sequence to
Returns:
a list of lists where each sublist has the same length, and a list of
the original (possibly truncated) sequence lengths
"""
sequence_padded, sequence_length = [], []
for seq in sequences:
seq = list(seq)
seq_ = seq[:max_length] + [pad_tok]*max(max_length - len(seq), 0)
sequence_padded += [seq_]
sequence_length += [min(len(seq), max_length)]
return sequence_padded, sequence_length
def pad_sequences(sequences, pad_tok, nlevels=1):
"""
Args:
sequences: a generator of list or tuple
pad_tok: the char to pad with
nlevels: "depth" of padding, for the case where we have characters ids
Returns:
a list of lists where each sublist has the same length, plus the
corresponding sequence lengths
"""
if nlevels == 1:
max_length = max(map(lambda x : len(x), sequences))
sequence_padded, sequence_length = _pad_sequences(sequences,
pad_tok, max_length)
elif nlevels == 2:
max_length_word = max([max(map(lambda x: len(x), seq))
for seq in sequences])
sequence_padded, sequence_length = [], []
for seq in sequences:
# all words are same length now
sp, sl = _pad_sequences(seq, pad_tok, max_length_word)
sequence_padded += [sp]
sequence_length += [sl]
max_length_sentence = max(map(lambda x : len(x), sequences))
sequence_padded, _ = _pad_sequences(sequence_padded,
[pad_tok]*max_length_word, max_length_sentence)
sequence_length, _ = _pad_sequences(sequence_length, 0,
max_length_sentence)
return sequence_padded, sequence_length
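# Illustrative sketch (not called anywhere): word-level (nlevels=1) and
# char-level (nlevels=2) padding, exactly as batch_iter uses it later on.
def _example_pad_sequences():
    padded, lengths = pad_sequences([[1, 2, 3], [4, 5]], pad_tok=0)
    assert padded == [[1, 2, 3], [4, 5, 0]] and lengths == [3, 2]
    # nlevels=2: every word is padded to the longest word, every sentence
    # to the longest sentence
    char_ids = [[[1, 2], [3]], [[4, 5, 6]]]
    padded2, word_lengths = pad_sequences(char_ids, pad_tok=0, nlevels=2)
    assert padded2 == [[[1, 2, 0], [3, 0, 0]], [[4, 5, 6], [0, 0, 0]]]
    assert word_lengths == [[2, 1], [3, 0]]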
def minibatches(data, minibatch_size, use_crf=True):
"""
Args:
data: generator of (sentence, tags) tuples
minibatch_size: (int)
use_crf: if False, tag strings are converted to integer labels directly
Yields:
tuples (x_batch, y_batch), each a list of up to minibatch_size items
"""
x_batch, y_batch = [], []
for (x, y) in data:
if len(x_batch) == minibatch_size:
yield x_batch, y_batch
x_batch, y_batch = [], []
if type(x[0]) == tuple:
x = zip(*x)
x_batch += [x]
if use_crf:
y_batch += [y]
else:
if any([x.isdigit() for x in y]):
y_batch.append([int(x) for x in y if x.isdigit()])
else:
y_batch.append([0,0,0,0,0])
if len(x_batch) != 0:
yield x_batch, y_batch
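# Illustrative sketch (not called anywhere): minibatches groups (sentence, tags)
# pairs into lists of at most minibatch_size items.
def _example_minibatches():
    data = [([1, 2, 3], [0, 0, 1]), ([4, 5], [0, 2]), ([6], [3])]
    batches = list(minibatches(data, minibatch_size=2))
    assert batches[0] == ([[1, 2, 3], [4, 5]], [[0, 0, 1], [0, 2]])
    assert batches[1] == ([[6]], [[3]])   # the last, smaller batch is still yielded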
def get_chunk_type(tok, idx_to_tag):
"""
Args:
tok: id of token, ex 4
idx_to_tag: dictionary {4: "B-PER", ...}
Returns:
tuple: "B", "PER"
"""
if isinstance(tok, torch.Tensor): tok = tok.item()
tag_name = idx_to_tag[tok]
tag_class = tag_name.split('-')[0]
tag_type = tag_name.split('-')[-1]
return tag_class, tag_type
def get_chunks(seq, tags):
"""Given a sequence of tags, group entities and their position
Args:
seq: [4, 4, 0, 0, ...] sequence of labels
tags: dict["O"] = 4
Returns:
list of (chunk_type, chunk_start, chunk_end)
Example:
seq = [4, 5, 0, 3]
tags = {"B-PER": 4, "I-PER": 5, "B-LOC": 3}
result = [("PER", 0, 2), ("LOC", 3, 4)]
"""
default = tags[NONE]
idx_to_tag = {idx: tag for tag, idx in tags.items()}
chunks = []
chunk_type, chunk_start = None, None
for i, tok in enumerate(seq):
# End of a chunk 1
if tok == default and chunk_type is not None:
# Add a chunk.
chunk = (chunk_type, chunk_start, i)
chunks.append(chunk)
chunk_type, chunk_start = None, None
# End of a chunk + start of a chunk!
elif tok != default:
tok_chunk_class, tok_chunk_type = get_chunk_type(tok, idx_to_tag)
if chunk_type is None:
chunk_type, chunk_start = tok_chunk_type, i
elif tok_chunk_type != chunk_type or tok_chunk_class == "B":
chunk = (chunk_type, chunk_start, i)
chunks.append(chunk)
chunk_type, chunk_start = tok_chunk_type, i
else:
pass
# end condition
if chunk_type is not None:
chunk = (chunk_type, chunk_start, len(seq))
chunks.append(chunk)
return chunks
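# Illustrative sketch (not called anywhere): reproduces the docstring example of
# get_chunks; note that the tag dict must contain the NONE ("O") tag.
def _example_get_chunks():
    tags = {"B-PER": 4, "I-PER": 5, "B-LOC": 3, NONE: 0}
    assert get_chunks([4, 5, 0, 3], tags) == [("PER", 0, 2), ("LOC", 3, 4)]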
import time
import sys
import logging
import numpy as np
def get_logger(filename):
"""Return a logger instance that writes in filename
Args:
filename: (string) path to log.txt
Returns:
logger: (instance of logger)
"""
logger = logging.getLogger('logger')
logger.setLevel(logging.DEBUG)
logging.basicConfig(format='%(message)s', level=logging.DEBUG)
handler = logging.FileHandler(filename)
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter(
'%(asctime)s:%(levelname)s: %(message)s'))
logging.getLogger().addHandler(handler)
return logger
class Progbar(object):
"""Progbar class copied from keras (https://github.com/fchollet/keras/)
Displays a progress bar.
Small edit : added strict arg to update
# Arguments
target: Total number of steps expected.
width: Width of the progress bar.
verbose: Verbosity mode; 1 shows a live bar, 2 prints a summary when done.
"""
def __init__(self, target, width=30, verbose=1):
self.width = width
self.target = target
self.sum_values = {}
self.unique_values = []
self.start = time.time()
self.total_width = 0
self.seen_so_far = 0
self.verbose = verbose
def update(self, current, values=[], exact=[], strict=[]):
"""
Updates the progress bar.
# Arguments
current: Index of current step.
values: List of tuples (name, value_for_last_step).
The progress bar will display averages for these values.
exact: List of tuples (name, value_for_last_step).
The progress bar will display these values directly.
"""
for k, v in values:
if k not in self.sum_values:
self.sum_values[k] = [v * (current - self.seen_so_far),
current - self.seen_so_far]
self.unique_values.append(k)
else:
self.sum_values[k][0] += v * (current - self.seen_so_far)
self.sum_values[k][1] += (current - self.seen_so_far)
for k, v in exact:
if k not in self.sum_values:
self.unique_values.append(k)
self.sum_values[k] = [v, 1]
for k, v in strict:
if k not in self.sum_values:
self.unique_values.append(k)
self.sum_values[k] = v
self.seen_so_far = current
now = time.time()
if self.verbose == 1:
prev_total_width = self.total_width
sys.stdout.write("\b" * prev_total_width)
sys.stdout.write("\r")
numdigits = int(np.floor(np.log10(self.target))) + 1
barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
bar = barstr % (current, self.target)
prog = float(current)/self.target
prog_width = int(self.width*prog)
if prog_width > 0:
bar += ('='*(prog_width-1))
if current < self.target:
bar += '>'
else:
bar += '='
bar += ('.'*(self.width-prog_width))
bar += ']'
sys.stdout.write(bar)
self.total_width = len(bar)
if current:
time_per_unit = (now - self.start) / current
else:
time_per_unit = 0
eta = time_per_unit*(self.target - current)
info = ''
if current < self.target:
info += ' - ETA: %ds' % eta
else:
info += ' - %ds' % (now - self.start)
for k in self.unique_values:
if type(self.sum_values[k]) is list:
info += ' - %s: %.4f' % (k,
self.sum_values[k][0] / max(1, self.sum_values[k][1]))
else:
info += ' - %s: %s' % (k, self.sum_values[k])
self.total_width += len(info)
if prev_total_width > self.total_width:
info += ((prev_total_width-self.total_width) * " ")
sys.stdout.write(info)
sys.stdout.flush()
if current >= self.target:
sys.stdout.write("\n")
if self.verbose == 2:
if current >= self.target:
info = '%ds' % (now - self.start)
for k in self.unique_values:
info += ' - %s: %.4f' % (k,
self.sum_values[k][0] / max(1, self.sum_values[k][1]))
sys.stdout.write(info + "\n")
def add(self, n, values=[]):
self.update(self.seen_so_far+n, values)
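# Illustrative sketch (not called anywhere): the same update pattern that
# NERLearner.train uses below; "values" entries are averaged across steps,
# "exact" entries are displayed as-is.
def _example_progbar():
    prog = Progbar(target=10)
    for step in range(10):
        prog.update(step + 1, values=[("train loss", 0.5)], exact=[("Accuracy", 90.0)])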
class Config():
def __init__(self, load=True):
"""Initialize hyperparameters and load vocabs
Args:
load: (bool) if True, load vocabularies, processing functions
and pre-trained embeddings (requires the build_data.py outputs)
"""
# directory for training outputs
if not os.path.exists(self.dir_output):
os.makedirs(self.dir_output)
# create instance of logger
self.logger = get_logger(self.path_log)
# load if requested (default)
if load:
self.load()
def load(self):
"""Loads vocabulary, processing functions and embeddings
Supposes that build_data.py has been run successfully and that
the corresponding files have been created (vocab and trimmed GloVe
vectors)
"""
# 1. vocabulary
self.vocab_words = load_vocab(self.filename_words)
self.vocab_tags = load_vocab(self.filename_tags)
self.vocab_chars = load_vocab(self.filename_chars)
self.nwords = len(self.vocab_words)
self.nchars = len(self.vocab_chars)
self.ntags = len(self.vocab_tags)
# 2. get processing functions that map str -> id
self.processing_word = get_processing_word(self.vocab_words,
self.vocab_chars, lowercase=True, chars=self.use_chars)
self.processing_tag = get_processing_word(self.vocab_tags,
lowercase=False, allow_unk=False)
# 3. get pre-trained embeddings
self.embeddings = (get_trimmed_glove_vectors(self.filename_trimmed)
if self.use_pretrained else None)
# general config
dir_output = "results/test/"
dir_model = dir_output
path_log = dir_output + "log.txt"
# embeddings
dim_word = 300
dim_char = 100
# glove files
filename_glove = "data/glove.6B.{}d.txt".format(dim_word)
# trimmed embeddings (created from glove_filename with build_data.py)
filename_trimmed = "data/glove.6B.{}d.trimmed.npz".format(dim_word)
use_pretrained = True
# dataset
# filename_dev = "data/coNLL/eng/eng.testa.iob"
# filename_test = "data/coNLL/eng/eng.testb.iob"
# filename_train = "data/coNLL/eng/eng.train.iob"
#filename_dev = filename_test = filename_train = "data/test.txt" # test
filename_dev = "data/dev.txt"
filename_test = "data/test.txt"
filename_train = "data/train.txt"
max_iter = None # if not None, max number of examples in Dataset
# vocab (created from dataset with build_data.py)
filename_words = "data/words.txt"
filename_tags = "data/tags.txt"
filename_chars = "data/chars.txt"
# training
train_embeddings = False
nepochs = 15
dropout = 0.5
batch_size = 5
lr_method = "adam"
lr = 0.001
lr_decay = 0.9
epoch_drop = 1 # Step Decay: per # epochs to apply lr_decay
clip = -1 # if negative, no clipping
nepoch_no_imprv = 3
# model hyperparameters
hidden_size_char = 100 # lstm on chars
hidden_size_lstm = 300 # lstm on word embeddings
ner_model_path = "saves/ner_{}e_glove".format(nepochs)
# elmo config
use_elmo = True
dim_elmo = 1024
# NOTE: if both chars and crf, only 1.6x slower on GPU
use_crf = True # if crf, training is 1.7x slower on CPU
use_chars = False if use_elmo else True  # if char embedding, training is 3.5x slower on CPU
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import os
import spacy
USE_GPU = torch.cuda.is_available()
def to_gpu(x, *args, **kwargs):
'''Moves a pytorch tensor to the GPU if CUDA is available and USE_GPU is True.'''
return x.cuda(*args, **kwargs) if USE_GPU else x
def children(m): return m if isinstance(m, (list, tuple)) else list(m.children())
def set_trainable_attr(m,b):
m.trainable=b
for p in m.parameters(): p.requires_grad=b
def apply_leaf(m, f):
c = children(m)
if isinstance(m, nn.Module): f(m)
if len(c)>0:
for l in c: apply_leaf(l,f)
def set_trainable(l, b):
apply_leaf(l, lambda m: set_trainable_attr(m,b))
def save_model(m, p): torch.save(m.state_dict(), p)
def T(a, half=False, cuda=True):
"""
Converts a numpy array into a pytorch tensor.
If cuda=True and a GPU is available (USE_GPU), the tensor is moved to the GPU.
"""
if not torch.is_tensor(a):
a = np.array(np.ascontiguousarray(a))
if a.dtype in (np.int8, np.int16, np.int32, np.int64):
a = torch.LongTensor(a.astype(np.int64))
elif a.dtype in (np.float32, np.float64):
a = torch.cuda.HalfTensor(a) if half else torch.FloatTensor(a)
else: raise NotImplementedError(a.dtype)
if cuda: a = to_gpu(a)
return a
def load_ner_model(m, p, strict=True):
sd = torch.load(p, map_location=lambda storage, loc: storage)
names = set(m.state_dict().keys())
for n in list(sd.keys()): # list "detaches" the iterator
if n not in names and n+'_raw' in names:
if n+'_raw' not in sd: sd[n+'_raw'] = sd[n]
del sd[n]
m.load_state_dict(sd, strict=strict)
class NERModel(nn.Module):
def __init__(self, config):
super().__init__()
self.config = config
self.use_elmo = config.use_elmo
if not self.use_elmo:
self.emb = nn.Embedding(self.config.nwords, self.config.dim_word, padding_idx=0)
self.char_embeddings = nn.Embedding(self.config.nchars, self.config.dim_char, padding_idx=0)
self.char_lstm = nn.LSTM(self.config.dim_char, self.config.hidden_size_char, bidirectional=True)
self.dropout = nn.Dropout(p=self.config.dropout)
self.word_lstm = nn.LSTM(self.config.dim_elmo if self.use_elmo else self.config.dim_word+2*self.config.hidden_size_char,
self.config.hidden_size_lstm, bidirectional=True)#dim_elmo=1024,hidden_size_lstm=300
self.linear = LinearClassifier(self.config, layers=[self.config.hidden_size_lstm*2, self.config.ntags], drops=[0.5])#hidden_size_lstm=300,ntags=9
def forward(self, input):
# Word_dim = (batch_size x sent_length)
# char_dim = (batch_size x sent_length x word_length)
if self.use_elmo:
word_emb = self.dropout(input.transpose(0,1))
# shape = S*B*dim_elmo = [31, 5, 1024]
else:
word_input, char_input = input[0], input[1]
word_input.transpose_(0,1)
# Word Embedding
word_emb = self.emb(word_input) #shape= S*B*wnh
# Char LSTM
char_emb = self.char_embeddings(char_input.view(-1, char_input.size(2))) #https://stackoverflow.com/questions/47205762/embedding-3d-data-in-pytorch
char_emb = char_emb.view(*char_input.size(), -1) #dim = BxSxWxE
_, (h, c) = self.char_lstm(char_emb.view(-1, char_emb.size(2), char_emb.size(3)).transpose(0,1)) #(num_layers * num_directions, batch, hidden_size) = 2*BS*cnh
char_output = torch.cat((h[0], h[1]), 1) #shape = BS*2cnh
char_output = char_output.view(char_emb.size(0), char_emb.size(1), -1).transpose(0,1) #shape = S*B*2cnh
# Concat char output and word output
word_emb = torch.cat((word_emb, char_output), 2) #shape = S*B*(wnh+2cnh)
word_emb = self.dropout(word_emb)
output, (h, c) = self.word_lstm(word_emb) #shape = S*B*(2*hidden_size_lstm) = [31,5,600]
output = self.dropout(output)
output = self.linear(output)#[31,5,9]
return output #shape = S*B*ntags
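# Illustrative sketch (not called anywhere): runs NERModel on random "ELMo"
# embeddings to show the expected tensor shapes. The SimpleNamespace config is a
# hypothetical stand-in that only defines the attributes the ELMo path reads.
def _example_ner_model_forward():
    from types import SimpleNamespace
    cfg = SimpleNamespace(use_elmo=True, dim_elmo=1024, hidden_size_lstm=300,
                          dropout=0.5, ntags=9)
    model = NERModel(cfg)
    fake_elmo = torch.randn(5, 31, 1024)   # (batch_size, seq_length, dim_elmo)
    output = model(fake_elmo)              # forward transposes to (seq_length, batch_size, ...)
    assert output.size() == (31, 5, 9)     # (seq_length, batch_size, ntags)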
class LinearBlock(nn.Module):
def __init__(self, ni, nf, drop):
super().__init__()
self.lin = nn.Linear(ni, nf)
self.drop = nn.Dropout(drop)
self.bn = nn.BatchNorm1d(ni)
def forward(self, x):
return self.lin(self.drop(self.bn(x)))
class LinearClassifier(nn.Module):
def __init__(self, config, layers, drops):
self.config = config
super().__init__()
self.layers = nn.ModuleList([
LinearBlock(layers[i], layers[i + 1], drops[i]) for i in range(len(layers) - 1)])
#[600,9]
def forward(self, input):
output = input
sl,bs,_ = output.size()
x = output.view(-1, 2*self.config.hidden_size_lstm)#[155,600]
for l in self.layers:
l_x = l(x)
x = F.relu(l_x)
return l_x.view(sl, bs, self.config.ntags)
from typing import List, Optional, Union
from torch.autograd import Variable
import torch
import torch.nn as nn
class CRF(nn.Module):
"""Conditional random field.
This module implements a conditional random field [LMP]. The forward computation
of this class computes the log likelihood of the given sequence of tags and
emission score tensor. This class also has ``decode`` method which finds the
best tag sequence given an emission score tensor using `Viterbi algorithm`_.
Arguments
---------
num_tags : int
Number of tags.
Attributes
----------
num_tags : int
Number of tags passed to ``__init__``.
start_transitions : :class:`~torch.nn.Parameter`
Start transition score tensor of size ``(num_tags,)``.
end_transitions : :class:`~torch.nn.Parameter`
End transition score tensor of size ``(num_tags,)``.
transitions : :class:`~torch.nn.Parameter`
Transition score tensor of size ``(num_tags, num_tags)``.
References
----------
.. [LMP] Lafferty, J., McCallum, A., Pereira, F. (2001).
"Conditional random fields: Probabilistic models for segmenting and
labeling sequence data". *Proc. 18th International Conf. on Machine
Learning*. Morgan Kaufmann. pp. 282–289.
.. _Viterbi algorithm: https://en.wikipedia.org/wiki/Viterbi_algorithm
"""
def __init__(self, num_tags: int) -> None:
if num_tags <= 0:
raise ValueError(f'invalid number of tags: {num_tags}')
super().__init__()
self.num_tags = num_tags
self.start_transitions = nn.Parameter(torch.Tensor(num_tags))
self.end_transitions = nn.Parameter(torch.Tensor(num_tags))
self.transitions = nn.Parameter(torch.Tensor(num_tags, num_tags))
self.reset_parameters()
def reset_parameters(self) -> None:
"""Initialize the transition parameters.
The parameters will be initialized randomly from a uniform distribution
between -0.1 and 0.1.
"""
nn.init.uniform_(self.start_transitions, -0.1, 0.1)
nn.init.uniform_(self.end_transitions, -0.1, 0.1)
nn.init.uniform_(self.transitions, -0.1, 0.1)
def __repr__(self) -> str:
return f'{self.__class__.__name__}(num_tags={self.num_tags})'
def forward(self,
emissions: Variable,
tags: Variable,
mask: Optional[Variable] = None,
reduce: bool = True,
) -> Variable:
"""Compute the log likelihood of the given sequence of tags and emission score.
Arguments
---------
emissions : :class:`~torch.autograd.Variable`
Emission score tensor of size ``(seq_length, batch_size, num_tags)``.
tags : :class:`~torch.autograd.Variable`
Sequence of tags as ``LongTensor`` of size ``(seq_length, batch_size)``.
mask : :class:`~torch.autograd.Variable`, optional
Mask tensor as ``ByteTensor`` of size ``(seq_length, batch_size)``.
reduce : bool
Whether to sum the log likelihood over the batch.
Returns
-------
:class:`~torch.autograd.Variable`
The log likelihood. This will have size (1,) if ``reduce=True``, ``(batch_size,)``
otherwise.
"""
if emissions.dim() != 3:
raise ValueError(f'emissions must have dimension of 3, got {emissions.dim()}')
if tags.dim() != 2:
raise ValueError(f'tags must have dimension of 2, got {tags.dim()}')
if emissions.size()[:2] != tags.size():
raise ValueError(
'the first two dimensions of emissions and tags must match, '
f'got {tuple(emissions.size()[:2])} and {tuple(tags.size())}'
)
if emissions.size(2) != self.num_tags:
raise ValueError(
f'expected last dimension of emissions is {self.num_tags}, '
f'got {emissions.size(2)}'
)
if mask is not None:
if tags.size() != mask.size():
raise ValueError(
f'size of tags and mask must match, got {tuple(tags.size())} '
f'and {tuple(mask.size())}'
)
if not all(mask[0].data):
raise ValueError('mask of the first timestep must all be on')
if mask is None:
mask = Variable(self._new(tags.size()).fill_(1)).byte()
# numerator: joint log-likelihood of the gold tag sequence
numerator = self._compute_joint_llh(emissions, tags, mask)
# denominator: log partition function over all possible tag sequences
denominator = self._compute_log_partition_function(emissions, mask)
llh = numerator - denominator
return llh if not reduce else torch.sum(llh)
def decode(self,
emissions: Union[Variable, torch.FloatTensor],
mask: Optional[Union[Variable, torch.ByteTensor]] = None) -> List[List[int]]:
"""Find the most likely tag sequence using Viterbi algorithm.
Arguments
---------
emissions : :class:`~torch.autograd.Variable` or :class:`~torch.FloatTensor`
Emission score tensor of size ``(seq_length, batch_size, num_tags)``.
mask : :class:`~torch.autograd.Variable` or :class:`torch.ByteTensor`
Mask tensor of size ``(seq_length, batch_size)``.
Returns
-------
list
List of list containing the best tag sequence for each batch.
"""
if emissions.dim() != 3:
raise ValueError(f'emissions must have dimension of 3, got {emissions.dim()}')
if emissions.size(2) != self.num_tags:
raise ValueError(
f'expected last dimension of emissions is {self.num_tags}, '
f'got {emissions.size(2)}'
)
if mask is not None and emissions.size()[:2] != mask.size():
raise ValueError(
'the first two dimensions of emissions and mask must match, '
f'got {tuple(emissions.size()[:2])} and {tuple(mask.size())}'
)
if isinstance(emissions, Variable):
emissions = emissions.data
if mask is None:
mask = self._new(emissions.size()[:2]).fill_(1).byte()
elif isinstance(mask, Variable):
mask = mask.data
return self._viterbi_decode(emissions, mask)
def _compute_joint_llh(self,
emissions: Variable,
tags: Variable,
mask: Variable) -> Variable:
# emissions: (seq_length, batch_size, num_tags)
# tags: (seq_length, batch_size)
# mask: (seq_length, batch_size)
assert emissions.dim() == 3 and tags.dim() == 2
assert emissions.size()[:2] == tags.size()
assert emissions.size(2) == self.num_tags
assert mask.size() == tags.size()
assert all(mask[0].data)
seq_length = emissions.size(0)
mask = mask.float()#[31,5]
# Start transition score
llh = self.start_transitions[tags[0]]  # (batch_size,), e.g. 5
# emissions holds the per-word tag scores, shape (seq_length, batch_size, num_tags), e.g. [31, 5, 9]
for i in range(seq_length - 1):
cur_tag, next_tag = tags[i], tags[i+1]#[5],[5]
# Emission score for current tag
# X(i,yi)
llh += emissions[i].gather(1, cur_tag.view(-1, 1)).squeeze(1) * mask[i]  # gather each sample's emission score for its current tag: (batch_size,)
# Transition score to next tag
# T(y_i -> y_{i+1})
transition_score = self.transitions[cur_tag, next_tag]  # transition score from the current tag to the next tag: (batch_size,)
# Only add transition score if the next tag is not masked (mask == 1)
llh += transition_score * mask[i+1]
# Find last tag index
last_tag_indices = mask.long().sum(0) - 1 # (batch_size,)
last_tags = tags.gather(0, last_tag_indices.view(1, -1)).squeeze(0)
# End transition score
llh += self.end_transitions[last_tags]
# Emission score for the last tag, if mask is valid (mask == 1)
llh += emissions[-1].gather(1, last_tags.view(-1, 1)).squeeze(1) * mask[-1]
return llh
def _compute_log_partition_function(self,
emissions: Variable,
mask: Variable) -> Variable:
# emissions: (seq_length, batch_size, num_tags)
# mask: (seq_length, batch_size)
assert emissions.dim() == 3 and mask.dim() == 2
assert emissions.size()[:2] == mask.size()
assert emissions.size(2) == self.num_tags
assert all(mask[0].data)
seq_length = emissions.size(0)
mask = mask.float()
# Start transition score and first emission
log_prob = self.start_transitions.view(1, -1) + emissions[0]#[5,9]
# Here, log_prob has size (batch_size, num_tags) where for each batch,
# the j-th column stores the log probability that the current timestep has tag j
for i in range(1, seq_length):
# Broadcast log_prob over all possible next tags
broadcast_log_prob = log_prob.unsqueeze(2) # (batch_size, num_tags, 1)
# Broadcast transition score over all instances in the batch
broadcast_transitions = self.transitions.unsqueeze(0) # (1, num_tags, num_tags)
# Broadcast emission score over all possible current tags
broadcast_emissions = emissions[i].unsqueeze(1) # (batch_size, 1, num_tags)
# Sum current log probability, transition, and emission scores
score = broadcast_log_prob + broadcast_transitions \
+ broadcast_emissions # (batch_size, num_tags, num_tags)
# Sum over all possible current tags, but we're in log prob space, so a sum
# becomes a log-sum-exp
score = self._log_sum_exp(score, 1) # (batch_size, num_tags)
# Set log_prob to the score if this timestep is valid (mask == 1), otherwise
# leave it alone
log_prob = score * mask[i].unsqueeze(1) + log_prob * (1.-mask[i]).unsqueeze(1)
# End transition score
log_prob += self.end_transitions.view(1, -1)
# Sum (log-sum-exp) over all possible tags
return self._log_sum_exp(log_prob, 1) # (batch_size,)
def _viterbi_decode(self, emissions: torch.FloatTensor, mask: torch.ByteTensor) \
-> List[List[int]]:
# Get input sizes
seq_length = emissions.size(0)
batch_size = emissions.size(1)
sequence_lengths = mask.long().sum(dim=0)
# emissions: (seq_length, batch_size, num_tags)
assert emissions.size(2) == self.num_tags
# list to store the decoded paths
best_tags_list = []
# Start transition
viterbi_score = []
viterbi_score.append(self.start_transitions.data + emissions[0])
viterbi_path = []
# Here, viterbi_score is a list of tensors of shapes of (num_tags,) where value at
# index i stores the score of the best tag sequence so far that ends with tag i
# viterbi_path saves where the best tags candidate transitioned from; this is used
# when we trace back the best tag sequence
# Viterbi algorithm recursive case: we compute the score of the best tag sequence
# for every possible next tag
for i in range(1, seq_length):
# Broadcast viterbi score for every possible next tag
broadcast_score = viterbi_score[i - 1].view(batch_size, -1, 1)
# Broadcast emission score for every possible current tag
broadcast_emission = emissions[i].view(batch_size, 1, -1)
# Compute the score matrix of shape (batch_size, num_tags, num_tags) where
# for each sample, each entry at row i and column j stores the score of
# transitioning from tag i to tag j and emitting
score = broadcast_score + self.transitions.data + broadcast_emission
# Find the maximum score over all possible current tag
best_score, best_path = score.max(1) # (batch_size,num_tags,)
# Save the score and the path
viterbi_score.append(best_score)
viterbi_path.append(best_path)
# Now, compute the best path for each sample
for idx in range(batch_size):
# Find the tag which maximizes the score at the last timestep; this is our best tag
# for the last timestep
seq_end = sequence_lengths[idx]-1
_, best_last_tag = (viterbi_score[seq_end][idx] + self.end_transitions.data).max(0)
best_tags = [best_last_tag.item()]
# We trace back where the best last tag comes from, append that to our best tag
# sequence, and trace it back again, and so on
for path in reversed(viterbi_path[:sequence_lengths[idx] - 1]):
best_last_tag = path[idx][best_tags[-1]]
best_tags.append(best_last_tag)
# Reverse the order because we start from the last timestep
best_tags.reverse()
best_tags_list.append(best_tags)
return best_tags_list
@staticmethod
def _log_sum_exp(tensor: Variable, dim: int) -> Variable:
# Find the max value along `dim`
offset, _ = tensor.max(dim)
# Make offset broadcastable
broadcast_offset = offset.unsqueeze(dim)
# Perform log-sum-exp safely
safe_log_sum_exp = torch.log(torch.sum(torch.exp(tensor - broadcast_offset), dim))
# Add offset back
return offset + safe_log_sum_exp
def _new(self, *args, **kwargs) -> torch.FloatTensor:
param = next(self.parameters())
return param.data.new(*args, **kwargs)
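# Illustrative sketch (not called anywhere): the CRF is used as a loss
# (negative log likelihood of the gold tags) and as a decoder (Viterbi), which
# is how NERLearner wires it up below. The shapes are hypothetical.
def _example_crf_usage():
    num_tags, seq_length, batch_size = 5, 7, 3
    crf = CRF(num_tags)
    emissions = torch.randn(seq_length, batch_size, num_tags)
    tags = torch.LongTensor(seq_length, batch_size).random_(0, num_tags)
    nll = -crf(emissions, tags)          # scalar loss: reduce=True sums over the batch
    best_paths = crf.decode(emissions)   # list of batch_size lists of tag ids
    assert len(best_paths) == batch_size
    assert all(len(path) == seq_length for path in best_paths)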
from torch.optim.lr_scheduler import StepLR
if os.name == "posix": from allennlp.modules.elmo import Elmo, batch_to_ids
""" Works with pytorch 0.4.0 """
class NERLearner(object):
"""
NERLearner class that encapsulates a pytorch nn.Module model and ModelData class
Contains methods for training and testing the model
"""
def __init__(self, config, model):
super().__init__()
self.config = config
self.logger = self.config.logger
self.model = model
self.model_path = config.dir_model
self.use_elmo = config.use_elmo
self.idx_to_tag = {idx: tag for tag, idx in
self.config.vocab_tags.items()}
self.criterion = CRF(self.config.ntags)
self.optimizer = optim.Adam(self.model.parameters())
if self.use_elmo:
options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
else:
self.load_emb()
if USE_GPU:
self.use_cuda = True
self.logger.info("GPU found.")
self.model = model.cuda()
self.criterion = self.criterion.cuda()
if self.use_elmo:
self.elmo = self.elmo.cuda()
print("Moved elmo to cuda")
else:
self.model = model.cpu()
self.use_cuda = False
self.logger.info("No GPU found.")
def get_model_path(self, name):
return os.path.join(self.model_path,name)+'.h5'
def get_layer_groups(self, do_fc=False):
return children(self.model)
def freeze_to(self, n):
c=self.get_layer_groups()
for l in c:
set_trainable(l, False)
for l in c[n:]:
set_trainable(l, True)
def unfreeze(self):
self.freeze_to(0)
def save(self, name=None):
if not name:
name = self.config.ner_model_path
save_model(self.model, self.get_model_path(name))
self.logger.info(f"Saved model at {self.get_model_path(name)}")
def load_emb(self):
self.model.emb.weight = nn.Parameter(T(self.config.embeddings))
self.model.emb.weight.requires_grad = False
self.logger.info('Loading pretrained word embeddings')
def load(self, fn=None):
if not fn: fn = self.config.ner_model_path
fn = self.get_model_path(fn)
load_ner_model(self.model, fn, strict=True)
self.logger.info(f"Loaded model from {fn}")
def batch_iter(self, train, batch_size, return_lengths=False, shuffle=False, sorter=False):
"""
Builds a generator from the given dataloader to be fed into the model
Args:
train: DataLoader
batch_size: size of each batch
return_lengths: if True, generator returns a list of sequence lengths for each
sample in the batch
ie. sequence_lengths = [8,7,4,3]
shuffle: if True, shuffles the data for each epoch
sorter: if True and the dataset defines a sorter, sorts the data each epoch
Returns:
nbatches: (int) number of batches
data_generator: batch generator yielding
dict inputs:{'word_ids' : np.array([[padded word_ids in sent1], ...])
'char_ids': np.array([[[padded char_ids in word1_sent1], ...],
[padded char_ids in word1_sent2], ...],
...])}
labels: np.array([[padded label_ids in sent1], ...])
sequence_lengths: list([len(sent1), len(sent2), ...])
"""
nbatches = (len(train) + batch_size - 1) // batch_size
def data_generator():
while True:
if shuffle: train.shuffle()
elif sorter==True and train.sorter: train.sort()
for i, (words, labels) in enumerate(minibatches(train, batch_size)):
# perform padding of the given data
if self.config.use_chars:
char_ids, word_ids = zip(*words)
word_ids, sequence_lengths = pad_sequences(word_ids, 1)
char_ids, word_lengths = pad_sequences(char_ids, pad_tok=0,
nlevels=2)
else:
word_ids, sequence_lengths = pad_sequences(words, 0)
if self.use_elmo:
word_ids = words
if labels:
labels, _ = pad_sequences(labels, 0)
# if categorical
## labels = [to_categorical(label, num_classes=len(train.tag_itos)) for label in labels]
# build dictionary
inputs = {
"word_ids": np.asarray(word_ids)
}
if self.config.use_chars:
inputs["char_ids"] = np.asarray(char_ids)
if return_lengths:
yield(inputs, np.asarray(labels), sequence_lengths)
else:
yield (inputs, np.asarray(labels))
return (nbatches, data_generator())
def fine_tune(self, train, dev=None):
"""
Fine tune the NER model by freezing the pre-trained encoder and training the newly
instantiated layers for 1 epoch
"""
self.logger.info("Fine Tuning Model")
self.fit(train, dev, epochs=1, fine_tune=True)
def fit(self, train, dev=None, epochs=None, fine_tune=False):
"""
Fits the model to the training dataset and evaluates on the validation set.
Saves the model to disk
"""
if not epochs:
epochs = self.config.nepochs
batch_size = self.config.batch_size
nbatches_train, train_generator = self.batch_iter(train, batch_size,
return_lengths=True)
if dev:
nbatches_dev, dev_generator = self.batch_iter(dev, batch_size,
return_lengths=True)
scheduler = StepLR(self.optimizer, step_size=1, gamma=self.config.lr_decay)
if not fine_tune: self.logger.info("Training Model")
f1s = []
for epoch in range(epochs):
scheduler.step()
self.train(epoch, nbatches_train, train_generator, fine_tune=fine_tune)
if dev:
f1 = self.test(nbatches_dev, dev_generator, fine_tune=fine_tune)
# Early stopping
if len(f1s) > 0:
if f1 < max(f1s[max(-self.config.nepoch_no_imprv, -len(f1s)):]):
print("No improvement in the last {} epochs. Stopping training".format(self.config.nepoch_no_imprv))
break
else:
f1s.append(f1)
if fine_tune:
self.save(self.config.ner_ft_path)
else :
self.save(self.config.ner_model_path)
def train(self, epoch, nbatches_train, train_generator, fine_tune=False):
self.logger.info('\nEpoch: %d' % epoch)
self.model.train()
if not self.use_elmo: self.model.emb.weight.requires_grad = False
train_loss = 0
correct = 0
total = 0
total_step = None
prog = Progbar(target=nbatches_train)
for batch_idx, (inputs, targets, sequence_lengths) in enumerate(train_generator):
if batch_idx == nbatches_train: break
if inputs['word_ids'].shape[0] == 1:
self.logger.info('Skipping batch of size=1')
continue
total_step = batch_idx
targets = T(targets, cuda=self.use_cuda).transpose(0,1).contiguous()
self.optimizer.zero_grad()
if self.use_elmo:
sentences = inputs['word_ids']  # list of token lists, e.g. [['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.'], ...]
character_ids = batch_to_ids(sentences)
if self.use_cuda:
character_ids = character_ids.cuda()
embeddings = self.elmo(character_ids)
word_input = embeddings['elmo_representations'][0]#[5,31,1024]
word_input, targets = Variable(word_input, requires_grad=False), \
Variable(targets)#[31,5]
inputs = (word_input)
else:
word_input = T(inputs['word_ids'], cuda=self.use_cuda)
char_input = T(inputs['char_ids'], cuda=self.use_cuda)
word_input, char_input, targets = Variable(word_input, requires_grad=False), \
Variable(char_input, requires_grad=False),\
Variable(targets)
inputs = (word_input, char_input)
outputs = self.model(inputs)
# Create mask
if self.use_elmo:
mask = Variable(embeddings['mask'].transpose(0,1))
if self.use_cuda:
mask = mask.cuda()
else:
mask = create_mask(sequence_lengths, targets, cuda=self.use_cuda)
# Get CRF Loss
loss = -1*self.criterion(outputs, targets, mask=mask)
loss.backward()
self.optimizer.step()
# Callbacks
train_loss += loss.item()
predictions = self.criterion.decode(outputs, mask=mask)
masked_targets = mask_targets(targets, sequence_lengths)
t_ = mask.type(torch.LongTensor).sum().item()
total += t_
c_ = sum([1 if p[i] == mt[i] else 0 for p, mt in zip(predictions, masked_targets) for i in range(len(p))])
correct += c_
prog.update(batch_idx + 1, values=[("train loss", loss.item())], exact=[("Accuracy", 100*c_/t_)])
self.logger.info("Train Loss: %.3f, Train Accuracy: %.3f%% (%d/%d)" %(train_loss/(total_step+1), 100.*correct/total, correct, total) )
def test(self, nbatches_val, val_generator, fine_tune=False):
self.model.eval()
accs = []
test_loss = 0
correct_preds = 0
total_correct = 0
total_preds = 0
total_step = None
for batch_idx, (inputs, targets, sequence_lengths) in enumerate(val_generator):
if batch_idx == nbatches_val: break
if inputs['word_ids'].shape[0] == 1:
self.logger.info('Skipping batch of size=1')
continue
total_step = batch_idx
targets = T(targets, cuda=self.use_cuda).transpose(0,1).contiguous()
if self.use_elmo:
sentences = inputs['word_ids']  # list of token lists, e.g. [['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.'], ...]
character_ids = batch_to_ids(sentences)
if self.use_cuda:
character_ids = character_ids.cuda()
embeddings = self.elmo(character_ids)
word_input = embeddings['elmo_representations'][1]
word_input, targets = Variable(word_input, requires_grad=False), \
Variable(targets)
inputs = (word_input)
else:
word_input = T(inputs['word_ids'], cuda=self.use_cuda)
char_input = T(inputs['char_ids'], cuda=self.use_cuda)
word_input, char_input, targets = Variable(word_input, requires_grad=False), \
Variable(char_input, requires_grad=False),\
Variable(targets)
inputs = (word_input, char_input)
outputs = self.model(inputs)
# Create mask
if self.use_elmo:
mask = Variable(embeddings['mask'].transpose(0,1))
if self.use_cuda:
mask = mask.cuda()
else:
mask = create_mask(sequence_lengths, targets, cuda=self.use_cuda)
# Get CRF Loss
loss = -1*self.criterion(outputs, targets, mask=mask)
# Callbacks
test_loss += loss.item()
predictions = self.criterion.decode(outputs, mask=mask)
masked_targets = mask_targets(targets, sequence_lengths)
for lab, lab_pred in zip(masked_targets, predictions):
accs += [1 if a==b else 0 for (a, b) in zip(lab, lab_pred)]
lab_chunks = set(get_chunks(lab, self.config.vocab_tags))
lab_pred_chunks = set(get_chunks(lab_pred,
self.config.vocab_tags))
correct_preds += len(lab_chunks & lab_pred_chunks)
total_preds += len(lab_pred_chunks)
total_correct += len(lab_chunks)
p = correct_preds / total_preds if correct_preds > 0 else 0
r = correct_preds / total_correct if correct_preds > 0 else 0
f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
acc = np.mean(accs)
self.logger.info("Val Loss : %.3f, Val Accuracy: %.3f%%, Val F1: %.3f%%" %(test_loss/(total_step+1), 100*acc, 100*f1))
return 100*f1
def evaluate(self,test):
batch_size = self.config.batch_size
nbatches_test, test_generator = self.batch_iter(test, batch_size,
return_lengths=True)
self.logger.info('Evaluating on test set')
self.test(nbatches_test, test_generator)
def predict_batch(self, words):
self.model.eval()
if len(words) == 1:
mult = np.ones(2).reshape(2, 1).astype(int)
if self.use_elmo:
sentences = words
character_ids = batch_to_ids(sentences)
if self.use_cuda:
character_ids = character_ids.cuda()
embeddings = self.elmo(character_ids)
word_input = embeddings['elmo_representations'][1]
word_input = Variable(word_input, requires_grad=False)
if len(words) == 1:
word_input = ((mult*word_input.transpose(0,1)).transpose(0,1).contiguous()).type(torch.FloatTensor)
word_input = T(word_input, cuda=self.use_cuda)
sequence_lengths = [len(s) for s in sentences]  # needed to trim the padded predictions below
inputs = (word_input)
else:
#char_ids, word_ids = zip(*words)
char_ids = [[c[0] for c in s] for s in words]
word_ids = [[x[1] for x in s] for s in words]
word_ids, sequence_lengths = pad_sequences(word_ids, 1)
char_ids, word_lengths = pad_sequences(char_ids, pad_tok=0,
nlevels=2)
word_ids = np.asarray(word_ids)
char_ids = np.asarray(char_ids)
if len(words) == 1:
word_ids = mult*word_ids
char_ids = (mult*char_ids.transpose(1,0,2)).transpose(1,0,2)
word_input = T(word_ids, cuda=self.use_cuda)
char_input = T(char_ids, cuda=self.use_cuda)
word_input, char_input = Variable(word_input, requires_grad=False), \
Variable(char_input, requires_grad=False)
inputs = (word_input, char_input)
outputs = self.model(inputs)
predictions = self.criterion.decode(outputs)
predictions = [p[:i] for p, i in zip(predictions, sequence_lengths)]
return predictions
def predict(self, sentences):
"""Returns list of tags
Args:
words_raw: list of words (string), just one sentence (no batch)
Returns:
preds: list of tags (string), one for each word in the sentence
"""
nlp = spacy.load('en')
doc = nlp(sentences)
words_raw = [[token.text for token in sent] for sent in doc.sents]
if self.use_elmo:
words = words_raw
else:
words = [[self.config.processing_word(w) for w in s] for s in words_raw]
# print(words)
# raise NameError('testing')
# if type(words[0]) == tuple:
# words = zip(*words)
pred_ids = self.predict_batch(words)
preds = [[self.idx_to_tag[idx.item() if isinstance(idx, torch.Tensor) else idx] for idx in s] for s in pred_ids]
return preds
def create_mask(sequence_lengths, targets, cuda, batch_first=False):
""" Creates binary mask """
mask = Variable(torch.ones(targets.size()).type(torch.ByteTensor))
if cuda: mask = mask.cuda()
for i,l in enumerate(sequence_lengths):
if batch_first:
if l < targets.size(1):
mask.data[i, l:] = 0
else:
if l < targets.size(0):
mask.data[l:, i] = 0
return mask
def mask_targets(targets, sequence_lengths, batch_first=False):
""" Masks the targets """
if not batch_first:
targets = targets.transpose(0,1)
t = []
for l, p in zip(targets,sequence_lengths):
t.append(l[:p].data.tolist())
return t
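# Illustrative sketch (not called anywhere): the (seq_length, batch_size) mask
# consumed by the CRF and the truncated target lists used for scoring.
def _example_masking():
    targets = torch.LongTensor([[1, 2], [3, 4], [5, 0]])   # (seq_length=3, batch_size=2)
    lengths = [3, 2]                                       # true length of each sentence
    mask = create_mask(lengths, targets, cuda=False)
    assert mask[:, 1].tolist() == [1, 1, 0]   # the padded step of sentence 2 is masked
    assert mask_targets(targets, lengths) == [[1, 3, 5], [2, 4]]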
def build():
"""Procedure to build data
You MUST RUN this procedure. It iterates over the whole dataset (train,
dev and test) and extracts the vocabularies in terms of words, tags, and
characters. Having built the vocabularies, it writes them to files; the
line number of each word in a vocab file becomes its id. It then extracts
the relevant GloVe vectors and stores them in a np array such that the
i-th entry corresponds to the i-th word in the vocabulary.
Args:
config: (instance of Config) has attributes like hyper-params...
"""
# 1. get config and processing of words
config = Config(load=False)
#2. Get processing word generator
processing_word = get_processing_word(lowercase=True)
# 3. Generators
dev = CoNLLDataset(config.filename_dev, processing_word)
test = CoNLLDataset(config.filename_test, processing_word)
train = CoNLLDataset(config.filename_train, processing_word)
# 4. Build Word and Tag vocab
vocab_words, vocab_tags = get_vocabs([train, dev, test])
vocab_glove = get_glove_vocab(config.filename_glove)
# 5. Get a vocab set for words in both vocab_words and vocab_glove
vocab = vocab_words & vocab_glove
vocab.add(UNK)
vocab.add(NUM)
# 6. Save vocab
write_vocab(vocab, config.filename_words)
write_vocab(vocab_tags, config.filename_tags)
# 7. Trim GloVe Vectors
vocab = load_vocab(config.filename_words)
export_trimmed_glove_vectors(vocab, config.filename_glove,
config.filename_trimmed, config.dim_word)
# Build and save char vocab
train = CoNLLDataset(config.filename_train)
vocab_chars = get_char_vocab(train)
print("vocab_char",vocab_chars)
write_vocab(vocab_chars, config.filename_chars)
build()
def main():
# create instance of config
config = Config()
if config.use_elmo: config.processing_word = None
#build model
model = NERModel(config)
# create datasets
dev = CoNLLDataset(config.filename_dev, config.processing_word,
config.processing_tag, config.max_iter, config.use_crf)
train = CoNLLDataset(config.filename_train, config.processing_word,
config.processing_tag, config.max_iter, config.use_crf)
learn = NERLearner(config, model)
learn.fit(train, dev)
main()
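# Illustrative sketch (not called anywhere): after training, the saved weights can
# be reloaded and raw text tagged with NERLearner.predict. This assumes the vocab
# files, trimmed vectors and a trained model already exist on disk, and that the
# spaCy English model used by predict() is installed.
def _example_inference():
    config = Config()
    if config.use_elmo: config.processing_word = None
    model = NERModel(config)
    learn = NERLearner(config, model)
    learn.load()   # loads config.ner_model_path from config.dir_model
    print(learn.predict("EU rejects German call to boycott British lamb."))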