TensorFlow is used across many deep learning applications such as speech recognition and image recognition, and it runs on anything from a phone to clusters of thousands of servers. A while ago I ran some sentiment classification experiments, using a neural network to classify the data; the results were decent, reaching 80+% accuracy.
Dataset source: a Chinese review dataset (good Chinese corpora are hard to come by; thanks to the author!)
pos data (positive reviews)
neg data (negative reviews)
Data preparation:
import random
import pandas as pd  # needed for read_excel / concat below


def loadfile():
    neg = pd.read_excel('data/neg.xls', header=None)
    pos = pd.read_excel('data/pos.xls', header=None)  # training corpus loaded
    pos['mark'] = 1
    neg['mark'] = 0  # attach sentiment labels to the corpus
    pn = pd.concat([pos, neg], ignore_index=True)  # merge the two corpora
    #neglen = len(neg)
    #poslen = len(pos)  # corpus sizes
    #print(type(neg['mark'].values[0]))
    #print(pn[:10], pn[-10:-1])
    print(len(pn[0].values), len(pn['mark'].values))
    with open('data/data.txt', 'w', encoding='utf-8') as f:
        for x in pn[0].values:
            f.write(x + '\n')
    with open('data/label.txt', 'w', encoding='utf-8') as f:
        for x in pn['mark'].values:
            f.write(str(x) + '\n')


loadfile()  # load and merge the data
-------------------------------------------------
# word segmentation and stopword removal
import jieba
import numpy as np

with open('data/stopwords', 'r', encoding='utf-8') as f:
    stopwords = []
    for line in f.readlines():
        stopwords.append(line.strip())


def split_word():
    with open('data/data.txt', 'r', encoding='utf-8') as f:
        lines = f.readlines()
        #lines = random.sample(lines, len(lines))  # shuffle the order
    lines_1 = []
    #word_list = []
    for line in lines:
        line = ' '.join(jieba.cut(line.strip()))  # segment each review with jieba
        #for word in line.split(' '):
        #    if word not in stopwords:
        #        word_list.append(word)
        lines_1.append(line)
    with open('data/split_data.txt', 'w', encoding='utf-8') as f1:
        for line in lines_1:
            f1.write(line + '\n')
    #print(lines_1[0])
    #with open('data/clean_data.txt', 'w', encoding='utf-8') as f:
    #    for line in lines_1:
    #        f.write(" ".join([word for word in line]) + "\n")
    with open('data/split_data.txt', 'r', encoding='utf-8') as f:
        line_list = []
        #len_list = []
        for line in f.readlines():
            line = line.strip().split(' ')
            line_1 = []
            for word in line:
                if word not in stopwords:  # drop stopwords
                    line_1.append(word)
            #len_list.append(len(line_1))
            line_list.append(line_1)
    with open('data_clean.txt', 'w', encoding='utf-8') as f1:
        for line in line_list:
            f1.write(" ".join([num for num in line]) + "\n")


split_word()  # segment the text and strip stopwords
Stopword list (the data/stopwords file):
"
..
>>
/
...
8
二
<
@
]
、
,
“
”
。
-
&
《
》
…
?
^
_
(
)
#
啊
此
这
呢
哦
仅
*
+
=
0
1
2
3
4
5
6
7
8
9
@
$
【
】
[
]
矣
兮
~
>
<
{
}
了
个
呵
的
」
「
;
%
.
.
:
—
TWILIGHT
,
\
;
.....
Building the vocabulary:
#coding=utf-8
import numpy as np
import random
import os
from io import open
import datetime

"""
*** preprocessing: build the vocabulary
"""
PAD = "__PAD__"
GO = "__GO__"
EOS = "__EOS__"  # end of sentence
UNK = "__UNK__"  # marks tokens that do not appear in the vocabulary
START_VOCABULART = [PAD, GO, EOS, UNK]
PAD_ID = 0
GO_ID = 1
EOS_ID = 2
UNK_ID = 3

dataset_path_1 = 'data_clean.txt'
#dataset_path_2 = "data/sentiment_XS_test.txt"


def set_dataset_path(path):
    dataset_path = path


if not os.path.exists(dataset_path_1):
    print('training dataset is null')
    exit()


# gen_vocabulary: build the vocabulary file
def gen_vocabulary_file(input_file, output_file, vocab_size, input_file2=None):
    f = open(input_file, encoding='utf-8')
    train_set_x = []
    #train_set_y = []
    #test_set_x = []
    #test_set_y = []
    for line in f.readlines():
        x = line.strip()
        train_set_x.append(x)
        #train_set_y.append(y)
    f.close()
    #train_set_x = train_set_x[1:]
    vocabulary = {}
    counter = 0
    for line in train_set_x:
        counter += 1
        # print(line)
        tokens = line.strip().split(' ')  # note: this step once produced garbled (non-Chinese) output
        #print(tokens)
        for word in tokens:
            if word in vocabulary:  # already in the vocabulary: increment its count
                vocabulary[word] += 1
            else:  # first occurrence
                vocabulary[word] = 1
    vocabulary_list = START_VOCABULART + sorted(vocabulary, key=vocabulary.get, reverse=True)
    # print(vocabulary)
    # keep only the vocab_size most frequent tokens, which should be plenty
    if len(vocabulary_list) > vocab_size:
        vocabulary_list = vocabulary_list[:vocab_size]  # vocabulary of size vocab_size
    print(input_file, " vocabulary size:", len(vocabulary_list))
    with open(output_file, "w", encoding='utf-8') as ff:
        for word in vocabulary_list:
            ff.write(word + '\n')


print("vocabulary start convert...:")
gen_vocabulary_file(dataset_path_1, "train_set_vocabulary", 20000)
Converting sentences to ID sequences:
#coding=utf-8
import numpy as np
import random
import os
from io import open
import datetime

"""
*** preprocessing: convert sentences to ID vectors
"""
PAD = "__PAD__"
GO = "__GO__"
EOS = "__EOS__"  # end of sentence
UNK = "__UNK__"  # marks tokens that do not appear in the vocabulary
START_VOCABULART = [PAD, GO, EOS, UNK]
PAD_ID = 0
GO_ID = 1
EOS_ID = 2
UNK_ID = 3

dataset_path_1 = 'data_clean.txt'
#dataset_path_2 = "data/sentiment_XS_test.txt"


# convert each tokenised sentence into a vector of vocabulary IDs
def convert_to_vector(input_file, vocabulary_file, output_file):
    starttime = datetime.datetime.now()
    tmp_vocab = []
    with open(vocabulary_file, "r", encoding='utf-8') as f:
        tmp_vocab.extend(f.readlines())  # read the vocabulary into tmp_vocab
    tmp_vocab = [line.strip() for line in tmp_vocab]  # strip newlines
    vocab = dict([(x, y) for (y, x) in enumerate(tmp_vocab)])
    # e.g. {'硕': 3142, 'v': 577, 'I': 4789, '\ue796': 4515, '拖': 1333, '疤': 2201 ...}
    # the dict maps each token to its line number in the vocabulary file
    output_f = open(output_file, 'w', encoding='utf-8')  # output file
    train_set_x = []
    train_set_y = []
    with open(input_file, encoding='utf-8') as f:
        for line in f:
            x = line.strip().split(' ')
            #print(x)
            train_set_x.append(x)
    #train_set_x = train_set_x[1:]
    for line in train_set_x:
        line_vec = []
        for words in line:
            line_vec.append(vocab.get(words, UNK_ID))
            # if words is in vocab use vocab[words], otherwise UNK_ID (3)
        #print(line_vec)
        output_f.write(" ".join([str(num) for num in line_vec]) + "\n")
        # one space-separated line of IDs per sentence
    output_f.close()
    endtime = datetime.datetime.now()
    print("elapsed time: %d seconds" % ((endtime - starttime).seconds))


convert_to_vector(dataset_path_1, vocabulary_file="train_set_vocabulary", output_file="train_set_encode")
#convert_to_vector(dataset_path_2, vocabulary_file="train_set_vocabulary", output_file="test_set_encode")
I manually held out 10% of the data as a test set (a rough sketch of one way to do this follows).
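The split script itself is not part of the original post. Below is a minimal sketch of how it could be done, assuming the encoded sentences in train_set_encode and the labels in data/label.txt produced above are still aligned line by line, and that the output should be the four files data_process.py reads (data/train_set_encode, data/train_label.txt, data/test_set_encode, data/test_label.txt). These file names and the whole script are my assumption, not the author's code.

# split_data.py -- hold out roughly 10% of the corpus as a test set (illustrative sketch)
import random

with open('train_set_encode', 'r', encoding='utf-8') as f:
    sentences = f.readlines()          # one encoded sentence per line
with open('data/label.txt', 'r', encoding='utf-8') as f:
    labels = f.readlines()             # one 0/1 label per line, same order

pairs = list(zip(sentences, labels))
random.seed(1)                         # make the split reproducible
random.shuffle(pairs)
n_test = len(pairs) // 10              # 10% held out for testing


def dump(subset, x_path, y_path):
    # write the sentence file and the matching label file side by side
    with open(x_path, 'w', encoding='utf-8') as fx, open(y_path, 'w', encoding='utf-8') as fy:
        for x, y in subset:
            fx.write(x)
            fy.write(y)


dump(pairs[n_test:], 'data/train_set_encode', 'data/train_label.txt')
dump(pairs[:n_test], 'data/test_set_encode', 'data/test_label.txt')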
Next, build the classification model.
MLP model: mlp_model.py
#coding=utf-8
import tensorflow as tf
import numpy as np


class MLP_Model(object):
    def __init__(self, config, is_training=True):
        self.keep_prob = config.keep_prob
        self.batch_size = tf.Variable(0, dtype=tf.int32, trainable=False)
        self.is_training = is_training

        num_step = config.num_step
        self.input_data = tf.placeholder(tf.int32, [None, num_step])
        self.target = tf.placeholder(tf.int64, [None])
        #self.mask_x = tf.placeholder(tf.float32, [num_step, None])
        #emotion_embed_dim = config.emotion_embed_dim
        class_num = config.class_num
        hidden_neural_size = config.hidden_neural_size
        vocabulary_size = config.vocabulary_size
        max_len = config.max_len
        embed_dim = config.embed_dim
        hidden_layer_num = config.hidden_layer_num

        self.new_batch_size = tf.placeholder(tf.int32, shape=[], name="new_batch_size")
        self._batch_size_update = tf.assign(self.batch_size, self.new_batch_size)

        # store layer weights & biases
        weights = {
            'h1': tf.Variable(tf.random_normal([embed_dim, hidden_neural_size])),
            'h2': tf.Variable(tf.random_normal([hidden_neural_size, hidden_neural_size])),
            'out': tf.Variable(tf.random_normal([hidden_neural_size, class_num]))
        }
        biases = {
            'b1': tf.Variable(tf.random_normal([hidden_neural_size])),
            'b2': tf.Variable(tf.random_normal([hidden_neural_size])),
            'out': tf.Variable(tf.random_normal([class_num]))
        }

        # build the MLP network: two ReLU hidden layers applied to each time step,
        # mean pooling over time, then a linear output layer
        def multilayer_perceptron(_X, _weights, _biases):
            layer_1 = []
            layer_2 = []
            for i in range(max_len):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                layer_1.append(tf.nn.relu(
                    tf.add(tf.matmul(_X[i], _weights['h1']), _biases['b1'])))  # first hidden layer, ReLU activation
                layer_2.append(tf.nn.relu(
                    tf.add(tf.matmul(layer_1[i], _weights['h2']), _biases['b2'])))  # second hidden layer, ReLU activation
            with tf.name_scope("mean_pooling_layer"):
                out_put = tf.reduce_mean(layer_2, 0)
            return tf.matmul(out_put, _weights['out']) + _biases['out']

        #lstm_fw_cell = rnn_cell.BasicLSTMCell(hidden_neural_size, forget_bias=0.0, state_is_tuple=True)
        #lstm_bw_cell = rnn_cell.BasicLSTMCell(hidden_neural_size, forget_bias=0.0, state_is_tuple=True)
        #if self.keep_prob < 1:
        #    lstm_fw_cell = rnn_cell.DropoutWrapper(lstm_fw_cell, output_keep_prob=self.keep_prob)
        #    lstm_bw_cell = rnn_cell.DropoutWrapper(lstm_bw_cell, output_keep_prob=self.keep_prob)
        #lstm_fw_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_fw_cell]*hidden_layer_num)
        #lstm_bw_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_bw_cell]*hidden_layer_num)
        #self._initial_state = cell.zero_state(self.batch_size, dtype=tf.float32)

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            embedding = tf.get_variable("embedding", [vocabulary_size, embed_dim], dtype=tf.float32)
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if self.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, self.keep_prob)  # apply dropout before splitting into time steps
        inputs_emb = tf.transpose(inputs, [1, 0, 2])
        inputs_emb = tf.reshape(inputs_emb, [-1, embed_dim])
        inputs_emb = tf.split(inputs_emb, num_step, 0)  # list of num_step tensors of shape [batch, embed_dim]

        with tf.variable_scope("mlp_layer"):
            self.logits = multilayer_perceptron(inputs_emb, weights, biases)

        #out_put = []
        #state = self._initial_state
        #with tf.variable_scope("LSTM_layer"):
        #    for time_step in range(num_step):
        #        if time_step > 0: tf.get_variable_scope().reuse_variables()
        #        (cell_output, state) = cell(inputs[:, time_step, :], state)
        #        out_put.append(cell_output)
        #out_put = out_put*self.mask_x[:, :, None]
        #with tf.name_scope("mean_pooling_layer"):
        #    out_put = tf.reduce_sum(out_put, 0)/(tf.reduce_sum(self.mask_x, 0)[:, None])
        #with tf.name_scope("Softmax_layer_and_output"):
        #    softmax_w = tf.get_variable("softmax_w", [2*hidden_neural_size, class_num], dtype=tf.float32)
        #    softmax_b = tf.get_variable("softmax_b", [class_num], dtype=tf.float32)
        #    self.logits = tf.matmul(outputs[-1], softmax_w) + softmax_b

        with tf.name_scope("loss"):
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.target)
            self.cost = tf.reduce_mean(self.loss)

        with tf.name_scope("accuracy"):
            self.prediction = tf.argmax(self.logits, 1)
            correct_prediction = tf.equal(self.prediction, self.target)
            self.correct_num = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

        # add summaries
        loss_summary = tf.summary.scalar("loss", self.cost)
        accuracy_summary = tf.summary.scalar("accuracy_summary", self.accuracy)

        if not self.is_training:
            self.saver = tf.train.Saver(tf.global_variables())
            return

        self.globle_step = tf.Variable(0, name="globle_step", trainable=False)
        self.lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          config.max_grad_norm)

        # keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in zip(grads, tvars):
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.grad_summaries_merged = tf.summary.merge(grad_summaries)

        self.summary = tf.summary.merge([loss_summary, accuracy_summary, self.grad_summaries_merged])

        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

        self.global_step = tf.Variable(0, trainable=False)
        self.saver = tf.train.Saver(tf.global_variables())

    def assign_new_lr(self, session, lr_value):
        session.run(self._lr_update, feed_dict={self.new_lr: lr_value})

    def assign_new_batch_size(self, session, batch_size_value):
        session.run(self._batch_size_update, feed_dict={self.new_batch_size: batch_size_value})
Training script: mlp.py
import os
import time
import tensorflow as tf
#import datetime
#from rnn_model import RNN_Model
from mlp_model import MLP_Model
import data_process

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('batch_size', 64, 'the batch_size of the training procedure')
flags.DEFINE_float('lr', 0.1, 'the learning rate')
flags.DEFINE_float('lr_decay', 0.6, 'the learning rate decay')
flags.DEFINE_integer('vocabulary_size', 40000, 'vocabulary_size')
#emotion embedding
flags.DEFINE_integer("emotion_nums", 2, 'emotion_nums')  # positive, negative
flags.DEFINE_integer("emotion_embed_dim", 128, 'emotion embedding_dim')
flags.DEFINE_integer('emdedding_dim', 128, 'embedding dim')
flags.DEFINE_integer('hidden_neural_size', 128, 'LSTM hidden neural size')
flags.DEFINE_integer('hidden_layer_num', 3, 'LSTM hidden layer num')
flags.DEFINE_string('dataset_path', 'data/subj0.pkl', 'dataset path')
flags.DEFINE_integer('max_len', 100, 'max_len of training sentence')
flags.DEFINE_integer('valid_num', 100, 'epoch num of validation')
flags.DEFINE_integer('checkpoint_num', 1000, 'epoch num of checkpoint')
flags.DEFINE_float('init_scale', 0.1, 'init scale')
flags.DEFINE_integer('class_num', 2, 'class num')
flags.DEFINE_float('keep_prob', 0.5, 'dropout rate')
flags.DEFINE_integer('num_epoch', 81, 'num epoch')
flags.DEFINE_integer('max_decay_epoch', 30, 'num epoch')
flags.DEFINE_integer('max_grad_norm', 5, 'max_grad_norm')
flags.DEFINE_string('out_dir', os.path.abspath(os.path.join(os.path.curdir, "review_runs2_81")), 'output directory')
flags.DEFINE_integer('check_point_every', 10, 'checkpoint every num epoch ')


class Config(object):
    hidden_neural_size = FLAGS.hidden_neural_size
    vocabulary_size = FLAGS.vocabulary_size
    embed_dim = FLAGS.emdedding_dim
    #emotion
    emotion_nums = FLAGS.emotion_nums
    emotion_embed_dim = FLAGS.emotion_embed_dim
    #
    hidden_layer_num = FLAGS.hidden_layer_num
    class_num = FLAGS.class_num
    keep_prob = FLAGS.keep_prob
    lr = FLAGS.lr
    lr_decay = FLAGS.lr_decay
    batch_size = FLAGS.batch_size
    num_step = FLAGS.max_len
    max_grad_norm = FLAGS.max_grad_norm
    num_epoch = FLAGS.num_epoch
    max_decay_epoch = FLAGS.max_decay_epoch
    valid_num = FLAGS.valid_num
    out_dir = FLAGS.out_dir
    max_len = FLAGS.max_len
    checkpoint_every = FLAGS.check_point_every


def evaluate(model, session, data, global_steps=None, summary_writer=None):
    correct_num = 0
    total_num = len(data[0])
    for step, (x, y, mask_x) in enumerate(data_process.batch_iter(data, batch_size=FLAGS.batch_size)):
        fetches = model.correct_num
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.target] = y
        #feed_dict[model.mask_x] = mask_x
        model.assign_new_batch_size(session, len(x))
        #state = session.run(model._initial_state)
        #for i, (c, h) in enumerate(model._initial_state):
        #    feed_dict[c] = state[i].c
        #    feed_dict[h] = state[i].h
        count = session.run(fetches, feed_dict)
        correct_num += count
    accuracy = float(correct_num) / total_num
    dev_summary = tf.summary.scalar('dev_accuracy', accuracy)
    dev_summary = session.run(dev_summary)
    if summary_writer:
        summary_writer.add_summary(dev_summary, global_steps)
        summary_writer.flush()
    return accuracy


def run_epoch(model, session, data, global_steps, valid_model, valid_data, train_summary_writer, valid_summary_writer=None):
    for step, (x, y, mask_x) in enumerate(data_process.batch_iter(data, batch_size=FLAGS.batch_size)):
        feed_dict = {}
        feed_dict[model.input_data] = x
        #feed_dict[model.emotion_state] = []
        feed_dict[model.target] = y
        #feed_dict[model.mask_x] = mask_x
        model.assign_new_batch_size(session, len(x))
        fetches = [model.cost, model.accuracy, model.train_op, model.summary]
        #state = session.run(model._initial_state)
        #for i, (c, h) in enumerate(model._initial_state):
        #    feed_dict[c] = state[i].c
        #    feed_dict[h] = state[i].h
        cost, accuracy, _, summary = session.run(fetches, feed_dict)
        train_summary_writer.add_summary(summary, global_steps)
        train_summary_writer.flush()
        model.is_training = False
        valid_accuracy = evaluate(valid_model, session, valid_data, global_steps, valid_summary_writer)
        if global_steps % 100 == 0:
            print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
            print("the %i step, train cost is: %f and the train accuracy is %f and the valid accuracy is %f" % (global_steps, cost, accuracy, valid_accuracy))
        global_steps += 1
    return global_steps


def train_step():
    print("loading the dataset...")
    config = Config()
    eval_config = Config()
    eval_config.keep_prob = 1.0
    train_data, valid_data, test_data = data_process.load_data(FLAGS.max_len, batch_size=config.batch_size)
    print("begin training")
    # gpu_config = tf.ConfigProto()
    # gpu_config.gpu_options.allow_growth = True
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-1 * FLAGS.init_scale, 1 * FLAGS.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            model = MLP_Model(config=config, is_training=True)
        # train_summary_op = tf.merge_summary([model.loss_summary, model.accuracy])
        train_summary_dir = os.path.join(config.out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph)
        # dev_summary_op = tf.merge_summary([valid_model.loss_summary, valid_model.accuracy])
        dev_summary_dir = os.path.join(eval_config.out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, session.graph)
        # add checkpoint
        checkpoint_dir = os.path.abspath(os.path.join(config.out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables())
        tf.global_variables_initializer().run()
        global_steps = 1
        begin_time = int(time.time())
        for i in range(config.num_epoch):
            print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
            print("the %d epoch training..." % (i + 1))
            lr_decay = config.lr_decay ** max(i - config.max_decay_epoch, 0.0)
            model.assign_new_lr(session, config.lr * lr_decay)
            global_steps = run_epoch(model, session, train_data, global_steps, model, valid_data, train_summary_writer, dev_summary_writer)
            if i % config.checkpoint_every == 0:
                path = saver.save(session, checkpoint_prefix, global_steps)
                print("Saved model checkpoint to {}\n".format(path))
        print("the train is finished")
        end_time = int(time.time())
        print("training takes %d seconds already\n" % (end_time - begin_time))
        #test_accuracy = evaluate(test_model, session, test_data)
        #print("the test data accuracy is %f" % test_accuracy)
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        print("program end!")


def main(_):
    train_step()


if __name__ == "__main__":
    tf.app.run()
Run: python mlp.py
Evaluation script: evalute.py
import os
import time
import numpy as np
import tensorflow as tf
#import datetime
#from rnn_model import RNN_Model
from mlp_model import MLP_Model
import data_process

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('batch_size', 64, 'the batch_size of the training procedure')
flags.DEFINE_float('lr', 0.1, 'the learning rate')
flags.DEFINE_float('lr_decay', 0.6, 'the learning rate decay')
flags.DEFINE_integer('vocabulary_size', 40000, 'vocabulary_size')
#emotion embedding
flags.DEFINE_integer("emotion_nums", 2, 'emotion_nums')  # positive, negative
flags.DEFINE_integer("emotion_embed_dim", 128, 'emotion embedding_dim')
flags.DEFINE_integer('emdedding_dim', 128, 'embedding dim')
flags.DEFINE_integer('hidden_neural_size', 128, 'LSTM hidden neural size')
flags.DEFINE_integer('hidden_layer_num', 3, 'LSTM hidden layer num')
flags.DEFINE_string('dataset_path', 'data/subj0.pkl', 'dataset path')
flags.DEFINE_integer('max_len', 100, 'max_len of training sentence')
flags.DEFINE_integer('valid_num', 100, 'epoch num of validation')
flags.DEFINE_integer('checkpoint_num', 1000, 'epoch num of checkpoint')
flags.DEFINE_float('init_scale', 0.1, 'init scale')
flags.DEFINE_integer('class_num', 2, 'class num')
flags.DEFINE_float('keep_prob', 0.5, 'dropout rate')
flags.DEFINE_integer('num_epoch', 81, 'num epoch')
flags.DEFINE_integer('max_decay_epoch', 30, 'num epoch')
flags.DEFINE_integer('max_grad_norm', 5, 'max_grad_norm')
flags.DEFINE_string('out_dir', os.path.abspath(os.path.join(os.path.curdir, "review_runs2_81")), 'output directory')
flags.DEFINE_integer('check_point_every', 10, 'checkpoint every num epoch ')


class Config(object):
    hidden_neural_size = FLAGS.hidden_neural_size
    vocabulary_size = FLAGS.vocabulary_size
    embed_dim = FLAGS.emdedding_dim
    #emotion
    emotion_nums = FLAGS.emotion_nums
    emotion_embed_dim = FLAGS.emotion_embed_dim
    #
    hidden_layer_num = FLAGS.hidden_layer_num
    class_num = FLAGS.class_num
    keep_prob = FLAGS.keep_prob
    lr = FLAGS.lr
    lr_decay = FLAGS.lr_decay
    batch_size = FLAGS.batch_size
    num_step = FLAGS.max_len
    max_grad_norm = FLAGS.max_grad_norm
    num_epoch = FLAGS.num_epoch
    max_decay_epoch = FLAGS.max_decay_epoch
    valid_num = FLAGS.valid_num
    out_dir = FLAGS.out_dir
    max_len = FLAGS.max_len
    checkpoint_every = FLAGS.check_point_every


def evaluate(model, session, data, global_steps=None, summary_writer=None):
    #pre_label = []
    accuracy = []
    for step, (x, y, mask_x) in enumerate(data_process.batch_iter(data, batch_size=FLAGS.batch_size)):
        fetches = model.correct_num
        label = model.prediction
        feed_dict = {}
        feed_dict[model.input_data] = x
        #feed_dict[model.target] = y
        #feed_dict[model.mask_x] = mask_x
        model.assign_new_batch_size(session, len(x))
        #state = session.run(model._initial_state)
        #for i, (c, h) in enumerate(model._initial_state):
        #    feed_dict[c] = state[i].c
        #    feed_dict[h] = state[i].h
        #count = session.run(fetches, feed_dict)
        pre = session.run(label, feed_dict)
        correct_num = 0
        #pre_label.append(pre)
        for i in range(len(pre)):
            if pre[i] == y[i]:
                correct_num += 1
        accuracy.append(correct_num / len(pre))  # per-batch accuracy
    #accuracy = float(correct_num)/total_num
    #dev_summary = tf.summary.scalar('dev_accuracy', accuracy)
    #dev_summary = session.run(dev_summary)
    '''
    if summary_writer:
        summary_writer.add_summary(dev_summary, global_steps)
        summary_writer.flush()
    '''
    return accuracy


def test_step():
    print("loading the dataset...")
    config = Config()
    eval_config = Config()
    eval_config.keep_prob = 1.0
    train_data, valid_data, test_data = data_process.load_data(FLAGS.max_len, batch_size=config.batch_size)
    print("begin testing....")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    with tf.Session() as session:
        initializer = tf.random_uniform_initializer(-1 * FLAGS.init_scale, 1 * FLAGS.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            test_model = MLP_Model(config=eval_config, is_training=False)
        curdir = os.path.abspath(os.path.join(config.out_dir, "checkpoints"))
        #curdir = "D:\\emotion_classifier\\runs3_60\\checkpoints\\model-18922"
        ckpt = tf.train.get_checkpoint_state(curdir)
        if ckpt is not None:
            print(ckpt.model_checkpoint_path)
            test_model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            print("checkpoint path does not exist, exiting!")
            tf.global_variables_initializer().run()
            return
        accs = evaluate(test_model, session, test_data)
        accuracy = np.mean(accs)
        print("test accuracy: %f" % accuracy)


test_step()
Data loading module: data_process.py
#coding=utf-8
import numpy as np
import random
import os
from io import open
import string
import datetime

"""
*** preprocessing: load the encoded data for training/evaluation
"""
PAD = "__PAD__"
GO = "__GO__"
EOS = "__EOS__"  # end of sentence
UNK = "__UNK__"  # marks tokens that do not appear in the vocabulary
START_VOCABULART = [PAD, GO, EOS, UNK]
PAD_ID = 0
GO_ID = 1
EOS_ID = 2
UNK_ID = 3

dataset_path_1 = 'data/train_label.txt'
dataset_path_2 = "data/test_label.txt"


def set_dataset_path(path):
    dataset_path = path


if not os.path.exists(dataset_path_1):
    print('training dataset is null')
    exit()
if not os.path.exists(dataset_path_2):
    print('test dataset is null')
    exit()


def load_data(max_len, batch_size, n_words=40000, valid_portion=0.2, sort_by_len=False):
    f = open(dataset_path_1, 'rb')
    f1 = open(dataset_path_2, 'rb')
    f2 = open('data/train_set_encode', 'rb')
    f3 = open('data/test_set_encode', 'rb')
    print('load training label from %s\nload test label from %s' % (dataset_path_1, dataset_path_2))
    train_set_x = []
    train_set_y = []
    test_set_x = []
    test_set_y = []
    # load the labels
    for line in f.readlines():
        y = int(line.strip())
        train_set_y.append(y)
    for line1 in f1.readlines():
        y = int(line1.strip())
        test_set_y.append(y)
    # load the encoded sentences
    for line in f2.readlines():
        line = line.decode('utf-8').strip().split(' ')
        train_set_x.append(line)
    for line in f3.readlines():
        line = line.decode('utf-8').strip().split(' ')
        test_set_x.append(line)
    f.close()
    f1.close()
    f2.close()
    f3.close()

    # string matrix --> int matrix
    def string_to_int(input):
        output = []
        for line in input:
            line_vec = []
            for word in line:
                num = int(word)
                line_vec.append(num)
            output.append(line_vec)
        return output

    train_set_x = string_to_int(train_set_x)
    test_set_x = string_to_int(test_set_x)

    valid_set_y = []
    valid_set_x = []
    # split the training data into train/valid sets
    n_samples = len(train_set_x)
    sidx = np.random.permutation(n_samples)
    n_train = int(np.round(n_samples * (1. - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]
    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)
    test_set = (test_set_x, test_set_y)

    # map out-of-vocabulary ids to UNK_ID
    def remove_unk(x):
        return [[UNK_ID if w >= n_words else w for w in sen] for sen in x]

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set
    train_set_x = remove_unk(train_set_x)
    valid_set_x = remove_unk(valid_set_x)
    test_set_x = remove_unk(test_set_x)

    def len_argsort(seq):
        return sorted(range(len(seq)), key=lambda x: len(seq[x]))

    if sort_by_len:
        sorted_index = len_argsort(test_set_x)
        test_set_x = [test_set_x[i] for i in sorted_index]
        test_set_y = [test_set_y[i] for i in sorted_index]
        sorted_index = len_argsort(valid_set_x)
        valid_set_x = [valid_set_x[i] for i in sorted_index]
        valid_set_y = [valid_set_y[i] for i in sorted_index]
        sorted_index = len_argsort(train_set_x)
        train_set_x = [train_set_x[i] for i in sorted_index]
        train_set_y = [train_set_y[i] for i in sorted_index]

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)
    test_set = (test_set_x, test_set_y)

    new_train_set_x = np.zeros([len(train_set[0]), max_len])
    new_train_set_y = np.zeros(len(train_set[0]))
    new_valid_set_x = np.zeros([len(valid_set[0]), max_len])
    new_valid_set_y = np.zeros(len(valid_set[0]))
    new_test_set_x = np.zeros([len(test_set[0]), max_len])
    new_test_set_y = np.zeros(len(test_set[0]))
    mask_train_x = np.zeros([max_len, len(train_set[0])])
    mask_valid_x = np.zeros([max_len, len(valid_set[0])])
    mask_test_x = np.zeros([max_len, len(test_set[0])])

    # pad (or truncate) every sentence to max_len and build the corresponding mask
    def padding_and_generate_mask(x, y, new_x, new_y, new_mask_x):
        for i, (x, y) in enumerate(zip(x, y)):
            if len(x) <= max_len:
                new_x[i, 0:len(x)] = x
                new_mask_x[0:len(x), i] = 1
                new_y[i] = y
            else:
                new_x[i] = x[0:max_len]
                new_mask_x[:, i] = 1
                new_y[i] = y
        new_set = (new_x, new_y, new_mask_x)
        del new_x, new_y
        return new_set

    train_set = padding_and_generate_mask(train_set[0], train_set[1], new_train_set_x, new_train_set_y, mask_train_x)
    valid_set = padding_and_generate_mask(valid_set[0], valid_set[1], new_valid_set_x, new_valid_set_y, mask_valid_x)
    test_set = padding_and_generate_mask(test_set[0], test_set[1], new_test_set_x, new_test_set_y, mask_test_x)
    return train_set, valid_set, test_set


# yield the dataset batch by batch
def batch_iter(data, batch_size):
    # unpack inputs, labels and masks
    x, y, mask_x = data
    x = np.array(x)
    y = np.array(y)
    data_size = len(x)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for batch_index in range(num_batches_per_epoch):
        start_index = batch_index * batch_size
        end_index = min((batch_index + 1) * batch_size, data_size)
        return_x = x[start_index:end_index]
        return_y = y[start_index:end_index]
        return_mask_x = mask_x[:, start_index:end_index]
        # print(len(return_x))
        # print(return_x)
        # print(return_y)
        # print(return_mask_x)
        yield (return_x, return_y, return_mask_x)
The final test accuracy is around 86%, which is not bad.