1. Basic computation
import tensorflow as tf
import numpy as np

sess = tf.Session()

# logits are the raw wx+b outputs; softmax has not been applied yet
# (after softmax each row would be a probability distribution summing to 1)
logits = np.array([[1, 2, 7],
                   [3, 5, 2],
                   [6, 1, 3],
                   [8, 2, 0],
                   [3, 6, 1]], dtype=np.float32)

# labels is the one-hot encoding of the class indices [2, 1, 0, 0, 1]
labels = np.array([[0, 0, 1],
                   [0, 1, 0],
                   [1, 0, 0],
                   [1, 0, 0],
                   [0, 1, 0]], dtype=np.float32)

# Direct formula: cross entropy = -sum(labels * log(softmax_out)) over the class axis.
# labels and softmax_out are both n*c; the element-wise product with the one-hot labels
# picks out the predicted probability of the true class for each sample, and -log of that
# probability is the per-sample cross entropy.
softmax_out = tf.nn.softmax(logits)
cross_entropy1 = -tf.reduce_sum(labels * tf.log(softmax_out), axis=1)  # element-wise multiply, not matmul

print(sess.run(softmax_out))
"""
[[2.4561151e-03 6.6764127e-03 9.9086750e-01]
 [1.1419519e-01 8.4379470e-01 4.2010065e-02]
 [9.4649917e-01 6.3774614e-03 4.7123417e-02]
 [9.9719369e-01 2.4717962e-03 3.3452120e-04]
 [4.7123417e-02 9.4649917e-01 6.3774614e-03]]
"""
print(sess.run(cross_entropy1))
# [0.00917446 0.16984606 0.05498519 0.00281025 0.05498519]
2. tf.nn.softmax_cross_entropy_with_logits vs. tf.nn.sparse_softmax_cross_entropy_with_logits
import tensorflow as tf
import numpy as np

sess = tf.Session()

# logits are the raw wx+b outputs, without softmax applied
logits = np.array([[1, 2, 7],
                   [3, 5, 2],
                   [6, 1, 3],
                   [8, 2, 0],
                   [3, 6, 1]], dtype=np.float32)

# labels is the one-hot encoding of the class indices [2, 1, 0, 0, 1]
labels = np.array([[0, 0, 1],
                   [0, 1, 0],
                   [1, 0, 0],
                   [1, 0, 0],
                   [0, 1, 0]], dtype=np.float32)

# one-hot labels
cross_entropy2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
print(sess.run(cross_entropy2))
# [0.00917445 0.16984604 0.05498521 0.00281022 0.05498521]

# integer class indices
classes = tf.argmax(labels, axis=1)  # [2, 1, 0, 0, 1]
cross_entropy3 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=classes)
print(sess.run(cross_entropy3))
# [0.00917445 0.16984604 0.05498521 0.00281022 0.05498521]
Summary:
1. The two functions produce identical results; the only difference is the format of the labels they accept.
2. For sparse_softmax_cross_entropy_with_logits, labels must have the shape [batch_size] and dtype int32 or int64. Each label is an int in the range [0, num_classes-1].
3. For softmax_cross_entropy_with_logits, labels must have the shape [batch_size, num_classes] and dtype float32 or float64.
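As a minimal sketch of point 2 (reusing sess and logits from the code above; the variable names here are illustrative), the sparse variant can take the integer class indices directly, without building a one-hot matrix first:

# Pass integer class indices directly; shape [batch_size], dtype int64
sparse_labels = np.array([2, 1, 0, 0, 1], dtype=np.int64)
loss_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=sparse_labels)
print(sess.run(loss_per_sample))
# expected to match cross_entropy2 / cross_entropy3 above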
3. tf.losses.softmax_cross_entropy and tf.losses.sparse_softmax_cross_entropy
1. These functions are mainly used to weight the loss differently across samples.
2. With the default weights=1, the result is equivalent to tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(...)).
3. With a scalar weight w, the result is equivalent to w * tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(...)).
4. With a vector of weights (one per sample), each per-sample loss is multiplied by its weight before the mean is taken (see the sketch after the example below).
import tensorflow as tf
import numpy as np

sess = tf.Session()

logits = np.array([[1, 2, 7],
                   [3, 5, 2],
                   [6, 1, 3],
                   [8, 2, 0],
                   [3, 6, 1]], dtype=np.float32)

# labels is the one-hot encoding of the class indices [2, 1, 0, 0, 1]
labels = np.array([[0, 0, 1],
                   [0, 1, 0],
                   [1, 0, 0],
                   [1, 0, 0],
                   [0, 1, 0]], dtype=np.float32)

cross1 = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
cross2 = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
cross3 = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits, weights=0.2)

print(sess.run(cross1))                        # [0.00917445 0.16984604 0.05498521 0.00281022 0.05498521]
print(sess.run(cross2))                        # 0.0583602
print(sess.run(tf.reduce_mean(cross1)))        # 0.0583602
print(sess.run(cross3))                        # 0.011672
print(sess.run(0.2 * tf.reduce_mean(cross1)))  # 0.011672
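To illustrate point 4, here is a minimal sketch with an arbitrary per-sample weight vector (the weight values are made up for illustration). It assumes the default reduction, which divides the weighted sum by the number of nonzero weights, so it equals a weighted mean whenever every weight is nonzero:

# Per-sample weights: each sample's loss is scaled by its weight,
# then the weighted sum is divided by the count of nonzero weights (here 5).
sample_weights = np.array([1.0, 2.0, 1.0, 0.5, 1.0], dtype=np.float32)
cross4 = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits,
                                         weights=sample_weights)
manual = tf.reduce_sum(sample_weights * cross1) / 5.0
print(sess.run(cross4))
print(sess.run(manual))  # expected to match cross4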
tf.losses.sparse_softmax_cross_entropy is likewise equivalent to tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(...)); the only difference is that its labels are integer class indices rather than one-hot encodings.
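A minimal sketch of that equivalence, reusing the logits and labels defined above (the integer indices are recovered with tf.argmax):

# Integer class indices [2, 1, 0, 0, 1]
classes = tf.argmax(labels, axis=1)
sparse_loss = tf.losses.sparse_softmax_cross_entropy(labels=classes, logits=logits)
nn_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=classes, logits=logits))
print(sess.run(sparse_loss))  # expected: 0.0583602
print(sess.run(nn_loss))      # expected: 0.0583602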