# -------------------------------------------------------------------
# Linear regression fitted by batch gradient descent in plain NumPy.
# A random design matrix X (6 samples, 3 features) is generated, the
# targets Y are synthesized from known weights W0, and gradient
# descent recovers an estimate w that converges toward W0.
# -------------------------------------------------------------------
import numpy as np

np.random.seed(0)  # fixed seed so the demo output is reproducible

# 6 samples, 3 features, entries uniform in [-1, 1)
X = 2 * np.random.random((6, 3)) - 1
print(X)

# Ground-truth weights used to synthesize the targets, shape (3, 1)
W0 = np.array([[12], [2], [4]])
print(W0)

# Noise-free targets: Y = X @ W0, shape (6, 1)
Y = np.dot(X, W0)

# Random initial guess for the weights, uniform in [-1, 1)
w = 2 * np.random.random((3, 1)) - 1
print(w)

for j in range(1, 6000):  # range, not the Python-2-only xrange
    pred = np.dot(X, w)  # current predictions, shape (6, 1)
    # Gradient of the squared error, averaged over the 6 samples
    delta = np.dot(X.T, (pred - Y)) / 6
    w = w - delta  # plain gradient step (step size 1)

print(w)  # close to W0 after convergence
是不是很简单?
下面我用 TensorFlow 再写了一遍:
# -------------------------------------------------------------------
# The same linear-regression demo, rewritten with TensorFlow 1.x
# (placeholder / Session API — will not run on TF 2.x as-is).
# -------------------------------------------------------------------
import input_data  # unused in this snippet; kept from the original
import tensorflow as tf
import numpy as np

# --- build the data (same scheme as the NumPy version) -------------
np_x = 2 * np.random.random((6, 3)) - 1
X = tf.constant(np_x, dtype=tf.float32, shape=(6, 3))

np_W0 = np.array([[12], [2], [4]])       # ground-truth weights
np_Y = np.dot(np_x, np_W0)               # targets: Y = X @ W0

np_W = 2 * np.random.random((3, 1)) - 1  # random initial weights
W = tf.Variable(np_W, dtype=tf.float32)

# --- build the model -----------------------------------------------
y = tf.matmul(X, W)                       # model prediction
y_real = tf.placeholder("float", [6, 1])  # true targets, fed at run time
loss = tf.reduce_sum(tf.square(y_real - y))  # sum-of-squares loss

# Gradient-descent optimizer minimizing the loss
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# --- train ---------------------------------------------------------
# global_variables_initializer replaces the long-deprecated
# initialize_all_variables
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(1000):
    # one optimizer step; feed_dict supplies the placeholder value
    sess.run(train_step, feed_dict={y_real: np_Y})
    if i % 100 == 0:
        # log the current weight estimate every 100 iterations
        print(sess.run(W, feed_dict={y_real: np_Y}))
三、TensorFlow 官方文档中的例子
# -------------------------------------------------------------------
# Softmax regression on MNIST (the TensorFlow 1.x tutorial example),
# with the optimizer step split into compute_gradients /
# apply_gradients so the gradients can be transformed (here: scaled
# by 0.5) before they are applied.
# Requires TF 1.x and the tutorial's input_data helper module.
# -------------------------------------------------------------------
import input_data
import tensorflow as tf

# --- load the data -------------------------------------------------
minist = input_data.read_data_sets('minist_data', one_hot=True)
print(type(minist))
print(type(minist.test.images))
print('testimgae', minist.test.images.shape)
print('label', minist.test.labels.shape)
print('trainimgae', minist.train.images.shape)
print('label', minist.train.labels.shape)

# --- model parameters ----------------------------------------------
weights = tf.Variable(tf.zeros([784, 10]))
biases = tf.Variable(tf.zeros([10]))

# --- build the model -----------------------------------------------
x = tf.placeholder("float", [None, 784])              # flattened 28x28 images
y = tf.nn.softmax(tf.matmul(x, weights) + biases)     # predicted class probabilities
y_real = tf.placeholder("float", [None, 10])          # one-hot true labels

# Cross-entropy between predictions and true labels (a scalar)
cross_entropy = -tf.reduce_sum(y_real * tf.log(y))
shapeOP2 = tf.shape(cross_entropy)

# Equivalent to GradientDescentOptimizer(0.01).minimize(cross_entropy),
# but split in two so the gradients can be modified before use.
opt = tf.train.GradientDescentOptimizer(0.01)
# compute_gradients returns a list of (gradient, variable) pairs
grads_and_vars = opt.compute_gradients(cross_entropy)
# transform each gradient (here: simply halve it)
capped_gvs = [(grad * 0.5, var) for grad, var in grads_and_vars]
# apply the (transformed) gradients to their variables
training_op = opt.apply_gradients(capped_gvs)

# --- train ---------------------------------------------------------
# global_variables_initializer replaces the deprecated
# initialize_all_variables
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(1):
    # a random mini-batch of 100 samples: stochastic gradient descent
    batch_xs, batch_ys = minist.train.next_batch(100)
    sess.run(training_op, feed_dict={x: batch_xs, y_real: batch_ys})
    print("cross", sess.run(cross_entropy, feed_dict={x: batch_xs, y_real: batch_ys}))
    if i % 100 == 0:
        # evaluate every 100 training steps
        print('Within session, tf.shape(x1):',
              sess.run(shapeOP2, feed_dict={x: batch_xs, y_real: batch_ys}))

# tf.argmax gives the index of the largest entry, i.e. the predicted /
# true class (tf.arg_max was the deprecated alias; use tf.argmax)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_real, 1))
# fraction of correct predictions = accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print(sess.run(accuracy, feed_dict={x: minist.test.images, y_real: minist.test.labels}))
注意:
#train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)  # one-line form: minimize the cross-entropy with gradient descent
opt = tf.train.GradientDescentOptimizer(0.01)
# compute_gradients returns a list of (gradient, variable) tuple pairs
grads_and_vars = opt.compute_gradients(cross_entropy)
# each pair holds a gradient and the variable it updates
capped_gvs = [ (grad*0.5, var)
for grad, var in grads_and_vars]
# apply the (scaled) gradients to their corresponding variables
training_op = opt.apply_gradients(capped_gvs)
这两个代码段是等价的。分开写的原因是:有可能需要先对梯度(gradients)做特殊处理,再应用更新。