Boston House Price Prediction: Neural Network Simulation

def load_data():
    # load_boston was removed in scikit-learn 1.2; this code assumes an older
    # version where it is still available.
    from sklearn.datasets import load_boston
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    import numpy as np

    data = load_boston()
    # print(data.DESCR)  # dataset description

    X = data.data
    y = data.target
    ss = StandardScaler()
    X = ss.fit_transform(X)              # standardize the 13 features
    y = np.reshape(y, (len(y), 1))       # reshape targets to a column vector
    # train_test_split shuffles by default (shuffle=True); random_state fixes the split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=3)
    return X_train, X_test, y_train, y_test

This function loads the Boston housing dataset, standardizes the features, reshapes the targets into a column vector, and returns the train/test split.
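As a quick sanity check, the returned arrays can be inspected like this (a sketch, assuming scikit-learn < 1.2 where load_boston still exists; the Boston dataset has 506 samples and 13 features, so a 70/30 split yields roughly 354 training and 152 test rows):

X_train, X_test, y_train, y_test = load_data()
print(X_train.shape, y_train.shape)  # expected: (354, 13) (354, 1)
print(X_test.shape, y_test.shape)    # expected: (152, 13) (152, 1)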

import tensorflow as tf  # TF 1.x API; on TF 2.x use tf.compat.v1 and tf.compat.v1.disable_eager_execution()

def linear_regression():
    X_train, X_test, y_train, y_test = load_data()
    x = tf.placeholder(dtype=tf.float32, shape=[None, 13], name='input_x')
    y_ = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='input_y')
    w = tf.Variable(tf.truncated_normal(shape=[13, 1], stddev=0.1, dtype=tf.float32), name='weight')
    b = tf.Variable(tf.constant(value=0.0, dtype=tf.float32, shape=[1]), name='bias')

    y = tf.matmul(x, w) + b  # matrix multiplication: predictions of shape [None, 1]

    loss = 0.5 * tf.reduce_mean(tf.square(y - y_))  # loss: half the mean squared error

    rmse = tf.sqrt(tf.reduce_mean(tf.square(y - y_)))  # root mean squared error

    train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(1000):
            feed = {x: X_train, y_: y_train}
            l, r, _ = sess.run([loss, rmse, train_op], feed_dict=feed)
            if i % 20 == 0:
                print("## loss on train: {}, RMSE on train: {}".format(l, r))
        feed = {x: X_test, y_: y_test}
        r = sess.run(rmse, feed_dict=feed)
        print("## RMSE on test:", r)

1 Set up the model. The data has 13 features, so the input layer has 13 neurons; the output is a single predicted value, so the output layer has 1 neuron. Both are fed through placeholders: x has shape [None, 13] and y_ has shape [None, 1].
2 The shape of w is determined by the sizes of the layers it connects. Here w has shape [13, 1], connecting the 13 input features directly to the single output neuron; there is no hidden layer, so the model is ordinary linear regression. tf.truncated_normal initializes w with small random values (stddev=0.1).
3 The prediction y is computed with tf.matmul(x, w) + b, i.e. the matrix product of x and w plus the bias b.
4 loss is the loss function: half the mean squared error, loss = (1/(2N)) * Σ(y_i - y_i')², where N is the number of samples in the batch.
5 GradientDescentOptimizer performs backpropagation and updates w and b by gradient descent with a learning rate of 0.01.
6 The model is trained for 1000 iterations, printing the training loss and RMSE every 20 iterations.
7 Finally the RMSE on the test set is printed. (A TensorFlow 2 / Keras sketch of the same model follows this list.)
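Since tf.placeholder, tf.Session, and tf.train.GradientDescentOptimizer belong to the TensorFlow 1.x API, readers on TensorFlow 2.x may find a Keras version easier to run. The following is only a rough sketch of the same single-layer linear model; the epoch count and full-batch training are assumptions mirroring the original loop, not part of the original post:

import tensorflow as tf

def linear_regression_keras():
    X_train, X_test, y_train, y_test = load_data()
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1, input_shape=(13,))  # 13 inputs -> 1 output, no hidden layer
    ])
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
                  loss='mse',
                  metrics=[tf.keras.metrics.RootMeanSquaredError()])
    # full-batch training, mirroring the 1000 iterations of the original loop
    model.fit(X_train, y_train, epochs=1000, batch_size=len(X_train), verbose=0)
    _, rmse = model.evaluate(X_test, y_test, verbose=0)
    print("## RMSE on test:", rmse)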

The placeholders for x and y_ have shapes [None, 13] and [None, 1] respectively. None leaves the batch dimension (the number of samples fed in at once) unspecified, so the same graph can accept the full training set or mini-batches of any size. The weight w has shape [13, 1]: the first dimension is the number of neurons in the previous layer (the 13 input features) and the second is the number of neurons in the next layer (the single output).
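To illustrate why the None dimension works, here is a small NumPy shape check (just a sketch with zero-initialized w and b, independent of the TensorFlow graph above): a batch of any size N maps from [N, 13] to [N, 1].

import numpy as np

w = np.zeros((13, 1), dtype=np.float32)   # same shape as the TF weight
b = np.zeros((1,), dtype=np.float32)      # same shape as the TF bias
for n in (1, 32, 354):                    # arbitrary batch sizes
    x_batch = np.random.randn(n, 13).astype(np.float32)
    y_pred = x_batch @ w + b              # same computation as tf.matmul(x, w) + b
    print(x_batch.shape, '->', y_pred.shape)   # (n, 13) -> (n, 1)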
