深度学习基础教程-用paddle实现-之线性回归

线性回归模型:

本文是一篇学习笔记,内容转载自百度 PaddlePaddle 框架的学习文档;代码是我自己重新敲了一遍的,与原文相比有少量改动。
深度学习基础教程-用paddle实现-之线性回归_第1张图片
深度学习基础教程-用paddle实现-之线性回归_第2张图片深度学习基础教程-用paddle实现-之线性回归_第3张图片

深度学习基础教程-用paddle实现-之线性回归_第4张图片深度学习基础教程-用paddle实现-之线性回归_第5张图片深度学习基础教程-用paddle实现-之线性回归_第6张图片深度学习基础教程-用paddle实现-之线性回归_第7张图片深度学习基础教程-用paddle实现-之线性回归_第8张图片深度学习基础教程-用paddle实现-之线性回归_第9张图片

线性回归代码示例:

import paddle
import paddle.fluid as fluid
import numpy
import math
import sys
from paddle.utils.plot import Ploter


def train_test(executor, program, reader, feeder, fetch_list):
    """Run ``program`` on every batch from ``reader`` and average the fetched values.

    Args:
        executor: Object whose ``run(program=..., feed=..., fetch_list=...)``
            returns one result per entry of ``fetch_list``.
        program: Program passed through to ``executor.run`` unchanged.
        reader: Iterable of batches (already invoked, e.g. ``test_reader()``).
        feeder: Object whose ``feed(batch)`` turns a batch into the feed
            argument expected by ``executor.run``.
        fetch_list: Targets to fetch; one averaged value is returned per target.

    Returns:
        A list with one entry per fetch target: the unweighted mean, over all
        batches, of the first element of that target's fetched result.

    Raises:
        ValueError: If ``reader`` yields no batches, so no average exists.
    """
    # One accumulator per fetch target; a fixed single slot would make zip()
    # silently discard every target after the first.
    accumulated = [0] * len(fetch_list)
    count = 0
    for data_test in reader:
        outs = executor.run(
            program=program,
            feed=feeder.feed(data_test),
            fetch_list=fetch_list
        )
        # Each fetched result is indexable; its first element is the value to sum.
        accumulated = [total + out[0] for total, out in zip(accumulated, outs)]
        count += 1  # number of batches processed (not individual samples)

    if count == 0:
        raise ValueError("reader yielded no batches; cannot compute an average")

    return [total / count for total in accumulated]


if __name__ == '__main__':
    # Load the data: 14 columns per row, the last one being the regression target.
    feature_names = [
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
        'PTRATIO', 'B', 'LSTAT', 'convert',
    ]
    feature_num = len(feature_names)
    filename = r'D:\pro\paddlepaddle\traindata\housing.data'
    # Read the file as text (sep=' ') into a flat float32 vector, then fold it
    # into rows of feature_num values.
    data = numpy.fromfile(filename, sep=' ', dtype='float32')
    data = data.reshape(data.shape[0] // feature_num, feature_num)

    # Per-column statistics over the whole table.
    num_rows = data.shape[0]
    maximums = data.max(axis=0)
    minimums = data.min(axis=0)
    avgs = data.sum(axis=0) / num_rows
    # Mean-normalise every column except the last one: (x - mean) / (max - min).
    for i in range(feature_num - 1):
        column = data[:, i]
        data[:, i] = (column - avgs[i]) / (maximums[i] - minimums[i])

    BATCH_SIZE = 20
    ratio = 0.8  # fraction of rows used for training; the remainder is the test split
    offset = int(data.shape[0] * ratio)
    train_data, test_data = data[:offset], data[offset:]

    #把数据包装成一个 reader(返回生成器的函数)。原文中是直接传入 numpy.ndarray 的,这会导致训练时读取 batch 报错:“TypeError: 'numpy.ndarray' object is not callable”
    def train_data_generation(train_data):
        """Wrap ``train_data`` in a reader: a callable returning a fresh sample generator.

        Each yielded sample is ``(features, label)``, where ``features`` is
        everything but the last element of a row and ``label`` is the last
        element kept as a length-1 slice.
        """
        def reader():
            for sample in train_data:
                features, label = sample[:-1], sample[-1:]
                yield features, label
        return reader
    def test_data_generation(test_data):
        def reader():
            for d in test_data:
                yield d[:-1], d[-1:]
        return reader

    # Shuffle samples through a 500-element buffer, then group them into
    # mini-batches of BATCH_SIZE; the same pipeline is used for both splits.
    shuffled_train = paddle.reader.shuffle(train_data_generation(train_data), buf_size=500)
    train_reader = paddle.batch(shuffled_train, batch_size=BATCH_SIZE)
    shuffled_test = paddle.reader.shuffle(test_data_generation(test_data), buf_size=500)
    test_reader = paddle.batch(shuffled_test, batch_size=BATCH_SIZE)


    # Configure the training program.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')  # input features
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')  # regression target
    y_predict = fluid.layers.fc(input=x, size=1, act=None)  # one fully connected layer, no activation

    main_program = fluid.default_main_program()  # default global main program
    start_program = fluid.default_startup_program()  # default global startup program

    cost = fluid.layers.square_error_cost(input=y_predict, label=y)  # squared error per sample
    avg_loss = fluid.layers.mean(cost)  # mean of the squared errors, used as the loss

    # Clone the evaluation program BEFORE minimize() is called. Paddle's
    # Program.clone documentation says clone(for_test=True) should be used
    # before backward/optimization; cloning afterwards can leave gradient and
    # SGD update ops in the evaluation program, so evaluating on test batches
    # would also update the parameters.
    test_program = main_program.clone(for_test=True)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_loss)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Directory the inference model is saved to; then build the feeder and
    # run the startup program once on the executor.
    params_dirname = r"result_train\fit_a_line.inference.model"
    feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
    exe.run(program=start_program)

    train_prompt = 'train cost'
    test_prompt = 'test cost'
    # plot_prompt = Ploter(train_prompt, test_prompt)
    # plot_prompt = Ploter(train_prompt, test_prompt)



    # PaddlePaddle readers yield mini-batches; `feeder` (built from [x, y])
    # maps each batch onto the program's input variables.
    # Train until the test loss is low enough or num_epochs passes have run,
    # then save the inference model to params_dirname.
    num_epochs = 100
    step = 0
    exe_test = fluid.Executor(place)
    # A bare `break` only leaves the inner batch loop; this flag carries the
    # early-stop decision out to the epoch loop so training really stops.
    stop_training = False
    for pass_id in range(num_epochs):
        for data_train in train_reader():
            avg_loss_value = exe.run(
                program=main_program,
                feed=feeder.feed(data_train),
                fetch_list=[avg_loss]
            )
            if step % 10 == 0:  # log the training loss every 10 batches
                # plot_prompt.append(train_prompt,step,avg_loss_value[0])
                # plot_prompt.plot()
                print("%s, Step: %d, Cost: %f" %
                      (train_prompt, step, avg_loss_value[0]))

            if step % 100 == 0:  # evaluate on the test split every 100 batches
                test_metics = train_test(
                    executor=exe_test,
                    program=test_program,
                    reader=test_reader(),
                    feeder=feeder,
                    fetch_list=[avg_loss],
                )
                print("%s, Step: %d, Cost: %f" %
                      (test_prompt, step, test_metics[0]))

                if test_metics[0] < 10.0:  # test loss is good enough: stop training
                    stop_training = True
                    break
            step += 1
            if math.isnan(float(avg_loss_value[0])):
                sys.exit("got NaN loss, training failed.")
        if stop_training:
            break

    # Save once, after training has finished, so the stored model always holds
    # the final parameters (including the early-stop case) without writing the
    # model to disk on every batch.
    if params_dirname is not None:
        fluid.io.save_inference_model(params_dirname, ['x'], [y_predict], exe)


    # Inference.
    # Prediction uses a separate program built from the parameters saved under
    # params_dirname, so it gets its own scope and its own executor on `place`.
    inference_scope = fluid.core.Scope()
    infer_exe = fluid.Executor(place)
    # Prediction helpers and the prediction run follow.
    def save_result(points1, points2):
        """Plot predictions against ground truth and save the figure as a PNG.

        Args:
            points1: Predicted values; drawn in red and labelled 'predictions'.
            points2: Ground-truth values; drawn in green and labelled 'GT'.
                Plotted against the same x indices as ``points1``.

        The image is written to './image/prediction_gt.png'; the directory is
        created first if it does not exist.
        """
        import os
        import matplotlib
        matplotlib.use('Agg')  # select the Agg backend before pyplot is imported
        import matplotlib.pyplot as plt

        output_path = './image/prediction_gt.png'
        output_dir = os.path.dirname(output_path)
        # savefig does not create missing directories.
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        x1 = list(range(len(points1)))
        fig = plt.figure()  # fresh figure so repeated calls do not overlay earlier plots
        l1 = plt.plot(x1, points1, 'r--', label='predictions')
        l2 = plt.plot(x1, points2, 'g--', label='GT')
        plt.plot(x1, points1, 'rp-', x1, points2, 'g+-')
        plt.title('predictions VS GT')
        # legend(a, b) would treat the second list as label text; pass the two
        # labelled line handles explicitly so their own labels are used.
        plt.legend(handles=[l1[0], l2[0]])
        plt.savefig(output_path)
        plt.close(fig)

    # fluid.io.load_inference_model reads the trained model back from
    # params_dirname so it can be run on data it has not been trained on.
    with fluid.scope_guard(inference_scope):
        loaded = fluid.io.load_inference_model(params_dirname, infer_exe)
        inference_program, feed_target_names, fetch_targets = loaded

        batch_size = 10

        # Batched reader over Paddle's bundled UCI housing test set.
        infer_reader = paddle.batch(
            paddle.dataset.uci_housing.test(),
            batch_size=batch_size)

        infer_data = next(infer_reader())  # only the first batch is used

        # Split each sample into its features (index 0) and label (index 1).
        infer_feat = numpy.array([sample[0] for sample in infer_data], dtype='float32')
        infer_label = numpy.array([sample[1] for sample in infer_data], dtype='float32')

        assert feed_target_names[0] == 'x'
        results = infer_exe.run(
            program=inference_program,
            feed={feed_target_names[0]: numpy.array(infer_feat)},
            fetch_list=fetch_targets)

        predictions = results[0]

        # Print the predictions and the labels, then save the comparison plot.
        print("infer results: (House Price)")
        for idx, val in enumerate(predictions):
            print("%d: %.2f" % (idx, val))

        print("\nground truth:")
        for idx, val in enumerate(infer_label):
            print("%d: %.2f" % (idx, val))

        save_result(predictions, infer_label)

你可能感兴趣的:(Paddle)