TensorFlow2.0之汽车油耗实战-注释超详细

import tensorflow as tf
from tensorflow.keras import Model, layers, losses
import numpy as np
import pandas as pd
# NOTE: the Auto MPG dataset is read from a local file further down; nothing is downloaded here.
from tensorflow_core.python.keras import Model
import matplotlib.pyplot as plt


# Let TensorFlow allocate GPU memory on demand instead of reserving it all up
# front. A missing GPU is not fatal: the script simply runs on the CPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')

if not physical_devices:
    print("No GPU detected; running on CPU.")

# Apply the setting to every visible GPU so that all devices are configured
# consistently (the loop is a no-op when the list is empty).
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
# Location of the Auto MPG data file, relative to the working directory.
dataset_path = "../dataset/auto-mpg.data"
# Column layout of the file: fuel efficiency (miles per gallon), cylinder
# count, displacement, horsepower, weight, acceleration, model year, origin.
column_names = [
    'MPG',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight',
    'Acceleration',
    'Model Year',
    'Origin',
]
# Fields are space separated (repeated spaces are skipped), "?" marks a
# missing value and everything after a tab character is treated as a comment.
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    sep=" ",
    skipinitialspace=True,
    comment='\t',
    na_values="?",
)
# Work on a copy so raw_dataset stays untouched, and discard incomplete rows.
# Handy inspection helpers: dataset.head(), dataset.isna().sum()
dataset = raw_dataset.copy().dropna()

# 'Origin' is a categorical code (1, 2, 3). Remove the column and replace it
# with one 0.0/1.0 indicator column per code.
origin = dataset.pop('Origin')
for code, country in ((1, 'USA'), (2, 'Europe'), (3, 'Japan')):
    dataset[country] = (origin == code) * 1.0
# dataset.tail() shows the newly added indicator columns.

# Randomly draw 80% of the rows for training (fixed seed for a reproducible
# split); the remaining rows form the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

# Summary statistics of the training inputs, one row per feature after the
# transpose. The MPG label column is excluded because it is not an input.
train_stats = train_dataset.describe().drop(columns="MPG").transpose()

# Split the MPG column off both sets; it is the regression target.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')


def norm(x, stats=None):
    """Standardize features by subtracting the mean and dividing by the std.

    Args:
        x: Data whose column labels match the index of ``stats``.
        stats: Table with ``'mean'`` and ``'std'`` columns indexed by feature
            name. When omitted, the module-level ``train_stats`` is used, so
            every data split is scaled with the training-set statistics.

    Returns:
        ``(x - stats['mean']) / stats['std']``, aligned on feature name.
    """
    if stats is None:
        # Backward-compatible default: fall back to the training statistics.
        stats = train_stats
    return (x - stats['mean']) / stats['std']


# Standardize both splits with the training-set statistics.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
# Shapes can be checked with normed_train_data.shape / train_labels.shape.

# Wrap the training split in a tf.data pipeline of (features, label) pairs.
train_db = tf.data.Dataset.from_tensor_slices((normed_train_data.values, train_labels.values))
# Use a shuffle buffer that covers the whole training set: a smaller buffer
# only mixes rows that are close together in the original order. max(..., 1)
# keeps the buffer size valid even for an empty frame.
train_db = train_db.shuffle(max(len(normed_train_data), 1)).batch(32)


class Network(Model):
    """Fully connected regression network.

    Two 64-unit ReLU hidden layers followed by a single linear output unit.
    """

    def __init__(self):
        super().__init__()
        # Three dense layers, applied in this order by call().
        self.fc1 = layers.Dense(units=64, activation='relu')
        self.fc2 = layers.Dense(units=64, activation='relu')
        self.fc3 = layers.Dense(units=1)

    def call(self, inputs, training=None, mask=None):
        """Feed ``inputs`` through fc1, fc2 and fc3 and return the output.

        ``training`` and ``mask`` are accepted but not used.
        """
        hidden = self.fc2(self.fc1(inputs))
        return self.fc3(hidden)


# Instantiate the regression network.
model = Network()
# Create the layer weights up front. The second dimension (9) is the number
# of input features; the batch size (4) is an arbitrary placeholder.
model.build((4, 9))
# Print a per-layer overview of the network.
model.summary()
# RMSprop optimizer with a learning rate of 0.001.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

# Per-epoch MAE history (plain Python floats), used for plotting afterwards.
train_mae_losses = []
test_mae_losses = []

# The test inputs and labels do not change between epochs, so convert them
# once. Labels are reshaped to (N, 1) to match the network output shape.
test_x = tf.constant(normed_test_data.values)
test_y = tf.expand_dims(tf.constant(test_labels.values), axis=-1)

# Train for 200 epochs.
for epoch in range(200):
    # One full pass over the training set.
    for step, (x, y) in enumerate(train_db):
        # Labels arrive with shape (batch,) while the network emits
        # (batch, 1). Without matching shapes, the element-wise difference
        # inside MSE/MAE can broadcast to (batch, batch) and compare every
        # prediction with every label.
        y = tf.expand_dims(y, axis=-1)

        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            out = model(x)
            loss = tf.reduce_mean(losses.MSE(y, out))

        # MAE is tracked for reporting only; it is not differentiated.
        mae_loss = tf.reduce_mean(losses.MAE(y, out))

        # Log the training loss every 10 steps.
        if step % 10 == 0:
            print(epoch, step, float(loss))

        # Compute the gradients and apply one optimizer update.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # NOTE: this is the MAE of the final mini-batch of the epoch, not an
    # average over the whole epoch.
    train_mae_losses.append(float(mae_loss))
    # Evaluate on the held-out test set and store the result as a float so
    # both history lists hold the same type.
    out = model(test_x)
    test_mae_losses.append(float(tf.reduce_mean(losses.MAE(test_y, out))))

# Plot the per-epoch training and test MAE curves, save the figure as an SVG
# file and then display it.
plt.figure()
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.plot(train_mae_losses, label='Train')
plt.plot(test_mae_losses, label='Test')
# Build the legend once, after both labelled curves have been drawn.
plt.legend()
plt.savefig('auto.svg')
plt.show()



你可能感兴趣的:(python,机器学习,深度学习,python,机器学习,tensorflow,神经网络)