数据集地址:https://www.kaggle.com/datasets/prathamtripathi/regression-with-neural-networking
查看了大家对这个数据集的预测,发现很少有人通过ANN来预测,而且有的代码过于抽象、很不好理解,因此决定自己做一个。经过几天的参数调整,最后R2值也只有0.61。如果有哪位大神可以继续优化,取得更高的准确度,也请教教我。
先给出我们的模型(由线性层构成),下面的_initweight()函数用来初始化模型权重。我这里用了很大的参数量,而且使用了残差结构。这个代码文件的名字为main.py
import torch
import torch.nn as nn
class Concret(nn.Module):
    """Fully connected regressor with a single residual connection.

    Layer widths are 8 -> 1400 -> 2400 -> 1400 -> 1. The activation of the
    first layer is added back to the output of the third layer before the
    final ReLU and the prediction layer.
    """

    def __init__(self):
        super().__init__()
        self.input = nn.Linear(8, 1400)
        self.hidden = nn.Linear(1400, 2400)
        self.hidden1 = nn.Linear(2400, 1400)
        self.predict = nn.Linear(1400, 1)
        self.relu = nn.ReLU()
        self._initweight()

    def forward(self, x):
        """Map a batch of 8-feature rows to one predicted value per row.

        Args:
            x: Tensor whose last dimension has size 8.

        Returns:
            Tensor with the same leading dimensions and a last dimension of 1.
        """
        skip = self.relu(self.input(x))
        out = self.relu(self.hidden(skip))
        out = self.hidden1(out)
        # Residual connection: both tensors are 1400 wide, so they add directly.
        out = self.relu(skip + out)
        return self.predict(out)

    def _initweight(self):
        """Initialise every linear layer with N(0, 0.01) weights and zero bias.

        NOTE: the layer test must be ``isinstance``; comparing a module
        instance to the ``nn.Linear`` class with ``==`` is never true and would
        silently leave PyTorch's default initialisation in place.
        Hyper-parameters tuned against the default initialisation may need to
        be revisited.
        """
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
给出计算评价指标的代码,这个代码文件的名字是calculate.py
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import math
def calc_corr(a, b):
    """Return the Pearson correlation coefficient of two numeric sequences.

    Args:
        a: Iterable of numbers.
        b: Iterable of numbers with the same number of elements as ``a``.

    Returns:
        The correlation coefficient, or ``nan`` when it is undefined (empty
        input, or at least one input has zero variance).

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths.
    """
    # Materialise once: the inputs are traversed several times below, which
    # would silently exhaust one-shot iterators.
    a = list(a)
    b = list(b)
    if len(a) != len(b):
        raise ValueError(
            'calc_corr needs sequences of equal length, got {} and {}'.format(len(a), len(b))
        )
    if not a:
        return float('nan')

    a_avg = sum(a) / len(a)
    b_avg = sum(b) / len(b)
    cov_ab = sum((x - a_avg) * (y - b_avg) for x, y in zip(a, b))
    spread_a = sum((x - a_avg) ** 2 for x in a)
    spread_b = sum((y - b_avg) ** 2 for y in b)
    sq = math.sqrt(spread_a * spread_b)
    if sq == 0:
        # A constant sequence has no defined correlation with anything.
        return float('nan')
    return cov_ab / sq
class calculate():
    """Print regression metrics for a pair of equally long lists.

    ``x`` is handed to the scikit-learn metrics as their first argument
    (``y_true`` in scikit-learn's signatures) and ``y`` as their second
    (``y_pred``).
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def sc(self):
        """Print MAE, MSE, R squared and the correlation coefficient."""
        first, second = self.x, self.y

        mae = mean_absolute_error(first, second)
        print('MAE(平均绝对误差):', mae)

        mse = mean_squared_error(first, second)
        print('MSE(均方误差):', mse)

        r_square = r2_score(first, second)
        print('R Square(R方):', r_square)

        corr = calc_corr(first, second)
        print('相关系数:', corr)
if __name__ == "__main__":
    # Small demonstration: report the metrics for three hand-written pairs.
    demo = calculate(x=[1, 2, 3], y=[1.1, 2.2, 3.3])
    demo.sc()
给出作图的代码,名字为plot.py
import matplotlib.pyplot as plt
class plot:
    """Matplotlib helpers operating on a pair of sequences ``x`` and ``y``."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def sandiantu(self):
        """Show a scatter plot of ``y`` against ``x``.

        The x axis is labelled 'Predict' and the y axis 'Truth'.
        """
        plt.figure(facecolor='gray')
        plt.scatter(x=self.x, y=self.y, edgecolor='white')
        plt.xlabel('Predict')
        plt.ylabel('Truth')
        plt.title('san dian tu')
        plt.show()

    def loss(self):
        """Print where the smallest ``y`` occurs, then show ``y`` over ``x``."""
        # Pairs are ordered by y first, so min() picks the smallest y and,
        # on ties, the smallest matching x.
        lowest, where = min(zip(self.y, self.x))
        print('第{}轮达到了损失最小值:{}'.format(where, lowest),)
        plt.plot(self.x, self.y)
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.show()
最后给出训练模型的代码
from main import Concret
import torch
import pandas
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import matplotlib.pyplot as plt
from calculate import calculate
from plot import plot
from sklearn.preprocessing import MinMaxScaler

# Run on the first CUDA device when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# One module-level scaler instance, used both when loading the CSV data and
# when converting scaled values back to their original range.
stand_process = MinMaxScaler()
class MnistDataset(Dataset):
    """Tabular regression dataset read from a CSV file.

    Despite the name, no image data is involved: every sample is the first
    eight columns of a row (features) plus the column at index 8 (target).

    Args:
        csv_file: Path of the CSV file. Its first row is discarded.
        scaler: Object offering ``fit_transform`` and ``transform``; it is
            applied to all columns. Defaults to the module-level
            ``stand_process``.
        fit: When true (the default) the scaler is fitted on this file before
            transforming it. Pass ``False`` to reuse the statistics the scaler
            already holds, e.g. those of the training file.
    """

    def __init__(self, csv_file, *, scaler=None, fit=True):
        if scaler is None:
            scaler = stand_process
        # The file is read without a header and the first row is dropped
        # afterwards, so the remaining cells must be converted to floats.
        frame = pandas.read_csv(csv_file, header=None)
        frame.drop(index=0, inplace=True)
        frame = frame.astype('float')
        scaled = scaler.fit_transform(frame) if fit else scaler.transform(frame)
        self.data_df = pandas.DataFrame(scaled)

    def __len__(self):
        """Return the number of data rows."""
        return len(self.data_df)

    def __getitem__(self, index):
        """Return ``(values, target)`` for the row at position ``index``.

        Returns:
            A float32 tensor with the eight feature values and a float32
            tensor of length one holding the target.
        """
        features = self.data_df.iloc[index, :8].to_numpy()
        values = torch.tensor(features, dtype=torch.float32)
        target = torch.tensor([self.data_df.iloc[index, 8]], dtype=torch.float32)
        return values, target
# --- Data -------------------------------------------------------------------
# Loading a file through MnistDataset fits the shared ``stand_process`` scaler
# on that file. To keep ONE consistent scaling (the training statistics) for
# both splits and for the final inverse transform, the validation file is
# loaded first, mapped back to raw units, and re-scaled after the training
# file has been loaded last.
val_set = MnistDataset('val.csv')
raw_val = stand_process.inverse_transform(val_set.data_df.to_numpy())
train_set = MnistDataset('concrete_data.csv')
val_set.data_df = pandas.DataFrame(stand_process.transform(raw_val))

train_items = DataLoader(dataset=train_set, batch_size=16, shuffle=True, num_workers=0)
# Evaluation does not depend on sample order, so no shuffling is needed here.
val_items = DataLoader(dataset=val_set, batch_size=101, shuffle=False, num_workers=0)

# --- Model and optimiser ----------------------------------------------------
net = Concret()
net.to(device)
loss_function = nn.MSELoss()
optimzer = torch.optim.Adam(net.parameters(), lr=0.00000045)

x_ray = []  # epoch numbers
y_ray = []  # mean validation loss of each epoch, stored as plain floats


def _restore_targets(scaled_rows):
    """Convert scaled single-value rows back to the original target range.

    The scaler was fitted on nine columns, so every value is padded with eight
    zeros, inverse-transformed, and only column 8 is kept (rounded to 2 places).
    """
    padded = [[0] * 8 + row for row in scaled_rows]
    restored = stand_process.inverse_transform(padded)
    return [round(row[8], 2) for row in restored]


# --- Training ---------------------------------------------------------------
for epoch in range(140):
    net.train()
    for values, targets in train_items:
        optimzer.zero_grad()
        cs = net(values.to(device))
        loss = loss_function(cs, targets.to(device))
        loss.backward()
        optimzer.step()

    # Evaluate on the WHOLE validation set, not only on its last batch.
    net.eval()
    pred_ceshi = []
    y_ceshi = []
    loss_sum = 0.0
    with torch.no_grad():
        for x_batch, y_batch in val_items:
            out = net(x_batch.to(device))
            batch_loss = loss_function(out, y_batch.to(device))
            # Weight by batch size so a smaller final batch does not skew the mean.
            loss_sum += batch_loss.item() * len(y_batch)
            pred_ceshi.extend(out.cpu().tolist())
            y_ceshi.extend(y_batch.tolist())
    loss_test = loss_sum / max(len(y_ceshi), 1)

    print("ite:{}, loss_test:{}".format(epoch, loss_test), "ite:{}, loss_train:{}".format(epoch, loss.item()))
    x_ray.append(epoch)
    y_ray.append(loss_test)

# --- Report (uses the predictions of the final epoch) -------------------------
res = _restore_targets(pred_ceshi)  # predictions in original units
print(res)
print('++++++++++++++++++++++++')
rec = _restore_targets(y_ceshi)  # ground truth in original units
print(rec)
print('--------------')
# calculate() forwards its first argument to scikit-learn as y_true and its
# second as y_pred; R squared is not symmetric, so the ground truth goes first.
calculate(rec, res).sc()
plot(res, rec).sandiantu()
plot(x_ray, y_ray).loss()
在最后的sklearn中有逆归一化的操作,有很多人对这一块不太懂,我准备再写一个帖子,来解释一下,sklearn的逆归一化碰到的问题。