Put simply, a neural network is just a function that can fit linear and nonlinear data. I modified a small program (code adapted from https://blog.csdn.net/weixin_42318554/article/details/121940694) to build a two-layer convnet and a two-layer MLP and see how they perform.
003 covered the basic principles of SGD.
004 covers the fundamentals of deep learning.
Interesting points:
A quick review of the history of deep learning:
- 1958: Perceptron (linear model)
- 1969: Perceptron has limitations
- 1980s: Multi-layer perceptron
  - Not significantly different from today's DNNs
- 1986: Backpropagation
  - Usually, more than 3 hidden layers did not help
- 1989: 1 hidden layer is “good enough”, why deep?
- 2006: RBM initialization (breakthrough)
- 2009: GPU
- 2011: Start to be popular in speech recognition
- 2012: Won the ILSVRC image competition

The perceptron is very similar to logistic regression, just without the sigmoid activation function. The development of GPUs around 2009 was also a key factor: doing the matrix operations on a GPU saves a great deal of time.
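To make that concrete, here is a minimal sketch (my own illustration, not from the referenced code) showing that the two models share the same linear part and differ only in the sigmoid:

import torch
import torch.nn as nn

# shared linear scoring function w*x + b (2 input features, 1 output, chosen arbitrarily)
linear = nn.Linear(2, 1)

def logistic_regression(x):
    return torch.sigmoid(linear(x))   # smooth probability in (0, 1)

def perceptron(x):
    return (linear(x) > 0).float()    # hard 0/1 decision, no sigmoid

x_demo = torch.randn(4, 2)            # small random batch just for illustration
print(logistic_regression(x_demo).shape, perceptron(x_demo).shape)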
The three steps of deep learning
We all know that machine learning has three steps, and deep learning likewise has three steps:
- Step 1: Neural network (define the model)
- Step 2: Goodness of function (model evaluation)
- Step 3: Pick the best function

For deep learning, Step 1 is to define the neural network.
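As a rough illustration (the layer sizes here are arbitrary, not taken from the experiment below), the three steps map directly onto PyTorch:

import torch

# Step 1: define the neural network (the set of candidate functions)
model = torch.nn.Sequential(
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1),
)
# Step 2: goodness of function -- a loss that scores each candidate function
loss_func = torch.nn.MSELoss()
# Step 3: pick the best function -- search the parameters with gradient descent
optimizer = torch.optim.SGD(model.parameters(), lr=0.2)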
For task 004, a ZFNet-like model is added to the comparison to see how it does.
The ZF-Net-like model:
import torch
import torch.nn as nn

class ZFNet_like(nn.Module):
    def __init__(self, num_classes=1, init_weights=True):
        super(ZFNet_like, self).__init__()
        self.features = nn.Sequential(  # convolutional feature extractor
            nn.Conv1d(1, 48, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),  # inplace saves memory, allowing larger models
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),  # was MaxPool2d, which would pool across channels in this 1-D pipeline
            nn.Conv1d(48, 128, kernel_size=1, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            nn.Conv1d(128, 192, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(192, 192, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(192, 128, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=1, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            # fully connected head; with an (N, 1) input the features come out as (N, 128, 4), i.e. 512 values
            nn.Linear(512, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = x.unsqueeze(2)                 # (N, 1) -> (N, 1, 1) for Conv1d
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)  # flatten (view() would also work)
        x = self.classifier(x)
        return x                           # (N, num_classes) matches the (100, 1) target below, so MSELoss does not broadcast

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv1d):   # was nn.Conv2d, which never matched the Conv1d layers here
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming He initialization
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)  # normal-distribution initialization
                nn.init.constant_(m.bias, 0)

def test():
    net = ZFNet_like()
    y = net(torch.randn(100, 1))
    print(y.size())

test()
Here is the code:
# https://blog.csdn.net/weixin_42318554/article/details/121940694
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

# build the dataset
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape (100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape (100, 1)

# build the networks
# Method 1: a two-layer MLP
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()  # inherit nn.Module's __init__
        # define the form of each layer
        self.hidden = torch.nn.Linear(n_feature, n_hidden)  # hidden layer, linear output
        self.output = torch.nn.Linear(n_hidden, n_output)   # output layer, linear output

    def forward(self, x):
        x = F.relu(self.hidden(x))  # activation function
        x = self.output(x)          # output value
        return x

# Method 2: a two-layer 1-D ConvNet
class ConvNet(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(ConvNet, self).__init__()
        self.hidden = torch.nn.Conv1d(n_feature, n_hidden, kernel_size=3, padding=1)
        self.output = torch.nn.Conv1d(n_hidden, n_output, kernel_size=3, padding=1)  # was n_feature; n_output is what is meant (both are 1 here)

    def forward(self, x):
        x = x.unsqueeze(2)  # (N, 1) -> (N, 1, 1)
        x = F.relu(self.hidden(x))
        x = self.output(x)
        return x.squeeze(-1)

net_0 = Net(n_feature=1, n_hidden=100, n_output=1)
net_1 = ConvNet(n_feature=1, n_hidden=100, n_output=1)
net_2 = ZFNet_like()

# visualization
plt.ion()
plt.show()

# train the networks
optimizer_0 = torch.optim.SGD(net_0.parameters(), lr=0.2)  # SGD: pass in all of the net's parameters and the learning rate
optimizer_1 = torch.optim.SGD(net_1.parameters(), lr=0.2)
optimizer_2 = torch.optim.Adam(net_2.parameters(), lr=0.1, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
# torch.optim.SGD(net_2.parameters(), lr=0.1)
loss_func = torch.nn.MSELoss()  # loss function (mean squared error)

for t in range(2001):
    pre_y0 = net_0(x)               # feed the training data, get predictions
    loss_0 = loss_func(pre_y0, y)   # compute the loss
    optimizer_0.zero_grad()         # clear gradients left over from the previous step
    loss_0.backward()               # backpropagate the error
    optimizer_0.step()              # apply the parameter update
    # ------------------------
    pre_y1 = net_1(x)
    loss_1 = loss_func(pre_y1, y)
    optimizer_1.zero_grad()
    loss_1.backward()
    optimizer_1.step()
    # ------------------------
    pre_y2 = net_2(x)
    loss_2 = loss_func(pre_y2, y)
    optimizer_2.zero_grad()
    loss_2.backward()
    optimizer_2.step()
    # plotting
    if t % 200 == 0:
        plt.cla()
        plt.scatter(x.data.numpy(), y.data.numpy())
        plt.plot(x.data.numpy(), pre_y0.data.numpy(), 'r_', lw=5)
        plt.plot(x.data.numpy(), pre_y1.data.numpy(), 'g_', lw=5)
        plt.plot(x.data.numpy(), pre_y2.data.numpy(), 'b_', lw=5)
        plt.text(0.5, 0,   'FC_loss=%.4f ' % loss_0.data.numpy(), fontdict={'size': 10, 'color': 'red'})
        plt.text(0.5, 0.1, 'Conv_loss=%.4f' % loss_1.data.numpy(), fontdict={'size': 10, 'color': 'green'})
        plt.text(0.5, 0.2, 'ZF_loss=%.4f' % loss_2.data.numpy(), fontdict={'size': 10, 'color': 'blue'})
        plt.savefig(str(t) + '.png', bbox_inches='tight')
        # plt.legend()
        plt.pause(0.1)
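Since the three networks differ hugely in capacity, a quick parameter count helps put the loss comparison in context (count_params is a small helper I added, not part of the original script):

def count_params(net):
    # total number of trainable parameters
    return sum(p.numel() for p in net.parameters() if p.requires_grad)

for name, net in [('MLP', net_0), ('ConvNet', net_1), ('ZFNet_like', net_2)]:
    print(name, count_params(net))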
A few of the results:
I also went through the code from Professor Hung-yi Lee's (李宏毅) course slides, which implements the loss function and gradient descent in plain NumPy; it is well worth reading carefully:
# building the loss surface over a grid of (b, w) values
# (here x and y are the grid axes for b and w, Z is the loss surface,
#  and x_data / y_data are the training data from the course demo)
for i in range(len(x)):
    for j in range(len(y)):
        b = x[i]
        w = y[j]
        Z[j][i] = 0  # meshgrid convention: y indexes rows, x indexes columns
        for n in range(len(x_data)):
            Z[j][i] += (y_data[n] - b - w * x_data[n]) ** 2
        Z[j][i] /= len(x_data)
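For reference, the triple loop can be vectorized with NumPy broadcasting. A minimal sketch, assuming x_data and y_data are 1-D NumPy arrays and b_grid / w_grid play the role of x and y above (loss_surface is my own name, not from the slides):

import numpy as np

def loss_surface(x_data, y_data, b_grid, w_grid):
    # MSE over the data for every (b, w) pair; rows indexed by w, columns by b,
    # matching Z[j][i] in the loop above
    b = b_grid[None, :, None]                      # shape (1, B, 1)
    w = w_grid[:, None, None]                      # shape (W, 1, 1)
    residual = y_data[None, None, :] - b - w * x_data[None, None, :]
    return (residual ** 2).mean(axis=-1)           # shape (W, B)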
The linear-regression (gradient-descent) procedure:
import time
import numpy as np

# initial parameters and hyper-parameters (x_d, y_d are the course's training data)
b = -2
w = 0.01
lr = 0.000005
iteration = 1400000

b_history = [b]
w_history = [w]
loss_history = []

start = time.time()
for i in range(iteration):
    m = float(len(x_d))
    y_hat = w * x_d + b
    loss = np.dot(y_d - y_hat, y_d - y_hat) / m
    grad_b = -2.0 * np.sum(y_d - y_hat) / m
    grad_w = -2.0 * np.dot(y_d - y_hat, x_d) / m
    # update the parameters
    b -= lr * grad_b
    w -= lr * grad_w
    b_history.append(b)
    w_history.append(w)
    loss_history.append(loss)
    if i % 10000 == 0:
        print("Step %i, w: %0.4f, b: %.4f, Loss: %.4f" % (i, w, b, loss))
end = time.time()
print("Approximate time needed:", end - start)
Step 0, w: 1.8648, b: -1.9952, Loss: 413789.3821
Step 10000, w: 2.1484, b: -7.1183, Loss: 19355.2329
Step 20000, w: 2.1627, b: -12.1013, Loss: 18858.5836
Step 30000, w: 2.1766, b: -16.9474, Loss: 18388.8578
Step 40000, w: 2.1902, b: -21.6603, Loss: 17944.5958
Step 50000, w: 2.2034, b: -26.2436, Loss: 17524.4173
Step 60000, w: 2.2162, b: -30.7010, Loss: 17127.0167
Step 70000, w: 2.2286, b: -35.0359, Loss: 16751.1593
Step 80000, w: 2.2407, b: -39.2517, Loss: 16395.6772
Step 90000, w: 2.2525, b: -43.3516, Loss: 16059.4658
Step 100000, w: 2.2640, b: -47.3389, Loss: 15741.4804
Step 110000, w: 2.2751, b: -51.2165, Loss: 15440.7331
Step 120000, w: 2.2860, b: -54.9876, Loss: 15156.2893
Step 130000, w: 2.2965, b: -58.6551, Loss: 14887.2653
Step 140000, w: 2.3067, b: -62.2217, Loss: 14632.8251
Step 150000, w: 2.3167, b: -65.6903, Loss: 14392.1781
Step 160000, w: 2.3264, b: -69.0637, Loss: 14164.5766
Step 170000, w: 2.3358, b: -72.3442, Loss: 13949.3134
Step 180000, w: 2.3450, b: -75.5347, Loss: 13745.7197
Step 190000, w: 2.3539, b: -78.6374, Loss: 13553.1628
Step 200000, w: 2.3626, b: -81.6549, Loss: 13371.0444
Step 210000, w: 2.3710, b: -84.5895, Loss: 13198.7988
Step 220000, w: 2.3792, b: -87.4434, Loss: 13035.8905
Step 230000, w: 2.3872, b: -90.2189, Loss: 12881.8136
Step 240000, w: 2.3950, b: -92.9181, Loss: 12736.0891
...
Step 1370000, w: 2.6577, b: -184.3362, Loss: 10198.3508
Step 1380000, w: 2.6580, b: -184.4488, Loss: 10198.0972
Step 1390000, w: 2.6583, b: -184.5583, Loss: 10197.8574
Approximate time needed: 20.32868242263794
Plotting the results:
# plot the figure
plt.subplot(1, 2, 1)
C = plt.contourf(x, y, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))  # filled contour plot of the loss surface
# plt.clabel(C, inline=True, fontsize=5)
plt.plot([-188.4], [2.67], 'x', ms=12, mew=3, color="orange")        # marker at the optimum (b ~ -188.4, w ~ 2.67)
plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')    # gradient-descent trajectory
plt.xlim(-200, -100)
plt.ylim(-5, 5)
plt.xlabel(r'$b$')
plt.ylabel(r'$w$')
plt.title("Linear regression")
plt.subplot(1, 2, 2)
loss = np.asarray(loss_history[2:iteration])
plt.plot(np.arange(2, iteration), loss)
plt.title("Loss")
plt.xlabel('step')
plt.ylabel('loss')
plt.show()
# The demo from Professor Hung-yi Lee's class needs about 1.4 million iterations to converge to the optimum; the results are shown above.
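As a sanity check on where gradient descent should end up (the orange 'x' near b ≈ -188.4, w ≈ 2.67 in the contour plot), the least-squares optimum can also be computed in closed form. A minimal sketch, assuming x_d and y_d are the same 1-D arrays as above (closed_form_fit is my own name, not from the course code):

import numpy as np

def closed_form_fit(x_d, y_d):
    # solve min over (w, b) of ||y - (w*x + b)||^2 directly with least squares
    A = np.stack([x_d, np.ones_like(x_d)], axis=1)   # design matrix [x, 1]
    (w, b), *_ = np.linalg.lstsq(A, y_d, rcond=None)
    return w, b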