Autograd (automatic differentiation)
import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # forward pass
    h = x.mm(w1)                    # matrix multiplication
    h = h.clamp(min=0)              # ReLU
    y_pred = h.mm(w2)
    loss = (y - y_pred).pow(2).sum()

    # backward pass: autograd fills w1.grad and w2.grad
    loss.backward()
    # print(w1.grad)

    # update the weights without recording the update in the autograd graph
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()

print(w1, w2)
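For intuition about what loss.backward() does, here is a minimal toy sketch (not part of the original notes): a scalar loss built from one tracked weight, whose gradient autograd writes into w.grad.

import torch

# toy example: loss = (w * x - y)^2 with a single scalar weight w
x = torch.tensor(2.0)
y = torch.tensor(3.0)
w = torch.tensor(1.0, requires_grad=True)

loss = (w * x - y) ** 2
loss.backward()        # autograd computes d(loss)/dw

print(w.grad)          # 2 * (w*x - y) * x = -4.0, matching the analytic derivative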
torch.nn
import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),              # x @ w1 + b1
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out, bias=False),
)
# model = model.cuda()

learning_rate = 1e-6
loss_fn = nn.MSELoss(reduction='sum')

for t in range(500):
    y_pred = model(x)                      # forward pass
    loss = loss_fn(y_pred, y)

    model.zero_grad()
    loss.backward()

    # manual gradient descent: the update must be in-place and outside the graph,
    # otherwise the model's parameters are never actually changed
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

    print(t, loss.item())
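The manual loop above is exactly what a plain SGD optimizer does. A minimal sketch (reusing model, x, y, loss_fn and learning_rate from above) of the same training step written with torch.optim.SGD:

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for t in range(500):
    y_pred = model(x)           # forward pass
    loss = loss_fn(y_pred, y)

    optimizer.zero_grad()       # clear the gradients from the previous step
    loss.backward()             # compute new gradients
    optimizer.step()            # in-place update, equivalent to the manual loop
    print(t, loss.item())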
Defining the optimizer (Adam):
import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),      # x @ w1 + b1
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
# model = model.cuda()
# torch.nn.init.normal_(model[0].weight)
# torch.nn.init.normal_(model[2].weight)

learning_rate = 1e-4
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(600):
    # forward pass
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    optimizer.zero_grad()
    # backward pass
    loss.backward()
    # update parameters
    optimizer.step()
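For reference, the two commented-out init lines above re-initialize the Linear layers' weights from a standard normal distribution; a minimal sketch of what enabling them does (indices 0 and 2 are the two Linear layers inside the Sequential):

torch.nn.init.normal_(model[0].weight)    # first Linear layer (D_in -> H)
torch.nn.init.normal_(model[2].weight)    # second Linear layer (H -> D_out)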
Defining the model as a class (nn.Module)
import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

class TwoLayer(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayer, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # clamp(min=0) acts as ReLU
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred

model = TwoLayer(D_in, H, D_out)
# model = model.cuda()

learning_rate = 1e-4
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(500):
    y_pred = model(x)              # forward pass
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
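After training, a minimal sketch (reusing model, x, y and loss_fn from above) of running the trained network for inference without building the autograd graph, and saving/reloading its parameters; the file name is only an example:

model.eval()                          # switch to evaluation mode
with torch.no_grad():                 # no gradient tracking during inference
    y_pred = model(x)
    print(loss_fn(y_pred, y).item())  # final training loss

# save and reload the learned parameters (file name is illustrative)
torch.save(model.state_dict(), 'two_layer.pt')
model.load_state_dict(torch.load('two_layer.pt'))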