For example, suppose we have a car company: the lower the price of a car, the more cars we can sell. We can model this relationship with linear regression.
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

# car prices (feature) and number of cars sold (target)
car_price_np = np.array([3, 4, 5, 6, 7, 8, 9], dtype=np.float32).reshape(-1, 1)
car_price_tensor = Variable(torch.from_numpy(car_price_np))
number_of_car_sell_np = np.array([7.5, 7, 6.5, 6.0, 5.5, 5.0, 4.5], dtype=np.float32).reshape(-1, 1)
number_of_car_sell_tensor = Variable(torch.from_numpy(number_of_car_sell_np))
class LinearRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.linear(x)
# one input feature (price) and one output (sales)
input_dim = 1
output_dim = 1
model = LinearRegression(input_dim, output_dim)
mse = nn.MSELoss()
lr = 0.02
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

loss_list = []
total_iter = 1001
for iteration in range(total_iter):
    optimizer.zero_grad()
    results = model(car_price_tensor)
    loss = mse(results, number_of_car_sell_tensor)
    loss.backward()
    optimizer.step()
    loss_list.append(loss.data)
    if iteration % 100 == 0:
        print("epoch {}, loss {}".format(iteration, loss.data))
Next come the steps for logistic regression, this time on the MNIST dataset:
import pandas as pd
from sklearn.model_selection import train_test_split

# load MNIST from csv; the first column is the label, the rest are pixel values
train = pd.read_csv("./train.csv", dtype=np.float32)
targets_numpy = train.label.values
features_numpy = train.loc[:, train.columns != "label"].values / 255  # normalize to [0, 1]

features_train, features_test, targets_train, targets_test = train_test_split(
    features_numpy, targets_numpy, test_size=0.2, random_state=42
)

featuresTrain = torch.from_numpy(features_train)
targetsTrain = torch.from_numpy(targets_train).type(torch.LongTensor)
featuresTest = torch.from_numpy(features_test)
targetsTest = torch.from_numpy(targets_test).type(torch.LongTensor)

batch_size = 100  # must be defined before building the loaders
train = torch.utils.data.TensorDataset(featuresTrain, targetsTrain)
test = torch.utils.data.TensorDataset(featuresTest, targetsTest)
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)
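A quick sanity check on the loaders catches shape mistakes early; a minimal sketch (the expected sizes assume batch_size = 100 and 784 flattened pixels per image):

images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([100, 784])
print(labels.shape)  # torch.Size([100])
print(labels.dtype)  # torch.int64, as required by CrossEntropyLoss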
The network structure is exactly the same as for linear regression; the only difference is the loss function:
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out
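Note that forward returns raw scores (logits) with no softmax: nn.CrossEntropyLoss applies LogSoftmax internally, so the model itself stays linear. If class probabilities are needed at inference time, a minimal sketch:

import torch.nn.functional as F

logits = LogisticRegressionModel(28 * 28, 10)(torch.randn(2, 28 * 28))
probs = F.softmax(logits, dim=1)  # each row now sums to 1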
n_iters = 10000
num_epochs = int(n_iters / (len(features_train) / batch_size))  # batch_size was defined above
input_dim = 28 * 28  # each 28x28 image is flattened into a 784-dim vector
output_dim = 10      # ten digit classes
model = LogisticRegressionModel(input_dim, output_dim).cuda()
error = nn.CrossEntropyLoss().cuda()
learning_rate = 1e-3
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

count = 0
loss_list = []
iteration_list = []
# train the model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        train = Variable(images.view(-1, 28 * 28)).cuda()
        labels = Variable(labels).cuda()
        optimizer.zero_grad()
        output = model(train)
        loss = error(output, labels)
        loss.backward()
        optimizer.step()
        count += 1
        if count % 50 == 0:
            # evaluate on the held-out set
            model.eval()
            correct = 0
            total = 0
            for images, labels in test_loader:
                test = Variable(images.view(-1, 28 * 28)).cuda()
                output = model(test)
                predicted = torch.max(output.cpu().data, 1)[1]
                total += len(labels)
                correct += (predicted == labels).sum()
            accuracy = 100 * correct / float(total)
            loss_list.append(loss.data)
            iteration_list.append(count)
            if count % 500 == 0:
                print("Iteration: {}, Loss: {:.4f}, Accuracy: {:.2f}%".format(count, loss.data.item(), accuracy))
After training, accuracy on the validation set is about 85%. Adding hidden layers with nonlinear activations turns this into a simple ANN:
class ANNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.tanh2 = nn.Tanh()
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.elu3 = nn.ELU()
        self.fc4 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.tanh2(self.fc2(x))
        x = self.elu3(self.fc3(x))
        x = self.fc4(x)
        return x
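It is worth comparing model sizes: the logistic model has only 784 * 10 + 10 = 7,850 parameters, while this ANN has far more. A minimal sketch that counts trainable parameters:

def count_params(m):
    return sum(p.numel() for p in m.parameters() if p.requires_grad)

print(count_params(LogisticRegressionModel(28 * 28, 10)))  # 7850
print(count_params(ANNModel(28 * 28, 100, 10)))            # 99710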
input_dim = 28 * 28
output_dim = 10
hidden_dim = 100
cuda = True

model = ANNModel(input_dim, hidden_dim, output_dim)
error = nn.CrossEntropyLoss()
if cuda:
    model = model.cuda()
    error = error.cuda()
lr = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

count = 0
loss_list = []
iteration_list = []
accuracy_list = []
# model training
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        train = Variable(images.view(-1, 28 * 28))
        labels = Variable(labels)
        if cuda:
            train = train.cuda()
            labels = labels.cuda()
        optimizer.zero_grad()
        outputs = model(train)
        loss = error(outputs, labels)
        loss.backward()
        optimizer.step()
        count += 1
        if count % 50 == 0:
            model.eval()
            correct = 0
            total = 0
            for images, labels in test_loader:
                test = Variable(images.view(-1, 28 * 28))
                if cuda:
                    test = test.cuda()
                outputs = model(test)
                predicted = torch.max(outputs.data, 1)[1]
                total += len(labels)
                correct += (predicted.cpu() == labels).sum()
            accuracy = 100 * correct / float(total)
            loss_list.append(loss.data)
            iteration_list.append(count)
            accuracy_list.append(accuracy)
            if count % 500 == 0:
                # Print Loss
                print('Iteration: {} Loss: {:.4f} Accuracy: {:.2f}%'.format(count, loss.data.item(), accuracy))
After training, accuracy on the validation set reaches 95%, which shows how important the nonlinearities in the model are.
In practice, adding batch normalization layers speeds up training and produces a better model. With the structure defined below, accuracy finally reaches 97%.
class ANNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.tanh2 = nn.Tanh()
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.bn3 = nn.BatchNorm1d(hidden_dim)
        self.elu3 = nn.ELU()
        self.fc4 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.relu1(self.bn1(self.fc1(x)))
        x = self.tanh2(self.bn2(self.fc2(x)))
        x = self.elu3(self.bn3(self.fc3(x)))
        x = self.fc4(x)
        return x
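One caveat with batch normalization: it behaves differently in training and evaluation mode, which is exactly why the training loop calls model.train() and model.eval(). A minimal sketch of the difference:

bn = nn.BatchNorm1d(4)
x = torch.randn(8, 4)

bn.train()
y_train = bn(x)  # normalizes with this batch's statistics and updates the running estimates

bn.eval()
y_eval = bn(x)   # normalizes with the accumulated running mean/variance instead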
The BN layers let the network train faster and reach a better result. Finally, a convolutional neural network:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        # 28x28 -> 26x26 after the 3x3 conv, -> 13x13 after 2x2 max pooling
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # 13x13 -> 11x11 after the 3x3 conv, -> 5x5 after 2x2 max pooling
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.fc = nn.Linear(32 * 5 * 5, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = self.fc(x.view(x.shape[0], -1))  # flatten to (batch, 32*5*5)
        return x
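The 32 * 5 * 5 input size of the fully connected layer follows from the shape arithmetic in the comments (28 → 26 → 13 → 11 → 5). A quick check with a dummy input, run in eval mode so the BatchNorm layers use their running statistics:

m = CNNModel().eval()
with torch.no_grad():
    out = m(torch.randn(1, 1, 28, 28))
print(out.shape)  # torch.Size([1, 10]); the flatten in forward() saw 32*5*5 = 800 features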
model = CNNModel()
error = nn.CrossEntropyLoss()
if cuda:
    model = model.cuda()
    error = error.cuda()
lr = 1e-1
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

count = 0
loss_list = []
iteration_list = []
accuracy_list = []
# model training
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        model.train()
        train = Variable(images.view(-1, 1, 28, 28))  # -1 keeps a final partial batch working
        labels = Variable(labels)
        if cuda:
            train = train.cuda()
            labels = labels.cuda()
        optimizer.zero_grad()
        outputs = model(train)
        loss = error(outputs, labels)
        loss.backward()
        optimizer.step()
        count += 1
        if count % 50 == 0:
            model.eval()
            correct = 0
            total = 0
            for images, labels in test_loader:
                test = Variable(images.view(-1, 1, 28, 28))
                if cuda:
                    test = test.cuda()
                outputs = model(test)
                predicted = torch.max(outputs.data, 1)[1]
                total += len(labels)
                correct += (predicted.cpu() == labels).sum()
            accuracy = 100 * correct / float(total)
            loss_list.append(loss.data)
            iteration_list.append(count)
            accuracy_list.append(accuracy)
            if count % 500 == 0:
                # Print Loss
                print('Iteration: {} Loss: {:.4f} Accuracy: {:.2f}%'.format(count, loss.data.item(), accuracy))
With the convolutional neural network, accuracy reaches 98.7%!
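To reuse the trained model later, it can be saved and restored via its state dict; a minimal sketch (the file name here is arbitrary):

torch.save(model.state_dict(), "cnn_mnist.pt")  # hypothetical file name

restored = CNNModel()
restored.load_state_dict(torch.load("cnn_mnist.pt", map_location="cpu"))
restored.eval()  # switch the BN layers to inference mode before predicting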