import csv
import codecs
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
# path is the location of the csv file
def create_csv(path):
    f = codecs.open(path, 'w+', 'utf-8')
    writer = csv.writer(f)
    # ('W1', 'W2', 'a1', 'b1', 'a2', 'b2', 'bias', 'loss') is the header of each column
    writer.writerow(('W1', 'W2', 'a1', 'b1', 'a2', 'b2', 'bias', 'loss'))
    f.close()
# fname is the csv file name, data is the row to append
def wcsv(fname, data):
    f = codecs.open(fname, 'a+', 'utf-8')
    writer = csv.writer(f)
    writer.writerow(data)
    f.close()
# path is the location of the csv file
def read_csv(path):
    # usecols lists the attribute columns to select
    datas = pd.read_csv(path, usecols=["W1", "W2", "a1", "b1", "a2", "b2", "bias"])
    datas = pd.DataFrame(datas)
    my_array = np.array(datas)
    my_array = my_array.astype(float)
    my_tensor = torch.tensor(my_array)
    # the return type is a tensor
    return my_tensor
# path is the location of the csv file, col is the column to plot
def plot_dcsv(path, col):
    fig = plt.figure(figsize=(20, 8))
    ax = fig.add_subplot(111)
    data = pd.read_csv(path, usecols=[col])
    my_array = np.array(data)
    # plot every 5th row to keep the figure readable
    my_tensor = torch.tensor(my_array[::5])
    axis = range(0, len(my_array), 5)
    ax.plot(axis, my_tensor)
    plt.ylabel(col)
    plt.title(f'distribution of {col}')
    plt.savefig(f"{col}.jpg")
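A minimal usage sketch of the four helpers above; the file name params.csv and the logged values are made-up placeholders, not taken from the original code:
# Hypothetical usage of the helpers above; file name and values are illustrative only.
create_csv('params.csv')                                            # write the header row
wcsv('params.csv', [0.1, 0.2, 1.0, 0.0, 1.0, 0.0, 0.05, 0.37])      # append one row of values
params = read_csv('params.csv')                                     # load the selected columns as a float tensor
print(params.shape)                                                 # torch.Size([1, 7])
plot_dcsv('params.csv', 'loss')                                     # save loss.jpg plotting every 5th loss value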
df is the numpy.ndarray we want to process; the following operations keep only the rows that satisfy the given conditions:
# keep rows whose first column is greater than 0
df = df[df[:, 0] > 0, :]
# keep rows whose second column is greater than 0
df = df[df[:, 1] > 0, :]
# keep rows whose third column is greater than 0
df = df[df[:, 2] > 0, :]
1) When the batch size is too small and there are many classes, the loss really can oscillate and fail to converge, especially when the network is relatively complex.
2) As the batch size grows, the same amount of data is processed faster.
3) As the batch size grows, more and more epochs are needed to reach the same accuracy.
4) Because these two factors pull in opposite directions, there is some batch size at which the wall-clock time to reach a given accuracy is optimal.
5) Because the final accuracy depends on which local minimum training settles into, there is also some batch size at which the final converged accuracy is optimal.
6) An overly large batch size makes it easy for the network to converge to poor local optima; an overly small batch size also has problems, such as slow training and difficulty converging.
7) The concrete choice of batch size is related to the number of samples in the training set.
8) GPUs get better performance out of batches whose size is a power of two, so 16, 32, 64, 128, ... usually perform better than multiples of 10 or 100 (see the sketch below).
Original article: https://blog.csdn.net/zqx951102/article/details/88918948
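As a small illustration of point 8, a DataLoader with a power-of-two batch size; the toy dataset and the value 64 are placeholders, not from the original post:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Toy dataset of 1024 random samples (made up for illustration)
x = torch.randn(1024, 8)
y = torch.randn(1024, 1)
dataset = TensorDataset(x, y)

# Prefer a power-of-two batch size (16, 32, 64, 128, ...) over multiples of 10 or 100
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)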
Training a network with gradient descent involves many decisions about hyperparameters. One of them is the number of training epochs: how many complete passes over the dataset should be made (one pass being one epoch)?
If the number of epochs is too small, the network may underfit (it learns too little from the training data); if it is too large, the network may overfit (it fits the noise in the training data rather than the signal).
Early stopping addresses the problem of having to set the number of epochs by hand. It can also be viewed as a regularization method that prevents overfitting (similar to L1/L2 weight decay and dropout). The root issue is that continuing to train causes accuracy on the test set to drop.
Possible reasons why continuing to train makes test accuracy drop:
1. Overfitting.
2. The learning rate is too large, so training fails to converge.
3. When a regularization term is used, a decrease in the loss may come from smaller weights rather than from higher accuracy.
Early stopping splits the data into a training set and a validation set. After each epoch (or every N epochs), evaluate on the validation set; as the epochs go on, if the validation error starts to rise, stop training,
and take the weights at the stopping point as the final parameters of the network.
This matches intuition: once accuracy no longer improves, further training is useless and only adds training time. The key question, then, is how to decide that validation accuracy is no longer improving. A single drop is not enough, because accuracy may fall after one epoch and rise again in later epochs, so one or two consecutive drops are not a reliable signal. The usual approach is to record the best validation accuracy seen so far during training, and when that best accuracy has not been reached again for 10 consecutive epochs (or more), conclude that accuracy is no longer improving.
The optimal model is the one saved at the point marked by the vertical dashed line (in the figure of the original article), i.e., the model with the highest accuracy on the test set.
Original article: https://blog.csdn.net/jinchenpeng/article/details/115984618
Implementation: create a file tool.py and copy the code below into it. Source code:
import numpy as np
import torch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            # self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decreases.'''
        # if self.verbose:
        #     self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
Using early stopping from another .py file:
import numpy as np
import torch
import torch.nn as nn
from tool import EarlyStopping

def train_model(model, batch_size, patience, n_epochs, train_loader, valid_loader):
    # to track the training loss as the model trains
    train_losses = []
    # to track the validation loss as the model trains
    valid_losses = []
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = []
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    for epoch in range(1, n_epochs + 1):
        ###################
        # train the model #
        ###################
        model.train()  # prep model for training
        optimizer = torch.optim.Adam([
            # change the lr of any individual entry directly here
            {'params': model.w1, 'lr': model.lr},
            {'params': model.w2, 'lr': model.lr},
            {'params': model.a1, 'lr': model.lr},
            {'params': model.a2, 'lr': model.lr},
            {'params': model.a3, 'lr': model.lr},
            {'params': model.b1, 'lr': model.lr},
            {'params': model.b2, 'lr': model.lr},
            {'params': model.b3, 'lr': model.lr},
            {'params': model.bias, 'lr': model.lr},
        ],
            weight_decay=0.0005,
        )
        loss_fnc = nn.MSELoss()
        for batch, (x1, x2, y) in enumerate(train_loader, 1):
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(x1, x2)
            # calculate the loss
            loss = loss_fnc(output, y)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record training loss
            train_losses.append(loss.item())
        ######################
        # validate the model #
        ######################
        model.eval()  # prep model for evaluation
        with torch.no_grad():
            for batch, (x1, x2, y) in enumerate(valid_loader, 1):
                # forward pass: compute predicted outputs by passing inputs to the model
                output = model(x1, x2)
                # calculate the loss
                loss = loss_fnc(output, y)
                # record validation loss
                valid_losses.append(loss.item())
        # print training/validation statistics
        # calculate average loss over an epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)
        epoch_len = len(str(n_epochs))
        # clear lists to track next epoch
        train_losses = []
        valid_losses = []
        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break
    # load the last checkpoint with the best model
    model.load_state_dict(torch.load('checkpoint.pt'))
    return model, avg_train_losses, avg_valid_losses
where:
patience (int): how long to wait after the last time the validation loss improved before stopping.
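A hypothetical end-to-end call to train_model; the toy model, data, and all numbers below are made up to show the expected interface, not taken from the original code:
# Hypothetical usage; ToyModel and all values are illustrative placeholders.
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

class ToyModel(nn.Module):
    # Placeholder model exposing the parameter names train_model expects
    # (w1, w2, a1..a3, b1..b3, bias) plus an lr attribute.
    def __init__(self, lr=1e-3):
        super().__init__()
        self.lr = lr
        for name in ('w1', 'w2', 'a1', 'a2', 'a3', 'b1', 'b2', 'b3', 'bias'):
            setattr(self, name, nn.Parameter(torch.randn(1)))

    def forward(self, x1, x2):
        # Toy forward pass; only some of the parameters are actually used here.
        return self.w1 * x1 + self.w2 * x2 + self.bias

x1 = torch.randn(256, 1)
x2 = torch.randn(256, 1)
y = 2 * x1 - 3 * x2 + 0.5
dataset = TensorDataset(x1, x2, y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset, batch_size=32)

model, train_hist, valid_hist = train_model(
    ToyModel(), batch_size=32, patience=10, n_epochs=200,
    train_loader=train_loader, valid_loader=valid_loader)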