1. 把 Google Drive 挂载(mount)到 Colab 上:
from google.colab import drive
drive.mount('/content/device')
路径开头的 / 表示 Colab 虚拟机文件系统的根目录,所以 /content/device 是一个绝对路径。
2. 魔法命令(magic command):
A percent sign (%) starts a magic command. It runs inside the process associated with the notebook itself, so its effects (such as the working directory) persist in later cells.
%cd /content/device/MyDrive/
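Use % instead of ! for the cd (change directory) command: a directory change made with ! happens in a temporary shell and is lost as soon as that shell exits.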
3. 其他命令:
An exclamation mark (!) starts a new shell, runs the command in it, and
then kills that shell.
def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into training set and validation set.

    Args:
        data_set: Sized, indexable collection of samples, as accepted by
            ``random_split``.
        valid_ratio: Fraction of the samples that goes to the validation
            set. Must lie in [0, 1]; the validation size is truncated
            towards zero.
        seed: Seed for the generator that drives the random split, so the
            same seed always produces the same split.

    Returns:
        A tuple ``(train_array, valid_array)`` of NumPy arrays.

    Raises:
        ValueError: If ``valid_ratio`` is outside [0, 1].
    '''
    # A ratio outside [0, 1] would yield a negative length for one of the
    # two splits, so reject it up front instead of splitting incorrectly.
    if not 0 <= valid_ratio <= 1:
        raise ValueError(f'valid_ratio must be within [0, 1], got {valid_ratio!r}')

    valid_set_size = int(valid_ratio * len(data_set))
    train_set_size = len(data_set) - valid_set_size

    # Fix the generator for reproducible results.
    generator = torch.Generator().manual_seed(seed)
    # random_split returns dataset objects that implement __getitem__.
    train_set, valid_set = random_split(
        data_set, [train_set_size, valid_set_size], generator=generator
    )

    # Converting to arrays is required: the returned datasets behave much
    # like lists and do not support column selection such as data[:, idx].
    return np.array(train_set), np.array(valid_set)
def select_feat(train_data, valid_data, test_data, select_all=True, feat_start=37):
    '''Selects useful features to perform regression.

    Args:
        train_data: 2-D array whose last column is the regression target.
        valid_data: 2-D array with the same column layout as ``train_data``.
        test_data: 2-D array of test samples. Assumed to share the column
            layout of ``train_data`` minus the final target column
            (TODO: confirm against the data loader).
        select_all: If True, keep every candidate feature column; otherwise
            keep only the hard-coded subset below.
        feat_start: Index of the first column treated as a feature; the
            columns before it are dropped from all three splits.

    Returns:
        ``(x_train, x_valid, x_test, y_train, y_valid)``.
    '''
    # Targets: last column of every row.
    y_train, y_valid = train_data[:, -1], valid_data[:, -1]

    # Features: every row, from column `feat_start` up to (excluding) the target.
    raw_x_train = train_data[:, feat_start:-1]
    raw_x_valid = valid_data[:, feat_start:-1]
    # The test split must start at the same column offset; otherwise the
    # indices in `feat_idx` would pick different columns for test than for
    # train/valid.
    raw_x_test = test_data[:, feat_start:]

    if select_all:
        feat_idx = list(range(raw_x_train.shape[1]))
    else:
        feat_idx = [0, 1, 2, 3, 4]  # TODO: Select suitable feature columns.

    return (
        raw_x_train[:, feat_idx],
        raw_x_valid[:, feat_idx],
        raw_x_test[:, feat_idx],
        y_train,
        y_valid,
    )
显示进度条(使用 tqdm):
# One pass per epoch: train on every batch of `train_loader`, log the mean
# training loss, then collect the per-batch losses on `valid_loader`.
# NOTE(review): `step` is assumed to be initialised before this loop; its
# initialisation is not part of this snippet.
for epoch in range(n_epochs):
    model.train()  # Set your model to train mode.
    loss_record = []

    # tqdm is a package to visualize your training progress.
    train_pbar = tqdm(train_loader, position=0, leave=True)

    for x, y in train_pbar:
        optimizer.zero_grad()              # Set gradient to zero.
        x, y = x.to(device), y.to(device)  # Move your data to device.
        pred = model(x)
        loss = criterion(pred, y)
        loss.backward()                    # Compute gradient(backpropagation).
        optimizer.step()                   # Update parameters.
        step += 1

        # Read the scalar loss once and reuse it for the record and the bar.
        batch_loss = loss.detach().item()
        loss_record.append(batch_loss)

        # Display current epoch number and loss on tqdm progress bar.
        train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
        train_pbar.set_postfix({'loss': batch_loss})

    mean_train_loss = sum(loss_record)/len(loss_record)
    writer.add_scalar('Loss/train', mean_train_loss, step)

    model.eval()  # Set your model to evaluation mode.
    loss_record = []
    for x, y in valid_loader:
        x, y = x.to(device), y.to(device)
        # No parameters are updated during validation, so skip gradient tracking.
        with torch.no_grad():
            pred = model(x)
            loss = criterion(pred, y)

        loss_record.append(loss.item())
为什么要进入验证模式(model.eval()):
让 BatchNorm 改用训练时累计的均值/方差(running statistics)而不是当前 batch 的统计量,并关闭 Dropout,使验证结果稳定、可复现。
为什么要停止梯度计算(torch.no_grad()):
验证时不需要更新模型,也就不需要求梯度;不构建计算图还可以节省显存并加快计算。
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Build the model and move it to the chosen device.
model = My_Model(input_dim=x_train.shape[1]).to(device)
for x, y in valid_loader:
    # Move every batch to the same device as the model before using it.
    x, y = x.to(device), y.to(device)
def same_seed(seed):
    '''Fixes random number generator seeds for reproducibility.

    Args:
        seed: Integer seed applied to NumPy's global generator, to
            PyTorch's default generator and, when CUDA is available, to
            the generators of all GPUs.
    '''
    # Make cuDNN return the same (deterministic) convolution algorithm every time.
    torch.backends.cudnn.deterministic = True
    # When True, cuDNN benchmarks multiple convolution algorithms and selects
    # the fastest; that choice can vary between runs, so turn it off.
    torch.backends.cudnn.benchmark = False
    # NumPy and PyTorch keep separate random generators: this call only fixes
    # NumPy's random numbers, and PyTorch is seeded on its own below.
    np.random.seed(seed)
    # Seed PyTorch's default random number generator.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # Seed the random number generators of all GPUs, not just the current one.
        torch.cuda.manual_seed_all(seed)