# main.py:
def str2bool(value):
    """Convert a command-line token such as ``true``/``False``/``1`` to a bool.

    ``argparse`` with ``type=bool`` calls ``bool(text)``, which is ``True`` for
    every non-empty string (including ``"False"``). This converter interprets
    the text instead.

    :param value: the raw command-line string (a real ``bool`` is passed through).
    :return: the parsed boolean.
    :raises argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Run LSTM(LSTM Network)")
    parser.add_argument('-m', '--mode', type=str, choices=['train', 'test'], default='train',
                        help='Run mode(train/test)')
    # The script used to ignore the real command line and always parse this
    # path; keeping it as the default preserves a bare `python main.py` run
    # while letting `--config` actually take effect.
    parser.add_argument('-c', '--config', type=str,
                        default='./experiment/config/HuJangWord/lstm.json',
                        help='Model config Path')
    parser.add_argument('--cuda', type=str2bool, default=False, help='CUDA')
    # NOTE: args.mode and args.cuda are parsed but not consumed in this block.
    args = parser.parse_args()

    config_path = args.config
    config = get_config(config_path)  # NOTE: config_path is relative to main.py

    if config["K_Flod"] == 0:  # no k-fold: single train/test split
        start_time = datetime.datetime.now()
        df_train, df_test, data_frame = No_Flod()
        model, train_avg_loss, df_train_loss = train(
            df_train, data_frame, config["num_epochs"], config["learning_rate"],
            config['weight_decay'], config["batch_size"])
        test_avg_loss, min_test_loss = test(
            model, df_test, data_frame, config["batch_size"], df_train_loss)
        end_time = datetime.datetime.now()
        # total_seconds() includes whole days; timedelta.seconds would wrap
        # around after 24 hours.
        minutes, seconds = divmod(int((end_time - start_time).total_seconds()), 60)
        print(f"LSTM模型训练所用时间为{minutes}分钟{seconds}秒"
              f"平均训练损失为{train_avg_loss},平均测试损失为{test_avg_loss}")
    else:  # k-fold training
        K_Flod(config["K_Flod"], num_EPOCHS=config["num_epochs"],
               Learning_rate=config["learning_rate"],
               Weight_decay=config['weight_decay'], batch_size=config["batch_size"])
def K_Flod(k, num_EPOCHS=20, Learning_rate=0.01, Weight_decay=0.1, batch_size=256):
    """Run k-fold training/evaluation and print the averaged losses.

    For every fold ``i`` in ``range(k)`` the split returned by
    ``get_k_flod_data(k, i)`` is trained with ``train`` and evaluated with
    ``test``. The running minimum test loss is threaded through ``test`` so
    that it can decide when to persist a model.

    :param k: number of folds; must be at least 2.
    :param num_EPOCHS: epochs per fold, forwarded to ``train``.
    :param Learning_rate: learning rate, forwarded to ``train``.
    :param Weight_decay: weight decay, forwarded to ``train``.
    :param batch_size: batch size, forwarded to ``train`` and ``test``.
    :return: None; the summary is printed.
    :raises ValueError: if ``k`` is smaller than 2.
    """
    # Fail before any file is read or any fold is trained; previously k == 0
    # ended in a ZeroDivisionError and k == 1 in an assert inside the callee.
    if k < 2:
        raise ValueError(f"k must be at least 2 for k-fold training, got {k}")

    train_avg_loss, test_avg_loss = 0, 0
    data_frame = pd.read_csv(os.path.join(file_root, file_name))
    start_time = datetime.datetime.now()
    min_test_loss = float('inf')
    for i in range(k):
        print(f"接下来进入第{i+1}折训练:")
        df_train, df_test = get_k_flod_data(k, i)  # split for fold i
        model, train_k_loss, df_train_loss = train(
            df_train, data_frame, num_EPOCHS, Learning_rate, Weight_decay, batch_size)
        test_k_loss, min_test_loss = test(
            model, df_test, data_frame, batch_size, df_train_loss, min_test_loss)
        train_avg_loss += train_k_loss
        test_avg_loss += test_k_loss
    train_avg_loss /= k
    test_avg_loss /= k
    cur_time = datetime.datetime.now()
    # total_seconds() keeps whole days; timedelta.seconds alone would wrap
    # around once the run exceeds 24 hours.
    minutes, seconds = divmod(int((cur_time - start_time).total_seconds()), 60)
    print(f"LSTM模型{k}折所用时间为{minutes}分钟{seconds}秒"
          f"平均训练损失为{train_avg_loss},平均测试损失为{test_avg_loss}")
def get_k_flod_data(k, i):
    """Return the ``(train, test)`` frames for fold ``i`` of a ``k``-fold split.

    The CSV at ``file_root/temporary_data_file_name`` is cut into ``k``
    contiguous folds of ``rows // k`` rows each. Fold ``i`` becomes the test
    frame and every other row (including any remainder rows after the last
    fold) becomes the training frame. Both frames get a fresh 0-based index.

    :param k: number of folds; must be greater than 1.
    :param i: 0-based index of the fold used for testing; ``0 <= i < k``.
    :return: tuple ``(df_train, df_test)``.
    :raises ValueError: if ``k`` or ``i`` is out of range.
    """
    # Explicit exceptions instead of ``assert``: asserts vanish under ``-O``.
    if k <= 1:
        raise ValueError(f"k must be greater than 1, got {k}")
    if not 0 <= i < k:
        raise ValueError(f"fold index must satisfy 0 <= i < {k}, got {i}")

    temp_data_frame = pd.read_csv(os.path.join(file_root, temporary_data_file_name))
    n_rows = temp_data_frame.shape[0]
    fold_size = n_rows // k  # rows per fold (floor division)
    start, stop = fold_size * i, fold_size * (i + 1)

    # np.r_ cannot know the frame length: an open-ended slice such as
    # ``stop:`` expands to ``arange(0, stop)``, which would put the test fold
    # into the training data and drop the tail. Spell out both bounds.
    train_positions = np.r_[0:start, stop:n_rows]
    df_train = temp_data_frame.iloc[train_positions].reset_index(drop=True)
    df_test = temp_data_frame.iloc[start:stop].reset_index(drop=True)
    return df_train, df_test
def No_Flod():
    """Load the data and make one random 80/20 train/test split (no k-fold).

    Reads the CSV at ``file_root/file_name`` (returned unchanged as
    ``data_frame``) and the CSV at ``file_root/temporary_data_file_name``,
    whose rows are shuffled with the ``random`` module and split so that the
    first 20% (rounded down) of the shuffled rows form the test frame.

    :return: tuple ``(df_train, df_test, data_frame)``; both split frames have
        a fresh 0-based index.
    """
    data_frame = pd.read_csv(os.path.join(file_root, file_name))
    temp_data_frame = pd.read_csv(os.path.join(file_root, temporary_data_file_name))

    # Shuffle a private list of row positions rather than ``index.values``,
    # which may be the frame's own (possibly read-only) index buffer.
    n_rows = temp_data_frame.shape[0]
    positions = list(range(n_rows))
    random.shuffle(positions)

    # Slice bounds must be integers; ``0.2 * n_rows`` alone is a float and
    # makes the slicing below raise TypeError.
    cri_poi = int(0.2 * n_rows)
    df_train = temp_data_frame.iloc[positions[cri_poi:]].reset_index(drop=True)
    df_test = temp_data_frame.iloc[positions[:cri_poi]].reset_index(drop=True)
    return df_train, df_test, data_frame
class EduDataSet(Dataset):
    """Dataset that expands each sample row into a fixed-length feature sequence.

    Each row of ``df_part`` holds a label in column 1 and, in columns 2..7,
    six row positions into ``data_frame`` (``-1`` marks an empty slot). An
    item is the stacked feature rows (``data_frame`` columns from index 2
    onward) for those six positions, with empty slots zero-filled.
    """

    HISTORY_LEN = 6   # number of referenced rows per sample (columns 2..7)
    FEATURE_DIM = 9   # width of the zero padding; must match the feature row width

    def __init__(self, df_part, data_frame):
        """
        :param df_part: frame with one sample per row (label + six row references).
        :param data_frame: frame the references point into, addressed by position.
        """
        self.data_frame = data_frame
        self.temp_data_frame = df_part

    def __getitem__(self, item):
        """Return ``(features, label)`` for the sample at position ``item``.

        :return: ``features`` is a ``(HISTORY_LEN, FEATURE_DIM)`` tensor in the
            default floating dtype; ``label`` is the raw value of column 1.
        """
        data = self.temp_data_frame.iloc[item].tolist()
        input_x = []
        for slot in range(self.HISTORY_LEN):
            index = int(data[slot + 2])
            if index == -1:
                # Empty slot: zero vector of the same width as a feature row.
                input_x.append([0.0] * self.FEATURE_DIM)
            else:
                input_x.append(self.data_frame.iloc[index].tolist()[2:])
        # Pin the dtype: without it a sample made only of padding became an
        # integer tensor while every other sample was floating point, so
        # samples of one batch could disagree on dtype.
        input_x = torch.tensor(input_x, dtype=torch.get_default_dtype())
        return input_x, data[1]

    def __len__(self):
        """Number of samples, i.e. rows of the sample frame."""
        return self.temp_data_frame.shape[0]
# The model here uses only a single LSTM layer followed by one linear layer.
class LSTMpred(nn.Module):
    """Single-layer LSTM followed by a linear layer producing one value per sample.

    ``nn.LSTM`` is built with its default arguments, so ``forward`` expects
    input laid out as ``(seq_len, batch, input_size)``. The recurrent state is
    kept on ``self.hidden`` and overwritten by every ``forward`` call; callers
    that want independent batches reset it with ``init_hidden()`` first.
    """

    def __init__(self, input_size, hidden_dim):
        """
        :param input_size: number of features per time step.
        :param hidden_dim: size of the LSTM hidden state.
        """
        super(LSTMpred, self).__init__()
        self.input_dim = input_size
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_size, hidden_dim)
        self.hidden2out = nn.Linear(hidden_dim, 1)
        self.hidden = self.init_hidden()

    def init_hidden(self, batch_size=256, dtype=torch.double, device=None):
        """Create a zero ``(h_0, c_0)`` state pair.

        The defaults reproduce the previously hard-coded state (batch 256,
        double precision, default device).

        :param batch_size: batch dimension of the state tensors.
        :param dtype: dtype of the state tensors.
        :param device: device of the state tensors; ``None`` uses the default.
        :return: tuple of two ``(1, batch_size, hidden_dim)`` tensors.
        """
        shape = (1, batch_size, self.hidden_dim)
        return (torch.zeros(shape, requires_grad=True, dtype=dtype, device=device),
                torch.zeros(shape, requires_grad=True, dtype=dtype, device=device))

    def forward(self, x):
        """Run the LSTM over ``x`` and map the last time step to one output.

        :param x: input tensor; for 3-D input the second dimension is the batch.
        :return: output of the linear layer for the last time step.
        """
        if x.dim() == 3:
            h_0 = self.hidden[0]
            # A stored state built for another batch size, dtype or device
            # cannot be fed to the LSTM; start from a matching zero state
            # instead of failing (the state used to be fixed at batch 256).
            if (h_0.size(1) != x.size(1) or h_0.dtype != x.dtype
                    or h_0.device != x.device):
                self.hidden = self.init_hidden(x.size(1), dtype=x.dtype, device=x.device)
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        # Last time step; squeeze(0) only has an effect when the batch is 1.
        x = lstm_out[-1].squeeze(0)
        x = self.hidden2out(x)
        return x
def train(df_train, data_frame, num_EPOCHS, Learning_rate, Weight_decay, batch_size):
    """Train a fresh ``LSTMpred(9, 6)`` model on ``df_train``.

    Uses SGD with a ``StepLR`` schedule (step every 2 epochs, gamma 0.2).
    Batches are transposed so the sequence dimension comes first, and the
    model's hidden state is reset before every batch. Incomplete final
    batches are dropped by the loader.

    :param df_train: sample frame wrapped by ``EduDataSet``.
    :param data_frame: feature frame referenced by the samples.
    :param num_EPOCHS: number of passes over the training loader.
    :param Learning_rate: SGD learning rate.
    :param Weight_decay: SGD weight decay.
    :param batch_size: loader batch size.
    :return: ``(model, min_train_loss, df)`` where ``min_train_loss`` is the
        smallest per-epoch mean loss and ``df`` holds, one row per epoch, the
        loss of every 100th batch as plain floats.
    :raises ValueError: if the loader yields no full batch.
    """
    model = LSTMpred(9, 6).double()
    optimizer = optime.SGD(model.parameters(), lr=Learning_rate, weight_decay=Weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.2)
    loss_arr = []
    train_sets = EduDataSet(df_train, data_frame)
    train_loader = DataLoader(train_sets, batch_size=batch_size, shuffle=True, num_workers=20,
                              drop_last=True, pin_memory=True)
    num_batches = len(train_loader)
    if num_batches == 0:
        # drop_last=True discards a partial batch, so fewer samples than
        # batch_size leaves nothing to train on (formerly a ZeroDivisionError).
        raise ValueError(
            f"training set has fewer samples than batch_size={batch_size}; no batch to train on")

    start_time = datetime.datetime.now()
    min_train_loss = float('inf')
    for epo in range(num_EPOCHS):
        temp_loss_arr = []
        temp_loss_epo = 0
        for index, (inputs, labels) in enumerate(train_loader):
            inputs = torch.transpose(inputs, 0, 1)  # (batch, seq, feat) -> (seq, batch, feat)
            model.hidden = model.init_hidden()      # each batch starts from a zero state
            out = model(inputs.double())
            loss = loss_function(out.squeeze(-1), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_value = loss.item()
            temp_loss_epo += loss_value
            if index % 100 == 0:
                # Record the float, not the tensor: a stored tensor keeps its
                # autograd graph alive and ends up as "tensor(...)" objects
                # in the returned DataFrame.
                temp_loss_arr.append(loss_value)
                cur_time = datetime.datetime.now()
                # total_seconds() does not wrap around after 24 hours.
                elapsed = int((cur_time - start_time).total_seconds())
                print(f"当前训练循环为{epo}:{index}, 目前耗时为{elapsed},损失为{loss}")
        # Mean loss of this epoch; keep the smallest epoch mean seen so far.
        temp_loss_epo /= num_batches
        if min_train_loss > temp_loss_epo:
            min_train_loss = temp_loss_epo
        scheduler.step()
        loss_arr.append(temp_loss_arr)
    df = pd.DataFrame(loss_arr)  # loss curve of this run, one row per epoch
    return model, min_train_loss, df
def test(model, df_test, data_frame, batch_size, df_train_loss, min_test_loss=float('inf')):
    """Evaluate ``model`` on ``df_test`` and persist it when it beats the best loss.

    The mean batch loss is computed under ``torch.no_grad()``. When it is
    lower than ``min_test_loss`` the training-loss frame is written to
    ``file_root/file_name``, the model weights to ``file_root/model_name``,
    and a line is appended to ``file_root/file_log``.

    :param model: trained model; if it exposes ``init_hidden`` its state is
        reset before every batch, as during training.
    :param df_test: sample frame wrapped by ``EduDataSet``.
    :param data_frame: feature frame referenced by the samples.
    :param batch_size: loader batch size (incomplete final batch is dropped).
    :param df_train_loss: training-loss frame saved alongside a new best model.
    :param min_test_loss: best test loss seen so far.
    :return: ``(test_loss, min_test_loss)`` with the possibly updated minimum.
    :raises ValueError: if the loader yields no full batch.
    """
    test_sets = EduDataSet(df_test, data_frame)
    test_loader = DataLoader(test_sets, batch_size=batch_size, shuffle=False, num_workers=20,
                             drop_last=True, pin_memory=True)
    num_batches = len(test_loader)
    if num_batches == 0:
        # drop_last=True discards a partial batch; without this guard the
        # averaging below divided by zero.
        raise ValueError(
            f"test set has fewer samples than batch_size={batch_size}; no batch to evaluate")

    with torch.no_grad():
        test_loss = 0
        for index, (inputs, labels) in enumerate(test_loader):
            inputs = torch.transpose(inputs, 0, 1)  # (batch, seq, feat) -> (seq, batch, feat)
            # Start every batch from a zero state, as train() does; otherwise
            # the state of the last training batch (and then of each previous
            # test batch) leaks into the prediction.
            if hasattr(model, 'init_hidden'):
                model.hidden = model.init_hidden()
            out = model(inputs.double())
            test_loss += loss_function(out.squeeze(-1), labels)
            if index%100 == 0:
                print(f"当前测试循环为{index},损失为{test_loss}")
        test_loss = test_loss/num_batches
    print(f"测试完成,平均损失值为{test_loss}")
    if min_test_loss > test_loss:
        min_test_loss = test_loss
        # NOTE(review): K_Flod/No_Flod read their feature CSV from
        # file_root/file_name as well; if both refer to the same module-level
        # value this overwrites the dataset — confirm the names differ.
        df_train_loss.to_csv(os.path.join(file_root, file_name), index=False, encoding="utf-8")
        torch.save(model.state_dict(), os.path.join(file_root, model_name))
        # Explicit encoding: the log line contains non-ASCII text and the
        # platform default may not be able to encode it.
        with open(os.path.join(file_root, file_log), 'a', encoding="utf-8") as f:
            curtime = datetime.datetime.now()
            f.write(f"当前时间为:{curtime}, test_min_loss: {test_loss}, train_loss_decline: {file_name}\n")
    return test_loss, min_test_loss