pip install ray
,很多依赖包版本更不上,会一直报错,这个问题我花了很长时间解决。ray
pip install ray==1.1.0
aiohttp
包的版本改一下,参考pip install aiohttp==3.7.3
#加载包
import numpy as np
import pandas as pd
from plotnine import*
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
import matplotlib as mpl
import matplotlib.pyplot as plt
#中文显示问题
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# notebook嵌入图片
%matplotlib inline
# 提高分辨率
%config InlineBackend.figure_format='retina'
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer,mean_squared_error
from sklearn.preprocessing import MinMaxScaler
# 忽略警告
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.nn as nn
from torch import optim
import torchvision.transforms as trans
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from ray import tune
import ray.cloudpickle as pickle
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import EarlyStopping
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
name = ['序号','新生儿体重','孕妇怀孕期','新生儿胎次状况','孕妇怀孕时年龄','孕妇怀孕前身高','孕妇怀孕前体重','孕妇吸烟状况']
data = pd.read_csv("D:/学习资料/数学/数学建模培训与比赛/孕妇吸烟与胎儿健康/data0901.txt", sep = '\t',names = name,encoding="gbk",index_col = '序号')
data.head()
# 剔除缺失值
data = data[(data['孕妇吸烟状况'] < 9) &
(data['孕妇怀孕前体重'] < 999) &
(data['孕妇怀孕前身高'] < 99) &
(data['孕妇怀孕时年龄'] < 99) &
(data['孕妇怀孕期'] < 999)]
# 归一化
scaler_to = preprocessing.MinMaxScaler()
scaler_to = scaler_to.fit(data)
data_to = scaler_to.transform(data)
data_to = pd.DataFrame(data_to)
data_to.columns = data.columns
# 划分训练集与测试集
X_train,X_valid,y_train,y_valid=train_test_split(data_to.iloc[:,1:],data_to.iloc[:,0],test_size=0.1,random_state=42)
X_train.shape,X_valid.shape
# 转换为张量
x_train_to = torch.tensor(X_train.values).to(torch.float)
y_train_to = torch.tensor(y_train.values).to(torch.float)
x_train_to.shape
class linreg(nn.Module):
def __init__(self,input_shape,linear,output_shape):
super(linreg,self).__init__()
self.l1 = torch.nn.Linear(input_shape,linear)
self.l2 = torch.nn.Linear(linear,output_shape)
def forward(self,x):
x = F.relu(self.l1(x))
return self.l2(x)
linear
,即单层神经元个数。还有迭代次数epochs
。config['linear']
def train_model(config):
# 需要调整的参数:神经元个数
model = linreg(x_train_to.shape[1],int(config['linear']),1)
# 选择MSE作为误差衡量标准
criterion = torch.nn.MSELoss()
# 优化器选择adam,学习率为0.001,衰减为0.001
optimizer = optim.Adam(model.parameters(),
lr=0.001,
betas=(0.9, 0.999),
eps=1e-08,
weight_decay=0.001)
# 需要调整的参数:迭代次数
epochs = int(config['times'])
# 收集误差信息
loss_list = []
for epoch in range(epochs):
epoch += 1
optimizer.zero_grad()
outputs = model(x_train_to)
loss = criterion(outputs,y_train_to)
loss.backward()
optimizer.step()
loss_list.append(loss.detach().numpy())
# 因为ray包只接受一个优化指标
# 所以这里对误差取平均值,这样一组参数只有一个评价分数
# 注意这里的my_loss,该变量为评价变量
tune.report(my_loss=np.mean(loss_list))
config = {"linear": tune.grid_search(list(np.arange(2,6,1))),
"times":tune.grid_search(list(np.arange(50,250,10)))}
result = tune.run(train_model,
# CPU使用核心
resources_per_trial={"cpu": 8},
# 搜索空间
config=config)
print("======================== Result =========================")
print(result.results_df)
get_best_config()
方法输出评价变量最小的参数对result.get_best_config(metric="my_loss", mode="min")
my_loss
在3次迭代中变化标准差小于0.001,则停止训练。from ray.tune.suggest.bayesopt import BayesOptSearch
# 定义搜索空间
space = {'linear': (2, 6),
'times': (50, 200)}
# 定义评价目标,最大/最小化目标
bayesopt = BayesOptSearch(space,metric="my_loss",mode="min")
result = tune.run(train_model,
resources_per_trial={"cpu": 8},
# 定义评价目标,最大/最小化目标,容忍次数
stop = EarlyStopping(metric = "my_loss",
mode = "min",
patience = 3),
search_alg=bayesopt,
# 迭代次数(贝叶斯优化次数)
num_samples=30)
print("======================== Result =========================")
print(result.results_df)
get_best_config()
方法输出评价变量最小的参数对result.get_best_config(metric="my_loss", mode="min")
输出:
{'linear': 3.49816047538945, 'times': 192.60714596148742}
loss
值:result.get_best_trial(metric="my_loss", mode="min").last_result["my_loss"]
输出:
0.023971058
def train_model():
model = linreg(x_train_to.shape[1],3,1)
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(),
lr=0.001,
betas=(0.9, 0.999),
eps=1e-08,
weight_decay=0.001)
epochs = 192
loss_list = []
for epoch in range(epochs):
epoch += 1
optimizer.zero_grad()
outputs = model(x_train_to)
loss = criterion(outputs,y_train_to)
loss.backward()
optimizer.step()
loss_list.append(loss.detach().numpy())
plt.figure(dpi = 600)
plt.plot(loss_list)
plt.xlabel("epoch")
plt.ylabel("loss")
print("Loss: {}".format(loss_list[-1]))
train_model()
输出:
Loss: 0.023285632953047752