import gym
# Create the environment
env = gym.make("MountainCar-v0")

# Roll out a purely random policy for a fixed number of episodes and print
# the total reward collected in each one.
episodes = 10
for episode in range(episodes):
    # With the five-value step API used below, reset() returns
    # (observation, info) rather than the bare observation.
    obs, info = env.reset()
    done = False
    rewards = 0
    while not done:
        # Sample a random action from the environment's action space
        action = env.action_space.sample()
        # step() yields (observation, reward, terminated, truncated, info).
        # The episode is over when EITHER flag is set: `terminated` means the
        # task itself ended, `truncated` means a limit such as the time limit
        # cut it short. Checking only the first flag can loop indefinitely
        # when episodes end by truncation.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        env.render()
        rewards += reward
    # Total reward accumulated over this episode
    print(rewards)
本文搭建的环境中,step() 的返回值为4个,可直接用该DQN模型训练;而在上一期基于Q-learning的文章中,自行搭建的环境的 step() 有5个返回值,无法在该DQN模型中运行。后续如果想使用5个返回值,需要重写该代码。
import gym
from stable_baselines3 import DQN
# Build the MountainCar environment the agent is trained on
env = gym.make("MountainCar-v0")

# DQN agent using the built-in "MlpPolicy" network, with console logging on
model = DQN("MlpPolicy", env, learning_rate=5e-4, verbose=1)

# Train for 1.5 million timesteps while displaying a progress bar
model.learn(progress_bar=True, total_timesteps=1_500_000)

# Save the trained agent under the name "DQN_MountainCar"
model.save("DQN_MountainCar")
import gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
def test_something():
    """Trivial always-passing test so that pytest finds a test case.

    Per the author's original note, the program could not be run in their
    setup without these two lines.
    """
    assert 1 == 1
# Create the environment; render_mode="human" displays the run on screen
env = gym.make("MountainCar-v0", render_mode="human")

# Load the saved agent and attach it to the environment
model = DQN.load("DQN_MountainCar", env=env)

# Evaluate the agent for 10 episodes using deterministic actions, rendering
# as it goes, on the environment held by the loaded model
eval_env = model.get_env()
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=10,
    render=True,
    deterministic=True,
)
print(mean_reward)
1.直接使用 pip install stable-baselines3[extra] 安装失败,改用以下命令(通过清华镜像源)安装:
pip install "stable-baselines3[extra]" -i https://pypi.tuna.tsinghua.edu.cn/simple
2.源代码为了测试方便,测试过程并不显示,代码为
env = gym.make("MountainCar-v0")
笔者希望可以显示测试过程,代码改为
env = gym.make("MountainCar-v0", render_mode="human")
1.强化学习案例复现(1)--- MountainCar基于Q-learning_笑傲江湖2023的博客-CSDN博客
1.Stable-Baselines3安装_下载stable-baselines3==1.2.0_cclbl的博客-CSDN博客
2.基于强化学习的gym Mountain Car稳定控制_mountaincar环境-CSDN博客