Test environment: Ubuntu 16.04 with ROS Kinetic.
Download openai_ros:
git clone https://bitbucket.org/theconstructcore/openai_ros.git
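The package should be cloned into the src folder of a catkin workspace and built. A minimal sketch, assuming the workspace is at ~/catkin_ws:

cd ~/catkin_ws/src
git clone https://bitbucket.org/theconstructcore/openai_ros.git
cd ~/catkin_ws
catkin_make
source devel/setup.bash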
openai_ros depends on the following ROS packages:
message_runtime rospy gazebo_msgs std_msgs geometry_msgs controller_manager_msgs
If any of these dependencies is missing, running the examples will fail with an error.
For example, to install controller_manager_msgs:
sudo apt-get install ros-kinetic-controller-manager-msgs
Example:
Create a my_turtlebot2_training package under catkin_ws/src:
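A sketch of the package creation, assuming the workspace from above (~/catkin_ws) and declaring rospy and openai_ros as dependencies; the scripts folder is just a common place to keep the training script:

cd ~/catkin_ws/src
catkin_create_pkg my_turtlebot2_training rospy openai_ros
mkdir -p my_turtlebot2_training/scripts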
Then create a Python file named start_training.py (for example in the scripts folder), make it executable with chmod +x, and add the following content:
#!/usr/bin/env python

import gym
import numpy
import time
import qlearn
from gym import wrappers
# ROS packages required
import rospy
import rospkg
from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment


if __name__ == '__main__':

    rospy.init_node('example_turtlebot2_maze_qlearn',
                    anonymous=True, log_level=rospy.WARN)

    # Init OpenAI_ROS ENV
    task_and_robot_environment_name = rospy.get_param(
        '/turtlebot2/task_and_robot_environment_name')
    env = StartOpenAI_ROS_Environment(
        task_and_robot_environment_name)
    # Create the Gym environment
    rospy.loginfo("Gym environment done")
    rospy.loginfo("Starting Learning")

    # Set the logging system
    rospack = rospkg.RosPack()
    # Training results are written inside this package
    # (must match the package name created above)
    pkg_path = rospack.get_path('my_turtlebot2_training')
    outdir = pkg_path + '/training_results'
    env = wrappers.Monitor(env, outdir, force=True)
    rospy.loginfo("Monitor Wrapper started")

    last_time_steps = numpy.ndarray(0)

    # Loads parameters from the ROS param server
    # Parameters are stored in a yaml file inside the config directory
    # They are loaded at runtime by the launch file
    Alpha = rospy.get_param("/turtlebot2/alpha")
    Epsilon = rospy.get_param("/turtlebot2/epsilon")
    Gamma = rospy.get_param("/turtlebot2/gamma")
    epsilon_discount = rospy.get_param("/turtlebot2/epsilon_discount")
    nepisodes = rospy.get_param("/turtlebot2/nepisodes")
    nsteps = rospy.get_param("/turtlebot2/nsteps")
    running_step = rospy.get_param("/turtlebot2/running_step")

    # Initialises the algorithm that we are going to use for learning
    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
    initial_epsilon = qlearn.epsilon

    start_time = time.time()
    highest_reward = 0

    # Starts the main training loop: the one about the episodes to do
    for x in range(nepisodes):
        rospy.logdebug("############### WALL START EPISODE=>" + str(x))

        cumulated_reward = 0
        done = False
        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        # Initialize the environment and get first state of the robot
        observation = env.reset()
        state = ''.join(map(str, observation))

        # Show on screen the actual situation of the robot
        # env.render()
        # for each episode, we test the robot for nsteps
        for i in range(nsteps):
            rospy.logwarn("############### Start Step=>" + str(i))
            # Pick an action based on the current state
            action = qlearn.chooseAction(state)
            rospy.logwarn("Next action is:%d", action)
            # Execute the action in the environment and get feedback
            observation, reward, done, info = env.step(action)

            rospy.logwarn(str(observation) + " " + str(reward))
            cumulated_reward += reward
            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            nextState = ''.join(map(str, observation))

            # Make the algorithm learn based on the results
            rospy.logwarn("# state we were=>" + str(state))
            rospy.logwarn("# action that we took=>" + str(action))
            rospy.logwarn("# reward that action gave=>" + str(reward))
            rospy.logwarn("# episode cumulated_reward=>" +
                          str(cumulated_reward))
            rospy.logwarn(
                "# State in which we will start next step=>" + str(nextState))
            qlearn.learn(state, action, reward, nextState)

            if not (done):
                rospy.logwarn("NOT DONE")
                state = nextState
            else:
                rospy.logwarn("DONE")
                last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
                break
            rospy.logwarn("############### END Step=>" + str(i))
            # raw_input("Next Step...PRESS KEY")
            # rospy.sleep(2.0)
        m, s = divmod(int(time.time() - start_time), 60)
        h, m = divmod(m, 60)
        rospy.logerr(("EP: " + str(x + 1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(
            round(qlearn.gamma, 2)) + " - epsilon: " + str(round(qlearn.epsilon, 2)) + "] - Reward: " + str(
            cumulated_reward) + " Time: %d:%02d:%02d" % (h, m, s)))

    rospy.loginfo(("\n|" + str(nepisodes) + "|" + str(qlearn.alpha) + "|" + str(qlearn.gamma) + "|" + str(
        initial_epsilon) + "*" + str(epsilon_discount) + "|" + str(highest_reward) + "| PICTURE |"))

    l = last_time_steps.tolist()
    l.sort()

    # print("Parameters: a="+str)
    rospy.loginfo("Overall score: {:0.2f}".format(last_time_steps.mean()))
    rospy.loginfo("Best 100 score: {:0.2f}".format(
        reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))

    env.close()
This script builds the reinforcement-learning training loop and trains the robot with the Q-learning algorithm.
Each training run has an associated configuration file that holds the parameters required for the task. Inside the package, create a new folder named config and, inside it, a new file named my_turtlebot2_maze_params.yaml:
turtlebot2: # namespace
    task_and_robot_environment_name: 'MyTurtleBot2Wall-v0'
    ros_ws_abspath: "/home/user/simulation_ws"
    running_step: 0.04 # amount of time the control will be executed
    pos_step: 0.016 # increment in position for each command

    # qlearn parameters
    alpha: 0.1
    gamma: 0.7
    epsilon: 0.9
    epsilon_discount: 0.999
    nepisodes: 500
    nsteps: 10000
    running_step: 0.06 # Time for each step
The parameters fall into two groups:
Environment-related parameters: these depend on the RobotEnvironment and TaskEnvironment you selected.
RL-algorithm parameters: these depend on the reinforcement-learning algorithm you selected.
Create a launch folder in the package. Inside it, create a new file named start_training.launch:
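The launch file only needs to load the yaml parameters onto the parameter server and start the training script. A minimal sketch, assuming the package and file names used above:

<launch>
    <!-- Load the task and Q-learning parameters from the config file -->
    <rosparam command="load"
              file="$(find my_turtlebot2_training)/config/my_turtlebot2_maze_params.yaml" />
    <!-- Start the training script created earlier -->
    <node pkg="my_turtlebot2_training" name="example_turtlebot2_maze_qlearn"
          type="start_training.py" output="screen" />
</launch>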
Start the training from a terminal:
roslaunch my_turtlebot2_training start_training.launch
Problems you may run into:
gym is not installed (ImportError: No module named gym). Install it from source:
git clone https://github.com/openai/gym
cd gym
pip install -e .
The qlearn module is missing.
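start_training.py does import qlearn, but qlearn.py is not part of openai_ros, so it has to be placed next to the training script. The Construct's example packages (e.g. turtle2_openai_ros_example) ship one; alternatively, a minimal tabular Q-learning sketch that only implements the interface used above (QLearn(actions, alpha, gamma, epsilon), chooseAction() and learn()) could look like this:

#!/usr/bin/env python
# qlearn.py - minimal epsilon-greedy tabular Q-learning sketch.
# This is an illustrative stand-in, not the original file from the examples.
import random


class QLearn:
    def __init__(self, actions, epsilon, alpha, gamma):
        self.q = {}              # Q-values keyed by (state, action)
        self.actions = actions   # list of discrete actions
        self.epsilon = epsilon   # exploration rate
        self.alpha = alpha       # learning rate
        self.gamma = gamma       # discount factor

    def getQ(self, state, action):
        return self.q.get((state, action), 0.0)

    def chooseAction(self, state):
        # With probability epsilon pick a random action, otherwise be greedy.
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        q_values = [self.getQ(state, a) for a in self.actions]
        max_q = max(q_values)
        best = [a for a, q in zip(self.actions, q_values) if q == max_q]
        return random.choice(best)

    def learn(self, state, action, reward, next_state):
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        max_q_next = max([self.getQ(next_state, a) for a in self.actions])
        old_q = self.getQ(state, action)
        target = reward + self.gamma * max_q_next
        self.q[(state, action)] = old_q + self.alpha * (target - old_q)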