If you run into problems, read this article first; I have collected many of the issues that group members have hit.
My graduation thesis mainly uses DQN, PPO and SAC in simulation and then deploys them on a real robot (the real-world results were not great).
DQN and PPO use discrete control; SAC uses continuous control.
Notes on the code:
DQN is a PyTorch port of the official TurtleBot3 example code, because setting up the TensorFlow environment is painful (anyone who has tried it knows);
PPO is adapted from a Gym implementation;
SAC is an existing implementation written by someone else;
DDPG is the graduation project of a student from the previous year.
Environment: Ubuntu 18.04 + PyTorch + ROS Melodic + Gazebo 9
This combination covers everything used here and is easy to configure; just follow the steps below.
Ubuntu 18.04 installation is skipped here; both a virtual machine and a dual-boot setup work.
ROS Melodic installation: run the fishros one-line installer below.
wget http://fishros.com/install -O fishros && . fishros
Then follow the prompts; the installation selection order is 1, 1, 2, 1, 1.
Because the code calls from tf.transformations import euler_from_quaternion, quaternion_from_euler under Python 3, the tf and geometry packages below have to be built for Python 3:
mkdir -p catkin_ws/src
cd catkin_ws/src
git clone https://github.com/ros/geometry.git
git clone https://github.com/ros/geometry2.git
git clone https://gitee.com/fangxiaosheng666/PPO-SAC-DQN-DDPG
cd ..
catkin_make -DPYTHON_EXECUTABLE=/usr/bin/python3
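After the build you can quickly verify the Python 3 tf bindings. A minimal check, assuming you have sourced catkin_ws/devel/setup.bash in this terminal and run it with python3:
#!/usr/bin/env python3
# Sanity check that the Python 3 build of tf.transformations is importable and works.
from tf.transformations import euler_from_quaternion, quaternion_from_euler

q = quaternion_from_euler(0.0, 0.0, 1.57)   # roll, pitch, yaw -> quaternion
print(euler_from_quaternion(q))             # expect roughly (0.0, 0.0, 1.57)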
Build with Python 2 (the TurtleBot3 packages):
mkdir -p ws/src
cd ws/src
git clone https://github.com/ROBOTIS-GIT/turtlebot3.git
git clone https://github.com/ROBOTIS-GIT/turtlebot3_simulations.git
git clone https://github.com/ROBOTIS-GIT/turtlebot3_msgs.git
cd ..
catkin_make
Reference: TurtleBot3 (robotis.com)
roscd turtlebot3_description/urdf/
gedit turtlebot3_burger.gazebo.xacro
# To visualize the lidar in Gazebo, set the laser visual flag in this file to true
# Change the number of lidar samples to 24:
<samples>24</samples>            <!-- The number of samples. Modify it to 24 -->
<resolution>1</resolution>
<min_angle>0.0</min_angle>
<max_angle>6.28319</max_angle>
cd ws
rosdep install --from-paths src --ignore-src -r -y
catkin_make
If you do not have a trained model of your own, set self.load_models = False.
To load a model for evaluation or to continue training it, set self.load_models = True.
Adjust the file paths to match your own directories.
self.load_models = False
if self.load_models:
load_model1 = torch.load("/home/ffd/DRL/PPO/model/maze/98ep.pt")
# load_model2 =torch.load("/home/ffd/QDN/model/210criter.pkl")
self.actor_net.load_state_dict(load_model1['actor_net'])
self.critic_net.load_state_dict(load_model1['critic_net'])
print("load model:",str(self.load_ep))
print("load model successful!!!!!!")
def save_model(self,dir):
state = {'target_net':self.target_net.state_dict(),'eval_net':self.eval_net.state_dict(), 'optimizer':self.optimizer.state_dict(), 'epoch':e}
torch.save(state,"/home/ffd/QDN/model/"+ dir+"a.pt")
sudo gedit /opt/ros/melodic/lib/python2.7/dist-packages/rospy/impl/tcpros_base.py
Compare with your copy of the file and add the missing lines below:
except AttributeError:
pass
def close(self):
"""close i/o and release resources"""
if not self.done:
try:
if self.socket is not None:
try:
self.socket.shutdown(socket.SHUT_RDWR)
except:
pass
finally:
self.socket.close()
except AttributeError:
pass
finally:
self.socket = self.read_buff = self.write_buff = self.protocol = None
super(TCPROSTransport, self).close()
Changing the name of the loaded map/model:
Modifying the goal points (you can adjust them to match your own world): if you load your own map, change self.stage = 2 to 4 and then edit the coordinates below.
self.modelPath = os.path.dirname(os.path.realpath(__file__))
self.modelPath = self.modelPath.replace('/home/ffd/DRL/PPO',
'/home/ffd/DRL/PPO/model.sdf')
self.stage = 2
while position_check:
goal_x_list = [0.6, 1.9, 0.5, 0.2, -0.8, -1, -1.9, 0.5, 2, 0.5, 0, -0.1, -2]
goal_y_list = [0, -0.5, -1.9, 1.5, -0.9, 1, 1.1, -1.5, 1.5, 1.8, -1, 1.6, -0.8]
self.index = random.randrange(0, 13)
print(self.index, self.last_index)
if self.last_index == self.index:
position_check = True
else:
self.last_index = self.index
position_check = False
These goal coordinates are taken from the Gazebo map (the launch file for this world is shown after the sketch below).
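If you build your own world, you can read candidate goal coordinates straight from Gazebo instead of guessing. A minimal sketch (a hypothetical helper, not part of the repository) that prints the position of every model currently spawned while the simulation is running:
#!/usr/bin/env python3
# Print the name and (x, y) position of every model in Gazebo once; handy for
# filling in goal_x_list / goal_y_list with positions that exist in your world.
import rospy
from gazebo_msgs.msg import ModelStates

rospy.init_node('list_model_positions')
msg = rospy.wait_for_message('gazebo/model_states', ModelStates)
for name, pose in zip(msg.name, msg.pose):
    print(name, round(pose.position.x, 2), round(pose.position.y, 2))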
<launch>
<arg name="model" default="$(env TURTLEBOT3_MODEL)" doc="model type [burger, waffle, waffle_pi]"/>
<arg name="x_pos" default="-0.7"/>
<arg name="y_pos" default="0.0"/>
<arg name="z_pos" default="0.0"/>
<include file="$(find gazebo_ros)/launch/empty_world.launch">
<arg name="world_name" value="$(find turtlebot3_gazebo)/worlds/turtlebot3_stage_4.world"/>
<arg name="paused" value="false"/>
<arg name="use_sim_time" value="true"/>
<arg name="gui" value="true"/>
<arg name="headless" value="false"/>
<arg name="debug" value="false"/>
</include>
<param name="robot_description" command="$(find xacro)/xacro --inorder $(find turtlebot3_description)/urdf/turtlebot3_$(arg model).urdf.xacro" />
<node name="spawn_urdf" pkg="gazebo_ros" type="spawn_model" args="-urdf -model turtlebot3_burger -x $(arg x_pos) -y $(arg y_pos) -z $(arg z_pos) -param robot_description" />
<node pkg="turtlebot3_dqn" type ="combination_obstacle_1" name="combination_obstacle_1" output="screen"/>
<node pkg="turtlebot3_dqn" type ="combination_obstacle_2" name="combination_obstacle_2" output="screen"/>
</launch>
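Note: this launch file reads the TURTLEBOT3_MODEL environment variable, so set it first in each terminal (for example, export TURTLEBOT3_MODEL=burger), otherwise the xacro line above will fail.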
source ~/ws/devel/setup.bash
roslaunch turtlebot3_gazebo turtlebot3_stage_2.launch
source ~/catkin_ws/devel/setup.bash
python3 PPO.py
PPO:
PPO in the ROS/TurtleBot3 simulation
DQN:
DQN after 200 training episodes
SAC:
SAC algorithm
Code repository:
git clone https://gitee.com/fangxiaosheng666/PPO-SAC
PPO with discrete actions: video
Deep reinforcement learning for robot navigation
SAC with continuous actions: video
SAC continuous control
Use PyTorch's TensorBoard to log the training data (see the reference):
tb.add_scalar('reward', episode_reward_sum,e)
tb.add_scalar('value_loss',agent.value_loss, e)
tb.add_scalar('action_loss', agent.action_loss, e)
tensorboard --logdir C:\Users\26503\Desktop\毕业设计\训练数据\DQN   # point --logdir at the folder that contains the log files
PPO main program (PPO.py):
#!/usr/bin/env python3
# coding=UTF-8
from collections import namedtuple
from itertools import count
import os, time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Normal, Categorical
from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
from environment_stage_4_ppo import Env
import time
import rospy
import tensorboard
from std_msgs.msg import Float32MultiArray
tb =SummaryWriter()
# Parameters
gamma = 0.99
render = False
seed = 1
log_interval = 10
num_state =68  # lidar readings + 4 (heading, goal distance, min obstacle range, obstacle angle)
num_action = 5  # the 180° in front of the robot split into 5 steering actions
env=Env(num_action)
torch.manual_seed(seed)  # seed the CPU RNG so the results are reproducible
# env.seed(seed)
Transition = namedtuple('Transition', ['state', 'action', 'a_log_prob', 'reward', 'next_state'])
class Actor(nn.Module):  # actor network
def __init__(self):  # define the network layers
super(Actor, self).__init__()
self.fc1 = nn.Linear(num_state, 100)
# self.fc1.weight.data.normal_(0, 0.1)
self.fc2 =nn.Linear(100,100)
# self.fc2.weight.data.normal_(0,0.1)
self.action_head = nn.Linear(100, num_action)
# self.action_head.weight.data.normal_(0, 0.1)
def forward(self, x):  # forward pass
x = F.relu(self.fc1(x))
x=F.relu(self.fc2(x))
x=F.dropout(x)
action_prob = F.softmax(self.action_head(x), dim=1)
return action_prob
class Critic(nn.Module):  # critic network
def __init__(self):  # define the network layers
super(Critic, self).__init__()
self.fc1= nn.Linear(num_state, 100)
# self.fc1.weight.data.normal_(0, 0.1)
self.fc2 =nn.Linear(100,100)
# self.fc2.weight.data.normal_(0,0.1)
self.state_value = nn.Linear(100, 1)
def forward(self, x):  # forward pass
x = F.relu(self.fc1(x))
x=F.dropout(self.fc2(x))
value = self.state_value(x)
return value
class PPO(object):
clip_param = 0.2
max_grad_norm = 0.5
ppo_update_time = 10
buffer_capacity = 1000
batch_size = 128
def __init__(self):
super(PPO, self).__init__()
self.actor_net = Actor()
self.critic_net = Critic()
self.buffer = []
self.counter = 0
self.training_step = 0
self.action_loss= 0.
self.value_loss =0.
self.load_models =False
self.load_ep =104
self.savepath = os.path.dirname(os.path.realpath(__file__))
self.actor_optimizer = optim.Adam(self.actor_net.parameters(), 1e-3)
self.critic_net_optimizer = optim.Adam(self.critic_net.parameters(), 3e-3)
# Adam (Adaptive Moment Estimation) is essentially RMSprop with a momentum term: it uses first- and second-moment estimates of the gradients to adapt each parameter's learning rate. After bias correction, every update step has a bounded range, which keeps the parameters stable.
# load a previously saved model
if self.load_models:
load_model1 = torch.load("/home/ffd/DRL/PPO/model/maze/98ep.pt")
self.actor_net.load_state_dict(load_model1['actor_net'])
self.critic_net.load_state_dict(load_model1['critic_net'])
print("load model:",str(self.load_ep))
print("load model successful!!!!!!")
# select an action from the current policy
def select_action(self, state):
state = torch.from_numpy(state).float().unsqueeze(0)
with torch.no_grad():
action_prob = self.actor_net(state)
c = Categorical(action_prob)
action = c.sample()
return action.item(), action_prob[:,action.item()].item()
# get the state value from the critic
def get_value(self, state):
state = torch.from_numpy(state)
with torch.no_grad():
value = self.critic_net(state)
return value.item()
# save the network and optimizer parameters
def save_param(self,e):
state = {'actor_net':self.actor_net.state_dict(),'critic_net':self.critic_net.state_dict(), 'actor_optimizer':self.actor_optimizer.state_dict(), 'critic_optimizer':self.critic_net_optimizer.state_dict(),'epoch':e}
torch.save(state,self.savepath+str(e)+"state2.pt")
# store a transition in the buffer (replay memory)
def store_transition(self, transition):
self.buffer.append(transition)
self.counter += 1
# compute the losses and update the actor and critic
def update(self, i_ep):
state = torch.tensor([t.state for t in self.buffer], dtype=torch.float)
action = torch.tensor([t.action for t in self.buffer], dtype=torch.long).view(-1, 1)
reward = [t.reward for t in self.buffer]
old_action_log_prob = torch.tensor([t.a_log_prob for t in self.buffer], dtype=torch.float).view(-1, 1)
R = 0
Gt = []
for r in reward[::-1]:
R = r + gamma * R
Gt.insert(0, R)
Gt = torch.tensor(Gt, dtype=torch.float)
#print("The agent is updateing....")
for i in range(self.ppo_update_time):
for index in BatchSampler(SubsetRandomSampler(range(len(self.buffer))), self.batch_size, False):
if self.training_step % 1000 ==0:
print('I_ep {} ,train {} times'.format(i_ep,self.training_step))
#with torch.no_grad():
Gt_index = Gt[index].view(-1, 1)
V = self.critic_net(state[index])
delta = Gt_index - V
advantage = delta.detach()
# epoch iteration, PPO core: one parameter update
action_prob = self.actor_net(state[index]).gather(1, action[index]) # new policy
# update the policy with Adam by maximizing the clipped PPO objective (stochastic gradient ascent)
ratio = (action_prob/old_action_log_prob[index])
surr1 = ratio * advantage
surr2 = torch.clamp(ratio, 1 - self.clip_param, 1 + self.clip_param) * advantage
# update actor network
action_loss = -torch.min(surr1, surr2).mean()  # maximize the surrogate by minimizing its negative (MAX -> MIN descent)
self.action_loss = torch.max(action_loss)
# self.writer.add_scalar('loss/action_loss', action_loss, global_step=self.training_step)
self.actor_optimizer.zero_grad()
action_loss.backward()
nn.utils.clip_grad_norm_(self.actor_net.parameters(), self.max_grad_norm)
self.actor_optimizer.step()
#update critic network
value_loss = F.mse_loss(Gt_index, V)
self.value_loss = torch.max(value_loss)
# self.writer.add_scalar('loss/value_loss', value_loss, global_step=self.training_step)
self.critic_net_optimizer.zero_grad()
value_loss.backward()
nn.utils.clip_grad_norm_(self.critic_net.parameters(), self.max_grad_norm)
self.critic_net_optimizer.step()
self.training_step += 1
del self.buffer[:] # clear experience
# main program: the training loop
def main():
agent = PPO()
rospy.init_node('turtlebot3_dqn_stage_4')
pub_result = rospy.Publisher('result', Float32MultiArray, queue_size=5)
pub_get_action = rospy.Publisher('get_action', Float32MultiArray, queue_size=5)
result = Float32MultiArray()
get_action = Float32MultiArray()
start_time =time.time()
# env=Env()
for e in range(300):
state = env.reset()  # reset the environment at the start of each episode
episode_reward_sum = 0  # total reward accumulated over this episode
done=False
episode_step=6000
for t in range(episode_step):
action, action_prob = agent.select_action(state)
next_state, reward, done= env.step(action)  # apply the action, get the reward and the next state
trans = Transition(state, action, action_prob, reward, next_state)
agent.store_transition(trans)
state = next_state
episode_reward_sum+=reward
pub_get_action.publish(get_action)
if e % 1 ==0: # dqn.save_model(str(e))
agent.save_param(e)
if t >=600:
rospy.loginfo("time out!")
done =True
# At the end of every episode the data is written to TensorBoard; after training you can inspect how it evolved.
# The episode data is also published on the result topic at the end of every episode; you can record it with rosbag, convert it to txt (see the sketch after this file), and process it yourself.
if done :
result.data =[episode_reward_sum,agent.action_loss,agent.value_loss]
pub_result.publish(result)
tb.add_scalar('reward', episode_reward_sum,e)
tb.add_scalar('value_loss',agent.value_loss, e)
tb.add_scalar('action_loss', agent.action_loss, e)
m,s =divmod(int(time.time()- start_time),60)
h,m =divmod(m,60)
agent.update(e)
rospy.loginfo('Ep: %d score: %.2f memory: %d episode_step: %.2f time: %d:%02d:%02d' , e ,episode_reward_sum, agent.counter,t, h, m, s)
break
if __name__ == '__main__':
main()
print("end")
The file that sets and respawns the goal point (respawnGoal.py):
#!/usr/bin/env python
#################################################################################
# Copyright 2018 ROBOTIS CO., LTD.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#################################################################################
# Authors: Gilbert #
import rospy
import random
import time
import os
from gazebo_msgs.srv import SpawnModel, DeleteModel
from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Pose
class Respawn():
def __init__(self):
self.modelPath = os.path.dirname(os.path.realpath(__file__))
self.f = open(self.modelPath+"/model.sdf", 'r')
self.model = self.f.read()
self.stage = 2
self.goal_position = Pose()
self.init_goal_x = 0.6
self.init_goal_y = 0.0
self.goal_position.position.x = self.init_goal_x
self.goal_position.position.y = self.init_goal_y
self.modelName = 'goal'
self.obstacle_1 = 0.6, 0.6
self.obstacle_2 = 0.6, -0.6
self.obstacle_3 = -0.6, 0.6
self.obstacle_4 = -0.6, -0.6
self.last_goal_x = self.init_goal_x
self.last_goal_y = self.init_goal_y
self.last_index = 0
self.sub_model = rospy.Subscriber('gazebo/model_states', ModelStates, self.checkModel)
self.check_model = False
self.index = 0
def checkModel(self, model):
self.check_model = False
for i in range(len(model.name)):
if model.name[i] == "goal":
self.check_model = True
def respawnModel(self):
while True:
if not self.check_model:
rospy.wait_for_service('gazebo/spawn_sdf_model')
spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_sdf_model', SpawnModel)
spawn_model_prox(self.modelName, self.model, 'robotos_name_space', self.goal_position, "world")
rospy.loginfo("Goal position : %.1f, %.1f", self.goal_position.position.x,
self.goal_position.position.y)
break
else:
pass
def deleteModel(self):
while True:
if self.check_model:
rospy.wait_for_service('gazebo/delete_model')
del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel)
del_model_prox(self.modelName)
break
else:
pass
def getPosition(self, position_check=False, delete=False):
if delete:
self.deleteModel()
if self.stage != 4:
while position_check:
goal_x = random.randrange(-12, 13) / 10.0
goal_y = random.randrange(-12, 13) / 10.0
if abs(goal_x - self.obstacle_1[0]) <= 0.4 and abs(goal_y - self.obstacle_1[1]) <= 0.4:
position_check = True
elif abs(goal_x - self.obstacle_2[0]) <= 0.4 and abs(goal_y - self.obstacle_2[1]) <= 0.4:
position_check = True
elif abs(goal_x - self.obstacle_3[0]) <= 0.4 and abs(goal_y - self.obstacle_3[1]) <= 0.4:
position_check = True
elif abs(goal_x - self.obstacle_4[0]) <= 0.4 and abs(goal_y - self.obstacle_4[1]) <= 0.4:
position_check = True
elif abs(goal_x - 0.0) <= 0.4 and abs(goal_y - 0.0) <= 0.4:
position_check = True
else:
position_check = False
if abs(goal_x - self.last_goal_x) < 1 and abs(goal_y - self.last_goal_y) < 1:
position_check = True
self.goal_position.position.x = goal_x
self.goal_position.position.y = goal_y
else:
while position_check:
goal_x_list = [0.6, 1.9, 0.5, 0.2, -0.8, -1, -1.9, 0.5, 2, 0.5, 0, -0.1, -2]
goal_y_list = [0, -0.5, -1.9, 1.5, -0.9, 1, 1.1, -1.5, 1.5, 1.8, -1, 1.6, -0.8]
self.index = random.randrange(0, 13)
print(self.index, self.last_index)
if self.last_index == self.index:
position_check = True
else:
self.last_index = self.index
position_check = False
self.goal_position.position.x = goal_x_list[self.index]
self.goal_position.position.y = goal_y_list[self.index]
time.sleep(0.5)
self.respawnModel()
self.last_goal_x = self.goal_position.position.x
self.last_goal_y = self.goal_position.position.y
return self.goal_position.position.x, self.goal_position.position.y
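The environment file (presumably environment_stage_4_ppo.py, which PPO.py imports as Env):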
#!/usr/bin/env python
# coding=UTF-8
#################################################################################
# Copyright 2018 ROBOTIS CO., LTD.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#################################################################################
# Authors: Gilbert #
import rospy
import numpy as np
import math
from math import pi
from geometry_msgs.msg import Twist, Point, Pose
from sensor_msgs.msg import LaserScan
from nav_msgs.msg import Odometry
from std_srvs.srv import Empty
from tf.transformations import euler_from_quaternion, quaternion_from_euler
from respawnGoal import Respawn
class Env():
def __init__(self, action_size):
self.goal_x = 0
self.goal_y = 0
self.heading = 0
self.action_size = action_size
self.initGoal = True
self.get_goalbox = False
self.position = Pose()
self.obstacle_min_range =0.
self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5)
self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry)
self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty)
self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty)
self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty)
self.respawn_goal = Respawn()
# distance from the robot to the goal
def getGoalDistace(self):
goal_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2)
return goal_distance
# odometry callback: update the position and the heading toward the goal
def getOdometry(self, odom):
self.position = odom.pose.pose.position
orientation = odom.pose.pose.orientation
orientation_list = [orientation.x, orientation.y, orientation.z, orientation.w]
_, _, yaw = euler_from_quaternion(orientation_list)
goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x)
heading = goal_angle - yaw
if heading > pi:
heading -= 2 * pi
elif heading < -pi:
heading += 2 * pi
self.heading = round(heading, 2)
def getState(self, scan):
scan_range = []
heading = self.heading
min_range = 0.1  # collision distance threshold
done = False
for i in range(len(scan.ranges)):
if scan.ranges[i] == float('Inf'):
scan_range.append(3.5)
elif np.isnan(scan.ranges[i]):
scan_range.append(0)
else:
scan_range.append(scan.ranges[i])
obstacle_min_range = round(min(scan_range), 2)  # closest lidar reading
self.obstacle_min_range = obstacle_min_range
obstacle_angle = np.argmin(scan_range)  # index (beam) of the closest reading
# a reading closer than the threshold counts as a collision
if obstacle_min_range< 0.12 :
done = True  # collision
current_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y),2)  # distance from the robot's odometry to the goal
if current_distance < 0.2:  # within 0.2 m of the goal counts as reaching it
self.get_goalbox = True  # goal reached
return scan_range + [heading, current_distance, obstacle_min_range, obstacle_angle], done  # return the state (lidar readings plus 4 values) and done
def setReward(self, state, done, action):  # takes the state, done flag and action
yaw_reward = []  # heading (yaw) reward terms
obstacle_min_range = state[-2]  # closest lidar reading
self.obstacle_min_range = obstacle_min_range
current_distance = state[-3]  # current distance to the goal
heading = state[-4]  # heading of the robot relative to the goal
for i in range(5):
angle = -pi / 4 + heading + (pi / 8 * i) + pi / 2  # angle assigned to each of the 5 actions
tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0])  # heading reward term
yaw_reward.append(tr)  # store the heading reward
if 0.1<obstacle_min_range < 0.2:  # closest reading between 0.1 m and 0.2 m
scan_reward = -1/(obstacle_min_range+0.3)  # roughly -2.5 to -2
else:
scan_reward =2
distance_rate = 2 ** (current_distance / self.goal_distance)  # distance ratio term
reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate) +scan_reward
# reward =scan_reward
# collision
if done:
rospy.loginfo("Collision!!")
reward = -500+scan_reward
# self.goal_x,self.goal_y = self.respawn_goal.getPosition(True,delete=True)
self.pub_cmd_vel.publish(Twist())
# goal reached
if self.get_goalbox:
rospy.loginfo("Goal!!")
reward = 1000+scan_reward
self.pub_cmd_vel.publish(Twist())  # stop the robot
self.goal_x, self.goal_y = self.respawn_goal.getPosition(True, delete=True)  # delete the old goal model and respawn it at a new position
self.goal_distance = self.getGoalDistace()  # distance to the new goal
self.get_goalbox = False  # reset the flag
return reward
def step(self, action):
# obstacle_min_range = state[-2]
max_angular_vel = 1.5  # maximum angular velocity
ang_vel = ((self.action_size - 1)/2 - action) * max_angular_vel * 0.5
# global obstacle_min_range
vel_cmd = Twist()
# vel_cmd.linear.x = 0.15
vel_cmd.angular.z = ang_vel
# self.obstacle_min_range =obstacle_min_range
if self.obstacle_min_range <0.2:
vel_cmd.linear.x =self.obstacle_min_range*0.1
# else:
vel_cmd.linear.x = 0.2
self.pub_cmd_vel.publish(vel_cmd)
data = None
while data is None:
try:
data = rospy.wait_for_message('scan', LaserScan, timeout=5)
except:
pass
state, done = self.getState(data)
reward = self.setReward(state, done, action)
return np.array(state), reward, done
def reset(self):
rospy.wait_for_service('gazebo/reset_simulation')
try:
self.reset_proxy()
except (rospy.ServiceException) as e:
print("gazebo/reset_simulation service call failed")
data = None
while data is None:
try:
data = rospy.wait_for_message('scan', LaserScan, timeout=5)
except:
pass
if self.initGoal:
self.goal_x, self.goal_y = self.respawn_goal.getPosition()
self.initGoal = False
self.goal_distance = self.getGoalDistace()
state, done = self.getState(data)
return np.array(state)
Feel free to leave a comment if you run into problems.
The next post will cover deployment on the real robot.