# brain.py
"""
Created on Wednesday Jan 16 2019
@author: Seyed Mohammad Asghari
@github: https://github.com/s3yyy3d-m
"""
import os
from keras.models import Sequential, Model
from keras.layers import Dense, Lambda, Input, Concatenate
from keras.optimizers import Adam, RMSprop
import tensorflow as tf
from keras import backend as K
HUBER_LOSS_DELTA = 1.0
def huber_loss(y_true, y_predict):
err = y_true - y_predict
cond = K.abs(err) < HUBER_LOSS_DELTA
L2 = 0.5 * K.square(err)
L1 = HUBER_LOSS_DELTA * (K.abs(err) - 0.5 * HUBER_LOSS_DELTA)
loss = tf.where(cond, L2, L1)
return K.mean(loss)
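# A quick worked check of the piecewise behaviour (HUBER_LOSS_DELTA = 1.0):
#   |err| = 0.5  ->  0.5 * 0.5**2             = 0.125   (quadratic branch)
#   |err| = 2.0  ->  1.0 * (2.0 - 0.5 * 1.0)  = 1.5     (linear branch)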
class Brain(object):
def __init__(self, state_size, action_size, brain_name, arguments):
self.state_size = state_size
self.action_size = action_size
self.weight_backup = brain_name
self.batch_size = arguments['batch_size']
self.learning_rate = arguments['learning_rate']
self.test = arguments['test']
self.num_nodes = arguments['number_nodes']
self.dueling = arguments['dueling']
self.optimizer_model = arguments['optimizer']
self.model = self._build_model()
self.model_ = self._build_model()
def _build_model(self):
if self.dueling:
x = Input(shape=(self.state_size,))
            # a series of fully connected layers for estimating V(s)
y11 = Dense(self.num_nodes, activation='relu')(x)
y12 = Dense(self.num_nodes, activation='relu')(y11)
y13 = Dense(1, activation="linear")(y12)
            # a series of fully connected layers for estimating A(s,a)
y21 = Dense(self.num_nodes, activation='relu')(x)
y22 = Dense(self.num_nodes, activation='relu')(y21)
y23 = Dense(self.action_size, activation="linear")(y22)
w = Concatenate(axis=-1)([y13, y23])
            # combine V(s) and A(s,a) into Q(s,a) = V(s) + (A(s,a) - mean_a' A(s,a'))
            z = Lambda(lambda a: K.expand_dims(a[:, 0], axis=-1) + a[:, 1:] - K.mean(a[:, 1:], axis=-1, keepdims=True),
                       output_shape=(self.action_size,))(w)
else:
x = Input(shape=(self.state_size,))
            # a series of fully connected layers for estimating Q(s,a)
y1 = Dense(self.num_nodes, activation='relu')(x)
y2 = Dense(self.num_nodes, activation='relu')(y1)
z = Dense(self.action_size, activation="linear")(y2)
model = Model(inputs=x, outputs=z)
if self.optimizer_model == 'Adam':
optimizer = Adam(lr=self.learning_rate, clipnorm=1.)
elif self.optimizer_model == 'RMSProp':
optimizer = RMSprop(lr=self.learning_rate, clipnorm=1.)
        else:
            raise ValueError('Invalid optimizer: {}'.format(self.optimizer_model))
model.compile(loss=huber_loss, optimizer=optimizer)
if self.test:
if not os.path.isfile(self.weight_backup):
                print('Error: no weight file found')
else:
model.load_weights(self.weight_backup)
return model
def train(self, x, y, sample_weight=None, epochs=1, verbose=0): # x is the input to the network and y is the output
self.model.fit(x, y, batch_size=len(x), sample_weight=sample_weight, epochs=epochs, verbose=verbose)
def predict(self, state, target=False):
if target: # get prediction from target network
return self.model_.predict(state)
else: # get prediction from local network
return self.model.predict(state)
def predict_one_sample(self, state, target=False):
return self.predict(state.reshape(1,self.state_size), target=target).flatten()
def update_target_model(self):
self.model_.set_weights(self.model.get_weights())
def save_model(self):
self.model.save(self.weight_backup)
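# A minimal usage sketch (illustrative values only; in the project the arguments
# dict is built from the training script's command-line options):
if __name__ == '__main__':
    import numpy as np

    example_args = {'batch_size': 64, 'learning_rate': 0.001, 'test': False,
                    'number_nodes': 256, 'dueling': True, 'optimizer': 'Adam'}
    brain = Brain(state_size=8, action_size=5, brain_name='demo_weights.h5',
                  arguments=example_args)
    # one forward pass for a dummy state; returns a vector of 5 Q-values
    print(brain.predict_one_sample(np.zeros(8)))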
# agents_landmarks_multiagent.py
"""
Created on Wednesday Jan 16 2019
@author: Seyed Mohammad Asghari
@github: https://github.com/s3yyy3d-m
"""
import numpy as np
import os
import random
import argparse
import pandas as pd
from environments.agents_landmarks.env import agentslandmarks
from dqn_agent import Agent
import glob
ARG_LIST = ['learning_rate', 'optimizer', 'memory_capacity', 'batch_size', 'target_frequency', 'maximum_exploration',
'max_timestep', 'first_step_memory', 'replay_steps', 'number_nodes', 'target_type', 'memory',
'prioritization_scale', 'dueling', 'agents_number', 'grid_size', 'game_mode', 'reward_mode']
def get_name_brain(args, idx):
file_name_str = '_'.join([str(args[x]) for x in ARG_LIST])
return './results_agents_landmarks/weights_files/' + file_name_str + '_' + str(idx) + '.h5'
def get_name_rewards(args):
file_name_str = '_'.join([str(args[x]) for x in ARG_LIST])
return './results_agents_landmarks/rewards_files/' + file_name_str + '.csv'
def get_name_timesteps(args):
file_name_str = '_'.join([str(args[x]) for x in ARG_LIST])
return './results_agents_landmarks/timesteps_files/' + file_name_str + '.csv'
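# Each result file is named by joining the str() of every entry in ARG_LIST with
# underscores, i.e. '<learning_rate>_<optimizer>_<memory_capacity>_..._<reward_mode>',
# so every hyper-parameter combination gets its own weights/rewards/timesteps files.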
class Environment(object):
def __init__(self, arguments):
current_path = os.path.dirname(__file__) # Where your .py file is located
self.env = agentslandmarks(arguments, current_path)
self.episodes_number = arguments['episode_number']
self.render = arguments['render']
self.recorder = arguments['recorder']
self.max_ts = arguments['max_timestep']
self.test = arguments['test']
self.filling_steps = arguments['first_step_memory']
self.steps_b_updates = arguments['replay_steps']
self.max_random_moves = arguments['max_random_moves']
self.num_agents = arguments['agents_number']
self.num_landmarks = self.num_agents
self.game_mode = arguments['game_mode']
self.grid_size = arguments['grid_size']
def run(self, agents, file1, file2):
total_step = 0
rewards_list = []
timesteps_list = []
max_score = -10000
for episode_num in xrange(self.episodes_number):
state = self.env.reset()
if self.render:
self.env.render()
random_moves = random.randint(0, self.max_random_moves)
# create randomness in initial state
for _ in xrange(random_moves):
actions = [4 for _ in xrange(len(agents))]
state, _, _ = self.env.step(actions)
if self.render:
self.env.render()
# converting list of positions to an array
state = np.array(state)
state = state.ravel()
done = False
reward_all = 0
time_step = 0
while not done and time_step < self.max_ts:
# if self.render:
# self.env.render()
actions = []
for agent in agents:
actions.append(agent.greedy_actor(state))
next_state, reward, done = self.env.step(actions)
# converting list of positions to an array
next_state = np.array(next_state)
next_state = next_state.ravel()
if not self.test:
for agent in agents:
agent.observe((state, actions, reward, next_state, done))
if total_step >= self.filling_steps:
agent.decay_epsilon()
if time_step % self.steps_b_updates == 0:
agent.replay()
agent.update_target_model()
total_step += 1
time_step += 1
state = next_state
reward_all += reward
if self.render:
self.env.render()
rewards_list.append(reward_all)
timesteps_list.append(time_step)
print("Episode {p}, Score: {s}, Final Step: {t}, Goal: {g}".format(p=episode_num, s=reward_all,
t=time_step, g=done))
if self.recorder:
os.system("ffmpeg -r 2 -i ./results_agents_landmarks/snaps/%04d.png -b:v 40000 -minrate 40000 -maxrate 4000k -bufsize 1835k -c:v mjpeg -qscale:v 0 "
+ "./results_agents_landmarks/videos/{a1}_{a2}_{a3}_{a4}.avi".format(a1=self.num_agents,
a2=self.num_landmarks,
a3=self.game_mode,
a4=self.grid_size))
            files = glob.glob('./results_agents_landmarks/snaps/*.png')
            for f in files:
                os.remove(f)
"""
env.py
Created on Wednesday Jan 16 2019
@author: Seyed Mohammad Asghari
@github: https://github.com/s3yyy3d-m
"""
import random
import operator
import numpy as np
import pygame
import sys
import os
# Define some colors
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
GREEN = (0, 255, 0)
RED = (255, 0, 0)
BLUE = (0, 0, 255)
GRAY = (128, 128, 128)
ORANGE = (255, 128, 0)
# This sets the WIDTH and HEIGHT of each grid location
WIDTH = 60
HEIGHT = 60
# This sets the margin between each cell
MARGIN = 1
class agentslandmarks:
UP = 0
DOWN = 1
LEFT = 2
RIGHT = 3
STAY = 4
A = [UP, DOWN, LEFT, RIGHT, STAY]
A_DIFF = [(-1, 0), (1, 0), (0, -1), (0, 1), (0, 0)]
def __init__(self, args, current_path):
self.game_mode = args['game_mode']
self.reward_mode = args['reward_mode']
self.num_agents = args['agents_number']
self.num_landmarks = self.num_agents
self.grid_size = args['grid_size']
self.state_size = (self.num_agents + self.num_landmarks) * 2
self.agents_positions = []
self.landmarks_positions = []
self.render_flag = args['render']
self.recorder_flag = args['recorder']
# enables visualizer
if self.render_flag:
[self.screen, self.my_font] = self.gui_setup()
self.step_num = 1
resource_path = os.path.join(current_path, 'environments') # The resource folder path
resource_path = os.path.join(resource_path, 'agents_landmarks') # The resource folder path
image_path = os.path.join(resource_path, 'images') # The image folder path
img = pygame.image.load(os.path.join(image_path, 'agent.jpg')).convert()
self.img_agent = pygame.transform.scale(img, (WIDTH, WIDTH))
img = pygame.image.load(os.path.join(image_path, 'landmark.jpg')).convert()
self.img_landmark = pygame.transform.scale(img, (WIDTH, WIDTH))
img = pygame.image.load(os.path.join(image_path, 'agent_landmark.jpg')).convert()
self.img_agent_landmark = pygame.transform.scale(img, (WIDTH, WIDTH))
img = pygame.image.load(os.path.join(image_path, 'agent_agent_landmark.jpg')).convert()
self.img_agent_agent_landmark = pygame.transform.scale(img, (WIDTH, WIDTH))
img = pygame.image.load(os.path.join(image_path, 'agent_agent.jpg')).convert()
self.img_agent_agent = pygame.transform.scale(img, (WIDTH, WIDTH))
if self.recorder_flag:
self.snaps_path = os.path.join(current_path, 'results_agents_landmarks') # The resource folder path
self.snaps_path = os.path.join(self.snaps_path, 'snaps') # The resource folder path
self.cells = []
self.positions_idx = []
# self.agents_collide_flag = args['collide_flag']
# self.penalty_per_collision = args['penalty_collision']
self.num_episodes = 0
self.terminal = False
def set_positions_idx(self):
cells = [(i, j) for i in range(0, self.grid_size) for j in range(0, self.grid_size)]
positions_idx = []
if self.game_mode == 0:
# first enter the positions for the landmarks and then for the agents. If the grid is n*n, then the
# positions are
# 0 1 2 ... n-1
# n n+1 n+2 ... 2n-1
# 2n 2n+1 2n+2 ... 3n-1
# . . . . .
# . . . . .
# . . . . .
            #   (n-1)*n   (n-1)*n+1   (n-1)*n+2  ...  n*n-1
# , e.g.,
# positions_idx = [0, 6, 23, 24] where 0 and 6 are the positions of landmarks and 23 and 24 are positions
# of agents
positions_idx = []
if self.game_mode == 1:
positions_idx = np.random.choice(len(cells), size=self.num_landmarks + self.num_agents,
replace=False)
return [cells, positions_idx]
def reset(self): # initialize the world
self.terminal = False
[self.cells, self.positions_idx] = self.set_positions_idx()
        # separate the generated position indices into landmark and agent positions
landmarks_positions_idx = self.positions_idx[0:self.num_landmarks]
agents_positions_idx = self.positions_idx[self.num_landmarks:self.num_landmarks + self.num_agents]
# map generated position indices to positions
self.landmarks_positions = [self.cells[pos] for pos in landmarks_positions_idx]
self.agents_positions = [self.cells[pos] for pos in agents_positions_idx]
initial_state = list(sum(self.landmarks_positions + self.agents_positions, ()))
return initial_state
def step(self, agents_actions):
# update the position of agents
self.agents_positions = self.update_positions(self.agents_positions, agents_actions)
if self.reward_mode == 0:
binary_cover_list = []
for landmark in self.landmarks_positions:
distances = [np.linalg.norm(np.array(landmark) - np.array(agent_pos), 1)
for agent_pos in self.agents_positions]
min_dist = min(distances)
                if min_dist == 0:
                    binary_cover_list.append(0)
                else:
                    binary_cover_list.append(1)
# check the terminal case
if sum(binary_cover_list) == 0:
reward = 0
self.terminal = True
else:
reward = -1
self.terminal = False
if self.reward_mode == 1:
binary_cover_list = []
for landmark in self.landmarks_positions:
distances = [np.linalg.norm(np.array(landmark) - np.array(agent_pos), 1)
for agent_pos in self.agents_positions]
min_dist = min(distances)
if min_dist == 0:
binary_cover_list.append(0)
else:
binary_cover_list.append(1)
reward = -1 * sum(binary_cover_list)
# check the terminal case
if reward == 0:
self.terminal = True
else:
self.terminal = False
if self.reward_mode == 2:
# calculate the sum of minimum distances of agents to landmarks
reward = 0
for landmark in self.landmarks_positions:
distances = [np.linalg.norm(np.array(landmark) - np.array(agent_pos), 1)
for agent_pos in self.agents_positions]
reward -= min(distances)
# check the terminal case
if reward == 0:
self.terminal = True
new_state = list(sum(self.landmarks_positions + self.agents_positions, ()))
return [new_state, reward, self.terminal]
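    # Worked example of the three reward modes: with 3 landmarks of which one is
    # still uncovered (L1 distance 2 to its nearest agent):
    #   reward_mode 0 -> -1   (flat penalty until every landmark is covered)
    #   reward_mode 1 -> -1   (minus the number of uncovered landmarks)
    #   reward_mode 2 -> -2   (minus the sum of minimum L1 distances)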
def update_positions(self, pos_list, act_list):
positions_action_applied = []
for idx in xrange(len(pos_list)):
if act_list[idx] != 4:
                pos_act_applied = list(map(operator.add, pos_list[idx], self.A_DIFF[act_list[idx]]))
                # check to make sure the new position is inside the grid
for i in xrange(0, 2):
if pos_act_applied[i] < 0:
pos_act_applied[i] = 0
if pos_act_applied[i] >= self.grid_size:
pos_act_applied[i] = self.grid_size - 1
positions_action_applied.append(tuple(pos_act_applied))
else:
positions_action_applied.append(pos_list[idx])
final_positions = []
for pos_idx in xrange(len(pos_list)):
if positions_action_applied[pos_idx] == pos_list[pos_idx]:
final_positions.append(pos_list[pos_idx])
elif positions_action_applied[pos_idx] not in pos_list and positions_action_applied[
pos_idx] not in positions_action_applied[
0:pos_idx] + positions_action_applied[
pos_idx + 1:]:
final_positions.append(positions_action_applied[pos_idx])
else:
final_positions.append(pos_list[pos_idx])
return final_positions
def action_space(self):
return len(self.A)
def render(self):
pygame.time.delay(500)
pygame.display.flip()
for event in pygame.event.get():
if event.type == pygame.QUIT:
sys.exit()
self.screen.fill(BLACK)
text = self.my_font.render("Step: {0}".format(self.step_num), 1, WHITE)
self.screen.blit(text, (5, 15))
for row in range(self.grid_size):
for column in range(self.grid_size):
pos = (row, column)
frequency = self.find_frequency(pos, self.agents_positions)
if pos in self.landmarks_positions and frequency >= 1:
if frequency == 1:
self.screen.blit(self.img_agent_landmark,
((MARGIN + WIDTH) * column + MARGIN, (MARGIN + HEIGHT) * row + MARGIN + 50))
else:
self.screen.blit(self.img_agent_agent_landmark,
((MARGIN + WIDTH) * column + MARGIN, (MARGIN + HEIGHT) * row + MARGIN + 50))
elif pos in self.landmarks_positions:
self.screen.blit(self.img_landmark,
((MARGIN + WIDTH) * column + MARGIN, (MARGIN + HEIGHT) * row + MARGIN + 50))
elif frequency >= 1:
if frequency == 1:
self.screen.blit(self.img_agent,
((MARGIN + WIDTH) * column + MARGIN, (MARGIN + HEIGHT) * row + MARGIN + 50))
elif frequency > 1:
self.screen.blit(self.img_agent_agent,
((MARGIN + WIDTH) * column + MARGIN, (MARGIN + HEIGHT) * row + MARGIN + 50))
else:
print('Error!')
else:
pygame.draw.rect(self.screen, WHITE,
[(MARGIN + WIDTH) * column + MARGIN, (MARGIN + HEIGHT) * row + MARGIN + 50, WIDTH,
HEIGHT])
if self.recorder_flag:
file_name = "%04d.png" % self.step_num
pygame.image.save(self.screen, os.path.join(self.snaps_path, file_name))
if not self.terminal:
self.step_num += 1
def gui_setup(self):
# Initialize pygame
pygame.init()
# Set the HEIGHT and WIDTH of the screen
board_size_x = (WIDTH + MARGIN) * self.grid_size
board_size_y = (HEIGHT + MARGIN) * self.grid_size
window_size_x = int(board_size_x)
window_size_y = int(board_size_y * 1.2)
window_size = [window_size_x, window_size_y]
screen = pygame.display.set_mode(window_size)
# Set title of screen
pygame.display.set_caption("Agents-and-Landmarks Game")
myfont = pygame.font.SysFont("monospace", 30)
return [screen, myfont]
def find_frequency(self, a, items):
freq = 0
for item in items:
if item == a:
freq += 1
return freq
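# A minimal interaction sketch (illustrative arguments; in the project they come
# from the training script). With render/recorder disabled, no pygame assets are loaded.
if __name__ == '__main__':
    demo_args = {'game_mode': 1, 'reward_mode': 1, 'agents_number': 2,
                 'grid_size': 5, 'render': False, 'recorder': False}
    env = agentslandmarks(demo_args, os.path.dirname(os.path.abspath(__file__)))
    state = env.reset()
    next_state, reward, done = env.step([agentslandmarks.STAY, agentslandmarks.STAY])
    print(reward, done)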
"""
dqn_agent.py
Created on Wednesday Jan 16 2019
@author: Seyed Mohammad Asghari
@github: https://github.com/s3yyy3d-m
"""
import numpy as np
import random
from brain import Brain
from uniform_experience_replay import Memory as UER
from prioritized_experience_replay import Memory as PER
MAX_EPSILON = 1.0
MIN_EPSILON = 0.01
MIN_BETA = 0.4
MAX_BETA = 1.0
class Agent(object):
epsilon = MAX_EPSILON
beta = MIN_BETA
def __init__(self, state_size, action_size, bee_index, brain_name, arguments):
self.state_size = state_size
self.action_size = action_size
self.bee_index = bee_index
self.learning_rate = arguments['learning_rate']
self.gamma = 0.95
self.brain = Brain(self.state_size, self.action_size, brain_name, arguments)
self.memory_model = arguments['memory']
if self.memory_model == 'UER':
self.memory = UER(arguments['memory_capacity'])
elif self.memory_model == 'PER':
self.memory = PER(arguments['memory_capacity'], arguments['prioritization_scale'])
        else:
            raise ValueError('Invalid memory model: {}'.format(self.memory_model))
self.target_type = arguments['target_type']
self.update_target_frequency = arguments['target_frequency']
self.max_exploration_step = arguments['maximum_exploration']
self.batch_size = arguments['batch_size']
self.step = 0
self.test = arguments['test']
if self.test:
self.epsilon = MIN_EPSILON
def greedy_actor(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
else:
return np.argmax(self.brain.predict_one_sample(state))
def find_targets_per(self, batch):
batch_len = len(batch)
states = np.array([o[1][0] for o in batch])
states_ = np.array([o[1][3] for o in batch])
p = self.brain.predict(states)
p_ = self.brain.predict(states_)
pTarget_ = self.brain.predict(states_, target=True)
x = np.zeros((batch_len, self.state_size))
y = np.zeros((batch_len, self.action_size))
errors = np.zeros(batch_len)
for i in range(batch_len):
o = batch[i][1]
s = o[0]
a = o[1][self.bee_index]
r = o[2]
s_ = o[3]
done = o[4]
t = p[i]
old_value = t[a]
if done:
t[a] = r
else:
if self.target_type == 'DDQN':
t[a] = r + self.gamma * pTarget_[i][np.argmax(p_[i])]
elif self.target_type == 'DQN':
t[a] = r + self.gamma * np.amax(pTarget_[i])
else:
print('Invalid type for target network!')
x[i] = s
y[i] = t
errors[i] = np.abs(t[a] - old_value)
return [x, y, errors]
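    # Target construction above: for a transition (s, a, r, s', done) the update is
    #   DQN  : t[a] = r + gamma * max_a' Q_target(s', a')
    #   DDQN : t[a] = r + gamma * Q_target(s', argmax_a' Q_online(s', a'))
    # and t[a] = r at terminal states; |t[a] - Q_online(s, a)| is the TD error used
    # as the priority signal for PER.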
def find_targets_uer(self, batch):
batch_len = len(batch)
states = np.array([o[0] for o in batch])
states_ = np.array([o[3] for o in batch])
p = self.brain.predict(states)
p_ = self.brain.predict(states_)
pTarget_ = self.brain.predict(states_, target=True)
x = np.zeros((batch_len, self.state_size))
y = np.zeros((batch_len, self.action_size))
errors = np.zeros(batch_len)
for i in range(batch_len):
o = batch[i]
s = o[0]
a = o[1][self.bee_index]
r = o[2]
s_ = o[3]
done = o[4]
t = p[i]
old_value = t[a]
if done:
t[a] = r
else:
if self.target_type == 'DDQN':
t[a] = r + self.gamma * pTarget_[i][np.argmax(p_[i])]
elif self.target_type == 'DQN':
t[a] = r + self.gamma * np.amax(pTarget_[i])
else:
print('Invalid type for target network!')
x[i] = s
y[i] = t
errors[i] = np.abs(t[a] - old_value)
return [x, y]
def observe(self, sample):
if self.memory_model == 'UER':
self.memory.remember(sample)
elif self.memory_model == 'PER':
_, _, errors = self.find_targets_per([[0, sample]])
self.memory.remember(sample, errors[0])
else:
print('Invalid memory model!')
def decay_epsilon(self):
        # anneal epsilon (exploration) down and beta (PER importance sampling) up as experience accumulates
self.step += 1
if self.test:
self.epsilon = MIN_EPSILON
self.beta = MAX_BETA
else:
if self.step < self.max_exploration_step:
self.epsilon = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * (self.max_exploration_step - self.step)/self.max_exploration_step
self.beta = MAX_BETA + (MIN_BETA - MAX_BETA) * (self.max_exploration_step - self.step)/self.max_exploration_step
else:
self.epsilon = MIN_EPSILON
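    # Both schedules are linear in the step count: epsilon anneals from MAX_EPSILON
    # down to MIN_EPSILON and beta from MIN_BETA up to MAX_BETA over
    # maximum_exploration steps (e.g. halfway through, epsilon = (MAX_EPSILON + MIN_EPSILON) / 2).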
def replay(self):
if self.memory_model == 'UER':
batch = self.memory.sample(self.batch_size)
x, y = self.find_targets_uer(batch)
self.brain.train(x, y)
elif self.memory_model == 'PER':
[batch, batch_indices, batch_priorities] = self.memory.sample(self.batch_size)
x, y, errors = self.find_targets_per(batch)
normalized_batch_priorities = [float(i) / sum(batch_priorities) for i in batch_priorities]
importance_sampling_weights = [(self.batch_size * i) ** (-1 * self.beta)
for i in normalized_batch_priorities]
normalized_importance_sampling_weights = [float(i) / max(importance_sampling_weights)
for i in importance_sampling_weights]
sample_weights = [errors[i] * normalized_importance_sampling_weights[i] for i in xrange(len(errors))]
self.brain.train(x, y, np.array(sample_weights))
self.memory.update(batch_indices, errors)
else:
print('Invalid memory model!')
def update_target_model(self):
if self.step % self.update_target_frequency == 0:
self.brain.update_target_model()
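# A sketch of the per-timestep driver loop expected by this class (it mirrors
# Environment.run in agents_landmarks_multiagent.py):
#
#     actions = [agent.greedy_actor(state) for agent in agents]
#     next_state, reward, done = env.step(actions)
#     for agent in agents:
#         agent.observe((state, actions, reward, next_state, done))
#         agent.decay_epsilon()
#         agent.replay()                # every `replay_steps` timesteps
#         agent.update_target_model()   # no-op unless step % target_frequency == 0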
# prioritized_experience_replay.py
"""
Created on Wednesday Jan 16 2019
@author: Seyed Mohammad Asghari
@github: https://github.com/s3yyy3d-m
"""
import random
from sum_tree import SumTree as ST
class Memory(object):
e = 0.05
def __init__(self, capacity, pr_scale):
self.capacity = capacity
self.memory = ST(self.capacity)
self.pr_scale = pr_scale
self.max_pr = 0
def get_priority(self, error):
return (error + self.e) ** self.pr_scale
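    # e.g. with pr_scale = 0.5 (square-root prioritisation):
    #   error = 0.10  ->  (0.10 + 0.05) ** 0.5  ~ 0.387
    #   error = 2.00  ->  (2.00 + 0.05) ** 0.5  ~ 1.432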
def remember(self, sample, error):
p = self.get_priority(error)
self_max = max(self.max_pr, p)
self.memory.add(self_max, sample)
def sample(self, n):
sample_batch = []
sample_batch_indices = []
sample_batch_priorities = []
        # stratified sampling: draw one transition from each of n equal-mass segments of the priority sum
        segment_length = self.memory.total() / n
        for i in xrange(n):
            left = segment_length * i
            right = segment_length * (i + 1)
s = random.uniform(left, right)
idx, pr, data = self.memory.get(s)
sample_batch.append((idx, data))
sample_batch_indices.append(idx)
sample_batch_priorities.append(pr)
return [sample_batch, sample_batch_indices, sample_batch_priorities]
def update(self, batch_indices, errors):
for i in xrange(len(batch_indices)):
p = self.get_priority(errors[i])
self.memory.update(batch_indices[i], p)
# sum_tree.py
import numpy
class SumTree(object):
def __init__(self, capacity):
self.write = 0
self.capacity = capacity
self.tree = numpy.zeros(2*capacity - 1)
self.data = numpy.zeros(capacity, dtype=object)
def _propagate(self, idx, change):
        parent = (idx - 1) // 2
self.tree[parent] += change
if parent != 0:
self._propagate(parent, change)
def _retrieve(self, idx, s):
left = 2 * idx + 1
right = left + 1
if left >= len(self.tree):
return idx
if s <= self.tree[left]:
return self._retrieve(left, s)
else:
return self._retrieve(right, s-self.tree[left])
def total(self):
return self.tree[0]
def add(self, p, data):
idx = self.write + self.capacity - 1
self.data[self.write] = data
self.update(idx, p)
self.write += 1
if self.write >= self.capacity:
self.write = 0
def update(self, idx, p):
change = p - self.tree[idx]
self.tree[idx] = p
self._propagate(idx, change)
# def get_real_idx(self, data_idx):
#
# tempIdx = data_idx - self.write
# if tempIdx >= 0:
# return tempIdx
# else:
# return tempIdx + self.capacity
def get(self, s):
idx = self._retrieve(0, s)
dataIdx = idx - self.capacity + 1
# realIdx = self.get_real_idx(dataIdx)
return idx, self.tree[idx], self.data[dataIdx]
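# A quick sanity check of cumulative-sum retrieval (assumes leaf priorities 1, 2, 3):
if __name__ == '__main__':
    tree = SumTree(capacity=4)
    for p, data in [(1.0, 'a'), (2.0, 'b'), (3.0, 'c')]:
        tree.add(p, data)
    print(tree.total())   # 6.0
    print(tree.get(2.5))  # falls in the second leaf: (4, 2.0, 'b')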
# uniform_experience_replay.py
"""
Created on Wednesday Jan 16 2019
@author: Seyed Mohammad Asghari
@github: https://github.com/s3yyy3d-m
"""
import random
from collections import deque
class Memory(object):
def __init__(self, capacity):
self.capacity = capacity
self.memory = deque(maxlen=self.capacity)
def remember(self, sample):
self.memory.append(sample)
def sample(self, n):
n = min(n, len(self.memory))
sample_batch = random.sample(self.memory, n)
return sample_batch
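# A minimal usage sketch: the deque silently drops the oldest samples once the
# capacity is reached, so only the most recent `capacity` transitions survive.
if __name__ == '__main__':
    mem = Memory(capacity=3)
    for i in range(5):
        mem.remember(('transition', i))
    print(len(mem.memory))  # 3 -- only the three newest samples remain
    print(mem.sample(2))    # two transitions drawn uniformly at random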