In [1]: from collections import namedtuple , deque
...: Transition = namedtuple('Transition',  # four-field record type used throughout this session
...: ('state', 'action', 'next_state', 'reward'))  # this field order is the positional order used when unpacking below
...:
...: print(Transition)  # prints the namedtuple class object itself, not an instance
...: # Idiom explored in the following cells: transpose a sequence of Transition records into ONE Transition of per-field tuples.
...: # bat=Transition(*zip(*transitions))  -- `transitions` is not defined in this session; `memory` plays that role below
In [2]: memory=deque([])  # empty double-ended queue that will collect Transition records
In [3]: memory
Out[3]: deque([])
In [4]: memory.append(Transition(*([1],[2],[3],[4])))  # the 4-tuple is unpacked positionally: state, action, next_state, reward
In [5]: memory
Out[5]: deque([Transition(state=[1], action=[2], next_state=[3], reward=[4])])
In [6]: memory.append(Transition(*([11],[22],[33],[44])))  # append() adds on the right, so insertion order is preserved
In [7]: memory
Out[7]:
deque([Transition(state=[1], action=[2], next_state=[3], reward=[4]),
Transition(state=[11], action=[22], next_state=[33], reward=[44])])
In [8]: memory.append(Transition(*([111],[222],[333],[444])))  # third record; memory now holds three Transitions
In [9]: memory
Out[9]:
deque([Transition(state=[1], action=[2], next_state=[3], reward=[4]),
Transition(state=[11], action=[22], next_state=[33], reward=[44]),
Transition(state=[111], action=[222], next_state=[333], reward=[444])])
In [10]: un_zip=zip(*memory)  # transpose: the i-th field of every Transition is grouped into one tuple
In [11]: un_zip  # zip is lazy; this displays the iterator object, not its contents
Out[11]: <zip at 0x...>
In [12]: un_zip_list = list(un_zip)  # materialize the one-shot zip iterator (this exhausts un_zip) so it can be traversed repeatedly
In [13]: for item in un_zip_list:
...: print(item)  # one tuple per field, in field order: state, action, next_state, reward
...:
([1], [11], [111])
([2], [22], [222])
([3], [33], [333])
([4], [44], [444])
In [14]: for item in un_zip_list:
...: print(type(item))  # every item is a tuple, because zip yields tuples
...:
<class 'tuple'>
<class 'tuple'>
<class 'tuple'>
<class 'tuple'>
In [16]: new_state=Transition(*un_zip_list)  # re-pack the four per-field tuples into a single Transition; NOTE(review): prompt In [15] is absent from this transcript
In [17]: new_state  # despite its name, this holds all four fields (state, action, next_state, reward), each as a tuple across the three records
Out[17]: Transition(state=([1], [11], [111]), action=([2], [22], [222]), next_state=([3], [33], [333]), reward=([4], [44], [444]))