《强化学习》k臂赌博机问题python编程练习2.5

《强化学习》k臂赌博机问题python编程练习2.5_第1张图片

100次独立模拟100次循环的数据(算力有限)

# -*- coding: utf-8 -*-
"""
Created on Fri May  1 14:47:35 2020

@author: Ziz
"""

import numpy as np
import random 
import time
import matplotlib.pyplot as plt
import pickle
# fr = open('dataFile.txt','wb')
# pickle.dump([x,y],fr,-1)
# fr.close()

# fr = open('dataFile.txt','rb')
# pickle.load(fr)
# fr.close()


# Epsilon-greedy agent with a constant step size on a 10-armed bandit.
# For each training length (20, 40, ..., 1000 steps) the script averages the
# per-step reward over `outer_loop` random problems x `inner_loop` runs each,
# then plots average reward against training length.
#
# NOTE(review): the title refers to Exercise 2.5, which in Sutton & Barto asks
# for a nonstationary testbed (true values taking random walks). Here the true
# values are drawn once per problem and stay fixed -- confirm this is intended.

N_ARMS = 10      # number of bandit arms
EPSILON = 0.1    # probability of taking a random (exploratory) action
alpha = 0.1      # constant step size of the value-estimate update
inner_loop = 5   # independent runs per bandit problem
outer_loop = 5   # independent bandit problems per training length


def _run_bandit(q_true, steps, step_size, epsilon):
    """Run one epsilon-greedy episode and return the mean reward per step.

    Args:
        q_true: 1-D array with the true mean reward of each arm.
        steps: number of actions to take (must be positive).
        step_size: constant step size used to update the estimates.
        epsilon: probability of choosing a uniformly random arm.

    Returns:
        The total reward collected divided by ``steps``, as a float.
    """
    n_arms = len(q_true)
    estimates = np.zeros(n_arms)
    total_reward = 0.0
    for step in range(steps):
        # The very first action is always random: all estimates are still 0,
        # so a greedy pick would always select arm 0.
        if step == 0 or np.random.rand() < epsilon:
            action = np.random.randint(0, n_arms)
        else:
            # argmax returns the first maximal index, i.e. ties go to the
            # lowest-numbered arm.
            action = int(np.argmax(estimates))
        # Reward is the arm's true value plus unit-variance Gaussian noise.
        reward = np.random.randn() + q_true[action]
        total_reward += reward
        estimates[action] += step_size * (reward - estimates[action])
    return float(total_reward / steps)


x = []  # training lengths (number of steps)
y = []  # average per-step reward for each training length
for step_multiplier in range(1, 51):
    start_time = time.time()
    train_steps = step_multiplier * 20
    average_expect_out = 0.0
    for _ in range(outer_loop):
        # Draw a fresh bandit problem: true arm values ~ N(0, 1).
        q_values = np.random.randn(N_ARMS)
        average_expect = 0.0
        for _ in range(inner_loop):
            average_expect += _run_bandit(q_values, train_steps, alpha, EPSILON)
        average_expect_out += average_expect / inner_loop
    average_expect_out /= outer_loop
    x.append(train_steps)
    y.append(average_expect_out)
    end_time = time.time()
    # Progress output: results so far and the wall time of this setting.
    print(x)
    print(y)
    print('time consume = ', end_time - start_time)
plt.plot(x, y)
# Without show() the figure never appears when run as a plain script.
plt.show()

你可能感兴趣的:(python,深度学习,强化学习)