趣游--环境交互

强化学习与环境模拟器交互:

import xlrd
import random
import pandas as pd

# Load the questionnaire workbook (the version with updated stamina values).
data = xlrd.open_workbook("D:/PCstudy/data/Question_reults_end.xls")
table = data.sheets()[0]  # first sheet of the workbook
nrows = table.nrows  # number of rows in the sheet
ncols = table.ncols  # number of columns in the sheet
# One list of cell values per sheet row, in sheet order.
datamatrix = [table.row_values(row_idx) for row_idx in range(nrows)]

# Build the raw per-user feature columns: visited-before flag, age, sex,
# income, city, education and occupation.
users = {}  # placeholder; rebound to the encoded feature matrix further down
# Row 0 of the sheet is skipped; every later row contributes one entry to
# each list. The numbers are the sheet column indices that are read.
vis, age, sex, income, citys, school, wokers = (
    [record[column] for record in datamatrix[1:nrows]]
    for column in (0, 1, 2, 3, 4, 22, 23)
)

# One-hot encode every feature column with pandas and join the encoded
# frames side by side; the resulting matrix has one row per user.
dfresult = pd.DataFrame()
for prefix, column in (
    ("vis_", vis),
    ("age_", age),
    ("sex_", sex),
    ("income_", income),
    ("citys_", citys),
    ("school_", school),
    ("wokers_", wokers),
):
    dfPclass = pd.get_dummies(column)
    # Prefix each dummy column with the feature name so the columns stay
    # distinguishable after concatenation.
    dfPclass.columns = [prefix + str(label) for label in dfPclass.columns]
    dfresult = pd.concat([dfresult, dfPclass], axis=1)

# Plain array view of the encoded features; row j belongs to sheet row j + 1.
users = dfresult.values

# Load the workbook holding the updated map.
# NOTE(review): xlrd 2.0+ only reads .xls files; confirm the installed xlrd
# version can still open this .xlsx path.
data = xlrd.open_workbook("D:/PCstudy/data/graphs.xlsx")
graph = data.sheets()[0]  # first sheet of the workbook
grows = graph.nrows  # number of rows in the sheet
gcols = graph.ncols  # number of columns in the sheet
# One list of cell values per sheet row, in sheet order.
graphs = [graph.row_values(row_idx) for row_idx in range(grows)]

# Visit bookkeeping for the 17 attractions, in index order:
#   0 Shu-Han Archway (entrance/exit 1), 1 Covered Bridge (entrance/exit 2),
#   2 Longjin Waterfall, 3 Fan Family Courtyard,
#   4 Wangwang Park carousel, 5 Chuanjin Amusement Park,
#   6 Huanglongxi Museum, 7 Longxi Hotel,
#   8 Qifeng Corridor, 9 Stone Bridge, 10 Phoenix Island, 11 Five-Arch Bridge,
#   12 Zhenjiang Temple, 13 Tang Family Courtyard, 14 Ancient Opera Stage,
#   15 Ancient Wharf, 16 Gulong Temple
# Original author's scratch note: 800*17=13600m 1360m/800=1.6  16

# Walking speed; travel time (distance / speed) is treated as stamina cost.
speed = 300

same = [
    # same[0]: visit counter per attraction.
    [0] * 17,
    # same[1]: similarity category per attraction (0 = no similar attraction).
    [0, 1, 0, 4, 2, 2, 0, 0, 0, 1, 0, 1, 3, 4, 5, 5, 3],
]
same_values = [
    1,
    1 - 0.886,
    1 - 0.913,
    1 - 0.866,
    1 - 0.845,
    1 - 0.456,
]

# Per-attraction weights, one row per education level (rows are selected by
# the education branch of POI_updata).
study_values = [
    [0.1, 0.1, 0.1, 0.05, 0.1, 0.05, 0.1, 0, 0.1,
     0.01, 0, 0, 0.05, 0.01, 0.01, 0, 0],
    [0.1, 0.1, 0.05, 0.01, 0, 0.01, 0, 0.01, 0.1,
     0.05, 0.01, 0.05, 0.05, 0.01, 0.05, 0.05, 0.05],
    [0.01, 0.01, 0.01, 0.1, 0, 0, 0.1, 0.01, 0.1,
     0.05, 0.05, 0.05, 0.1, 0.1, 0.2, 0.2, 0.2],
    [0.01, 0.01, 0.01, 0.01, 0, 0, 0, 0, 0.01,
     0.01, 0.01, 0.01, 0.05, 0.1, 0.05, 0.1, 0.2],
]
POI_history = []

dicts = {}

class envir():
    """Scenic-area environment the reinforcement-learning agent interacts with.

    Episode state is stored on the class itself rather than on instances, so
    all instances share one episode:

    * ``POWER0`` - remaining stamina of the current user.
    * ``POI0``   - current interest score of each of the 17 attractions.
    * ``k``      - row index in ``datamatrix`` of the current user.
    * ``v``      - attraction indices visited so far, in visiting order.
    """
    POWER0 = 0
    POI0 = []
    k = 0  # index of the current user
    v = []

    def step(self, action):
        """Move the current user to attraction ``action`` (an index in 0..16).

        Returns:
            ``(done, POI0, POWER0, user_features)``. ``done`` is decided from
            the stamina held *before* this move is charged.
        """
        done = envir.POWER0 < 1
        i = action  # attraction to visit next
        last = envir.v[-1]  # attraction visited most recently

        # Each move costs a flat 1 plus a term proportional to travel time
        # (distance / speed). The +1 offsets presumably skip a header row and
        # column in the map sheet - TODO confirm against graphs.xlsx.
        envir.POWER0 -= 1
        envir.POWER0 = envir.POWER0 - 2.4666 * (graphs[last + 1][i + 1] / speed)
        same[0][i] += 1  # count this visit
        envir.v.append(i)
        envir.POI0 = POI_updata(envir.POI0, envir.POWER0, envir.k)
        # users has no header row, hence the k - 1 offset.
        return done, envir.POI0, envir.POWER0, users[envir.k - 1]

    def reset(self):
        """Start a new episode with a randomly chosen user.

        Returns:
            ``(POI0, POWER0, user_index, user_features)``. Note that the third
            element is the chosen user's row index, not an attraction.
        """
        i = random.randint(1, 89)  # pick a user row (upper bound is hard-coded)
        envir.k = i

        # Initial interest scores: sheet columns 5..21 scaled by 0.1. The
        # slice copies the row, so datamatrix itself is left untouched.
        envir.POI0 = [score * 0.1 for score in datamatrix[i][5:22]]

        # Initial stamina is read from the last column of the user's row.
        envir.POWER0 = datamatrix[i][ncols - 1]

        # Clear the visit counters left over from the previous episode;
        # without this, earlier episodes' visits keep penalising attractions
        # in every later episode. Done in place so other references to
        # ``same`` stay valid.
        same[0][:] = [0] * len(same[0])

        # Every episode starts at attraction 0 (the Shu-Han Archway).
        envir.v = [0]
        same[0][0] += 1
        envir.POI0 = POI_updata(envir.POI0, envir.POWER0, i)

        return envir.POI0, envir.POWER0, i, users[envir.k - 1]


# Recompute the POI interest scores.
def POI_updata(POI0, POWER0, k):
    """Return updated interest scores for the 17 attractions.

    The scores are adjusted, in order, by remaining stamina, by visits to
    similar attractions, by the user's income bracket, by the user's
    education level, and finally by a small random perturbation.

    Args:
        POI0: current interest scores (17 numbers). The caller's list is not
            modified; a new list is built and returned.
        POWER0: remaining stamina of the user.
        k: row index of the user in ``datamatrix``.

    Returns:
        A new list with the 17 updated scores.
    """
    # --- stamina: the lower the stamina, the stronger the decay ---
    if POWER0 <= 1:
        fatigue = 0.42496
    elif POWER0 <= 3:
        fatigue = 0.59735
    elif POWER0 <= 5:
        fatigue = 0.675874
    elif POWER0 <= 7:
        fatigue = 0.785986
    elif POWER0 <= 12:
        fatigue = 0.855324
    elif POWER0 <= 23:
        fatigue = 0.95432
    else:
        fatigue = 0.9785326
    POI0 = [score * fatigue for score in POI0]

    # --- similarity: damp categories whose attractions were revisited ---
    category_divisor = [1, 1, 1, 1, 1, 1]
    category_hits = [0, 0, 0, 0, 0, 0]
    for idx in range(17):
        # Attractions without a similarity category (0) are not counted.
        if same[0][idx] > 1 and same[1][idx] > 0:
            category_hits[same[1][idx]] += 1
    for category in range(6):
        if category_hits[category] >= 2:
            # NOTE(review): same_values is indexed by the hit count here, not
            # by the category - confirm this is intended.
            category_divisor[category] += 1 - same_values[category_hits[category]]
    for idx in range(17):
        if same[1][idx] == 0 and same[0][idx] > 1:
            # Revisited attraction that has no similar counterpart.
            POI0[idx] *= 0.51234
        else:
            POI0[idx] *= 1 / category_divisor[same[1][idx]]

    # --- income: scales attractions 4 and 5; unknown brackets are unchanged ---
    income_factor = {
        "一万一下": 0.76321,
        "一万至十万": 0.81456,
        "十万至二十五万": 0.864876,
        "二十五万至五十万": 0.916567,
        "五十万至七十五万": 0.954345,
        "七十五万至一百万": 0.978234,
    }.get(datamatrix[k][3])
    if income_factor is not None:
        POI0[4] *= income_factor
        POI0[5] *= income_factor

    # --- education: pick the matching study_values row (last row otherwise) ---
    # Backslashes are escaped explicitly; the original literal relied on
    # invalid escape sequences, which newer Python versions warn about.
    education_row = {
        "初中及以下": 0,
        "高中\\中专\\职校": 1,
        "大学": 2,
    }.get(datamatrix[k][22], 3)
    for idx in range(17):
        POI0[idx] *= 1 + study_values[education_row][idx] * 0.16662

    # --- random perturbation, then clamp ---
    for idx in range(17):
        POI0[idx] += random.uniform(-0.005, 0.005)
        if POI0[idx] > 1:
            POI0[idx] = 0.998  # scores above 1 are capped just below 1
        elif POI0[idx] < 0:
            POI0[idx] = 0
    return POI0

你可能感兴趣的:(机器学习,python,人工智能)