强化学习与环境模拟器交互:
import xlrd
import random
import pandas as pd
# Load the questionnaire workbook (the version with the updated stamina values).
data = xlrd.open_workbook("D:/PCstudy/data/Question_reults_end.xls")
table = data.sheets()[0]  # first worksheet of the workbook
nrows = table.nrows  # number of rows in the worksheet
ncols = table.ncols  # number of columns in the worksheet
# Copy every worksheet row into a plain list of cell values.
datamatrix = [table.row_values(row_idx) for row_idx in range(nrows)]
# Build the user feature data: visited-before flag, age, sex, income, city,
# education and occupation.
users = {}  # placeholder; rebound below to the one-hot feature matrix
# Row 0 is skipped; each list holds one questionnaire column for rows 1..nrows-1.
vis = [datamatrix[r][0] for r in range(1, nrows)]  # visited the scenic area before
age = [datamatrix[r][1] for r in range(1, nrows)]  # age
sex = [datamatrix[r][2] for r in range(1, nrows)]  # sex
income = [datamatrix[r][3] for r in range(1, nrows)]  # income
citys = [datamatrix[r][4] for r in range(1, nrows)]  # city
school = [datamatrix[r][22] for r in range(1, nrows)]  # education
wokers = [datamatrix[r][23] for r in range(1, nrows)]  # occupation

# One-hot encode each attribute with pandas and append its columns, in the
# order listed, to a frame that starts out empty.
dfresult = pd.DataFrame()
for _prefix, _column in (
    ("vis_", vis),
    ("age_", age),
    ("sex_", sex),
    ("income_", income),
    ("citys_", citys),
    ("school_", school),
    ("wokers_", wokers),
):
    dfPclass = pd.get_dummies(_column)
    # Prefix every dummy column with the attribute name to keep columns distinct.
    dfPclass.columns = [_prefix + str(label) for label in dfPclass.columns]
    dfresult = pd.concat([dfresult, dfPclass], axis=1)

# Feature matrix: row j belongs to the user stored in datamatrix[j + 1].
users = dfresult.values
# Load the workbook that holds the updated map.
# NOTE(review): xlrd 2.0+ only reads legacy .xls files, so opening this .xlsx
# path presumably requires xlrd < 2.0 -- confirm the pinned version.
data = xlrd.open_workbook("D:/PCstudy/data/graphs.xlsx")
graph = data.sheets()[0]  # first worksheet of the workbook
grows = graph.nrows  # number of rows in the worksheet
gcols = graph.ncols  # number of columns in the worksheet
# Copy every worksheet row into a plain list of cell values.
graphs = [graph.row_values(row_idx) for row_idx in range(grows)]
# Bookkeeping for visited attractions; there are 17 attractions in total:
#   0 Shu-Han Archway (entrance/exit 1),  1 Covered Bridge (entrance/exit 2),
#   2 Longjin Waterfall,                  3 Fan Family Courtyard,
#   4 Wangwang Park carousel,             5 Chuanjin Amusement Park,
#   6 Huanglongxi Museum,                 7 Longxi Grand Hotel,
#   8 Qifeng Corridor,                    9 Stone Bridge,
#  10 Phoenix Island,                    11 Five-Arch Bridge,
#  12 Zhenjiang Temple,                  13 Tang Family Courtyard,
#  14 Ancient Opera Stage,               15 Ancient Wharf,
#  16 Gulong Temple
# Author's scratch arithmetic, kept verbatim: 800*17=13600m 1360m/800=1.6 16

# Walking speed; travel time (distance / speed) is treated as stamina consumed.
speed = 300

# same[0][i]: visit counter for attraction i.
# same[1][i]: similarity category of attraction i (0 is handled as "no similar
#             attraction" by POI_updata).
same = [
    [0] * 17,
    [0, 1, 0, 4, 2, 2, 0, 0, 0, 1, 0, 1, 3, 4, 5, 5, 3],
]

# Similarity coefficients looked up by POI_updata.
same_values = [1, 1 - 0.886, 1 - 0.913, 1 - 0.866, 1 - 0.845, 1 - 0.456]

# Per-attraction interest bonus; POI_updata picks one row by education level.
study_values = [
    [0.1, 0.1, 0.1, 0.05, 0.1, 0.05, 0.1, 0, 0.1,
     0.01, 0, 0, 0.05, 0.01, 0.01, 0, 0],
    [0.1, 0.1, 0.05, 0.01, 0, 0.01, 0, 0.01, 0.1,
     0.05, 0.01, 0.05, 0.05, 0.01, 0.05, 0.05, 0.05],
    [0.01, 0.01, 0.01, 0.1, 0, 0, 0.1, 0.01, 0.1,
     0.05, 0.05, 0.05, 0.1, 0.1, 0.2, 0.2, 0.2],
    [0.01, 0.01, 0.01, 0.01, 0, 0, 0, 0, 0.01,
     0.01, 0.01, 0.01, 0.05, 0.1, 0.05, 0.1, 0.2],
]

POI_history = []
dicts = {}
class envir():
    """Scenic-area environment simulator driven by the RL agent.

    All episode state lives in class attributes, so every instance shares
    the same episode. The methods also read and update the module-level
    tables ``datamatrix``, ``graphs``, ``users``, ``same`` and ``speed``.
    """

    POWER0 = 0  # remaining stamina of the current user
    POI0 = []  # current interest score of each of the 17 attractions
    k = 0  # row index in datamatrix of the current (k-th) user
    v = []  # attractions visited so far in this episode, in visiting order

    def step(self, action):
        """Move the current user to attraction ``action`` and update the state.

        Args:
            action: index of the next attraction, in the range 0..16.

        Returns:
            A tuple ``(done, POI0, POWER0, user_features)``: the termination
            flag, the updated interest scores, the remaining stamina and the
            feature row of the current user.

        Raises:
            IndexError: if no attraction has been recorded yet, i.e. reset()
                was not called before the first step.
        """
        # The flag is taken from the stamina held before this move is paid
        # for, so the move that exhausts the stamina is still carried out.
        done = envir.POWER0 < 1
        last = envir.v[-1]  # attraction visited immediately before this one
        # Fixed cost of one stamina unit for the visit itself ...
        envir.POWER0 -= 1
        # ... plus the travel cost: distance / speed is the time spent, which
        # counts as stamina. graphs is read with a +1 offset on both axes.
        envir.POWER0 = envir.POWER0 - 2.4666 * (graphs[last + 1][action + 1] / speed)
        same[0][action] += 1  # record the visit to this attraction
        envir.v.append(action)
        envir.POI0 = POI_updata(envir.POI0, envir.POWER0, envir.k)
        # users holds one row per data row of datamatrix (header excluded),
        # hence the k - 1.
        return done, envir.POI0, envir.POWER0, users[envir.k - 1]

    def reset(self):
        """Start a new episode for a randomly chosen user.

        Returns:
            A tuple ``(POI0, POWER0, i, user_features)``: the initial interest
            scores, the initial stamina, the chosen user index and that
            user's feature row.
        """
        # NOTE(review): the user count 89 is hard-coded; presumably the
        # questionnaire sheet has 89 data rows -- confirm against the data.
        i = random.randint(1, 89)
        envir.k = i
        # Initial interest per attraction: questionnaire columns 5..21 scaled
        # by 0.1. The slice is a copy, so datamatrix itself is not modified.
        envir.POI0 = [score * 0.1 for score in datamatrix[i][5:22]]
        # Initial stamina is stored in the last column of the sheet.
        envir.POWER0 = datamatrix[i][ncols - 1]
        # Every episode starts at attraction 0 (the Shu-Han Archway).
        envir.v = [0]
        # Visit counters are per-episode state: clear them in place so that
        # visits made for earlier users do not leak into this user's
        # similarity penalties in POI_updata.
        same[0][:] = [0] * len(same[0])
        same[0][0] += 1
        envir.POI0 = POI_updata(envir.POI0, envir.POWER0, i)
        # NOTE(review): the original comment described the third value as the
        # current attraction, but the code returns the user index i -- confirm
        # what callers expect.
        return envir.POI0, envir.POWER0, i, users[envir.k - 1]
# Multiplier applied to attractions 4 and 5, keyed by the income label found in
# column 3 of the questionnaire. Labels must match the spreadsheet text exactly.
_INCOME_FACTORS = {
    "一万一下": 0.76321,
    "一万至十万": 0.81456,
    "十万至二十五万": 0.864876,
    "二十五万至五十万": 0.916567,
    "五十万至七十五万": 0.954345,
    "七十五万至一百万": 0.978234,
}

# Row of study_values to use for each education label (questionnaire column
# 22); any other label uses row 3. The backslashes are escaped explicitly so
# the literal keeps its value without relying on unrecognised escape
# sequences, which Python 3.12+ reports with a SyntaxWarning.
_EDUCATION_ROWS = {
    "初中及以下": 0,
    "高中\\中专\\职校": 1,
    "大学": 2,
}


def POI_updata(POI0, POWER0, k):
    """Return updated interest scores for the 17 attractions.

    The scores are adjusted, in order, by remaining stamina, by similarity to
    attractions already visited, by the user's income and education, and
    finally by a small random bias, then clamped.

    Reads the module-level tables ``same``, ``same_values``, ``study_values``
    and ``datamatrix``.

    Args:
        POI0: current interest scores; at least 17 entries are required. The
            list passed in is not modified.
        POWER0: remaining stamina of the user.
        k: row index of the user in ``datamatrix``.

    Returns:
        A new list with the adjusted scores.
    """
    # --- stamina: the lower the remaining stamina, the stronger the decay ---
    if POWER0 <= 1:
        decay = 0.42496
    elif POWER0 <= 3:
        decay = 0.59735
    elif POWER0 <= 5:
        decay = 0.675874
    elif POWER0 <= 7:
        decay = 0.785986
    elif POWER0 <= 12:
        decay = 0.855324
    elif POWER0 <= 23:
        decay = 0.95432
    else:
        decay = 0.9785326
    POI0 = [score * decay for score in POI0]

    # --- similarity to attractions already visited ---
    category_divisor = [1, 1, 1, 1, 1, 1]  # per-category divisor of the score
    category_hits = [0, 0, 0, 0, 0, 0]  # counted attractions per category
    for i in range(17):
        # Attractions in category 0 (no similar attraction) are not counted.
        if same[0][i] > 1 and same[1][i] > 0:
            category_hits[same[1][i]] += 1
    for cat in range(6):
        if category_hits[cat] >= 2:
            # NOTE(review): same_values is indexed by the hit count here, not
            # by the category -- confirm this is intended.
            category_divisor[cat] += 1 - same_values[category_hits[cat]]
    for i in range(17):
        if same[1][i] == 0 and same[0][i] > 1:
            # Same attraction visited again and nothing similar to it exists.
            POI0[i] *= 0.51234
        else:
            POI0[i] *= 1 / category_divisor[same[1][i]]

    # --- income: scales attractions 4 and 5 only; unknown labels change nothing ---
    income_factor = _INCOME_FACTORS.get(datamatrix[k][3])
    if income_factor is not None:
        POI0[4] *= income_factor
        POI0[5] *= income_factor

    # --- education: per-attraction bonus taken from the matching row ---
    bonus_row = study_values[_EDUCATION_ROWS.get(datamatrix[k][22], 3)]
    for i in range(17):
        POI0[i] *= 1 + bonus_row[i] * 0.16662

    # --- random bias, then clamp ---
    for i in range(17):
        POI0[i] += random.uniform(-0.005, 0.005)
        if POI0[i] > 1:
            POI0[i] = 0.998  # scores above 1 are reset to 0.998
        elif POI0[i] < 0:
            POI0[i] = 0
    return POI0