import random
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import xlrd
from torch.autograd import Variable
import numpy as np
import json
'''
STEP 1: LOADING DATASET (read the user-profile Excel file; the JSON visit data is read in one_batch below)
'''
data=xlrd.open_workbook("D:/PCstudy/data/Question_reults_end.xls")
table = data.sheets()[0]
nrows = table.nrows
ncols = table.ncols
datamatrix = []
for i in range(nrows):
    rows = table.row_values(i)
    datamatrix.append(rows)
users = {}
vis = []
age = []
sex = []
income = []
citys = []
school = []
wokers = []
for i in range(nrows):
    # skip the header row
    if i == 0:
        continue
    vis.append(datamatrix[i][0])
    age.append(datamatrix[i][1])
    sex.append(datamatrix[i][2])
    income.append(datamatrix[i][3])
    citys.append(datamatrix[i][4])
    school.append(datamatrix[i][22])
    wokers.append(datamatrix[i][23])
# one-hot encode every attribute and concatenate the indicator columns into one frame
dfresult = pd.DataFrame()
for name, column in [('vis', vis), ('age', age), ('sex', sex), ('income', income),
                     ('citys', citys), ('school', school), ('wokers', wokers)]:
    dummies = pd.get_dummies(column)
    dummies.columns = [name + '_' + str(x) for x in dummies.columns]
    dfresult = pd.concat([dfresult, dummies], axis=1)
users = dfresult.values
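# `users` is now a (num_users x num_indicator_columns) 0/1 matrix; row i-1 holds the
# encoded profile of user i and is looked up as users[i-1] inside one_batch() below.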
user = 0
poi = 0
'''
--- Read the JSON files containing each user's visit data ---
A batch holds n samples; each sample is a sequence (P1, P2, ..., Pk),
where every Pk is a 17-element POI vector.
'''
def one_batch(x):
    in_num = []
    out_num = []
    for y in range(64):
        # resample a random user file and record until the visit history is long enough
        size = 0
        while size < x + 2:
            i = random.randint(1, 89)
            path = "D:/PCstudy/pythonProject1/routing/user_data/user_visit_json"
            path += str(i)
            path += ".json"
            with open(path, "r") as f:
                row_data = json.load(f)
            j = random.randint(1, 100)
            j = str(j)
            history = np.array(row_data[j]['visit_historal'])
            size = np.size(history)
        # one-hot encoded profile of user i
        user_i = np.array(users[i - 1])
        user_i = user_i.tolist()
        k = row_data[j]['visit_historal'][x]
        POW_k = row_data[j]['POW_history'][x]
        b = [k, POW_k]
        # each timestep is the POI vector concatenated with the user profile and [k, POW_k]
        POI_list = []
        for w in range(x + 1):
            POI_w = row_data[j]['POI_historal'][w]
            POI_w = POI_w + user_i + b
            POI_list.append(POI_w)
        in_num.append(POI_list)
        # the target is the POI vector of the next visit
        out_num.append(row_data[j]['POI_historal'][x + 1])
    in_num = np.float32(in_num)
    in_num = torch.from_numpy(in_num)
    out_num = np.float32(out_num)
    out_num = torch.from_numpy(out_num)
    return (in_num, out_num)
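# Usage sketch (x = 3 is a hypothetical value; assumes the JSON files above exist):
#   in_num, out_num = one_batch(3)
#   in_num  -> FloatTensor of shape (64, 4, feature_len)   # 4 = x + 1 timesteps
#   out_num -> FloatTensor of shape (64, 17)                # next-POI vector
# feature_len must equal input_dim (51) for the reshapes in the training loop below.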
'''
STEP 2: MAKING DATASET ITERABLE
'''
batch_size = 64
num_epochs = 10
'''
STEP 3: CREATE MODEL CLASS
'''
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True expects inputs of shape (batch, seq_len, input_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # zero-initialise the hidden and cell states for every forward pass
        if torch.cuda.is_available():
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
            c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        else:
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
            c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        out, (hn, cn) = self.lstm(x, (h0, c0))
        # predict from the output of the last timestep only
        out = self.fc(out[:, -1, :])
        return out
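# Shape sketch (illustrative values only): with input_dim=51, hidden_dim=100,
# layer_dim=3 and output_dim=17, a batch of shape (64, seq_len, 51) maps to (64, 17):
#   m = LSTMModel(51, 100, 3, 17)
#   m(Variable(torch.zeros(64, 5, 51))).size()  ->  torch.Size([64, 17])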
'''
STEP 4: INSTANTIATE MODEL CLASS
'''
input_dim = 51
hidden_dim = 100
layer_dim = 3
output_dim = 17
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
if torch.cuda.is_available():
    model.cuda()
'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = torch.nn.MSELoss()
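# MSELoss compares the 17-dim prediction with the 17-dim target element-wise and
# averages the squared errors, e.g. criterion(torch.zeros(64, 17), torch.ones(64, 17))
# evaluates to 1.0 (illustrative tensors, not taken from the dataset).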
'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
'''
STEP 7: TRAIN THE MODEL
'''
seq_dim = 0
iter = 0
for epoch in range(num_epochs):
    for i in range(64):
        # draw a random history length and build one training batch of sequences
        x = random.randint(0, 7)
        seq_dim = x + 1
        (images, labels) = one_batch(x)
        if torch.cuda.is_available():
            images = Variable(images.view(-1, seq_dim, input_dim).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.view(-1, seq_dim, input_dim))
            labels = Variable(labels)
        optimizer.zero_grad()
        print(images.size())
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        iter += 1
        if iter % 500 == 0:
            # evaluate on one freshly sampled batch
            correct = 0
            total = 0
            test_images, test_labels = one_batch(x)
            if torch.cuda.is_available():
                test_images = Variable(test_images.view(-1, seq_dim, input_dim).cuda())
            else:
                test_images = Variable(test_images.view(-1, seq_dim, input_dim))
            outputs = model(test_images)
            # compare the index of the predicted POI with the index of the
            # largest entry of the target POI vector
            _, predicted = torch.max(outputs.data, 1)
            _, target = torch.max(test_labels, 1)
            total += test_labels.size(0)
            if torch.cuda.is_available():
                correct += (predicted.cpu() == target.cpu()).sum()
            else:
                correct += (predicted == target).sum()
            accuracy = 100 * correct / total
            # accuracy is a percentage, so checkpoint once it reaches 80%
            if accuracy >= 80:
                torch.save(model.state_dict(), "lstmdata.para")
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
anser = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
anser.load_state_dict(torch.load("lstmdata.para"))
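# Hypothetical inference sketch (assumes "lstmdata.para" was saved during training):
#   x_new, _ = one_batch(3)
#   pred = anser(Variable(x_new.view(-1, 4, input_dim)))
#   next_poi = torch.max(pred.data, 1)[1]   # index of the most likely next POI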