1.根据地理位置以及阈值获得邻近矩阵
根据 info的信息
2.根据“时间模式”获得 模式矩阵
根据流量特征,使用 fast-dtw 计算节点之间的距离
在一周的数据中,我们选择其中5天,每天取上午8-10点和下午5-7点两个时间段,以每个小时的流量进行拼接。
所以时间轴长度为 4*5=20 个数据点。返回所有节点的流量特征序列。
# 邻接矩阵的获取
分为获得weighted矩阵或unweighted矩阵
class Obtain_Neib(object):
    """Build a distance-based neighbourhood matrix for the nodes of one environment.

    Node ids are read from ``../data<ENVNUM>/allnodes.npy`` and per-node
    coordinates from ``../data<ENVNUM>/info.csv`` (no header; column 0 is the
    node id, columns 1 and 2 are the coordinates).

    Args:
        ENVNUM: Environment identifier; converted to ``str`` and appended to
            ``"../data"`` to form the data directory.
        ADJ: ``"weighted"`` or ``"unweighted"``; selects what ``get_matrix``
            returns.
        THRE: Distance threshold used by the ``"unweighted"`` matrix, in the
            same unit as ``get_dis_nodes`` returns.
    """

    def __init__(self, ENVNUM, ADJ, THRE):
        self.ENVNUM = str(ENVNUM)
        self.ADJ = ADJ
        self.THRE = THRE
        self.root = "../data" + self.ENVNUM
        infile = self.root + "/info.csv"
        nodefile = self.root + "/allnodes.npy"
        self.info = pd.read_csv(infile, header=None, index_col=None)
        print(self.info.shape)
        self.Allnodes = np.load(nodefile)
        self.NUM = len(self.Allnodes)

    def get_dis_adj(self):
        """Return the ``NUM x NUM`` matrix of pairwise node distances."""
        D = np.zeros(shape=(self.NUM, self.NUM))
        for i in range(self.NUM):
            v = self.Allnodes[i]
            for j in range(self.NUM):
                w = self.Allnodes[j]
                D[i][j] = self.get_dis_nodes(v, w)
        return D

    def get_dis_nodes(self, v, w):
        """Return the geodesic distance between nodes ``v`` and ``w``, divided by 100.

        Raises:
            IndexError: If ``v`` or ``w`` has no row in ``info.csv``.
        """
        # Filter once per node instead of once per coordinate.
        row_v = self.info.loc[self.info[0] == v]
        row_w = self.info.loc[self.info[0] == w]
        oj = row_v[1].values[0]
        ow = row_v[2].values[0]
        nj = row_w[1].values[0]
        nw = row_w[2].values[0]
        # NOTE(review): `geodesic` is not imported in the visible part of the
        # file; presumably geopy.distance.geodesic, which expects (lat, lon)
        # and whose `.m` is metres -- so column 2 would be latitude, column 1
        # longitude, and the result is in units of 100 m. Confirm.
        Dis = geodesic((nw, nj), (ow, oj)).m
        return Dis / 100

    @staticmethod
    def _min_max(A):
        """Min-max scale ``A`` to [0, 1] without dividing by zero."""
        if A.size == 0:
            return A
        big_A = np.max(A)
        small_A = np.min(A)
        span = big_A - small_A
        if span == 0:
            # Every weight is identical, so scaling is undefined: report
            # "all connected" for a non-zero constant and "none" for zero.
            return np.ones_like(A) if big_A != 0 else np.zeros_like(A)
        return (A - small_A) / span

    def get_matrix(self, D=None):
        """Turn a distance matrix into the adjacency matrix selected by ``self.ADJ``.

        Args:
            D: Optional precomputed distance matrix; computed with
                ``get_dis_adj`` when omitted.

        Returns:
            * ``"weighted"``: float matrix. Distances are rounded to one
              decimal; a distance of 0 gets weight 10, a distance above 10
              gets weight 0, anything else gets ``1 / distance`` rounded to
              two decimals. The result is min-max normalised and rounded to
              two decimals.
            * ``"unweighted"``: boolean matrix, ``True`` where the distance is
              below ``self.THRE``.
            * anything else: a warning is printed and the distance matrix
              itself is returned.
        """
        if D is None:
            D = self.get_dis_adj()
        D = np.asarray(D, dtype=float)

        if self.ADJ == "weighted":
            R = np.round(D, 1)
            A = np.zeros(shape=R.shape)
            coincident = R == 0
            # Written as a negation so NaN distances take the 1/d branch,
            # exactly like the original if/elif/else chain.
            near = ~(coincident | (R > 10))
            A[coincident] = 10
            A[near] = np.round(1 / R[near], 2)
            # Normalise to [0, 1].
            return np.round(self._min_max(A), 2)
        elif self.ADJ == "unweighted":
            # Threshold the distances themselves (the original compared an
            # all-zero matrix, which made the result constant).
            return D < self.THRE
        else:
            print("ADJ not corrent set !")
            print("please choose 'weighted' or 'unweighted'!")
            print("return the Distanse matrix!")
            # Reuse the distances already in hand instead of recomputing them.
            return D

    def save(self, filename=None, A=None):
        """Save matrix ``A`` with ``np.save``.

        Args:
            filename: Target path; defaults to ``<root>/NA_<ADJ>.npy``.
            A: Matrix to store; defaults to ``self.get_matrix()``.
        """
        if A is None:
            A = self.get_matrix()
        if filename is None:
            filename = self.root + "/NA_" + self.ADJ + ".npy"
        np.save(filename, A)
调用:
# Driver: build the neighbourhood matrix for environment 11001 and save it.
ENVNUM="11001"
# Matrix flavour handed to Obtain_Neib: "weighted" or "unweighted".
ADJ ="unweighted"
# Threshold stored on the instance as self.THRE.
THRE =10
on= Obtain_Neib(ENVNUM,ADJ,THRE)
# With no arguments, save() calls get_matrix() and writes <root>/NA_<ADJ>.npy.
on.save()
import numpy as np
import pandas as pd
import os
from collections import defaultdict
from tqdm import trange
def Judge(day, hour, DAY, HOUR):
    """Return True when (day, hour) is exactly the slot (DAY, HOUR), else False."""
    same_slot = day == DAY and hour == HOUR
    return bool(same_slot)
class Obtain_Time(object):
    """Build the per-node hourly flow features used for the DTW "time pattern" matrix.

    For every node, trips are counted in 20 slots (5 days x 4 hours), once
    keyed on the borrow side (demand) and once on the return side (supply).

    Args:
        ENVNUM: Environment identifier; node ids are loaded from
            ``../data<ENVNUM>/allnodes.npy``.
    """

    # Days and hours that make up the 20 slots of one feature row.
    _DAYS = (20, 21, 22, 23, 26)
    _HOURS = (8, 9, 17, 18)
    # Borrow-day values removed from the trip table before counting.
    _SKIPPED_DAYS = (24, 25)

    def __init__(self, ENVNUM):
        self.ENVNUM = str(ENVNUM)
        nodefile = "../data" + self.ENVNUM + "/allnodes.npy"
        self.Allnodes = np.load(nodefile)
        self.NUM = len(self.Allnodes)
        # NOTE(review): the trip file is always read from data11001, whatever
        # ENVNUM is -- confirm that every environment shares this file.
        df = pd.read_csv("../data11001/bikefor7days.csv", index_col=None)
        # Drop trips borrowed on day 24 or 25; copy so the column assignment
        # below does not write into a view of the raw frame.
        df = df[~df["j_d"].isin(self._SKIPPED_DAYS)].copy()
        # Flag trips borrowed during one of the selected hours.
        df["T"] = df["j_h"].isin(self._HOURS)
        self.df = df[df["T"]].copy()
        self.columns = self.df.columns
        # Debugging defaults kept from the original code.
        self.NODE = 11001
        self.DAY = 20
        self.HOUR = 8

    def Judge(self, day, hour, DAY, HOUR):
        """Return True when (day, hour) equals (DAY, HOUR), else False."""
        if (day == DAY) and (hour == HOUR):
            return True
        else:
            return False

    def get_single_flow(self, node):
        """Count the trips of ``node`` in each of the 20 (day, hour) slots.

        Reads the ``"Nodes"``, ``"Days"`` and ``"Hours"`` columns of
        ``self.df``; ``feature`` installs those names before calling this.

        Args:
            node: Node id matched against the ``"Nodes"`` column.

        Returns:
            A length-20 array ordered day-major over days 20, 21, 22, 23, 26
            and hours 8, 9, 17, 18; all zeros when the node has no rows.
        """
        self.NODE = node
        Df = self.df[self.df["Nodes"] == self.NODE]
        if Df.empty:
            return np.zeros(shape=(20,))
        days = Df["Days"].to_numpy()
        hours = Df["Hours"].to_numpy()
        # Vectorised count per slot; replaces a row-wise apply that also added
        # a scratch column to a filtered slice of self.df.
        counts = [
            int(np.count_nonzero((days == DAY) & (hours == HOUR)))
            for DAY in self._DAYS
            for HOUR in self._HOURS
        ]
        return np.array(counts)

    def _cached_flows(self, columns, suffix):
        """Rename ``self.df`` columns to ``columns`` and return one flow row per node.

        Each row is loaded from ``../drem/<node><suffix>`` when that file
        exists; otherwise it is computed and written there.
        """
        self.df.columns = columns
        flows = np.zeros(shape=(self.NUM, 20))
        for i in range(self.NUM):
            node = self.Allnodes[i]
            filename = "../drem/" + str(node) + suffix
            if os.path.exists(filename):
                flows[i] = np.load(filename)
            else:
                flows[i] = self.get_single_flow(node)
                np.save(filename, flows[i])
        return flows

    def feature(self):
        """Compute, save and return the ``(NUM, 40)`` demand+supply feature matrix.

        Columns 0-19 are demand counts (borrow station/day/hour), columns
        20-39 supply counts (return station/day/hour). The result is written
        to ``../data<ENVNUM>/Timefeature_dtw.npy``.

        Raises:
            ValueError: From pandas, if ``self.df`` does not have exactly the
                11 columns the two rename lists assume.
        """
        # Demand view: borrow station / borrow day / borrow hour.
        a = ["Nodes", '还车站点', '借车时间', '还车时间', "Days", "Hours",
             'j_m', 'h_d', 'h_h', 'h_m', "T"]
        # Supply view: return station / return day / return hour.
        b = ['借车站点', "Nodes", '借车时间', '还车时间', 'j_d',
             'j_h', 'j_m', "Days", "Hours", "h_m", "T"]
        # np.save does not create missing directories.
        os.makedirs("../drem", exist_ok=True)
        try:
            # The demand pass now loads an existing cache file, as the supply
            # pass always did; previously it recomputed every row regardless.
            De = self._cached_flows(a, "_demand.npy")
            print(self.df.columns)
            Su = self._cached_flows(b, "_supply.npy")
        finally:
            # Restore the original column names even if a pass failed.
            self.df.columns = self.columns
        out = np.concatenate((De, Su), axis=1)
        print(self.ENVNUM + " env time feature's shape is:", out.shape)
        print("save the outset!")
        outfile = "../data" + self.ENVNUM + "/Timefeature_dtw.npy"
        np.save(outfile, out)
        return out
调用:
# Driver: compute the time-feature matrix for environment 11001.
ENVNUM="11001"
ot = Obtain_Time(ENVNUM)
# feature() also writes ../data<ENVNUM>/Timefeature_dtw.npy.
out = ot.feature()
# Pairwise fast-DTW distances between the saved per-node time features of
# environment 11001, cached one row at a time.
ENVNUM = "11001"
outfile = "../data" + ENVNUM + "/Timefeature_dtw.npy"
feature = np.load(outfile)
Allnode = np.load("../data" + ENVNUM + "/allnodes.npy")
NUM = len(Allnode)
D = np.zeros(shape=(NUM, NUM))
for i in trange(NUM):
    filename = "../drem/" + str(i) + "_corre.npy"
    if not os.path.exists(filename):
        for j in range(NUM):
            if i != j:
                D[i, j], path = fastdtw(feature[i], feature[j])
            else:
                D[i, j] = 0
        # Persist each finished row so an interrupted run does not have to
        # recompute it.
        np.save(filename, D[i])
    else:
        # Row already computed by an earlier run.
        D[i] = np.load(filename)
np.save("../data" + ENVNUM + "/NT_dtw.npy", D)
def get_matrix(ENVNUM, ADJ, D=None, Thre=None):
    """Turn a DTW distance matrix into a weighted or unweighted adjacency matrix.

    Args:
        ENVNUM: Environment identifier; only used to load
            ``../data<ENVNUM>/NT_dtw.npy`` when ``D`` is omitted.
        ADJ: ``"weighted"`` or ``"unweighted"``.
        D: Optional square distance matrix.
        Thre: Optional distance threshold. Defaults to
            ``int(min(mean(D), median(D)) / 5)``, which is printed.

    Returns:
        * ``"weighted"``: float matrix. Distances are rounded to one decimal;
          a distance of 0 gets weight ``Thre``, a distance above ``Thre`` gets
          weight 0, anything else gets ``1 / distance`` rounded to two
          decimals. The result is min-max normalised and rounded to two
          decimals.
        * ``"unweighted"``: boolean matrix, ``True`` where the distance is
          below ``Thre``.

    Raises:
        ValueError: If ``ADJ`` is neither ``"weighted"`` nor ``"unweighted"``
            (previously this silently returned ``None``).
    """
    if ADJ not in ("weighted", "unweighted"):
        raise ValueError(
            "ADJ must be 'weighted' or 'unweighted', got {!r}".format(ADJ)
        )
    if D is None:
        D = np.load("../data" + str(ENVNUM) + "/NT_dtw.npy")
    D = np.asarray(D, dtype=float)
    # Derive the threshold from the data when the caller gives none.
    if Thre is None:
        Thre = int(min(np.mean(D), np.median(D)) / 5)
        print("median and mean limit the value is :", Thre)

    if ADJ == "unweighted":
        # Threshold the distances themselves (the original compared an
        # all-zero matrix, which made the result constant).
        return D < Thre

    R = np.round(D, 1)
    A = np.zeros(shape=R.shape)
    coincident = R == 0
    # Written as a negation so NaN distances take the 1/d branch, exactly
    # like the original if/elif/else chain.
    near = ~(coincident | (R > Thre))
    A[coincident] = Thre
    A[near] = np.round(1 / R[near], 2)

    # Normalise to [0, 1], guarding the constant-matrix case that would
    # otherwise divide by zero.
    if A.size == 0:
        return A
    big_A = np.max(A)
    small_A = np.min(A)
    span = big_A - small_A
    if span == 0:
        A = np.ones_like(A) if big_A != 0 else np.zeros_like(A)
    else:
        A = (A - small_A) / span
    return np.round(A, 2)
def save(ENVNUM, ADJ, D=None):
    """Write an adjacency matrix to ``../data<ENVNUM>/TA_<ADJ>.npy``.

    Args:
        ENVNUM: Environment identifier used in the output directory name.
        ADJ: Matrix flavour; part of the file name and passed on to
            ``get_matrix`` when ``D`` is omitted.
        D: Matrix to store; built with ``get_matrix(ENVNUM, ADJ)`` if None.
    """
    matrix = get_matrix(ENVNUM, ADJ) if D is None else D
    target = "".join(["../data", str(ENVNUM), "/TA_", ADJ, ".npy"])
    np.save(target, matrix)
调用:
# Driver: build the adjacency matrix for environment 11001 from the saved
# DTW distances and write it to ../data11001/TA_unweighted.npy.
ENVNUM ="11001"
ADJ = "unweighted"
save(ENVNUM,ADJ)
注意:2.2节中两个环境的调用方式不同。原因在于11001的数据量太大,需要在每一步进行save,保留中间运算结果,避免每次重新运算浪费时间;而13098是小数据,不需要保存中间结果。
## Time features for environment 13098
ENVNUM = "13098"
ot = Obtain_Time(ENVNUM)
out = ot.feature()
## fast-dtw distance matrix (no per-row caching for this environment)
outfile = "../data" + ENVNUM + "/Timefeature_dtw.npy"
feature = np.load(outfile)
Allnode = np.load("../data" + ENVNUM + "/allnodes.npy")
NUM = len(Allnode)
D = np.zeros(shape=(NUM, NUM))
for i in trange(NUM):
    for j in range(NUM):
        if i != j:
            D[i, j], path = fastdtw(feature[i], feature[j])
        else:
            D[i, j] = 0
np.save("../data" + ENVNUM + "/NT_dtw.npy", D)
## Adjacency matrices: unweighted first, then weighted
for ADJ in ("unweighted", "weighted"):
    save(ENVNUM, ADJ)
## Time features for environment 11001
ENVNUM = "11001"
ot = Obtain_Time(ENVNUM)
out = ot.feature()
## fast-dtw distance matrix, cached row by row under ../drem
outfile = "../data" + ENVNUM + "/Timefeature_dtw.npy"
feature = np.load(outfile)
Allnode = np.load("../data" + ENVNUM + "/allnodes.npy")
NUM = len(Allnode)
D = np.zeros(shape=(NUM, NUM))
for i in trange(NUM):
    filename = "../drem/" + str(i) + "_corre.npy"
    if not os.path.exists(filename):
        for j in range(NUM):
            if i != j:
                D[i, j], path = fastdtw(feature[i], feature[j])
            else:
                D[i, j] = 0
        # Save the intermediate row so a crash does not force recomputation.
        np.save(filename, D[i])
    else:
        # Reuse the row written by an earlier run.
        D[i] = np.load(filename)
np.save("../data" + ENVNUM + "/NT_dtw.npy", D)
## Adjacency matrices: unweighted first, then weighted
for ADJ in ("unweighted", "weighted"):
    save(ENVNUM, ADJ)
fast-dtw的简介及代码