【2016bike】构造图矩阵的代码记录

邻接矩阵的获取

1.根据地理位置以及阈值获得邻近矩阵

根据 info的信息

2.根据“时间模式”获得 模式矩阵

根据流量特征,使用 fast-dtw 计算节点之间时间序列的距离。
在一周的数据中(去掉24、25日,保留20、21、22、23、26日共5天),我们选择每日上午8-10点、下午5-7点两个时间段(即8、9、17、18时共4个小时),以每个小时的流量进行拼接。
所以时间轴为 4*5 = 20 维数据。返回所有节点的流量特征序列。

# 邻接矩阵的获取

一、获得邻居矩阵

分为获得weighted矩阵或unweighted矩阵

class Obtain_Neib(object):
    """Build the geographic neighbour (adjacency) matrix of one environment.

    Station ids are read from ``../data<ENVNUM>/allnodes.npy`` and station
    coordinates from the header-less ``../data<ENVNUM>/info.csv`` (column 0
    is matched against the station id, columns 1 and 2 hold the coordinates).

    The class relies on ``np``, ``pd`` and ``geodesic`` being available at
    module scope.
    NOTE(review): ``geodesic`` is presumably ``geopy.distance.geodesic`` --
    confirm that it is imported before this class is used.

    Args:
        ENVNUM: Environment identifier; converted to ``str`` to build paths.
        ADJ: ``"weighted"`` or ``"unweighted"``; selects the matrix type.
        THRE: Distance threshold used by the unweighted matrix, expressed in
            the unit returned by :meth:`get_dis_nodes` (hundreds of metres).
    """

    def __init__(self, ENVNUM, ADJ, THRE):
        self.ENVNUM = str(ENVNUM)
        self.ADJ = ADJ
        self.THRE = THRE
        self.root = "../data" + self.ENVNUM
        infile = self.root + "/info.csv"
        nodefile = self.root + "/allnodes.npy"
        self.info = pd.read_csv(infile, header=None, index_col=None)
        print(self.info.shape)
        self.Allnodes = np.load(nodefile)
        self.NUM = len(self.Allnodes)

    def get_dis_adj(self):
        """Return the ``NUM x NUM`` matrix of pairwise station distances."""
        D = np.zeros(shape=(self.NUM, self.NUM))
        for i in range(self.NUM):
            v = self.Allnodes[i]
            for j in range(self.NUM):
                w = self.Allnodes[j]
                D[i][j] = self.get_dis_nodes(v, w)
        return D

    def get_dis_nodes(self, v, w):
        """Return the geodesic distance between stations ``v`` and ``w``.

        The result is the distance in metres divided by 100.

        Raises:
            IndexError: If ``v`` or ``w`` has no row in ``info.csv``.
        """
        row_v = self.info.loc[self.info[0] == v]
        row_w = self.info.loc[self.info[0] == w]
        # presumably column 1 is longitude and column 2 latitude (the
        # original locals were named j/w) -- TODO confirm against info.csv
        oj = row_v[1].values[0]
        ow = row_v[2].values[0]
        nj = row_w[1].values[0]
        nw = row_w[2].values[0]
        Dis = geodesic((nw, nj), (ow, oj)).m
        return Dis / 100

    def get_matrix(self, D=None):
        """Turn a distance matrix into the adjacency matrix selected by ADJ.

        Args:
            D: Optional precomputed distance matrix; when ``None`` it is
                computed with :meth:`get_dis_adj`. Only the leading
                ``NUM x NUM`` part is used.

        Returns:
            * ``"weighted"``: inverse-distance weights. Distances are rounded
              to one decimal; a rounded distance of 0 gets weight 10, a
              rounded distance above 10 gets weight 0, anything else gets
              ``round(1 / distance, 2)``. The weights are then min-max
              normalised and rounded to two decimals. If every weight is
              equal the normalisation divides by zero.
            * ``"unweighted"``: boolean matrix, ``True`` where the distance
              is strictly below ``THRE``.
            * any other ``ADJ``: a warning is printed and the distance
              matrix itself is returned.
        """
        if D is None:
            D = self.get_dis_adj()
        D = np.asarray(D, dtype=float)[:self.NUM, :self.NUM]

        if self.ADJ == "weighted":
            dist = np.round(D, 1)
            A = np.zeros(shape=dist.shape)
            coincident = dist == 0
            in_range = ~coincident & ~(dist > 10)
            A[coincident] = 10
            A[in_range] = np.round(1.0 / dist[in_range], 2)
            # min-max normalisation
            big_A = np.max(A)
            small_A = np.min(A)
            A = (A - small_A) / (big_A - small_A)
            return np.round(A, 2)
        elif self.ADJ == "unweighted":
            # Threshold the distances themselves; the previous code compared
            # an all-zero matrix with THRE and therefore always returned True.
            return D < self.THRE
        else:
            print("ADJ not corrent set !")
            print("please choose 'weighted' or 'unweighted'!")
            print("return the Distanse matrix!")
            # D already is the distance matrix; no need to recompute it.
            return D

    def save(self, filename=None, A=None):
        """Save the adjacency matrix with ``np.save``.

        Args:
            filename: Target path; defaults to
                ``<root>/NA_<ADJ>.npy``.
            A: Matrix to save; defaults to :meth:`get_matrix`.
        """
        if A is None:
            A = self.get_matrix()
        if filename is None:
            filename = self.root + "/NA_" + self.ADJ + ".npy"
        np.save(filename, A)

调用:

# Build the neighbour matrix of environment 11001 and write it to disk.
ENVNUM = "11001"
ADJ = "unweighted"
THRE = 10
on = Obtain_Neib(ENVNUM=ENVNUM, ADJ=ADJ, THRE=THRE)
on.save()

二、获得时间模式矩阵

2.1 获得相关时间序列的特征

import  numpy as np
import pandas as pd
import os
from collections import defaultdict
from tqdm import trange
def Judge(day, hour, DAY, HOUR):
    """Return True only when ``day`` equals ``DAY`` and ``hour`` equals ``HOUR``."""
    return bool((day == DAY) and (hour == HOUR))
class Obtain_Time(object):
    """Extract per-station hourly flow features from the trip records.

    For every station a demand vector and a supply vector are built, each
    holding one trip count per (day, hour) pair in ``DAYS`` x ``HOURS``
    (5 days x 4 hours = 20 values). :meth:`feature` concatenates both into a
    ``NUM x 40`` matrix.

    Args:
        ENVNUM: Environment identifier; converted to ``str`` to build paths.
    """

    # Day-of-month values and hours that make up one feature vector.
    DAYS = (20, 21, 22, 23, 26)
    HOURS = (8, 9, 17, 18)
    # Directory holding the per-station cache files written by feature().
    CACHE_DIR = "../drem/"

    def __init__(self, ENVNUM):
        self.ENVNUM = str(ENVNUM)
        nodefile = "../data" + self.ENVNUM + "/allnodes.npy"
        self.Allnodes = np.load(nodefile)
        self.NUM = len(self.Allnodes)
        # NOTE(review): the trip file path is hard-coded to environment 11001
        # regardless of ENVNUM -- confirm that this is intended.
        df = pd.read_csv("../data11001/bikefor7days.csv", index_col=None)
        # Drop the trips borrowed on the 24th and 25th.
        df = df[~df["j_d"].isin([24, 25])].copy()
        # Keep only trips borrowed during the sampled hours.
        df["T"] = df["j_h"].isin(list(self.HOURS))
        # Independent copy so that later column edits never touch a view.
        self.df = df[df["T"]].copy()

        self.columns = self.df.columns

        # Debug values kept for manually checking single calls.
        self.NODE = 11001
        self.DAY = 20
        self.HOUR = 8

    def Judge(self, day, hour, DAY, HOUR):
        """Return True only when both the day and the hour match."""
        if (day == DAY) and (hour == HOUR):
            return True
        else:
            return False

    def get_single_flow(self, node):
        """Count the trips of ``node`` for every sampled (day, hour) pair.

        Reads the ``"Nodes"``, ``"Days"`` and ``"Hours"`` columns of
        ``self.df``, so the caller must have renamed the columns
        accordingly (see :meth:`feature`). Also stores ``node`` in
        ``self.NODE``.

        Returns:
            1-D array with ``len(DAYS) * len(HOURS)`` counts, ordered day by
            day and, within a day, hour by hour. All zeros when the station
            has no rows.
        """
        self.NODE = node
        Df = self.df[self.df["Nodes"] == self.NODE]
        if len(Df) == 0:
            return np.zeros(shape=(len(self.DAYS) * len(self.HOURS),))

        # Vectorised counting: no temporary column is written to the slice
        # and no row-wise apply is needed.
        days = Df["Days"].to_numpy()
        hours = Df["Hours"].to_numpy()
        r = [
            np.sum((days == DAY) & (hours == HOUR))
            for DAY in self.DAYS
            for HOUR in self.HOURS
        ]
        return np.array(r)

    def _collect_flows(self, suffix):
        """Return the flow matrix for all stations, using the per-station cache.

        A row is loaded from ``CACHE_DIR/<node><suffix>`` when that file
        exists; otherwise it is computed and then saved there.
        """
        flows = np.zeros(shape=(self.NUM, len(self.DAYS) * len(self.HOURS)))
        for i in range(self.NUM):
            node = self.Allnodes[i]
            filename = self.CACHE_DIR + str(node) + suffix
            if os.path.exists(filename):
                flows[i] = np.load(filename)
            else:
                flows[i] = self.get_single_flow(node)
                np.save(filename, flows[i])
        return flows

    def feature(self):
        """Build, save and return the ``NUM x 40`` demand + supply features.

        The DataFrame columns are temporarily renamed so that
        :meth:`get_single_flow` sees the borrow station/time (demand) and
        then the return station/time (supply) under the generic names
        ``Nodes``/``Days``/``Hours``. The original column names are restored
        even if an error occurs.

        The result is written to ``../data<ENVNUM>/Timefeature_dtw.npy``.
        """
        # Demand: borrow station and borrow day/hour.
        a = ["Nodes", '还车站点', '借车时间', '还车时间', "Days", "Hours",
             'j_m', 'h_d', 'h_h', 'h_m', "T"]
        # Supply: return station and return day/hour.
        # NOTE(review): rows were pre-filtered on the borrow hour only, so
        # supply counts only contain trips that were also borrowed in a
        # sampled hour -- confirm that this is intended.
        b = ['借车站点', "Nodes", '借车时间', '还车时间', 'j_d',
             'j_h', 'j_m', "Days", "Hours", "h_m", "T"]

        os.makedirs(self.CACHE_DIR, exist_ok=True)
        try:
            self.df.columns = a
            De = self._collect_flows("_demand.npy")
            print(self.df.columns)
            self.df.columns = b
            Su = self._collect_flows("_supply.npy")
        finally:
            # Always restore the original column names.
            self.df.columns = self.columns

        out = np.concatenate((De, Su), axis=1)
        print(self.ENVNUM + " env time feature's shape is:", out.shape)
        print("save the outset!")
        outfile = "../data" + self.ENVNUM + "/Timefeature_dtw.npy"
        np.save(outfile, out)
        return out

调用

# Extract and save the time-pattern features of environment 11001.
ENVNUM = "11001"
ot = Obtain_Time(ENVNUM=ENVNUM)
out = ot.feature()

2.2 利用fast-dtw求特征

# Pairwise fast-dtw distances between the station feature vectors.
ENVNUM = "11001"
outfile = "../data" + ENVNUM + "/Timefeature_dtw.npy"
feature = np.load(outfile)
Allnode = np.load("../data" + ENVNUM + "/allnodes.npy")
NUM = len(Allnode)
D = np.zeros(shape=(NUM, NUM))
for i in trange(NUM):
    filename = "../drem/" + str(i) + "_corre.npy"
    if not os.path.exists(filename):
        for j in range(NUM):
            if i != j:
                D[i][j], path = fastdtw(feature[i], feature[j])
            else:
                D[i][j] = 0
        # Persist every finished row so an interrupted run does not have to
        # recompute it.
        np.save(filename, D[i])
    else:
        # Row already computed by an earlier run: reuse it.
        D[i] = np.load(filename)
np.save("../data" + ENVNUM + "/NT_dtw.npy", D)

2.3 对距离矩阵进行归一化

def get_matrix(ENVNUM, ADJ, D=None, Thre=None):
    """Turn the DTW distance matrix into a time-pattern adjacency matrix.

    Args:
        ENVNUM: Environment identifier, used to locate
            ``../data<ENVNUM>/NT_dtw.npy`` when ``D`` is not given.
        ADJ: ``"weighted"`` or ``"unweighted"``.
        D: Optional square distance matrix; loaded from disk when ``None``.
        Thre: Optional distance threshold; when ``None`` it is set to
            ``int(min(mean(D), median(D)) / 5)`` and printed.

    Returns:
        * ``"weighted"``: inverse-distance weights. Distances are rounded to
          one decimal; a rounded distance of 0 gets weight ``Thre``, a
          rounded distance above ``Thre`` gets weight 0, anything else gets
          ``round(1 / distance, 2)``. The weights are then min-max
          normalised and rounded to two decimals. If every weight is equal
          the normalisation divides by zero.
        * ``"unweighted"``: boolean matrix, ``True`` where the distance is
          strictly below ``Thre``.
        * any other ``ADJ``: a warning is printed and the distance matrix
          itself is returned (same fallback as ``Obtain_Neib.get_matrix``).
    """
    if D is None:
        # str() keeps this consistent with save(), which accepts any ENVNUM.
        D = np.load("../data" + str(ENVNUM) + "/NT_dtw.npy")
    D = np.asarray(D, dtype=float)
    # Derive the upper distance threshold from the data when not supplied.
    if Thre is None:
        Thre = int(min(np.mean(D), np.median(D)) / 5)
        print("median and mean limit the value is :", Thre)

    if ADJ == "weighted":
        dist = np.round(D, 1)
        A = np.zeros(shape=dist.shape)
        coincident = dist == 0
        in_range = ~coincident & ~(dist > Thre)
        A[coincident] = Thre
        A[in_range] = np.round(1.0 / dist[in_range], 2)
        # min-max normalisation
        big_A = np.max(A)
        small_A = np.min(A)
        A = (A - small_A) / (big_A - small_A)
        return np.round(A, 2)
    elif ADJ == "unweighted":
        # Threshold the distances themselves; the previous code compared an
        # all-zero matrix with Thre and never looked at D.
        return D < Thre
    else:
        print("ADJ not corrent set !")
        print("please choose 'weighted' or 'unweighted'!")
        print("return the Distanse matrix!")
        return D
def save(ENVNUM, ADJ, D=None):
    """Save a time-pattern adjacency matrix to ``../data<ENVNUM>/TA_<ADJ>.npy``.

    Args:
        ENVNUM: Environment identifier (converted with ``str`` for the path).
        ADJ: Matrix type; forwarded to ``get_matrix`` and used in the file name.
        D: Matrix to save; when ``None`` it is produced by
            ``get_matrix(ENVNUM, ADJ)``.
    """
    matrix = get_matrix(ENVNUM, ADJ) if D is None else D
    target = "".join(["../data", str(ENVNUM), "/TA_", ADJ, ".npy"])
    np.save(target, matrix)

调用

# Compute and save the unweighted time-pattern matrix of environment 11001.
ENVNUM = "11001"
ADJ = "unweighted"
save(ENVNUM=ENVNUM, ADJ=ADJ)

三、以13098为例的全部调用

注意:这里与2.2节的调用方式不同。原因在于11001的数据量太大,需要在每一步都进行 save 来保留运算过程,否则每次重新运算太耗时,浪费时间;而13098是小数据,不需要保存中间结果。

## Step 1: time features
ENVNUM = "13098"
ot = Obtain_Time(ENVNUM)
out = ot.feature()
## Step 2: fast-dtw distance matrix (small data set, no per-row caching)
outfile = "../data" + ENVNUM + "/Timefeature_dtw.npy"
feature = np.load(outfile)
Allnode = np.load("../data" + ENVNUM + "/allnodes.npy")
NUM = len(Allnode)
D = np.zeros(shape=(NUM, NUM))
for i in trange(NUM):
    for j in range(NUM):
        if i != j:
            D[i][j], path = fastdtw(feature[i], feature[j])
        else:
            D[i][j] = 0
np.save("../data" + ENVNUM + "/NT_dtw.npy", D)
## Step 3: adjacency matrices, unweighted first and then weighted
for ADJ in ("unweighted", "weighted"):
    save(ENVNUM, ADJ)

四、以11001为例的全部调用

## Step 1: time features
ENVNUM = "11001"
ot = Obtain_Time(ENVNUM)
out = ot.feature()
## Step 2: fast-dtw distance matrix, cached row by row
outfile = "../data" + ENVNUM + "/Timefeature_dtw.npy"
feature = np.load(outfile)
Allnode = np.load("../data" + ENVNUM + "/allnodes.npy")
NUM = len(Allnode)
D = np.zeros(shape=(NUM, NUM))
for i in trange(NUM):
    filename = "../drem/" + str(i) + "_corre.npy"
    if not os.path.exists(filename):
        for j in range(NUM):
            if i != j:
                D[i][j], path = fastdtw(feature[i], feature[j])
            else:
                D[i][j] = 0
        # Persist every finished row so an interrupted run does not have to
        # recompute it.
        np.save(filename, D[i])
    else:
        # Row already computed by an earlier run: reuse it.
        D[i] = np.load(filename)
np.save("../data" + ENVNUM + "/NT_dtw.npy", D)
## Step 3: adjacency matrices, unweighted first and then weighted
for ADJ in ("unweighted", "weighted"):
    save(ENVNUM, ADJ)

附录

fast-dtw的简介及代码

你可能感兴趣的:(#,代码片段,邻接矩阵,归一化,python,bike)