# Read every file in the GPS folder, merge them into one DataFrame, keep the
# rows of route '68路' and show a scatter plot of their first two columns.
import os

import matplotlib.pyplot as plt
import pandas as pd
from pylab import mpl

# SimHei is a CJK font; presumably selected so the Chinese labels render.
mpl.rcParams['font.sans-serif'] = ['SimHei']

path = 'D:/Rpython/file/time/'  # folder that holds the input files
list1 = os.listdir(path)

# Read all files first and concatenate once: calling pd.concat inside the loop
# copies the accumulated frame again for every file.
frames = [pd.read_csv(path + name, engine='python') for name in list1]
data1 = pd.concat(frames) if frames else pd.DataFrame()

# Keep only route 68.  The explicit copy makes `temp` an independent frame, so
# the column assignments done later in the script do not write into a slice of
# `data1` (which triggers pandas' SettingWithCopyWarning).
temp = data1[data1.线路名称 == '68路'].copy()

# Scatter plot of the first two columns (bus longitude and latitude according
# to the original author's note).
plt.figure()
plt.scatter(temp.iloc[:, 0], temp.iloc[:, 1])
plt.show()
# Cluster the points to find the 30 stops; the arbitrary cluster labels are
# mapped to real station numbers further down.
def cengci(temp, num):
    """Cluster observations hierarchically and cut the tree at a given height.

    Args:
        temp: Observations, one row per point, as accepted by
            ``scipy.cluster.hierarchy.linkage``.
        num: Height at which the dendrogram is cut (passed to ``cut_tree`` as
            ``height``); despite the name it is not a cluster count.

    Returns:
        A flat 1-D array holding the cluster label of every observation.
    """
    from scipy.cluster.hierarchy import cut_tree, linkage

    # Ward linkage on Euclidean distances.
    tree = linkage(temp, method='ward', metric='euclidean')
    # cut_tree returns one column per requested height; flatten it.
    return cut_tree(tree, height=num).ravel()
try:
    # `Cengci` is the author's own helper module.  When it is importable it
    # takes precedence, exactly as before.
    from Cengci import cengci
except ImportError:
    # Fall back to the `cengci` function defined earlier in this file instead
    # of aborting the whole script.
    pass
import numpy as np

# Scan cut heights from 0 upwards and stop at the first height that yields at
# most 30 clusters, printing the cluster count for every height tried before.
for i in np.arange(0, 0.05, 0.001):
    nu = len(set(cengci(temp.iloc[:, :2], i)))
    if nu < 31:
        break
    print('当前高度为{},聚类数目为{}'.format(i, nu))

# The height actually used is hard-coded (presumably read off the scan above).
label = cengci(temp.iloc[:, :2], 0.042)
temp['站点'] = label

# Scatter plot annotated with the real station numbers.
# list2[k] is the cluster label that corresponds to station k + 1.
# NOTE(review): a cluster label that is missing from list2 keeps its raw value
# in the '站点' column — confirm the clustering yields exactly these 30 labels.
list2 = [9, 1, 21, 16, 19, 0, 17, 4, 5, 26, 24, 23, 10, 20, 3,
         28, 29, 11, 12, 27, 6, 18, 15, 7, 2, 22, 8, 14, 13, 25]
for station, cluster in enumerate(list2, start=1):
    # The mask is built from the untouched `label` array, so stations that were
    # already renumbered cannot be matched a second time.
    temp.loc[label == cluster, '站点'] = station

plt.figure()
for i in range(1, 31):
    tem = temp[temp.站点 == i]
    plt.scatter(tem.iloc[:, 0], tem.iloc[:, 1])
    # Write the station number at the centroid of its points.
    plt.text(tem.iloc[:, 0].mean(), tem.iloc[:, 1].mean(), i)
plt.show()
# Split the day into five periods around the morning and evening peaks, count
# the boardings per station in each period and collect them in one DataFrame
# (rows: stations 1..30, columns: periods).
import datetime as _dt

lis = [5, 8, 10, 17, 19, 23]  # period boundaries, hour of day
edges = [_dt.time(hour) for hour in lis]

# Keep only the time of day of every record.
temp['业务时间'] = pd.to_datetime(
    temp.业务时间, format='%Y%m%d %H:%M').dt.time.to_numpy()

period_counts = []
for start, end in zip(edges, edges[1:]):
    # NOTE(review): the original filter expression was garbled; it is
    # reconstructed here as start < t <= end, so a record stamped exactly on a
    # boundary hour is counted in the period that ends at that hour.
    ban = temp[(temp.业务时间 > start) & (temp.业务时间 <= end)]
    # value_counts() orders rows by frequency and omits stations without any
    # boarding; reindexing pins the rows to stations 1..30 in order.
    ban1 = ban.站点.value_counts().reindex(range(1, 31), fill_value=0)
    period_counts.append(ban1)
weigth = pd.concat(period_counts, axis=1)
weigth.columns = ['{}-{}'.format(lis[i], lis[i + 1]) for i in range(5)]

# Boardings per hour for every period, one line per period.
plt.figure()
for i in range(5):
    plt.plot(weigth.iloc[:, i] / (lis[i + 1] - lis[i]))
plt.legend(weigth.columns)
plt.show()

# The same curves as an interactive pyecharts line chart written to HTML.
from pyecharts import Line
line = Line('折线图')
for i in range(5):
    line.add('%d-%d个时段' % (lis[i], lis[i + 1]),
             list(range(30)),
             weigth.iloc[:, i] / (lis[i + 1] - lis[i]))
line.render('zhexiantu1.html')
# Build the Poisson-distributed alighting-probability matrix.
def posong(r, k):
    """Return the Poisson probability mass P(X = k) for rate ``r``.

    Evaluates ``e**(-r) * r**k / k!``.

    Args:
        r: Rate (mean) of the distribution.
        k: Non-negative integer count.

    Returns:
        The probability as a float.

    Raises:
        ValueError: If ``k`` is negative (raised by ``math.factorial``).
    """
    import math

    # factorial(0) == 1, so k == 0 needs no special case.
    return (math.e ** (-r)) * (r ** k) / math.factorial(k)
# Poisson(5) masses for k = 0..10 (kept from the original exploration).
li = [posong(5, i) for i in range(0, 11)]

# Unweighted alighting probabilities: a passenger boarding at stop i alights at
# stop j >= i with probability proportional to Poisson(15) at j - i.
# The mass only depends on the offset j - i, so compute it once per offset.
offset_mass = np.array([posong(15, d) for d in range(30)])
matrix = np.zeros([30, 30])
for i in range(30):
    matrix[i, i:] = offset_mass[:30 - i]
for i in range(30):
    matrix[i, :] = matrix[i, :] / (matrix[i, :].sum())

# Weighted alighting probabilities (weight = boardings at each stop) and the
# resulting OD matrix, computed for every period.
# NOTE(review): the loop nesting was lost in the original; only one file is
# written, so whichever nesting was intended the file ends up holding the
# last period.
for j in range(5):
    # Align the counts with stations 1..30 before relabelling them 0..29, and
    # treat stations without boardings as 0.
    # NOTE(review): assumes `weigth` is indexed by station number 1..30.
    bang = weigth.iloc[:, j].reindex(range(1, 31)).fillna(0)
    bang.index = range(0, 30)
    boardings = bang.to_numpy(dtype=float)

    weighted = matrix * boardings  # scale column k by the boardings at stop k
    row_totals = weighted.sum(axis=1, keepdims=True)
    # A row whose weights are all zero stays zero instead of becoming 0/0 = NaN.
    matrix1 = np.divide(weighted, row_totals,
                        out=np.zeros_like(weighted), where=row_totals > 0)

    # Alighting passengers per stop and the OD matrix.  The original never
    # created `OD`; 31 x 31 is the smallest shape its indexing allows
    # (30 stops plus one totals row and one totals column).
    OD = np.zeros([31, 31])
    OD[:30, :30] = np.round(matrix1 * boardings[:, None])
    OD[30, :30] = OD[:30, :30].sum(axis=0)  # totals per destination
    OD[:30, 30] = OD[:30, :30].sum(axis=1)  # totals per origin

# NOTE(review): writing the legacy '.xls' format depends on the installed
# pandas still providing an engine for it — confirm, or switch to '.xlsx'.
pd.DataFrame(OD).to_excel('OD_matrix.xls', index=False, header=None)