使用示例中的样例数据集存放在GitHub仓库中,链接为:https://github.com/ni1o1/transbigdata/tree/main/example
下面的案例展示如何用TransBigData包处理公交GPS数据,以内置方法计算公交车辆的到离站信息、统计公交单程耗时与运营车速
import transbigdata as tbd
import pandas as pd
import geopandas as gpd
读取GPS数据
# Column names for the header-less GPS file, in file order.
gps_columns = [
    'GPSDateTime', 'LineId', 'LineName', 'NextLevel', 'PrevLevel',
    'Strlatlon', 'ToDir', 'VehicleId', 'VehicleNo', 'unknow',
]
# The CSV has no header row, so read it raw and attach the names afterwards.
BUS_GPS = pd.read_csv(r'busgps.csv', header=None)
BUS_GPS.columns = gps_columns
# Convert the timestamp column to datetime values.
BUS_GPS['GPSDateTime'] = pd.to_datetime(BUS_GPS['GPSDateTime'])
经纬度坐标转换
# Split each "lon,lat" string once into two columns (column 0 is the
# longitude, column 1 the latitude). The vectorized string split avoids
# parsing every row twice and lets missing values pass through as NaN
# instead of raising inside a per-row lambda.
lonlat = BUS_GPS['Strlatlon'].str.split(',', expand=True)
# Convert the coordinates with tbd.gcj02towgs84 and store the results as the
# new 'lon' and 'lat' columns.
BUS_GPS['lon'], BUS_GPS['lat'] = tbd.gcj02towgs84(
    lonlat[0].astype(float), lonlat[1].astype(float))
# Preview the first five rows.
BUS_GPS.head(5)
| | GPSDateTime | LineId | LineName | NextLevel | PrevLevel | Strlatlon | ToDir | VehicleId | VehicleNo | unknow | lon | lat |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2019-01-16 23:59:59 | 7100 | 71 | 2 | 1 | 121.335413,31.173188 | 1 | 沪D-R7103 | Z5A-0021 | 1 | 121.330858 | 31.175129 |
1 | 2019-01-17 00:00:00 | 7100 | 71 | 2 | 1 | 121.334616,31.172271 | 1 | 沪D-R1273 | Z5A-0002 | 1 | 121.330063 | 31.174214 |
2 | 2019-01-17 00:00:00 | 7100 | 71 | 24 | 23 | 121.339955,31.173025 | 0 | 沪D-R5257 | Z5A-0020 | 1 | 121.335390 | 31.174958 |
3 | 2019-01-17 00:00:01 | 7100 | 71 | 14 | 13 | 121.409491,31.20433 | 0 | 沪D-R5192 | Z5A-0013 | 1 | 121.404843 | 31.206179 |
4 | 2019-01-17 00:00:03 | 7100 | 71 | 15 | 14 | 121.398615,31.200253 | 0 | 沪D-T0951 | Z5A-0022 | 1 | 121.393966 | 31.202103 |
读取公交线数据
# Path of the bus-route geometry file.
shp = r'busline.json'
# Read every route feature (the file is decoded as GBK), then keep an
# independent copy of the first feature only.
linegdf = gpd.GeoDataFrame.from_file(shp, encoding='gbk')
line = linegdf.head(1).copy()
# Quick visual check of the selected route.
line.plot()
读取公交站点数据
# Path of the bus-stop geometry file.
shp = r'busstop.json'
# Read all stops (the file is decoded as GBK).
all_stops = gpd.GeoDataFrame.from_file(shp, encoding='gbk')
# Keep only the stops whose 'linename' matches the route studied here.
target_line = '71路(延安东路外滩-申昆路枢纽站)'
stop = all_stops[all_stops['linename'] == target_line]
# Quick visual check of the selected stops.
stop.plot()
# Derive the arrival/departure records from the GPS points, the route
# geometry and the stop locations. The printed result below lists
# arrivetime, leavetime, stopname and VehicleId.
arriveinfo = tbd.busgps_arriveinfo(BUS_GPS,line,stop)
数据清洗中…
运行位置匹配中……
匹配到离站信息………………………………………………………………………………………………………………………………………
# Display the arrival/departure table.
arriveinfo
| | arrivetime | leavetime | stopname | VehicleId |
---|---|---|---|---|
0 | 2019-01-17 07:19:42 | 2019-01-17 07:31:14 | 延安东路外滩 | 1 |
1 | 2019-01-17 09:53:08 | 2019-01-17 10:09:34 | 延安东路外滩 | 1 |
0 | 2019-01-17 07:13:23 | 2019-01-17 07:15:45 | 西藏中路 | 1 |
1 | 2019-01-17 07:34:24 | 2019-01-17 07:35:38 | 西藏中路 | 1 |
2 | 2019-01-17 09:47:03 | 2019-01-17 09:50:22 | 西藏中路 | 1 |
... | ... | ... | ... | ... |
2 | 2019-01-17 16:35:52 | 2019-01-17 16:36:49 | 吴宝路 | 148 |
3 | 2019-01-17 19:21:09 | 2019-01-17 19:23:44 | 吴宝路 | 148 |
0 | 2019-01-17 13:36:26 | 2019-01-17 13:45:04 | 申昆路枢纽站 | 148 |
1 | 2019-01-17 15:52:26 | 2019-01-17 16:32:46 | 申昆路枢纽站 | 148 |
2 | 2019-01-17 19:24:54 | 2019-01-17 19:25:55 | 申昆路枢纽站 | 148 |
8984 rows × 4 columns
# Stop names that delimit a one-way trip.
origin_stop = '延安东路外滩'
terminal_stop = '申昆路枢纽站'
# Build the one-way trip table from the arrival/departure records; the
# 'duration', 'shour' and '方向' columns of the result are used for the
# plots further down.
onewaytime = tbd.busgps_onewaytime(
    arriveinfo,
    stop,
    start=origin_stop,
    end=terminal_stop,
    col=['VehicleId', 'stopname'],
)
## Box plot of the one-way travel-time distribution
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Point both font families at SimHei and turn off the Unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'font.serif': ['SimHei'],
    'axes.unicode_minus': False,
})

fig = plt.figure(num=1, figsize=(8, 4), dpi=250)
ax1 = plt.subplot(1, 1, 1)
# The y label is in minutes, so 'duration' is presumably in seconds — confirm.
duration_minutes = onewaytime['duration'] / 60
sns.boxplot(x='shour', y=duration_minutes, hue='方向', data=onewaytime, ax=ax1)
ax1.set_ylabel('始发站至终点站耗时(分钟)')
ax1.set_xlabel('小时')
# Start the y axis at zero.
ax1.set_ylim(0)
plt.show()
# Declare the route geometry as EPSG:4326. set_crs replaces the deprecated
# {'init': 'epsg:4326'} dict assignment; allow_override=True keeps the
# original behaviour of overwriting whatever CRS was read from the file.
line = line.set_crs(epsg=4326, allow_override=True)
# Reproject to the projected CRS EPSG:2416 so that distances can be computed
# later on.
line_2416 = line.to_crs(epsg=2416)
# Geometry and name of the (single) route in the projected layer.
lineshp = line_2416['geometry'].iloc[0]
linename = line_2416['name'].iloc[0]
# Display the projected route geometry.
lineshp
# Operating speed per trip: route length divided by trip duration, multiplied
# by 3.6 to express it in km/h (presumably length is in metres and duration in
# seconds — confirm).
route_length = lineshp.length
onewaytime['speed'] = (route_length / onewaytime['duration']) * 3.6
# Discard trips whose speed is too high (above 60).
plausible_speed = onewaytime['speed'].le(60)
onewaytime = onewaytime[plausible_speed]
## Box plot of the operating-speed distribution
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Point both font families at SimHei and turn off the Unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'font.serif': ['SimHei'],
    'axes.unicode_minus': False,
})

fig = plt.figure(num=1, figsize=(8, 4), dpi=250)
ax1 = plt.subplot(1, 1, 1)
# Speed per starting hour ('shour'), grouped by the '方向' column.
sns.boxplot(x='shour', y='speed', hue='方向', data=onewaytime, ax=ax1)
ax1.set_ylabel('运营速度(km/h)')
ax1.set_xlabel('小时')
# Start the y axis at zero.
ax1.set_ylim(0)
plt.show()