1.兑现门岛内早高峰阶段潮汐点进行有效定位,进一步设计高峰期群智优化方案
2.引导用户邻近停车点位,进行削峰填谷,缓解潮汐点拥堵问题
3.聚焦前沿、独辟蹊径;群体智能、局部最优
共享单车轨迹数据为共享单车在使用时候产生的位置定位数据,具体包含单车在不同时间段(默认是15秒记录一次)下的经纬度信息。
import pandas as pd
import numpy as np
path = r'C:\Users\30604\Desktop\a'
#共享单车轨迹数据:共享单车轨迹数据为共享单车在使用的时候产生的位置定位数据,
#具体包含单车在不同时间段(默认15秒记录一次)下的经纬度信息。
# 读取数据:读取单车轨迹数据
bike_track = pd.concat([
pd.read_csv(r'C:\Users\30604\Desktop\a\gxdc_gj20201221.csv'),
pd.read_csv(r'C:\Users\30604\Desktop\a\gxdc_gj20201222.csv'),
pd.read_csv(r'C:\Users\30604\Desktop\a\gxdc_gj20201223.csv'),
pd.read_csv(r'C:\Users\30604\Desktop\a\gxdc_gj20201224.csv'),
pd.read_csv(r'C:\Users\30604\Desktop\a\gxdc_gj20201225.csv'),
])
#按照单车ID和时间进行排序
bike_track = bike_track.sort_values(['BICYCLE_ID','LOCATING_TIME'])
# print(bike_track.describe())
bike_track.head()
bike_track.describe()
import folium
#建立厦门地图,放大12倍
m = folium.Map(location=[24.482426,118.157606], zoom_start=12)
m.save(r'C:\Users\30604\Desktop\a.html')
# 绘制指定共享单车的轨迹
my_PolyLine = folium.PolyLine(locations=bike_track[bike_track['BICYCLE_ID']=='000152773681a23a7f2d9af8e8902703'][['LATITUDE', 'LONGITUDE']].values,weight=5)
#添加
m.add_child(my_PolyLine)
m.save(r'C:\Users\30604\Desktop\a.html')
pd.read_csv(PATH+'gxdc_tcd.csv').head()
因此,我们可以定义bike_fence_format,对后面的fence_loc进行处理。
def bike_fence_format(s):
s = s.replace('[','').replace(']','').split(',')
s = np.array(s).astype(float).reshape(5,-1)
return s
bike_fence = pd.read_csv(PATH + 'gxdc_tcd.csv')
bike_fence['FENCE_LOC'] = bike_fence['FENCE_LOC'].apply(bike_fence_format)
import folium
m = folium.Map(location=[24.482426,118.157606], zoom_start=12)
for data in bike_fence['FENCE_LOC'].values[:100]:
folium.Marker(
list(data[0,::-1])
).add_to(m)
m.save(PATH+'fence.html')
pd.read_csv(PATH + 'gxdc_dd.csv').head()
bike_order = pd.read_csv(PATH + 'gxdc_dd.csv')
bike_order = bike_order.sort_values(['BICYCLE_ID', 'UPDATE_TIME'])
m = folium.Map(location=[24.482426, 118.157606], zoom_start=12)
my_PolyLine=folium.PolyLine(locations=bike_order[bike_order['BICYCLE_ID'] == '0000ff105fd5f9099b866bccd157dc50'][['LATITUDE', 'LONGITUDE']].values,weight=5)
m.add_child(my_PolyLine)
m.save(PATH+'dd.html')
【任务要求】
为了更好地掌握早高峰潮汐现象的变化规律与趋势,参赛者需基于主办方提供的数据进行数据分析和计算模型构建等工作,识别出工作日早高峰07:00-09:00潮汐现象最突出的40个区域,列出各区域所包含的共享单车停车点位编号名称,并提供计算方法说明及计算模型,为下一步优化措施提供辅助支撑。
想要完成具体的潮汐点的计算,需要将订单或轨迹与具体的停车点进行匹配,需要计算在不同时间下的每个停车点停了多少车,又骑走了多少车。
确定停车点经纬度的四边形范围
# 停车点纬度范围
bike_fence['MIN_LATITUDE'] = bike_fence['FENCE_LOC'].apply(
lambda x: np.min(x[:,1]))
bike_fence['MAX_LATITUDE'] = bike_fence['FENCE_LOC'].apply(
lambda x: np.max(x[:,1]))
# 停车点经度范围
bike_fence['MIN_LONGITUDE'] = bike_fence['FENCE_LOC'].apply(
lambda x: np.min(x[:, 0]))
bike_fence['MAX_LONGITUDE'] = bike_fence['FENCE_LOC'].apply(
lambda x: np.max(x[:, 0]))
# 查看情况
bike_fence.head(3).to_csv(r'C:\Users\30604\Desktop\a\temp.csv',encoding='utf-8-sig')
# 计算面积
from geopy.distance import geodesic
bike_fence['FENCE_AREA'] = bike_fence.apply(
lambda x: geodesic(
(x['MIN_LATITUDE'],x['MIN_LONGITUDE']),
(x['MAX_LATITUDE'],x['MAX_LONGITUDE'])).meters
,axis=1)
# 计算中心经纬度
bike_fence['FENCE_CENTER'] = bike_fence['FENCE_LOC'].apply(
lambda x: np.mean(x[:-1,::-1],0)
)
如何把单车维度与停车点进行匹配?这个问题会影响具体的匹配精度,同时也会影响匹配的速度。在此,我们使用geohash库来对经纬度进行编码,这样可以增加统计的速度,也可以动态调整经纬匹配的范围。
import geohash
bike_order['geohash'] = bike_order.apply(
lambda x : geohash.encode(x['LATITUDE'],x['LONGITUDE'],precision=6)
, axis = 1)
bike_fence['geohash'] = bike_fence['FENCE_CENTER'].apply(
lambda x : geohash.encode(x[0],x[1],precision=6)
)
在完成具体的经纬度匹配后,接下来就需要完成具体的区域流量统计,即统计某一范围内的不同时间的流量(入流量和出流量)。
bike_order['UPDATE_TIME'] = pd.to_datetime(bike_order['UPDATE_TIME'])
bike_order['DAY'] = bike_order['UPDATE_TIME'].dt.day.astype(object)
bike_order['DAY'] = bike_order['DAY'].apply(str)
bike_order['HOUR'] = bike_order['UPDATE_TIME'].dt.hour.astype(object)
bike_order['HOUR'] = bike_order['HOUR'].apply(str)
bike_order['HOUR'] = bike_order['HOUR'].str.pad(width=2, side='left', fillchar='0')
bike_order['DAY_HOUR'] = bike_order['DAY'] + bike_order['HOUR']
bike_inflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS']==1],
values = 'LOCK_STATUS', index = ['geohash'],
columns = ['DAY_HOUR'], aggfunc='count',
fill_value=0)
bike_inflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS']==0],
values = 'LOCK_STATUS', index = ['geohash'],
columns = ['DAY_HOUR'], aggfunc='count',
fill_value=0)
通过上面的代码就可以实现入流量和出流量的统计,接下来以一个区域为例,绘制可视化图表。
## 实例
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
bike_inflow.loc['wsk593'].plot()
bike_outflow.loc['wsk593'].plot()
plt.xticks(list(range(bike_inflow.shape[1])), bike_inflow.columns, rotation=40)
plt.legend(['入流量', '出流量'])
按照天进行单车流量统计
bike_inflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 1],
values='LOCK_STATUS', index=['geohash'],
columns=['DAY'], aggfunc='count', fill_value=0
)
bike_outflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 0],
values='LOCK_STATUS', index=['geohash'],
columns=['DAY'], aggfunc='count', fill_value=0
)
根据出入的流量,计算每个位置的存量:
# 计算每天的存量
bike_remain = (bike_inflow - bike_outflow).fillna(0)
## 当存量<0时,令存量=0
bike_remain[bike_remain < 0] = 0
# 求平均
bike_remain = bike_remain.sum(1)
这里,假设我们需要统计街道维度的潮汐情况,我们可以先把街道信息提取:
# 街道
bike_fence['STREET'] = bike_fence['FENCE_ID'].apply(
lambda x : x.split('_')[0])
# 查看
bike_fence['STREET'].unique()
## len=993
计算密度:
# 计算密度
bike_density = bike_fence.groupby(['STREET'])['geohash'].unique().apply(
lambda hs: np.sum([bike_remain[x] for x in hs])
) / bike_fence.groupby(['STREET'])['FENCE_AREA'].sum()
# 按照密度倒序
bike_density = bike_density.sort_values(ascending=False).reset_index()
如果使用Geohash,只能得到精确到街道的信息,不够准确。除了Geohash方法之外,我们还可以使用haversine距离实现空间地理距离的计算。
from sklearn.neighbors import NearestNeighbors
knn = NearestNeighbors(metric = "haversine", n_jobs=-1, algorithm='brute')
knn.fit(np.stack(bike_fence['FENCE_CENTER'].values))
计算订单中所对应的停车点位置:
dist, index = knn.kneighbors(bike_order[['LATITUDE','LONGITUDE']].values[:20000], n_neighbors=1)
import hnswlib
import numpy as np
p = hnswlib.Index(space='l2', dim=2)
p.init_index(max_elements=300000, ef_construction=1000, M=32)
p.set_ef(1024)
p.set_num_threads(14)
p.add_items(np.stack(bike_fence['FENCE_CENTER'].values))
index, dist = p.knn_query(bike_order[['LATITUDE','LONGITUDE']].values[:], k=1)
bike_order['fence'] = bike_fence.iloc[index.flatten()]['FENCE_ID'].values
bike_inflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 1],
values='LOCK_STATUS', index=['fence'],
columns=['DAY'], aggfunc='count', fill_value=0
)
bike_outflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 0],
values='LOCK_STATUS', index=['fence'],
columns=['DAY'], aggfunc='count', fill_value=0
)
bike_remain = (bike_inflow - bike_outflow).fillna(0)
bike_remain[bike_remain < 0] = 0
bike_remain = bike_remain.sum(1)
bike_density = bike_remain / bike_fence.set_index('FENCE_ID')['FENCE_AREA']
bike_density = bike_density.sort_values(ascending=False).reset_index()
bike_density = bike_density.fillna(0)
在某一个特定的时间下,绘制:
import folium
from folium import plugins
from folium.plugins import HeatMap
map_hooray = folium.Map(location=[24.482426, 118.157606], zoom_start=14)
HeatMap(bike_order.loc[(bike_order['DAY_HOUR'] == '2106') & (bike_order['LOCK_STATUS'] == 1),
['LATITUDE', 'LONGITUDE']]).add_to(map_hooray)
for data in bike_fence['FENCE_LOC'].values[::10]:
folium.Marker(
data[0, ::-1]
).add_to(map_hooray)
map_hooray.save(PATH+'hotheat.html')
可以看到:
• 并不是所有的车都停到了停车点,有一些车停到了距离停车点较远的距离
• 有一些停车点存在潮汐现象,有一些不存在潮汐现象
如果乘客到了一个潮汐停车点A:
• 推荐的停车点B应该和A距离不远
• 推荐的停车点B应该没有潮汐情况
• 考虑交通因素
def fence_recommend1(fence_id):
fence_center = bike_fence.loc[bike_fence['FENCE_ID']==fence_id, 'FENCE_CENTER'].values[0]
# 具体街道方向
fence_dir = fence_id.split('_')[1]
# 根据距离计算最近的20个待选位置
dist, index = knn.kneighbors([fence_center], n_neighbors=20)
# 对每个待选位置进行筛选
for idx in index[0]:
# 剔除已经有很多车的
if bike_fence.iloc[idx]['DENSITY'] > 10:
continue
# 剔除需要过街的
if fence_dir not in bike_fence.iloc[idx]['FENCE_ID']:
continue
return bike_fence.iloc[idx]['FENCE_ID']
return None
如果还没有到停车点,则根据经纬度推荐最近的非潮汐停车点。
def fence_recommend2(la, lo):
dist, index = knn.kneighbors([[la, lo]], n_neighbors=20)
for idx in index[0][1:]:
if bike_fence.iloc[idx]['DENSITY'] > 10:
continue
return bike_fence.iloc[idx]['FENCE_ID']
return None