# python_根据规土委乘客刷卡数据_找出用户的上车、下车站点经纬度

import os
import threading
from datetime import datetime

import pandas as pd

from src.relate_data import get_bus_geo_polyline
from utils.read_write import  writeOneCSV

'''
    此文件用于找出用户的上车下车站点经纬度
'''

def get_terminal(path=r'D:\data\403.csv'):
    """Load the passenger card-swipe records CSV into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to the location that was
            previously hard-coded in this function.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    # The default is a raw string on purpose: in a plain literal the sequence
    # "\403" is parsed as an octal escape (a single character, U+0103) and
    # "\d" is an invalid escape, so the path never pointed at 403.csv.
    return pd.read_csv(path)


def terminal_line_license():
    """Read ``merge_all.csv`` (relative to the working directory) into a DataFrame.

    Returns:
        The DataFrame produced by ``pandas.read_csv``. The script uses it as
        the terminal-to-vehicle lookup table.
    """
    return pd.read_csv('merge_all.csv')



def find_plate(terminal_id):
    """Look up the vehicle plate for the POS terminal a passenger swiped on.

    Filters the module-level ``relate_terminal`` table on its '终端编号'
    (terminal id) column and reads the value at positional column 4 of the
    first matching row.

    Args:
        terminal_id: Terminal id to search for.

    Returns:
        The matched value converted to ``str``, or ``''`` when no row matches.
    """
    # NOTE(review): positional column 4 is presumably the plate column;
    # confirm against the actual layout of merge_all.csv.
    matches = relate_terminal[relate_terminal['终端编号'] == terminal_id]
    if matches.empty:
        return ''
    return str(matches.iat[0, 4])

def compare_time_find_station(card_terminal, plate_geo):
    """Match one card swipe to a GPS fix of the bus it was made on.

    The swipe time is parsed, the trajectory's 'time' column is sorted, and
    ``str_search`` picks a trajectory timestamp for the swipe. The coordinates
    of that fix approximate where the passenger boarded.

    Args:
        card_terminal: One swipe record, read by position: item 0, item 2
            (swipe time formatted ``%Y-%m-%d %H:%M:%S``) and item 3 are used.
            Presumably CARDID / TRADEDATE / TERMINALID -- confirm against
            the input CSV.
        plate_geo: Trajectory DataFrame with a 'time' column. Columns 1, 3
            and 4 are read by position; per the original note the layout is
            ['line', 'license', 'time', 'longitude', 'latitude', 'speed',
            'other'] -- confirm against ``get_bus_geo_polyline``.

    Returns:
        ``[item 0, item 3, item 2, column 1, column 3, column 4]`` for the
        matched fix, or an empty list when ``str_search`` returns nothing.
    """
    # Read the record through a plain list: integer keys on a label-indexed
    # Series rely on a positional fallback that pandas has deprecated.
    fields = list(card_terminal)
    card_id, trade_date, terminal_id = fields[0], fields[2], fields[3]

    swipe_time = datetime.strptime(trade_date, "%Y-%m-%d %H:%M:%S")
    sorted_times = plate_geo['time'].sort_values().tolist()
    geo_time = str_search(sorted_times, swipe_time)
    if not geo_time:
        return []

    fix = plate_geo[plate_geo['time'] == geo_time]
    return [
        card_id,
        terminal_id,
        trade_date,
        fix.iat[0, 1],
        fix.iat[0, 3],
        fix.iat[0, 4],
    ]


def str_search(li, card_car_date_time):
    """Return the timestamp string in ``li`` closest to ``card_car_date_time``.

    Args:
        li: Timestamp strings formatted ``%Y-%m-%dT%H:%M:%S.%fZ``, sorted in
            ascending order.
        card_car_date_time: ``datetime`` to match against.

    Returns:
        The element of ``li`` with the smallest absolute time difference to
        ``card_car_date_time`` (the earlier one on a tie), or ``None`` when
        ``li`` is empty.
    """
    if not li:
        return None

    fmt = "%Y-%m-%dT%H:%M:%S.%fZ"

    # Binary search for the first entry that is not earlier than the target.
    # The previous version dropped ``mid`` on every step and returned the
    # lower element of the final window, so it could skip an exact match.
    low, high = 0, len(li)
    while low < high:
        mid = (low + high) // 2
        if datetime.strptime(li[mid], fmt) < card_car_date_time:
            low = mid + 1
        else:
            high = mid

    # The nearest entry is either the one just before the insertion point
    # or the one at it; the slice clips itself at both ends of the list.
    candidates = li[max(low - 1, 0):low + 1]
    return min(
        candidates,
        key=lambda stamp: abs(datetime.strptime(stamp, fmt) - card_car_date_time),
    )

def find_plate_geo(plate):
    """Load the GPS trajectory recorded for a given vehicle plate.

    The trajectory file path is the module-level ``src`` prefix followed by
    ``plate`` and ``'.csv'``. Linking swipes to a trajectory only works when
    such a file exists for the plate.

    Args:
        plate: Plate string used as the file name stem.

    Returns:
        The result of ``get_bus_geo_polyline`` for the file when it exists;
        otherwise an empty DataFrame with the single column ``'k'``, which
        callers detect through ``.empty``.
    """
    trajectory_csv = src + plate + '.csv'
    if not os.path.exists(trajectory_csv):
        return pd.DataFrame(columns=['k'])
    return get_bus_geo_polyline(trajectory_csv)


def loop():
    """Process every swipe record and append matched boarding fixes to 'bus_o.csv'.

    For each row of the module-level ``terminal_record`` table:
      1. resolve the terminal id (positional field 3) to a plate via ``find_plate``;
      2. load that plate's trajectory via ``find_plate_geo``;
      3. if a trajectory exists, match the swipe with
         ``compare_time_find_station`` and write any non-empty result
         with ``writeOneCSV``.
    """
    # Iterate the rows that are actually present rather than a hard-coded row
    # count, which raised KeyError on shorter inputs and skipped extra rows.
    # Plain tuples keep the positional field access used downstream valid.
    # NOTE(review): the trajectory file is re-read for every record; consider
    # caching per plate if memory allows.
    for card_terminal in terminal_record.itertuples(index=False, name=None):
        card_terminal_plate = find_plate(card_terminal[3])
        plate_geo = find_plate_geo(card_terminal_plate)
        if plate_geo.empty:
            continue
        up_station = compare_time_find_station(card_terminal, plate_geo)
        if up_station:
            writeOneCSV(up_station, 'bus_o.csv')



if __name__ == "__main__":
    # Script entry point: set up the module-level tables that the functions
    # above read as globals, then run the matching loop.

    # Device/route/terminal lookup table, read by find_plate().
    # Columns per the original note: 终端编号 (terminal id), 公司名称 (company),
    # 线路或站点 (route or stop), 车牌 (plate).
    relate_terminal = terminal_line_license()
    # Path prefix that find_plate_geo() prepends to '<plate>.csv'.
    src = 'D3\\'

    # Passenger card-swipe records, iterated by loop().
    # Columns per the original note: CARDID, TRADETYPE, TRADEDATE,
    # TERMINALID (terminal id).
    terminal_record = get_terminal()
    loop()

# 如有问题或需要帮助,请私聊我或留言!
# 如需数据示例,请私聊我!
#
# 你可能感兴趣的:(python数据处理,python数据挖掘)