# 2018-06-03 PYTHON code

# python 2.7 ##

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# import datetime
# import lightgbm as lgb
# from math import radians, cos, sin, asin, sqrt
import os
cwd = os.getcwd()


# --- file locations ---
path_train = cwd+"/data/dm/train.csv"  # training data file
# NOTE(review): unlike path_train, this path is not prefixed with cwd -- confirm that is intended.
path_test = "/data/dm/test.csv"  # test data file
path_result_out = "model/pro_result.csv"  # prediction result file


# Load the raw training records and order them by terminal, then by time.
ori_data = pd.read_csv(path_train)
data = ori_data.sort_values(by=['TERMINALNO', 'TIME'])

# Express the unix timestamp in whole minutes.
data['TIME'] = (data['TIME'] / 60).astype('int')

# NEW_TRIPID starts out as a copy of TRIP_ID (some of the original ids are
# wrong); it is recomputed later on.
data['NEW_TRIPID'] = data['TRIP_ID']

# Reorder the columns; the code below addresses columns by position.
columnsTitles = [
    'TERMINALNO', 'TIME', 'NEW_TRIPID', 'LONGITUDE', 'LATITUDE',
    'DIRECTION', 'HEIGHT', 'SPEED', 'CALLSTATE', 'Y', 'TRIP_ID',
]
data = data.reindex(columns=columnsTitles)

# Switch matplotlib's interactive mode off.
plt.ioff()


# def plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Term):
#     N_trips = len(Y4)
#     f, axs = plt.subplots(6, N_trips, figsize=(N_trips / 6 * 6, 6))
#     f.subplots_adjust(hspace=.5, wspace=.5)
#     axs = axs.ravel()
#
#     for j in range(0, N_trips):  # iterate on trips
#         axs[(1 - 1) * N_trips + j].plot(X[j], Y1[j])
#         axs[(2 - 1) * N_trips + j].plot(X[j], Y2[j])
#         axs[(3 - 1) * N_trips + j].plot(X[j], Y3[j])
#         axs[(4 - 1) * N_trips + j].plot(X[j], Y4[j])
#         axs[(5 - 1) * N_trips + j].plot(X[j], Y5[j])
#         axs[(6 - 1) * N_trips + j].plot(X[j], Y6[j])
#     f.savefig(str(Term) + "test.png")


# plot
def plt_trips(x, y1, y2, y3, y4, y5, y6, Tmp_Term):
    """Save a six-panel figure of one terminal's series to a PNG file.

    Each of the six series is plotted against ``x`` in its own subplot,
    stacked vertically. The figure is written to
    ``str(Tmp_Term) + "test.png"`` (relative to the current working
    directory) and then closed.

    Args:
        x: Sequence of x-axis values shared by all panels (labelled 'time').
            The figure width is ``0.05 * len(x)`` inches.
        y1, y2, y3, y4, y5, y6: Series plotted, in this order, under the
            labels Longitude, Latitude, Direction, Height, Speed and
            Call_state.
        Tmp_Term: Value used as the file-name prefix of the saved image.

    Returns:
        None. Nothing is drawn or saved when ``x`` is empty.
    """
    if len(x) == 0:
        # Width is proportional to len(x); an empty series would request a
        # zero-width figure, so there is nothing meaningful to draw.
        return

    y_label = ['Longitude', 'Latitude', 'Direction', 'Height', 'Speed', 'Call_state']
    series = [y1, y2, y3, y4, y5, y6]

    fig, axs = plt.subplots(6, 1, figsize=(0.05 * len(x), 8))
    try:
        fig.subplots_adjust(hspace=.5, wspace=.5)
        for ax, values, label in zip(axs.ravel(), series, y_label):
            ax.plot(x, values)
            ax.set_xlabel('time')
            ax.set_ylabel(label)
            ax.set_title('Time and ' + label)
        fig.savefig(str(Tmp_Term) + "test.png")
    finally:
        # Release the figure even if plotting or saving fails; otherwise
        # every call leaves another open figure registered with pyplot.
        plt.close(fig)


# def div2Trip(data):
#     Curr_Term = data.iloc[0, 0]
#     Curr_Time = data.iloc[0, 1]
#     Curr_Trip = data.iloc[0, 2]
#     # Create 1d list to store a single trip
#     x, y1, y2, y3, y4, y5, y6 = ([] for i in range(7))
#     # Create a variable-sized 2d list for a terminal
#     X, Y1, Y2, Y3, Y4, Y5, Y6 = ([] for i in range(7))
#
#     for i in range(0, 100):
#         Tmp_Term = data.iloc[i, 0]
#         Tmp_Time = data.iloc[i, 1]
#         Tmp_Trip = data.iloc[i, 2]
#         if Tmp_Term == Curr_Term:
#             if Tmp_Trip == Curr_Trip:
#                 # print('aaa')
#                 x.append(data.iloc[i, 1]-Curr_Time)
#                 y1.append(data.iloc[i, 3])
#                 y2.append(data.iloc[i, 4])
#                 y3.append(data.iloc[i, 5])
#                 y4.append(data.iloc[i, 6])
#                 y5.append(data.iloc[i, 7])
#                 y6.append(data.iloc[i, 8])
#             else:
#                 Curr_Time = data.iloc[i, 1]
#                 Curr_Trip = data.iloc[i, 2]
#                 X += [x]
#                 Y1 += [y1]
#                 Y2 += [y2]
#                 Y3 += [y3]
#                 Y4 += [y4]
#                 Y5 += [y5]
#                 Y6 += [y6]
#                 print('bbb')
#                 x = []
#                 y1, y2, y3, y4, y5, y6 = ([] for i in range(6))
#                 x.append(data.iloc[i, 1]-Curr_Time)
#                 y1.append(data.iloc[i, 3])
#                 y2.append(data.iloc[i, 4])
#                 y3.append(data.iloc[i, 5])
#                 y4.append(data.iloc[i, 6])
#                 y5.append(data.iloc[i, 7])
#                 y6.append(data.iloc[i, 8])
#         else:
#             print ('ddd')
#             Curr_Term = data.iloc[i, 0]
#             plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Curr_Term - 1)
#             # continue


def comb2trips(data, max_gap=5):
    """Renumber trips per terminal, in place, based on time gaps.

    Rows are read in their existing order. Column 0 is taken as the terminal
    id, column 1 as the time stamp, and column 2 is overwritten with the new
    trip number. Within a run of rows that share the same terminal id the
    trip number starts at 1 and is incremented each time the time difference
    to the previous row is strictly greater than ``max_gap``. When the
    terminal id changes, numbering restarts at 1.

    Args:
        data: pandas DataFrame with at least three columns laid out as
            described above. It is modified in place. Rows are expected to
            be ordered by terminal and then by time by the caller.
        max_gap: Largest time difference between consecutive rows that still
            counts as the same trip, in the unit of column 1 (the calling
            script stores minutes there). Defaults to 5.

    Returns:
        None. An empty frame is left untouched.
    """
    n_rows = len(data)
    if n_rows == 0:
        return

    # Pull the two input columns out once; per-row .iloc access on a
    # DataFrame is far slower than indexing a plain array.
    terms = data.iloc[:, 0].values
    times = data.iloc[:, 1].values

    curr_term = terms[0]
    curr_trip = 1
    trip_ids = [1]  # the first row always opens trip 1

    for i in range(1, n_rows):
        if terms[i] == curr_term:
            # Same terminal: a large enough pause starts a new trip.
            if (times[i] - times[i - 1]) > max_gap:
                curr_trip += 1
                print('Find a new trip ' + str(i))
        else:
            # New terminal: restart the trip numbering.
            curr_term = terms[i]
            curr_trip = 1
            print('Curr_Term ' + str(curr_term))
        trip_ids.append(curr_trip)

    # Write the whole trip-id column back in a single assignment.
    data.iloc[:, 2] = trip_ids


# Rewrite the NEW_TRIPID column of `data` in place.
comb2trips(data)

# Persist the re-labelled frame and load it back under the name `df`.
# NOTE: read_pickle deserializes arbitrary objects; here it only reads the
# file written by the line directly above.
data.to_pickle('rearranged_data')
df = pd.read_pickle('rearranged_data')

# Drop the module-level references to the original frames; only `df` is used below.
del data
del ori_data

# Summary statistics of the trip-id columns and of Y. The results are neither
# assigned nor printed here, so they are only visible in an interactive session.
df['NEW_TRIPID'].describe()
df['TRIP_ID'].describe()

df['Y'].describe()


def features_append(features, series):
    """Placeholder hook for adding ``series`` to ``features``.

    Currently a stub: ``series`` is ignored and ``features`` is returned
    unchanged.
    """
    # TODO: not implemented yet -- nothing is appended.
    return features


def define_test(data):
    """Create an empty prediction table sized by the largest terminal id.

    The number of rows equals the maximum value found in the first column of
    ``data``; the index runs from 0 to that maximum minus one.

    Args:
        data: pandas DataFrame whose first column holds the terminal ids.

    Returns:
        A DataFrame with the columns 'Id' and 'Pred' and no values filled in.
        For an empty ``data`` an empty table with the same columns is
        returned.
    """
    if len(data) == 0:
        # No rows means no terminals; max() of nothing is undefined.
        return pd.DataFrame(index=range(0), columns=['Id', 'Pred'])

    # Series.max() runs vectorized; int() turns the numpy scalar into a
    # plain integer that range() accepts.
    length = int(data.iloc[:, 0].max())
    return pd.DataFrame(index=range(length), columns=['Id', 'Pred'])


def pred_ratio(features):
    """Placeholder predictor: ignores ``features`` and always returns 0."""
    return 0


def pred_gen(df, result):
    """Fill ``result`` with one prediction per run of equal terminal ids.

    Rows of ``df`` are walked in order. Column 0 is taken as the terminal id
    and the value in column 4 of every row is handed to ``features_append``
    together with the current terminal's feature list. Whenever the terminal
    id changes, and once more after the last row, the finished terminal is
    written to ``result``: row ``term - 1`` receives the terminal id in
    column 0 and ``pred_ratio(features)`` in column 1.

    Args:
        df: pandas DataFrame with at least five columns, expected to be
            ordered so that rows of one terminal are contiguous.
        result: pandas DataFrame that is modified in place. Because rows are
            addressed as ``term - 1``, terminal ids must be integers from 1
            up to the number of rows of ``result``.

    Returns:
        None. Nothing is written when ``df`` is empty.
    """
    n_rows = len(df)
    if n_rows == 0:
        return

    # Read both columns once instead of one .iloc lookup per row.
    terms = df.iloc[:, 0].values
    values = df.iloc[:, 4].values

    # Start from the terminal actually found in the first row rather than
    # assuming it is 1, so no empty prediction is written for an absent id.
    curr_term = terms[0]
    features = []

    for i in range(n_rows):
        tmp_term = terms[i]
        if tmp_term != curr_term:
            # Terminal changed: store the finished one and start afresh.
            print(tmp_term)
            result.iloc[curr_term - 1, 0] = curr_term
            result.iloc[curr_term - 1, 1] = pred_ratio(features)
            curr_term = tmp_term
            features = []
        # Every row contributes, including the first row of a new terminal.
        features_append(features, values[i])

    # Store the terminal that was still open when the rows ran out.
    print('end of final term: ')
    print(curr_term)
    result.iloc[curr_term - 1, 0] = curr_term
    result.iloc[curr_term - 1, 1] = pred_ratio(features)


# Build the empty Id/Pred table and fill it terminal by terminal.
# NOTE(review): in the code visible here `result` is never written to
# path_result_out -- confirm whether that step lives elsewhere.
result = define_test(df)
pred_gen(df, result)

# You may also be interested in: (2018-06-03 PYTHON code)