# python 2.7 ##
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# import datetime
# import lightgbm as lgb
# from math import radians, cos, sin, asin, sqrt
import os
# Input locations. The training file is resolved against the current
# working directory.
cwd = os.getcwd()

path_train = cwd+"/data/dm/train.csv"  # training file path
# NOTE(review): unlike path_train this is not prefixed with cwd -- confirm
# whether an absolute path is intended.
path_test = "/data/dm/test.csv"  # test file path
path_result_out = "model/pro_result.csv"  # prediction result file path

ori_data = pd.read_csv(path_train)

# Order the rows by terminal, then by time within each terminal.
data = ori_data.sort_values(by=['TERMINALNO', 'TIME'])

# Express the unix time in whole minutes (the fractional part is dropped).
data['TIME'] = (data['TIME'] / 60).astype('int')

# NEW_TRIPID starts as a copy of TRIP_ID; some of the original ids are wrong.
data['NEW_TRIPID'] = data['TRIP_ID']

# Fix the column order: positional (iloc) access further down relies on it.
columnsTitles = [
    'TERMINALNO', 'TIME', 'NEW_TRIPID', 'LONGITUDE', 'LATITUDE',
    'DIRECTION', 'HEIGHT', 'SPEED', 'CALLSTATE', 'Y', 'TRIP_ID',
]
data = data.reindex(columns=columnsTitles)

# Turn interactive plotting off.
plt.ioff()
# def plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Term):
# N_trips = len(Y4)
# f, axs = plt.subplots(6, N_trips, figsize=(N_trips / 6 * 6, 6))
# f.subplots_adjust(hspace=.5, wspace=.5)
# axs = axs.ravel()
#
# for j in range(0, N_trips): # iterate on trips
# axs[(1 - 1) * N_trips + j].plot(X[j], Y1[j])
# axs[(2 - 1) * N_trips + j].plot(X[j], Y2[j])
# axs[(3 - 1) * N_trips + j].plot(X[j], Y3[j])
# axs[(4 - 1) * N_trips + j].plot(X[j], Y4[j])
# axs[(5 - 1) * N_trips + j].plot(X[j], Y5[j])
# axs[(6 - 1) * N_trips + j].plot(X[j], Y6[j])
# f.savefig(str(Term) + "test.png")
# plot
def plt_trips(x, y1, y2, y3, y4, y5, y6, Tmp_Term):
    """Plot six signals against time and save them as one PNG.

    The figure has six stacked subplots (one per signal) and is written to
    ``str(Tmp_Term) + "test.png"`` in the current working directory.

    Args:
        x: time values shared by all six series.
        y1, y2, y3, y4, y5, y6: series plotted as longitude, latitude,
            direction, height, speed and call state, in that order.
        Tmp_Term: value used as the prefix of the output file name.
    """
    y_label = ['Longitude', 'Latitude', 'Direction', 'Height', 'Speed', 'Call_state']
    y = [y1, y2, y3, y4, y5, y6]

    # The width scales with the number of samples; clamp it so that an empty
    # or very short series still yields a valid (non-zero) figure size.
    width = max(0.05 * len(x), 1.0)
    fig, axs = plt.subplots(6, 1, figsize=(width, 8))
    try:
        fig.subplots_adjust(hspace=.5, wspace=.5)
        for ax, label, series in zip(axs.ravel(), y_label, y):
            ax.plot(x, series)
            ax.set_xlabel('time')
            ax.set_ylabel(label)
            ax.set_title('Time and ' + label)
        fig.savefig(str(Tmp_Term) + "test.png")
    finally:
        # Always release the figure: pyplot keeps every figure alive until it
        # is closed, so calling this once per terminal would otherwise leak.
        plt.close(fig)
#
# plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.25,
# wspace=0.35)
# plt.show()
# def div2Trip(data):
# Curr_Term = data.iloc[0, 0]
# Curr_Time = data.iloc[0, 1]
# Curr_Trip = data.iloc[0, 2]
# # Create 1d list to store a single trip
# x, y1, y2, y3, y4, y5, y6 = ([] for i in range(7))
# # Create a variable-sized 2d list for a terminal
# X, Y1, Y2, Y3, Y4, Y5, Y6 = ([] for i in range(7))
#
# for i in range(0, 100):
# Tmp_Term = data.iloc[i, 0]
# Tmp_Time = data.iloc[i, 1]
# Tmp_Trip = data.iloc[i, 2]
# if Tmp_Term == Curr_Term:
# if Tmp_Trip == Curr_Trip:
# # print('aaa')
# x.append(data.iloc[i, 1]-Curr_Time)
# y1.append(data.iloc[i, 3])
# y2.append(data.iloc[i, 4])
# y3.append(data.iloc[i, 5])
# y4.append(data.iloc[i, 6])
# y5.append(data.iloc[i, 7])
# y6.append(data.iloc[i, 8])
# else:
# Curr_Time = data.iloc[i, 1]
# Curr_Trip = data.iloc[i, 2]
# X += [x]
# Y1 += [y1]
# Y2 += [y2]
# Y3 += [y3]
# Y4 += [y4]
# Y5 += [y5]
# Y6 += [y6]
# print('bbb')
# x = []
# y1, y2, y3, y4, y5, y6 = ([] for i in range(6))
# x.append(data.iloc[i, 1]-Curr_Time)
# y1.append(data.iloc[i, 3])
# y2.append(data.iloc[i, 4])
# y3.append(data.iloc[i, 5])
# y4.append(data.iloc[i, 6])
# y5.append(data.iloc[i, 7])
# y6.append(data.iloc[i, 8])
# else:
# print ('ddd')
# Curr_Term = data.iloc[i, 0]
# plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Curr_Term - 1)
# # continue
def comb2trips(data, max_gap=5):
    """Renumber trips per terminal, in place, based on gaps in time.

    Columns are read by position: column 0 is the terminal id, column 1 the
    time and column 2 the trip id that gets overwritten. Rows are expected
    to be sorted by terminal and then by time.

    Within one terminal the trip counter starts at 1 and is incremented each
    time the time difference to the previous row is greater than ``max_gap``.
    A change of terminal resets the counter to 1.

    Args:
        data: DataFrame to update; column 2 is overwritten in place.
        max_gap: largest time difference (in the unit of column 1) that still
            counts as the same trip. Defaults to 5.

    Returns:
        None. An empty frame is left untouched.
    """
    n_rows = len(data)
    if n_rows == 0:
        # Nothing to renumber; the positional reads below would fail.
        return

    # Read the two driving columns once instead of one cell at a time.
    terms = data.iloc[:, 0].values
    times = data.iloc[:, 1].values

    trip_ids = [1] * n_rows
    curr_trip = 1
    for i in range(1, n_rows):
        if terms[i] == terms[i - 1]:
            # Same terminal: a long enough pause starts a new trip.
            if (times[i] - times[i - 1]) > max_gap:
                curr_trip += 1
                print('Find a new trip ' + str(i))
        else:
            # New terminal: restart the trip numbering.
            curr_trip = 1
            print('Curr_Term ' + str(terms[i]))
        trip_ids[i] = curr_trip

    # Single column write instead of one positional write per row.
    data.iloc[:, 2] = trip_ids
# Renumber the trips in place, then persist the rearranged table.
comb2trips(data)
data.to_pickle('rearranged_data')

# The in-memory frames are no longer needed once the pickle is written.
del data, ori_data

# Work from the pickled copy from here on.
df = pd.read_pickle('rearranged_data')

# Summary statistics; the results are only visible in an interactive session.
df['NEW_TRIPID'].describe()
df['TRIP_ID'].describe()
df['Y'].describe()
def features_append(features, series):
    """Return ``features`` as received.

    Placeholder: ``series`` is currently ignored and nothing is appended.
    """
    return features
def define_test(data):
    """Create the empty prediction table.

    The table has the columns ``Id`` and ``Pred`` and one row for every
    integer from 0 up to (but excluding) the largest value found in the
    first column of ``data``. All cells start out empty.

    Args:
        data: DataFrame whose first column holds the terminal numbers.

    Returns:
        A new DataFrame; it has zero rows when ``data`` has no rows.
    """
    if len(data) == 0:
        # No terminals at all: return an empty table instead of failing.
        length = 0
    else:
        # Vectorised maximum; int() so that range() gets a plain integer.
        length = int(data.iloc[:, 0].max())
    return pd.DataFrame(index=range(length), columns=['Id', 'Pred'])
def pred_ratio(features):
    """Return the prediction for one terminal.

    Placeholder: ``features`` is currently ignored and the result is always 0.
    """
    return 0
def pred_gen(df, result):
    """Fill ``result`` in place with one prediction per terminal.

    Rows of ``df`` are walked in order. Column 0 is read as the terminal
    number and column 4 as the value handed to ``features_append``. Each time
    the terminal number changes, and once more after the last row, the row
    ``terminal - 1`` of ``result`` receives the terminal number in column 0
    and ``pred_ratio(features)`` in column 1.

    Args:
        df: DataFrame with the terminal number in column 0 and the feature
            value in column 4, grouped by terminal.
        result: DataFrame to fill, indexed positionally by ``terminal - 1``.

    Returns:
        None. Nothing happens when ``df`` has no rows.
    """
    n_rows = len(df)
    if n_rows == 0:
        return

    # Read the two columns once instead of one cell per iteration.
    terms = df.iloc[:, 0].values
    values = df.iloc[:, 4].values

    # NOTE(review): the walk assumes the first terminal is 1; if it is not,
    # row 0 of result is written with an empty feature list -- confirm.
    curr_term = 1
    features = []
    for i in range(n_rows):
        tmp_term = terms[i]
        if tmp_term != curr_term:
            # Terminal changed: store the finished one and start afresh.
            print(tmp_term)
            result.iloc[curr_term - 1, 0] = curr_term
            result.iloc[curr_term - 1, 1] = pred_ratio(features)
            curr_term = tmp_term
            features = []
        # Every row contributes, including the first row of a new terminal.
        features_append(features, values[i])

    # Store the terminal that was still open when the rows ran out.
    print('end of final term: ')
    print(tmp_term)
    result.iloc[curr_term - 1, 0] = curr_term
    result.iloc[curr_term - 1, 1] = pred_ratio(features)
# Allocate the empty Id/Pred table sized by the largest terminal number.
result = define_test(df)
# Fill the table in place, one row per terminal.
# NOTE(review): nothing in the visible code writes result to path_result_out -- confirm.
pred_gen(df, result)