1.取输入道路的每10min,或者20min,或者30min的平均速度和经过车辆数
def qushu(tablenames, filename1):
    """Extract per-interval road statistics and append them to a CSV.

    For every table named in ``tablenames``, query the average velocity,
    row count and distinct-vehicle count per 10-minute bucket for the road
    range [start_road, end_road] (module-level globals, as is the DB
    ``cursor``), then append every fetched row to ``filename1``.
    """
    collected = []
    for table_name in tablenames:
        print(table_name)
        # Bucket ctime into 600-second (10 min) slots on the server side.
        sql_str = "SELECT AVG(\"VELOCITY\"), count(*), count(distinct \"TM_SERIAL\") AS COUNT_TM, " \
                  "TIMESTAMP WITH TIME ZONE \'epoch\' + " \
                  "INTERVAL '1 second' * round(extract(\'epoch\' from \"ctime\") / 600) * 600 as timestamp " \
                  "FROM public.\"{0}\" "\
                  "where \"osm_id_new\" >= {1} and \"osm_id_new\" <= {2} " \
                  "GROUP BY timestamp".format(table_name, start_road, end_road)
        print(sql_str)
        cursor.execute(sql_str)
        collected.extend(cursor.fetchall())
    # Append everything in one go; 'a' keeps earlier runs' rows.
    with open(filename1, 'a', newline='') as f:
        csv.writer(f).writerows(collected)
2.对取出的数进行处理,取出每个时间段的 前一个时间段和后一个时间段的 速度和车辆数
def parsedata(contents, filename2):
    """Attach previous/next time-slot speed and vehicle count to every row.

    ``contents`` is an iterable of CSV lines shaped like
    ``avg_velocity,count,count_tm,timestamp`` (the output of qushu()).
    For each row the average velocity and vehicle count of the previous and
    next time slot are appended; when a neighbouring slot is missing (the
    series is not contiguous there) the row's own values are used instead.
    Results (with a header row) are appended to ``filename2``.

    Fixes over the original version:
    - last-row continuity test compared ``hour_minute`` to the previous
      slot index instead of ``hour_minute - 1`` (always false);
    - ``count_tm_qian`` for the last row was read from the velocity column
      (index 2) instead of the count column (index 3), so ``int()`` raised;
    - a single-row input no longer raises IndexError.
    """
    parsed = []
    for line in contents:
        fields = line.replace("\n", "").split(',')
        avg_velocity = fields[0]
        count_tm = fields[2]
        # Timestamps carry a "+08:00" zone suffix; strip it so strptime
        # can parse the naive local time.
        ts_text = fields[3].replace("+08:00", "")
        ts = datetime.datetime.strptime(ts_text, '%Y-%m-%d %H:%M:%S')
        # Slot index within the day: 6 ten-minute slots per hour.
        # NOTE: true division keeps this a float; only the +/-1 continuity
        # comparisons below use it, where floats still work for 10-min data.
        hour_minute = int(ts.hour) * 6 + int(ts.minute) / 10
        parsed.append([fields[3], ts, avg_velocity, count_tm,
                       ts.hour, ts.minute, hour_minute])

    result2 = []
    n = len(parsed)
    for i, val in enumerate(parsed):
        hour_minute = val[6]
        # A neighbour is "continuous" when its slot index differs by exactly 1.
        prev_ok = i > 0 and parsed[i - 1][6] == hour_minute - 1
        next_ok = i < n - 1 and parsed[i + 1][6] == hour_minute + 1
        prev_row = parsed[i - 1] if prev_ok else val
        next_row = parsed[i + 1] if next_ok else val
        val.append(float(prev_row[2]))  # avg_velocity_qian (previous slot)
        val.append(float(next_row[2]))  # avg_velocity_hou  (next slot)
        val.append(int(prev_row[3]))    # count_tm_qian
        val.append(int(next_row[3]))    # count_tm_hou
        result2.append(val)

    # 'a' mode: a header row is written on every call, matching the
    # original behaviour (downstream code skips it with next(f)).
    with open(filename2, 'a', newline='') as f:
        csv_writer = csv.writer(f)
        title = ['time', 'time2', 'avg_velocity', 'count_tm', 'hour', 'minute',
                 'hour_minute', 'avg_velocity_qian', 'avg_velocity_hou',
                 'count_tm_qian', 'count_tm_hou']
        csv_writer.writerow(title)
        for row in result2:
            csv_writer.writerow(row)
3.孤立森林训练
def score(filename2, filename3):
    """Score every row of ``filename2`` with two isolation forests.

    One forest uses all 8 features (speed/count of the slot and both
    neighbours plus hour and slot index), the other only the slot's own
    4 features. Raw decision scores, their |score - 0.5| transforms and
    the 8-feature predictions are written alongside the inputs to
    ``filename3``.
    """
    dataset = pd.read_csv(filename2, engine='python')
    dataset = dataset.fillna(0)
    cols8 = ['avg_velocity_qian', 'avg_velocity', 'avg_velocity_hou',
             'count_tm_qian', 'count_tm', 'count_tm_hou', 'hour', 'hour_minute']
    cols4 = ['avg_velocity', 'count_tm', 'hour', 'hour_minute']
    X8 = pd.DataFrame(dataset, columns=cols8).values
    X4 = pd.DataFrame(dataset, columns=cols4).values
    # One shared RandomState: construction and fit order must stay fixed
    # for reproducible draws.
    rs = np.random.RandomState(64)
    lendata = dataset.shape[0]  # row count (kept for parity; unused below)
    model8 = IsolationForest(n_estimators=500, verbose=2, n_jobs=2,
                             max_samples=256, random_state=rs,
                             max_features=8, contamination='auto')
    model4 = IsolationForest(n_estimators=500, verbose=2, n_jobs=2,
                             max_samples=256, random_state=rs,
                             max_features=4, contamination='auto')
    model8.fit(X8)
    model4.fit(X4)
    score8 = model8.decision_function(X8)
    score8_abs = abs(score8 - 0.5)
    score4 = model4.decision_function(X4)
    score4_abs = abs(score4 - 0.5)
    predictions = model8.predict(X8)
    stacked = np.column_stack((dataset['time'], dataset['time2'],
                               dataset['avg_velocity_qian'], dataset['avg_velocity'],
                               dataset['avg_velocity_hou'], dataset['count_tm_qian'],
                               dataset['count_tm'], dataset['count_tm_hou'],
                               dataset['hour'], dataset['hour_minute'],
                               score8, score8_abs, score4, score4_abs, predictions))
    df = pd.DataFrame(data=stacked, columns=['time','time2','avg_velocity_qian','avg_velocity','avg_velocity_hou', 'count_tm_qian','count_tm','count_tm_hou','hour','hour_minute','Iso_anomaly_score', 'Iso_anomaly_score_8', 'Iso_anomaly_score1', 'Iso_anomaly_score1_4','Iso_predict'])
    df.to_csv(filename3)
4.完整代码
# Driver: for every road-id range listed in secondary.csv, extract the raw
# interval data (qushu), derive neighbour features (parsedata) and score
# anomalies (score), writing one CSV per stage per road range.
tablenames = []
with open("tablenames.csv", "r") as f:
    for line in f.readlines():
        tablenames.append(line.replace("\n", ""))
print(tablenames)

conn = psycopg2.connect(database="taxi", user="postgres", password="AdminVge100", host="127.0.0.1", port="5432")
cursor = conn.cursor()  # read by qushu() as a module-level global

with open('secondary.csv', 'r') as f:
    contents = f.readlines()
for osm_id_new in contents:
    print(osm_id_new)
    osm_id_new = osm_id_new.replace("\n", "").split(",")
    # start_road / end_road are globals consumed by qushu()'s SQL.
    start_road = osm_id_new[0]
    end_road = osm_id_new[1]
    filename1 = ".\\data\\10min_" + start_road + "-" + end_road + ".csv"
    filename2 = ".\\dataparse\\10min_" + start_road + "-" + end_road + "_test.csv"
    filename3 = ".\\result\\10min_" + start_road + "-" + end_road + "_result8.csv"
    try:
        qushu(tablenames, filename1)
        with open(filename1, 'r') as f:
            raw_rows = f.readlines()
        parsedata(raw_rows, filename2)
        score(filename2, filename3)
    except Exception as e:
        # Was `except BaseException: pass`, which also swallowed
        # KeyboardInterrupt/SystemExit and hid every failure. Keep the
        # best-effort "skip this road range" behaviour but log the error.
        print("failed for", start_road, "-", end_road, ":", e)
conn.close()
5.对异常结果进行汇总
要注意修改 汇总哪个异常分数
Score8:8个参数
Score4:4个参数
# 5. Summarise anomalies: scan every per-road result CSV, rank each record's
# speed / vehicle count within its time-of-day slot, and collect the records
# whose 4-feature anomaly score exceeds 0.6 into one summary CSV.
import pandas as pd
import os
from pandas.core.frame import DataFrame
import csv

filename = r"E:\李猛硕士毕设\实验部分\实验数据\2021-02-05\primary\primary\30min\result\30min_113000-113005_result8.csv"
result1 = []  # rows whose Iso_anomaly_score1_4 exceeds the 0.6 threshold
file_dir = r"E:\李猛硕士毕设\实验部分\实验数据\2021-02-05\secondary\secondary\20min\result"
# NOTE(review): only the `files` of the LAST os.walk() iteration survives
# this loop (topdown=False), so this effectively lists one directory's
# files — presumably file_dir has no sub-directories; confirm.
for root, dirs, files in os.walk(file_dir, topdown=False):
    files = files
result = []  # every successfully parsed row (only its count is printed)
for file_name in files:
    filename = file_dir + "\\" + file_name
    dict1 = {}  # hour_minute slot -> list of avg_velocity values
    dict2 = {}  # hour_minute slot -> list of count_tm values
    with open(filename, 'r') as f:
        next(f)  # skip the header row
        contents = f.readlines()
    # Skip roads with too few records to rank meaningfully.
    if(len(contents)<1800):
        continue
    # First pass: bucket speeds and counts per time-of-day slot (hours 7-21).
    for i in contents:
        try:
            i = i.replace("\n","").split(',')
            avg_velocity = i[4]
            hour = int(i[9])
            hour_minute = i[10]
            count_tm = i[7]
            result.append(i)
            if(hour > 6 and hour < 22 ):
                if hour_minute not in dict1:
                    dict1[hour_minute] = []
                    dict1[hour_minute].append(float(avg_velocity))
                else:
                    dict1[hour_minute].append(float(avg_velocity))
                if hour_minute not in dict2:
                    dict2[hour_minute] = []
                    dict2[hour_minute].append(int(count_tm))
                else:
                    dict2[hour_minute].append(int(count_tm))
        except BaseException:
            # Best-effort: malformed rows are silently skipped.
            pass
    # Sort each slot's values so .index() below yields the value's rank.
    for key,value in dict1.items():
        value.sort()
    for key,value in dict2.items():
        value.sort()
    # Second pass: attach the ranks and keep the high-scoring rows.
    with open(filename, 'r') as f:
        next(f)
        contents = f.readlines()
    for i in contents:
        try:
            i = i.replace("\n","").split(',')
            avg_velocity = i[4]
            count_tm = i[7]
            hour_minute = i[10]
            hour = int(i[9])
            # Column 14 is Iso_anomaly_score1_4 per the output header below
            # (the 4-feature score); change the index to summarise another score.
            Iso_anomaly_score1 = float(i[14])
            if(hour>6 and hour < 22 ):
                avg_velocity_sort = dict1[hour_minute].index(float(avg_velocity))
                count_tm_sort = dict2[hour_minute].index(int(count_tm))
                i.append(avg_velocity_sort)
                i.append(count_tm_sort)
                i.append(file_name)
                if(Iso_anomaly_score1 > 0.6):
                    result1.append(i)
        except BaseException:
            pass
print(len(result))
# 'a' mode: reruns append (and duplicate the header) rather than overwrite.
with open('20min_secondary_4_异常汇总.csv','a',newline='') as f:
    csv_writer = csv.writer(f)
    title=['id','time','time2','avg_velocity_qian','avg_velocity','avg_velocity_hou','count_tm_qian','count_tm','count_tm_hou','hour','hour_minute','Iso_anomaly_score','Iso_anomaly_score_8','Iso_anomaly_score1','Iso_anomaly_score1_4','predict','avg_velocity_sort','count_tm_sort','file_name']
    csv_writer.writerow(title)
    for i in result1:
        csv_writer.writerow(i)
6.为异常样本打上标签,并按照日期分割成多张csv
# 6. Label the anomalous samples and split them into one CSV per date.
import csv
from datetime import datetime

# Read the manually labelled ranges (skipping the header row).
result = []
with open('20min_4_label.csv', 'r') as f:
    next(f)
    for raw in f.readlines():
        result.append(raw.replace("\n", "").split(","))

# Expand each labelled range into one entry per road segment:
# [osm_id_new, label, change, time].
road_label = []
for row in result:
    start = int(row[-2])
    end = int(row[-1])
    label = row[-3]
    ts = row[0]
    # Labels 1/2/5/6 mean the road got slower (1); others, faster (2).
    change = 1 if (label == '1' or label == '2' or label == '5' or label == '6') else 2
    for seg in range(start, end + 1):
        road_label.append([seg, label, change, ts])

# Group segment ids by calendar date, then by change direction.
dict1 = {}
for entry in road_label:
    osm_id_new = entry[0]
    day = datetime.strptime(entry[-1], "%Y/%m/%d %H:%M").date()
    dict1.setdefault(day, {}).setdefault(entry[-2], []).append(osm_id_new)

# One CSV per date, de-duplicating segment ids within each direction.
filepath = r".\\road_label\\"
for day, per_change in dict1.items():
    filename = filepath + day.strftime('%Y%m%d') + '.csv'
    with open(filename, 'a', newline='') as m:
        csv_writer3 = csv.writer(m)
        csv_writer3.writerow(['osm_id_new', 'change'])
        for change, seg_ids in per_change.items():
            for seg in list(set(seg_ids)):
                csv_writer3.writerow([seg, change])

# Also dump the full expanded label table in one file.
with open("road_label.csv", "a", newline='') as t:
    csv_writer2 = csv.writer(t)
    csv_writer2.writerow(['osm_id_new', 'label', 'change', 'time'])
    for entry in road_label:
        csv_writer2.writerow(entry)