数据写入计算模块

import numpy as np;
import math
import pandas as pd;
import csv

# Open the summary file that receives one row per input table.
# 'utf-8-sig' puts a BOM at the start of the file, and newline='' leaves line
# endings to the csv module. The handle stays open here because rows are
# appended by the per-table loop further down, which also closes it.
f = open('data.csv', mode='w', encoding='utf-8-sig', newline='')
csv_writer = csv.writer(f)

# Header row: table name, three protocol counters, four entropies,
# three destination-side totals, and the number of rows in the table.
csv_writer.writerow([
    "tableName",
    "TcpCount",
    "UdpCount",
    "Other",
    "SrcIpEntropy",
    "DstIpEntropy",
    "SrcPortEntropy",
    "DstPortEntropy",
    "DstPayloadBytes",
    "DstTotalBytes",
    "DstPacketCount",
    "COUNT",
])
def entropy(list):
    """Return the Shannon entropy, in bits, of a collection of probabilities.

    Args:
        list: Sized iterable of non-negative probabilities. The values are
            used as given; they are not checked to sum to 1. (The parameter
            name shadows the builtin but is kept so that existing callers,
            including keyword callers, keep working.)

    Returns:
        -1 when the input is empty (sentinel kept for backward
        compatibility), otherwise ``-sum(p * log2(p))`` over the input.

    Raises:
        ValueError: If a probability is negative (``math.log2`` domain error).
    """
    if len(list) == 0:
        return -1

    total = 0
    for p in list:
        # By convention 0 * log(0) contributes nothing; without this guard
        # math.log2(0) would raise ValueError.
        if p == 0:
            continue
        total -= p * math.log2(p)
    return total

# Summarise table1.csv .. table146.csv, writing one row per table to the
# already-open csv_writer. The try/finally guarantees the output file is
# closed (and flushed) even if one of the tables fails to load or parse.
try:
    for i in range(1, 147):
        t = "table" + str(i)
        df = pd.read_csv(t + ".csv")

        # Row count per distinct ipLayerProtocolCode; value_counts() orders
        # the result from most to least frequent.
        protocol_counts = df['ipLayerProtocolCode'].value_counts()
        n_protocols = len(protocol_counts)

        # Use .iloc so the access is always positional: plain Series[int] is
        # a label lookup on an integer index and a deprecated positional
        # fallback on any other index.
        # NOTE(review): the "TcpCount"/"UdpCount" columns therefore receive
        # the largest and second-largest counts, whichever protocols those
        # are -- confirm that ordering always holds for this data, or look
        # the counts up by protocol label instead.
        first_count = protocol_counts.iloc[0] if n_protocols > 0 else 0
        second_count = protocol_counts.iloc[1] if n_protocols > 1 else 0
        # Everything beyond the first two protocols; 0 when there are none.
        other_count = protocol_counts.iloc[2:].sum()

        # Entropy of the empirical distribution of each address/port column
        # (value_counts(normalize=True) yields relative frequencies).
        src_ip_h, dst_ip_h, src_port_h, dst_port_h = [
            entropy(list(df[column].value_counts(normalize=True).values))
            for column in (
                'firstSeenSrcIp',
                'firstSeenDestIp',
                'firstSeenSrcPort',
                'firstSeenDestPort',
            )
        ]

        # Destination-side totals over the whole table.
        dst_payload_bytes = df['firstSeenDestPayloadBytes'].sum()
        dst_total_bytes = df['firstSeenDestTotalBytes'].sum()
        dst_packet_count = df['firstSeenDestPacketCount'].sum()

        # Number of rows in the table.
        count = len(df)

        print(i)  # progress indicator
        csv_writer.writerow([
            t,
            first_count,
            second_count,
            other_count,
            src_ip_h,
            dst_ip_h,
            src_port_h,
            dst_port_h,
            dst_payload_bytes,
            dst_total_bytes,
            dst_packet_count,
            count,
        ])
finally:
    f.close()
print("1")




你可能感兴趣的:(自学记录)