机器学习学习笔记--逻辑回归检测Java溢出攻击

#-*- coding:utf-8 -*-

import re

import matplotlib.pyplot as plt

import os

from sklearn.feature_extraction.text import CountVectorizer

from sklearn import cross_validation

import numpy as np

from sklearn.neural_network import MLPClassifier

from sklearn import linear_model,datasets

def load_one_file(filename):
    """Return the first line of *filename* without its newline.

    Only the first line is read; any further lines in the file are ignored.
    An empty file yields an empty string.

    Args:
        filename: Path of the file to read.

    Returns:
        The first line with leading/trailing ``'\\n'`` characters stripped.
    """
    with open(filename) as f:
        return f.readline().strip('\n')

def load_adfa_training_files(rootdir):
    """Load every regular file directly inside *rootdir*, labelling each 0.

    Sub-directories of *rootdir* are not descended into.

    Args:
        rootdir: Directory whose files are loaded.

    Returns:
        A tuple ``(x, y)`` of two equally long lists: ``x`` holds the text
        returned by ``load_one_file`` for each file and ``y`` holds the
        label ``0`` for each of them.
    """
    x = []
    y = []
    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        # Skip anything that is not a plain file (e.g. nested directories).
        if os.path.isfile(path):
            x.append(load_one_file(path))
            # Parenthesised single-argument print emits the same text on
            # Python 2 and Python 3.
            print("Load file(%s)" % path)
            y.append(0)
    return x, y

def dirlist(path, allfile):
    """Recursively collect the paths of all non-directory entries under *path*.

    Entries are visited in the order ``os.listdir`` returns them; a directory
    is descended into as soon as it is encountered.

    Args:
        path: Directory to walk.
        allfile: List that found paths are appended to (mutated in place).

    Returns:
        The same ``allfile`` list that was passed in.
    """
    for filename in os.listdir(path):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            # Recurse with the shared accumulator so results stay in one list.
            dirlist(filepath, allfile)
        else:
            allfile.append(filepath)
    return allfile

# Matches a path that contains ``Java_Meterpreter_<digits>/UAD-Java-Meterpreter``
# as two consecutive path components (either separator style). The pattern is
# deliberately independent of where the data set lives on disk.
_JAVA_METERPRETER_PATH = re.compile(
    r"(?:^|[\\/])Java_Meterpreter_\d+[\\/]UAD-Java-Meterpreter"
)


def load_adfa_java_files(rootdir):
    """Load the Java Meterpreter trace files found under *rootdir*, labelled 1.

    Every file below *rootdir* (searched recursively via ``dirlist``) whose
    path contains ``Java_Meterpreter_<digits>/UAD-Java-Meterpreter...`` is
    loaded. Earlier revisions matched against one hard-coded absolute
    directory, so calling this with any other *rootdir* silently returned no
    samples; the match now depends only on the directory/file naming.
    The old pattern also ended in ``r*`` (a glob-style wildcard that, as a
    regex, merely made the final ``r`` optional); the final ``r`` is now
    required.

    Args:
        rootdir: Directory that is searched recursively for trace files.

    Returns:
        A tuple ``(x, y)`` of two equally long lists: ``x`` holds the text
        returned by ``load_one_file`` for each matching file and ``y`` holds
        the label ``1`` for each of them.
    """
    x = []
    y = []
    for path in dirlist(rootdir, []):
        if _JAVA_METERPRETER_PATH.search(path):
            # Parenthesised single-argument print emits the same text on
            # Python 2 and Python 3.
            print("Load file(%s)" % path)
            x.append(load_one_file(path))
            y.append(1)
    return x, y

if __name__ == "__main__":
    # Samples from Training_Data_Master get label 0, Java Meterpreter attack
    # traces from Attack_Data_Master get label 1.
    x1, y1 = load_adfa_training_files("/home/qin/code/python/web-ml/1book-master/data/ADFA-LD/Training_Data_Master/")
    x2, y2 = load_adfa_java_files("/home/qin/code/python/web-ml/1book-master/data/ADFA-LD/Attack_Data_Master/")

    x = x1 + x2
    y = y1 + y2

    # Turn each loaded line into a token-count vector, then densify it.
    vectorizer = CountVectorizer(min_df=1)
    x = vectorizer.fit_transform(x)
    x = x.toarray()

    # An MLPClassifier used to be constructed here but was never fitted or
    # scored; only the logistic-regression model is evaluated.
    logreg = linear_model.LogisticRegression(C=1e5)

    # NOTE(review): `sklearn.cross_validation` exists only in older
    # scikit-learn releases; newer ones expose `cross_val_score` from
    # `sklearn.model_selection` -- confirm the targeted version.
    score = cross_validation.cross_val_score(logreg, x, y, n_jobs=-1, cv=10)

    # Report the mean score over the 10 folds; the parenthesised form prints
    # the same text on Python 2 and Python 3.
    print(np.mean(score))



机器学习学习笔记--逻辑回归检测java溢出攻击_第1张图片

你可能感兴趣的:(机器学习学习笔记--逻辑回归检测java溢出攻击)