1. 源码修改
(1)版本更改导致问题
GitHub 上的源码为 Python 2,这里改为 Python 3
#头文件变更
from sklearn import model_selection
#调用函数变更
print(model_selection.cross_val_score(clf, x, y, n_jobs=-1, cv=10))
(2)运行报错
INTEL MKL ERROR: 页面文件太小，无法完成操作。 mkl_intel_thread.dll.
Intel MKL FATAL ERROR: Cannot load mkl_intel_thread.dll.
Exception in thread QueueManagerThread:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\psutil\_pswindows.py", line 716, in wrapper
return fun(self, *args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\psutil\_pswindows.py", line 873, in kill
return cext.proc_kill(self.pid)
PermissionError: [WinError 5] 拒绝访问。
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\threading.py", line 926, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py", line 674, in _queue_management_worker
recursive_terminate(p)
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\externals\loky\backend\utils.py", line 26, in recursive_terminate
_recursive_terminate_with_psutil(process)
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\externals\loky\backend\utils.py", line 41, in _recursive_terminate_with_psutil
child.kill()
File "C:\ProgramData\Anaconda3\lib\site-packages\psutil\__init__.py", line 392, in wrapper
return fun(self, *args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\psutil\__init__.py", line 1368, in kill
self._proc.kill()
File "C:\ProgramData\Anaconda3\lib\site-packages\psutil\_pswindows.py", line 718, in wrapper
raise convert_oserror(err, pid=self.pid, name=self._name)
psutil.AccessDenied: psutil.AccessDenied (pid=42256)
修改为
print(model_selection.cross_val_score(clf, x, y, n_jobs=1, cv=10))
这里解释下参数
n_jobs=1
进程个数,默认为1。 若值为 -1,则用所有的CPU进行运算。 若值为1,则不进行并行运算,这样的话方便调试。
3.KDD99数据集
def load_kdd99(filename):
    """Load a KDD Cup 99 data file into a list of records.

    Each non-blank line is split on commas; fields are kept as strings.

    Args:
        filename: Path to the comma-separated data file.

    Returns:
        A list with one list of string fields per non-blank line.
    """
    x = []
    with open(filename) as f:
        for line in f:
            line = line.strip('\n')
            # Skip blank lines (e.g. a trailing empty line); otherwise they
            # would be stored as [''] records with no usable fields.
            if not line.strip():
                continue
            x.append(line.split(','))
    return x
4.构建特征向量
(1)仅选取pop3相关类型数据
(2)对于guess_passwd类型的数据的label标记为1
(3)对于正常数据的label标记为0
(4)特征选取0、4:8、22:30,并将其float化
def get_guess_passwdandNormal(x):
    """Build feature vectors and labels for POP3 password-guessing detection.

    Keeps only records whose service field (index 2) is 'pop_3' and whose
    label field (index 41) is 'guess_passwd.' or 'normal.'.

    Args:
        x: Iterable of records, each a list of string fields.

    Returns:
        A tuple (w, y): w is a list of float feature vectors built from
        fields 0, 4-7 and 22-29; y is the matching list of labels
        (1 for 'guess_passwd.', 0 for 'normal.').

    Raises:
        ValueError: If a selected field cannot be converted to float.
    """
    w = []
    y = []
    for x1 in x:
        # Records with fewer than 42 fields (blank or truncated lines)
        # cannot carry a label at index 41; skip them instead of raising.
        if len(x1) < 42:
            continue
        label = x1[41]
        if x1[2] != 'pop_3' or label not in ('guess_passwd.', 'normal.'):
            continue
        features = [x1[0]] + x1[4:8] + x1[22:30]
        w.append([float(value) for value in features])
        y.append(1 if label == 'guess_passwd.' else 0)
    return w, y
5.完整代码
# -*- coding:utf-8 -*-
from sklearn import model_selection
from sklearn import tree
import pydotplus
def load_kdd99(filename):
    """Load a KDD Cup 99 data file into a list of records.

    Each non-blank line is split on commas; fields are kept as strings.

    Args:
        filename: Path to the comma-separated data file.

    Returns:
        A list with one list of string fields per non-blank line.
    """
    x = []
    with open(filename) as f:
        for line in f:
            line = line.strip('\n')
            # Skip blank lines (e.g. a trailing empty line); otherwise they
            # would be stored as [''] records with no usable fields.
            if not line.strip():
                continue
            x.append(line.split(','))
    return x
def get_guess_passwdandNormal(x):
    """Build feature vectors and labels for POP3 password-guessing detection.

    Keeps only records whose service field (index 2) is 'pop_3' and whose
    label field (index 41) is 'guess_passwd.' or 'normal.'.

    Args:
        x: Iterable of records, each a list of string fields.

    Returns:
        A tuple (w, y): w is a list of float feature vectors built from
        fields 0, 4-7 and 22-29; y is the matching list of labels
        (1 for 'guess_passwd.', 0 for 'normal.').

    Raises:
        ValueError: If a selected field cannot be converted to float.
    """
    w = []
    y = []
    for x1 in x:
        # Records with fewer than 42 fields (blank or truncated lines)
        # cannot carry a label at index 41; skip them instead of raising.
        if len(x1) < 42:
            continue
        label = x1[41]
        if x1[2] != 'pop_3' or label not in ('guess_passwd.', 'normal.'):
            continue
        features = [x1[0]] + x1[4:8] + x1[22:30]
        w.append([float(value) for value in features])
        y.append(1 if label == 'guess_passwd.' else 0)
    return w, y
if __name__ == '__main__':
    # Load the raw records, then keep only the pop_3 samples labelled
    # guess_passwd (1) or normal (0) as float feature vectors.
    v = load_kdd99("../data/kddcup99/corrected")
    x, y = get_guess_passwdandNormal(v)

    clf = tree.DecisionTreeClassifier()
    # n_jobs=1 keeps the 10-fold cross-validation in a single process.
    print(model_selection.cross_val_score(clf, x, y, n_jobs=1, cv=10))

    # Fit on the full data set and render the resulting tree to a PDF.
    clf = clf.fit(x, y)
    dot_data = tree.export_graphviz(clf, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("../photo/6/pop3.pdf")
6.运行结果
[0.98637602 1. 1. 1. 1. 1.
1. 1. 1. 1. ]
查看可视化的决策树