机器学习--输出分类概率--matlab代码

概览

本文给出了以下 3 种可输出分类概率的学习器的 MATLAB 代码及例程。

  1. SVM
  2. kNN
  3. ELM

程序包
链接:https://pan.baidu.com/s/1B6R2B7mzEDahWnDQ0zlepw
提取码:9rbp

例程

%% Machine learning - example script
%% Short introduction
% 
% 
% Purpose: build a prediction model from training data with a chosen learner.
% 
% Input: training data (features + label), testing data (features + label).
% 
% Output: predicted labels.
% 
% Choosing a learner: prefer SVM when a stable learner is wanted and ELM when
% an unstable learner is wanted. (Visualising the data first and picking the
% classifier whose bias matches the distribution is also an option.)
% 
% 
% Note: in this example the testing data is "features + label" and the outputs
% are the testing accuracy and the predictions. In real use the label column of
% the testing samples can simply be set to 0 before predicting.
% NOTE(review): f_ELM builds its class list from the labels of BOTH tr and te,
% so a placeholder label of 0 presumably adds an extra class there - confirm.
%% Data preparation

    load iris.mat       % load the data set: features first, label last; one sample per row, one feature per column
    tr=iris(1:100,:);   % rows 1..100 of iris are the training samples
    te=iris(101:end,:); % the remaining rows (50 for the standard 150-row set) are the testing samples
    % NOTE(review): if iris.mat is ordered by class (as the standard Iris set
    % is), rows 1..100 contain only two of the three classes and the testing
    % rows contain only the third - confirm, and shuffle the rows if so.

%% Learners
% 1. SVM: support vector machine

    [A_SVM,SVM_label] = SVM1(tr,te);
    % requires libsvm to be installed
    % background: https://zhuanlan.zhihu.com/p/77750026

% 2. KNN_ave: k-nearest-neighbour variant based on the average distance

    k = 1;
    [A_kNN_1,label_knn_1,Probability] = knn_ave(tr,te,k); % improved kNN

% 3. ELM: extreme learning machine
    NumberofHiddenNeurons = 10;
    [TestingAccuracy, elmlabel,pro, TrainingAccuracy, TrainingTime, TestingTime] = f_ELM(tr,te,NumberofHiddenNeurons);

子程序

function [TestingAccuracy,predicted_label,Scores,Time]=SVM1(tr,te)
%SVM1  Train a libsvm classifier on tr and predict te with class probabilities.
%
% Input:
%   tr - training matrix, one sample per row; last column is the label,
%        the other columns are the features
%   te - testing matrix, same layout as tr
%
% Output:
%   TestingAccuracy - fraction (0..1) of testing samples classified correctly
%   predicted_label - column vector of predicted labels for te
%   Scores          - result of guiyi() applied to
%                     [per-class probability columns, predicted_label];
%                     the probability columns are in ascending label order
%   Time            - elapsed wall-clock seconds (tic/toc) for the whole call
%
% Requires the libsvm MATLAB interface (svmtrain / svmpredict) on the path,
% and the helper guiyi, which is not defined in this file.

tic
% '-t 0' selects the linear kernel; '-b 1' enables probability estimates.
model = svmtrain(tr(:,end), tr(:,1:end-1), '-c 1 -g 0.07 -t 0 -b 1 -q');
[predicted_label, TestingAccuracy, Scores] = svmpredict(te(:,end), te(:,1:end-1), model, '-b 1');
TestingAccuracy = TestingAccuracy(1)./100;      % libsvm reports a percentage

% libsvm returns the probability columns in the order of model.Label, which
% is generally not sorted. Reorder them by ascending label. Using model.Label
% (rather than inferring the order from the predictions) keeps one column
% per trained class even when a class is never predicted on te.
[~, columnOrder] = sort(model.Label);
scores = Scores(:, columnOrder);                % note: 'scores' ~= 'Scores'

Scores = [scores, predicted_label];
% guiyi is an external helper; presumably a normalisation step - TODO confirm
% what it does to the trailing label column.
[Scores] = guiyi(Scores);
Time = toc;
end
function [A_1,knnlabel_1,Probability]=knn_ave(tr,te,k)
%KNN_AVE  Average-distance k-nearest-neighbour classifier.
%
% For every testing sample and every class, the k training samples of that
% class closest to the testing sample (Euclidean distance) are found and
% their distances averaged. The class with the smallest average distance is
% the prediction.
%
% Input:
%   tr - training matrix, one sample per row; last column is the label
%   te - testing matrix, same layout as tr
%   k  - number of neighbours per class (default 3). If a class has fewer
%        than k training samples, all of its samples are used.
%
% Output:
%   A_1         - testing accuracy: fraction of rows of te predicted correctly
%   knnlabel_1  - column vector of predicted labels
%   Probability - [scores, knnlabel_1]; scores has one column per class in
%                 ascending label order. The best class scores 1 and every
%                 other class scores (smallest average distance) / (its own
%                 average distance).

if ~exist('k', 'var') || isempty(k)
    k = 3;                                   % default neighbour count
end
if ~isscalar(k) || k < 1 || k ~= floor(k)
    error('knn_ave:invalidK', 'k must be a positive integer scalar.');
end

n   = size(tr,2);
trd = tr(:,1:n-1);                           % training features
trl = tr(:,n);                               % training labels
ted = te(:,1:n-1);                           % testing features
tel = te(:,n);                               % testing labels
m2  = size(te,1);                            % number of testing samples

% Use the label values that actually occur instead of assuming 1..C.
classes   = unique(trl);
num_label = numel(classes);

probability = zeros(m2,num_label);
knnlabel_1  = zeros(m2,1);

for j = 1:m2
    % Euclidean distance from testing sample j to every training sample.
    delta    = bsxfun(@minus, trd, ted(j,:));
    distance = sqrt(sum(delta.^2, 2));

    % Sort once; the per-class subsets below are then already ordered.
    [sortedDist, order] = sort(distance);
    sortedLabel = trl(order);

    avgDist = zeros(1,num_label);
    for w = 1:num_label
        classDist  = sortedDist(sortedLabel == classes(w));
        kw         = min(k, numel(classDist));   % class may have < k samples
        avgDist(w) = sum(classDist(1:kw))/kw;
    end

    if min(avgDist) > 0
        % Sum of the distances divided by each distance, scaled so that the
        % best class gets 1.
        c = sum(avgDist)./avgDist;
        c = c/max(c);
    else
        % A zero average distance would give Inf/Inf = NaN above; the
        % coinciding class(es) get 1 and all others 0.
        c = double(avgDist == 0);
    end
    probability(j,:) = c;

    [~, best] = min(avgDist);                % first minimum wins on ties
    knnlabel_1(j) = classes(best);
end

Probability = [probability,knnlabel_1];
A_1 = nnz(knnlabel_1 == tel)/m2;             % recognition rate
end
function [TestingAccuracy, elmlabel,pro, TrainingAccuracy, TrainingTime, TestingTime] = f_ELM(tr,te,NumberofHiddenNeurons)
%F_ELM  Extreme Learning Machine classifier that also returns class scores.
%
% Input:
%   tr                    - training matrix, one sample per row; the last
%                           column is the class label, the others are features
%   te                    - testing matrix, same layout as tr
%   NumberofHiddenNeurons - number of hidden neurons (default 10)
%
% Output:
%   TestingAccuracy  - correct classification rate on te
%   elmlabel         - column vector of predicted labels for te (label
%                      values; for labels 1..C this equals the class index)
%   pro              - one row per testing sample: the rescaled network
%                      outputs (one column per class, ascending label order)
%                      passed through guiyi, followed by the predicted label
%   TrainingAccuracy - correct classification rate on tr
%   TrainingTime     - CPU time (seconds) spent on training
%   TestingTime      - CPU time (seconds) spent predicting all of te
%
% The function is classification-only and uses the sigmoid activation.
% The class list is the union of the labels found in tr and te.
% Input weights and biases are drawn with rand, so results vary between
% calls unless the random generator is seeded by the caller.
% Requires the helper guiyi, which is not defined in this file.

if ~exist('NumberofHiddenNeurons', 'var') || isempty(NumberofHiddenNeurons)
    NumberofHiddenNeurons = 10;
end
ActivationFunction = 'sig';

%%%%%%%%%%% Split features / targets (samples become columns)
T   = tr(:,end)';
P   = tr(:,1:end-1)';
TVT = te(:,end)';
TVP = te(:,1:end-1)';

NumberofTrainingData = size(P,2);
NumberofTestingData  = size(TVP,2);
NumberofInputNeurons = size(P,1);

%%%%%%%%%%% Class list and +1/-1 one-of-C target coding
label        = unique([T, TVT]);            % sorted distinct labels (row)
number_class = numel(label);
T   = double(bsxfun(@eq, label(:), T))   * 2 - 1;   % number_class x Ntrain
TVT = double(bsxfun(@eq, label(:), TVT)) * 2 - 1;   % number_class x Ntest

%%%%%%%%%%% Training
start_time_train = cputime;

% Random input weights in [-1,1) and biases in [0,1). The order of the two
% rand calls is kept so that a seeded generator reproduces the same network.
InputWeight         = rand(NumberofHiddenNeurons,NumberofInputNeurons)*2-1;
BiasofHiddenNeurons = rand(NumberofHiddenNeurons,1);

H = elm_activation(bsxfun(@plus, InputWeight*P, BiasofHiddenNeurons), ActivationFunction);

% Output weights by pseudo-inverse, without a regularisation factor
% (2006 Neurocomputing ELM formulation).
OutputWeight = pinv(H') * T';

TrainingTime = cputime - start_time_train;

Y = (H' * OutputWeight)';                   % network output on training data

%%%%%%%%%%% Testing
start_time_test = cputime;

H_test = elm_activation(bsxfun(@plus, InputWeight*TVP, BiasofHiddenNeurons), ActivationFunction);
TY     = (H_test' * OutputWeight)';         % number_class x Ntest

% Reduce explicitly along dimension 1: with a single class TY is a row
% vector and a plain max(TY) would collapse it to one scalar.
[~, predictedIdx] = max(TY, [], 1);
% Return label values rather than positions in the class list.
elmlabel = reshape(label(predictedIdx), [], 1);

TestingTime = cputime - start_time_test;

%%%%%%%%%%% Classification accuracy
[~, trainExpected] = max(T, [], 1);
[~, trainActual]   = max(Y, [], 1);
TrainingAccuracy = 1 - nnz(trainActual ~= trainExpected)/NumberofTrainingData;

[~, testExpected] = max(TVT, [], 1);
TestingAccuracy  = 1 - nnz(predictedIdx ~= testExpected)/NumberofTestingData;

%%%%%%%%%%% Per-sample class scores
pro = [];                                   % stays defined when te is empty
for i = 1:NumberofTestingData
    % One score per CLASS. Slicing by the number of features would pull in
    % the label column or drop classes whenever the two counts differ.
    c = TY(:,i)';
    c = c - min(c);                         % shift so the smallest is 0
    span = max(c);
    if span > 0
        c = c ./ span;                      % scale so the largest is 1
    end                                     % all outputs equal: avoid 0/0
    c = c + 0.00001;                        % keep every score strictly positive
    % guiyi is an external helper; presumably normalises the row - TODO confirm.
    [c] = guiyi(c);
    pro(i,:) = [c, elmlabel(i)];
end
end

function H = elm_activation(tempH, ActivationFunction)
% Apply the named hidden-layer activation function element-wise.
switch lower(ActivationFunction)
    case {'sig','sigmoid'}
        H = 1 ./ (1 + exp(-tempH));
    case {'sin','sine'}
        H = sin(tempH);
    case {'hardlim'}
        H = double(hardlim(tempH));
    case {'tribas'}
        H = tribas(tempH);
    case {'radbas'}
        H = radbas(tempH);
    otherwise
        error('f_ELM:unknownActivation', ...
              'Unknown activation function "%s".', ActivationFunction);
end
end

你可能感兴趣的:(matlab编程,机器学习,机器学习,算法,人工智能,matlab)