A MATLAB Program Using the Three-Way Decision (TDW) Algorithm for Multiclass Classification

function [ result ] = TDW_multiclass( TrainingData_File )
%% This function extends TDW to the multiclass case
% A TDW classifier for multiclass problems built with the one-vs-one strategy.
% trainX: matrix of the training samples' feature vectors, one sample per row (n rows).
% trainY: n-by-Q label matrix, one row per sample, where Q is the number of classes;
%         if sample i belongs to class j then trainY(i,j) = 1 and every other entry
%         of trainY(i,:) is -1 (see the label-encoding sketch after this function).

%%  Get the details of the dataset
train_data = load(TrainingData_File);
[Y, X,~,~, ~,~] = Data(train_data);
trainY = Y';
trainX = X';
[n,m]=size(trainY);                                                         % n: number of training samples, m: number of classes
Sample_size = zeros(m,1);                                                   % number of samples in each class

Class = cell(1,m);                                                          % pairwise region labels collected per class
Sample_area = zeros(n,m);
%%  Get the region (area) labels for every pair of classes
for r = 1:m
    flagp = [];
    flagpY = [];
    for i = 1:n
        if trainY(i,r) == 1
            flagp = [flagp;trainX(i,:)];                                    % samples belonging to class r
            flagpY = [flagpY;trainY(i,:)];
        end
    end
    np = size(flagp,1);                                                     % number of class-r samples (np) in the training set
    
    if r+1<=m
        for j = r+1:m  
            flagn = [];
            flagnY = [];
            for i = 1:n
                if trainY(i,j) == 1
                    flagn = [flagn;trainX(i,:)];                            % samples belonging to class j (j = r+1,...,m)
                    flagnY = [flagnY;trainY(i,:)];
                end
            end
            nn = size(flagn,1);                                             % number of class-j samples in the training set

            clear flag; 
            flag = TDW_Binary([flagp;flagn],[flagpY;flagnY]);               % take two classes at a time, train the binary TDW classifier, and record each sample's region
            Class{1,r} = [Class{1,r} flag(1:np,1)];
            Class{1,j} = [Class{1,j} flag(np+1:np+nn,1)];
        end
    end
    for i = 1:np
        table = tabulate(Class{1,r}(i,:));
        [~,b] = max(table(:,2));
        Sample_area(i,r) = table(b,1);                                      % majority-vote region (area) of the i-th sample of class r
    end
    Sample_size(r,1) = np;
end
clear flag flag1 table;

%% Classify datasets using formulas

% Rho is set to m/n; this controls the algorithm's preference for classifying the data set as 'Overlapping'
Sample_Number = zeros(3,1);
for i =1:m
    table = tabulate(Sample_area(:,i));
    if find(table(:,1) == -1)
        a = find(table(:,1) == -1);
        Sample_Number(1,1) = Sample_Number(1,1) + table(a,2);               % row 1: negative-region count, row 2: positive-region count, row 3: boundary-region count (summed over all classes)
    end
    if find(table(:,1) == 1)
        a = find(table(:,1) == 1);
        Sample_Number(2,1) = Sample_Number(2,1) + table(a,2);
    end
    if find(table(:,1) == 2)
        a = find(table(:,1) == 2);
        Sample_Number(3,1) = Sample_Number(3,1) + table(a,2);
    end
end


Rho = m/n;                                            % cutoff parameter; for now, how to determine this parameter is the main open problem
Ratio_NP = Sample_Number(1,1)/n;                      % proportion of negative-region samples
Ratio_BN = Sample_Number(3,1)/Sample_Number(1,1);     % ratio of boundary-region to negative-region samples
Ratio_BP = Sample_Number(3,1)/n;                      % proportion of boundary-region samples
disp(['NumberofBnd=',num2str(Sample_Number(3,1))]);
if Ratio_NP > Rho && Ratio_BN < 1
    result = ['Outlier'];
elseif Ratio_BP > 10*Rho && Ratio_BN > 1
    result = ['Overlapping'];
else
    result = ['Inter|Intra'];
end
disp(['DatasetCategory=',result]);

end
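
For concreteness, here is a minimal sketch of the label encoding described in the function header and of how a single one-vs-one pair is handed to TDW_Binary. All feature values and class assignments are made up for illustration, the sketch bypasses load/Data (whose CSV layout is specific to the original experiments), and it assumes the complete TDW_Binary is on the path; the global K it declares is given a placeholder value here.

% Toy one-vs-one example (all values are placeholders)
global K;  K = 3;                         % TDW_Binary declares a global K; 3 is only a placeholder
trainX = [0.1 0.2;                        % sample 1, class 1
          0.2 0.1;                        % sample 2, class 1
          0.9 0.8;                        % sample 3, class 2
          0.8 0.9;                        % sample 4, class 2
          0.5 0.4;                        % sample 5, class 3
          0.4 0.5];                       % sample 6, class 3
trainY = [ 1 -1 -1;                       % one row per sample: 1 in the class column, -1 elsewhere
           1 -1 -1;
          -1  1 -1;
          -1  1 -1;
          -1 -1  1;
          -1 -1  1];

% One pairwise run (class 1 vs class 2), built exactly as in the loop above
flagp  = trainX(trainY(:,1) == 1, :);     % class-1 samples
flagpY = trainY(trainY(:,1) == 1, :);
flagn  = trainX(trainY(:,2) == 1, :);     % class-2 samples
flagnY = trainY(trainY(:,2) == 1, :);
flag   = TDW_Binary([flagp;flagn], [flagpY;flagnY]);   % one region label (1, 2 or -1) per sample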
function [DatasetCategory] = TDW_Func(TrainingData_File)
%% *** This function aims to categorize a dataset (binary case only) *** %%
%% Set dataset and initialization
% TrainingData_File=['adult.csv'];
train_data = load(TrainingData_File);
[train_target, P,NumberofData,NumberofInputNeurons, ~,~] = Data(train_data);
n=0;k=0;
%% Distance between each sample and all the other samples
Distance=zeros(NumberofData-1,NumberofData); 
for i=1:1:NumberofData
    for j=1:1:NumberofData
        sum=0;
        if i>j
            for k=1:NumberofInputNeurons
                sum=sum+power((P(k,i)-P(k,j)),2);
                Distance(j,i)=sqrt(sum);
            end
        elseif i<j
            % (the remainder of this branch, the computation of the score fx(i)
            %  and of the thresholds alph and beta, and the initialization of the
            %  Pos/Bnd/Neg counters and data arrays were lost from the original
            %  post; only the surrounding structure could be recovered)
        end
    end
end

%% Assign each sample to the positive, boundary or negative region
for i=1:NumberofData
    if fx(i)>alph
        NumberofPos=NumberofPos+1;
        PosData(1,NumberofPos)=train_target(1,i);
        PosData(2:NumberofInputNeurons+1,NumberofPos)=P(:,i);
    elseif fx(i)<=alph && fx(i)>=beta
        NumberofBnd=NumberofBnd+1;
        BndData(1,NumberofBnd)=train_target(1,i);
        BndData(2:NumberofInputNeurons+1,NumberofBnd)=P(:,i);
    elseif fx(i)<beta
        NumberofNeg=NumberofNeg+1;
        NegData(1,NumberofNeg)=train_target(1,i);
        NegData(2:NumberofInputNeurons+1,NumberofNeg)=P(:,i);
    end
end

%% Categorize the dataset from the sizes of the three regions
if NumberofPos>0.7*NumberofData                                             % the variable in this condition was truncated in the original; NumberofPos is assumed
    DatasetCategory=['Inter_class'];
elseif NumberofNeg>0.1*NumberofData && NumberofBnd<0.2*NumberofData
    DatasetCategory=['Outlier'];
elseif NumberofBnd>0.3*NumberofData && NumberofNeg<0.1*NumberofData
    DatasetCategory=['Overlapping'];
else
    DatasetCategory=['Intraclass'];
end
disp(['NumberofPos=',num2str(NumberofPos)]);
disp(['NumberofBnd=',num2str(NumberofBnd)]);
disp(['NumberofNeg=',num2str(NumberofNeg)]);
disp(['DatasetCategory=',DatasetCategory]);
end
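
The heart of both TDW_Func above and TDW_Binary below is a three-way thresholding of a score fx(i) against two cutoffs alph >= beta: scores above alph go to the positive region, scores below beta go to the negative region, and everything in between goes to the boundary region, where the decision is deferred. Since the code that computes fx did not survive in this post, here is a small self-contained sketch of just the thresholding step; the fx, alph and beta values are placeholders, and the region codes (1, 2, -1) match the PBN values returned by TDW_Binary.

% Three-way thresholding on a toy score vector (all values are placeholders)
fx   = [0.92 0.15 0.55 0.70 0.05];
alph = 0.8;                               % upper threshold: accept into the positive region
beta = 0.2;                               % lower threshold: reject into the negative region

region = zeros(size(fx));
region(fx >  alph)              =  1;     % positive region
region(fx <= alph & fx >= beta) =  2;     % boundary region (decision deferred)
region(fx <  beta)              = -1;     % negative region
disp(region);                             % prints: 1 -1 2 2 -1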

2020-08-11
Adding the TDW_Binary( X,Y ) code that I had been putting off for a very long time.

function [ result ] = TDW_Binary( X,Y )
%% This function aims to obtain the Negative, Boundary and Positive samples of a dataset


%% Initialization
global K;
a = find(Y(1,:) == 1);          % training targets identified by the labels; convert them to a numeric (1/-1) row vector
train_target = Y(:,a)';
P = X';
[NumberofInputNeurons,NumberofData] = size(P);
PBN = zeros(NumberofData,1);
n=0;
k=0;
%% Distance between each sample and all the other samples
Distance=zeros(NumberofData-1,NumberofData); 
for i=1:1:NumberofData
    for j=1:1:NumberofData
        sum=0;
        if i>j
            for k=1:NumberofInputNeurons
                sum=sum+power((P(k,i)-P(k,j)),2);
                Distance(j,i)=sqrt(sum);
            end
        elseif i<j
            % (the remainder of this branch, the computation of fx(i) and of the
            %  thresholds alph and beta, and the initialization of the Pos/Bnd/Neg
            %  counters and data arrays were lost from the original post)
        end
    end
end

%% Assign each sample to the positive, boundary or negative region
for i=1:NumberofData
    if fx(i)>=alph                                                          % positive region
        NumberofPos=NumberofPos+1;  
        PosData(1,NumberofPos)=train_target(1,i);
        PosData(2:NumberofInputNeurons+1,NumberofPos)=P(:,i);
        PBN(i,1) = 1;
    elseif fx(i)<alph && fx(i)>beta                                         % boundary region
        NumberofBnd=NumberofBnd+1;
        BndData(1,NumberofBnd)=train_target(1,i);
        BndData(2:NumberofInputNeurons+1,NumberofBnd)=P(:,i);
        PBN(i,1) = 2;
    elseif fx(i)<=beta                                                       % negative region
        NumberofNeg=NumberofNeg+1;        
        NegData(1,NumberofNeg)=train_target(1,i);
        NegData(2:NumberofInputNeurons+1,NumberofNeg)=P(:,i);
        PBN(i,1) = -1;
    end 
end
result = PBN;


end
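
One last note on the majority vote used in TDW_multiclass: tabulate (Statistics and Machine Learning Toolbox) returns one row per observed value with the columns [value, count, percent], and the code keeps the value with the largest count. A minimal sketch with made-up pairwise region labels for a single sample:

% Majority vote over one sample's pairwise region labels (toy values)
pairwise_labels = [2 1 2];                % labels from the one-vs-one runs against the other classes
table = tabulate(pairwise_labels);        % rows: [value, count, percent]
[~,b] = max(table(:,2));                  % row with the largest count
majority_region = table(b,1);             % -> 2: the boundary region wins the vote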
