function [ result ] = TDW_multiclass( TrainingData_File)
%TDW_MULTICLASS Categorise a multi-class dataset with a one-vs-one TDW scheme.
%   result = TDW_multiclass(TrainingData_File) loads TrainingData_File, runs
%   TDW_Binary on every pair of classes, and assigns each training sample the
%   region code it received most often. The region counts are then turned
%   into one of three dataset categories, which is displayed and returned.
%
%   Input
%     TrainingData_File  file name passed to load(); the loaded data is
%                        handed to Data(), whose first two outputs are
%                        transposed into trainY and trainX.
%
%   Working data
%     trainX  n-by-d matrix, one training sample per row.
%     trainY  n-by-m label matrix, one row per sample and one column per
%             class; trainY(i,j) == 1 marks sample i as a member of class j
%             (the other entries of that row are expected to be -1).
%
%   Output
%     result  char vector: 'Outlier', 'Overlapping' or 'Inter|Intar'.
%
%   At least two classes are needed, otherwise no pairwise classifier runs
%   and there are no votes to aggregate.

%% Get the detail of dataset
train_data = load(TrainingData_File);
[Y, X, ~, ~, ~, ~] = Data(train_data);
trainY = Y';
trainX = X';
[n, m] = size(trainY);      % n samples, m classes

% One cell per class (sized from the data, not a fixed count). Cell r
% collects one column of region codes per pairwise run that involved class r.
Class = cell(1, m);
Sample_area = zeros(n, m);  % Sample_area(i,r): voted region of the i-th sample of class r

%% Get the area of each pair of classes
for r = 1:m
    isClassR = (trainY(:, r) == 1);
    flagp  = trainX(isClassR, :);   % samples belonging to class r
    flagpY = trainY(isClassR, :);
    np = size(flagp, 1);            % number of class-r samples

    for j = r+1:m
        isClassJ = (trainY(:, j) == 1);
        flagn  = trainX(isClassJ, :);   % samples belonging to class j
        flagnY = trainY(isClassJ, :);
        nn = size(flagn, 1);            % number of class-j samples

        % Train on the two classes only; TDW_Binary returns one region
        % code per stacked sample (class r rows first, then class j rows).
        flag = TDW_Binary([flagp; flagn], [flagpY; flagnY]);
        Class{1, r} = [Class{1, r}, flag(1:np, 1)];
        Class{1, j} = [Class{1, j}, flag(np+1:np+nn, 1)];
    end

    % By now class r has taken part in all of its pairings (as j for the
    % earlier classes, as r just above). Majority vote per sample; mode()
    % breaks ties towards the smallest code, which matches taking the first
    % maximum of a value-sorted frequency table.
    if np > 0
        Sample_area(1:np, r) = mode(Class{1, r}, 2);
    end
end

%% Classify datasets using formulas
% Rows of Sample_area beyond a class's own sample count stay 0 and are
% therefore never counted as any region below.
negCount = nnz(Sample_area == -1);  % samples voted into the negative region
bndCount = nnz(Sample_area == 2);   % samples voted into the boundary region

Rho = m/n;                          % decision threshold (open question how to tune it)
Ratio_NP = negCount/n;              % share of negative-region samples
Ratio_BN = bndCount/negCount;       % boundary-to-negative ratio (Inf/NaN when negCount is 0)
Ratio_BP = bndCount/n;              % share of boundary-region samples

% NOTE(review): 'Inter|Intar' looks like a misspelling of 'Inter|Intra' but
% is kept verbatim because callers may compare against the returned text.
if Ratio_NP > Rho && Ratio_BN < 1
    result = 'Outlier';
elseif Ratio_BP > 10*Rho && Ratio_BN > 1
    result = 'Overlapping';
else
    result = 'Inter|Intar';
end
disp(['DatasetCategory=', result]);
end
function [DatasetCategory] = TDW_Func(TrainingData_File)
%% TDW_Func: categorise a dataset (binary problems only)
% Loads TrainingData_File, passes the loaded data to Data(), and returns
% (and displays) a category string chosen from 'Inter_calss', 'Outlier',
% 'Overlapping' and 'Intraclass' based on how many samples fall into the
% positive, boundary and negative regions.
%
% NOTE(review): two lines of this function look truncated (see the notes
% below). The code that defines fx, alph, beta and initialises the
% NumberofPos / NumberofBnd / NumberofNeg counters is not visible, so the
% function cannot run as shown. Restore it from the original source.
%% Set dataset and initialization
% TrainingData_File=['adult.csv'];
train_data = load(TrainingData_File);
% P is indexed below as P(feature, sample); train_target as train_target(1, sample).
[train_target, P,NumberofData,NumberofInputNeurons, ~,~] = Data(train_data);
n=0;k=0;
%% Distance between each sample and the rest of the sample
% Distance(j,i) for i>j receives the Euclidean distance between columns i
% and j of P (the square root is re-assigned on every feature step, so the
% last assignment holds the full distance).
Distance=zeros(NumberofData-1,NumberofData);
for i=1:1:NumberofData
for j=1:1:NumberofData
% NOTE(review): 'sum' shadows the built-in sum() inside this function.
sum=0;
if i>j
for k=1:NumberofInputNeurons
sum=sum+power((P(k,i)-P(k,j)),2);
Distance(j,i)=sqrt(sum);
end
% NOTE(review): the next line appears truncated - text seems to be missing
% between 'i' and 'alph' (presumably an 'i<j' branch, the loop ends, the
% computation of fx/alph/beta and the start of the region loop). TODO restore.
elseif ialph
% Positive-region branch: store the target and the feature column of sample i.
NumberofPos=NumberofPos+1;
PosData(1,NumberofPos)=train_target(1,i);
PosData(2:NumberofInputNeurons+1,NumberofPos)=P(:,i);
% Boundary-region branch: beta <= fx(i) <= alph.
elseif fx(i)<=alph && fx(i)>=beta
NumberofBnd=NumberofBnd+1;
BndData(1,NumberofBnd)=train_target(1,i);
BndData(2:NumberofInputNeurons+1,NumberofBnd)=P(:,i);
% NOTE(review): the next line also appears truncated - the comparison after
% 'fx(i)', the negative-region bookkeeping and the start of the category
% test ending in '0.7*NumberofData' seem to be missing. TODO restore.
elseif fx(i)0.7*NumberofData
DatasetCategory=['Inter_calss'];
% More than 10% negative-region and fewer than 20% boundary-region samples.
elseif NumberofNeg>0.1*NumberofData && NumberofBnd<0.2*NumberofData
DatasetCategory=['Outlier'];
% More than 30% boundary-region and fewer than 10% negative-region samples.
elseif NumberofBnd>0.3*NumberofData && NumberofNeg<0.1*NumberofData
DatasetCategory=['Overlapping'];
else
DatasetCategory=['Intraclass'];
end
% Report the region counts and the chosen category on the console.
disp(['NumberofPos=',num2str(NumberofPos)]);
disp(['NumberofBnd=',num2str(NumberofBnd)]);
disp(['NumberofNeg=',num2str(NumberofNeg)]);
disp(['DatasetCategory=',DatasetCategory]);
end
% 2020-08-11
% Added the long-overdue TDW_Binary( X,Y ) code.
function [ result ] = TDW_Binary( X,Y )
%% TDW_Binary: label every sample as negative, boundary or positive region
% X is transposed into P, which is indexed below as P(feature, sample), so
% X holds one sample per row. Y is a label matrix with one row per sample.
% result is a NumberofData-by-1 vector of region codes: 1 = positive,
% 2 = boundary, -1 = negative; an entry stays 0 if no branch assigns it.
%
% NOTE(review): two lines of this function look truncated (see the notes
% below). The code that defines fx, alph, beta and initialises the
% NumberofPos / NumberofBnd / NumberofNeg counters is not visible, so the
% function cannot run as shown. Restore it from the original source.
%% Initialization
% K is declared global here but is not referenced in the visible lines.
global K;
a = find(Y(1,:) == 1); % column(s) where the first sample's label row equals 1; targets must be numeric (1,-1)
train_target = Y(:,a)'; % that label column as a row vector of targets
P = X';
[NumberofInputNeurons,NumberofData] = size(P);
PBN = zeros(NumberofData,1);
n=0;
k=0;
%% Distance between each sample and the rest of the sample
% Distance(j,i) for i>j receives the Euclidean distance between columns i
% and j of P (the square root is re-assigned on every feature step, so the
% last assignment holds the full distance).
Distance=zeros(NumberofData-1,NumberofData);
for i=1:1:NumberofData
for j=1:1:NumberofData
% NOTE(review): 'sum' shadows the built-in sum() inside this function.
sum=0;
if i>j
for k=1:NumberofInputNeurons
sum=sum+power((P(k,i)-P(k,j)),2);
Distance(j,i)=sqrt(sum);
end
% NOTE(review): the next line appears truncated - text seems to be missing
% between 'i' and '=alph' (presumably an 'i<j' branch, the loop ends, the
% computation of fx/alph/beta and the start of the region loop). TODO restore.
elseif i=alph % positive-region test
NumberofPos=NumberofPos+1;
PosData(1,NumberofPos)=train_target(1,i);
PosData(2:NumberofInputNeurons+1,NumberofPos)=P(:,i);
PBN(i,1) = 1;
% NOTE(review): the next line also appears truncated - the comparison
% operators between 'fx(i)' and 'beta' seem to be missing. TODO restore.
elseif fx(i)beta % boundary-region test
NumberofBnd=NumberofBnd+1;
BndData(1,NumberofBnd)=train_target(1,i);
BndData(2:NumberofInputNeurons+1,NumberofBnd)=P(:,i);
PBN(i,1) = 2;
elseif fx(i)<=beta % negative-region test
NumberofNeg=NumberofNeg+1;
NegData(1,NumberofNeg)=train_target(1,i);
NegData(2:NumberofInputNeurons+1,NumberofNeg)=P(:,i);
PBN(i,1) = -1;
end
end
% Only the per-sample region codes are returned; PosData/BndData/NegData stay local.
result = PBN;
end