朴素贝叶斯多分类问题matlab实现

本文采用多项式模型实现朴素贝叶斯分类器,用于解决多分类问题,适用于特征变量(x)为离散的情况。相关理论推导可参见博客https://blog.csdn.net/u012162613/article/details/48323777

模型参数计算

function [p_yk,p_yk_xi,class_num_x,class_num ] = Naive_Bayesian(x, y)
% Multinomial naive Bayes parameter estimation for discrete features.
%
% Inputs:
%   x - m-by-n matrix of training samples (m samples, n discrete features)
%   y - m-by-1 vector of class labels
% Outputs:
%   p_yk        - 1-by-K vector of smoothed class priors p(y = k)
%   p_yk_xi     - n-by-K cell array; p_yk_xi{i,k}(v) is the smoothed
%                 conditional probability p(x_i = class_num_x{i}(v) | y = k)
%   class_num_x - n-by-1 cell array; class_num_x{i} lists the distinct
%                 values of feature i seen in training (sorted, column)
%   class_num   - K-by-1 vector of distinct class labels (sorted)

m = size(x,1);  % number of training samples
n = size(x,2);  % number of features

% Smoothing coefficient: 0 < alpha < 1 is Lidstone smoothing,
% alpha = 1 is Laplace smoothing.
alpha = 1;

% Distinct class labels and the number of samples in each class.
[class_num, ~, y_idx] = unique(y(:));
K = length(class_num);
count = accumarray(y_idx, 1);  % count(k) = samples of class k

% Smoothed prior p(y = k).
p_yk = zeros(1, K);
for k = 1:K
    p_yk(k) = (count(k) + alpha) / (m + K * alpha);
end

% Distinct values taken by each feature, plus each sample's value index.
class_num_x = cell(n, 1);
x_idx = zeros(m, n);
for i = 1:n
    [class_num_x{i,1}, ~, x_idx(:,i)] = unique(x(:,i));
end

% N_yk_xi{i,k}(v) = number of samples with the v-th value of feature i
% and the k-th class label.
N_yk_xi = cell(n, K);
for i = 1:n
    counts_i = accumarray([x_idx(:,i), y_idx], 1, ...
        [length(class_num_x{i,1}), K]);
    for k = 1:K
        N_yk_xi{i,k} = counts_i(:, k);
    end
end

% Smoothed conditional probability p(x_i = v | y = k).
% BUG FIX: the smoothing denominator must use the number of distinct
% values of feature i (S_i), not the number of features n, so that the
% probabilities of one feature sum to 1 for each class.
p_yk_xi = cell(n, K);
for i = 1:n
    S_i = length(class_num_x{i,1});
    for k = 1:K
        p_yk_xi{i,k} = (N_yk_xi{i,k} + alpha) / (count(k) + alpha * S_i);
    end
end

end

预测

function [ y, probability] = predict_BN( x, p_yk, p_yk_xi, class_num_x, class_num )
% Naive Bayes prediction for discrete features.
%
% Inputs:
%   x           - m-by-n matrix of test samples
%   p_yk        - 1-by-K vector of class priors from Naive_Bayesian
%   p_yk_xi     - n-by-K cell array of conditional probabilities
%   class_num_x - n-by-1 cell array of distinct training values per feature
%   class_num   - K-by-1 vector of class labels
% Outputs:
%   y           - m-by-1 vector of predicted class labels
%   probability - m-by-1 vector of the (unnormalized) score of the
%                 winning class for each sample

% BUG FIX: use size(x,1), not length(x) — length returns the largest
% dimension and is wrong when there are more features than samples.
m = size(x, 1);
n = size(x, 2);
K = length(class_num);

% fx(i,k) is proportional to p(y=k) * prod_j p(x(i,j) | y=k).
fx = zeros(m, K);
for i = 1:m
    for k = 1:K
        fx(i,k) = p_yk(k);
        for j = 1:n
            % Index of this feature value among the training values.
            v = find(class_num_x{j,1} == x(i,j), 1);
            if ~isempty(v)
                fx(i,k) = fx(i,k) * p_yk_xi{j,k}(v,1);
            end
            % A value never seen in training contributes factor 1
            % (silently skipped), matching the original behaviour.
        end
    end
end

% Pick the class with the highest score for each sample.
y = zeros(m, 1);
probability = zeros(m, 1);
for i = 1:m
    [probability(i,1), idx] = max(fx(i,:));
    y(i,1) = class_num(idx);
end

end

主函数

% Example 1: a tiny hand-made data set.
x_train=[1,2;3,2;4,1;5,3;1,3];
y_train=[2;2;3;2;4];
x_test=[1,2;1,3];
y_test=[2;2];

% Example 2: Fisher iris data set.
% BUG FIX: clear example 1's variables first — they have different sizes,
% so reusing the same names via subscripted assignment below would raise
% a "subscripted assignment dimension mismatch" error.
clear x_train y_train x_test y_test

load fisheriris
x = meas;
% Encode the three species as class labels 1, 2, 3.
y = [ones(50,1); 2*ones(50,1); 3*ones(50,1)];

% Random train/test split.
m = size(x, 1);           % use size, not length (robust if n > m)
order = randperm(m);
a = 0.2;                  % fraction of the data used for training
m_train = floor(a * m);
x_train = x(order(1:m_train), :);
y_train = y(order(1:m_train), 1);
x_test  = x(order(m_train+1:end), :);
y_test  = y(order(m_train+1:end), 1);

% Fit the naive Bayes model, then predict on the held-out test set.
[ p_yk, p_yk_xi, class_num_x, class_num ] = Naive_Bayesian( x_train, y_train );
[ y_pred, probability] = predict_BN( x_test, p_yk, p_yk_xi, class_num_x, class_num );

% Classification accuracy on the test set (vectorized count of matches).
accuracy = mean(y_pred == y_test);

你可能感兴趣的:(机器学习,机器学习,算法,matlab)