基于Fisher线性判别分析的手写数字识别

基于Fisher线性判别分析的手写数字识别

<1>Fisher算法简介:
为简单起见,我们以w1和w2两类问题的分类来说明Fisher判别法的原理。如下图所示,为了识别w1类和w2类,只要选择适当的投影方向,就可以比较好地分开这两类。Fisher线性判别的思想就是选择投影方向,使投影后的两类相隔尽可能远,同时每一类内部的样本又尽可能聚集。关键是找出那个最佳的投影方向。
基于Fisher线性判别分析的手写数字识别_第1张图片

我们假设w1共有N1个样本,w2共有N2个样本,N= N1+N2。两个类别在输入空间的均值向量为:
$$\mathbf{m}_i=\frac{1}{N_i}\sum_{\mathbf{x}\in w_i}\mathbf{x},\qquad i=1,2$$

各类的类内离散度矩阵为:
$$S_i=\sum_{\mathbf{x}\in w_i}(\mathbf{x}-\mathbf{m}_i)(\mathbf{x}-\mathbf{m}_i)^{T},\qquad i=1,2$$

总类内离散度矩阵为:
$$S_w=S_1+S_2$$

根据Fisher算法的思想,要使两类投影后两类尽可能分开,而各类内部又尽可能聚集,最优的投影方向即为:
$$\mathbf{w}^{*}=S_w^{-1}(\mathbf{m}_1-\mathbf{m}_2)$$

确定投影方向之后,再确定一个分类阈值Wo,并采取决策规则:
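$$y=\mathbf{w}^{*T}\mathbf{x}:\qquad y>W_0\;\Rightarrow\;\mathbf{x}\in w_1,\qquad y<W_0\;\Rightarrow\;\mathbf{x}\in w_2$$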

在自己的实验中,样本的数量肯定是有限的,可以将分类阈值定为:
$$W_0=\frac{1}{2}\,\mathbf{w}^{*T}(\mathbf{m}_1+\mathbf{m}_2)$$
判别的决策就改为:
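$$g(\mathbf{x})=\mathbf{w}^{*T}\left(\mathbf{x}-\frac{1}{2}(\mathbf{m}_1+\mathbf{m}_2)\right):\qquad g(\mathbf{x})>0\;\Rightarrow\;\mathbf{x}\in w_1,\qquad g(\mathbf{x})<0\;\Rightarrow\;\mathbf{x}\in w_2$$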

<2>基于Fisher线性判别实现数字识别的想法:
根据Fisher算法可以实现两类的识别,而数字识别是多类识别,我们可以通过多个两类分类器来实现。具体的算法是:先从第1类(数字0)开始,逐个与序号比它大的类比较;当遇到比第1类更加匹配输入的类时,则将第1类排除,改为从第2类开始逐个与序号比它大的类比较;依此类推,直到找出最匹配的数字为止。
基于Fisher线性判别分析的手写数字识别_第3张图片

<3>数字特征的提取
利用Fisher线性判别时,每一类都对应着一个特征线性向量,不同的类之间对应的向量也是有差别的,Fisher就是找出这些特征向量的最优投影方向,使之在投影方向上能够有最大间距。

特征有很多不同的提取方法,本实验采取的是将手写数字图片二值化,再将二值化图片缩放为70×70并分割为7×7块(每块10×10像素),计算每块内值为0的像素点(即笔迹)所占的比例,这样就可以构成一个49维的特征向量。

本人使用了Matlab作为研究平台,设计了一个gui界面(可以手写输入数字),采用Fisher线性识别,样本是已经采集好的。
基于Fisher线性判别分析的手写数字识别_第4张图片

部分Matlab代码如下:
<1>         *********手写数字matlab实现(部分)*************
%%手写输入程序

%鼠标按下
function figure_patten_WindowButtonDownFcn(hObject, eventdata, handles)
% Mouse-button-down callback: turn drawing on and remember where the
% stroke begins.
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
global draw_enable x y h1   % draw_enable: 1 = drawing, 0 = not drawing
canvasExtent = 10;
draw_enable = 1;

% Pin the limits of the current axes to [1, canvasExtent] in both directions.
axis([1 canvasExtent 1 canvasExtent]);

% The flag was just set, so the click position is always recorded as the
% first point of the stroke. 'currentpoint' is indexed linearly here:
% element 1 is used as x and element 3 as y.
clickPos = get(gca,'currentpoint');
x(1) = clickPos(1);
y(1) = clickPos(3);

%鼠标移动
function figure_patten_WindowButtonMotionFcn(hObject, eventdata, handles)
% Mouse-move callback: while drawing is enabled, draw a red segment from the
% previous cursor position to the current one and append the segment's
% points to IXT.txt / IYT.txt.
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
global draw_enable x y h1
axes(handles.axes1);                    % make the drawing axes current
cursor = get(gca,'currentpoint');
if ~isequal(draw_enable, 1)
    return;                             % not drawing: nothing else to do
end

% Slot 1 holds the previous position, slot 2 the newest one.
x(2) = cursor(1);
y(2) = cursor(3);

% Sampling increment along each axis, signed towards the new position.
stepSize = 0.1;
if x(2) > x(1)
    dx = stepSize;
else
    dx = -stepSize;
end
if y(2) > y(1)
    dy = stepSize;
else
    dy = -stepSize;
end

if abs(x(2)-x(1)) < 0.01
    % (Nearly) vertical segment: the slope is undefined, so step along y.
    segY = y(1):dy:y(2);
    segX = x(2).*ones(1,size(segY,2));
else
    % Otherwise step along x and interpolate y on the straight line.
    segX = x(1):dx:x(2);
    slope = (y(2)-y(1))/(x(2)-x(1));
    segY = slope.*(segX-x(1))+y(1);
end

% Always finish exactly on the current cursor position.
ImageX = [segX, x(2)];
ImageY = [segY, y(2)];

h1 = line(ImageX, ImageY, 'marker', '.', 'markerSize',8, ...
    'LineStyle', '-', 'LineWidth', 4, 'Color', 'Red');
dlmwrite('IXT.txt', ImageX, '-append', 'delimiter', '\t', 'precision', 6);
dlmwrite('IYT.txt', ImageY, '-append', 'delimiter', '\t', 'precision', 6);

% The current point becomes the start of the next segment.
x(1) = x(2);
y(1) = y(2);

%鼠标松开
function figure_patten_WindowButtonUpFcn(hObject, eventdata, handles)
% Mouse-button-up callback: end the current stroke by clearing the shared
% drawing flag that the motion callback checks.
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
global draw_enable
draw_enable = 0;    % 0 = stop drawing
<2>         **********Fisher识别按钮程序************
%%Fisher识别按钮
function pushbutton6_Callback(hObject, eventdata, handles)
% "Fisher recognise" button callback.
% Grabs the drawing from axes1, binarises it, crops it to the bounding box
% of the stroke, saves the crop, builds a 49-element feature vector from a
% 7x7 grid, shows a coarse preview of the features in axes3, and displays
% the digit returned by Fisher() in text_result.
% hObject    handle to pushbutton6 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
global i;                       % running index used to name the saved crops
if isempty(i)
    % An uninitialised global is [], which would make every crop be saved
    % as "num.jpg" and never advance; start counting at 1 in that case.
    i = 1;
end

pix=getframe(handles.axes1);    % capture the handwritten digit in axes1

gray=rgb2gray(pix.cdata);       % convert to grayscale
level=graythresh(gray);         % threshold chosen by graythresh
bw = im2bw(gray,level);         % binarise: pixels equal to 0 are treated as ink below
imwrite(bw,'outfile.jpg','jpg');    % keep a copy of the raw binary image
axes(handles.axes2);

% Bounding box of the ink (pixels equal to 0). A column/row is "blank" when
% every pixel in it is 1.
inkCols = find(any(~bw, 1));
inkRows = find(any(~bw, 2));
if isempty(inkCols)
    % Nothing was drawn: there is no bounding box to crop to, so stop here
    % instead of indexing past the image border.
    return;
end
x1 = inkCols(1);        % first non-blank column (left edge)
x2 = inkCols(end);      % last non-blank column (right edge)
y1 = inkRows(1);        % first non-blank row (top edge)
y2 = inkRows(end);      % last non-blank row (bottom edge)

% Crop the image to the edge of the stroke and show it in axes2.
bw1=imcrop(bw,[x1,y1,(x2-x1),(y2-y1)]);
imshow(bw1);

setappdata(handles.figure_patten,'bw1',bw1);    % share the crop with other callbacks

% Save the crop under a consecutive number so samples are easy to collect.
name=strcat('D:\matlab\matlabinstall\bin\image\num',num2str(i),'.jpg');
imwrite(bw1,name,'jpg');
i=i+1;

axes(handles.axes3);    % the feature preview goes to axes3

% Feature extraction: resize to 70x70 and count the pixels equal to 1 in each
% of the 7x7 tiles (10x10 pixels each), tiles numbered row by row.
bw_7050=imresize(bw1,[70,70]);
lett=zeros(1,49);
for cnt=1:7
    for cnt2=1:7
        tile=bw_7050((cnt*10-9:cnt*10),(cnt2*10-9:cnt2*10));
        lett((cnt-1)*7+cnt2)=sum(tile(:));
    end
end

% Turn the counts into the fraction of pixels equal to 0 in each tile.
lett=((100-lett)/100);

% Coarse preview: a tile is drawn as 0 when its fraction exceeds 0.13 and
% as 1 otherwise. reshape(...)' restores the row-by-row tile order and
% kron expands each tile to 10x10 pixels.
A=kron(double(reshape(lett,7,7)' <= 0.13), ones(10));
imshow(A);

Rchar=Fisher(lett);     % classify with the pairwise Fisher discriminants
set(handles.text_result,'String',num2str(Rchar));  % show the result
<3>             *********Fisher()实现************
function  [Rchar]=Fisher(lett)
%FISHER Classify a feature vector as a digit with pairwise Fisher discriminants.
%   Rchar = Fisher(lett) loads the stored training samples from NUM0.mat ..
%   NUM9.mat (each file holds a cell array NUMk of feature vectors), builds
%   one two-class Fisher linear discriminant for every pair of digits, and
%   returns the recognised digit as a single character '0'..'9'.
%
%   lett   feature vector of the drawing; it is reshaped to a row vector and
%          must have the same length as the stored samples.
%   Rchar  character '0'..'9'.
%
%   The sample files are re-read and all statistics are recomputed on every
%   call. NOTE(review): a class with no stored samples yields a NaN mean;
%   confirm that every NUMk.mat holds at least one sample.

numClasses = 10;
lett = lett(:)';                % work with a row vector throughout
dim = numel(lett);

% Per-class mean vector M{c} and within-class scatter matrix S{c}.
% Class index c corresponds to digit c-1.
M = cell(1, numClasses);
S = cell(1, numClasses);
for c = 1:numClasses
    varName = sprintf('NUM%d', c-1);
    stored = load([varName '.mat']);
    sampleCells = stored.(varName);

    % Convert the cell array into an nSamples-by-dim matrix.
    nSamples = numel(sampleCells);
    X = zeros(nSamples, dim);
    for r = 1:nSamples
        X(r,:) = sampleCells{r};
    end

    % mean(X,1) keeps a row vector even when the class has one sample.
    M{c} = mean(X, 1);

    % Scatter matrix: sum over samples of (x-m)'*(x-m), accumulated from a
    % numeric zero over whole rows of X.
    centered = X - repmat(M{c}, nSamples, 1);
    S{c} = centered' * centered;
end

% Discriminant value of lett for every pair (a,b) with a < b.
% G(a,b) >= 0 means lett is not rejected as class a when compared with b.
G = zeros(numClasses);
for a = 1:numClasses-1
    for b = a+1:numClasses
        % Total within-class scatter, with a small ridge so the system
        % stays solvable when the scatter matrix is singular.
        Sw = S{a} + S{b} + 0.0001*eye(dim);
        w = Sw \ (M{a} - M{b})';                    % optimal projection direction
        G(a,b) = w' * (lett - 0.5*(M{a} + M{b}))';  % threshold at the midpoint of the means
    end
end

% Sequential elimination: starting from the first class, a candidate is
% accepted only if no higher-numbered class beats it; otherwise move on to
% the next candidate. If every earlier candidate is rejected the last class
% is the answer.
winner = numClasses;
for a = 1:numClasses-1
    if ~any(G(a, a+1:numClasses) < 0)
        winner = a;
        break;
    end
end

Rchar = num2str(winner-1);      % class index -> digit character

<4>         ************添加为样本部分程序*************
% Add the most recently cropped drawing to the stored samples of the digit
% typed into edit1.
dd=str2double(get(handles.edit1,'String'));     % digit label entered by the user (NaN if not numeric)
h_patten=getappdata(handles.Rightnum,'h_patten');
bw1=getappdata(h_patten.figure_patten,'bw1');   % cropped binary image stored by the recognise button

% Same 7x7 grid feature as used for recognition: resize to 70x70 and count
% the pixels equal to 1 in each 10x10 tile, tiles numbered row by row.
bw_7050=imresize(bw1,[70,70]);
example1=zeros(1,49);
for cnt=1:7
    for cnt2=1:7
        tile=bw_7050((cnt*10-9:cnt*10),(cnt2*10-9:cnt2*10));
        example1((cnt-1)*7+cnt2)=sum(tile(:));
    end
end

% Fraction of pixels equal to 0 in each tile (semicolon added so the vector
% is no longer echoed to the command window).
example2=((100-example1)/100);

% Only labels 0..9 are stored; anything else (including NaN) is ignored.
if any(dd == 0:9)
    varName = sprintf('NUM%d', dd);     % variable name inside the file, e.g. NUM3
    fileName = [varName '.mat'];

    % Load only the file for this digit, append the new sample to its cell
    % array, and write that one variable back.
    stored = load(fileName);
    samples = stored.(varName);
    samples{numel(samples)+1} = example2;
    stored.(varName) = samples;
    save(fileName, '-struct', 'stored', varName);
end

你可能感兴趣的:(模式识别)