<1>Fisher算法简介:
为简单起见,我们以w1和w2两类问题的分类来说明Fisher判别法的原理。看下面的图,为识别w1类和w2类,通过选择适当的投影方向可以比较好地分开这两类。Fisher线性判别的思想就是选择投影方向,使投影后的两类相隔尽可能远,而同时每一类内部的样本又尽可能聚集。关键是找出那个最佳的投影方向。
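我们假设w1共有N1个样本,w2共有N2个样本,N= N1+N2。两个类别在输入空间的均值向量为:
$$m_i = \frac{1}{N_i}\sum_{x \in w_i} x, \quad i = 1, 2$$
根据Fisher算法的思想,要使两类投影后尽可能分开,而各类内部又尽可能聚集,最优的投影方向即为:
$$w^* = S_w^{-1}(m_1 - m_2), \quad S_w = S_1 + S_2, \quad S_i = \sum_{x \in w_i}(x - m_i)(x - m_i)^T$$
在自己的实验中,样本的数量肯定是有限的,可以将分类阈值定为两类均值投影的中点:
$$y_0 = \frac{1}{2}\,w^{*T}(m_1 + m_2)$$
判别的决策就改为:
$$g(x) = w^{*T}x - y_0, \qquad g(x) \ge 0 \Rightarrow x \in w_1, \qquad g(x) < 0 \Rightarrow x \in w_2$$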
<2>基于Fisher线性判别实现数字识别的想法:
根据Fisher算法可以实现两类的识别,数字识别为多类识别,我们可以通过多个两类分类器来实现。具体的算法是先从1开始逐个与比它大的数字比较,当遇到比1更加匹配输入的数字时则将1排除,而改为从2开始逐个与比它大的数比较,由此进行到找出最匹配的数字为止。
<3>数字特征的提取
利用Fisher线性判别时,每一类都对应着一个特征线性向量,不同的类之间对应的向量也是有差别的,Fisher就是找出这些特征向量的最优投影方向,使之在投影方向上能够有最大间距。
特征有很多不同的提取方法,本实验采取的是将手写数字图片二值化,再将二值化图片缩放为70×70并分割为7×7块(每块10×10个像素),计算每块内值为0的像素点(即笔迹像素)所占的比例,这样就可以构成一个49维的特征向量。
本人使用了Matlab作为研究平台,设计了一个gui界面(可以手写输入数字),采用Fisher线性识别,样本是已经采集好的。
部分Matlab代码如下:
<1> *********手写数字matlab实现(部分)*************
%%手写输入程序
%鼠标按下
function figure_patten_WindowButtonDownFcn(hObject, eventdata, handles)
% Mouse-button-down callback of the drawing figure: switches drawing on and
% records the pointer position as the start point of the next stroke segment.
%
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% State shared with the motion / button-up callbacks:
%   draw_enable - 1 while a stroke is being drawn, 0 otherwise
%   x, y        - x(1)/y(1) hold the start point of the current segment
%   h1          - declared here but not used in this callback
global draw_enable x y h1

canvasExtent = 10;
draw_enable = 1;

% Fix the limits of the current axes to [1, canvasExtent] in both directions.
axis([1, canvasExtent, 1, canvasExtent]);
% grid on;
% box on;

% draw_enable was just set to 1, so the start point is always captured here.
% 'currentpoint' is a 2x3 matrix; its first row holds the front-plane x and y.
clickPos = get(gca, 'currentpoint');
x(1) = clickPos(1, 1);
y(1) = clickPos(1, 2);
%鼠标移动
function figure_patten_WindowButtonMotionFcn(hObject, eventdata, handles)
% Mouse-move callback of the drawing figure. While drawing is enabled it
% connects the previous pointer position to the current one with a densely
% sampled red line segment, appends the sampled coordinates to IXT.txt /
% IYT.txt, and remembers the current position as the next start point.
%
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% State shared with the button-down / button-up callbacks.
global draw_enable x y h1

axes(handles.axes1);
cursor = get(gca, 'currentpoint');

if draw_enable == 1
    % End point of this segment; x(1), y(1) is where the previous one ended.
    x(2) = cursor(1, 1);
    y(2) = cursor(1, 2);

    % Sampling increment of 0.1 along each axis, signed so that the range
    % runs from the start point towards the end point.
    sampleGap = 0.1;
    step_x = sampleGap * (2 * (x(2) > x(1)) - 1);
    step_y = sampleGap * (2 * (y(2) > y(1)) - 1);

    if abs(x(2) - x(1)) < 0.01
        % (Almost) vertical segment: the slope is undefined, so sample
        % along y and hold x constant.
        iy = y(1):step_y:y(2);
        ix = repmat(x(2), 1, numel(iy));
    else
        % Regular segment: sample along x and evaluate the line equation.
        ix = x(1):step_x:x(2);
        slope = (y(2) - y(1)) / (x(2) - x(1));
        iy = slope .* (ix - x(1)) + y(1);
    end

    % Always close the segment with the exact end point.
    ImageX = [ix, x(2)];
    ImageY = [iy, y(2)];

    h1 = line(ImageX, ImageY, ...
        'marker', '.', 'markerSize', 8, ...
        'LineStyle', '-', 'LineWidth', 4, 'Color', 'Red');

    % Log the sampled stroke coordinates, one tab-separated row per segment.
    dlmwrite('IXT.txt', ImageX, '-append', 'delimiter', '\t', 'precision', 6);
    dlmwrite('IYT.txt', ImageY, '-append', 'delimiter', '\t', 'precision', 6);

    % The current point becomes the start of the next segment.
    x(1) = x(2);
    y(1) = y(2);
end
%鼠标松开
function figure_patten_WindowButtonUpFcn(hObject, eventdata, handles)
% Mouse-button-release callback of the drawing figure: ends the current stroke
% by clearing the global drawing flag.
% hObject handle to figure_patten (see GCBO)
% eventdata reserved - to be defined in a future version of MATLAB
% handles structure with handles and user data (see GUIDATA)
global draw_enable; % drawing flag shared with the button-down and motion callbacks
draw_enable=0; % 0 = stop drawing; the motion callback only draws while this is 1
<2> **********Fisher识别按钮程序************
%%Fisher识别按钮
function pushbutton6_Callback(hObject, eventdata, handles)
% "Fisher recognition" button callback.
%
% Captures the handwriting drawn in axes1, binarises it, crops it to the
% bounding box of the ink, stores the crop as a numbered sample image,
% extracts a 49-dimensional block-density feature vector, shows a coarse
% 7x7 preview of that vector in axes3, and displays the digit returned by
% Fisher() in handles.text_result.
%
% hObject    handle to pushbutton6 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Running index used to name the saved sample images.
global i;
if isempty(i)
    % An uninitialised global is []; num2str([]) is '' and []+1 stays [],
    % which would overwrite the same "num.jpg" on every call.
    i = 1;
end

% --- Capture and binarise the drawing -----------------------------------
pix = getframe(handles.axes1);          % screenshot of the drawing axes
bw = rgb2gray(pix.cdata);               % to grayscale
level = graythresh(bw);                 % Otsu threshold
bw = im2bw(bw, level);                  % binary image: background 1, ink 0
imwrite(bw, 'outfile.jpg', 'jpg');
axes(handles.axes2);

% --- Crop to the bounding box of the ink --------------------------------
% A column/row is blank when every pixel in it is 1 (background).
inkCols = find(any(~bw, 1));
inkRows = find(any(~bw, 2));
if isempty(inkCols)
    % Nothing drawn: scanning for the first non-blank column would run
    % past the image border, so stop here instead.
    warning('pushbutton6_Callback:blankImage', ...
        'The drawing area is empty; recognition skipped.');
    return;
end
x1 = inkCols(1);      % first column containing ink (left edge)
x2 = inkCols(end);    % last column containing ink (right edge)
y1 = inkRows(1);      % first row containing ink (top edge)
y2 = inkRows(end);    % last row containing ink (bottom edge)

bw1 = imcrop(bw, [x1, y1, (x2 - x1), (y2 - y1)]);
imshow(bw1);
setappdata(handles.figure_patten, 'bw1', bw1);   % shared with the add-sample code

% Save the crop under a consecutive number so samples are easy to collect.
name = strcat('D:\matlab\matlabinstall\bin\image\num', num2str(i), '.jpg');
imwrite(bw1, name, 'jpg');
i = i + 1;

% --- Feature extraction --------------------------------------------------
axes(handles.axes3);                    % the feature preview goes to axes3
bw_7050 = imresize(bw1, [70, 70]);      % 70x70 -> 7x7 blocks of 10x10 pixels
lett = zeros(1, 49);
for cnt = 1:7
    for cnt2 = 1:7
        blockPixels = bw_7050(cnt*10-9:cnt*10, cnt2*10-9:cnt2*10);
        % Number of background (1) pixels in this block.
        lett((cnt-1)*7 + cnt2) = sum(blockPixels(:));
    end
end
% Each block has 100 pixels, so this is the fraction of ink (0) pixels.
lett = (100 - lett) / 100;

% --- Coarse preview of the feature vector -------------------------------
% Blocks whose ink fraction exceeds 0.13 are drawn black (0), the others
% white (1). lett is stored row-major, hence the transpose after reshape;
% kron expands every block decision to a 10x10 patch.
blockIsWhite = double(reshape(lett, 7, 7)' <= 0.13);
A = kron(blockIsWhite, ones(10));
imshow(A);

% --- Classification -----------------------------------------------------
Rchar = Fisher(lett);
set(handles.text_result, 'String', num2str(Rchar));   % show the result
<3> *********Fisher()实现************
function [Rchar]=Fisher(lett)
% FISHER  Classify a feature vector as a digit 0-9 with pairwise Fisher
% linear discriminants.
%
%   Rchar = Fisher(lett)
%
%   lett   Feature vector of the sample to classify (row or column).
%   Rchar  Recognised digit as a character, '0' .. '9'.
%
% Training samples are read from NUM0.mat .. NUM9.mat in the current
% folder / MATLAB path. File NUMk.mat must contain a cell array named NUMk
% whose elements are feature vectors of the same length as lett.
%
% For every pair of classes (a, b) with a < b the function computes
%     w      = (S_a + S_b + 1e-4*I) \ (m_a - m_b)'
%     g_ab   = w' * (lett - (m_a + m_b)/2)'
% where m is the class mean and S the within-class scatter matrix.
% g_ab < 0 means "lett does not belong to class a". Classes are tried in
% ascending order; the first class that is not rejected by any higher
% class wins, and the last class wins if all others are rejected.
%
% Errors:
%   Fisher:noSamples        a class has no training samples
%   Fisher:sizeMismatch     a training sample has a different length than lett

numClasses = 10;
lett = reshape(lett, 1, []);        % work with a row vector throughout
dim = numel(lett);

% --- Per-class mean and within-class scatter ----------------------------
M = cell(1, numClasses);            % class means, 1 x dim each
S = cell(1, numClasses);            % within-class scatter, dim x dim each
for c = 1:numClasses
    varName = sprintf('NUM%d', c - 1);
    stored = load([varName '.mat'], varName);
    samples = stored.(varName);
    numSamples = numel(samples);
    if numSamples == 0
        error('Fisher:noSamples', 'No training samples in %s.mat.', varName);
    end

    % Stack the cell array into a numSamples x dim matrix.
    X = zeros(numSamples, dim);
    for n = 1:numSamples
        if numel(samples{n}) ~= dim
            error('Fisher:sizeMismatch', ...
                'Sample %d in %s.mat has %d elements, expected %d.', ...
                n, varName, numel(samples{n}), dim);
        end
        X(n, :) = reshape(samples{n}, 1, []);
    end

    % mean(X, 1) keeps a 1 x dim row even when the class has one sample.
    M{c} = mean(X, 1);

    % Scatter matrix: sum over samples of (x - m)' * (x - m), computed
    % from whole rows and starting from numeric zero.
    centred = X - repmat(M{c}, numSamples, 1);
    S{c} = centred' * centred;
end

% --- Pairwise discriminant values ---------------------------------------
G = zeros(numClasses, numClasses);  % only the upper triangle is used
for a = 1:numClasses-1
    for b = a+1:numClasses
        % Small ridge term keeps the pooled scatter matrix invertible.
        Sw = S{a} + S{b} + 0.0001 * eye(dim);
        w = Sw \ (M{a} - M{b})';                     % optimal projection direction
        G(a, b) = w' * (lett - 0.5 * (M{a} + M{b}))'; % threshold = midpoint of the means
    end
end

% --- Sequential elimination ---------------------------------------------
% Class a is rejected as soon as one higher class b gives G(a,b) < 0.
% If classes 1..9 are all rejected, class 10 (digit 9) remains.
winner = numClasses;
for a = 1:numClasses-1
    if ~any(G(a, a+1:numClasses) < 0)
        winner = a;
        break;
    end
end

% Class index c corresponds to digit c-1.
Rchar = sprintf('%d', winner - 1);
<4> ************添加为样本部分程序*************
% Add the most recently recognised drawing to the training set of the digit
% typed into handles.edit1. The cropped binary image is read from the
% application data of the drawing figure, converted to the same 49-element
% block-density feature vector used for recognition, appended to the cell
% array NUM<d> and written back to NUM<d>.mat.
dd=str2double(get(handles.edit1,'String'));            % label entered by the user
h_patten=getappdata(handles.Rightnum,'h_patten');      % handles of the drawing GUI
bw1=getappdata(h_patten.figure_patten,'bw1');          % cropped binary image

% Feature extraction: 70x70 image -> 7x7 blocks of 10x10 pixels.
bw_7050=imresize(bw1,[70,70]);
example1=zeros(1,49);
for cnt=1:7
    for cnt2=1:7
        blockPixels=bw_7050(cnt*10-9:cnt*10,cnt2*10-9:cnt2*10);
        % Number of background (1) pixels in this block.
        example1((cnt-1)*7+cnt2)=sum(blockPixels(:));
    end
end
% Fraction of ink (0) pixels per block; each block has 100 pixels.
example2=(100-example1)/100;

if isnan(dd) || dd~=floor(dd) || dd<0 || dd>9
    % str2double returns NaN for non-numeric text; only 0..9 have sample files.
    warning('addSample:invalidLabel', ...
        'The label must be an integer from 0 to 9; sample not saved.');
else
    % Only the file of the selected digit has to be read and rewritten.
    varName=sprintf('NUM%d',dd);
    fileName=[varName '.mat'];
    stored=load(fileName,varName);
    samples=stored.(varName);
    samples{length(samples)+1}=example2;               % append the new sample
    stored.(varName)=samples;
    % '-struct' saves the field as a variable named NUM<d>, as before.
    save(fileName,'-struct','stored',varName);
end