This article implements MNIST handwritten digit recognition with an echo state network (ESN).
Software: MATLAB 2018a
An echo state network is used to recognize the MNIST handwritten digits. The labels are encoded with one-hot encoding, and the network's readout weights are trained by ridge regression.
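The code below implements the standard leaky-integrator ESN update and a linear readout. With leak rate α, input u(t), reservoir state x(t) and bias W_b, the state and output are

$$x(t) = (1-\alpha)\,x(t-1) + \alpha\,\tanh\bigl(W_{in}\,u(t) + W_{r}\,x(t-1) + W_{b}\bigr)$$

$$y(t) = W_{out}\,[\,1;\;u(t);\;x(t)\,]$$

Only the output weights W_out are trained (by ridge regression); the input and reservoir weights stay fixed after random initialization.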
% Add paths and load the data
initTAO
% Load the data and labels, and normalize the training and test data
normal_trainData=trainData/255;
normal_testData =testData/255;
% Expand the training labels into one-hot vectors
trainResult10 = oneHot(train_labels1,10);
%% Network parameters
Nr=200;                 % reservoir size
spectralRadius=0.85;    % spectral radius, less than 1
regularization=1e-3;    % ridge-regression regularization coefficient
washOut=100;            % number of initial samples discarded before readout training
inputScaling=0.5;       % input scaling factor
esn=ESN(Nr,'spectralRadius',spectralRadius,'regularization',regularization,'inputScaling',inputScaling);
% Training
trainLen=9000;          % use the first 9000 of the 60000 training images
esn.train(normal_trainData(1:trainLen,:),trainResult10(1:trainLen,:),washOut);
% Prediction
train_predict=esn.predict(normal_trainData(1:trainLen,:));
% Training accuracy
[accuracy,precious,predictValue]= resultsProcess(train_predict.',train_labels1(1:trainLen));
fprintf('Training set correct: %d / %d \n',precious,trainLen)
fprintf('Training accuracy: %f\n', accuracy)
% Test accuracy
testLen=1000;           % use the first 1000 of the 10000 test images
test_predict=esn.predict(normal_testData(1:testLen,:));
[accuracy,precious,predictValue]= resultsProcess(test_predict',test_labels1(1:testLen));
fprintf('Test set correct: %d / %d \n',precious,testLen)
fprintf('Test accuracy: %f\n', accuracy)
clear;
clc;
load('MnistData.mat');
%% MnistData.mat contains the training and test sets
% training data    trainData      60000*784
% training labels  train_labels1  60000*1
% test data        testData       10000*784
% test labels      test_labels1   10000*1
One-Hot Encoding
function trainLabel10Dim=oneHot(originalLabels,Dim)
%% One-hot encode an input label vector
%% trainLabel10Dim: each row is one encoded label
%% originalLabels: the original labels; Dim: the number of classes
%% For handwritten-digit classification each label is a 10-dimensional vector,
%% e.g. 1  -> [1,0,0,0,0,0,0,0,0,0]
%% e.g. 10 -> [0,0,0,0,0,0,0,0,0,1]
originalLabels(originalLabels==0)=10; % relabel digit 0 as 10
trainLabel10Dim=zeros(length(originalLabels),Dim); % preallocate to save run time
for k=1:length(originalLabels)
trainLabel10Dim(k,originalLabels(k))=1; % set the corresponding position to 1
end
end
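A minimal usage sketch of oneHot with a hypothetical three-element label vector; the expected rows follow directly from the function above.

% Hypothetical example: encode the labels 3, 0 and 9
labels  = [3; 0; 9];
encoded = oneHot(labels, 10);
% encoded(1,:) = [0 0 1 0 0 0 0 0 0 0]   digit 3 sets position 3
% encoded(2,:) = [0 0 0 0 0 0 0 0 0 1]   digit 0 is relabelled 10, so position 10
% encoded(3,:) = [0 0 0 0 0 0 0 0 1 0]   digit 9 sets position 9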
Machine Learning: Echo State Networks
classdef ESN < handle
% Echo State Network
%Step 1: create an ESN object; its configurable properties are listed below
%Step 2: call the train member function
%Step 3: call the predict member function
properties
Nr                %number of reservoir neurons
alpha             %leak rate (1 = no leaky integration)
rho               %reservoir spectral radius
inputScaling      %input scaling factor
biasScaling       %bias scaling factor
lambda            %regularization coefficient
connectivity      %reservoir connection density (sparsity)
readout_training  %training method for the output weights
Win               %input weight matrix
Wb                %bias vector
Wr                %internal (reservoir) weight matrix
Wout              %output weight matrix
internalState     %reservoir state matrix
outDim            %output dimension / number of classes
inputDim          %input dimension
end
methods
function esn = ESN(Nr,varargin)
esn.Nr = Nr;
% default hyper-parameters (overridden by the name-value pairs below)
esn.alpha = 1;
esn.rho = 0.9;
esn.inputScaling = 1;
esn.biasScaling = 1;
esn.lambda = 1;
esn.connectivity = 1;
esn.readout_training = 'ridgeregression';
% parse the name-value option pairs
numvarargs = length(varargin);
for i = 1:2:numvarargs
switch varargin{i}
case 'leakRate', esn.alpha = varargin{i+1};
case 'spectralRadius', esn.rho = varargin{i+1};
case 'inputScaling', esn.inputScaling = varargin{i+1};
case 'biasScaling', esn.biasScaling = varargin{i+1};
case 'regularization', esn.lambda = varargin{i+1};
case 'connectivity', esn.connectivity = varargin{i+1};
case 'readoutTraining', esn.readout_training = varargin{i+1};
otherwise, error('the option does not exist');
end
end
end
function train(esn, trX, target, washout)
%trX: the training data
%target: the training targets
%trX must be an N-by-dimData matrix
%target must be an N-by-dimTarget matrix
%to draw uniform samples in [-a,a], use R = a - 2*a*rand(m,n)
[inputQuantity,esn.inputDim]=size(trX);
[~,esn.outDim] = size(target);
%draw Win from a uniform distribution on [-inputScaling,inputScaling]
esn.Win=esn.inputScaling *(rand(esn.Nr, esn.inputDim) *2 - 1);
esn.Wb = esn.biasScaling * (rand(esn.Nr, 1) * 2 - 1);
%sprand(m,n,density) generates an m-by-n random sparse matrix with uniformly distributed nonzeros; density is the fraction of nonzero elements
esn.Wr = full(sprand(esn.Nr,esn.Nr, esn.connectivity));
esn.Wr(esn.Wr ~= 0) = esn.Wr(esn.Wr ~= 0) * 2 - 1;
esn.Wr = esn.Wr * (esn.rho / max(abs(eig(esn.Wr)))); %rescale to the desired spectral radius
%state-collection matrix: each column is [bias; input; reservoir state]
X = zeros(1+esn.inputDim+esn.Nr, inputQuantity-washout);
x=zeros(esn.Nr,1); %reservoir state vector
for s = 1:inputQuantity
u = trX(s,:).'; %take one row, i.e. one image, as a column vector
x_ = tanh(esn.Win*u + esn.Wr*x + esn.Wb);
x = (1-esn.alpha)*x + esn.alpha*x_;
if (s > washout)
X(:,s - washout) = [1;u;x];
end
end
esn.internalState = X(1+esn.inputDim+1:end,:); %keep the reservoir part of the collected states
esn.Wout = feval(esn.readout_training,X ,target(washout+1:end,:), esn); %solve for the readout weights
end
function y = predict(esn, data)
[N,~] = size(data);
Y_out10 = zeros(esn.outDim,N);
x=zeros(esn.Nr,1); %reset the reservoir state before prediction
for k =1 : N
u = data(k, :).';
x_ = tanh(esn.Win*u + esn.Wr*x + esn.Wb);
x = (1-esn.alpha)*x + esn.alpha*x_;
Y_out10( : ,k) = esn.Wout*[1;u;x]; %readout prediction for sample k
end
y = Y_out10';
end
end
end
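A minimal, self-contained sketch of how the class is used, on toy random data rather than MNIST (the sizes and hyper-parameters here are illustrative assumptions only):

% Toy example: 500 random 4-dimensional samples with random 10-class targets
rng(0);                                  % fix the seed for reproducibility
X   = rand(500, 4);
Y   = oneHot(randi([0 9], 500, 1), 10);  % one-hot targets via the function above
net = ESN(50, 'spectralRadius', 0.9, 'regularization', 1e-3);
net.train(X, Y, 20);                     % washout of 20 samples
P   = net.predict(X);                    % P is a 500-by-10 matrix of readout outputs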
function [accuracy,precious,value]=resultsProcess(results10Dim,expectResults)
%results10Dim: predicted outputs, a 10-by-N matrix (one column per sample)
%expectResults: expected labels, e.g. 3, 5, 8, 0
%the largest entry in each column gives the predicted class
%e.g. [0.2,0.1,0.88,0.1,-0.02,...,0.2] predicts digit 3
%note: if the 10th entry is the largest, the prediction is digit 0
[~,value]=max(results10Dim);
value(value==10)=0; %predicted labels
precious = sum(value(:)==expectResults(:)); %number of correct predictions (both forced to column vectors)
accuracy = precious/length(expectResults); %accuracy
end
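A short sketch (with hypothetical score values) of how resultsProcess decodes a 10-by-N output matrix:

% Two samples: column 1 peaks at row 2 (digit 2), column 2 peaks at row 1 (digit 1)
scores = [0.1 0.9; 0.8 0.0; 0.0 0.1; zeros(7,2)];
[acc, nCorrect, pred] = resultsProcess(scores, [2; 1]);
% pred = [2 1], nCorrect = 2, acc = 1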
Ridge Regression
function [ W ] = ridgeregression( X, Y, esn)
W = Y.'*X.'/(X*X'+esn.lambda*eye(esn.Nr+1+esn.inputDim));
end
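In the notation of the train method, with state-collection matrix X (columns [1; u; x]) and target matrix Y, this one-liner is the closed-form ridge-regression solution

$$W_{out} = Y^{\top}X^{\top}\left(XX^{\top} + \lambda I\right)^{-1}$$

where λ is the regularization coefficient esn.lambda.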