The SOM algorithm is an unsupervised neural-network learning algorithm. The network consists of an input layer and a competition layer (the output layer). SOM is a clustering method.
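The core training idea, as implemented below: for each sample x, find the competition-layer node whose weight vector is closest to x (the winner), then pull the weights of every node in the winner's neighborhood toward x. For a node at grid distance d from the winner in iteration t, the update is w = w + learning(t, d) * (x - w), with learning(t, d) = (0.3 / (t + 1)) * exp(-d).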
//Constructor: normalizes the dataset, then calls train
public SOM(double[][] dataset,int M,int N,int iterationNum)
//Normalize the dataset
public double[][] normalizedDataset(double[][] dataset)
//Compute the norm of a vector; called by normalizedDataset
public double calMod(double[] rowArr)
//Initialize the competition layer
public int[][] initCompetitionLayer(int row,int col)
//Initialize the weight matrix
public double[][] initWeigthMatrix(int competitionLayerSize,int feature)
//Train the network on an M x N competition layer for iterationNum iterations
public void train(double[][] dataset,int M,int N,int iterationNum)
//Classify the dataset after training
public HashMap<Integer,ArrayList<double[]>> classify(double[][] dataset)
//Learning rate for the current iteration and grid distance
public double learning(int iterationNum,double distance)
//Return the indices (not x,y coordinates) of the nodes inside the winning neighborhood, together with their distances to the winner
public HashMap<Integer,Double> getArea(int winnerIndex,double area,int[][] competitionLayer)
//Get the index of the winning unit
public int getWinner(double[] selectedSample,double[][] weigthMatrix)
//Compute the distance between two vectors; called by getWinner
public double calDistance(double[] selectedSample,double[] weight)
//Compute the distance between nodes in the competition layer
public double calDistanceForNode(int[] otherNode,int[] winnerNode)
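A minimal usage sketch (it mirrors the main method at the bottom of the class; dataset is assumed to be a double[][] of feature rows such as the Iris samples used there):
SOM som = new SOM(dataset, 2, 2, 10000); // 2x2 competition layer, 10000 training iterations
HashMap<Integer, ArrayList<double[]>> clusters = som.classify(dataset); // classify() normalizes the input itself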
import java.util.ArrayList;
import java.util.HashMap;
public class SOM {
private double[][] trainedWeightMatrix;
public SOM(double[][] dataset,int M,int N,int iterationNum){
double[][] normalizeddataset=normalizedDataset(dataset);
train(normalizeddataset,M,N,iterationNum);
}
public void train(double[][] dataset,int M,int N,int iterationNum){
int featureNum= dataset[0].length;
int sampleNum= dataset.length;
int[][] competitionLayer = initCompetitionLayer(M, N);
double[][] weigthMatrix = initWeigthMatrix(M * N, featureNum);
double area=1.2;//neighborhood radius: nodes within this grid distance of the winner are updated
for(int t=0;t<iterationNum;t++){
for (int z = 0; z < dataset.length; z++) {
double[] selectedSample = dataset[z];
int winnerIndex=getWinner(selectedSample,weigthMatrix);
HashMap<Integer,Double> inAreaList = getArea(winnerIndex, area, competitionLayer);//get the nodes inside the winning neighborhood
//update the nodes in the neighborhood, including the winner itself
for(Integer index:inAreaList.keySet()){
for (int i = 0; i < featureNum; i++) {
weigthMatrix[index][i]=weigthMatrix[index][i]+learning(t,inAreaList.get(index))*(selectedSample[i]-weigthMatrix[index][i]);
}
}
}
}
trainedWeightMatrix=weigthMatrix;
}
// Call this method after train to classify the dataset
public HashMap<Integer,ArrayList<double[]>> classify(double[][] dataset){
double[][] normalized=normalizedDataset(dataset);
HashMap<Integer,ArrayList<double[]>> result=new HashMap<>();
for (int i = 0; i < normalized.length; i++) {
int winnerIndex=getWinner(normalized[i],trainedWeightMatrix);
boolean isExist=false;
for(Integer tag: result.keySet()){//check whether this cluster key already exists
if(tag==winnerIndex){//already exists
result.get(winnerIndex).add(normalized[i]);
isExist=true;
break;
}
}
if (!isExist){
ArrayList<double[]> alist=new ArrayList<>();
alist.add(normalized[i]);
result.put(winnerIndex,alist);
}
}
return result;
}
//Learning rate. iterationNum is the current iteration t; distance is the node's grid distance to the winner
public double learning(int iterationNum,double distance){
return (0.3/(iterationNum+1))*(Math.exp(-distance));
}
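// The learning rate decays as 0.3/(t+1) over iterations and falls off exponentially
// with the node's grid distance to the winner, so nearby nodes are pulled toward the
// sample more strongly than distant ones.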
//Return the indices (not x,y coordinates) of the nodes inside the winning neighborhood, together with their distances to the winner
public HashMap<Integer,Double> getArea(int winnerIndex,double area,int[][] competitionLayer){
HashMap<Integer,Double> inArea=new HashMap<>();
for (int i = 0; i < competitionLayer.length; i++) {
double distance=calDistanceForNode(competitionLayer[i],competitionLayer[winnerIndex]);
if (distance<area){
inArea.put(i,distance);
}
}
return inArea;
}
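// Note: the radius passed in (area=1.2 in train) stays constant over all iterations,
// whereas a classic SOM typically shrinks the neighborhood over time. With a radius
// of 1.2 on the integer grid, only the winner and its directly adjacent nodes
// (grid distance 1) fall inside the neighborhood.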
//Return the competition-unit index of the winner (not its x,y coordinates)
public int getWinner(double[] selectedSample,double[][] weigthMatrix){
double minDistance=Double.MAX_VALUE;//start above any possible distance so the first node always replaces it
int winnerIndex=0;
for (int i = 0; i < weigthMatrix.length; i++) {
double distance=calDistance(selectedSample,weigthMatrix[i]);
if (distance<minDistance){
minDistance=distance;
winnerIndex=i;
}
}
return winnerIndex;
}
//Compute the distance between nodes in the competition layer
public double calDistanceForNode(int[] otherNode,int[] winnerNode){
double sum=0;
for (int i = 0; i < winnerNode.length; i++) {
double subtraction=winnerNode[i]-otherNode[i];
sum+=subtraction*subtraction;
}
return Math.sqrt(sum);
}
//Compute the distance between a sample and a weight vector
public double calDistance(double[] selectedSample,double[] weight){
double sum=0;
for (int i = 0; i < selectedSample.length; i++) {
double subtraction=selectedSample[i]-weight[i];
sum+=subtraction*subtraction;
}
return Math.sqrt(sum);
}
//Initialize the weight matrix; competitionLayerSize = M*N
public double[][] initWeigthMatrix(int competitionLayerSize,int feature){
double[][] weightMatrix=new double[competitionLayerSize][feature];
for (int i = 0; i < competitionLayerSize; i++) {
for (int j = 0; j < feature; j++) {
weightMatrix[i][j]=Math.random();
}
}
return weightMatrix;
}
//Normalize the dataset: each sample row is scaled to unit length
public double[][] normalizedDataset(double[][] dataset){
for (int j = 0, datasetLength = dataset.length; j < datasetLength; j++) {
double mod = calMod(dataset[j]);
for (int i = 0; i < dataset[0].length; i++) {
dataset[j][i]=dataset[j][i]/mod;
}
}
return dataset;
}
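// Note: normalization is done in place, so the caller's dataset array is modified.
// After this call every sample row has Euclidean norm 1.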
//Compute the Euclidean norm of a row vector
public double calMod(double[] rowArr){
double temp=0;
for (double num:rowArr){
temp+=num*num;
}
return Math.sqrt(temp);
}
//Initialize the competition layer; this matrix is later used to compute the distances between nodes.
public int[][] initCompetitionLayer(int row,int col){
int[][] competitionLayer=new int[row*col][2]; // each row has exactly 2 columns: the node's x,y grid coordinates
int count=0;
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j++) {
competitionLayer[count]= new int[]{i, j};
count++;
}
}
return competitionLayer;
}
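// Because the grid is filled row by row, the node with flat index k sits at grid
// coordinates (k / col, k % col). getWinner and getArea work with these flat indices,
// while calDistanceForNode works with the (x,y) pairs stored here.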
public static void main(String[] args) {
double[][] dataset=new double[][]{
{5.1,3.5,1.4,0.2},
{4.9,3.0,1.4,0.2},
{4.7,3.2,1.3,0.2},
{4.6,3.1,1.5,0.2},
{5.0,3.6,1.4,0.2},
{5.4,3.9,1.7,0.4},
{4.6,3.4,1.4,0.3},
{5.0,3.4,1.5,0.2},
{4.4,2.9,1.4,0.2},
{4.9,3.1,1.5,0.1},
{5.4,3.7,1.5,0.2},
{4.8,3.4,1.6,0.2},
{4.8,3.0,1.4,0.1},
{4.3,3.0,1.1,0.1},
{5.8,4.0,1.2,0.2},
{5.7,4.4,1.5,0.4},
{5.4,3.9,1.3,0.4},
{5.1,3.5,1.4,0.3},
{5.7,3.8,1.7,0.3},
{5.1,3.8,1.5,0.3},
{5.4,3.4,1.7,0.2},
{5.1,3.7,1.5,0.4},
{4.6,3.6,1.0,0.2},
{5.1,3.3,1.7,0.5},
{4.8,3.4,1.9,0.2},
{5.0,3.0,1.6,0.2},
{5.0,3.4,1.6,0.4},
{5.2,3.5,1.5,0.2},
{5.2,3.4,1.4,0.2},
{4.7,3.2,1.6,0.2},
{4.8,3.1,1.6,0.2},
{5.4,3.4,1.5,0.4},
{5.2,4.1,1.5,0.1},
{5.5,4.2,1.4,0.2},
{4.9,3.1,1.5,0.2},
{5.0,3.2,1.2,0.2},
{5.5,3.5,1.3,0.2},
{4.9,3.6,1.4,0.1},
{4.4,3.0,1.3,0.2},
{5.1,3.4,1.5,0.2},
{5.0,3.5,1.3,0.3},
{4.5,2.3,1.3,0.3},
{4.4,3.2,1.3,0.2},
{5.0,3.5,1.6,0.6},
{5.1,3.8,1.9,0.4},
{4.8,3.0,1.4,0.3},
{5.1,3.8,1.6,0.2},
{4.6,3.2,1.4,0.2},
{5.3,3.7,1.5,0.2},
{5.0,3.3,1.4,0.2}};
SOM s=new SOM(dataset,2,2,10000);// 2x2 competition layer, 10000 training iterations
double[][] newDataset = s.normalizedDataset(dataset);// the constructor already normalized dataset in place; renormalizing unit vectors leaves them unchanged
HashMap<Integer, ArrayList<double[]>> outc = s.classify(newDataset);
for(Integer index: outc.keySet()){
System.out.println("*********************第"+index+"类*********************");
for(double[] doubles: outc.get(index)){
System.out.print("[");
for (int i = 0; i < dataset[0].length; i++) {
System.out.print(doubles[i]+",");
}
System.out.print("]");
System.out.println("");
}
System.out.println("");
}
}
}
Source: https://blog.csdn.net/chenge_j/article/details/72537568