红色为0类,绿色为1类
0.辅助工具类
public class MyMathUtil {
public static INDArray FUN_IND(INDArray value, DoubleFunction
doubleFunction){ if(value!=null){ if(value.shape()[0]>1&&value.shape()[1]>1){ double[][] s = value.toDoubleMatrix(); for(int i=0;i public static double MysigMoid(double value) { //Math.E=e;Math.Pow(a,b)=a^b double ey = Math.pow(Math.E, -value); return 1 / (1 + ey); } public static INDArray MysigMoid(INDArray value) { return FUN_IND(value,v->MysigMoid(v)); } public static double Mytanh(double value) { double ex = Math.pow(Math.E, value);// e^x double ey = Math.pow(Math.E, -value);//e^(-x) double sinhx = ex-ey; double coshx = ex+ey; return sinhx/coshx; } public static INDArray Mytanh(INDArray value) { return FUN_IND(value,v->Mytanh(v)); }
}
//规定数据必须为数据标签格式
/**
 * A labelled data set: a feature matrix X plus a label matrix Y.
 * NOTE(review): orientation (samples-per-row vs samples-per-column) is not
 * fixed by this interface — the implementations in this file store one sample
 * per COLUMN; verify before adding new implementations.
 */
public interface INData {
    /** Feature matrix. */
    INDArray getX();
    /** Label matrix, aligned with {@link #getX()}. */
    INDArray getY();
    /** Number of samples in this data set. */
    int getSize();
}
/**
 * Training data that can additionally be split into mini-batches.
 */
public interface TrainData extends INData {
    /**
     * Returns the data set split into batches.
     * NOTE(review): the generic element type was lost in the extracted source;
     * reconstructed as {@code List<INData>} to match the batch objects built
     * by {@code MyTrainData} — confirm against the original repository.
     */
    List<INData> getBatchList();
}
//数据可以通过设置batch_size进行分组 public class MyTrainData implements TrainData { private INDArray x; private INDArray y; private int batch_size; public MyTrainData(INDArray x,INDArray y,int batch_size){ this.x = x.transpose(); this.y = y.transpose(); this.batch_size = batch_size; } public MyTrainData(INDArray x,INDArray y){ this.x = x.transpose(); this.y = y.transpose(); this.batch_size = -1; } @Override public INDArray getX() { return x; } @Override public INDArray getY() { return y; } @Override public List
getBatchList() { List res = new ArrayList<>(); shufflecard(); if(batch_size!=-1){ int lastColumnOrder = 0; for(int i=batch_size;i //定义模型基本方法
/**
 * Minimal trainable-model contract: train on a data set, predict on a feature
 * matrix, and configure the two hyper-parameters via a fluent interface.
 */
public interface model {
    /** Fits the model on the given training data. */
    void train(TrainData data);
    /** Runs inference on feature matrix x and returns the network output. */
    INDArray predict(INDArray x);
    /** Current learning rate. */
    double getLearningrate();
    /** Current iteration (epoch) count. */
    int getIteration();
    /** Sets the learning rate; returns this model for chaining. */
    model setLearningrate(double rate);
    /** Sets the iteration count; returns this model for chaining. */
    model setIteration(int iteration);
}
1.初始化参数
private double learningrate=0.1; // learning rate
private int iteration = 10000; // number of training iterations
// Hidden layer: 4 neurons, 2 input features, 400 samples
// W1 = 4*2, X = 2*400, Z1 = 4*400 => A1 = 4*400
private INDArray Network_1LAYER_W = Nd4j.randn(4, 2); // random init breaks symmetry
private INDArray Network_1LAYER_B = Nd4j.zeros(4, 1); // biases start at zero
// Output layer: 1 neuron, 4 inputs (the hidden activations)
// W2 = 1*4, A1 = 4*400, Z2 = 1*400 => A2 = 1*400
private INDArray Network_2LAYER_W = Nd4j.randn(1, 4);
private INDArray Network_2LAYER_B = Nd4j.zeros(1, 1);
2.前向传播
/**
 * Forward pass through the 2-layer network.
 *
 * @param X feature matrix, one sample per column (2 x m here)
 * @return map with the intermediate tensors: "Z1", "A1" (hidden, tanh),
 *         "Z2", "A2" (output, sigmoid) — keyed for reuse in backprop
 *
 * NOTE(review): the Map generic parameters were lost in the extracted source;
 * reconstructed as {@code Map<String, INDArray>} from the put() calls.
 */
private Map<String, INDArray> forward(INDArray X) {
    Map<String, INDArray> map = new HashMap<>();
    // Hidden layer: Z1 = W1·X + B1 (bias broadcast over sample columns), A1 = tanh(Z1)
    INDArray Z1 = getNetwork_1LAYER_W().mmul(X).addColumnVector(getNetwork_1LAYER_B());
    INDArray A1 = MyMathUtil.Mytanh(Z1);
    // Output layer: Z2 = W2·A1 + B2, A2 = sigmoid(Z2)
    INDArray Z2 = getNetwork_2LAYER_W().mmul(A1).addColumnVector(getNetwork_2LAYER_B());
    INDArray A2 = MyMathUtil.MysigMoid(Z2);
    map.put("Z1", Z1);
    map.put("A1", A1);
    map.put("Z2", Z2);
    map.put("A2", A2);
    return map;
}
3.反向传播
//反向传播 //dL/dA2 = -(Y-A2) //dA2/dZ2 = A2(1-A2) //dZ2/dW2 = A1 //dZ/dB2 = 1 //dL/dZ2 = (dL/dA2)*(dA2/dZ2) = -(Y-A2)*A2(1-A2) //dL/dW2 = (dL/dZ2)*(dZ2/dW2) = -(Y-A2)*A2(1-A2)*A1 //dL/dB2 = (dL/dZ2)*(dZ2/dB2) = -(Y-A2)*A2(1-A2)*1 //dZ2/dA1 = W2 //dA1/dZ1 = 1 − A1^2 //dL/dZ1 = (dL/dZ2)*(dZ2/dA1)*(dA1/dZ1) = W2^T*(-(Y-A2)*A2(1-A2)) //dZ1/dW1 = X //dZ1/dB1 = 1 //dL/dW1 = (dL/dZ1)*(dZ1/dW1) = W2^T*(-(Y-A2)*A2(1-A2))*X^T //dL/dB1 = (dL/dZ1)*(dZ1/dB1) = W2^T*(-(Y-A2)*A2(1-A2)) INDArray dZ2 = Y.sub(A2).mul(A2).mul(A2.sub(1)); //1*400 INDArray dW2 = dZ2.mmul(A1.transpose()); //1*4 INDArray dB2 = dZ2.mmul(Nd4j.ones(data.getX().shape()[1],1)); //1*1 INDArray dA1Z1 = Nd4j.ones(A1.shape()).sub(A1.mul(A1)); INDArray dZ1 = getNetwork_2LAYER_W().transpose().mmul(dZ2).mul(dA1Z1); //(W2^T dZ2)*(1-A1^2); INDArray dW1 = dZ1.mmul(X.transpose()); //4*2 INDArray dB1 = dZ1.mmul(Nd4j.ones(data.getX().shape()[1],1)); //4*1
4.loss函数
loss函数为 loss = (1/m)·Σ(y−yi)²，其中 y 是标签，yi 为网络预测值，m 为数据个数。（注：常见写法带 1/2 系数，即 (1/2m)·Σ(y−yi)²，下面的代码实现没有除以 2，该常数差异会被学习率吸收。）
// Mean squared error over the m samples: (Y-A2)·(Y-A2)^T sums the squared
// residuals, then divide by m = Y.shape()[1].
// NOTE(review): no 1/2 factor is applied here, unlike the (1/2m) formula in
// the surrounding prose — the gradients above match this un-halved loss only
// up to a constant factor absorbed by the learning rate.
double loss = Y.sub(A2).mmul(Y.sub(A2).transpose()).sumNumber().doubleValue()/Y.shape()[1];
5.梯度下降
// Gradient-descent step: param = param - learningrate * (grad / m), where the
// gradients were summed over all m = data.getX().shape()[1] samples above.
// NOTE(review): this fragment only builds the new tensors; the assignment back
// into the network fields is not visible in this excerpt — confirm upstream.
INDArray W1 = getNetwork_1LAYER_W().sub(dW1.div(data.getX().shape()[1]).mul(getLearningrate()));
INDArray W2 = getNetwork_2LAYER_W().sub(dW2.div(data.getX().shape()[1]).mul(getLearningrate()));
INDArray B1 = getNetwork_1LAYER_B().sub(dB1.div(data.getX().shape()[1]).mul(getLearningrate()));
INDArray B2 = getNetwork_2LAYER_B().sub(dB2.div(data.getX().shape()[1]).mul(getLearningrate()));
6.预测函数
/**
 * Runs a forward pass on x and returns the output activations A2
 * (sigmoid values in (0,1), one column per sample).
 *
 * NOTE(review): the Map generic parameters were lost in the extracted source;
 * reconstructed as {@code Map<String, INDArray>} to match forward().
 */
@Override
public INDArray predict(INDArray x) {
    Map<String, INDArray> map = forward(x);
    return map.get("A2");
}
// The model under test, held through the interface so implementations swap freely.
private model pointmodel = new NeuralNetwork();
//point1包括三个属性x,y,z。分别代表一个点的x坐标,y坐标,和类型(代码略)
//把数据集取出之后按照8:2的比例划分为训练集和测试集(略),此处已经分好,train_point是训练数据,test_point测试数据
// Flatten the point lists into plain coordinate/label lists:
// X gets (x, y) pairs in order, Y gets the class label z.
for(point1 point:train_point){ X1.add(point.getX()); X1.add(point.getY()); Y1.add(point.getZ()); }
for(point1 point:test_point){ X2.add(point.getX()); X2.add(point.getY()); Y2.add(point.getZ()); }
// Training set: reshape the flat lists to (samples x 2) features and
// (samples x 1) labels; MyTrainData transposes to column-per-sample internally.
INDArray X = Nd4j.create(X1).reshape(new int[]{train_point.size(), 2});
INDArray Y = Nd4j.create(Y1).reshape(new int[]{train_point.size(), 1});
TrainData data = new MyTrainData(X,Y);
// Test set, built the same way.
INDArray I_X2 = Nd4j.create(X2).reshape(new int[]{test_point.size(), 2});
INDArray I_Y2 = Nd4j.create(Y2).reshape(new int[]{test_point.size(), 1});
TrainData data2 = new MyTrainData(I_X2,I_Y2);
pointmodel.train(data);
//测试集验证效果 INDArray p_Y = pointmodel.predict(data2.getX()); System.out.println(p_Y); System.out.println(data2.getY()); System.out.println(scord(p_Y,data2.getY()));
private float scord(INDArray value,INDArray Y) { int res = 0; int sum = 0; if(value.shape()[0]>1){ double[][] s = value.toDoubleMatrix(); double[][] Ys = Y.toDoubleMatrix(); for(int i=0;i
0.5&&Ys[i][j]>0.5){ res++; }else if(s[i][j]<0.5&&Ys[i][j]<0.5){ res++; } sum++; } } if(sum>0){ return ((float)res/sum)*100; }else{ return 0; } }else{ double[] s = value.toDoubleVector(); double[] Ys = Y.toDoubleVector(); for(int i=0;i 0.5&&Ys[i]>0.5){ res++; }else if(s[i]<0.5&&Ys[i]<0.5){ res++; } sum++; } if(sum>0){ return ((float)res/sum)*100; }else{ return 0; } } }
1.未训练之前的模型效果
2.对数据集切割成训练集与测试集
训练集
测试集
训练之后
测试数据正确率在65-70%左右。已经有初步拟合数据的能力。
完整代码:https://github.com/woshiyigebing/my_dl4j