浅层神经网络--java版本

目标:构建一个浅层神经网络来对坐标轴里的点进行分类。

数据集:

浅层神经网络--java版本_第1张图片

红色为0类,绿色为1类

网络架构:

浅层神经网络--java版本_第2张图片

构建浅层神经网络

0.辅助工具类

public class MyMathUtil {
public static INDArray FUN_IND(INDArray value, DoubleFunction doubleFunction){
    if(value!=null){
        if(value.shape()[0]>1&&value.shape()[1]>1){
            double[][] s = value.toDoubleMatrix();
            for(int i=0;i
public static double MysigMoid(double value) {
    //Math.E=e;Math.Pow(a,b)=a^b
    double ey = Math.pow(Math.E, -value);
    return 1 / (1 + ey);
}

public static INDArray MysigMoid(INDArray value) {
    return FUN_IND(value,v->MysigMoid(v));
}

public static double Mytanh(double value) {
    double ex = Math.pow(Math.E, value);// e^x
    double ey = Math.pow(Math.E, -value);//e^(-x)
    double sinhx = ex-ey;
    double coshx = ex+ey;
    return sinhx/coshx;
}
public static INDArray Mytanh(INDArray value) {
    return FUN_IND(value,v->Mytanh(v));
}

}

//规定数据必须为数据标签格式

/**
 * A data set exposed as a feature matrix / label matrix pair.
 */
public interface INData {
    /** Feature matrix X. In MyTrainData it is stored transposed, i.e. one column per sample. */
    INDArray getX();
    /** Label matrix Y, laid out to match {@link #getX()}. */
    INDArray getY();
    /** Number of samples in the data set. */
    int getSize();
}
/**
 * Training data that can additionally be partitioned into mini-batches
 * (batch size is configured on the implementation, e.g. MyTrainData).
 */
public interface TrainData extends INData{
     // NOTE(review): raw List — the element type was likely lost to HTML
     // escaping (probably List<TrainData> or List<INData>); confirm against
     // the repository and restore the generic parameter.
     List getBatchList();
}
//数据可以通过设置batch_size进行分组
/**
 * TrainData backed by two in-memory ND4J matrices.
 *
 * NOTE(review): getBatchList() is truncated in this listing (the loop
 * header's '&lt;' was eaten by HTML escaping), and shufflecard() plus the
 * getSize() implementation are not visible here — see the linked repository
 * for the full class.
 */
public class MyTrainData implements TrainData {
    // Stored transposed relative to the constructor arguments:
    // callers pass (samples x features); internally one COLUMN per sample.
    private INDArray x;
    private INDArray y;
    // Mini-batch size; -1 is the sentinel for "no batching"
    // (set by the two-argument constructor, checked in getBatchList()).
    private int batch_size;

    public MyTrainData(INDArray x,INDArray y,int batch_size){
        this.x = x.transpose();
        this.y = y.transpose();
        this.batch_size = batch_size;
    }
    public MyTrainData(INDArray x,INDArray y){
        this.x = x.transpose();
        this.y = y.transpose();
        this.batch_size = -1;
    }

    @Override
    public INDArray getX() {
        return x;
    }

    @Override
    public INDArray getY() {
        return y;
    }

    @Override
    public List getBatchList() {
        List res = new ArrayList<>();
        // Shuffle before slicing so batches differ between epochs
        // (implementation not shown in this listing).
        shufflecard();
        if(batch_size!=-1){
            int lastColumnOrder = 0;
            // NOTE(review): truncated from here in the source listing.
            for(int i=batch_size;i

//定义模型基本方法

/**
 * Minimal train/predict contract for a model.
 *
 * NOTE(review): by Java convention this type should be named {@code Model};
 * left unchanged because callers reference it by this name.
 */
public interface model {
     /** Trains the model on the given data (updates internal parameters in place). */
     void train(TrainData data);
     /** Runs a forward pass on the features and returns the network output. */
     INDArray predict(INDArray x);
     /** Gradient-descent step size. */
     double getLearningrate();
     /** Number of training iterations. */
     int getIteration();
     /** Fluent setter for the learning rate; returns this model for chaining. */
     model setLearningrate(double rate);
     /** Fluent setter for the iteration count; returns this model for chaining. */
     model setIteration(int iteration);
}

1.初始化参数

private double learningrate=0.1;//learning rate: gradient-descent step size

private int iteration = 10000;//number of full-batch training iterations

//Hidden layer: 4 neurons, 2 input features, 400 samples
//W1 = 4*2  X = 2*400 Z1 = 4*400   => A1 = 4*400;
private INDArray Network_1LAYER_W = Nd4j.randn(4, 2);//random init breaks symmetry between neurons
private INDArray Network_1LAYER_B = Nd4j.zeros(4, 1);//biases may start at zero
//Output layer: 1 neuron fed by the 4 hidden activations
//W2 = 1*4 A1 = 4*400 Z2 = 1*400 => A2 = 1*400;
private INDArray Network_2LAYER_W = Nd4j.randn(1, 4);
private INDArray Network_2LAYER_B = Nd4j.zeros(1, 1);

2.前向传播

/**
 * Forward pass through both layers.
 *
 * Computes Z1/A1 (hidden layer, tanh) and Z2/A2 (output layer, sigmoid) and
 * returns all four intermediates keyed by name, since backprop needs them.
 * NOTE(review): declared with a raw Map in this listing; the generic
 * parameters were likely lost to HTML escaping.
 *
 * @param X feature matrix, one column per sample (2*m)
 * @return map with keys "Z1", "A1", "Z2", "A2"
 */
private Map forward(INDArray X){
    // Hidden layer: affine transform, then tanh activation.
    INDArray hiddenPre = getNetwork_1LAYER_W().mmul(X).addColumnVector(getNetwork_1LAYER_B());
    INDArray hiddenAct = MyMathUtil.Mytanh(hiddenPre);
    // Output layer: affine transform, then sigmoid to get probabilities.
    INDArray outPre = getNetwork_2LAYER_W().mmul(hiddenAct).addColumnVector(getNetwork_2LAYER_B());
    INDArray outAct = MyMathUtil.MysigMoid(outPre);

    Map cache = new HashMap<>();
    cache.put("Z1", hiddenPre);
    cache.put("A1", hiddenAct);
    cache.put("Z2", outPre);
    cache.put("A2", outAct);
    return cache;
}

3.反向传播

//Backpropagation (loss L = ½(y-a2)² per sample; see the loss section below)
//dL/dA2 = -(Y-A2)
//dA2/dZ2 = A2(1-A2)
//dZ2/dW2 = A1
//dZ/dB2 = 1

//dL/dZ2 = (dL/dA2)*(dA2/dZ2) = -(Y-A2)*A2(1-A2)
//dL/dW2 = (dL/dZ2)*(dZ2/dW2) = -(Y-A2)*A2(1-A2)*A1
//dL/dB2 = (dL/dZ2)*(dZ2/dB2) = -(Y-A2)*A2(1-A2)*1

//dZ2/dA1 = W2
//dA1/dZ1 = 1 − A1^2  (derivative of tanh)

//dL/dZ1 = (dL/dZ2)*(dZ2/dA1)*(dA1/dZ1) = (W2^T * dL/dZ2) ⊙ (1 − A1^2)

//dZ1/dW1 = X
//dZ1/dB1 = 1

//dL/dW1 = (dL/dZ1)*(dZ1/dW1) = dZ1 * X^T
//dL/dB1 = (dL/dZ1)*(dZ1/dB1) = dZ1 summed over samples

// Note: (Y-A2)*A2*(A2-1) below equals -(Y-A2)*A2*(1-A2) from the derivation above.
INDArray dZ2 = Y.sub(A2).mul(A2).mul(A2.sub(1)); //1*400
INDArray dW2 = dZ2.mmul(A1.transpose()); //1*4
// Multiplying by a column of ones row-sums dZ2 over the m samples.
INDArray dB2 = dZ2.mmul(Nd4j.ones(data.getX().shape()[1],1));  //1*1
INDArray dA1Z1 = Nd4j.ones(A1.shape()).sub(A1.mul(A1)); // tanh' = 1 - A1^2, element-wise
INDArray dZ1 = getNetwork_2LAYER_W().transpose().mmul(dZ2).mul(dA1Z1);   //(W2^T dZ2)*(1-A1^2);
INDArray dW1 = dZ1.mmul(X.transpose());  //4*2
INDArray dB1 = dZ1.mmul(Nd4j.ones(data.getX().shape()[1],1)); //4*1

4.loss函数

 loss函数为loss = (1/m)*Σ(y-yi)^2,其中y是标签,yi为网络预测值,m为数据个数。(代码实现是对m个样本的平方误差求和后除以m,没有乘1/2系数。)

// Mean squared error over the m = Y.shape()[1] samples: (Y-A2)(Y-A2)^T is a
// 1x1 matrix holding the sum of squared errors; divide by m (no ½ factor).
// Minor: Y.sub(A2) is computed twice; could be hoisted into a local.
double loss =  Y.sub(A2).mmul(Y.sub(A2).transpose()).sumNumber().doubleValue()/Y.shape()[1];

5.梯度下降

// Batch gradient-descent step: average each gradient over the
// m = data.getX().shape()[1] samples, scale by the learning rate, and move
// the parameters against the gradient.
INDArray W1 = getNetwork_1LAYER_W().sub(dW1.div(data.getX().shape()[1]).mul(getLearningrate()));
INDArray W2 = getNetwork_2LAYER_W().sub(dW2.div(data.getX().shape()[1]).mul(getLearningrate()));
INDArray B1 = getNetwork_1LAYER_B().sub(dB1.div(data.getX().shape()[1]).mul(getLearningrate()));
INDArray B2 = getNetwork_2LAYER_B().sub(dB2.div(data.getX().shape()[1]).mul(getLearningrate()));

6.预测函数

/**
 * Predicts outputs for the given inputs by running a forward pass and
 * returning the output-layer activation A2 (sigmoid values in (0,1),
 * one column per input sample).
 *
 * @param x feature matrix, one column per sample
 * @return the network output A2
 */
@Override
public INDArray predict(INDArray x) {
    // forward() caches all intermediates; only the final activation matters here.
    // Explicit cast fixes the listing's compile error: forward() is declared
    // with a raw Map (its generic type was lost), so get(...) returns Object.
    return (INDArray) forward(x).get("A2");
}

 运用神经网络解决点分类问题

private model pointmodel = new NeuralNetwork();

//point1 has three fields x, y, z: the point's x coordinate, y coordinate, and class label (code omitted)

//The data set was split 8:2 into training and test sets (omitted); train_point is the training data, test_point the test data

// Flatten the training points: features interleaved as [x0,y0,x1,y1,...], labels as [z0,z1,...].
for(point1 point:train_point){
    X1.add(point.getX());
    X1.add(point.getY());
    Y1.add(point.getZ());
}

// Same flattening for the test points.
for(point1 point:test_point){
    X2.add(point.getX());
    X2.add(point.getY());
    Y2.add(point.getZ());
}
// Rows = samples here; MyTrainData's constructor transposes to one column per sample.
INDArray X = Nd4j.create(X1).reshape(new int[]{train_point.size(), 2});
INDArray Y = Nd4j.create(Y1).reshape(new int[]{train_point.size(), 1});
TrainData data = new MyTrainData(X,Y);
INDArray I_X2 = Nd4j.create(X2).reshape(new int[]{test_point.size(), 2});
INDArray I_Y2 = Nd4j.create(Y2).reshape(new int[]{test_point.size(), 1});
TrainData data2 = new MyTrainData(I_X2,I_Y2);
pointmodel.train(data);
//evaluate on the held-out test set: print predictions, labels, and accuracy (%)
INDArray p_Y = pointmodel.predict(data2.getX());
System.out.println(p_Y);
System.out.println(data2.getY());
System.out.println(scord(p_Y,data2.getY()));
/**
 * Classification accuracy in percent: a prediction counts as correct when it
 * falls on the same side of the 0.5 threshold as its label. Values exactly
 * equal to 0.5 match neither branch and count as wrong.
 *
 * NOTE(review): "scord" is presumably a typo for "score"; kept unchanged
 * because callers use this name. Both loop headers below are truncated in
 * this listing (the '&lt;' was eaten by HTML escaping) — see the repository
 * for the full bodies.
 *
 * @param value predicted values (sigmoid outputs in (0,1))
 * @param Y     ground-truth labels (0 or 1), same shape as value
 * @return accuracy in [0,100], or 0 when no elements were compared
 */
private float scord(INDArray value,INDArray Y) {
    int res = 0; // correctly classified count
    int sum = 0; // total compared count
    // Matrix path (more than one row) vs. vector path.
    if(value.shape()[0]>1){
        double[][] s = value.toDoubleMatrix();
        double[][] Ys = Y.toDoubleMatrix();
        for(int i=0;i0.5&&Ys[i][j]>0.5){
                    res++;
                }else if(s[i][j]<0.5&&Ys[i][j]<0.5){
                    res++;
                }
                sum++;
            }
        }
        if(sum>0){
            return ((float)res/sum)*100;
        }else{
            return 0;
        }
    }else{
        double[] s = value.toDoubleVector();
        double[] Ys = Y.toDoubleVector();
        for(int i=0;i0.5&&Ys[i]>0.5){
                res++;
            }else if(s[i]<0.5&&Ys[i]<0.5){
                res++;
            }
            sum++;
        }
        if(sum>0){
            return ((float)res/sum)*100;
        }else{
            return 0;
        }
    }
}

 结果:

1.未训练之前的模型效果

浅层神经网络--java版本_第3张图片

2.对数据集切割成训练集与测试集

训练集

浅层神经网络--java版本_第4张图片

测试集

浅层神经网络--java版本_第5张图片

训练之后

浅层神经网络--java版本_第6张图片

测试数据正确率在65-70%左右。已经有初步拟合数据的能力。

完整代码:https://github.com/woshiyigebing/my_dl4j

你可能感兴趣的:(笔记,dl4j)