深度学习-利用卷积网络识别动物

深度学习框架很多,我讲的是deeplearning4j,因为它能和spark结合,代码是java,虽然我java也很烂

数据源是4种动物的照片,有熊,鸭,鹿,龟

示例的分类结果不是很理想,建议我们通过以下方式提高:

1.增加照片数量

2.进行更多的数据预处理

3.增加训练次数,所有数据都训练完了才叫一次训练

4.调整模型配置

5.调整学习率,更新器,激活函数,损失函数,正则化参数等

这也是实战中深度学习问题需要面对的问题
贴出代码,dl4j几乎没什么注释,我只能根据相关资料来分析,有不对的地方还望指出

public class AnimalsClassification {
    protected static final Logger log = LoggerFactory.getLogger(AnimalsClassification.class);//通过反射获取日志名
    protected static int height = 100;//照片是100*100
    protected static int width = 100;
    protected static int channels = 3;//过滤器数量,就是输入层和几个过滤器连接,每个过滤器都按不同规则对输入层进行处理
    protected static int numExamples = 80;//80个样本
    protected static int numLabels = 4;//4个类别
    protected static int batchSize = 20;//每次处理20个样本,这80个样本分4批训练完,参数会更新4次,80个样本训练完了才是一步训练

    protected static long seed = 42;
    protected static Random rng = new Random(seed);//随机数生成器
    protected static int listenerFreq = 1;//参数更新一次,就打印一次score
    protected static int iterations = 1;//每步训练的迭代次数,正常来讲4批是一次,但是我们可以增加迭代次数让每步训练迭代更多次
    protected static int epochs = 50;//训练步数,够多的,时间应该很长
    protected static double splitTrainTest = 0.8;//80%训练,20%测试
    protected static int nCores = 2;//装载数据的队列数
    protected static boolean save = false;//不存储

    protected static String modelType = "AlexNet"; // LeNet, AlexNet or Custom but you need to fill it out//使用AlexNet网络


    public void run(String[] args) throws Exception {

        log.info("Load data....");
        /**cd
         * Data Setup -> organize and limit data file paths:
         *  - mainPath = path to image files//图片路径
         *  - fileSplit = define basic dataset split with limits on format//定义数据划分
         *  - pathFilter = define additional file load filter to limit size and balance batch content//定义额外的文件加载过滤器用来限制大小平衡批内容
         **/
        ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator();//按文件名产生标签0,1,2,3
        File mainPath = new File(System.getProperty("user.dir"), "dl4j-examples/src/main/resources/animals/");//图片主路径
        FileSplit fileSplit = new FileSplit(mainPath, NativeImageLoader.ALLOWED_FORMATS, rng);//把所有图片弄成一个经过shuffle的数组
        BalancedPathFilter pathFilter = new BalancedPathFilter(rng, labelMaker, numExamples, numLabels, batchSize);//平衡每个batch中label的数量


        /**
         * Data Setup -> train test split//划分训练和测试集
         *  - inputSplit = define train and test split
         **/
        InputSplit[] inputSplit = fileSplit.sample(pathFilter, numExamples * (1 + splitTrainTest), numExamples * (1 - splitTrainTest));//第二个参数是144,第三个是16,这里不是普通的80%,20%,内部有自己的策略,所以最终训练测试条数也跟预想的不一致

        InputSplit trainData = inputSplit[0];//训练数据68条
        InputSplit testData = inputSplit[1];//测试数据8条

        /**
         * Data Setup -> transformation//把图片数据转换成数字数据集
         *  - Transform = how to tranform images and generate large dataset to train on
         **/
        ImageTransform flipTransform1 = new FlipImageTransform(rng);//构建图片转换的实例,不翻转或者只进行水平垂直翻转
        ImageTransform flipTransform2 = new FlipImageTransform(new Random(123));
        ImageTransform warpTransform = new WarpImageTransform(rng, 42);//这个实例可以进行翻转,最大翻转为42
//        ImageTransform colorTransform = new ColorConversionTransform(new Random(seed), COLOR_BGR2YCrCb);
        List transforms = Arrays.asList(new ImageTransform[]{flipTransform1, warpTransform, flipTransform2});//这就符合我们说的3个channel了

        /**
         * Data Setup -> normalization
         *  - how to normalize images and generate large dataset to train on
         **/
        DataNormalization scaler = new ImagePreProcessingScaler(0, 1);//把像素规范化到0,1区间

        log.info("Build model....");

        // Uncomment below to try AlexNet. Note change height and width to at least 100//下面的注释尝试AlexNet,注意高和宽至少要有100个像素

//        MultiLayerNetwork network = new AlexNet(height, width, channels, numLabels, seed, iterations).init();

        MultiLayerNetwork network;
        switch (modelType) {//匹配网络,这里我们用的是AlexNet

            case "LeNet":
                network = lenetModel();
                break;
            case "AlexNet":
                network = alexnetModel();
                break;
            case "custom":
                network = customModel();
                break;
            default:
                throw new InvalidInputTypeException("Incorrect model provided.");
        }
        network.init();//初始化网络
        network.setListeners(new ScoreIterationListener(listenerFreq));//设置监听器,参数更新一次就打印一次score

        /**
         * Data Setup -> define how to load data into net://定义如何把数据载入网络
         *  - recordReader = the reader that loads and converts image data pass in inputSplit to initialize//reader装载并转换图片数据,并传入数组inputSplit完成初始化


         *  - dataIter = a generator that only loads one batch at a time into memory to save memory//数据迭代器,每次只载入一批数据到内存
         *  - trainIter = uses MultipleEpochsIterator to ensure model runs through the data for all epochs//训练迭代器,使用多步迭代器保证模型每步迭代都能使用所有数据 

         **/
        ImageRecordReader recordReader = new ImageRecordReader(height, width, channels, labelMaker);//构建图片读取器
        DataSetIterator dataIter;//数据迭代器
        MultipleEpochsIterator trainIter;//训练迭代器


        log.info("Train model....");
        // Train without transformations//训练不翻转的数据,目的是跑一遍初始化参数
        recordReader.initialize(trainData, null);//初始化读取器
        dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numLabels);//构造数据迭代器,传入读取器,批大小,label索引,label数量
        scaler.fit(dataIter);//收集规范化统计信息
        dataIter.setPreProcessor(scaler);//对数据进行规范化
        trainIter = new MultipleEpochsIterator(epochs, dataIter, nCores);//构建训练迭代器传入步数,数据迭代器,队列数
        network.fit(trainIter);//训练数据

        // Train with transformations//训练翻转的数据,有了初始化参数再上各种翻转数据
        for (ImageTransform transform : transforms) {//3组翻转过滤器,代码和之前一样
            System.out.print("\nTraining on transformation: " + transform.getClass().toString() + "\n\n");
            recordReader.initialize(trainData, transform);
            dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numLabels);
            scaler.fit(dataIter);
            dataIter.setPreProcessor(scaler);
            trainIter = new MultipleEpochsIterator(epochs, dataIter, nCores);//
            network.fit(trainIter);
        }

        log.info("Evaluate model....");
        recordReader.initialize(testData);//初始化测试数据
        dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numLabels);
        scaler.fit(dataIter);//统计信息是分批统计的,这个要注意下
        dataIter.setPreProcessor(scaler);
        Evaluation eval = network.evaluate(dataIter);//评估测试数据
        log.info(eval.stats(true));//打印测试统计信息

        // Example on how to get predict results with trained model//如果获取预测结果
        dataIter.reset();//清空数据迭代器
        DataSet testDataSet = dataIter.next();//清空再next获取的是一条数据
        String expectedResult = testDataSet.getLabelName(0);//获取label,索引是0
        List predict = network.predict(testDataSet);//预测这个数据,返回的是对个各类的概率,所以是数组
        String modelResult = predict.get(0);//它会把概率最大的放到0的位置,所以get(0)就得到预测值了
        System.out.print("\nFor a single example that is labeled " + expectedResult + " the model predicted " + modelResult + "\n\n");//打印

        if (save) {
            log.info("Save model....");
            String basePath = FilenameUtils.concat(System.getProperty("user.dir"), "src/main/resources/");//基础路径
            NetSaverLoaderUtils.saveNetworkAndParameters(network, basePath);//保存网络,参数和更新器
            NetSaverLoaderUtils.saveUpdators(network, basePath);
        }
        log.info("****************Example finished********************");
    }

    private ConvolutionLayer convInit(String name, int in, int out, int[] kernel, int[] stride, int[] pad, double bias) {
        return new ConvolutionLayer.Builder(kernel, stride, pad).name(name).nIn(in).nOut(out).biasInit(bias).build();
    }//卷积输入层,参数包括名字,过滤器数量,输出节点数,卷积核大小,步副大小,补充边框大小,偏差

    private ConvolutionLayer conv3x3(String name, int out, double bias) {
        return new ConvolutionLayer.Builder(new int[]{3,3}, new int[] {1,1}, new int[] {1,1}).name(name).nOut(out).biasInit(bias).build();
    }//3*3的卷积层,卷积核大小3*3,步副大小1*1,补充边框1*1

    private ConvolutionLayer conv5x5(String name, int out, int[] stride, int[] pad, double bias) {
        return new ConvolutionLayer.Builder(new int[]{5,5}, stride, pad).name(name).nOut(out).biasInit(bias).build();
    }//5*5的卷积层,卷积核大小5*5


    private SubsamplingLayer maxPool(String name,  int[] kernel) {
        return new SubsamplingLayer.Builder(kernel, new int[]{2,2}).name(name).build();
    }//子采样层,本例中卷积核大小是2*2,步副2*2

    private DenseLayer fullyConnected(String name, int out, double bias, double dropOut, Distribution dist) {
        return new DenseLayer.Builder().name(name).nOut(out).biasInit(bias).dropOut(dropOut).dist(dist).build();
    }//全连接层,本例中输出4096个节点,偏差为1,随机丢弃比例50%,参数服从均值为0,方差为0.005的高斯分布

    public MultiLayerNetwork lenetModel() {//le网络模型
        /**
         * Revisde Lenet Model approach developed by ramgo2 achieves slightly above random
         * Reference: https://gist.github.com/ramgo2/833f12e92359a2da9e5c2fb6333351c5
         **/
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()//构造神经网络配置
            .seed(seed)//随机种子
            .iterations(iterations)//迭代次数
            .regularization(false).l2(0.005) // tried 0.0001, 0.0005//不使用正则化,0.005没意义
            .activation("relu")//激活函数relu
            .learningRate(0.0001) // tried 0.00001, 0.00005, 0.000001//学习率
            .weightInit(WeightInit.XAVIER)//参数服从均值为0,方差为2.0/(fanIn + fanOut)的高斯分布,fanIn是上一层节点数,fanOut是当前层节点数
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)//随机梯度下降作为优化器
            .updater(Updater.RMSPROP).momentum(0.9)//采用可变学习率,动量衰减参数为0.9的参数优化方法
            .list()//list代表多层网络,0,1,2,3,4层已经介绍过,5层是输出层,
            .layer(0, convInit("cnn1", channels, 50 ,  new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}, 0))
            .layer(1, maxPool("maxpool1", new int[]{2,2}))
            .layer(2, conv5x5("cnn2", 100, new int[]{5, 5}, new int[]{1, 1}, 0))
            .layer(3, maxPool("maxool2", new int[]{2,2}))
            .layer(4, new DenseLayer.Builder().nOut(500).build())
            .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)//使用交叉熵作为损失函数
                .nOut(numLabels)//输出节点数就是类的个数
                .activation("softmax")//softmax作为激活函数
                .build())
            .backprop(true).pretrain(false)//使用后向反馈,不使用预训练
            .cnnInputSize(height, width, channels).build();//输入的高,宽,过滤器数量

        return new MultiLayerNetwork(conf);//传入配置

    }

    public MultiLayerNetwork alexnetModel() {//本例中用的alexnet
        /**
         * AlexNet model interpretation based on the original paper ImageNet Classification with Deep Convolutional Neural Networks
         * and the imagenetExample code referenced.
         * http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
         **/

        double nonZeroBias = 1;//偏差
        double dropOut = 0.5;//随机丢弃比例

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()//和lenetModel一样
            .seed(seed)
            .weightInit(WeightInit.DISTRIBUTION)//根据给定的分布采样参数
            .dist(new NormalDistribution(0.0, 0.01))//均值为0,方差为0.01的正态分布
            .activation("relu")
            .updater(Updater.NESTEROVS)//采用梯度修正的参数优化方法
            .iterations(iterations)
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding gradients//采用除以梯度2范数来规范化梯度防止梯度消失或突变
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .learningRate(1e-2)
            .biasLearningRate(1e-2*2)//偏差学习率
            .learningRateDecayPolicy(LearningRatePolicy.Step)//按步按批衰减学习率
            .lrPolicyDecayRate(0.1)//设置衰减率
            .lrPolicySteps(100000)//设置衰减步
            .regularization(true)
            .l2(5 * 1e-4)
            .momentum(0.9)
            .miniBatch(false)//不限制最小批
            .list()//13层的网络,第1,3层构建了alexnet计算层,目的是对当前输出的结果做平滑处理,参数有相邻核映射数n=5,规范化常亮k=2,指数常量beta=0.75,系数常量alpha=1e-4
            .layer(0, convInit("cnn1", channels, 96, new int[]{11, 11}, new int[]{4, 4}, new int[]{3, 3}, 0))
            .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build())
            .layer(2, maxPool("maxpool1", new int[]{3,3}))
            .layer(3, conv5x5("cnn2", 256, new int[] {1,1}, new int[] {2,2}, nonZeroBias))
            .layer(4, new LocalResponseNormalization.Builder().name("lrn2").build())
            .layer(5, maxPool("maxpool2", new int[]{3,3}))
            .layer(6,conv3x3("cnn3", 384, 0))
            .layer(7,conv3x3("cnn4", 384, nonZeroBias))
            .layer(8,conv3x3("cnn5", 256, nonZeroBias))
            .layer(9, maxPool("maxpool3", new int[]{3,3}))
            .layer(10, fullyConnected("ffn1", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 0.005)))
            .layer(11, fullyConnected("ffn2", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 0.005)))
            .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                .name("output")
                .nOut(numLabels)
                .activation("softmax")
                .build())
            .backprop(true)
            .pretrain(false)
            .cnnInputSize(height,width,channels).build();

        return new MultiLayerNetwork(conf);

    }

    public static MultiLayerNetwork customModel() {//自定义网络
        /**
         * Use this method to build your own custom model.
         **/
        return null;
    }

    public static void main(String[] args) throws Exception {
        new AnimalsClassification().run(args);//主类传参运行
    }

}


你可能感兴趣的:(深度学习,deeplearning4j)