深度学习框架很多,我讲的是deeplearning4j,因为它能和spark结合,代码是java,虽然我java也很烂
数据源是4种动物的照片,有熊,鸭,鹿,龟
示例的分类结果不是很理想,建议我们通过以下方式提高:
1.增加照片数量
2.进行更多的数据预处理
3.增加训练轮数(epoch),所有训练数据完整训练一遍才算一轮
4.调整模型配置
5.调整学习率,更新器,激活函数,损失函数,正则化参数等
这些也是深度学习在实战中普遍需要面对的问题
贴出代码,dl4j几乎没什么注释,我只能根据相关资料来分析,有不对的地方还望指出
public class AnimalsClassification { protected static final Logger log = LoggerFactory.getLogger(AnimalsClassification.class);//通过反射获取日志名 protected static int height = 100;//照片是100*100 protected static int width = 100; protected static int channels = 3;//过滤器数量,就是输入层和几个过滤器连接,每个过滤器都按不同规则对输入层进行处理 protected static int numExamples = 80;//80个样本 protected static int numLabels = 4;//4个类别 protected static int batchSize = 20;//每次处理20个样本,这80个样本分4批训练完,参数会更新4次,80个样本训练完了才是一步训练 protected static long seed = 42; protected static Random rng = new Random(seed);//随机数生成器 protected static int listenerFreq = 1;//参数更新一次,就打印一次score protected static int iterations = 1;//每步训练的迭代次数,正常来讲4批是一次,但是我们可以增加迭代次数让每步训练迭代更多次 protected static int epochs = 50;//训练步数,够多的,时间应该很长 protected static double splitTrainTest = 0.8;//80%训练,20%测试 protected static int nCores = 2;//装载数据的队列数 protected static boolean save = false;//不存储 protected static String modelType = "AlexNet"; // LeNet, AlexNet or Custom but you need to fill it out//使用AlexNet网络 public void run(String[] args) throws Exception { log.info("Load data...."); /**cd * Data Setup -> organize and limit data file paths: * - mainPath = path to image files//图片路径 * - fileSplit = define basic dataset split with limits on format//定义数据划分 * - pathFilter = define additional file load filter to limit size and balance batch content//定义额外的文件加载过滤器用来限制大小平衡批内容 **/ ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator();//按文件名产生标签0,1,2,3 File mainPath = new File(System.getProperty("user.dir"), "dl4j-examples/src/main/resources/animals/");//图片主路径 FileSplit fileSplit = new FileSplit(mainPath, NativeImageLoader.ALLOWED_FORMATS, rng);//把所有图片弄成一个经过shuffle的数组 BalancedPathFilter pathFilter = new BalancedPathFilter(rng, labelMaker, numExamples, numLabels, batchSize);//平衡每个batch中label的数量 /** * Data Setup -> train test split//划分训练和测试集 * - inputSplit = define train and test split **/ InputSplit[] inputSplit = fileSplit.sample(pathFilter, numExamples * (1 + 
splitTrainTest), numExamples * (1 - splitTrainTest));//第二个参数是144,第三个是16,这里不是普通的80%,20%,内部有自己的策略,所以最终训练测试条数也跟预想的不一致 InputSplit trainData = inputSplit[0];//训练数据68条 InputSplit testData = inputSplit[1];//测试数据8条 /** * Data Setup -> transformation//把图片数据转换成数字数据集 * - Transform = how to tranform images and generate large dataset to train on **/ ImageTransform flipTransform1 = new FlipImageTransform(rng);//构建图片转换的实例,不翻转或者只进行水平垂直翻转 ImageTransform flipTransform2 = new FlipImageTransform(new Random(123)); ImageTransform warpTransform = new WarpImageTransform(rng, 42);//这个实例可以进行翻转,最大翻转为42 // ImageTransform colorTransform = new ColorConversionTransform(new Random(seed), COLOR_BGR2YCrCb); Listtransforms = Arrays.asList(new ImageTransform[]{flipTransform1, warpTransform, flipTransform2});//这就符合我们说的3个channel了 /** * Data Setup -> normalization * - how to normalize images and generate large dataset to train on **/ DataNormalization scaler = new ImagePreProcessingScaler(0, 1);//把像素规范化到0,1区间 log.info("Build model...."); // Uncomment below to try AlexNet. 
Note change height and width to at least 100//下面的注释尝试AlexNet,注意高和宽至少要有100个像素 // MultiLayerNetwork network = new AlexNet(height, width, channels, numLabels, seed, iterations).init(); MultiLayerNetwork network; switch (modelType) {//匹配网络,这里我们用的是AlexNet case "LeNet": network = lenetModel(); break; case "AlexNet": network = alexnetModel(); break; case "custom": network = customModel(); break; default: throw new InvalidInputTypeException("Incorrect model provided."); } network.init();//初始化网络 network.setListeners(new ScoreIterationListener(listenerFreq));//设置监听器,参数更新一次就打印一次score /** * Data Setup -> define how to load data into net://定义如何把数据载入网络 * - recordReader = the reader that loads and converts image data pass in inputSplit to initialize//reader装载并转换图片数据,并传入数组inputSplit完成初始化 * - dataIter = a generator that only loads one batch at a time into memory to save memory//数据迭代器,每次只载入一批数据到内存 * - trainIter = uses MultipleEpochsIterator to ensure model runs through the data for all epochs//训练迭代器,使用多步迭代器保证模型每步迭代都能使用所有数据 **/ ImageRecordReader recordReader = new ImageRecordReader(height, width, channels, labelMaker);//构建图片读取器 DataSetIterator dataIter;//数据迭代器 MultipleEpochsIterator trainIter;//训练迭代器 log.info("Train model...."); // Train without transformations//训练不翻转的数据,目的是跑一遍初始化参数 recordReader.initialize(trainData, null);//初始化读取器 dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numLabels);//构造数据迭代器,传入读取器,批大小,label索引,label数量 scaler.fit(dataIter);//收集规范化统计信息 dataIter.setPreProcessor(scaler);//对数据进行规范化 trainIter = new MultipleEpochsIterator(epochs, dataIter, nCores);//构建训练迭代器传入步数,数据迭代器,队列数 network.fit(trainIter);//训练数据 // Train with transformations//训练翻转的数据,有了初始化参数再上各种翻转数据 for (ImageTransform transform : transforms) {//3组翻转过滤器,代码和之前一样 System.out.print("\nTraining on transformation: " + transform.getClass().toString() + "\n\n"); recordReader.initialize(trainData, transform); dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numLabels); 
scaler.fit(dataIter); dataIter.setPreProcessor(scaler); trainIter = new MultipleEpochsIterator(epochs, dataIter, nCores);// network.fit(trainIter); } log.info("Evaluate model...."); recordReader.initialize(testData);//初始化测试数据 dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numLabels); scaler.fit(dataIter);//统计信息是分批统计的,这个要注意下 dataIter.setPreProcessor(scaler); Evaluation eval = network.evaluate(dataIter);//评估测试数据 log.info(eval.stats(true));//打印测试统计信息 // Example on how to get predict results with trained model//如果获取预测结果 dataIter.reset();//清空数据迭代器 DataSet testDataSet = dataIter.next();//清空再next获取的是一条数据 String expectedResult = testDataSet.getLabelName(0);//获取label,索引是0 List predict = network.predict(testDataSet);//预测这个数据,返回的是对个各类的概率,所以是数组 String modelResult = predict.get(0);//它会把概率最大的放到0的位置,所以get(0)就得到预测值了 System.out.print("\nFor a single example that is labeled " + expectedResult + " the model predicted " + modelResult + "\n\n");//打印 if (save) { log.info("Save model...."); String basePath = FilenameUtils.concat(System.getProperty("user.dir"), "src/main/resources/");//基础路径 NetSaverLoaderUtils.saveNetworkAndParameters(network, basePath);//保存网络,参数和更新器 NetSaverLoaderUtils.saveUpdators(network, basePath); } log.info("****************Example finished********************"); } private ConvolutionLayer convInit(String name, int in, int out, int[] kernel, int[] stride, int[] pad, double bias) { return new ConvolutionLayer.Builder(kernel, stride, pad).name(name).nIn(in).nOut(out).biasInit(bias).build(); }//卷积输入层,参数包括名字,过滤器数量,输出节点数,卷积核大小,步副大小,补充边框大小,偏差 private ConvolutionLayer conv3x3(String name, int out, double bias) { return new ConvolutionLayer.Builder(new int[]{3,3}, new int[] {1,1}, new int[] {1,1}).name(name).nOut(out).biasInit(bias).build(); }//3*3的卷积层,卷积核大小3*3,步副大小1*1,补充边框1*1 private ConvolutionLayer conv5x5(String name, int out, int[] stride, int[] pad, double bias) { return new ConvolutionLayer.Builder(new int[]{5,5}, stride, 
pad).name(name).nOut(out).biasInit(bias).build(); }//5*5的卷积层,卷积核大小5*5 private SubsamplingLayer maxPool(String name, int[] kernel) { return new SubsamplingLayer.Builder(kernel, new int[]{2,2}).name(name).build(); }//子采样层,本例中卷积核大小是2*2,步副2*2 private DenseLayer fullyConnected(String name, int out, double bias, double dropOut, Distribution dist) { return new DenseLayer.Builder().name(name).nOut(out).biasInit(bias).dropOut(dropOut).dist(dist).build(); }//全连接层,本例中输出4096个节点,偏差为1,随机丢弃比例50%,参数服从均值为0,方差为0.005的高斯分布 public MultiLayerNetwork lenetModel() {//le网络模型 /** * Revisde Lenet Model approach developed by ramgo2 achieves slightly above random * Reference: https://gist.github.com/ramgo2/833f12e92359a2da9e5c2fb6333351c5 **/ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()//构造神经网络配置 .seed(seed)//随机种子 .iterations(iterations)//迭代次数 .regularization(false).l2(0.005) // tried 0.0001, 0.0005//不使用正则化,0.005没意义 .activation("relu")//激活函数relu .learningRate(0.0001) // tried 0.00001, 0.00005, 0.000001//学习率 .weightInit(WeightInit.XAVIER)//参数服从均值为0,方差为2.0/(fanIn + fanOut)的高斯分布,fanIn是上一层节点数,fanOut是当前层节点数 .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)//随机梯度下降作为优化器 .updater(Updater.RMSPROP).momentum(0.9)//采用可变学习率,动量衰减参数为0.9的参数优化方法 .list()//list代表多层网络,0,1,2,3,4层已经介绍过,5层是输出层, .layer(0, convInit("cnn1", channels, 50 , new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}, 0)) .layer(1, maxPool("maxpool1", new int[]{2,2})) .layer(2, conv5x5("cnn2", 100, new int[]{5, 5}, new int[]{1, 1}, 0)) .layer(3, maxPool("maxool2", new int[]{2,2})) .layer(4, new DenseLayer.Builder().nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)//使用交叉熵作为损失函数 .nOut(numLabels)//输出节点数就是类的个数 .activation("softmax")//softmax作为激活函数 .build()) .backprop(true).pretrain(false)//使用后向反馈,不使用预训练 .cnnInputSize(height, width, channels).build();//输入的高,宽,过滤器数量 return new MultiLayerNetwork(conf);//传入配置 } public MultiLayerNetwork alexnetModel() 
{//本例中用的alexnet /** * AlexNet model interpretation based on the original paper ImageNet Classification with Deep Convolutional Neural Networks * and the imagenetExample code referenced. * http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf **/ double nonZeroBias = 1;//偏差 double dropOut = 0.5;//随机丢弃比例 MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()//和lenetModel一样 .seed(seed) .weightInit(WeightInit.DISTRIBUTION)//根据给定的分布采样参数 .dist(new NormalDistribution(0.0, 0.01))//均值为0,方差为0.01的正态分布 .activation("relu") .updater(Updater.NESTEROVS)//采用梯度修正的参数优化方法 .iterations(iterations) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding gradients//采用除以梯度2范数来规范化梯度防止梯度消失或突变 .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .learningRate(1e-2) .biasLearningRate(1e-2*2)//偏差学习率 .learningRateDecayPolicy(LearningRatePolicy.Step)//按步按批衰减学习率 .lrPolicyDecayRate(0.1)//设置衰减率 .lrPolicySteps(100000)//设置衰减步 .regularization(true) .l2(5 * 1e-4) .momentum(0.9) .miniBatch(false)//不限制最小批 .list()//13层的网络,第1,3层构建了alexnet计算层,目的是对当前输出的结果做平滑处理,参数有相邻核映射数n=5,规范化常亮k=2,指数常量beta=0.75,系数常量alpha=1e-4 .layer(0, convInit("cnn1", channels, 96, new int[]{11, 11}, new int[]{4, 4}, new int[]{3, 3}, 0)) .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build()) .layer(2, maxPool("maxpool1", new int[]{3,3})) .layer(3, conv5x5("cnn2", 256, new int[] {1,1}, new int[] {2,2}, nonZeroBias)) .layer(4, new LocalResponseNormalization.Builder().name("lrn2").build()) .layer(5, maxPool("maxpool2", new int[]{3,3})) .layer(6,conv3x3("cnn3", 384, 0)) .layer(7,conv3x3("cnn4", 384, nonZeroBias)) .layer(8,conv3x3("cnn5", 256, nonZeroBias)) .layer(9, maxPool("maxpool3", new int[]{3,3})) .layer(10, fullyConnected("ffn1", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 0.005))) .layer(11, fullyConnected("ffn2", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 
0.005))) .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .name("output") .nOut(numLabels) .activation("softmax") .build()) .backprop(true) .pretrain(false) .cnnInputSize(height,width,channels).build(); return new MultiLayerNetwork(conf); } public static MultiLayerNetwork customModel() {//自定义网络 /** * Use this method to build your own custom model. **/ return null; } public static void main(String[] args) throws Exception { new AnimalsClassification().run(args);//主类传参运行 } }