caffe voting
The code for the voting ensemble_accuracy_layer.cpp is shown below. Because this layer computes the accuracy from the class probabilities itself, the weak-classifier models do not need their own Accuracy layers:
after the Softmax you can feed straight into the ensemble layer.
The Softmax layer and Accuracy layer configuration looks like this:
layer { name: "3_prob" type: "Softmax" bottom: "3_ip2" top: "3_prob" } layer { name: "1_accuracy" type: "Accuracy" bottom: "1_prob" bottom: "label" top: "1_accuracy" include { phase: TEST } }
As for the layer definition in caffe.proto: the ensemble layer only needs the four parameters name, type, bottom and top, so nothing has to be added to caffe.proto (message LayerParameter can stay untouched).
The ensemble layer configuration is as follows:
layer { name: "ensemble<span style="font-family: Arial, Helvetica, sans-serif;">_accuracy"</span> type: "Esemble" bottom: "prob1"
<pre name="code" class="cpp"> bottom: "prob2"
bottom: "prob3"bottom: "label" top: " ensemble _accuracy"
#include <algorithm>
#include <functional>
#include <utility>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void EnsembleAccuracyLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  vector<int> top_shape(0);  // the accuracy is a scalar: a blob with 0 axes
  top[0]->Reshape(top_shape);
}

template <typename Dtype>
void EnsembleAccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype accuracy = 0;
  int n_pred = bottom.size() - 1;                    // number of probability bottoms; the last bottom is the label
  const Dtype* bottom_label = bottom[n_pred]->cpu_data();
  int num = bottom[0]->num();                        // number of samples
  int dim = bottom[0]->count() / bottom[0]->num();   // dimensionality (number of classes) per sample
  Dtype max_prob = 0;                                // highest probability seen so far
  int arg_max = 0;
  Dtype prob_pred = 0;
  // For each data point
  for (int i = 0; i < num; ++i) {                    // iterate over the samples
    int label = static_cast<int>(bottom_label[i]);
    // Find if any classifier is correct
    for (int j = 0; j < n_pred; ++j) {
      const Dtype* bottom_data = bottom[j]->cpu_data();
      max_prob = 0;
      for (int k = 0; k < dim; k++) {
        prob_pred = std::max(bottom_data[i * dim + k], Dtype(kLOG_THRESHOLD));
        if (prob_pred > max_prob) {  // if this class's probability exceeds max_prob, keep it
          max_prob = prob_pred;      // save the maximum probability
          arg_max = k;               // save the corresponding label
        }
      }
      if (arg_max == label) {  // if the predicted class matches the label, the sample is counted as correct
        accuracy++;
        break;
      }
    }
  }
  top[0]->mutable_cpu_data()[0] = accuracy / (double)num;
}

INSTANTIATE_CLASS(EnsembleAccuracyLayer);
REGISTER_LAYER_CLASS(EnsembleAccuracy);

}  // namespace caffe
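The .cpp above also needs a matching class declaration, which is not shown in this post. Below is a minimal sketch of what that declaration could look like with the old Caffe 1.x layer interface (for example inside vision_layers.hpp, which the .cpp already includes); the blob-count checks and the Backward_cpu stub are my own assumptions, not taken from the original code:

// Hypothetical declaration for EnsembleAccuracyLayer (not part of the original post).
// Assumes the standard Caffe 1.x Layer<Dtype> interface used by the .cpp above.
template <typename Dtype>
class EnsembleAccuracyLayer : public Layer<Dtype> {
 public:
  explicit EnsembleAccuracyLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "EnsembleAccuracy"; }
  // At least one probability bottom plus the label bottom (assumed check).
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Like the built-in Accuracy layer, this layer does not backpropagate.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    for (int i = 0; i < propagate_down.size(); ++i) {
      if (propagate_down[i]) { NOT_IMPLEMENTED; }
    }
  }
};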
caffe adaboost implementation
Reference code: https://github.com/terrychenism/NeuralNetTests/blob/master/caffe_utils/cnn_adaboost.py
Today I implemented the AdaBoost algorithm on top of Caffe.
First train the weak classifiers; their trained models are all you need. If a model already trained with Caffe is used as a weak classifier, you only have to call Caffe with that model; there is no need to retrain the weak classifier.
The code below calls a Caffe-trained model and uses it as an AdaBoost weak classifier. It relies mainly on the sklearn library.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: Tairui Chen
from __future__ import print_function

import numpy as np
import os
import sys
import argparse
import glob
import time
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
import deepdish as dd  # provides dd.io.save / dd.apply_once used below
import caffe

# Note: DIR, CONF_DIR, DEVICE_ID, g_seed, g_loop and train_model() are defined
# elsewhere in the original repository and are not part of this excerpt.

g_rnd = np.random.randint(100000)


def create_weighted_db(X, y, weights, name='boost'):
    # Write an HDF5 database that also stores the AdaBoost sample weights.
    X = X.reshape(-1, 3, 32, 32)
    train_fn = os.path.join(DIR, name + '.h5')
    dd.io.save(train_fn, dict(data=X,
                              label=y.astype(np.float32),
                              sample_weight=weights), compress=False)
    with open(os.path.join(DIR, name + '.txt'), 'w') as f:
        print(train_fn, file=f)


class CNN(BaseEstimator, ClassifierMixin):
    """sklearn-compatible wrapper around a Caffe net, usable as an AdaBoost base estimator."""

    def __init__(self):
        pass

    def get_params(self, deep=False):
        return {}

    def fit(self, X, y, sample_weight=None):
        global g_seed
        global g_loop
        if sample_weight is None:
            sample_weight = np.ones(X.shape[0], np.float32)
            print('Calling fit with sample_weight None')
        else:
            sample_weight *= X.shape[0]
            print('Calling fit with sample_weight sum', sample_weight.sum())
        w = sample_weight
        print('Weight max:', w.max())
        print('Weight min:', w.min())

        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)

        # Set up the weighted database for this boosting round
        create_weighted_db(X, y, sample_weight)

        # Training schedule handed to train_model() / the solver template
        steps = [(0.001, 0.004, 60000),
                 (0.0001, 0.004, 5000),
                 (0.00001, 0.004, 5000)]

        name = os.path.join(CONF_DIR, 'adaboost_{}_loop{}'.format(g_rnd, g_loop))
        bare_conf_fn = os.path.join(CONF_DIR, 'boost_bare.prototxt')
        conf_fn = os.path.join(CONF_DIR, 'solver.prototxt.template')

        net, info = train_model(name, conf_fn, bare_conf_fn, steps,
                                seed=g_seed, device_id=DEVICE_ID)

        loss_fn = 'info/info_{}_loop{}.h5'.format(g_rnd, g_loop)
        dd.io.save(loss_fn, info)
        print('Saved to', loss_fn)

        g_loop += 1
        print('Classifier set up')
        self.net_ = net

    def predict_proba(self, X):
        X = X.reshape(-1, 3, 32, 32)
        prob = np.zeros((X.shape[0], self.n_classes_))
        M = 2500  # batch size for the forward passes
        for k in range(int(np.ceil(X.shape[0] / float(M)))):
            y = list(self.net_.forward_all(data=X[k*M:(k+1)*M]).values())[0].squeeze(axis=(2, 3))
            prob[k*M:(k+1)*M] = y

        # Soften the probabilities with a temperature before handing them to SAMME.R
        T = 30.0
        eps = 0.0001
        #prob = prob.clip(eps, 1-eps)
        log_prob = np.log(prob)
        print('log_prob', log_prob.min(), log_prob.max())
        new_prob = np.exp(log_prob / T)
        new_prob /= dd.apply_once(np.sum, new_prob, [1])
        return new_prob

    def predict(self, X):
        prob = self.predict_proba(X)
        return prob.argmax(-1)


train_data = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/train_data.npy')
train_labels = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/train_labels.npy')

model_path = 'cifar10/'  # substitute your path here
net_fn = model_path + 'VGG_mini_ABN.prototxt'
param_fn = model_path + 'cifar10_vgg_iter_120000.caffemodel'

caffe.set_mode_cpu()
net = caffe.Classifier(net_fn, param_fn,
                       mean=np.float32([104.0, 116.0, 122.0]),  # ImageNet mean, training set dependent
                       channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB


def preprocess(net, img):
    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']


# Run a few training images through the pre-trained net as a sanity check
for i in range(10):
    img = train_data[i].transpose((1, 2, 0)) * 255
    img = img.astype(np.uint8)[:, :, ::-1]
    end = 'prob'
    h, w = img.shape[:2]
    src, dst = net.blobs['data'], net.blobs[end]
    src.data[0] = preprocess(net, img)
    net.forward(end=end)
    features = dst.data[0].copy()

X = train_data
y = train_labels
X *= 255.0
mean_x = X.mean(0)
X -= mean_x

te_X = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/test_data.npy')
te_y = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/test_labels.npy')
create_weighted_db(te_X, te_y, np.ones(te_X.shape[0], dtype=np.float32), name='test')

clf = AdaBoostClassifier(base_estimator=CNN(),
                         algorithm='SAMME.R',
                         n_estimators=10,
                         random_state=0)
clf.fit(X.reshape(X.shape[0], -1), y)

for i, score in enumerate(clf.staged_score(X.reshape(X.shape[0], -1), y)):
    print(i + 1, 'train score', score)

for i, score in enumerate(clf.staged_score(te_X.reshape(te_X.shape[0], -1), te_y)):
    print(i + 1, 'test score', score)