FR-0912

Faster R-CNN 纯C++版本 + 使用最新的caffe

本方法参照纯C++版的Faster-Rcnn（通过caffe自定义RPN层实现）进行设置更改，略有改动。
核心思想：通过添加自定义层（RPN层）代替python层，实现c++版的Faster R-CNN

Faster R-CNN 纯C++版本融合最新caffe

（1）添加自定义层 rpn_layer.hpp ，把它放在 caffe/include/caffe/layers/ 目录下

#ifndef CAFFE_RPN_LAYER_HPP_
#define CAFFE_RPN_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
//#include"opencv2/opencv.hpp"

// Function-like min/max helpers. Each expansion is wrapped in an outer pair of
// parentheses so the ternary cannot be split apart by neighbouring operators
// (e.g. `2 * mymax(a, b)` or `10 - mymin(a, b)`).
// The arguments are still evaluated twice, so do not pass expressions with
// side effects.
#define mymax(a,b) (((a)>(b))?(a):(b))
#define mymin(a,b) (((a)>(b))?(b):(a))
namespace caffe {

    /**
    * @brief implement RPN layer for faster rcnn
    */

    template <typename Dtype>
    class RPNLayer : public Layer {
    public:
        explicit RPNLayer(const LayerParameter& param)
            : Layer(param) {
                m_score_.reset(new Blob());
                m_box_.reset(new Blob());
                local_anchors_.reset(new Blob());
            }
        virtual void LayerSetUp(const vector*>& bottom,
            const vector*>& top);
        virtual void Reshape(const vector*>& bottom,
            const vector*>& top){}
        virtual inline const char* type() const { return "RPN"; }

        struct abox{
            Dtype batch_ind;
            Dtype x1;
            Dtype y1;
            Dtype x2;
            Dtype y2;
            Dtype score;
            bool operator <(const abox&tmp) const{
                return score < tmp.score;
            }
        };

    protected:
        virtual void Forward_cpu(const vector*>& bottom,
            const vector*>& top);
        //virtual void Forward_gpu(const vector*>& bottom,
            //const vector*>& top);
        virtual void Backward_cpu(const vector*>& top,
            const vector<bool>& propagate_down, const vector*>& bottom){};


        int feat_stride_;
        int base_size_;
        int min_size_;
        int pre_nms_topN_;
        int post_nms_topN_;
        float nms_thresh_;
        vector<int> anchor_scales_;
        vector<float> ratios_;


        vector<vector<float> > gen_anchors_;
        int *anchors_;
        int anchors_nums_;
        int src_height_;
        int src_width_;
        float src_scale_;
        int map_width_;
        int map_height_;

        shared_ptr > m_score_;
        shared_ptr > m_box_;
        shared_ptr >local_anchors_;
        void generate_anchors();
        vector<vector<float> > ratio_enum(vector<float>);
        vector<float> whctrs(vector<float>);
        vector<float> mkanchor(float w,float h,float x_ctr,float y_ctr);
        vector<vector<float> > scale_enum(vector<float>);

        //cv::Mat proposal_local_anchor(int width, int height);
        void proposal_local_anchor();
        void bbox_tranform_inv();
        cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta);
        void nms(std::vector &input_boxes, float nms_thresh);
        void filter_boxs(cv::Mat& pre_box, cv::Mat& score, vector& aboxes);
        void filter_boxs(vector& aboxes);
    };
}  // namespace caffe

#endif  // CAFFE_RPN_LAYER_HPP_

（2）然后是源文件 rpn_layer.cpp，放在 caffe/src/caffe/layers/ 目录下

#include <algorithm>
#include <vector>

#include "caffe/layers/rpn_layer.hpp"
#include "caffe/util/math_functions.hpp"
#include <opencv2/opencv.hpp>

// Debug switch; nothing in the code shown here reads it.
int debug = 0;

// Hard-coded table of nine anchors, four coordinates per row. Only the
// commented-out memcpy in LayerSetUp refers to it; the anchors actually used
// are produced by generate_anchors().
int  tmp[9][4] = {
    {  -83,  -39, 100,  56 }, {  -175,  -87, 192, 104 }, { -359, -183, 376, 200 },
    {  -55,  -55,  72,  72 }, {  -119, -119, 136, 136 }, { -247, -247, 264, 264 },
    {  -35,  -79,  52,  96 }, {   -79, -167,  96, 184 }, { -167, -343, 184, 360 }
};
namespace caffe {

    template <typename Dtype>
    void RPNLayer::LayerSetUp(
        const vector*>& bottom, const vector*>& top) {
        anchor_scales_.clear();
        ratios_.clear();
        feat_stride_ = this->layer_param_.rpn_param().feat_stride();
        base_size_ = this->layer_param_.rpn_param().basesize();
        min_size_ = this->layer_param_.rpn_param().boxminsize();
        pre_nms_topN_ = this->layer_param_.rpn_param().per_nms_topn();
        post_nms_topN_ = this->layer_param_.rpn_param().post_nms_topn();
        nms_thresh_ = this->layer_param_.rpn_param().nms_thresh();
        int scales_num = this->layer_param_.rpn_param().scale_size();
        for (int i = 0; i < scales_num; ++i)
        {
            anchor_scales_.push_back(this->layer_param_.rpn_param().scale(i));
        }
        int ratios_num = this->layer_param_.rpn_param().ratio_size();
        for (int i = 0; i < ratios_num; ++i)
        {
            ratios_.push_back(this->layer_param_.rpn_param().ratio(i));
        }


        //anchors_nums_ = 9;
        //anchors_ = new int[anchors_nums_ * 4];
        //memcpy(anchors_, tmp, 9 * 4 * sizeof(int));

        generate_anchors();

        anchors_nums_ = gen_anchors_.size();
        anchors_ = new int[anchors_nums_ * 4];
        for (int i = 0; ifor (int j = 0; j4+j] = gen_anchors_[i][j];
            }
        }
        top[0]->Reshape(1, 5, 1, 1);
        if (top.size() > 1)
        {
            top[1]->Reshape(1, 1, 1, 1);
        }
    }

    template <typename Dtype>
    void RPNLayer::generate_anchors(){
        //generate base anchor
        vector<float> base_anchor;
        base_anchor.push_back(0);
        base_anchor.push_back(0);
        base_anchor.push_back(base_size_ - 1);
        base_anchor.push_back(base_size_ - 1);
        //enum ratio anchors
        vector<vector<float> >ratio_anchors = ratio_enum(base_anchor);
        for (int i = 0; i < ratio_anchors.size(); ++i)
        {
            vector<vector<float> > tmp = scale_enum(ratio_anchors[i]);
            gen_anchors_.insert(gen_anchors_.end(), tmp.begin(), tmp.end());
        }
    }

    /**
     * Produces one anchor per entry of anchor_scales_.
     *
     * @param anchor box as (x1, y1, x2, y2).
     * @return boxes sharing the centre of `anchor`, with width and height
     *         multiplied by each scale, in the order of anchor_scales_.
     */
    template <typename Dtype>
    vector<vector<float> > RPNLayer<Dtype>::scale_enum(vector<float> anchor){
        // whctrs returns (w, h, x_ctr, y_ctr).
        const vector<float> reform_anchor = whctrs(anchor);
        const float w = reform_anchor[0];
        const float h = reform_anchor[1];
        const float x_ctr = reform_anchor[2];
        const float y_ctr = reform_anchor[3];

        vector<vector<float> > result;
        for (size_t i = 0; i < anchor_scales_.size(); ++i)
        {
            const float ws = w * anchor_scales_[i];
            const float hs = h * anchor_scales_[i];
            result.push_back(mkanchor(ws, hs, x_ctr, y_ctr));
        }
        return result;
    }


    /**
     * Produces one anchor per entry of ratios_.
     *
     * @param anchor box as (x1, y1, x2, y2).
     * @return boxes sharing the centre of `anchor`. For each ratio r the width
     *         is round(sqrt(area / r)) and the height is round(width * r), so
     *         the area stays approximately that of `anchor`.
     */
    template <typename Dtype>
    vector<vector<float> > RPNLayer<Dtype>::ratio_enum(vector<float> anchor){
        // whctrs returns (w, h, x_ctr, y_ctr).
        const vector<float> reform_anchor = whctrs(anchor);
        const float x_ctr = reform_anchor[2];
        const float y_ctr = reform_anchor[3];
        const float size = reform_anchor[0] * reform_anchor[1];

        vector<vector<float> > result;
        for (size_t i = 0; i < ratios_.size(); ++i)
        {
            const float size_ratios = size / ratios_[i];
            const float ws = round(sqrt(size_ratios));
            const float hs = round(ws*ratios_[i]);
            result.push_back(mkanchor(ws, hs, x_ctr, y_ctr));
        }
        return result;
    }

    /**
     * Builds a box from its size and centre.
     *
     * @param w,h         box width and height (inclusive pixel counts).
     * @param x_ctr,y_ctr box centre.
     * @return (x1, y1, x2, y2) with x2 - x1 + 1 == w and y2 - y1 + 1 == h.
     */
    template <typename Dtype>
    vector<float> RPNLayer<Dtype>::mkanchor(float w, float h, float x_ctr, float y_ctr){
        const float half_w = 0.5*(w - 1);
        const float half_h = 0.5*(h - 1);

        vector<float> box;
        box.push_back(x_ctr - half_w);
        box.push_back(y_ctr - half_h);
        box.push_back(x_ctr + half_w);
        box.push_back(y_ctr + half_h);
        return box;
    }
    /**
     * Converts a corner-form box into size/centre form.
     *
     * @param anchor box as (x1, y1, x2, y2).
     * @return (w, h, x_ctr, y_ctr), where w and h count both end pixels.
     */
    template <typename Dtype>
    vector<float> RPNLayer<Dtype>::whctrs(vector<float> anchor){
        vector<float> result;
        result.push_back(anchor[2] - anchor[0] + 1);   // w
        result.push_back(anchor[3] - anchor[1] + 1);   // h
        result.push_back((anchor[2] + anchor[0]) / 2); // x centre
        result.push_back((anchor[3] + anchor[1]) / 2); // y centre
        return result;
    }


    /*template 
    cv::Mat RPNLayer::proposal_local_anchor(int width, int height)
    {
        Blob shift;
        cv::Mat shitf_x(height, width, CV_32SC1);
        cv::Mat shitf_y(height, width, CV_32SC1);
        for (size_t i = 0; i < width; i++)
        {
            for (size_t j = 0; j < height; j++)
            {
                shitf_x.at(j, i) = i * feat_stride_;
                shitf_y.at(j, i) = j * feat_stride_;
            }
        }
        shift.Reshape(anchors_nums_, width*height, 4,  1);
        float *p = shift.mutable_cpu_diff(), *a = shift.mutable_cpu_data();
        for (int i = 0; i < height*width; i++)
        {
            for (int j = 0; j < anchors_nums_; j++)
            {
                size_t num = i * 4 + j * 4 * height*width;
                p[num + 0] = -shitf_x.at(i / shitf_x.cols, i % shitf_x.cols);
                p[num + 2] = -shitf_x.at(i / shitf_x.cols, i % shitf_x.cols);
                p[num + 1] = -shitf_y.at(i / shitf_y.cols, i % shitf_y.cols);
                p[num + 3] = -shitf_y.at(i / shitf_y.cols, i % shitf_y.cols);
                a[num + 0] = anchors_[j * 4 + 0];
                a[num + 1] = anchors_[j * 4 + 1];
                a[num + 2] = anchors_[j * 4 + 2];
                a[num + 3] = anchors_[j * 4 + 3];
            }
        }
        shift.Update();
        cv::Mat loacl_anchors(anchors_nums_ * height*width, 4, CV_32FC1);
        size_t num = 0;
        for (int i = 0; i < height; ++i)
        {
            for (int j = 0; j < width; ++j)
            {
                for (int c = 0; c < anchors_nums_; ++c)
                {
                    for (int k = 0; k < 4; ++k)
                    {
                        loacl_anchors.at((i*width + j)*anchors_nums_+c, k)= shift.data_at(c, i*width + j, k, 0);
                    }
                }
            }
        }
        return loacl_anchors;
    }*/

    template <typename Dtype>
    void RPNLayer::proposal_local_anchor(){
        int length = mymax(map_width_, map_height_);
        int step = map_width_*map_height_;
        int *map_m = new int[length];
        for (int i = 0; i < length; ++i)
        {
            map_m[i] = i*feat_stride_;
        }
        Dtype *shift_x = new Dtype[step];
        Dtype *shift_y = new Dtype[step];
        for (int i = 0; i < map_height_; ++i)
        {
            for (int j = 0; j < map_width_; ++j)
            {
                shift_x[i*map_width_ + j] = map_m[j];
                shift_y[i*map_width_ + j] = map_m[i];
            }
        }
        local_anchors_->Reshape(1, anchors_nums_ * 4, map_height_, map_width_);
        Dtype *a = local_anchors_->mutable_cpu_data();
        for (int i = 0; i < anchors_nums_; ++i)
        {
            caffe_set(step, Dtype(anchors_[i * 4 + 0]), a + (i * 4 + 0) *step);
            caffe_set(step, Dtype(anchors_[i * 4 + 1]), a + (i * 4 + 1) *step);
            caffe_set(step, Dtype(anchors_[i * 4 + 2]), a + (i * 4 + 2) *step);
            caffe_set(step, Dtype(anchors_[i * 4 + 3]), a + (i * 4 + 3) *step);
            caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 0)*step);
            caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 2)*step);
            caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 1)*step);
            caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 3)*step);
        }
    }

    template<typename Dtype>
    void RPNLayer::filter_boxs(cv::Mat& pre_box, cv::Mat& score, vector& aboxes)
    {
        float localMinSize=min_size_*src_scale_;
        aboxes.clear();

        for (int i = 0; i < pre_box.rows; i++)
        {
            int widths = pre_box.at<float>(i, 2) - pre_box.at<float>(i, 0) + 1;
            int heights = pre_box.at<float>(i, 3) - pre_box.at<float>(i, 1) + 1;
            if (widths >= localMinSize || heights >= localMinSize)
            {
                abox tmp;
                tmp.x1 = pre_box.at<float>(i, 0);
                tmp.y1 = pre_box.at<float>(i, 1);
                tmp.x2 = pre_box.at<float>(i, 2);
                tmp.y2 = pre_box.at<float>(i, 3);
                tmp.score = score.at<float>(i, 0);
                aboxes.push_back(tmp);
            }
        }
    }

    template<typename Dtype>
    void RPNLayer::filter_boxs(vector& aboxes)
    {
        float localMinSize = min_size_*src_scale_;
        aboxes.clear();
        int map_width = m_box_->width();
        int map_height = m_box_->height();
        int map_channel = m_box_->channels();
        const Dtype *box = m_box_->cpu_data();
        const Dtype *score = m_score_->cpu_data();

        int step = 4 * map_height*map_width;
        int one_step = map_height*map_width;
        int offset_w, offset_h, offset_x, offset_y, offset_s;

        for (int h = 0; h < map_height; ++h)
        {
            for (int w = 0; w < map_width; ++w)
            {
                offset_x = h*map_width + w;
                offset_y = offset_x + one_step;
                offset_w = offset_y + one_step;
                offset_h = offset_w + one_step;
                offset_s = one_step*anchors_nums_+h*map_width + w;
                for (int c = 0; c < map_channel / 4; ++c)
                {
                    Dtype width = box[offset_w], height = box[offset_h];
                    if (width < localMinSize || height < localMinSize)
                    {
                    }
                    else
                    {
                        abox tmp;
                        tmp.batch_ind = 0;
                        tmp.x1 = box[offset_x] - 0.5*width;
                        tmp.y1 = box[offset_y] - 0.5*height;
                        tmp.x2 = box[offset_x] + 0.5*width;
                        tmp.y2 = box[offset_y] + 0.5*height;
                        tmp.x1 = mymin(mymax(tmp.x1, 0), src_width_);
                        tmp.y1 = mymin(mymax(tmp.y1, 0), src_height_);
                        tmp.x2 = mymin(mymax(tmp.x2, 0), src_width_);
                        tmp.y2 = mymin(mymax(tmp.y2, 0), src_height_);
                        tmp.score = score[offset_s];
                        aboxes.push_back(tmp);
                    }
                    offset_x += step;
                    offset_y += step;
                    offset_w += step;
                    offset_h += step;
                    offset_s += one_step;
                }
            }
        }
    }

    template<typename Dtype>
    void RPNLayer::bbox_tranform_inv(){
        int channel = m_box_->channels();
        int height = m_box_->height();
        int width = m_box_->width();
        int step = height*width;
        Dtype * a = m_box_->mutable_cpu_data();
        Dtype * b = local_anchors_->mutable_cpu_data();
        for (int i = 0; i < channel / 4; ++i)
        {
            caffe_axpy(2*step, Dtype(-1), b + (i * 4 + 0)*step, b + (i * 4 + 2)*step);
            caffe_add_scalar(2 * step, Dtype(1), b + (i * 4 + 2)*step);
            caffe_axpy(2*step, Dtype(0.5), b + (i * 4 + 2)*step, b + (i * 4 + 0)*step);

            caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);
            caffe_add(2 * step, b + (i * 4 + 0)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);

            caffe_exp(2*step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
            caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
        }
    }




    template<typename Dtype>
    void RPNLayer::nms(std::vector &input_boxes, float nms_thresh){
        std::vector<float>vArea(input_boxes.size());
        for (int i = 0; i < input_boxes.size(); ++i)
        {
            vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
                * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
        }
        for (int i = 0; i < input_boxes.size(); ++i)
        {
            for (int j = i + 1; j < input_boxes.size();)
            {
                float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
                float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
                float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
                float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
                float w = std::max(float(0), xx2 - xx1 + 1);
                float   h = std::max(float(0), yy2 - yy1 + 1);
                float   inter = w * h;
                float ovr = inter / (vArea[i] + vArea[j] - inter);
                if (ovr >= nms_thresh)
                {
                    input_boxes.erase(input_boxes.begin() + j);
                    vArea.erase(vArea.begin() + j);
                }
                else
                {
                    j++;
                }
            }
        }
    }

    template <typename Dtype>
    void RPNLayer::Forward_cpu(
        const vector*>& bottom,
        const vector*>& top) {

        map_width_ = bottom[1]->width();
        map_height_ = bottom[1]->height();
        //int channels = bottom[1]->channels();


        //get boxs_delta,向右。
        m_box_->CopyFrom(*(bottom[1]), false, true);
        /*cv::Mat boxs_delta(height*width*anchors_nums_, 4, CV_32FC1);
        for (int i = 0; i < height; ++i)
        {
            for (int j = 0; j < width; ++j)
            {
                for (int k = 0; k < anchors_nums_; ++k)
                {
                    for (int c = 0; c < 4; ++c)
                    {
                        boxs_delta.at((i*width + j)*anchors_nums_ + k, c) = bottom[1]->data_at(0, k*4 + c, i, j);
                    }
                }
            }
        }*/



        //get sores 向右，前面anchors_nums_个位bg的得分，后面anchors_nums_为fg得分，我们需要的是后面的。
        m_score_->CopyFrom(*(bottom[0]),false,true);

        /*cv::Mat scores(height*width*anchors_nums_, 1, CV_32FC1);
        for (int i = 0; i < height; ++i)
        {
            for (int j = 0; j < width; ++j)
            {
                for (int k = 0; k < anchors_nums_; ++k)
                {
                    scores.at((i*width + j)*anchors_nums_+k, 0) = bottom[0]->data_at(0, k + anchors_nums_, i, j);
                }
            }
        }*/

        //get im_info

        src_height_ = bottom[2]->data_at(0, 0,0,0);
        src_width_ = bottom[2]->data_at(0, 1,0,0);
        src_scale_ = bottom[2]->data_at(0, 2, 0, 0);

        //gen local anchors 向右

        proposal_local_anchor();
        //cv::Mat local_anchors = proposal_local_anchor(width, height);


        //Convert anchors into proposals via bbox transformations

        bbox_tranform_inv();

        /*for (int i = 0; i < pre_box.rows; ++i)
        {
            if (pre_box.at(i, 0) < 0)    pre_box.at(i, 0) = 0;
            if (pre_box.at(i, 0) > (src_width_ - 1)) pre_box.at(i, 0) = src_width_ - 1;
            if (pre_box.at(i, 2) < 0)    pre_box.at(i, 2) = 0;
            if (pre_box.at(i, 2) > (src_width_ - 1)) pre_box.at(i, 2) = src_width_ - 1;

            if (pre_box.at(i, 1) < 0)    pre_box.at(i, 1) = 0;
            if (pre_box.at(i, 1) > (src_height_ - 1))    pre_box.at(i, 1) = src_height_ - 1;
            if (pre_box.at(i, 3) < 0)    pre_box.at(i, 3) = 0;
            if (pre_box.at(i, 3) > (src_height_ - 1))    pre_box.at(i, 3) = src_height_ - 1;
        }*/
        vectoraboxes;

        filter_boxs(aboxes);

        //clock_t start, end;
        //start = clock();
        std::sort(aboxes.rbegin(), aboxes.rend()); //降序
        if (pre_nms_topN_ > 0)
        {
            int tmp = mymin(pre_nms_topN_, aboxes.size());
            aboxes.erase(aboxes.begin() + tmp, aboxes.end());
        }

        nms(aboxes,nms_thresh_);
        //end = clock();
        //std::cout << "sort nms:" << (double)(end - start) / CLOCKS_PER_SEC << std::endl;
        if (post_nms_topN_ > 0)
        {
            int tmp = mymin(post_nms_topN_, aboxes.size());
            aboxes.erase(aboxes.begin() + tmp, aboxes.end());
        }
        top[0]->Reshape(aboxes.size(),5,1,1);
        Dtype *top0 = top[0]->mutable_cpu_data();
        for (int i = 0; i < aboxes.size(); ++i)
        {
            //caffe_copy(aboxes.size() * 5, (Dtype*)aboxes.data(), top0);
            top0[0] = aboxes[i].batch_ind;
            top0[1] = aboxes[i].x1;
            top0[2] = aboxes[i].y1; 
            top0[3] = aboxes[i].x2;
            top0[4] = aboxes[i].y2;
            top0 += top[0]->offset(1);
        }
        if (top.size()>1)
        {
            top[1]->Reshape(aboxes.size(), 1,1,1);
            Dtype *top1 = top[1]->mutable_cpu_data();
            for (int i = 0; i < aboxes.size(); ++i)
            {
                top1[0] = aboxes[i].score;
                top1 += top[1]->offset(1);
            }
        }   
    }

// STUB_GPU(RPNLayer) is deliberately not used. The macro emits out-of-class
// definitions of RPNLayer<Dtype>::Forward_gpu / Backward_gpu, but the class
// declares neither, so a CPU_ONLY build would fail to compile. Without the
// stub the GPU entry points inherited from Layer are used instead.
    INSTANTIATE_CLASS(RPNLayer);
    REGISTER_LAYER_CLASS(RPN);

}  // namespace caffe

(3) 添加自定义层 roi pooling layer、smooth_L1_loss_layer（此层仅供训练时使用）
注意：参照的blog中提到需要添加roi pooling层，其实py-faster-rcnn的caffe-fast-rcnn中已经含有roi pooling layer，故此步骤如果用py-faster-rcnn的caffe-fast-rcnn可以不用添加，保留原始代码即可。如果用最新caffe，则需做如下操作：
1、将 py-faster-rcnn/caffe-fast-rcnn/src/caffe/layers/ 下的 roi_pooling_layer.cpp、roi_pooling_layer.cu、smooth_L1_loss_layer.cpp、smooth_L1_loss_layer.cu 四个文件放入 caffe/src/caffe/layers/ 下
注意： roi pooling layer 用 py-faster-rcnn原本的代码也可以，用参照的blog里的代码也可。
2、将 py-faster-rcnn/caffe-fast-rcnn/include/caffe/fast_rcnn_layers.hpp、data_reader.hpp 放入 caffe/include/caffe/ 下
3、将 py-faster-rcnn/caffe-fast-rcnn/src/caffe/data_reader.cpp 放入 caffe/src/caffe/ 下

（4）在caffe/src/caffe/proto/caffe.proto 中声明这三个类
在message LayerParameter 中添加：

// Fields to add inside message LayerParameter, one per added layer type.
// Each refers to a message that has to be defined at the end of caffe.proto.
optional ROIPoolingParameter roi_pooling_param = 8266711;
optional SmoothL1LossParameter smooth_l1_loss_param = 8266712;
optional RPNParameter rpn_param = 8266713;

在这个文件的最末尾，定义具体的参数

// Parameters of the ROI pooling layer (LayerParameter.roi_pooling_param).
// NOTE(review): the layer itself is not part of this document; presumably
// pooled_h / pooled_w are the pooled output size and spatial_scale maps ROI
// coordinates onto the feature map -- confirm against roi_pooling_layer.cpp.
message ROIPoolingParameter {  
  optional uint32 pooled_h = 1 [default = 0];  
  optional uint32 pooled_w = 2 [default = 0];  
  optional float spatial_scale = 3 [default = 1];  
}  
// Parameters of the RPN layer (LayerParameter.rpn_param); read once in
// RPNLayer::LayerSetUp. None of the fields declares a default.
message RPNParameter {  
  // Multiplier applied to the feature-map row/column index when shifting anchors.
  optional uint32 feat_stride = 1;  
  // Side length of the square base anchor.
  optional uint32 basesize = 2;  
  // Anchor scales; one anchor is generated per (ratio, scale) pair.
  repeated uint32 scale = 3;  
  // Anchor aspect ratios.
  repeated float ratio = 4;  
  // Minimum box side; multiplied by the image scale before filtering.
  optional uint32 boxminsize =5;  
  // Number of top-scoring boxes kept before NMS (<= 0 keeps all). The name is
  // spelled "per", not "pre"; the C++ accessor per_nms_topn() depends on it.
  optional uint32 per_nms_topn = 9;  
  // Number of boxes kept after NMS (<= 0 keeps all).
  optional uint32 post_nms_topn = 11;  
  // Overlap threshold at or above which NMS drops a box.
  optional float nms_thresh = 8;  
}
// Parameters of the smooth L1 loss layer (LayerParameter.smooth_l1_loss_param).
message SmoothL1LossParameter {
  // SmoothL1Loss(x) =
  //   0.5 * (sigma * x) ** 2    -- if x < 1.0 / sigma / sigma
  //   |x| - 0.5 / sigma / sigma -- otherwise
  optional float sigma = 1 [default = 1];
}

（5）因为自定义层使用了RPN层，为了以后程序中各处都能使用该层，所以得在common.hpp和common.cpp文件的最末尾，添加对应的代码，注意这里的namespace RPN是和namespace caffe同一级的
1、头文件common.hpp里添加

// Helpers shared by code outside the RPN layer (e.g. the detector class).
// This namespace sits at the same level as namespace caffe.
namespace RPN{
    // One scored box in corner form. operator< orders by score so that a
    // reverse-iterator std::sort yields descending scores.
    struct abox
    {
        float x1;
        float y1;
        float x2;
        float y2;
        float score;
        bool operator <(const abox&tmp) const{
            return score < tmp.score;
        }
    };
    // Greedy non-maximum suppression on `input_boxes`, in place.
    void nms(std::vector<abox>& input_boxes,float nms_thresh);
    // Applies per-row box deltas to per-row boxes; see the definition.
    cv::Mat bbox_tranform_inv(cv::Mat, cv::Mat);
} // namespace RPN

2、在源文件 common.cpp 里，为了避免出现找不到 cv::Mat 类型的编译错误，需要添加 opencv 头文件：

#include <opencv2/opencv.hpp>
using namespace cv;

在} // namespace caffe后添加：

namespace RPN{  
    cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta){  
        cv::Mat pre_box(local_anchors.rows, local_anchors.cols, CV_32FC1);  
        for (int i = 0; i < local_anchors.rows; i++)  
        {  
            double pred_ctr_x, pred_ctr_y, src_ctr_x, src_ctr_y;  
            double dst_ctr_x, dst_ctr_y, dst_scl_x, dst_scl_y;  
            double src_w, src_h, pred_w, pred_h;  
            src_w = local_anchors.at<float>(i, 2) - local_anchors.at<float>(i, 0) + 1;  
            src_h = local_anchors.at<float>(i, 3) - local_anchors.at<float>(i, 1) + 1;  
            src_ctr_x = local_anchors.at<float>(i, 0) + 0.5 * src_w;  
            src_ctr_y = local_anchors.at<float>(i, 1) + 0.5 * src_h;  

            dst_ctr_x = boxs_delta.at<float>(i, 0);  
            dst_ctr_y = boxs_delta.at<float>(i, 1);  
            dst_scl_x = boxs_delta.at<float>(i, 2);  
            dst_scl_y = boxs_delta.at<float>(i, 3);  
            pred_ctr_x = dst_ctr_x*src_w + src_ctr_x;  
            pred_ctr_y = dst_ctr_y*src_h + src_ctr_y;  
            pred_w = exp(dst_scl_x) * src_w;  
            pred_h = exp(dst_scl_y) * src_h;  

            pre_box.at<float>(i, 0) = pred_ctr_x - 0.5*pred_w;  
            pre_box.at<float>(i, 1) = pred_ctr_y - 0.5*pred_h;  
            pre_box.at<float>(i, 2) = pred_ctr_x + 0.5*pred_w;  
            pre_box.at<float>(i, 3) = pred_ctr_y + 0.5*pred_h;  
        }  
        return pre_box;  
    }  
    void nms(std::vector &input_boxes, float nms_thresh){  
        std::vector<float>vArea(input_boxes.size());  
        for (int i = 0; i < input_boxes.size(); ++i)  
        {  
            vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)  
                * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);  
        }  
        for (int i = 0; i < input_boxes.size(); ++i)  
        {  
            for (int j = i + 1; j < input_boxes.size();)  
            {  
                float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);  
                float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);  
                float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);  
                float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);  
                float w = std::max(float(0), xx2 - xx1 + 1);  
                float   h = std::max(float(0), yy2 - yy1 + 1);  
                float   inter = w * h;  
                float ovr = inter / (vArea[i] + vArea[j] - inter);  
                if (ovr >= nms_thresh)  
                {  
                    input_boxes.erase(input_boxes.begin() + j);  
                    vArea.erase(vArea.begin() + j);  
                }  
                else  
                {  
                    j++;  
                }  
            }  
        }  
    }  
}

(6) 更改caffe/src/caffe/util/blocking_queue.cpp
增加一个头文件

#include "caffe/data_reader.hpp"

其次，在最末尾处，将如下代码

template class BlockingQueue<Batch<float>*>;
template class BlockingQueue<Batch<double>*>;

更改为：

// Explicit instantiations of BlockingQueue. The last two are the additions;
// they use Datum and DataReader, which is why caffe/data_reader.hpp is
// included at the top of blocking_queue.cpp.
template class BlockingQueue<Batch<float>*>;
template class BlockingQueue<Batch<double>*>;
template class BlockingQueue<Datum*>;
template class BlockingQueue<shared_ptr<DataReader::QueuePair> >;

（7）重新生成caffe.pb.h

cd $caffe/include/caffe/
mkdir proto
cd $caffe/src/caffe/proto
protoc --cpp_out=$your_home/caffe/include/caffe/proto/ caffe.proto

执行完后会在$caffe/include/caffe/proto/下生成2个文件，caffe.pb.h和caffe.pb.cc。
（8）编译caffe

make clean
make -j8
make pycaffe (不需要用python的可以不用，反正编译耗时也不长）

（9）环境已经配置好了，现在我们再加个类，用来对图片进行检测吧！编写头文件ObjectDetector.hpp

#define OBJECTDETECTOR_H

#define INPUT_SIZE_NARROW  600
#define INPUT_SIZE_LONG  1000

#include 
#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;

class ObjectDetector
{
public:

      ObjectDetector(const std::string &model_file, const std::string &weights_file);  //构造函数
    //对一张图片，进行检测，将结果保存进map数据结构里,分别表示每个类别对应的目标框，如果需要分数信息，则计算分数
      map<int,vector > detect(const cv::Mat& image, map<int,vector<float> >* score=NULL);   

private:
    boost::shared_ptr< caffe::Net<float> > net_;
    int class_num_;     //类别数+1   ,官方给的demo 是20+1类
};

#endif

（10）源文件ObjectDetector.cpp

#include "ObjectDetector.hpp"
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>
#include <fstream>

using std::string;
using std::vector;
using namespace caffe;
using  std::max;
using std::min;

// Builds the detector: selects the Caffe device at compile time (CPU when
// CPU_ONLY is defined, GPU otherwise), creates the network in TEST phase from
// `model_file` and loads the trained weights from `weights_file`.
ObjectDetector::ObjectDetector(const std::string &model_file,const std::string &weights_file){
#ifdef CPU_ONLY
    const Caffe::Brew mode = Caffe::CPU;
#else
    const Caffe::Brew mode = Caffe::GPU;
#endif
    Caffe::set_mode(mode);

    net_.reset(new Net<float>(model_file, TEST));
    net_->CopyTrainedLayersFrom(weights_file);

    // Channel count of "cls_prob", i.e. number of classes + 1.
    class_num_ = net_->blob_by_name("cls_prob")->channels();
}

// Runs detection on one image.
//
// The image is rescaled to fit INPUT_SIZE_NARROW x INPUT_SIZE_LONG, the mean
// pixel is subtracted, and the network is run. For every class except class 0
// the class-specific box deltas are applied to the ROIs, the boxes are clipped
// to the original image, sorted by score, passed through NMS and finally
// thresholded on confidence.
//
// @param image       input image; an empty image yields an empty result.
// @param objectScore optional output; when non-NULL it is cleared and then
//                    receives, per class label, the confidence of each
//                    returned box (same order as the boxes).
// @return map from class label to detected boxes. Every class 1..class_num_-1
//         gets an entry; classes with no surviving box map to an empty vector.
map<int,vector<cv::Rect> > ObjectDetector::detect(const cv::Mat& image,map<int,vector<float> >* objectScore){

    if(objectScore!=NULL)   // caller wants confidences
        objectScore->clear();

    map<int,vector<cv::Rect> > label_objs;    // boxes per class label
    if (image.empty())      // nothing to scale or feed to the network
        return label_objs;

    const float CONF_THRESH = 0.8;  // confidence threshold
    const float NMS_THRESH = 0.3;   // non-maximum suppression threshold

    // Pick the scale at which both side limits are respected.
    const int max_side = max(image.rows, image.cols);
    const int min_side = min(image.rows, image.cols);
    const float max_side_scale = float(max_side) / float(INPUT_SIZE_LONG);
    const float min_side_scale = float(min_side) / float(INPUT_SIZE_NARROW);
    const float max_scale = max(max_side_scale, min_side_scale);

    const float img_scale = float(1) / max_scale;
    const int height = int(image.rows * img_scale);
    const int width = int(image.cols * img_scale);

    cv::Mat cv_resized;
    image.convertTo(cv_resized, CV_32FC3);
    cv::resize(cv_resized, cv_resized, cv::Size(width, height));
    cv::Mat mean(height, width, cv_resized.type(), cv::Scalar(102.9801, 115.9465, 122.7717));
    cv::Mat normalized;
    cv::subtract(cv_resized, mean, normalized);

    float im_info[3];
    im_info[0] = height;
    im_info[1] = width;
    im_info[2] = img_scale;

    shared_ptr<Blob<float> > input_layer = net_->blob_by_name("data");
    input_layer->Reshape(1, normalized.channels(), height, width);
    net_->Reshape();

    // Wrap each channel plane of the input blob in a cv::Mat header so that
    // cv::split writes the image straight into the blob.
    float* input_data = input_layer->mutable_cpu_data();
    vector<cv::Mat> input_channels;
    for (int i = 0; i < input_layer->channels(); ++i) {
        cv::Mat channel(height, width, CV_32FC1, input_data);
        input_channels.push_back(channel);
        input_data += height * width;
    }
    cv::split(normalized, input_channels);
    // im_info lives on the stack; it stays valid for the forward pass below.
    net_->blob_by_name("im_info")->set_cpu_data(im_info);
    net_->ForwardFrom(0);                                       // forward pass


    // ROIs come back in network-input coordinates, 5 values per row
    // (index 0 is skipped); map them back to the original image.
    const int num = net_->blob_by_name("rois")->num();
    const float *rois_data = net_->blob_by_name("rois")->cpu_data();
    cv::Mat rois_box(num, 4, CV_32FC1);
    for (int i = 0; i < num; ++i)
    {
        rois_box.at<float>(i, 0) = rois_data[i * 5 + 1] / img_scale;
        rois_box.at<float>(i, 1) = rois_data[i * 5 + 2] / img_scale;
        rois_box.at<float>(i, 2) = rois_data[i * 5 + 3] / img_scale;
        rois_box.at<float>(i, 3) = rois_data[i * 5 + 4] / img_scale;
    }

    shared_ptr<Blob<float> > bbox_delt_data = net_->blob_by_name("bbox_pred");   // num x (4 * classes)
    shared_ptr<Blob<float> > score = net_->blob_by_name("cls_prob");             // num x classes

    const float max_x = float(image.cols - 1);
    const float max_y = float(image.rows - 1);

    for (int i = 1; i < class_num_; ++i){     // every class except class 0
        cv::Mat bbox_delt(num, 4, CV_32FC1);
        for (int j = 0; j < num; ++j){
            bbox_delt.at<float>(j, 0) = bbox_delt_data->data_at(j, i * 4 + 0, 0, 0);
            bbox_delt.at<float>(j, 1) = bbox_delt_data->data_at(j, i * 4 + 1, 0, 0);
            bbox_delt.at<float>(j, 2) = bbox_delt_data->data_at(j, i * 4 + 2, 0, 0);
            bbox_delt.at<float>(j, 3) = bbox_delt_data->data_at(j, i * 4 + 3, 0, 0);
        }
        cv::Mat box_class = RPN::bbox_tranform_inv(rois_box, bbox_delt);

        vector<RPN::abox> aboxes;   // candidate boxes for class i
        for (int j = 0; j < box_class.rows; ++j){
            // Clip to the original image.
            RPN::abox tmp;
            tmp.x1 = min(max(box_class.at<float>(j, 0), 0.0f), max_x);
            tmp.y1 = min(max(box_class.at<float>(j, 1), 0.0f), max_y);
            tmp.x2 = min(max(box_class.at<float>(j, 2), 0.0f), max_x);
            tmp.y2 = min(max(box_class.at<float>(j, 3), 0.0f), max_y);
            tmp.score = score->data_at(j, i, 0, 0);
            aboxes.push_back(tmp);
        }
        std::sort(aboxes.rbegin(), aboxes.rend());   // descending score
        RPN::nms(aboxes, NMS_THRESH);                // drop overlapping boxes
        for (size_t k = 0; k < aboxes.size();){
            if (aboxes[k].score < CONF_THRESH)
                aboxes.erase(aboxes.begin() + k);
            else
                k++;
        }

        // Store the boxes of class i.
        vector<cv::Rect> rect(aboxes.size());
        for (size_t ii = 0; ii < aboxes.size(); ++ii)
            rect[ii] = cv::Rect(cv::Point(aboxes[ii].x1, aboxes[ii].y1),
                                cv::Point(aboxes[ii].x2, aboxes[ii].y2));
        label_objs[i] = rect;

        // Store the confidences of class i.
        if(objectScore!=NULL){
            vector<float> tmp(aboxes.size());
            for (size_t ii = 0; ii < aboxes.size(); ++ii)
                tmp[ii] = aboxes[ii].score;
            objectScore->insert(pair<int,vector<float> >(i,tmp));
        }
    }
    return label_objs;
}

注意：更改之处：net_->Forward(); 更改为 net_->ForwardFrom(0)；
这里的函数返回的是一个map对象，每一个键（类别label），对应一个矩形框向量。比如，一个20类检测任务，而一张图片里有3个人（标签是1），和2辆车（标签是5），那函数会返回一个map，其中有两个键值对，键1对应的值是一个3维的矩形框向量，分别代表着3个人的矩形框；键5对应的值是一个2维的矩形框向量，分别代表的是2辆车的矩形框。同时，函数还接受一个可选参数，可以返回每个矩形框各自对应的置信度。
（11）写个主函数，测试下效果，首先更改下网络描述文件test.prototxt，因为我们不需要python层了，那我们打开这个文件，定位到 Python层，

layer {  
   name: 'proposal'  
   type: 'Python'  
   bottom: 'rpn_cls_prob_reshape'  
   bottom: 'rpn_bbox_pred'  
   bottom: 'im_info'  
   top: 'rois'  
   python_param {  
     module: 'rpn.proposal_layer'  
     layer: 'ProposalLayer'  
    param_str: "'feat_stride': 16"  
   }  
}

把它修改为

layer {  
   name: "proposal"  
   type: "RPN"  
   bottom: "rpn_cls_prob_reshape"  
   bottom: "rpn_bbox_pred"  
   bottom: "im_info"  
   top: "rois"  
   rpn_param {  
       feat_stride : 16  
       basesize : 16  
       scale : 8  
       scale : 16  
       scale : 32  
       ratio : 0.5  
       ratio : 1  
       ratio : 2  
       boxminsize :16  
       per_nms_topn : 6000   #参照的blog这个参数设为0，和Python不一样，造成c++效果比python差很多。所以我更改为6000，按照原版python参数设置更改了
       post_nms_topn : 300  #同上
       nms_thresh : 0.7   #同上
   }  
}

（12）编写主函数进行测试

#include "ObjectDetector.hpp"

#include <ctime>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

using namespace cv;  
using namespace std;

// Number of clock() ticks per millisecond (CLOCKS_PER_SEC / 1000).
const double clocksPerMillis = double(CLOCKS_PER_SEC) / 1000;

/**
 * Converts a float to its textual form using default stream formatting.
 *
 * @param i Value to format.
 * @return The characters produced by streaming `i` into a string stream.
 */
std::string num2str(float i){
    std::stringstream ss;
    ss << i;  // this insertion was lost in the garbled original ("ss<return")
    return ss.str();
}

// void drawRectOnImage(cv::Mat& im, Rect pos) {
//     int thickness = 2;
//     Scalar color(0, 0, 255);// color is blue.

//     Mat overlay;
//     im.copyTo(overlay);
//     rectangle(overlay, pos, color, thickness);

//     double alpha = 0.5;
//     cv::addWeighted(overlay, alpha, im, 1 - alpha, 0, im);
// }

/**
 * Renders `text` on a filled background strip and alpha-blends the result
 * onto `im`.
 *
 * @param im      Image to annotate; modified in place.
 * @param pos     Rectangle whose x/y give the top-left anchor of the label.
 * @param voffset Vertical offset added to pos.y before drawing.
 * @param text    String to render.
 * @return Text height plus stroke thickness, i.e. the vertical space the
 *         label strip occupies.
 */
int drawTextOnImage(cv::Mat& im, Rect pos, float voffset, string text) {
    const int font = CV_FONT_HERSHEY_SIMPLEX; //FONT_HERSHEY_PLAIN;
    const double scale = 1.0;
    // The original initialiser was 0.5, which truncates to 0 when stored in an int.
    const int stroke = 0;
    int baseLine = 0;
    const Size extent = getTextSize(text, font, scale, stroke, &baseLine);

    // Sums are evaluated in float (because of voffset) and then truncated to
    // int, matching the implicit conversion done by the Point constructor.
    const int top = static_cast<int>(pos.y + voffset);
    const int bottom = static_cast<int>(pos.y + extent.height + stroke + voffset);

    // Draw on a copy so the strip and text can be blended over the original.
    Mat canvas;
    im.copyTo(canvas);

    rectangle(canvas, Point(pos.x, top), Point(pos.x + extent.width, bottom),
              Scalar(255, 128, 128), CV_FILLED);
    cv::putText(canvas, text, Point(pos.x, bottom), font, scale,
                Scalar(255, 255, 255), stroke, LINE_AA);

    const double alpha = 0.75;
    cv::addWeighted(canvas, alpha, im, 1 - alpha, 0, im);
    return extent.height + stroke;
}

int main(int argc,char **argv){
  ::google::InitGoogleLogging(argv[0]);  
#ifdef CPU_ONLY  
  cout<<"Use CPU\n";  
#else  
  cout<<"Use GPU\n";  
#endif  
  ObjectDetector detect("test.prototxt", "xx.caffemodel");  

  long start_time = clock();  
  Mat img=imread("xx.jpg");
  if (img.empty()) {
    cout << "img is empty! " << endl;
    return 1;
  } 
  map<int,vector<float> > score;  
  map<int,vector > label_objs=detect.detect(img,&score);  //目标检测,同时保存每个框的置信度  
  string label_str = "";
  for(map<int,vector >::iterator it=label_objs.begin();it!=label_objs.end();it++){
      int label=it->first;  //标签  
      vector rects=it->second;  //检测框  
      for(int j=0;j// cout << j << endl;
          // drawRectOnImage(img, rects[j]);
          // cv::rectangle(overlay, rects[j], Scalar(255, 128, 128), CV_FILLED);
          cout << rects[j] << endl;
          cout << "score is " << score[label][j] << endl;
          rectangle(img,rects[j],Scalar(0,0,255),2);   //画出矩形框
          switch (label){
              case 1: label_str = "label1"; break;
              case 2: label_str = "label2";break;
              case 3: label_str = "label3";break;
              default: label_str = ""; cout << "label_str is error!" << endl; return 1;
          }  
          // string txt= label_str+ " : " + num2str(score[label][j]);  
          // putText(overlay,txt,Point(rects[j].x,rects[j].y),CV_FONT_HERSHEY_SIMPLEX,0.5,Scalar(255,0,0)); //标记 类别：置信度
          // putText(overlay,txt,Point(rects[j].x,rects[j].y),CV_FONT_HERSHEY_SIMPLEX,0.8,Scalar(255,0,0)); //标记 类别：置信度

          int offset = drawTextOnImage(img, rects[j], 0, label_str);
          int margin = 5;
          drawTextOnImage(img, rects[j], offset + margin, num2str(score[label][j]));
          // Point textOrg(rects[j].x, rects[j].y - 1);
          // cv::putText(overlay, txt, textOrg,
          //         CV_FONT_HERSHEY_PLAIN, 1.0, Scalar(255, 255, 255), 1, LINE_AA);
          // double alpha = 0.75;
          // cv::addWeighted(overlay, alpha, img, 1 - alpha, 0, img); 
      }  
  }
  cout << "time is " << double(clock() - start_time) / double(CLOCKS_PER_SEC) << endl;  
  //imshow("detect", img);
  waitKey();  
  return 0;  
}

如果需要使用CPU，请在代码前 #define CPU_ONLY 一下。

（13）最后一步，编译

 g++ -o demo.exe main.cpp ObjectDetector.cpp -I /home/*****/caffe/include/ -I /home/*****/caffe/.build_release/src/ -I /usr/local/cuda-8.0/include/ `pkg-config --libs --cflags opencv` -L /home/****/caffe/build/lib/ -lcaffe -lglog -lboost_system -lprotobuf

然后执行./demo.exe 就可以了
注意：这里为了方便，直接输出了标签号以及对应的置信度。可以看出，飞机的label为1，船的label是4，我们从python版的demo.py中可以证实这点：

CLASSES = ('__background__',                             
           'aeroplane', 'bicycle', 'bird', 'boat',  
           'bottle', 'bus', 'car', 'cat', 'chair',  
            'cow', 'diningtable', 'dog', 'horse',  
             'motorbike', 'person', 'pottedplant',  
            'sheep', 'sofa', 'train', 'tvmonitor')

这个根据自己模型需求改改就行。label的顺序按照py-faster-rcnn/lib/datasets/pascal_voc.py的

self._classes = ('__background__', # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat',
                         'bottle', 'bus', 'car', 'cat', 'chair',
                         'cow', 'diningtable', 'dog', 'horse',
                         'motorbike', 'person', 'pottedplant',
                         'sheep', 'sofa', 'train', 'tvmonitor')

顺序进行标签排序，'__background__'为0，aeroplane为1，以此类推。

结果

1、我看很多评论说，c++版本比python要更吃显存，更耗时，但我自己测试的效果没有这种感觉，不用最新caffe其实耗时和python基本差不多。
2、感觉用最新的caffe会比Faster R-CNN老版caffe要快一丁点
3、在1080ti上测试，一张图（600*1000）平均耗时0.3s（新or老caffe都差不多）

可能出现的错误

（1）找不到libcaffe.so.1.0.0-rc3

./app.bin: error while loading shared libraries: libcaffe.so.1.0.0-rc3: cannot open shared object file: No such file or directory

解决办法：
在/etc/ld.so.conf.d/下新添加caffe.conf

$Home
sudo vi /etc/ld.so.conf.d/caffe.conf

然后把libcaffe.so.1.0.0-rc3所在的caffe/build/lib/地址写进caffe.conf
最后不要忘记 sudo ldconfig 一下

你可能感兴趣的:(深度学习)

机器学习与深度学习间关系与区别 ℒℴѵℯ心·动ꦿ໊ོ꫞ 人工智能学习深度学习 python
一、机器学习概述定义机器学习（MachineLearning,ML）是一种通过数据驱动的方法，利用统计学和计算算法来训练模型，使计算机能够从数据中学习并自动进行预测或决策。机器学习通过分析大量数据样本，识别其中的模式和规律，从而对新的数据进行判断。其核心在于通过训练过程，让模型不断优化和提升其预测准确性。主要类型1.监督学习（SupervisedLearning）监督学习是指在训练数据集中包含输入
将cmd中命令输出保存为txt文本文件落难Coder Windows cmd window
最近深度学习本地的训练中我们常常要在命令行中运行自己的代码，无可厚非，我们有必要保存我们的炼丹结果，但是复制命令行输出到txt是非常麻烦的，其实Windows下的命令行为我们提供了相应的操作。其基本的调用格式就是：运行指令>输出到的文件名称或者具体保存路径测试下，我打开cmd并且ping一下百度：pingwww.baidu.com>./data.txt看下相同目录下data.txt的输出：如果你再
推荐3家毕业AI论文可五分钟一键生成！文末附免费教程！小猪包333 写论文人工智能 AI写作深度学习计算机视觉
在当前的学术研究和写作领域，AI论文生成器已经成为许多研究人员和学生的重要工具。这些工具不仅能够帮助用户快速生成高质量的论文内容，还能进行内容优化、查重和排版等操作。以下是三款值得推荐的AI论文生成器：千笔-AIPassPaper、懒人论文以及AIPaperPass。千笔-AIPassPaper千笔-AIPassPaper是一款基于深度学习和自然语言处理技术的AI写作助手，旨在帮助用户快速生成高质
AI大模型的架构演进与最新发展季风泯灭的季节 AI大模型应用技术二人工智能架构
随着深度学习的发展，AI大模型（LargeLanguageModels,LLMs）在自然语言处理、计算机视觉等领域取得了革命性的进展。本文将详细探讨AI大模型的架构演进，包括从Transformer的提出到GPT、BERT、T5等模型的历史演变，并探讨这些模型的技术细节及其在现代人工智能中的核心作用。一、基础模型介绍：Transformer的核心原理Transformer架构的背景在Transfo
[实践应用] 深度学习之模型性能评估指标 YuanDaima2048 深度学习工具使用深度学习人工智能损失函数性能评估 pytorch python 机器学习
文章总览：YuanDaiMa2048博客文章总览深度学习之模型性能评估指标分类任务回归任务排序任务聚类任务生成任务其他介绍在机器学习和深度学习领域，评估模型性能是一项至关重要的任务。不同的学习任务需要不同的性能指标来衡量模型的有效性。以下是对一些常见任务及其相应的性能评估指标的详细解释和总结。分类任务分类任务是指模型需要将输入数据分配到预定义的类别或标签中。以下是分类任务中常用的性能指标：准确率(
[实践应用] 深度学习之优化器 YuanDaima2048 深度学习工具使用 pytorch 深度学习人工智能机器学习 python 优化器
文章总览：YuanDaiMa2048博客文章总览深度学习之优化器1.随机梯度下降（SGD）2.动量优化（Momentum）3.自适应梯度（Adagrad）4.自适应矩估计（Adam）5.RMSprop总结其他介绍在深度学习中，优化器用于更新模型的参数，以最小化损失函数。常见的优化函数有很多种，下面是几种主流的优化器及其特点、原理和PyTorch实现：1.随机梯度下降（SGD）原理:随机梯度下降通过
生成式地图制图 Bwywb_3 深度学习机器学习深度学习生成对抗网络
生成式地图制图（GenerativeCartography）是一种利用生成式算法和人工智能技术自动创建地图的技术。它结合了传统的地理信息系统（GIS）技术与现代生成模型（如深度学习、GANs等），能够根据输入的数据自动生成符合需求的地图。这种方法在城市规划、虚拟环境设计、游戏开发等多个领域具有应用前景。主要特点：自动化生成：通过算法和模型，系统能够根据输入的地理或空间数据自动生成地图，而无需人工逐
吴恩达深度学习笔记(30)-正则化的解释极客Array
正则化（Regularization）深度学习可能存在过拟合问题——高方差，有两个解决方法，一个是正则化，另一个是准备更多的数据，这是非常可靠的方法，但你可能无法时时刻刻准备足够多的训练数据或者获取更多数据的成本很高，但正则化通常有助于避免过拟合或减少你的网络误差。如果你怀疑神经网络过度拟合了数据，即存在高方差问题，那么最先想到的方法可能是正则化，另一个解决高方差的方法就是准备更多数据，这也是非常
个人学习笔记7-6：动手学深度学习pytorch版-李沐浪子L 深度学习深度学习笔记计算机视觉 python 人工智能神经网络 pytorch
#人工智能##深度学习##语义分割##计算机视觉##神经网络#计算机视觉13.11全卷积网络全卷积网络（fullyconvolutionalnetwork，FCN）采用卷积神经网络实现了从图像像素到像素类别的变换。引入l转置卷积（transposedconvolution）实现的，输出的类别预测与输入图像在像素级别上具有一一对应关系：通道维的输出即该位置对应像素的类别预测。13.11.1构造模型下
深度学习-点击率预估-研究论文2024-09-14速读 sp_fyf_2024 深度学习人工智能
深度学习-点击率预估-研究论文2024-09-14速读1.DeepTargetSessionInterestNetworkforClick-ThroughRatePredictionHZhong,JMa,XDuan,SGu,JYao-2024InternationalJointConferenceonNeuralNetworks,2024深度目标会话兴趣网络用于点击率预测摘要：这篇文章提出了一种新
损失函数与反向传播 Star_. PyTorch pytorch 深度学习 python
损失函数定义与作用损失函数(lossfunction)在深度学习领域是用来计算搭建模型预测的输出值和真实值之间的误差。1.损失函数越小越好2.计算实际输出与目标之间的差距3.为更新输出提供依据（反向传播)常见的损失函数回归常见的损失函数有：均方差（MeanSquaredError，MSE）、平均绝对误差（MeanAbsoluteErrorLoss，MAE）、HuberLoss是一种将MSE与MAE
【深度学习】训练过程中一个OOM的问题，太难查了 weixin_40293999 深度学习深度学习人工智能
现象：各位大佬又遇到过ubuntu的这个问题么？现象是在训练过程中，ssh上不去了，能ping通，没死机，但是ubunutu的pc侧的显示器，鼠标啥都不好用了。只能重启。问题原因：OOM了95G，尼玛！！！！pytorch爆内存了，然后journald假死了，在journald被watchdog干掉之后，系统就崩溃了。这种规模的爆内存一般，即使被oomkill了，也要卡半天的，确实会这样，能不能配
云服务业界动态简报-20180128 Captain7
一、青云青云QingCloud推出深度学习平台DeepLearningonQingCloud，包含了主流的深度学习框架及数据科学工具包，通过QingCloudAppCenter一键部署交付，可以让算法工程师和数据科学家快速构建深度学习开发环境，将更多的精力放在模型和算法调优。二、腾讯云1.腾讯云正式发布腾讯专有云TCE(TencentCloudEnterprise)矩阵，涵盖企业版、大数据版、AI
机器学习VS深度学习 nfgo 机器学习
机器学习（MachineLearning,ML）和深度学习（DeepLearning,DL）是人工智能（AI）的两个子领域，它们有许多相似之处，但在技术实现和应用范围上也有显著区别。下面从几个方面对两者进行区分：1.概念层面机器学习：是让计算机通过算法从数据中自动学习和改进的技术。它依赖于手动设计的特征和数学模型来进行学习，常用的模型有决策树、支持向量机、线性回归等。深度学习：是机器学习的一个子领
大数据毕业设计hadoop+spark+hive知识图谱租房数据分析可视化大屏租房推荐系统 58同城租房爬虫房源推荐系统房价预测系统计算机毕业设计机器学习深度学习人工智能 2401_84572577 程序员大数据 hadoop 人工智能
做了那么多年开发，自学了很多门编程语言，我很明白学习资源对于学一门新语言的重要性，这些年也收藏了不少的Python干货，对我来说这些东西确实已经用不到了，但对于准备自学Python的人来说，或许它就是一个宝藏，可以给你省去很多的时间和精力。别在网上瞎学了，我最近也做了一些资源的更新，只要你是我的粉丝，这期福利你都可拿走。我先来介绍一下这些东西怎么用，文末抱走。（1）Python所有方向的学习路线（
深度学习-13-小语言模型之SmolLM的使用皮皮冰燃深度学习深度学习
文章附录1SmolLM概述1.1SmolLM简介1.2下载模型2运行2.1在CPU/GPU/多GPU上运行模型2.2使用torch.bfloat162.3通过位和字节的量化版本3应用示例4问题及解决4.1attention_mask和pad_token_id报错4.2max_new_tokens=205参考附录1SmolLM概述1.1SmolLM简介SmolLM是一系列尖端小型语言模型，提供三种规
基于深度学习的农作物病害检测 SEU-WYL 深度学习dnn 深度学习人工智能
基于深度学习的农作物病害检测利用卷积神经网络（CNN）、生成对抗网络（GAN）、Transformer等深度学习技术，自动识别和分类农作物的病害，帮助农业工作者提高作物管理效率、减少损失。1.农作物病害检测的挑战病害种类繁多：农作物病害的类型多样，不同病害在同一作物上的表现差异很大，同时同一种病害在不同生长阶段的症状也可能不同。环境影响：天气、光照、湿度等外部环境因素会影响农作物的表现，使得病害检
基于深度学习的文本引导的图像编辑 SEU-WYL 深度学习dnn 深度学习人工智能
基于深度学习的文本引导的图像编辑（Text-GuidedImageEditing）是一种通过自然语言文本指令对图像进行编辑或修改的技术。它结合了图像生成和自然语言处理（NLP）的最新进展，使用户能够通过描述性文本对图像内容进行精确的调整和操控。1.文本引导的图像编辑的挑战文本和图像之间的对齐：如何将文本中的语义信息准确地映射到图像中的特定区域或元素是一个关键挑战。这涉及到多模态数据的对齐和理解。编
深度学习--对抗生成网络（GAN, Generative Adversarial Network） Ambition_LAO 深度学习生成对抗网络
对抗生成网络（GAN,GenerativeAdversarialNetwork）是一种深度学习模型，由IanGoodfellow等人在2014年提出。GAN主要用于生成数据，通过两个神经网络相互对抗，来生成以假乱真的新数据。以下是对GAN的详细阐述，包括其概念、作用、核心要点、实现过程、代码实现和适用场景。1.概念GAN由两个神经网络组成：生成器（Generator）和判别器（Discrimina
深度学习：怎么看pth文件的参数奥利给少年深度学习人工智能
.pth文件是PyTorch模型的权重文件，它通常包含了训练好的模型的参数。要查看或使用这个文件，你可以按照以下步骤操作：1.确保你有模型的定义你需要有创建这个.pth文件时所用的模型的代码。这意味着你需要有模型的类定义和架构。2.加载模型权重使用PyTorch的load_state_dict方法来加载权重。这里是如何操作的：importtorchimporttorch.nnasnn#定义模型结构
chatgpt赋能python：如何在Python中安装Keras库？ turensu ChatGpt python chatgpt keras 计算机
如何在Python中安装Keras库？Keras是一个简单易用的神经网络库，由FrançoisChollet编写。它在Python编程语言中实现了深度学习的功能，可以使您更轻松地构建和试验不同类型的神经网络。如果您是一名Python开发人员，肯定会想知道如何在您的Python项目中安装Keras库。在本文中，我们将向您展示如何安装和配置Keras库。步骤1：安装Python要使用Keras库，您需
如何理解深度学习的训练过程奋斗的草莓熊深度学习人工智能 python scikit-learn virtualenv numpy pandas
文章目录1.训练是干什么？2.预训练模型进行训练，主要更改的是预训练模型的什么东西？1.训练是干什么？以yolov5为例子，训练的目的是把一组输入猫狗图像放到神经网络中，得到一个输出模型，这个模型下次可以直接用来识别哪个是猫，哪个是狗2.预训练模型进行训练，主要更改的是预训练模型的什么东西？超参数（Hyperparameters）：这是模型结构中定义的参数，比如：卷积核大小（kernel_size
Keras深度学习框架入门及实战指南司莹嫣Maude
Keras深度学习框架入门及实战指南keraskeras-team/keras:是一个基于Python的深度学习库，它没有使用数据库。适合用于深度学习任务的开发和实现，特别是对于需要使用Python深度学习库的场景。特点是深度学习库、Python、无数据库。项目地址:https://gitcode.com/gh_mirrors/ke/keras一、项目介绍Keras简介Keras是一款高级神经网络
深度学习驱动的车牌识别：技术演进与未来挑战逼子歌深度学习车牌识别神经网络字符识别 YOLO 卷积神经网络
一、引言1.1研究背景在当今社会，智能交通系统的发展日益重要，而车牌识别作为其关键组成部分，发挥着至关重要的作用。车牌识别技术广泛应用于交通管理、停车场管理、安防监控等领域。在交通管理中，它可以用于车辆识别、交通违法监控和车流统计等，提高交通管理的效率和准确性。在停车场管理中，实现车辆的自动识别和收费，提升管理和服务水平。在安防监控领域，可用于追踪嫌疑人及犯罪行为。深度学习的出现为车牌识别带来了重
每天五分钟玩转深度学习PyTorch：模型参数优化器torch.optim 幻风_huanfeng 深度学习框架pytorch 深度学习 pytorch 人工智能神经网络机器学习优化算法
本文重点在机器学习或者深度学习中，我们需要通过修改参数使得损失函数最小化(或最大化)，优化算法就是一种调整模型参数更新的策略。在pytorch中定义了优化器optim，我们可以使用它调用封装好的优化算法，然后传递给它神经网络模型参数，就可以对模型进行优化。本文是学习第6步(优化器)，参考链接pytorch的学习路线随机梯度下降算法在深度学习和机器学习中，梯度下降算法是最常用的参数更新方法，它的公式
什么是AIGC？有哪些免费工具？ chent_某位 AIGC
AIGC（AIGeneratedContent），即“人工智能生成内容”，是指通过人工智能技术自动生成各种类型的数字内容。AIGC让机器能够根据输入的信息或数据生成符合人类需求的文本、图像、音频、视频等内容，极大提高了内容创作的效率。AIGC的背景与起源随着深度学习和自然语言处理技术的快速发展，人工智能已经不再局限于简单的任务，如分类、预测和数据分析，而是具备了生成内容的能力。生成式AI模型，如O
transformer架构(Transformer Architecture)原理与代码实战案例讲解 AI架构设计之禅大数据AI人工智能 Python入门实战计算科学神经计算深度学习神经网络大数据人工智能大型语言模型 AI AGI LLM Java Python 架构设计 Agent RPA
transformer架构(TransformerArchitecture)原理与代码实战案例讲解关键词：Transformer,自注意力机制,编码器-解码器,预训练,微调,NLP,机器翻译作者：禅与计算机程序设计艺术/ZenandtheArtofComputerProgramming1.背景介绍1.1问题的由来自然语言处理（NLP）领域的发展经历了从规则驱动到统计驱动再到深度学习驱动的三个阶段。
如何有效的学习AI大模型？ Python程序员罗宾学习人工智能语言模型自然语言处理架构
学习AI大模型是一个系统性的过程，涉及到多个学科的知识。以下是一些建议，帮助你更有效地学习AI大模型：基础知识储备：数学基础：学习线性代数、概率论、统计学和微积分等，这些是理解机器学习算法的数学基础。编程技能：掌握至少一种编程语言，如Python，因为大多数AI模型都是用Python实现的。理论学习：机器学习基础：了解监督学习、非监督学习、强化学习等基本概念。深度学习：学习神经网络的基本结构，如卷
【深度学习】【OnnxRuntime】【Python】模型转化、环境搭建以及模型部署的详细教程牙牙要健康深度学习 onnx onnxruntime 深度学习 python 人工智能
【深度学习】【OnnxRuntime】【Python】模型转化、环境搭建以及模型部署的详细教程提示:博主取舍了很多大佬的博文并亲测有效,分享笔记邀大家共同学习讨论文章目录【深度学习】【OnnxRuntime】【Python】模型转化、环境搭建以及模型部署的详细教程前言模型转换--pytorch转onnxWindows平台搭建依赖环境onnxruntime调用onnx模型ONNXRuntime推理核
基于深度学习的多模态信息检索 SEU-WYL 深度学习dnn 深度学习人工智能
基于深度学习的多模态信息检索（MultimodalInformationRetrieval,MMIR）是指利用深度学习技术，从包含多种模态（如文本、图像、视频、音频等）的数据集中检索出满足用户查询意图的相关信息。这种方法不仅可以处理单一模态的数据，还可以在多种模态之间建立关联，从而更准确地满足用户需求。1.多模态信息检索的挑战异构数据表示：多模态数据通常具有不同的特征和表示形式（如文本的词嵌入与图
JVM StackMapTable 属性的作用及理解 lijingyao8206 jvm 字节码 Class文件 StackMapTable
在Java 6版本之后JVM引入了栈图(Stack Map Table)概念。为了提高验证过程的效率，在字节码规范中添加了Stack Map Table属性，以下简称栈图，其方法的code属性中存储了局部变量和操作数的类型验证以及字节码的偏移量。也就是一个method需要且仅对应一个Stack Map Table。在Java 7版
回调函数调用方法百合不是茶 java
最近在看大神写的代码时,.发现其中使用了很多的回调 ,以前只是在学习的时候经常用到 ,现在写个笔记记录一下代码很简单: MainDemo :调用方法得到方法的返回结果
[时间机器]制造时间机器需要一些材料 comsci 制造
根据我的计算和推测,要完全实现制造一台时间机器,需要某些我们这个世界不存在的物质和材料... 甚至可以这样说,这种材料和物质,我们在反应堆中也无法获得......
开口埋怨不如闭口做事邓集海邓集海做人做事工作
“开口埋怨，不如闭口做事。”不是名人名言，而是一个普通父亲对儿子的训导。但是，因为这句训导，这位普通父亲却造就了一个名人儿子。这位普通父亲造就的名人儿子，叫张明正。　　　　张明正出身贫寒，读书时成绩差，常挨老师批评。高中毕业，张明正连普通大学的分数线都没上。高考成绩出来后，平时开口怨这怨那的张明正，不从自身找原因，而是不停地埋怨自己家庭条件不好、埋怨父母没有给他创造良好的学习环境。　　　　
jQuery插件开发全解析，类级别与对象级别开发 IT独行者 jquery 开发插件　函数
jQuery插件的开发包括两种：一种是类级别的插件开发，即给 jQuery添加新的全局函数，相当于给 jQuery类本身添加方法。 jQuery的全局函数就是属于 jQuery命名空间的函数，另一种是对象级别的插件开发，即给 jQuery对象添加方法。下面就两种函数的开发做详细的说明。 1 、类级别的插件开发类级别的插件开发最直接的理解就是给jQuer
Rome解析Rss 413277409 Rome解析Rss
import java.net.URL; import java.util.List; import org.junit.Test; import com.sun.syndication.feed.synd.SyndCategory; import com.sun.syndication.feed.synd.S
RSA加密解密无量加密解密 rsa
RSA加密解密代码代码有待整理 package com.tongbanjie.commons.util; import java.security.Key; import java.security.KeyFactory; import java.security.KeyPair; import java.security.KeyPairGenerat
linux 软件安装遇到的问题 aichenglong linux 遇到的问题 ftp
1 ftp配置中遇到的问题 500 OOPS: cannot change directory 出现该问题的原因:是SELinux安装机制的问题.只要disable SELinux就可以了修改方法:1 修改/etc/selinux/config 中SELINUX=disabled 2 source /etc
面试心得 alafqq 面试
最近面试了好几家公司。记录下；支付宝，面试我的人胖胖的，看着人挺好的；博彦外包的职位，面试失败；阿里金融，面试官人也挺和善，只不过我让他吐血了。。。由于印象比较深，记录下； 1，自我介绍 2，说下八种基本类型；（算上string。楼主才答了3种，哈哈，string其实不是基本类型，是引用类型） 3，什么是包装类，包装类的优点； 4，平时看过什么书？NND，什么书都没看过。。照样
java的多态性探讨百合不是茶 java
java的多态性是指main方法在调用属性的时候类可以对这一属性做出反应的情况 //package 1; class A{ public void test(){ System.out.println("A"); } } class D extends A{ public void test(){ S
网络编程基础篇之JavaScript-学习笔记 bijian1013 JavaScript
1.documentWrite <html> <head> <script language="JavaScript"> document.write("这是电脑网络学校"); document.close(); </script> </h
探索JUnit4扩展：深入Rule bijian1013 JUnit Rule 单元测试
本文将进一步探究Rule的应用，展示如何使用Rule来替代@BeforeClass，@AfterClass，@Before和@After的功能。在上一篇中提到，可以使用Rule替代现有的大部分Runner扩展，而且也不提倡对Runner中的withBefores()，withAfte
[CSS]CSS浮动十五条规则 bit1129 css
这些浮动规则，主要是参考CSS权威指南关于浮动规则的总结，然后添加一些简单的例子以验证和理解这些规则。 1. 所有的页面元素都可以浮动 2. 一个元素浮动后，会成为块级元素，比如<span>,a, strong等都会变成块级元素 3.一个元素左浮动，会向最近的块级父元素的左上角移动，直到浮动元素的左外边界碰到块级父元素的左内边界；如果这个块级父元素已经有浮动元素停靠了
【Kafka六】Kafka Producer和Consumer多Broker、多Partition场景 bit1129 partition
0.Kafka服务器配置 3个broker 1个topic，6个partition，副本因子是2 2个consumer，每个consumer三个线程并发读取 1. Producer package kafka.examples.multibrokers.producers; import java.util.Properties; import java.util.
zabbix_agentd.conf配置文件详解 ronin47 zabbix 配置文件
Aliaskey的别名，例如 Alias=ttlsa.userid:vfs.file.regexp[/etc/passwd,^ttlsa:.:([0-9]+),,,,\1]，或者ttlsa的用户ID。你可以使用key：vfs.file.regexp[/etc/passwd,^ttlsa:.: ([0-9]+),,,,\1]，也可以使用ttlsa.userid。备注: 别名不能重复，但是可以有多个
java--19.用矩阵求Fibonacci数列的第N项 bylijinnan fibonacci
参考了网上的思路，写了个Java版的： public class Fibonacci { final static int[] A={1,1,1,0}; public static void main(String[] args) { int n=7; for(int i=0;i<=n;i++){ int f=fibonac
Netty源码学习-LengthFieldBasedFrameDecoder bylijinnan java netty
先看看LengthFieldBasedFrameDecoder的官方API http://docs.jboss.org/netty/3.1/api/org/jboss/netty/handler/codec/frame/LengthFieldBasedFrameDecoder.html API举例说明了LengthFieldBasedFrameDecoder的解析机制，如下：实
AES加密解密 chicony 加密解密
AES加解密算法，使用Base64做转码以及辅助加密： package com.wintv.common; import javax.crypto.Cipher; import javax.crypto.spec.IvParameterSpec; import javax.crypto.spec.SecretKeySpec; import sun.misc.BASE64Decod
文件编码格式转换 ctrain 编码格式
package com.test; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream;
mysql 在linux客户端插入数据中文乱码 daizj mysql 中文乱码
1、查看系统客户端，数据库，连接层的编码查看方法： http://daizj.iteye.com/blog/2174993 进入mysql，通过如下命令查看数据库编码方式： mysql> show variables like 'character_set_%'; +--------------------------+------
好代码是廉价的代码 dcj3sjt126com 程序员读书
长久以来我一直主张：好代码是廉价的代码。当我跟做开发的同事说出这话时，他们的第一反应是一种惊愕，然后是将近一个星期的嘲笑，把它当作一个笑话来讲。当他们走近看我的表情、知道我是认真的时，才收敛一点。当最初的惊愕消退后，他们会用一些这样的话来反驳： “好代码不廉价，好代码是采用经过数十年计算机科学研究和积累得出的最佳实践设计模式和方法论建立起来的精心制作的程序代码。” 我只
Android网络请求库——android-async-http dcj3sjt126com android
在iOS开发中有大名鼎鼎的ASIHttpRequest库，用来处理网络请求操作，今天要介绍的是一个在Android上同样强大的网络请求库android-async-http，目前非常火的应用Instagram和Pinterest的Android版就是用的这个网络请求库。这个网络请求库是基于Apache HttpClient库之上的一个异步网络请求处理库，网络处理均基于Android的非UI线程，通
ORACLE 复习笔记之SQL语句的优化 eksliang SQL优化 Oracle sql语句优化 SQL语句的优化
转载请出自出处：http://eksliang.iteye.com/blog/2097999 SQL语句的优化总结如下 sql语句的优化可以按照如下六个步骤进行：合理使用索引避免或者简化排序消除对大表的扫描避免复杂的通配符匹配调整子查询的性能 EXISTS和IN运算符下面我就按照上面这六个步骤分别进行总结：
浅析：Android 嵌套滑动机制（NestedScrolling） gg163 android 移动开发滑动机制嵌套
谷歌在发布安卓 Lollipop版本之后，为了更好的用户体验，Google为Android的滑动机制提供了NestedScrolling特性 NestedScrolling的特性可以体现在哪里呢？ 比如你使用了Toolbar，下面一个ScrollView，向上滚
使用hovertree菜单作为后台导航 hvt JavaScript jquery .net hovertree asp.net
hovertree是一个jquery菜单插件，官方网址：http://keleyi.com/jq/hovertree/ ，可以登录该网址体验效果。 0.1.3版本：http://keleyi.com/jq/hovertree/demo/demo.0.1.3.htm hovertree插件包含文件： http://keleyi.com/jq/hovertree/css
SVG 教程（二）矩形天梯梦 svg
SVG <rect> SVG Shapes SVG有一些预定义的形状元素，可被开发者使用和操作：矩形 <rect> 圆形 <circle> 椭圆 <ellipse> 线 <line> 折线 <polyline> 多边形 <polygon> 路径 <path>
一个简单的队列 luyulong java 数据结构队列
public class MyQueue { private long[] arr; private int front; private int end; // 有效数据的大小 private int elements; public MyQueue() { arr = new long[10]; elements = 0; front
基础数据结构和算法九：Binary Search Tree sunwinner Algorithm
A binary search tree (BST) is a binary tree where each node has a Comparable key (and an associated value) and satisfies the restriction that the key in any node is larger than the keys in all
项目出现的一些问题和体会 Steven-Walker DAO Web servlet
第一篇博客不知道要写点什么，就先来点近阶段的感悟吧。这几天学了servlet和数据库等知识，就参照老方的视频写了一个简单的增删改查的，完成了最简单的一些功能，使用了三层架构。 dao层完成的是对数据库具体的功能实现，service层调用了dao层的实现方法，具体对servlet提供支持。 &
高手问答：Java老A带你全面提升Java单兵作战能力！ ITeye管理员 java
本期特邀《Java特种兵》作者：谢宇，CSDN论坛ID: xieyuooo 针对JAVA问题给予大家解答，欢迎网友积极提问，与专家一起讨论! 作者简介：淘宝网资深Java工程师，CSDN超人气博主，人称“胖哥”。 CSDN博客地址： http://blog.csdn.net/xieyuooo 作者在进入大学前是一个不折不扣的计算机白痴，曾经被人笑话过不懂鼠标是什么，

Faster R-CNN 纯C++版本 + 使用最新的caffe

Faster R-CNN 纯C++版本 融合 最新caffe

结果

可能出现的错误

你可能感兴趣的:(深度学习)

Faster R-CNN 纯C++版本融合最新caffe