下载https://github.com/ChenYingpeng/caffe-yolov3
1. 修改CMakeLists.txt
主要修改交叉编译器为x86_64编译,修改自己的caffe路径(其中caffe.proto中增加upsample层),修改自己的opencv路径
cmake_minimum_required(VERSION 2.8)
project(sysDetectSpeed)

# Compile every C++ source as C++11. The flag is appended, so whatever is
# already stored in CMAKE_CXX_FLAGS is preserved. (Alternative: -std=gnu++11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")

# Cache switch for the pre-build dependency installer. Within this script its
# only consumer is the disabled block below, so it currently has no effect.
set(
  BUILD_DEPS "YES"
  CACHE BOOL
  "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed."
)

# Disabled pre-build step: on the first cmake run (or when the user re-enables
# BUILD_DEPS) it would launch the dependency install script and then force
# BUILD_DEPS back to NO.
#
# if( ${BUILD_DEPS} )
#   message("Launching pre-build dependency installer script...")
#
#   execute_process(COMMAND sh ../CMakePreBuild.sh
#                   WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
#                   RESULT_VARIABLE PREBUILD_SCRIPT_RESULT)
#   set(BUILD_DEPS "NO" CACHE BOOL "If YES, will install dependencies into sandbox. #Automatically reset to NO after dependencies are installed." FORCE)
#   message("Finished installing dependencies")
# endif()
# Qt4 is used to load images (installed by ubuntu-desktop).
find_package(Qt4 REQUIRED)
include(${QT_USE_FILE})
add_definitions(${QT_DEFINITIONS})

# CUDA is mandatory: the library target is created with cuda_add_library(),
# a command that only exists after FindCUDA has located a toolkit. REQUIRED
# makes a missing toolkit fail here with a clear message instead of later with
# "Unknown CMake command cuda_add_library".
find_package(CUDA REQUIRED)

# Extra nvcc options: full optimisation plus one -gencode pair per targeted
# compute capability (5.3, 6.2 and 6.1). Existing CUDA_NVCC_FLAGS are kept.
list(
  APPEND CUDA_NVCC_FLAGS
  -O3
  -gencode arch=compute_53,code=sm_53
  -gencode arch=compute_62,code=sm_62
  -gencode arch=compute_61,code=sm_61
)
# All build products are placed under <build dir>/<target processor>/.
set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR})
set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include)

# Executables go to bin/, shared and static libraries both go to lib/.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)

# The include/ and bin/ folders must exist at configure time because headers,
# images and a symlink are written into them while cmake runs.
file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR})
file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)

message("-- system arch: ${CMAKE_SYSTEM_PROCESSOR}")
message("-- output path: ${PROJECT_OUTPUT_DIR}")

# Header search paths for the C/C++ interface.
# NOTE(review): GIE_PATH is not assigned anywhere in this script; presumably it
# is meant to be passed with -DGIE_PATH=<dir>. Left unset, the second entry
# expands to "/include" - confirm.
include_directories(
  ${PROJECT_INCLUDE_DIR}
  ${GIE_PATH}/include
)

# GStreamer, GLib, libxml2 and CUPTI headers (x86_64 system locations).
include_directories(
  /usr/include/gstreamer-1.0
  /usr/lib/x86_64-linux-gnu/gstreamer-1.0/include
  /usr/include/glib-2.0
  /usr/include/libxml2
  /usr/lib/x86_64-linux-gnu/glib-2.0/include/
  /usr/local/cuda-8.0/extras/CUPTI/include
)

# Caffe headers (source tree and build tree) plus the generic system and CUDA
# include roots. Adjust the /home/caffe entries to your own Caffe checkout.
include_directories(
  ${PROJECT_INCLUDE_DIR}
  /home/caffe/include
  /home/caffe/build/include
  /usr/local/include
  /usr/local/cuda/include
  /usr/include
)
# Collect the sources (*.cpp, *.cu) and public headers (*.h) that sit next to
# this CMakeLists.txt.
file(GLOB inferenceSources *.cpp *.cu)
file(GLOB inferenceIncludes *.h)

# Shared library built through FindCUDA so the .cu files are compiled by nvcc.
cuda_add_library(sysDetectSpeed SHARED ${inferenceSources})

# TensorRT, Qt, OpenGL and GStreamer 1.0.
# Not linked here: GLEW (by name), gstreamer-0.10 gstbase-0.10 gstapp-0.10
target_link_libraries(
  sysDetectSpeed
  nvcaffe_parser
  nvinfer
  Qt4::QtGui
  GL
  gstreamer-1.0
  gstapp-1.0
)

# Caffe and its runtime dependencies, referenced by absolute path. Adjust the
# /home/caffe entry to the build directory of your own Caffe.
target_link_libraries(
  sysDetectSpeed
  /home/caffe/build/lib/libcaffe.so
  /usr/lib/x86_64-linux-gnu/libglog.so
  /usr/lib/x86_64-linux-gnu/libgflags.so.2
  /usr/lib/x86_64-linux-gnu/libboost_system.so
  /usr/lib/x86_64-linux-gnu/libGLEW.so.1.13
)
# Mirror every public header into the build-tree include directory.
foreach(header ${inferenceIncludes})
  message("-- Copying ${header}")
  configure_file(${header} ${PROJECT_INCLUDE_DIR} COPYONLY)
endforeach()

# Expose the network data next to the executables through a symbolic link
# (created at configure time).
execute_process(
  COMMAND "${CMAKE_COMMAND}" "-E" "create_symlink"
          "${PROJECT_SOURCE_DIR}/data/networks"
          "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/networks"
)

# Copy the sample images into the runtime output directory.
file(GLOB imageData ${PROJECT_SOURCE_DIR}/data/images/*)
foreach(imageFile ${imageData})
  message("-- Copying ${imageFile}")
  file(COPY ${imageFile} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
  # Alternative kept for reference:
  # configure_file(${include} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COPYONLY)
endforeach()

# Build the detectnet sub-project.
add_subdirectory(detectnet)
# setup opencv
# Request exactly the OpenCV modules this library uses and consume what the
# package reports. The previous version passed -I flags to
# target_link_libraries(), where they only reach the link line and never the
# compiler, and hard-coded -L/-l flags that ignored the OpenCV found here.
# For an OpenCV outside the default search paths, configure with
#   -DOpenCV_DIR=<directory containing OpenCVConfig.cmake>
find_package(
  OpenCV REQUIRED
  COMPONENTS core highgui imgproc imgcodecs video videoio
)
include_directories(${OpenCV_INCLUDE_DIRS})

# Plain (keyword-less) signature on purpose: it matches the other
# target_link_libraries() calls on this target, and the two signatures must not
# be mixed on one target.
target_link_libraries(sysDetectSpeed ${OpenCV_LIBS})
# Install the public headers, one install rule per header file.
foreach(header ${inferenceIncludes})
  install(
    FILES "${header}"
    DESTINATION include/sysDetectSpeed
  )
endforeach()

# Install the shared library and record it in the sysDetectSpeedConfig export
# set.
install(
  TARGETS sysDetectSpeed
  DESTINATION lib/sysDetectSpeed
  EXPORT sysDetectSpeedConfig
)

# Install the export set so other CMake projects can import the library.
install(
  EXPORT sysDetectSpeedConfig
  DESTINATION share/sysDetectSpeed/cmake
)
此处主要修改处为:
# Header search paths: point the /home/caffe entries at your own Caffe tree.
include_directories(
  ${PROJECT_INCLUDE_DIR}
  /home/caffe/include
  # Note: if your Caffe was originally built with plain `make`, this path
  # should be /home/caffe/build/src instead.
  /home/caffe/build/include
  /usr/local/include
  /usr/local/cuda/include
  /usr/include
)

# Libraries linked by absolute path: point libcaffe.so at your own Caffe build.
target_link_libraries(
  sysDetectSpeed
  /home/caffe/build/lib/libcaffe.so
  /usr/lib/x86_64-linux-gnu/libglog.so
  /usr/lib/x86_64-linux-gnu/libgflags.so.2
  /usr/lib/x86_64-linux-gnu/libboost_system.so
  /usr/lib/x86_64-linux-gnu/libGLEW.so.1.13
)
2.编译upsample层 caffe
增加include/caffe/layers/upsample_layer.hpp
#ifndef CAFFE_UPSAMPLE_LAYER_HPP_
#define CAFFE_UPSAMPLE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Upsamples its single bottom blob by an integer factor.
 *
 * The factor is read from UpsampleParameter::scale in LayerSetUp(). Reshape()
 * multiplies the last two axes of the bottom shape by that factor to obtain
 * the top shape. The layer takes exactly one bottom blob and produces exactly
 * one top blob.
 */
template <typename Dtype>
class UpsampleLayer : public Layer<Dtype> {
 public:
  explicit UpsampleLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  /// Reads the scale factor from the layer parameter.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /// Sizes top[0] as the bottom shape with its last two axes scaled.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Upsample"; }
  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MaxBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

 private:
  /// Integer upsampling factor applied to the last two axes.
  int scale_;
};

}  // namespace caffe

#endif  // CAFFE_UPSAMPLE_LAYER_HPP_
增加src/caffe/layers/upsample_layer.cpp
#include <vector>

#include "caffe/layers/upsample_layer.hpp"

namespace caffe {

// Reads the integer scale factor from the layer's UpsampleParameter.
template <typename Dtype>
void UpsampleLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const UpsampleParameter& upsample_param =
      this->layer_param_.upsample_param();
  scale_ = upsample_param.scale();
  // Forward_cpu divides by scale_, so reject zero or negative factors early.
  CHECK_GE(scale_, 1) << "Upsample scale must be a positive integer.";
}

// Top shape = bottom shape with its last two axes multiplied by scale_.
template <typename Dtype>
void UpsampleLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int num_axes = bottom[0]->num_axes();
  // The two trailing axes are indexed below; fewer axes would go out of range.
  CHECK_GE(num_axes, 2) << "Upsample needs a bottom blob with >= 2 axes.";
  vector<int> out_shape;
  for (int i = 0; i < num_axes; i++) {
    out_shape.push_back(bottom[0]->shape(i));
  }
  out_shape[num_axes - 1] *= scale_;
  out_shape[num_axes - 2] *= scale_;
  top[0]->Reshape(out_shape);
}

// Every output element (n, c, h, w) copies the input element
// (n, c, h / scale_, w / scale_), using integer division.
template <typename Dtype>
void UpsampleLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // N, C, H, W are the dimensions of the (already upsampled) top blob.
  int N = top[0]->shape(0);
  int C = top[0]->shape(1);
  int H = top[0]->shape(2);
  int W = top[0]->shape(3);

  const Dtype *input = bottom[0]->cpu_data();
  Dtype *output = top[0]->mutable_cpu_data();
  for (int n = 0; n < N; n++) {
    for (int c = 0; c < C; c++) {
      for (int h = 0; h < H; h++) {
        for (int w = 0; w < W; w++) {
          int nw = w/scale_;
          int nh = h/scale_;
          int out_idx = (((n * C + c) * H) + h) * W + w;
          // The input plane is (H / scale_) x (W / scale_).
          int in_idx = (((n * C + c) * (H / scale_)) + nh) * (W / scale_) + nw;
          output[out_idx] = input[in_idx];
        }
      }
    }
  }
}

// Each bottom gradient element is the sum of the scale_ x scale_ window of
// top gradients that Forward_cpu filled from it.
template <typename Dtype>
void UpsampleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // N, C, H, W are the dimensions of the bottom blob here.
  int N = bottom[0]->shape(0);
  int C = bottom[0]->shape(1);
  int H = bottom[0]->shape(2);
  int W = bottom[0]->shape(3);
  const Dtype *output_grad = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // The loop accumulates with +=, so start from zero.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  for (int n = 0; n < N; n++) {
    for (int c = 0; c < C; c++) {
      for (int h = 0; h < H; h++) {
        for (int w = 0; w < W; w++) {
          const int bottom_idx = (((n * C + c) * H) + h) * W + w;
          for (int i = 0; i < scale_; i++) {
            for (int j = 0; j < scale_; j++) {
              int nw = w * scale_ + i;
              int nh = h * scale_ + j;
              // The top plane is (H * scale_) x (W * scale_).
              const int top_idx = (((n * C + c) * (H * scale_))
                  + nh) * (W * scale_) + nw;
              bottom_diff[bottom_idx] += output_grad[top_idx];
            }
          }
        }
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(UpsampleLayer);
#endif

INSTANTIATE_CLASS(UpsampleLayer);
REGISTER_LAYER_CLASS(Upsample);

}  // namespace caffe
增加src/caffe/layers/upsample_layer.cu
#include <vector>

#include "caffe/layers/upsample_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Maps a flat index into the upsampled tensor to the flat index of its source
// element in the original tensor.
//   ii            flat index into a tensor whose trailing dims are d1, d2, d3
//   d1, d2, d3    trailing dimensions of the upsampled tensor
//   scale_factor  integer upsampling factor along the d2 and d3 axes
__device__ int translate_idx(
    int ii, int d1, int d2, int d3, int scale_factor) {
  int x, y, z, w;
  // Split ii into its four coordinates, innermost axis first.
  w = ii % d3;
  ii = ii/d3;
  z = ii % d2;
  ii = ii/d2;
  y = ii % d1;
  ii = ii/d1;
  x = ii;
  // Shrink the two scaled coordinates and their extents back to source size.
  w = w/scale_factor;
  z = z/scale_factor;
  d2 /= scale_factor;
  d3 /= scale_factor;
  return (((x*d1+y)*d2)+z)*d3+w;
}
// Maps a flat index into the original tensor (trailing dims d1, d2, d3) to
// the flat index of one element of its scale_factor x scale_factor window in
// the upsampled tensor. off_x is the offset along the innermost (d3) axis and
// off_y the offset along the d2 axis.
__device__ int translate_idx_inv(
    int ii, int d1, int d2, int d3, int scale_factor, int off_x, int off_y) {
  // Peel off the coordinates, innermost axis first.
  const int col = ii % d3;
  const int above_col = ii / d3;
  const int row = above_col % d2;
  const int above_row = above_col / d2;
  const int chan = above_row % d1;
  const int batch = above_row / d1;

  // Position and extents inside the upsampled tensor.
  const int up_col = col * scale_factor + off_x;
  const int up_row = row * scale_factor + off_y;
  const int up_d2 = d2 * scale_factor;
  const int up_d3 = d3 * scale_factor;

  return (((batch * d1 + chan) * up_d2) + up_row) * up_d3 + up_col;
}
// Forward kernel: thread ii writes output element ii, copying the input
// element selected by translate_idx(). d1, d2, d3 are the trailing dimensions
// of the output tensor; threads with ii >= no_elements do nothing.
template <typename Dtype>
__global__ void upscale(const Dtype *input, Dtype *output,
    int no_elements, int scale_factor, int d1, int d2, int d3) {
  int ii = threadIdx.x + blockDim.x * blockIdx.x;
  if (ii >= no_elements) return;
  int ipidx = translate_idx(ii, d1, d2, d3, scale_factor);
  output[ii]=input[ipidx];
}
// Backward kernel: thread ii adds the scale_factor x scale_factor window of
// output gradients that maps to input element ii into gradInput_data[ii].
// d1, d2, d3 are the trailing dimensions of the input-gradient tensor.
// The kernel accumulates with +=, so the caller must zero gradInput_data
// before launching it.
template <typename Dtype>
__global__ void downscale(Dtype *gradInput_data, const Dtype *gradOutput_data,
    int no_elements, int scale_factor, int d1, int d2,
    int d3) {
  int ii = threadIdx.x + blockDim.x * blockIdx.x;
  if (ii >= no_elements) return;
  for (int i = 0; i < scale_factor; i++) {
    for (int j = 0; j < scale_factor; j++) {
      int ipidx = translate_idx_inv(ii, d1, d2, d3, scale_factor, i, j);
      gradInput_data[ii] += gradOutput_data[ipidx];
    }
  }
}
// GPU forward pass: launches the upscale kernel with one thread per element
// of top[0].
template <typename Dtype>
void UpsampleLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Trailing dimensions of the upsampled (top) blob.
  int d1, d2, d3;
  d1 = top[0]->shape(1);
  d2 = top[0]->shape(2);
  d3 = top[0]->shape(3);

  int no_elements = top[0]->count();

  upscale<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
      <<<CAFFE_GET_BLOCKS(no_elements), CAFFE_CUDA_NUM_THREADS>>>(
      bottom[0]->gpu_data(),
      top[0]->mutable_gpu_data(), no_elements, scale_, d1, d2, d3);
  // Report launch failures immediately instead of at a later CUDA call.
  CUDA_POST_KERNEL_CHECK;
}
// GPU backward pass: zeroes the bottom gradient, then launches the downscale
// kernel with one thread per element of bottom[0].
template <typename Dtype>
void UpsampleLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Trailing dimensions of the bottom blob.
  int d1, d2, d3;
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  d1 = bottom[0]->shape(1);
  d2 = bottom[0]->shape(2);
  d3 = bottom[0]->shape(3);
  int no_elements = bottom[0]->count();
  // downscale accumulates with +=, so the buffer has to start at zero.
  caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom_diff);
  downscale<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
      <<<CAFFE_GET_BLOCKS(no_elements), CAFFE_CUDA_NUM_THREADS>>>(
      bottom_diff, top[0]->gpu_diff(), no_elements, scale_, d1, d2, d3);
  // Report launch failures immediately instead of at a later CUDA call.
  CUDA_POST_KERNEL_CHECK;
}

INSTANTIATE_LAYER_GPU_FUNCS(UpsampleLayer);

}  // namespace caffe
增加src/caffe/test/test_upsample_layer.cpp(此步骤可省略)
#include <string>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gtest/gtest.h"

#include "caffe/blob.hpp"
#include "caffe/filler.hpp"
#include "caffe/layers/upsample_layer.hpp"
#include "caffe/util/io.hpp"

#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

using boost::scoped_ptr;

namespace caffe {

// Fixture owning one Gaussian-filled 2x5x2x2 bottom blob and one empty top
// blob, both registered in the vectors handed to the layer under test.
template <typename TypeParam>
class UpsampleLayerTest : public MultiDeviceTest<TypeParam> {
  typedef typename TypeParam::Dtype Dtype;

 protected:
  UpsampleLayerTest()
      : blob_bottom_data_(new Blob<Dtype>(2, 5, 2, 2)),
        blob_top_data_(new Blob<Dtype>()) {
    // fill the values
    FillerParameter filler_param;
    filler_param.set_std(10);
    GaussianFiller<Dtype> filler(filler_param);
    filler.Fill(this->blob_bottom_data_);
    blob_bottom_vec_.push_back(blob_bottom_data_);
    blob_top_vec_.push_back(blob_top_data_);
  }
  virtual ~UpsampleLayerTest() {
    delete blob_bottom_data_;
    delete blob_top_data_;
  }
  Blob<Dtype>* const blob_bottom_data_;
  Blob<Dtype>* const blob_top_data_;
  vector<Blob<Dtype>*> blob_bottom_vec_;
  vector<Blob<Dtype>*> blob_top_vec_;
};

TYPED_TEST_CASE(UpsampleLayerTest, TestDtypesAndDevices);

// Checks the gradient of a scale-2 Upsample layer exhaustively for bottom
// blob 0.
TYPED_TEST(UpsampleLayerTest, TestGradient) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter layer_param;
  UpsampleParameter* upsample_param =
      layer_param.mutable_upsample_param();
  upsample_param->set_scale(2);
  UpsampleLayer<Dtype> layer(layer_param);
  GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
      this->blob_top_vec_, 0);
}

}  // namespace caffe
最后在src/caffe/proto/caffe.proto中增加
// Excerpt: add the field below inside the existing LayerParameter message
// (the ellipsis line stands for the fields that are already there).
// NOTE(review): a field number must be unique within its message - confirm
// that 148 is not already taken in the LayerParameter of your Caffe version
// and pick a free number otherwise.
message LayerParameter {
……
// Settings for the Upsample layer; the layer reads them through
// layer_param_.upsample_param().
optional UpsampleParameter upsample_param = 148;
}
// Configuration of the Upsample layer.
message UpsampleParameter {
// Integer factor by which the layer multiplies the last two axes of its
// input shape. Defaults to 1.
optional int32 scale = 1 [default = 1];
}
重新编译caffe,后续转换需添加编译好的caffe路径
3.yolov3转caffemodel
下载https://github.com/marvis/pytorch-caffe-darknet-convert
原工程支持yolov2
首先安装pytorch:pip install torch==0.4.1
然后将 ChenYingpeng/caffe-yolov3/model_convert/yolov3_darknet2caffe.py 拷贝到 pytorch-caffe-darknet-convert 目录下,并在该目录下执行:
python yolov3_darknet2caffe.py yolov3xxx.cfg yolov3xxx.weights xxx.prototxt xxx.caffemodel
4. 在 ChenYingpeng/caffe-yolov3 下按照 README.md 中的教程,配合修改好的 CMakeLists.txt 进行编译,编译后可测试:
caffe-yolov3/detectnet/detectnet.cpp yolov3.prototxt yolov3.caffemodel
若实在无法编译通过(由于博主没有sudo权限,无法安装opencv3),
可参考使用MobileNet-YOLO(https://github.com/eric612/MobileNet-YOLO)中测试脚本测试,其中yolov3.prototxt需要参考其中相应的yolov3.prototxt做修改,然后测试转出的yolov3.caffemodel