python调用opencv例子_Opencv调用深度学习模型

介绍

Opencv在前面的几个版本中已经支持caffe、tensorflow、pytorch训练的几种模型,包括分类和物体检测模型(SSD、Yolo),针对tensorflow,opencv与tensorflow object detection api对接,可以通过该api训练模型,然后通过opencv调用,这样就可以把python下的环境移植到C++中。

关于tensorflow object detection api,后面博文会详细介绍

数据准备与环境配置

基于mask_rcnn_inception_v2_coco_2018_01_28的frozen_inference_graph.pb,这个模型在tensorflow object detection api中可以找到,然后需要对应的mask_rcnn_inception_v2_coco_2018_01_28.pbtxt,以及colors.txt,mscoco_labels.names。

opencv必须是刚发布的4.0版本,该版本支持mask rcnn和faster rcnn,低版本不支持哦,注意opencv4.0中在配置环境时,include下少了一个opencv文件夹,只有opencv2,这是正常的。

好了,废话不多说了,直接上源代码,该代码调用usb摄像头进行实时检测,基于单幅图像的检测修改下代码即可。

#include

#include

#include

#include

#include

#include

#include

using namespace cv;

using namespace dnn;

using namespace std;

// Initialize the parameters

float confThreshold = 0.5; // Confidence threshold

float maskThreshold = 0.3; // Mask threshold

vector classes;

vector colors;

// Draw the predicted bounding box

void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask);

// Postprocess the neural network's output for each frame

void postprocess(Mat& frame, const vector& outs);

int main()

{

// Load names of classes

string classesFile = "./mask_rcnn_inception_v2_coco_2018_01_28/mscoco_labels.names";

ifstream ifs(classesFile.c_str());

string line;

while (getline(ifs, line)) classes.push_back(line);

// Load the colors

string colorsFile = "./mask_rcnn_inception_v2_coco_2018_01_28/colors.txt";

ifstream colorFptr(colorsFile.c_str());

while (getline(colorFptr, line))

{

char* pEnd;

double r, g, b;

r = strtod(line.c_str(), &pEnd);

g = strtod(pEnd, NULL);

b = strtod(pEnd, NULL);

Scalar color = Scalar(r, g, b, 255.0);

colors.push_back(Scalar(r, g, b, 255.0));

}

// Give the configuration and weight files for the model

String textGraph = "./mask_rcnn_inception_v2_coco_2018_01_28/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt";

String modelWeights = "./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb";

// Load the network

Net net = readNetFromTensorflow(modelWeights, textGraph);

net.setPreferableBackend(DNN_BACKEND_OPENCV);

net.setPreferableTarget(DNN_TARGET_CPU);

// Open a video file or an image file or a camera stream.

string str, outputFile;

VideoCapture cap(0);//根据摄像头端口id不同,修改下即可

//VideoWriter video;

Mat frame, blob;

// Create a window

static const string kWinName = "Deep learning object detection in OpenCV";

namedWindow(kWinName, WINDOW_NORMAL);

// Process frames.

while (waitKey(1) < 0)

{

// get frame from the video

cap >> frame;

// Stop the program if reached end of video

if (frame.empty())

{

cout << "Done processing !!!" << endl;

cout << "Output file is stored as " << outputFile << endl;

waitKey(3000);

break;

}

// Create a 4D blob from a frame.

blobFromImage(frame, blob, 1.0, Size(frame.cols, frame.rows), Scalar(), true, false);

//blobFromImage(frame, blob);

//Sets the input to the network

net.setInput(blob);

// Runs the forward pass to get output from the output layers

std::vector outNames(2);

outNames[0] = "detection_out_final";

outNames[1] = "detection_masks";

vector outs;

net.forward(outs, outNames);

// Extract the bounding box and mask for each of the detected objects

postprocess(frame, outs);

// Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)

vector layersTimes;

double freq = getTickFrequency() / 1000;

double t = net.getPerfProfile(layersTimes) / freq;

string label = format("Mask-RCNN on 2.5 GHz Intel Core i7 CPU, Inference time for a frame : %0.0f ms", t);

putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));

// Write the frame with the detection boxes

Mat detectedFrame;

frame.convertTo(detectedFrame, CV_8U);

imshow(kWinName, frame);

}

cap.release();

return 0;

}

// For each frame, extract the bounding box and mask for each detected object

void postprocess(Mat& frame, const vector& outs)

{

Mat outDetections = outs[0];

Mat outMasks = outs[1];

// Output size of masks is NxCxHxW where

// N - number of detected boxes

// C - number of classes (excluding background)

// HxW - segmentation shape

const int numDetections = outDetections.size[2];

const int numClasses = outMasks.size[1];

outDetections = outDetections.reshape(1, outDetections.total() / 7);

for (int i = 0; i < numDetections; ++i)

{

float score = outDetections.at(i, 2);

if (score > confThreshold)

{

// Extract the bounding box

int classId = static_cast(outDetections.at(i, 1));

int left = static_cast(frame.cols * outDetections.at(i, 3));

int top = static_cast(frame.rows * outDetections.at(i, 4));

int right = static_cast(frame.cols * outDetections.at(i, 5));

int bottom = static_cast(frame.rows * outDetections.at(i, 6));

left = max(0, min(left, frame.cols - 1));

top = max(0, min(top, frame.rows - 1));

right = max(0, min(right, frame.cols - 1));

bottom = max(0, min(bottom, frame.rows - 1));

Rect box = Rect(left, top, right - left + 1, bottom - top + 1);

// Extract the mask for the object

Mat objectMask(outMasks.size[2], outMasks.size[3], CV_32F, outMasks.ptr(i, classId));

// Draw bounding box, colorize and show the mask on the image

drawBox(frame, classId, score, box, objectMask);

}

}

}

// Draw the predicted bounding box, colorize and show the mask on the image

void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask)

{

//Draw a rectangle displaying the bounding box

rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(255, 178, 50), 3);

//Get the label for the class name and its confidence

string label = format("%.2f", conf);

if (!classes.empty())

{

CV_Assert(classId < (int)classes.size());

label = classes[classId] + ":" + label;

}

//Display the label at the top of the bounding box

int baseLine;

Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

box.y = max(box.y, labelSize.height);

rectangle(frame, Point(box.x, box.y - round(1.5*labelSize.height)), Point(box.x + round(1.5*labelSize.width), box.y + baseLine), Scalar(255, 255, 255), FILLED);

putText(frame, label, Point(box.x, box.y), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1);

Scalar color = colors[classId%colors.size()];

// Resize the mask, threshold, color and apply it on the image

resize(objectMask, objectMask, Size(box.width, box.height));

Mat mask = (objectMask > maskThreshold);

Mat coloredRoi = (0.3 * color + 0.7 * frame(box));

coloredRoi.convertTo(coloredRoi, CV_8UC3);

// Draw the contours on the image

vector contours;

Mat hierarchy;

mask.convertTo(mask, CV_8U);

findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE);

drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100);

coloredRoi.copyTo(frame(box), mask);

}

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

实验结果

不过检测速度很慢,I7-8700k,GTX1060下需要1s每帧,达不到实时性要求。。。

实验数据

你可能感兴趣的:(python调用opencv例子_Opencv调用深度学习模型)