最近在做数字检测与识别,主要检测识别手机端的数字,设备为树莓派3。
我在树莓派上安装了Tesseract-OCR,发现检测数字的速度并不是很快,一串数字识别大概要300-400多毫秒,配上特定环境代码检测的话,至少要1s左右,这个速度要想达到实时检测是不可能的,因为要想用摄像头达到实时检测的效果,每帧识别至少要低于40ms,所以完全不够。最近用最简单的深度学习方法识别,在树莓派上,速度能达到2-3ms!!不禁感叹,CNN真的厉害。当然这还不够,因为这套算法在识别手机上的数字时,很容易受外部环境的干扰,比如光照,除非你在一个黑盒子里,或者在晚上来做检测。
这里先说一下识别达到的效果,比如手机上固定位置的一串数字,识别到了,树莓派上GPIO能做出响应,这样就能在手机上实现一些监测功能。
代码和效果图稍后更新。。。。。
楼主正在做另一种方法,避免光照的干扰,代码和效果图可能会更新晚一点。有兴趣做数字检测的同学可以一起交流。
部分代码
string modelTxt = "./lenet_train_test.prototxt";
string modelBin = "./lenet_solver_iter_10000.caffemodel";
//read image
dnn::Net net;
try {
net = dnn::readNetFromCaffe(modelTxt, modelBin);
}
catch (cv::Exception &ee) {
cerr << "Exception: " << ee.what() << endl;
if (net.empty()) {
cout << "Can't load the network by using the flowing files:" << endl;
cout << "modelTxt: " << modelTxt << endl;
cout << "modelBin: " << modelBin << endl;
return -1;
}
}
// Pre-process the current frame into a binary image:
// 3x3 box blur -> grayscale -> histogram equalization -> Otsu threshold.
//imshow("camera-demo", frame);
blur(frame, frame, Size(3, 3));
// COLOR_BGR2GRAY is the C++ API name of the conversion code; the legacy
// CV_BGR2GRAY macro is not available by default in newer OpenCV releases.
cvtColor(frame, gray_image, COLOR_BGR2GRAY);
equalizeHist(gray_image, gray_image);
//Rect first = selectROI("Test iamge", gray_image);
//Mat ROI = gray(Rect(first.x, first.y, first.width, first.height));
// The threshold argument (0) is ignored: THRESH_OTSU selects it automatically.
threshold(gray_image, result, 0, 255, THRESH_BINARY | THRESH_OTSU);
imshow("gray_image", result);

// Wait up to 30 ms for a key press; 27 is the ASCII code of ESC, which
// leaves the enclosing loop.
const char c = static_cast<char>(waitKey(30));
if (c == 27)
{
    break;
}
vector > contours;
vector hierarchy;
findContours(result, contours, hierarchy, CV_RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point());
//cout << "contours" << contours.size() << endl;
for (int i = 0; i < contours.size(); i++)
{
Rect rect = boundingRect(contours[i]);
double area = contourArea(contours[i]);
//cout << "area" << area << endl;
if (area > 100)
{
drawContours(frame, contours, i, Scalar(0, 0, 255), 1, 8, hierarchy);
rectangle(frame, rect, Scalar(255, 0, 0), 1, 8);
Mat roi = result(rect);
resize(roi, roi, Size(28, 28));
copyMakeBorder(roi, dst, roi.rows*0.1, roi.rows*0.1, roi.cols*0.1, roi.cols*0.1, BORDER_CONSTANT, Scalar(0));
TickMeter t;
Mat inputBlob = dnn::blobFromImage(dst, 0.00390625f, Size(28, 28), Scalar(), false);
Mat pred;
net.setInput(inputBlob, "data");//set the network input, "data" is the name of the input layer
t.start();
pred = net.forward("prob");//compute output, "prob" is the name of the output layer
t.stop();
//cout << "pred" << pred << endl;
int classId;
double classProb;
getMaxClass(pred, &classId, &classProb);
cout << "Best Class: " << classId << endl;
cout << "Probability: " << classProb * 100 << "%" << endl;
ostringstream ss;
ostringstream tt;
ss.str("");
tt.str("");
ss << classId;
tt << (int)t.getTimeMilli() /(int) t.getCounter();
String prenum(ss.str());
String time(tt.str());
String label1 = "n:"+prenum ;
String label2 = "time:" + time+"ms";
putText(frame, label1, Point(rect.x, rect.y), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 0, 0));
putText(frame, label2, Point(rect.x, rect.y-15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 0, 0));
//string k = to_string(i);
//imshow(k, roi);
imshow("final", frame);
}
}