OpenCV 4.5 及以上版本在 DNN 模块中加入了场景文字检测与识别的高级 API
利用opencv OCR的高级API实现对场景中所有中英文进行检测和识别
cv::dnn::TextDetectionModel
cv::dnn::TextDetectionModel
支持以下算法:
cv::dnn::TextDetectionModel_DB
cv::dnn::TextDetectionModel_EAST
cv::dnn::TextRecognitionModel
cv::dnn::TextRecognitionModel
支持CNN+RNN+CTC算法,也提供CTC的贪心解码算法
场景文字检测和文字识别均有现成的模型可以直接使用
可选模型
DB_IC15_resnet50.onnx
https://drive.google.com/uc?export=dowload&id=17_ABp79PlFt9yPCxSaarVc_DKTmrSGGf
推荐参数: -inputHeight=736, -inputWidth=1280;
这个模型是在 ICDAR2015 上训练的,所以它只能检测英文文本实例。
DB_IC15_resnet18.onnx
https://drive.google.com/uc?export=dowload&id=1sZszH3pEt8hliyBlTmB-iulxHP1dCQWV
推荐参数:-inputHeight=736, -inputWidth=1280
这个模型是在 ICDAR2015 上训练的,所以它只能检测英文文本实例。
DB_TD500_resnet50.onnx
https://drive.google.com/uc?export=dowload&id=19YWhArrNccaoSza0CfkXlA8im4-lAGsR
推荐参数: -inputHeight=736, -inputWidth=736;
该模型在 MSRA-TD500 上训练,可以检测英文和中文文本实例。
DB_TD500_resnet18.onnx
https://drive.google.com/uc?export=dowload&id=1vY_KsDZZZb_svd5RT6pjyI8BS1nPbBSX
推荐参数: -inputHeight=736, -inputWidth=736;
该模型在 MSRA-TD500 上训练,可以检测英文和中文文本实例。
测试数据
url: https://drive.google.com/uc?export=dowload&id=149tAhIcvfCYeyufRoZ9tmc2mZDKE_XrF
可选模型
1. crnn.onnx:
网址:https://drive.google.com/uc?export=dowload&id=1ooaLR-rkTl8jdpGy1DoQs0-X0lQsB6Fj
字典集:alphabet_36.txt
网址:https://drive.google.com/uc?export=dowload&id=1oPOYx5rQRp8L6XQciUwmwhMCfX0KyO4b
推荐参数:rgb=0
该模型采用的训练集是MJSynth,类别数量为36(0~9 + a~z)
2. crnn_cs.onnx:
https://drive.google.com/uc?export=dowload&id=12diBsVJrS9ZEl6BNUiRp9s0xPALBS7kt
字典集:alphabet_94.txt
https://drive.google.com/uc?export=dowload&id=1oKXxXKusquimp7XY1mFvj9nwLzldVgBR
推荐参数:rgb=1
该模型在 MJSynth 和 SynthText 上训练,类别数量为94(0~9 + a~z + A~Z + 标点符号)
3. crnn_cs_CN.onnx:
https://drive.google.com/uc?export=dowload&id=1is4eYEUKH7HR7Gl37Sw4WPXx6Ir8oQEG
字典集:alphabet_3944.txt
https://drive.google.com/uc?export=dowload&id=18IZUUdNzJ44heWTndDO6NNfIpJMmN-ul
推荐参数:rgb=1
训练数据集:ReCTS (https://rrc.cvc.uab.es/?ch=12),识别类别数量为3944(0~9 + a~z + A~Z + 中文字符 + 特殊字符)
测试数据
https://drive.google.com/uc?export=dowload&id=1nMcEy68zDNpIlqAn6xCk_kYcUTIeSOtN
该 example 可以对输入图片进行文字检测,并将图片中的所有文字框出来。从实用性出发,这里不使用上面的测试数据,而是直接使用现实场景中的图片进行测试
目标:框选出图片中所有中英文
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
using namespace cv;
using namespace cv::dnn;
int text_detection()
{
// 初始化参数
float binThresh = 0.3; //二值图的置信度阈值
float polyThresh = 0.5 ; //文本多边形阈值
double unclipRatio = 2.0; //检测到的文本区域的未压缩比率,gai比率确定输出大小
uint maxCandidates = 200; //输出结果的最大数量
int height = 736; //输出图片长宽
int width = 736;
cv::String modelPath = "/home/haijun/code_study/opencv/chapter1/modle/DB_TD500_resnet50.onnx"; //模型权重文件
// Load the network
TextDetectionModel_DB detector(modelPath);
detector.setBinaryThreshold(binThresh)
.setPolygonThreshold(polyThresh)
.setUnclipRatio(unclipRatio)
.setMaxCandidates(maxCandidates);
double scale = 1.0 / 255.0;
Size inputSize = Size(width, height);
Scalar mean = Scalar(122.67891434, 116.66876762, 104.00698793);
detector.setInputParams(scale, inputSize, mean);
// Create a window
static const std::string winName = "TextDetectionModel";
//检测单张图片
// Open an image file
Mat frame = imread("/home/haijun/code_study/opencv/chapter1/picture/3.png");
CV_Assert(!frame.empty());
Mat frame1;
cv::resize(frame , frame1 , Size(frame.rows , frame.cols) , 0 , 0 , 1);
std::cout << "height:" <> results;
detector.detect(frame1, results);
polylines(frame1, results, true, Scalar(255, 0, 0), 2);
imshow(winName, frame1);
waitKey();
return 0;
}
检测结果如下:
该example是对文字检测的方框逐个进行文字识别
由于 OpenCV 的 putText 无法绘制中文字符,图片上的中文标注会失败,因此中文识别结果只能在终端打印出来
#include <iostream>
#include <fstream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn/dnn.hpp>
using namespace cv;
using namespace cv::dnn;
// Text-region preprocessing: perspective-warps a detected quadrilateral into an
// axis-aligned crop so that every text box becomes horizontal.
void fourPointsTransform(const Mat& frame, const Point2f vertices[], Mat& result);
// Comparator that orders two points by their x coordinate.
bool sortPts(const Point& p1, const Point& p2);
int text_spotting()
{
// 初始化参数
// DB文本检测模型
float binThresh = 0.3; //二值图的置信度阈值
float polyThresh = 0.5 ; //文本多边形阈值
double unclipRatio = 2.0; //检测到的文本区域的未压缩比率,gai比率确定输出大小
uint maxCandidates = 200; //输出结果的最大数量
int height = 736; //输出图片长宽
int width = 736;
cv::String detModelPath = "/home/haijun/code_study/opencv/chapter1/modle/DB_TD500_resnet50.onnx"; //DB模型权重文件
// CRNN文本识别模型
String recModelPath = "/home/haijun/code_study/opencv/chapter1/modle/crnn_cs_CN.onnx"; //文字识别模型文件
String vocPath = "/home/haijun/code_study/opencv/chapter1/modle/alphabet_3944.txt"; //字典文件
int imreadRGB = 1; //0:以灰度图读取图像 1:以彩色图读取图像
// 载入模型
if (detModelPath.empty())
{
std::cout << "DB模型文件加载失败" < vocabulary;
while (std::getline(vocFile, vocLine)) {
vocabulary.push_back(vocLine);
}
recognizer.setVocabulary(vocabulary);
recognizer.setDecodeType("CTC-greedy");
// 设置检测参数
double detScale = 1.0 / 255.0;
Size detInputSize = Size(width, height);
Scalar detMean = Scalar(122.67891434, 116.66876762, 104.00698793);
detector.setInputParams(detScale, detInputSize, detMean);
// 设置识别参数
double recScale = 1.0 / 127.5;
Scalar recMean = Scalar(127.5);
Size recInputSize = Size(100, 32);
recognizer.setInputParams(recScale, recInputSize, recMean);
// Create a window
static const std::string winName = "Text_Spotting";
// 载入图像
Mat frame = imread("/home/haijun/code_study/opencv/chapter1/picture/11.jpg");
if (frame.empty())
{
std::cout << "图像加载失败" < > detResults;
detector.detect(frame, detResults);
if (detResults.size() > 0) {
//文本识别
Mat recInput;
if (!imreadRGB) {
cvtColor(frame, recInput, cv::COLOR_BGR2GRAY);
} else {
recInput = frame;
}
std::vector< std::vector > contours;
for (uint i = 0; i < detResults.size(); i++)
{
const auto& quadrangle = detResults[i];
CV_CheckEQ(quadrangle.size(), (size_t)4, ""); //j检测Mat是否为Vector
contours.emplace_back(quadrangle); //插入数据到向量
std::vector quadrangle_2f;
for (int j = 0; j < 4; j++)
quadrangle_2f.emplace_back(quadrangle[j]);
// 转换和裁剪图像
Mat cropped;
fourPointsTransform(recInput, &quadrangle_2f[0], cropped);
std::string recognitionResult = recognizer.recognize(cropped);
std::cout << i << ": '" << recognitionResult << "'" << std::endl;
putText(frame, recognitionResult, quadrangle[3], FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
}
polylines(frame, contours, true, Scalar(0, 255, 0), 2);
} else {
std::cout << "No Text Detected." << std::endl;
}
imshow(winName, frame);
waitKey();
return 0;
}
void fourPointsTransform(const Mat& frame, const Point2f vertices[], Mat& result)
{
const Size outputSize = Size(100, 32);
Point2f targetVertices[4] = {
Point(0, outputSize.height - 1),
Point(0, 0),
Point(outputSize.width - 1, 0),
Point(outputSize.width - 1, outputSize.height - 1)
};
Mat rotationMatrix = getPerspectiveTransform(vertices, targetVertices);
warpPerspective(frame, result, rotationMatrix, outputSize);
#if 1
imshow("roi", result);
waitKey();
#endif
}
/// Ordering predicate on x coordinate: true when `p1` lies strictly left of `p2`.
bool sortPts(const Point& p1, const Point& p2)
{
    const bool firstIsLeft = p2.x > p1.x;
    return firstIsLeft;
}
终端打印:
2: ‘众城旗舰店’
3: ‘健康生活’
4: ‘环保打印’
5: ‘LASERTONER’
6: ‘PUBLICCOLOR’
7: ‘潮光酸粉盒’
8: ‘人众诚’
文字检测模型的效果还是非常棒的,可以框出所有文字
文字识别模型对英文识别准确率非常好,对中文的识别会出现差错,但是可以接受
该模型不能对竖排中文文字进行识别