最近与国内某大厂合作,开发了一套基于其OCR识别算法模型的实时监测系统,主要实现的是通过摄像头实时拍摄显示屏的画面,对拍摄的画面做OCR识别后,格式化输出识别结果。
该软件的最终目标平台为嵌入式设备,现阶段先在Win10端开展验证工作。
OCR识别效果如下图所示。左侧“Input”窗口内显示OCR识别的具体输入内容,右侧“Output”窗口内显示识别并格式化之后的输出结果。
这个显示系统是基于摄像头模块的camera.cpp代码实现的;关于如何接收摄像头图片、并对图像进行加工处理,请参照以下博客的内容:
https://blog.csdn.net/wang_chao118/article/details/126125022?spm=1001.2014.3001.5502
基于该大厂的OCR识别模型和SDK文档,对camera.cpp文件进行改写,改写后的代码如下:
#include "camera.h"

#include <io.h>

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
using namespace cv;
// Returns the current system_clock time as a count of whole milliseconds
// since the clock's epoch, carried in a std::time_t.
std::time_t get_timestamp()
{
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;
    using std::chrono::system_clock;

    const auto since_epoch = system_clock::now().time_since_epoch();
    const auto elapsed_ms = duration_cast<milliseconds>(since_epoch);
    return static_cast<std::time_t>(elapsed_ms.count());
}
// Reads the license key, i.e. the first line of the file at `key_path`,
// into `key`.
//
// key      - receives the first line of the file. Left untouched when the
//            file cannot be opened; set to an empty string when the file is
//            empty.
// key_path - path of the license key file.
//
// The line is read into a std::string, so keys of any length are returned in
// full (a fixed 256-byte buffer would silently truncate longer keys).
void get_license_key(std::string& key, const std::string& key_path)
{
    std::ifstream fs(key_path, std::ios::in | std::ios::binary);
    if (!fs) {
        return;
    }

    std::string first_line;
    std::getline(fs, first_line);

    // The stream is opened in binary mode, so a CRLF-terminated file leaves
    // the '\r' at the end of the line; it is not part of the key.
    while (!first_line.empty() && first_line.back() == '\r') {
        first_line.pop_back();
    }

    key = first_line;
}
// Returns sizeof(array) divided by sizeof(array[0]), converted to int.
// For a built-in array passed by reference this is its element count.
template <class T>
int getlength(T& array)
{
    const auto total_bytes = sizeof(array);
    const auto element_bytes = sizeof(array[0]);
    return static_cast<int>(total_bytes / element_bytes);
}
// Runs imageProcess() on every entry of a folder that has a non-zero size.
//
// path - folder to scan. It is concatenated directly with "*" and with each
//        entry name, so it must end with a path separator.
// api  - OCR SDK instance forwarded to imageProcess().
void Camera::folderTest(string path, Character_SDK* api) {
    _finddata64i32_t fileInfo;
    const string pattern = path + "*";
    const intptr_t hFile = _findfirst(pattern.c_str(), &fileInfo);
    if (hFile == -1) {
        cout << "hFile == -1" << endl;
        return;
    }
    do
    {
        // Entries with size 0 are skipped; presumably this filters out
        // directory entries such as "." and ".." -- TODO confirm.
        if (fileInfo.size > 0) {
            imageProcess(path + fileInfo.name, api);
        }
    } while (_findnext(hFile, &fileInfo) == 0);

    // Release the search handle; without this every call leaks one handle.
    _findclose(hFile);

    // With C++17 or later the scan can be written as:
    //for (auto& i : filesystem::directory_iterator(path)) {
    //	imageProcess(i.path().string(), api);
    //	}
}
// Runs OCR on the image file at `path`, prints the formatted result and
// writes it to "result/<file name>.txt".
//
// path - image file to read with cv::imread.
// api  - OCR SDK instance used for recognition.
//
// Output format: one line per non-empty recognised text, made of the text
// followed by its eight coordinate values, all tab-separated. When
// single-word mode is enabled each word and its four coordinate values are
// appended to the same line.
//
// Returns early, after printing a message, when the image cannot be read or
// when the SDK reports a non-zero status.
void Camera::imageProcess(string path, Character_SDK* api) {
    const std::time_t time_begin = get_timestamp();

    cv::Mat mat = cv::imread(path);
    if (mat.empty()) {
        // imread signals an unreadable or missing file with an empty Mat.
        printf("failed to read image=%s \n", path.c_str());
        return;
    }

    CharacterResponse response;
    bool is_single_word = false;
    const int processCode = api->process(&mat, is_single_word, &response);
    printf("process result=%d \n", processCode);
    if (processCode != 0) {
        printf("process error=%d \n", processCode);
        return;
    }

    // NOTE(review): the "123456" prefix is carried over unchanged from the
    // demo code; it looks like a leftover marker -- confirm it is wanted.
    std::string result_str("123456");
    for (std::size_t i = 0; i < response.texts.size(); ++i) {
        if (response.texts[i].empty()) {
            continue;
        }
        result_str += response.texts[i];
        result_str += "\t";
        // Eight coordinate values per text line.
        for (int k = 0; k < 8; ++k) {
            result_str += to_string(response.text_coords[i][k]);
            result_str += "\t";
        }
        if (is_single_word) {
            for (std::size_t j = 0; j < response.words[i].word.size(); ++j) {
                result_str += response.words[i].word[j];
                result_str += "\t";
                // Four coordinate values per word.
                for (int k = 0; k < 4; ++k) {
                    result_str += to_string(response.words[i].word_coords[j][k]);
                    result_str += "\t";
                }
            }
        }
        result_str += "\n";
    }
    std::cout << "result =" << result_str << std::endl;

    // Use the part after the last path separator as the output name. A fixed
    // substr(10) only works for a directory prefix of exactly ten characters
    // and throws std::out_of_range for shorter paths.
    const std::size_t separator = path.find_last_of("/\\");
    const string filename = (separator == string::npos) ? path : path.substr(separator + 1);
    const string outputname = "result/" + filename + ".txt";
    cout << outputname << "===========" << std::endl;

    ofstream fOut(outputname);
    if (!fOut) {
        printf("failed to open output file=%s \n", outputname.c_str());
    } else {
        fOut << result_str.c_str() << std::endl;
        fOut.close();
    }

    const std::time_t time_end = get_timestamp();
    std::cout << "time cost is :" << time_end - time_begin << "ms" << std::endl;
}
// Runs OCR on an in-memory frame and stores the outcome in the members
// `out_img` (overlay produced by output_image) and `print_result` (formatted
// text).
//
// src - frame to recognise.
// api - OCR SDK instance used for recognition.
//
// Output format of the text: a leading newline, then one line per non-empty
// recognised text made of the text followed by its eight coordinate values,
// all tab-separated. When single-word mode is enabled each word and its four
// coordinate values are appended to the same line.
//
// Returns early, leaving `out_img` and `print_result` unchanged, when the
// frame is empty or when the SDK reports a non-zero status.
void Camera::imageProcess(cv::Mat src, Character_SDK* api) {
    if (src.empty()) {
        // A failed camera grab yields an empty Mat; do not pass it to the SDK.
        printf("empty frame, OCR skipped \n");
        return;
    }

    const std::time_t time_begin = get_timestamp();

    CharacterResponse response;
    bool is_single_word = false;
    const int processCode = api->process(&src, is_single_word, &response);
    printf("process result=%d \n", processCode);
    if (processCode != 0) {
        printf("process error=%d \n", processCode);
        return;
    }

    std::string result_str("\n");
    for (std::size_t i = 0; i < response.texts.size(); ++i) {
        if (response.texts[i].empty()) {
            continue;
        }
        result_str += response.texts[i];
        result_str += "\t";
        // Eight coordinate values per text line.
        for (int k = 0; k < 8; ++k) {
            result_str += to_string(response.text_coords[i][k]);
            result_str += "\t";
        }
        if (is_single_word) {
            for (std::size_t j = 0; j < response.words[i].word.size(); ++j) {
                result_str += response.words[i].word[j];
                result_str += "\t";
                // Four coordinate values per word.
                for (int k = 0; k < 4; ++k) {
                    result_str += to_string(response.words[i].word_coords[j][k]);
                    result_str += "\t";
                }
            }
        }
        result_str += "\n";
    }
    std::cout << "result =" << result_str << std::endl;

    const std::time_t time_end = get_timestamp();
    std::cout << "time cost is :" << time_end - time_begin << "ms" << std::endl;

    out_img = output_image(response);
    print_result = QString::fromStdString(result_str);
}
// Draws every non-empty recognised text onto a 640x480 ARGB32 image and
// returns it.
//
// response - OCR result; texts[i] is drawn at the first coordinate pair of
//            text_coords[i], which the original code treats as the top-left
//            corner of the text box.
//
// The image is first filled with white at alpha 0, then the text is painted
// in red with a 22-pixel font.
QImage Camera::output_image(CharacterResponse response)
{
    const int overlay_width = 640;
    const int overlay_height = 480;

    QImage image(overlay_width, overlay_height, QImage::Format_ARGB32);
    // Alpha 0: the background starts fully transparent.
    image.fill(qRgba(255, 255, 255, 0));

    // Paint onto the image using the DestinationOver composition mode.
    QPainter painter(&image);
    painter.setCompositionMode(QPainter::CompositionMode_DestinationOver);

    // Red pen for the text.
    QPen pen = painter.pen();
    pen.setColor(QColor(255, 0, 0));

    // Font size of the rendered text, in pixels.
    QFont font;
    font.setPixelSize(22);

    painter.setPen(pen);
    painter.setFont(font);

    // QPainter::drawText(QPoint, QString) uses the point's y as the text
    // baseline. The OCR coordinate is the top-left corner of the box, so the
    // baseline sits one font ascent lower; without the shift the text is
    // drawn above its box and clipped near the top edge of the image.
    const QFontMetrics fm(font);
    const QPoint baseline_shift(0, fm.ascent());

    for (std::size_t i = 0; i < response.texts.size(); ++i) {
        if (response.texts[i].empty()) {
            continue;
        }
        // Only the top-left corner of the text box is used.
        const QPoint top_left(response.text_coords[i][0], response.text_coords[i][1]);
        painter.drawText(top_left + baseline_shift, QString::fromStdString(response.texts[i]));
    }

    // Scale to the output size, keeping the aspect ratio (the target size
    // equals the size the image was created with).
    QImage scaledimg = image.scaled(overlay_width, overlay_height, Qt::KeepAspectRatio);
    return scaledimg;
}
// Creates the OCR SDK instance, authorises it from the license files and
// initialises it with the model folder. A non-zero status from either step
// is printed and then the constructor waits on getchar().
Camera::Camera()
{
    std::cout << "before sdk_init" << std::endl;
    api = new Character_SDK();

    // Configure how the SDK log is saved.
    api->set_log_config(0, "", false);

    // Authorisation: the key is read from license.key and passed to the SDK
    // together with license.ini.
    const std::string license_key_file = "C:/Users/118ai/Desktop/Baidu_Ocr_230220/Baidu_ocr/x64/license/license.key";
    const std::string license_ini_file = "C:/Users/118ai/Desktop/Baidu_Ocr_230220/Baidu_ocr/x64/license/license.ini";
    std::string license_key;
    get_license_key(license_key, license_key_file);

    const int auth_status = api->auth_from_file(license_key.c_str(), license_ini_file.c_str());
    if (auth_status != 0)
    {
        std::cout << "sdk auth_from_file ret code:" << auth_status << std::endl;
        getchar();
    }

    // The model folder is the "resource" directory (it holds the ID-card
    // recognition model files). Passing an empty string selects the default
    // location; for a custom location give an absolute path such as
    // d:\\resource.
    const std::string model_folder = "C:/Users/118ai/Desktop/Baidu_Ocr_230220/Baidu_ocr/x64/Release/resource";
    const int init_status = api->sdk_init(model_folder.c_str());
    std::cout << "after sdk_init" << std::endl;
    if (init_status != 0)
    {
        std::cout << "sdk init result is:" << init_status << std::endl;
        getchar();
    }

    // Single-image test:
    // imageProcess("C:/Users/118ai/Desktop/Baidu_Ocr_230220/Baidu_ocr/x64/images/avic.jpg", api);
    // Folder test:
    // folderTest("C:/Users/118ai/Desktop/Baidu_Ocr_230220/Baidu_ocr/x64/images/", api);
}
// Logs a marker line, then destroys the SDK instance created in the
// constructor.
Camera::~Camera()
{
    std::cout << "before delete api" << std::endl;
    delete this->api;
}
void Camera::camNumber(const int &n)
{
camera_num = n;
}
// Opens the camera selected by camera_num and requests 640x480 frames.
// Logs a message and returns without configuring anything when the device
// cannot be opened.
void Camera::openCamera()
{
    // capture.open(camera_num,cv::CAP_DSHOW);
    capture.open(camera_num);
    if (!capture.isOpened())
    {
        qDebug() << "camer_num:"<<camera_num<<"not open";
        return;
    }

    // Properties can only be applied to an opened device; setting them
    // before open() has no effect on the frames that are delivered.
    capture.set(CAP_PROP_FRAME_WIDTH, 640);
    capture.set(CAP_PROP_FRAME_HEIGHT, 480);
}
void Camera::mainwindowDisplay()
{
// QThread::sleep(1); //耗时操作
capture >> src_image;
imageProcess(src_image, api);
QImage img1 = QImage((const unsigned char*)src_image.data,
src_image.cols, src_image.rows,
QImage::Format_RGB888).rgbSwapped();
emit sendPicture(img1);
// emit send_result(print_result);
emit send_result_picture(out_img);
}
本案例中涉及到的所有代码、工程文件等,可到此处下载:https://download.csdn.net/download/wang_chao118/87586281