Tesseract OCR支持不同调用方式(详情请看具体实例),同一种调用方式也可以设置不同模式。
调用方法或模式不同,对OCR识别结果的精度有一定影响。模式设置不同,输出的结果格式也不一致。
实际项目中,需要根据需求比较各方法的优劣从而选择最合适的。
枚举类型定义
/// Levels of the page layout hierarchy, listed from the coarsest unit (block)
/// down to the finest unit (symbol).
enum PageIteratorLevel {
    RIL_BLOCK = 0,     ///< Block of text/image/separator line.
    RIL_PARA = 1,      ///< Paragraph within a block.
    RIL_TEXTLINE = 2,  ///< Line within a paragraph.
    RIL_WORD = 3,      ///< Word within a textline.
    RIL_SYMBOL = 4     ///< Symbol/character within a word.
};
RIL_BLOCK:把原稿分割成不同区域,按区域识别文字,OCR结果是每个区域的字符串
RIL_PARA:按段落识别文字,OCR结果是每个段落的字符串
RIL_TEXTLINE:按行识别文字,OCR结果是一行一行的字符串
RIL_WORD: 按单词识别文字,OCR结果是一个一个的单词
RIL_SYMBOL:按字符识别文字,OCR结果是一个一个的字符
C++API实例:https://tesseract-ocr.github.io/tessdoc/Examples_C++.html
API实例:https://tesseract-ocr.github.io/tessdoc/#api-examples
注意
如果C++的实例代码编译不通过,可能是需要添加以下头文件
#include
3.1 Basic_example
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
/**
 * Basic example: run OCR over a whole image and print the recognized text.
 *
 * @return 0 on success; 1 when Tesseract cannot be initialized, the input
 *         image cannot be read, or no text result is produced.
 */
int main()
{
    // The API object lives on the stack, so it is destroyed on every return path.
    tesseract::TessBaseAPI api;
    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api.Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }
    // Open input image with leptonica library
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        // Do not hand a null image to SetImage.
        fprintf(stderr, "Could not read input image.\n");
        api.End();
        return 1;
    }
    api.SetImage(image);
    // Get OCR result
    char *outText = api.GetUTF8Text();
    int status = 0;
    if (outText != nullptr) {
        printf("OCR output:\n%s", outText);
    } else {
        // Passing a null pointer to "%s" is undefined behaviour, so report instead.
        fprintf(stderr, "OCR produced no result.\n");
        status = 1;
    }
    // Destroy used object and release memory
    delete [] outText;  // deleting a null pointer is a no-op
    api.End();
    pixDestroy(&image);
    return status;
}
3.2 SetRectangle_example
如果只想识别特定区域的文字,可以用这个方法。需提前设定指定区域的坐标。
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
/**
 * SetRectangle example: run OCR on a fixed sub-rectangle of an image and
 * print the recognized text.
 *
 * @return 0 on success; 1 when Tesseract cannot be initialized, the input
 *         image cannot be read, or no text result is produced.
 */
int main()
{
    // The API object lives on the stack, so it is destroyed on every return path.
    tesseract::TessBaseAPI api;
    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api.Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }
    // Open input image with leptonica library
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        // Do not hand a null image to SetImage.
        fprintf(stderr, "Could not read input image.\n");
        api.End();
        return 1;
    }
    api.SetImage(image);
    // Restrict recognition to a sub-rectangle of the image
    // SetRectangle(left, top, width, height)
    api.SetRectangle(30, 86, 590, 100);
    // Get OCR result
    char *outText = api.GetUTF8Text();
    int status = 0;
    if (outText != nullptr) {
        printf("OCR output:\n%s", outText);
    } else {
        // Passing a null pointer to "%s" is undefined behaviour, so report instead.
        fprintf(stderr, "OCR produced no result.\n");
        status = 1;
    }
    // Destroy used object and release memory
    delete [] outText;  // deleting a null pointer is a no-op
    api.End();
    pixDestroy(&image);
    return status;
}
3.3 GetComponentImages_example
以Box的形式返回OCR结果
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
int main()
{
char *outText;
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
api->SetImage(image);
Boxa* boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, NULL, NULL);
printf("Found %d textline image components.\n", boxes->n);
for (int i = 0; i < boxes->n; i++) {
BOX* box = boxaGetBox(boxes, i, L_CLONE);
api->SetRectangle(box->x, box->y, box->w, box->h);
char* ocrResult = api->GetUTF8Text();
int conf = api->MeanTextConf();
fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
i, box->x, box->y, box->w, box->h, conf, ocrResult);
boxDestroy(&box);
}
// Destroy used object and release memory
api->End();
delete api;
delete [] outText;
pixDestroy(&image);
return 0;
}
3.4 ResultIterator_example
以迭代器的形式返回OCR结果
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
/**
 * ResultIterator example: recognize an image and walk the result word by
 * word, printing each word with its confidence and bounding box.
 *
 * @return 0 on success; 1 when Tesseract cannot be initialized, the input
 *         image cannot be read, or recognition fails.
 */
int main()
{
    // The API object lives on the stack, so it is destroyed on every return path.
    tesseract::TessBaseAPI api;
    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api.Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        api.End();
        return 1;
    }
    api.SetImage(image);
    if (api.Recognize(nullptr) != 0) {
        fprintf(stderr, "Recognition failed.\n");
        api.End();
        pixDestroy(&image);
        return 1;
    }
    tesseract::ResultIterator *ri = api.GetIterator();
    const tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
    if (ri != nullptr) {
        do {
            char *word = ri->GetUTF8Text(level);
            float conf = ri->Confidence(level);
            int x1, y1, x2, y2;
            ri->BoundingBox(level, &x1, &y1, &x2, &y2);
            printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                   word != nullptr ? word : "", conf, x1, y1, x2, y2);
            // The text buffer is owned by the caller.
            delete [] word;
        } while (ri->Next(level));
        // The iterator returned by GetIterator() is owned by the caller.
        delete ri;
    }
    // Destroy used object and release memory
    api.End();
    pixDestroy(&image);
    return 0;
}
3.5 OSD_example
如果需要判断文字的方向,可以参考这个方法
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
/**
 * OSD example: detect the orientation and script of an image and print the
 * detected values with their confidences.
 *
 * @return 0 on success; 1 when the input image cannot be read, Tesseract
 *         cannot be initialized, or detection fails.
 */
int main()
{
    const char *inputfile = "/tesseract/testing/devatest-rotated-270.png";
    PIX *image = pixRead(inputfile);
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image: %s\n", inputfile);
        return 1;
    }
    // The API object lives on the stack, so it is destroyed on every return path.
    tesseract::TessBaseAPI api;
    if (api.Init(nullptr, "osd")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        pixDestroy(&image);
        return 1;
    }
    api.SetPageSegMode(tesseract::PSM_OSD_ONLY);
    api.SetImage(image);
    // Initialize the outputs so nothing indeterminate is ever read.
    int orient_deg = 0;
    float orient_conf = 0.0f;
    const char *script_name = nullptr;
    float script_conf = 0.0f;
    int status = 0;
    // Only print the values when detection reports success.
    if (api.DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
        printf("************\n Orientation in degrees: %d\n Orientation confidence: %.2f\n"
               " Script: %s\n Script confidence: %.2f\n",
               orient_deg, orient_conf,
               script_name, script_conf);
    } else {
        fprintf(stderr, "Could not detect orientation and script.\n");
        status = 1;
    }
    // Destroy used object and release memory
    api.End();
    pixDestroy(&image);
    return status;
}
3.6 LSTM_Choices_example
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
/**
 * LSTM choices example: recognize an image with lstm_choice_mode=2 and, for
 * every word, print the alternative symbol choices of each timestep together
 * with their confidence and the word-level bounding box.
 *
 * @return 0 on success; 1 when Tesseract cannot be initialized, the input
 *         image cannot be read, or recognition fails.
 */
int main()
{
    // The API object lives on the stack, so it is destroyed on every return path.
    tesseract::TessBaseAPI api;
    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api.Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }
    // Open input image with leptonica library
    Pix *image = pixRead("choices.png");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        api.End();
        return 1;
    }
    api.SetImage(image);
    // Set lstm_choice_mode to alternative symbol choices per character, bbox is at word level.
    api.SetVariable("lstm_choice_mode", "2");
    if (api.Recognize(nullptr) != 0) {
        fprintf(stderr, "Recognition failed.\n");
        api.End();
        pixDestroy(&image);
        return 1;
    }
    const tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
    tesseract::ResultIterator *res_it = api.GetIterator();
    // Get confidence level for alternative symbol choices. Code is based on
    // https://github.com/tesseract-ocr/tesseract/blob/main/src/api/hocrrenderer.cpp#L325-L344
    if (res_it != nullptr) {
        do {
            int x1, y1, x2, y2;
            res_it->BoundingBox(level, &x1, &y1, &x2, &y2);
            // The choices belong to the iterator; they are only read here, never freed.
            const auto *choiceMap = res_it->GetBestLSTMSymbolChoices();
            if (choiceMap == nullptr) {
                continue;  // no choices for this word; move on to the next one
            }
            int wcnt = 0;  // index of the current timestep within the word
            // Iterate by const reference to avoid copying every timestep vector.
            for (const auto &timestep : *choiceMap) {
                for (const auto &choice : timestep) {
                    // Confidence as a percentage, truncated to a whole number.
                    const float conf = static_cast<float>(static_cast<int>(choice.second * 100));
                    const char *word = choice.first;
                    printf("%d symbol: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                           wcnt, word, conf, x1, y1, x2, y2);
                }
                wcnt++;
                printf("\n");
            }
        } while (res_it->Next(level));
        // The iterator returned by GetIterator() is owned by the caller.
        delete res_it;
    }
    // Destroy used object and release memory
    api.End();
    pixDestroy(&image);
    return 0;
}
3.7 OpenCV_example
/*
Windows compile example:
SET TESS_INSTALATION=C:/win64
SET OPENCV_INSTALATION=C:/opencv/build
cl OpenCV_example.cc -I %TESS_INSTALATION%/include -I %OPENCV_INSTALATION%/include /link /LIBPATH:%TESS_INSTALATION%/lib /LIBPATH:%OPENCV_INSTALATION%/x64/vc14/lib tesseract51.lib leptonica-1.83.0.lib opencv_world460.lib /machine:x64
*/
#include <string>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
#include <opencv2/opencv.hpp>
/**
 * OpenCV example: load the image named by the first command-line argument
 * with OpenCV, pass its pixel buffer to Tesseract and print the recognized
 * text.
 *
 * @param argc Number of command-line arguments; must be at least 2.
 * @param argv argv[1] is the path of the image to recognize.
 * @return 0 on success; 1 on a missing argument, an unreadable image, or a
 *         Tesseract initialization failure.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        // argv[1] must not be read when no image path was given.
        std::cerr << "Usage: OpenCV_example <image_path>\n";
        return 1;
    }
    const std::string imPath = argv[1];
    cv::Mat im = cv::imread(imPath, cv::IMREAD_COLOR);
    if (im.empty()) {
        std::cerr << "Could not read input image: " << imPath << "\n";
        return 1;
    }
    // The API object lives on the stack, so it is destroyed on every return path.
    tesseract::TessBaseAPI api;
    if (api.Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) {
        std::cerr << "Could not initialize tesseract.\n";
        return 1;
    }
    api.SetPageSegMode(tesseract::PSM_AUTO);
    // 3 bytes per pixel (the image was loaded with IMREAD_COLOR); im.step is bytes per row.
    api.SetImage(im.data, im.cols, im.rows, 3, static_cast<int>(im.step));
    // The text buffer is owned by the caller: print it, then free it.
    char *text = api.GetUTF8Text();
    if (text != nullptr) {
        std::cout << text;
    }
    delete [] text;
    api.End();
    return 0;
}
3.8 UserPatterns_example
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
/**
 * UserPatterns example: initialize Tesseract with an extra config file,
 * recognize an image and print the recognized text.
 *
 * @return 0 on success; 1 when Tesseract cannot be initialized, the input
 *         image cannot be read, or no text result is produced.
 */
int main()
{
    // Init() takes a non-const char** for the config list, and a string literal
    // cannot be bound to char* in standard C++, so keep the path in a writable array.
    char configPath[] = "path/to/my.patterns.config";
    char *configs[] = {configPath};
    const int configs_size = 1;
    // The API object lives on the stack, so it is destroyed on every return path.
    tesseract::TessBaseAPI api;
    if (api.Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY, configs, configs_size, nullptr, nullptr, false)) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }
    Pix *image = pixRead("Arial.png");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        api.End();
        return 1;
    }
    api.SetImage(image);
    char *outText = api.GetUTF8Text();
    int status = 0;
    if (outText != nullptr) {
        // Never use OCR output as the format string: a '%' in the recognized
        // text would be interpreted as a conversion specifier.
        printf("%s", outText);
    } else {
        fprintf(stderr, "OCR produced no result.\n");
        status = 1;
    }
    delete [] outText;  // deleting a null pointer is a no-op
    api.End();
    pixDestroy(&image);
    return status;
}