VS+Tesseract中英文识别代码

使用VS+Tesseract来实现中文和英文的识别:

#include
#include
#include
#include
using namespace cv;
using namespace std;
using namespace tesseract;

#pragma comment(lib, "D:/Program Files (x86)/Tesseract-OCR/lib/libtesseract302.lib") 
#pragma comment(lib, "D:/Program Files (x86)/Tesseract-OCR/lib/liblept168.lib")

char* U2G(const char* utf8)
{
    int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    wchar_t* wstr = new wchar_t[len + 1];
    memset(wstr, 0, len + 1);
    MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
    len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
    char* str = new char[len + 1];
    memset(str, 0, len + 1);
    WideCharToMultiByte(CP_ACP, 0, wstr, -1, str, len, NULL, NULL);
    if (wstr) delete[] wstr;
    return str;
}

char* G2U(const char* gb2312)
{
    int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    wchar_t* wstr = new wchar_t[len + 1];
    memset(wstr, 0, len + 1);
    MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len);
    len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
    char* str = new char[len + 1];
    memset(str, 0, len + 1);
    WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL);
    if (wstr) delete[] wstr;
    return str;
}

void main(){
    Mat src = imread("D://a//imgch.jpg",0);
    TessBaseAPI orc;
    char *langue = "eng+chi_sim";
    //char *langue1 = "";
    orc.Init(NULL,langue,OEM_TESSERACT_ONLY);
    orc.SetVariable("tessedit_write_images", "1");
    orc.SetPageSegMode(PSM_SINGLE_BLOCK);
    orc.SetImage(src.data, src.cols, src.rows, src.elemSize() ,src.cols);
    char *text = orc.GetUTF8Text();//.GetUTF8Text();
    String s = U2G(text);
    cout << s << endl;
    cout << orc.MeanTextConf() << endl << endl;
    imshow("src",src);
    waitKey(0);

}

测试图片:


VS+Tesseract中英文识别代码_第1张图片
测试图片

测试结果:


VS+Tesseract中英文识别代码_第2张图片
测试结果

你可能感兴趣的:(VS+Tesseract中英文识别代码)