#pragma comment(lib,"libtesseract302.lib")
char * str = "test.jpg"; tesseract::TessBaseAPI api; api.Init(NULL, "chi_sim", tesseract::OEM_DEFAULT); //初始化,设置语言包,中文简体:chi_sim;英文:eng;也可以自己训练语言包 //api.SetVariable( "tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ); STRING text_out; if (!api.ProcessPages(str, NULL, 0, &text_out)) { return 0; }
string UTF8ToGBK(const std::string& strUTF8) { int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0); unsigned short * wszGBK = new unsigned short[len + 1]; memset(wszGBK, 0, len * 2 + 2); MultiByteToWideChar(CP_UTF8, 0,LPCSTR(strUTF8.c_str()), -1, LPWSTR(wszGBK), len); len = WideCharToMultiByte(CP_ACP, 0,LPCTSTR(wszGBK), -1, NULL, 0, NULL, NULL); char *szGBK = new char[len + 1]; memset(szGBK, 0, len + 1); WideCharToMultiByte(CP_ACP,0, LPCTSTR(wszGBK), -1, szGBK, len, NULL, NULL); //strUTF8 = szGBK; std::string strTemp(szGBK); delete[]szGBK; delete[]wszGBK; return strTemp; }
wchar_t * result_str; CString result; //utf-8转换成unicode int len = MultiByteToWideChar(CP_UTF8,0,text_out.string(),-1,NULL,0); result_str = new wchar_t[len + 1]; memset(result_str,0,len + 1); MultiByteToWideChar(CP_UTF8,0,text_out.string(),-1,result_str,len); //识别结果 result = result_str;