在代码中加入下面一行，即可在 tesseract/win64/bin/Release/ 目录下得到二值化后的图像（tessinput.tif）：
api.SetVariable("tessedit_write_images", "true");
tessinput.tif 是由 Otsu 阈值化得到的二值图像，相关调用链如下：
TessBaseAPI::ProcessPage[api/tesseractmain.cpp] ->
TessBaseAPI::Recognize [api/baseapi.cpp] ->
/*
//Recognize the tesseract global image and return the result as Tesseract internal structures.
int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
...
if (FindLines() != 0)
return -1;
...
}
*/
TessBaseAPI::FindLines[api/baseapi.cpp] ->
/** Find lines from the image making the BLOCK_LIST.
int TessBaseAPI::FindLines() {
...
if (tesseract_->pix_binary() == NULL &&
!Threshold(tesseract_->mutable_pix_binary())) {//进入二值化
return -1;
}
//if (tesseract_->tessedit_write_images)
//pixWrite("pix_binary.tif", tesseract_->pix_binary(), IFF_TIFF_G4);//可以查看二值化的结果
...
}
//pix_binary_:初始值为输入的图像,保存布局分析和识别的图像
//Image used for input to layout analysis and tesseract recognition.May be modified by the ShiroRekhaSplitter to eliminate the top-line.
//mutable_pix_binary:&pix_binary_(Destroy any existing pix and return a pointer to the pointer.)
*/
TessBaseAPI::Threshold[api/baseapi.cpp] ->
/**二值化
* Run the thresholder to make the thresholded image, returned in pix,
* which must not be NULL. *pix must be initialized to NULL, or point
* to an existing pixDestroyable Pix.
* The usual argument to Threshold is Tesseract::mutable_pix_binary().
*/
bool TessBaseAPI::Threshold(Pix** pix) {
...
if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
...
}
ImageThresholder::ThresholdToPix[ccmain/thresholder.cpp] ->
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
/// Returns false on error.
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
...
if (pix_channels_ != 0) {
OtsuThresholdRectToPix(pix_, pix);
}
...
}
ImageThresholder::OtsuThresholdRectToPix [ccmain/thresholder.cpp]
//建议分辨率介于 [70, 2400] 之间
TessBaseAPI::FindLines[api/baseapi.cpp] ->
/**
 * Find lines from the image making the BLOCK_LIST.
 *
 * Thresholds the image when no binary image is available yet, prepares the
 * engine for page segmentation, optionally attaches an equation detector and
 * an orientation/script-detection (OSD) engine, then runs SegmentPage and
 * PrepareForTessOCR.
 *
 * @return 0 on success (also when block_list_ is already populated),
 *         -1 when no image has been set, thresholding fails, or
 *         SegmentPage reports an error.
 */
int TessBaseAPI::FindLines() {
  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
    tprintf("Please call SetImage before attempting recognition.\n");
    return -1;
  }
  if (recognition_done_)
    ClearResults();
  // Blocks are already available: nothing left to do.
  if (!block_list_->empty()) {
    return 0;
  }
  if (tesseract_ == nullptr) {
    tesseract_ = new Tesseract;
    tesseract_->InitAdaptiveClassifier(nullptr);
  }
  // Binarize only when there is no binary image yet.
  if (tesseract_->pix_binary() == nullptr &&
      !Threshold(tesseract_->mutable_pix_binary())) {
    return -1;
  }

  tesseract_->PrepareForPageseg();

  if (tesseract_->textord_equation_detect) {
    if (equ_detect_ == nullptr && datapath_ != nullptr) {
      equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
    }
    // NOTE(review): equ_detect_ can still be null here when datapath_ is
    // null; confirm that SetEquationDetect tolerates a null detector.
    tesseract_->SetEquationDetect(equ_detect_);
  }

  Tesseract* osd_tess = osd_tesseract_;
  OSResults osr;
  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
      osd_tess == nullptr) {
    if (strcmp(language_->string(), "osd") == 0) {
      // The main engine was loaded with the "osd" language: reuse it.
      osd_tess = tesseract_;
    } else if (datapath_ == nullptr) {
      // Without a data path the osd data cannot be located; continue
      // without OSD instead of dereferencing a null datapath_.
      tprintf("Warning: Auto orientation and script detection requested,"
              " but data path is undefined\n");
    } else {
      osd_tesseract_ = new Tesseract;
      TessdataManager mgr(reader_);
      if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, "osd",
                                         OEM_TESSERACT_ONLY, nullptr, 0,
                                         nullptr, nullptr, false, &mgr) == 0) {
        osd_tess = osd_tesseract_;
        osd_tesseract_->set_source_resolution(
            thresholder_->GetSourceYResolution());
      } else {
        tprintf("Warning: Auto orientation and script detection requested,"
                " but osd language failed to load\n");
        delete osd_tesseract_;
        osd_tesseract_ = nullptr;
      }
    }
  }

  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
    return -1;
  // If Devanagari is being recognized, we use different images for page seg
  // and for OCR.
  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
  return 0;
}
TessBaseAPI::Threshold[api/baseapi.cpp] ->
/**
 * Run the thresholder to make the thresholded image, returned in pix,
 * which must not be nullptr. *pix must be initialized to nullptr, or point
 * to an existing pixDestroyable Pix.
 * The usual argument to Threshold is Tesseract::mutable_pix_binary().
 *
 * Besides producing the binary image, this also records the image/rectangle
 * sizes, hands the threshold and grey images to tesseract_ (or clears them
 * for binary input), and sets the source resolution on tesseract_.
 *
 * @param pix  Receives the thresholded image; any Pix it already points to
 *             is destroyed first.
 * @return true on success, false when ThresholdToPix fails.
 */
bool TessBaseAPI::Threshold(Pix** pix) {
  ASSERT_HOST(pix != nullptr);
  if (*pix != nullptr)
    pixDestroy(pix);
  // Zero resolution messes up the algorithms, so make sure it is credible.
  int y_res = thresholder_->GetScaledYResolution();
  if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
    // Use the minimum default resolution, as it is safer to under-estimate
    // than over-estimate resolution.
    tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res,
            kMinCredibleResolution);
    thresholder_->SetSourceYResolution(kMinCredibleResolution);
  }
  // The page segmentation mode is stored as a parameter variable; go through
  // int to obtain the PageSegMode enum value.
  PageSegMode pageseg_mode =
      static_cast<PageSegMode>(
          static_cast<int>(tesseract_->tessedit_pageseg_mode));
  if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
  thresholder_->GetImageSizes(&rect_left_, &rect_top_,
                              &rect_width_, &rect_height_,
                              &image_width_, &image_height_);
  if (!thresholder_->IsBinary()) {
    tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
    tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
  } else {
    // Binary input: there are no threshold/grey images to keep.
    tesseract_->set_pix_thresholds(nullptr);
    tesseract_->set_pix_grey(nullptr);
  }
  // Set the internal resolution that is used for layout parameters from the
  // estimated resolution, rather than the image resolution, which may be
  // fabricated, but we will use the image resolution, if there is one, to
  // report output point sizes.
  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
                                  kMinCredibleResolution,
                                  kMaxCredibleResolution);
  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
    tprintf("Estimated resolution %d out of range! Corrected to %d\n",
            thresholder_->GetScaledEstimatedResolution(), estimated_res);
  }
  tesseract_->set_source_resolution(estimated_res);
  SavePixForCrash(estimated_res, *pix);
  return true;
}
ImageThresholder::ThresholdToPix[ccmain/thresholder.cpp] ->
// Produces the thresholded version of the source image in *pix.
// A new Pix is created and *pix is set to point at it; the caller owns it
// and must release it with pixDestroy.
// pageseg_mode is not consulted by this implementation.
/// Returns false (after logging) when either image dimension exceeds
/// MAX_INT16, true otherwise.
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
  const bool too_wide = image_width_ > MAX_INT16;
  const bool too_tall = image_height_ > MAX_INT16;
  if (too_wide || too_tall) {
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return false;
  }

  // Non-binary input: let Otsu thresholding build the output.
  if (pix_channels_ != 0) {
    OtsuThresholdRectToPix(pix_, pix);
    return true;
  }

  // The input is already binary. A copy is still handed out, because this
  // API lets the caller modify the output.
  Pix* rect_pix = GetPixRect();
  *pix = pixCopy(nullptr, rect_pix);
  pixDestroy(&rect_pix);
  return true;
}
ImageThresholder::OtsuThresholdRectToPix [ccmain/thresholder.cpp]
// Applies Otsu thresholding to the rectangle described by the rect_*
// members of *this, reading from src_pix and writing the result to out_pix.
void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
                                              Pix** out_pix) const {
  PERF_COUNT_START("OtsuThresholdRectToPix")
  // Filled in by OtsuThreshold; released with delete[] at the end.
  int* channel_thresholds;
  int* channel_hi_values;
  int channel_count =
      OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
                    &channel_thresholds, &channel_hi_values);

#ifdef USE_OPENCL
  // The OpenCL path is taken only when it is compiled in, the image has 1 or
  // 4 channels, the selected device is OpenCL, and the rectangle starts at
  // the image origin.
  OpenclDevice od;
  const bool run_on_opencl =
      (channel_count == 4 || channel_count == 1) &&
      od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0;
  if (run_on_opencl) {
    od.ThresholdRectToPixOCL(
        reinterpret_cast<unsigned char*>(pixGetData(src_pix)), channel_count,
        pixGetWpl(src_pix) * 4, channel_thresholds, channel_hi_values,
        out_pix /*pix_OCL*/, rect_height_, rect_width_, rect_top_,
        rect_left_);
  } else {
    ThresholdRectToPix(src_pix, channel_count, channel_thresholds,
                       channel_hi_values, out_pix);
  }
#else
  ThresholdRectToPix(src_pix, channel_count, channel_thresholds,
                     channel_hi_values, out_pix);
#endif

  delete[] channel_thresholds;
  delete[] channel_hi_values;
  PERF_COUNT_END
}
https://github.com/gali8/Tesseract-OCR-iOS/wiki/Tips-for-Improving-OCR-Results
有错请指出,谢谢!
欢迎加Tesseract OCR 讨论群 389402579