tesseract源码Page Layout解读1( 二值化,otsu)

代码插入:

在代码中加入下面一行,即可在 tesseract/win64/bin/Release/ 目录下得到二值化后的图像(tessinput.tif)

api.SetVariable("tessedit_write_images", "true"); 

tesseract源码Page Layout解读1( 二值化,otsu)_第1张图片

tessinput.tif(otsu得到的)

tesseract源码Page Layout解读1( 二值化,otsu)_第2张图片

代码理解:

TessBaseAPI::ProcessPage[api/baseapi.cpp]       ->  
TessBaseAPI::Recognize [api/baseapi.cpp] -> 
/*
//Recognize the tesseract global image and return the result as Tesseract internal structures.
int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
    ...
    if (FindLines() != 0)
    return -1;
    ...
    }
*/
TessBaseAPI::FindLines[api/baseapi.cpp] -> 
/** Find lines from the image making the BLOCK_LIST. 
int TessBaseAPI::FindLines() {
    ...
    if (tesseract_->pix_binary() == NULL &&
      !Threshold(tesseract_->mutable_pix_binary())) {//进入二值化
    return -1;
    }
//if (tesseract_->tessedit_write_images) 
//pixWrite("pix_binary.tif", tesseract_->pix_binary(), IFF_TIFF_G4);//可以查看二值化的结果
      
	...
    }
//pix_binary_:初始值为输入的图像,保存布局分析和识别的图像
//Image used for input to layout analysis and tesseract recognition.May be modified by the ShiroRekhaSplitter to eliminate the top-line.
//mutable_pix_binary:&pix_binary_(Destroy any existing pix and return a pointer to the pointer.) 
*/
TessBaseAPI::Threshold[api/baseapi.cpp] ->
/**二值化
 * Run the thresholder to make the thresholded image, returned in pix,
 * which must not be NULL. *pix must be initialized to NULL, or point
 * to an existing pixDestroyable Pix.
 * The usual argument to Threshold is Tesseract::mutable_pix_binary().
 */
bool TessBaseAPI::Threshold(Pix** pix) {
    ...
    if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
	...
	}
ImageThresholder::ThresholdToPix[ccmain/thresholder.cpp] ->
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
/// Returns false on error.
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
    ...
	if (pix_channels_ != 0) {
    OtsuThresholdRectToPix(pix_, pix);
	}
    ...
}
ImageThresholder::OtsuThresholdRectToPix [ccmain/thresholder.cpp]
//建议分辨率介于【70,2400】之间





代码附录

TessBaseAPI::FindLines[api/baseapi.cpp] -> 

/** Find lines from the image making the BLOCK_LIST. */
int TessBaseAPI::FindLines() {
  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
    tprintf("Please call SetImage before attempting recognition.\n");
    return -1;
  }
  if (recognition_done_)
    ClearResults();
  if (!block_list_->empty()) {
    return 0;
  }
  if (tesseract_ == NULL) {
    tesseract_ = new Tesseract;
    tesseract_->InitAdaptiveClassifier(nullptr);
  }
  if (tesseract_->pix_binary() == NULL &&
      !Threshold(tesseract_->mutable_pix_binary())) {
    return -1;
  }

  tesseract_->PrepareForPageseg();

  if (tesseract_->textord_equation_detect) {
    if (equ_detect_ == NULL && datapath_ != NULL) {
      equ_detect_ = new EquationDetect(datapath_->string(), NULL);
    }
    tesseract_->SetEquationDetect(equ_detect_);
  }

  Tesseract* osd_tess = osd_tesseract_;
  OSResults osr;
  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
      osd_tess == nullptr) {
    if (strcmp(language_->string(), "osd") == 0) {
      osd_tess = tesseract_;
    } else {
      osd_tesseract_ = new Tesseract;
      TessdataManager mgr(reader_);
      if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, "osd",
                                         OEM_TESSERACT_ONLY, nullptr, 0,
                                         nullptr, nullptr, false, &mgr) == 0) {
        osd_tess = osd_tesseract_;
        osd_tesseract_->set_source_resolution(
            thresholder_->GetSourceYResolution());
      } else {
        tprintf("Warning: Auto orientation and script detection requested,"
                " but osd language failed to load\n");
        delete osd_tesseract_;
        osd_tesseract_ = nullptr;
      }
    }
  }

  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
    return -1;
  // If Devanagari is being recognized, we use different images for page seg
  // and for OCR.
  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
  return 0;
}

TessBaseAPI::Threshold[api/baseapi.cpp] -> 

/**
 * Run the thresholder to make the thresholded image, returned in pix,
 * which must not be nullptr. *pix must be initialized to nullptr, or point
 * to an existing pixDestroyable Pix.
 * The usual argument to Threshold is Tesseract::mutable_pix_binary().
 *
 * @param pix Address of the Pix pointer that receives the binarized image.
 *            Any Pix it currently points to is destroyed first.
 * @return false if the thresholder fails to produce an image, true otherwise.
 */
bool TessBaseAPI::Threshold(Pix** pix) {
  ASSERT_HOST(pix != nullptr);
  if (*pix != nullptr)
    pixDestroy(pix);
  // Zero resolution messes up the algorithms, so make sure it is credible.
  int y_res = thresholder_->GetScaledYResolution();
  if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
    // Use the minimum default resolution, as it is safer to under-estimate
    // than over-estimate resolution.
    tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res,
            kMinCredibleResolution);
    thresholder_->SetSourceYResolution(kMinCredibleResolution);
  }
  // Convert the tessedit_pageseg_mode parameter to the PageSegMode enum by
  // way of int.
  PageSegMode pageseg_mode =
      static_cast<PageSegMode>(
          static_cast<int>(tesseract_->tessedit_pageseg_mode));
  if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
  thresholder_->GetImageSizes(&rect_left_, &rect_top_,
                              &rect_width_, &rect_height_,
                              &image_width_, &image_height_);

  // Pass the threshold and grey images to the engine only when the source
  // was not already binary; otherwise clear them.
  if (!thresholder_->IsBinary()) {
    tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
    tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
  } else {
    tesseract_->set_pix_thresholds(nullptr);
    tesseract_->set_pix_grey(nullptr);
  }
  // Set the internal resolution that is used for layout parameters from the
  // estimated resolution, rather than the image resolution, which may be
  // fabricated, but we will use the image resolution, if there is one, to
  // report output point sizes.
  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
                                  kMinCredibleResolution,
                                  kMaxCredibleResolution);
  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
    tprintf("Estimated resolution %d out of range! Corrected to %d\n",
            thresholder_->GetScaledEstimatedResolution(), estimated_res);
  }
  tesseract_->set_source_resolution(estimated_res);
  SavePixForCrash(estimated_res, *pix);
  return true;
}
ImageThresholder::ThresholdToPix[ccmain/thresholder.cpp] -> 

// Produces the thresholded (binary) image for the current rectangle.
// On success *pix points to a newly created Pix that the caller must release
// with pixDestroy.
// pageseg_mode is accepted but not used by this implementation.
/// Returns false if either image dimension exceeds MAX_INT16, true otherwise.
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
  const bool oversized =
      image_width_ > MAX_INT16 || image_height_ > MAX_INT16;
  if (oversized) {
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return false;
  }

  if (pix_channels_ != 0) {
    // Grey or colour input: binarize with Otsu.
    OtsuThresholdRectToPix(pix_, pix);
    return true;
  }

  // The input is already binary. It is still copied, because this API lets
  // the caller modify the output.
  Pix* rect_pix = GetPixRect();
  *pix = pixCopy(nullptr, rect_pix);
  pixDestroy(&rect_pix);
  return true;
}

ImageThresholder::OtsuThresholdRectToPix [ccmain/thresholder.cpp]
// Otsu thresholds the rectangle, taking the rectangle from *this.
// src_pix is the source image; the binarized result is written to *out_pix.
// The per-channel threshold arrays filled in by OtsuThreshold are released
// here with delete [] once the output has been produced.
void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
                                              Pix** out_pix) const {
  PERF_COUNT_START("OtsuThresholdRectToPix")
  int* channel_thresholds;
  int* channel_hi_values;

  const int num_channels =
      OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
                    &channel_thresholds, &channel_hi_values);

#ifdef USE_OPENCL
  // The OpenCL path is taken only when compiled in, the selected device is
  // OpenCL, the image has 1 or 4 channels and the rectangle starts at (0, 0).
  OpenclDevice od;
  const bool use_opencl = (num_channels == 4 || num_channels == 1) &&
                          od.selectedDeviceIsOpenCL() && rect_top_ == 0 &&
                          rect_left_ == 0;
  if (use_opencl) {
    od.ThresholdRectToPixOCL(
        reinterpret_cast<unsigned char*>(pixGetData(src_pix)), num_channels,
        pixGetWpl(src_pix) * 4, channel_thresholds, channel_hi_values,
        out_pix /*pix_OCL*/, rect_height_, rect_width_, rect_top_, rect_left_);
  } else {
    ThresholdRectToPix(src_pix, num_channels, channel_thresholds,
                       channel_hi_values, out_pix);
  }
#else
  ThresholdRectToPix(src_pix, num_channels, channel_thresholds,
                     channel_hi_values, out_pix);
#endif

  delete [] channel_thresholds;
  delete [] channel_hi_values;

  PERF_COUNT_END
}

tesseract默认使用otsu方法进行二值化,但有时效果并不好,下面是官方给出的改善识别效果的建议:

https://github.com/gali8/Tesseract-OCR-iOS/wiki/Tips-for-Improving-OCR-Results

有错请指出,谢谢!

欢迎加Tesseract OCR 讨论群 389402579


你可能感兴趣的:(Tesseract4.0)