你好joy

tesseract源码Page Layout解读（倾斜矫正）

http://blog.csdn.net/kaelsass/article/details/46874627

http://www.jianshu.com/p/7c63fd62ea28

代码调用

代码附录

Tesseract::SegmentPage[ccmain/pagesegmain.cpp] ->

/**
 * Segment the page according to the current value of tessedit_pageseg_mode.
 * pix_binary_ is used as the source image and should not be NULL.
 * On return the blocks list owns all the constructed page layout.
 */
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
                           Tesseract* osd_tess, OSResults* osr) {
  ASSERT_HOST(pix_binary_ != NULL);
  int width = pixGetWidth(pix_binary_);
  int height = pixGetHeight(pix_binary_);
  // Get page segmentation mode.
  PageSegMode pageseg_mode = static_cast(
      static_cast(tessedit_pageseg_mode));
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
      input_file != NULL && input_file->length() > 0) {
    STRING name = *input_file;
    const char* lastdot = strrchr(name.string(), '.');
    if (lastdot != NULL)
      name[lastdot - name.string()] = '\0';
    read_unlv_file(name, width, height, blocks);
  }
  if (blocks->empty()) {
    // No UNLV file present. Work according to the PageSegMode.
    // First make a single block covering the whole image.
    BLOCK_IT block_it(blocks);
    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
    block->set_right_to_left(right_to_left());
    block_it.add_to_end(block);
  } else {
    // UNLV file present. Use PSM_SINGLE_BLOCK.
    pageseg_mode = PSM_SINGLE_BLOCK;
  }
  // The diacritic_blobs holds noise blobs that may be diacritics. They
  // are separated out on areas of the image that seem noisy and short-circuit
  // the layout process, going straight from the initial partition creation
  // right through to after word segmentation, where they are added to the
  // rej_cblobs list of the most appropriate word. From there classification
  // will determine whether they are used.
  BLOBNBOX_LIST diacritic_blobs;
  int auto_page_seg_ret_val = 0;
  TO_BLOCK_LIST to_blocks;
  if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
      PSM_SPARSE(pageseg_mode)) {
    auto_page_seg_ret_val = AutoPageSeg(
        pageseg_mode, blocks, &to_blocks,
        enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr);
    if (pageseg_mode == PSM_OSD_ONLY)
      return auto_page_seg_ret_val;
    // To create blobs from the image region bounds uncomment this line:
    //  to_blocks.clear();  // Uncomment to go back to the old mode.
  } else {
    deskew_ = FCOORD(1.0f, 0.0f);
    reskew_ = FCOORD(1.0f, 0.0f);
    if (pageseg_mode == PSM_CIRCLE_WORD) {
      Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
      if (pixcleaned != NULL) {
        pixDestroy(&pix_binary_);
        pix_binary_ = pixcleaned;
      }
    }
  }

  if (auto_page_seg_ret_val < 0) {
    return -1;
  }

  if (blocks->empty()) {
    if (textord_debug_tabfind)
      tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }
  bool splitting =
      pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
  bool cjk_mode = textord_use_cjk_fp_model;

  textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
                       pix_thresholds_, pix_grey_, splitting || cjk_mode,
                       &diacritic_blobs, blocks, &to_blocks);
  return auto_page_seg_ret_val;
}

Tesseract::SetupPageSegAndDetectOrientation[ccmain/pagesegmain.cpp] ->

/**
 * Sets up auto page segmentation, determines the orientation, and corrects it.
 * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
 * facilitate testing.
 * photo_mask_pix is a pointer to a NULL pointer that will be filled on return
 * with the leptonica photo mask, which must be pixDestroyed by the caller.
 * to_blocks is an empty list that will be filled with (usually a single)
 * block that is used during layout analysis. This ugly API is required
 * because of the possibility of a unlv zone file.
 * TODO(rays) clean this up.
 * See AutoPageSeg for other arguments.
 * The returned ColumnFinder must be deleted after use.
 */
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
    PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
    OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
    Pix** music_mask_pix) {
  // Initial vertical direction (0, 1); FindAndRemoveLines may update it.
  int vertical_x = 0;
  int vertical_y = 1;
  TabVector_LIST v_lines;
  TabVector_LIST h_lines;
  ICOORD bleft(0, 0);

  ASSERT_HOST(pix_binary_ != NULL);
  if (tessedit_dump_pageseg_images) {
    pixa_debug_.AddPix(pix_binary_, "PageSegInput");
  }
  // Leptonica is used to find the rule/separator lines in the input.
  LineFinder::FindAndRemoveLines(source_resolution_,
                                 textord_tabfind_show_vlines, pix_binary_,
                                 &vertical_x, &vertical_y, music_mask_pix,
                                 &v_lines, &h_lines);
  if (tessedit_dump_pageseg_images) {
    pixa_debug_.AddPix(pix_binary_, "NoLines");
  }
  // Leptonica is used to find a mask of the photo regions in the input.
  *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
  if (tessedit_dump_pageseg_images) {
    pixa_debug_.AddPix(pix_binary_, "NoImages");
  }
  // Vertical separator lines are only kept when column finding is enabled.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();

  // The rest of the algorithm uses the usual connected components.
  textord_.find_components(pix_binary_, blocks, to_blocks);

  TO_BLOCK_IT to_block_it(to_blocks);
  // There must be exactly one input block.
  // TODO(rays) handle new textline finding with a UNLV zone file.
  ASSERT_HOST(to_blocks->singleton());
  TO_BLOCK* to_block = to_block_it.data();
  TBOX blkbox = to_block->block->bounding_box();
  ColumnFinder* finder = NULL;
  int estimated_resolution = source_resolution_;
  if (source_resolution_ == kMinCredibleResolution) {
    // Try to estimate resolution from typical body text size.
    int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor);
    // Only accept an estimate above the current value and below the maximum.
    if (res > estimated_resolution && res < kMaxCredibleResolution) {
      estimated_resolution = res;
      tprintf("Estimating resolution as %d\n", estimated_resolution);
    }
  }

  // With a line size below 2 no finder is created and NULL is returned.
  if (to_block->line_size >= 2) {
    finder = new ColumnFinder(static_cast<int>(to_block->line_size),
                              blkbox.botleft(), blkbox.topright(),
                              estimated_resolution, textord_use_cjk_fp_model,
                              textord_tabfind_aligned_gap_fraction, &v_lines,
                              &h_lines, vertical_x, vertical_y);

    finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);

    if (equ_detect_) {
      equ_detect_->LabelSpecialText(to_block);
    }

    BLOBNBOX_CLIST osd_blobs;
    // osd_orientation is the number of 90 degree rotations to make the
    // characters upright. (See osdetect.h for precise definition.)
    // We want the text lines horizontal, (vertical text indicates vertical
    // textlines) which may conflict (eg vertically written CJK).
    int osd_orientation = 0;
    bool vertical_text = textord_tabfind_force_vertical_text ||
                         pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
    if (!vertical_text && textord_tabfind_vertical_text &&
        PSM_ORIENTATION_ENABLED(pageseg_mode)) {
      vertical_text =
          finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
                                          to_block, &osd_blobs);
    }
    if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
      // Script ids to allow; left empty when osd_tess is this object.
      GenericVector<int> osd_scripts;
      if (osd_tess != this) {
        // We are running osd as part of layout analysis, so constrain the
        // scripts to those allowed by *this.
        AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
        for (int s = 0; s < sub_langs_.size(); ++s) {
          AddAllScriptsConverted(sub_langs_[s]->unicharset,
                                 osd_tess->unicharset, &osd_scripts);
        }
      }
      os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
      if (pageseg_mode == PSM_OSD_ONLY) {
        // Only the OSD results in osr are wanted; no finder is returned.
        delete finder;
        return NULL;
      }
      osd_orientation = osr->best_result.orientation_id;
      double osd_score = osr->orientations[osd_orientation];
      // osd_margin becomes the smallest gap between the best orientation's
      // score and any other orientation's score, capped at twice
      // min_orientation_margin.
      double osd_margin = min_orientation_margin * 2;
      for (int i = 0; i < 4; ++i) {
        if (i != osd_orientation &&
            osd_score - osr->orientations[i] < osd_margin) {
          osd_margin = osd_score - osr->orientations[i];
        }
      }
      int best_script_id = osr->best_result.script_id;
      const char* best_script_str =
          osd_tess->unicharset.get_script_from_script_id(best_script_id);
      bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
          best_script_id == osd_tess->unicharset.hiragana_sid() ||
          best_script_id == osd_tess->unicharset.katakana_sid() ||
          strcmp("Japanese", best_script_str) == 0 ||
          strcmp("Korean", best_script_str) == 0 ||
          strcmp("Hangul", best_script_str) == 0;
      if (cjk) {
        finder->set_cjk_script(true);
      }
      if (osd_margin < min_orientation_margin) {
        // The margin is weak.
        if (!cjk && !vertical_text && osd_orientation == 2) {
          // upside down latin text is improbable with such a weak margin.
          tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
                  "Don't rotate.\n", osd_margin);
          osd_orientation = 0;
        } else {
          tprintf(
              "OSD: Weak margin (%.2f) for %d blob text block, "
              "but using orientation anyway: %d\n",
              osd_margin, osd_blobs.length(), osd_orientation);
        }
      }
    }
    osd_blobs.shallow_clear();
    finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
  }

  return finder;
}

}  // namespace tesseract.

LineFinder::FindAndRemoveLines[textord/linefind.cpp]

// Finds vertical and horizontal line objects in the given pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// If pix_music_mask != NULL, and music is detected, a mask of the staves
// and anything that is connected (bars, notes etc.) will be returned in
// pix_music_mask, the mask subtracted from pix, and the lines will not
// appear in v_lines or h_lines.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// The detected lines are removed from the pix.
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
                                    int* vertical_x, int* vertical_y,
                                    Pix** pix_music_mask,
                                    TabVector_LIST* v_lines,
                                    TabVector_LIST* h_lines) {
  PERF_COUNT_START("FindAndRemoveLines")
  // pix, vertical_x and vertical_y are required; bail out without touching
  // any output if one of them is missing.
  if (pix == NULL || vertical_x == NULL || vertical_y == NULL) {
    tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
    return;
  }
  Pix* pix_vline = NULL;
  Pix* pix_non_vline = NULL;
  Pix* pix_hline = NULL;
  Pix* pix_non_hline = NULL;
  Pix* pix_intersections = NULL;
  // Debug images are only collected when debug is set.
  Pixa* pixa_display = debug ? pixaCreate(0) : NULL;
  GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
               &pix_non_hline, &pix_intersections, pix_music_mask,
               pixa_display);
  // Find lines, convert to TabVector_LIST and remove those that are used.
  FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
                      &pix_vline, pix_non_vline, pix, v_lines);
  if (pix_hline != NULL) {
    // Recompute intersections and re-filter false positive h-lines.
    if (pix_vline != NULL)
      pixAnd(pix_intersections, pix_vline, pix_hline);
    else
      pixDestroy(&pix_intersections);
    if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
                              pix_hline)) {
      pixDestroy(&pix_hline);
    }
  }
  FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
                      &pix_hline, pix_non_hline, pix, h_lines);
  if (pixa_display != NULL && pix_vline != NULL)
    pixaAddPix(pixa_display, pix_vline, L_CLONE);
  if (pixa_display != NULL && pix_hline != NULL)
    pixaAddPix(pixa_display, pix_hline, L_CLONE);
  if (pix_vline != NULL && pix_hline != NULL) {
    // Remove joins (intersections) where lines cross, and the residue.
    // Recalculate the intersections, since some lines have been deleted.
    pixAnd(pix_intersections, pix_vline, pix_hline);
    // Fatten up the intersections and seed-fill to get the intersection
    // residue.
    Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5);
    pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
    // Now remove the intersection residue.
    pixSubtract(pix, pix, pix_join_residue);
    pixDestroy(&pix_join_residue);
  }
  // Remove any detected music.
  if (pix_music_mask != NULL && *pix_music_mask != NULL) {
    if (pixa_display != NULL)
      pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
    pixSubtract(pix, pix, *pix_music_mask);
  }
  if (pixa_display != NULL)
    pixaAddPix(pixa_display, pix, L_CLONE);

  // Release every intermediate mask created above.
  pixDestroy(&pix_vline);
  pixDestroy(&pix_non_vline);
  pixDestroy(&pix_hline);
  pixDestroy(&pix_non_hline);
  pixDestroy(&pix_intersections);
  if (pixa_display != NULL) {
    // Write the collected debug images to vhlinefinding.pdf.
    pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
                     "vhlinefinding.pdf");
    pixaDestroy(&pixa_display);
  }
  PERF_COUNT_END
}

ImageFind::FindImages [textord/imagefind.cpp]

// Finds image regions within the BINARY source pix (page image) and returns
// the image regions as a mask image.
// The returned pix may be NULL, meaning no images found.
// If not NULL, it must be PixDestroyed by the caller.
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) {
  // Result used on every "nothing to report" path: a newly created 1 bpp pix
  // with the dimensions of the input.
  auto blank_mask = [pix]() {
    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
  };
  // Appends an intermediate image to pixa_debug, but only when
  // textord_tabfind_show_images is set and a debug sink was supplied.
  auto show = [pixa_debug](Pix* stage, const char* caption) {
    if (textord_tabfind_show_images && pixa_debug != nullptr)
      pixa_debug->AddPix(stage, caption);
  };

  // Inputs below the minimum size are not analysed at all.
  if (pixGetWidth(pix) < kMinImageFindSize ||
      pixGetHeight(pix) < kMinImageFindSize) {
    return blank_mask();
  }

  // Work on a copy reduced by a factor of 2.
  Pix* half_res = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
  show(half_res, "CascadeReduced");

  // Leptonica prints an error and returns a null mask when the halftone
  // generator is given too small an image, so skip the call in that case.
  if (pixGetWidth(half_res) < kMinImageFindSize ||
      pixGetHeight(half_res) < kMinImageFindSize) {
    pixDestroy(&half_res);
    return blank_mask();
  }

  // Ask Leptonica for the halftone mask of the reduced image.
  l_int32 halftone_found = 0;
  Pixa* halftone_debug = nullptr;
  if (textord_tabfind_show_images && pixa_debug != nullptr)
    halftone_debug = pixaCreate(0);
  Pix* halftone_half =
      pixGenerateHalftoneMask(half_res, nullptr, &halftone_found,
                              halftone_debug);
  if (halftone_debug != nullptr) {
    Pix* tiled = pixaDisplayTiledInColumns(halftone_debug, 3, 1.0, 20, 2);
    show(tiled, "HalftoneMask");
    pixDestroy(&tiled);
    pixaDestroy(&halftone_debug);
  }
  pixDestroy(&half_res);
  // A mask returned without the "found" flag is discarded.
  if (!halftone_found && halftone_half != nullptr)
    pixDestroy(&halftone_half);
  if (halftone_half == nullptr)
    return blank_mask();

  // Bring the mask back up by a factor of 2.
  Pix* halftone = pixExpandReplicate(halftone_half, 2);
  show(halftone, "HalftoneReplicated");
  pixDestroy(&halftone_half);

  // Seed-fill into the source to pick up pixels near the mask edges that
  // were missed, and merge them into the mask.
  Pix* filled = pixSeedfillBinary(nullptr, halftone, pix, 8);
  pixOr(halftone, halftone, filled);
  pixDestroy(&filled);

  // Build fine and coarse masks used to drop lines and bars that may be
  // joined to images.
  Pix* fine = pixReduceRankBinaryCascade(halftone, 1, 1, 3, 3);
  pixDilateBrick(fine, fine, 5, 5);
  show(fine, "FineMask");

  Pix* first_pass = pixReduceRankBinaryCascade(halftone, 1, 1, 1, 1);
  Pix* second_pass = pixReduceRankBinaryCascade(first_pass, 3, 3, 3, 0);
  pixDestroy(&first_pass);
  pixDilateBrick(second_pass, second_pass, 5, 5);
  Pix* coarse = pixExpandReplicate(second_pass, 8);
  pixDestroy(&second_pass);
  show(coarse, "CoarseMask");

  // Intersect coarse with fine, then dilate slightly so nothing is lost.
  pixAnd(coarse, coarse, fine);
  pixDestroy(&fine);
  pixDilateBrick(coarse, coarse, 3, 3);
  Pix* remover = pixExpandReplicate(coarse, 16);
  pixDestroy(&coarse);
  show(remover, "MaskDilated");

  // Restrict the image mask to the line-and-bar remover.
  pixAnd(halftone, halftone, remover);
  pixDestroy(&remover);
  show(halftone, "FinalMask");

  // Copy into a pix that has exactly the input's dimensions.
  Pix* result = blank_mask();
  pixOr(result, result, halftone);
  pixDestroy(&halftone);
  return result;
}

Textord::find_components [textord/tordmain.cpp]

/**********************************************************************
 * find_components
 *
 * Find the C_OUTLINEs of the connected components in each block, put them
 * in C_BLOBs, and filter them by size, putting the different size
 * grades on different lists in the matching TO_BLOCK in to_blocks.
 **********************************************************************/

void Textord::find_components(Pix* pix, BLOCK_LIST *blocks,
                              TO_BLOCK_LIST *to_blocks) {
  const int page_width = pixGetWidth(pix);
  const int page_height = pixGetHeight(pix);
  // Both dimensions have to fit in a 16 bit coordinate; otherwise give up.
  const bool fits_int16 =
      page_width <= MAX_INT16 && page_height <= MAX_INT16;
  if (!fits_int16) {
    tprintf("Input image too large! (%d, %d)\n", page_width, page_height);
    return;
  }

  set_global_loc_code(LOC_EDGE_PROG);

  // Run edge extraction on every block that has no polygon or whose polygon
  // is text.
  BLOCK_IT walker(blocks);
  walker.mark_cycle_pt();
  while (!walker.cycled_list()) {
    BLOCK* current = walker.data();
    const bool non_text_region =
        current->poly_block() != NULL && !current->poly_block()->IsText();
    if (!non_text_region) {
      extract_edges(pix, current);
    }
    walker.forward();
  }

  // Hand the blobs to their TO_BLOCKs, then filter them using the page's
  // top-right corner as the bound.
  assign_blobs_to_blocks2(pix, blocks, to_blocks);
  ICOORD top_right(page_width, page_height);
  filter_blobs(top_right, to_blocks, !textord_test_landscape);
}

图像识别/Tesseract4.0训练字库，如何提高正确识别率 T_Y_F666 图像识别计算机视觉 java
需求识别图像文字问题由于图像模糊导致识别率较低训练大量图像数据集后识别率依旧不理想解决方案被动解决(1)用户解决用户上传后系统判断是否可以获取到关键信息，可以获取程序正常执行。否则，提示用户，需要进行照片重传。主动解决(1)加大训练集优点简单，无脑进行图像分割就可缺点耗时耗力，结果可能依旧不能让人满意(2)图像清晰处理优点相对省时省力，可以获得较为理想的效果缺点微乎其微判断图像清晰度+清晰处理需要
Centos 6.5上的Tesseract 4.0安装 crazytang_150c
Tesseract4.0在Centos6上安装，存在很多坑，花了不少时间来处理，所以在这里给大家共享一下。1.安装依赖工具yuminstallautoconfautomakelibtoollibjpeg-devellibpng-devellibtiff-develzlib-devel在centos上的yum的autoconf版本是2.63，安装tesseract需要2.69，所以需要先手动升级au
Centos 6.5上的Tesseract 4.0安装 crazytang
Tesseract4.0在Centos6上安装，存在很多坑，花了不少时间来处理，所以在这里给大家共享一下。1.安装依赖工具yuminstallautoconfautomakelibtoollibjpeg-devellibpng-devellibtiff-develzlib-devel在centos上的yum的autoconf版本是2.63，安装tesseract需要2.69，所以需要先手动升级au
VS2015+Tesseract4配置与示例 andylan_zy Tesseract-OCR
接上一篇博客：Tesseract4编译，原文链接：https://blog.csdn.net/andylanzhiyong/article/details/81746904上次编译Tesseract4.0后，在“C:/ProgrameFiles(x86)/tesseract/”目录下生成了目录"include","lib","bin"，这些就是我们编程需要的头文件和库。没配置之前，感觉挺简单的，无
如何训练Tesseract 4.0 伙伴几时见图像识别
原文：https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00tesseract4.0之后开始使用机器学习来进行字符识别，其训练模型的方法与以前的版本有所不同，现将其官网的手册翻译如下（未完成）一、引言Tesseract4.0中包含了一个新的基于神经元网络的识别引擎，使得识别的精度比以前的版本大大提高了，相应的，对
Tesseract怎么识别中文欧世乐 CV
前言经过上一篇文章，我们已经成功安装了Tesseract4.0，并且可以识别出英文了https://blog.csdn.net/qq_43576028/article/details/102907170那么Tesseract要怎么识别中文呢。流程去官网的GitHub上面下载中文训练包https://github.com/tesseract-ocr/tessdata点击进入，不用直接下载，可能会导致
Windows10x64安装tesseract和tesserocr过程 MajQing
1.安装tesseract包的地址https://download.csdn.net/download/u010590593/12476777这是tesseract4.0版本，安装选中语言包的时候需要用外网。也可以不选择语言包，随后在网上下载语言包，自己装进去。2.安装tesserocr使用其他方法容易出错，所以使用whl文件安装包的地址https://download.csdn.net/down
tesseract4.0训练脚本（四）大熊爷 tesseract
lstmtrainingNAMElstmtraining-TrainingprogramforLSTM-basednetworks.基于LSTM的训练程序SYNOPSISlstmtraining--continue_fromtrain_output_dir/continue_from_lang.lstm--old_traineddatabestdata_dir/continue_from_lang
tesseract4.0训练脚本（三）大熊爷 tesseract
combine_tessdata这里包含了很多重要的信息其中介绍tesserac4.0和3.0中的traineddata的构成组件NAMEcombine_tessdata-combine/extract/overwrite/list/compactTesseractdata#用于合并/提取/覆盖/list(-d)/压缩tesseractdataSYNOPSIScombine_tessdata[OP
tesseract4.0训练脚本（一）大熊爷 tesseract
lstmevalNAMElstmeval-EvaluationprogramforLSTM-basednetworks.基于LSTM网络的评估程序SYNOPSISlstmeval--modellang.lstm|langtrain_checkpoint|pluscharsN.NNN_NN.checkpoint[--traineddatalang/lang.traineddata]--eval_li
tesseract4.0训练脚本（二）大熊爷 tesseract
combine_lang_modelCOMBINE_LANG_MODEL(1)COMBINE_LANG_MODEL(1)NAMEcombine_lang_model-generatestartertraineddata#用于生成初始traineddata文件SYNOPSIScombine_lang_model--input_unicharsetfilename--script_dirdirname
使用Tesseract训练图片的方法 lifei092 机器视觉 Tesseract train
所需要的工具：Tesseract4.0（windows版本于2017年1月30号发布），据说windows版本会有很多诡异的Bug；java7以上版本，为安装jTessBoxEditor做准备；jTessBoxEditor，用于标定数据，此软件依赖于java；准备一些需要识别的目标图片，应该能够覆盖所有需要识别的字符；训练新字体对图片的预处理和要求：同tesseractOCR识别对图片有要求一样，
centos下安装tesseract4.0详细步骤及Syntax error near unexpected token `LEPTONICA,'解决 helen1313 python
#安装系统依赖yuminstall-yautomakeautoconflibtoolgccgcc-c++yuminstall-ylibpng-devellibjpeg-devellibtiff-develyum-yinstallpython-develyum-yinstallopenssl-develyum-yinstallopencvyum-yinstalljava-1.8.0-openjdkj
身份证信息识别基于tesseract4.0 + opencv + Python Omega_szu ocr
身份证信息识别基于tesseract4.0+opencv+Pythontessreact4.0介绍及安装opencv安装身份证信息识别的实现批量读取身份证图片SIFT算法进行特征点匹配模板匹配算法二值化,降噪文字识别本人最近开始做有关ocr的项目,具体要做的是身份证信息识别,所以我决定使用tesseract+opencv+Python来实现这个工作.tessreact4.0介绍及安装tessera
记录我训练tesseract4.0时遇到的问题这是一个假的程序员
一、创建培训数据training/tesstrain.sh--fontlist'font1''font2'＃要训练的字体名称列表。--fonts_dir/path/to/fonts＃字体文件的路径。--langLANG_CODE--linedata_only＃仅为训练生成训练数据。--noextract_font_properties--langdata_dir/path/to/langdata＃
在ubuntu16.04上安装tesseract4.0 这是一个假的程序员
1.安装依赖的库sudoapt-getinstallg++#orclang++(presumably)sudoapt-getinstallautoconfautomakelibtoolsudoapt-getinstallautoconf-archivesudoapt-getinstallpkg-configsudoapt-getinstalllibpng12-devsudoapt-getinsta
一、Tesseract4.0训练字库 OCR 提高识别率必备(超详情) LinJF
由于tesseract的中文语言包“chi_sim”对中文手写字体或者环境比较复杂的图片，识别正确率不高，因此需要针对特定情况用自己的样本进行训练，提高识别率，通过训练，也可以形成自己的语言库。对其他语言库有兴趣的：https://github.com/tesseract-ocr/tessdatamac安装：brewinstalltesseract1.前期准备工作：1.安装jdk1.8或以上配置j
VS2013编译Tesseract 3.04步骤详解 ZYCoderr
最近开始学习Tesseract-ocr。Google在GitHub上提供了Tesseract的源码以及现成的语言训练包。然而最新版本的Tesseract4.0需要VS2015及以上版本的支持，而提供的语言训练包只支持4.00和3.04两个版本的Tesseract。由于工作需要，不能安装VS2015，因此为了能直接使用官方提供的训练包，我选择在VS2013上编译Tesseract3.04版本。然而，
Tesseract 4.0在VS2015上的编译与运行温素年
最近毕设在做Tesseract-OCR方面的项目，需要自己编译Tesseract库。1下载相应的软件与源代码下载cppan并配置环境变量，地址：https://cppan.org/client/下载cmake并配置环境变量，地址：https://cmake.org/download/（注意不要下载错误的版本哦！）下载tesseract4.0源代码，地址：https://github.com/tes
tesseract4.0引擎语言包的配置！行走的小明
http://baijiahao.baidu.com/s?id=1603080386704917711&wfr=spider&for=pc在“小叮当Python人工智能篇：图文识别tesseract4.0引擎的安装”中已为大家介绍了如何安装python进行图文识别的引擎，接下来我们来配置语言包，为大家分享tesseract4.0引擎语言包的配置！tesseract4.0引擎语言包的配置Step1.
Tesseract训练中文字体识别问题总结飞轲
Tesseract4.0训练中文字体识别问题总结注：目前仅说明windows下的情况前言网上已经有大量的tesseract的识别教程，这里不再赘述，本文主要针对初学者搭建环境中所遇到的问题进行描述和解答，有些问题因资料有限，只能罗列无法回答，也期待各位进行完善补充：本文参考以下博主的文章进行逐步搭建https://blog.csdn.net/qq_37674858/article/details/
tesseract4.0_train King_Karl
需求原文地址https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00参考地址：https://blog.csdn.net/panbiao1999/article/details/74638749介绍：tesseract4.0集成了基于神经网络的识别引擎，比之前的版本准确率有显著的提高，对于复杂语言的训练比传统tes
python调用tesseract API 使用LSTM模式喵水军的近卫第76师 tesseract-ocr python lstm
前面已经写过如何使用python调用tesseractAPI了，这里说的是如何使用tesseract的LSTM模式。tesseract4.0已经加入LSTM了，在用命令行执行的时候，添加“–oem1”参数即可，但是pythonocr模块里并没有提供使用oem参数的init函数，查看tesseract的源码，capi.cpp定位到257行有：TESS_APIintTESS_CALLTessBaseA
图像文字识别（三）：Tesseract4.0训练字库，提高正确识别率 a745233700 Tesseract文字识别
由于tesseract的中文语言包“chi_sim”对中文手写字体或者环境比较复杂的图片，识别正确率不高，因此需要针对特定情况用自己的样本进行训练，提高识别率，通过训练，也可以形成自己的语言库。步骤：1、工具准备：（1）官方文档：https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00（2）Java虚拟机，由于jT
谷歌开源tesseract4.0的编译过程 lancetop-stardrms OCR
1、配置安装cppan和cmake，下载tesseract源码文件。2、cd[sourcedir]->cppan->mdbuild->cdbuild->cmake..;3、VS打开build下工程，修改下列文件编码格式，打开文件，点击菜单[文件]->[高级保存选项]，设置为Unicodecodepage1200ccmain\equationdetect.cppSaveAsUnicodecodepa
Windows下Tesseract4.0识别与中文手写字体训练 -牧野- tesseract JTessBoxEditor 中文训练深度学习
一、tesseract4.0安装及使用1.tesseract4.0安装安装包下载地址：http://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-setup-4.00.00dev.exe我在CSDN下载资源里也上传了一份：http://download.csdn.net/download/dcrmg/10021168exe可执行文件直接安装，选择
ubuntu下tesseract 4.0安装及参数使用 -牧野- 深度学习 ubuntu
tesseract是一个开源的OCR引擎，最初是由惠普公司开发用来作为其平板扫描仪的OCR引擎，2005年惠普将其开源出来，之后google接手负责维护。目前稳定的版本是3.0。4.0版本加入了基于LSTM的神经网络技术，中文字符识别准确率有所提高。ubuntu下tesseract4.0安装：终端输入以下命令：sudoadd-apt-repositoryppa:alex-p/tesseract
VS2013编译Tesseract 3.04步骤详解随便称呼
最近开始学习Tesseract-ocr。Google在GitHub上提供了Tesseract的源码以及现成的语言训练包。然而最新版本的Tesseract4.0需要VS2015及以上版本的支持，而提供的语言训练包只支持4.00和3.04两个版本的Tesseract。由于工作需要，不能安装VS2015，因此为了能直接使用官方提供的训练包，我选择在VS2013上编译Tesseract3.04版本。然而，
如何训练Tesseract 4.0 panbiao1999 ocr
原文：https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00tesseract4.0之后开始使用机器学习来进行字符识别，其训练模型的方法与以前的版本有所不同，现将其官网的手册翻译如下（未完成）一、引言Tesseract4.0中包含了一个新的基于神经元网络的识别引擎，使得识别的精度比以前的版本大大提高了，相应的，对
桌面上有多个球在同时运动，怎么实现球之间不交叉，即碰撞？换个号韩国红果果 html 小球碰撞
稍微想了一下，然后解决了很多bug，最后终于把它实现了。其实原理很简单。在每改变一个小球的x y坐标后，遍历整个在dom树中的其他小球，看一下它们与当前小球的距离是否小于球半径的两倍？若小于说明下一次绘制该小球（设为a）前要把他的方向变为原来相反方向（与a要碰撞的小球设为b），即假如当前小球的距离小于球半径的两倍的话，马上改变当前小球方向。那么下一次绘制也是先绘制b，再绘制a，由于a的方向已经改变
《高性能HTML5》读后整理的Web性能优化内容白糖_ html5
读后感先说说《高性能HTML5》这本书的读后感吧，个人觉得这本书前两章跟书的标题完全搭不上关系，或者说只能算是讲解了“高性能”这三个字，HTML5完全不见踪影。个人觉得作者应该首先把HTML5的大菜拿出来讲一讲，再去分析性能优化的内容，这样才会有吸引力。因为只是在线试读，没有机会看后面的内容，所以不胡乱评价了。
[JShop]Spring MVC的RequestContextHolder使用误区 dinguangx jeeshop 商城系统 jshop 电商系统
在spring mvc中，为了随时都能取到当前请求的request对象，可以通过RequestContextHolder的静态方法getRequestAttributes()获取Request相关的变量，如request, response等。在jshop中，对RequestContextHolder的
算法之时间复杂度周凡杨 java 算法时间复杂度效率
在计算机科学中，算法的时间复杂度是一个函数，它定量描述了该算法的运行时间。这是一个关于代表算法输入值的字符串的长度的函数。时间复杂度常用大O符号表述，不包括这个函数的低阶项和首项系数。使用这种方式时，时间复杂度可被称为是渐近的，它考察当输入值大小趋近无穷时的情况。这样用大写O()来体现算法时间复杂度的记法，
Java事务处理 g21121 java
一、什么是Java事务通常的观念认为，事务仅与数据库相关。事务必须服从ISO/IEC所制定的ACID原则。ACID是原子性（atomicity）、一致性（consistency）、隔离性（isolation）和持久性（durability）的缩写。事务的原子性表示事务执行过程中的任何失败都将导致事务所做的任何修改失效。一致性表示当事务执行失败时，所有被该事务影响的数据都应该恢复到事务执行前的状
Linux awk命令详解 510888780 linux
一. AWK 说明 awk是一种编程语言，用于在linux/unix下对文本和数据进行处理。数据可以来自标准输入、一个或多个文件，或其它命令的输出。它支持用户自定义函数和动态正则表达式等先进功能，是linux/unix下的一个强大编程工具。它在命令行中使用，但更多是作为脚本来使用。 awk的处理文本和数据的方式：它逐行扫描文件，从第一行到
android permission 布衣凌宇 Permission
<uses-permission android:name="android.permission.ACCESS_CHECKIN_PROPERTIES" ></uses-permission>允许读写访问"properties"表在checkin数据库中，改值可以修改上传 <uses-permission android:na
Oracle和谷歌Java Android官司将推迟 aijuans java oracle
北京时间 10 月 7 日，据国外媒体报道，Oracle 和谷歌之间一场等待已久的官司可能会推迟至 10 月 17 日以后进行，这场官司的内容是 Android 操作系统所谓的 Java 专利权之争。本案法官 William Alsup 称根据专利权专家 Florian Mueller 的预测，谷歌 Oracle 案很可能会被推迟。　　该案中的第二波辩护被安排在 10 月 17 日出庭，从目前看来
linux shell 常用命令 antlove linux shell command
grep [options] [regex] [files] /var/root # grep -n "o" * hello.c:1:/* This C source can be compiled with:
Java解析XML配置数据库连接(DOM技术连接 SAX技术连接) 百合不是茶 sax技术 Java解析xml文档 dom技术 XML配置数据库连接
XML配置数据库文件的连接其实是个很简单的问题,为什么到现在才写出来主要是昨天在网上看了别人写的,然后一直陷入其中,最后发现不能自拔所以今天决定自己完成 ,,,,现将代码与思路贴出来供大家一起学习 XML配置数据库的连接主要技术点的博客; JDBC编程 : JDBC连接数据库 DOM解析XML: DOM解析XML文件 SA
underscore.js 学习（二） bijian1013 JavaScript underscore
Array Functions 所有数组函数对参数对象一样适用。1.first _.first(array, [n]) 别名: head, take 返回array的第一个元素，设置了参数n，就
plSql介绍 bijian1013 oracle 数据库 plsql
/* * PL/SQL 程序设计学习笔记 * 学习plSql介绍.pdf * 时间：2010-10-05 */ --创建DEPT表 create table DEPT ( DEPTNO NUMBER(10), DNAME NVARCHAR2(255), LOC NVARCHAR2(255) ) delete dept; select
【Nginx一】Nginx安装与总体介绍 bit1129 nginx
启动、停止、重新加载Nginx nginx 启动Nginx服务器，不需要任何参数u nginx -s stop 快速(强制)关系Nginx服务器 nginx -s quit 优雅的关闭Nginx服务器 nginx -s reload 重新加载Nginx服务器的配置文件 nginx -s reopen 重新打开Nginx日志文件
spring mvc开发中浏览器兼容的奇怪问题 bitray jquery Ajax springMVC 浏览器上传文件
最近个人开发一个小的OA项目,属于复习阶段.使用的技术主要是spring mvc作为前端框架,mybatis作为数据库持久化技术.前台使用jquery和一些jquery的插件. 在开发到中间阶段时候发现自己好像忽略了一个小问题,整个项目一直在firefox下测试,没有在IE下测试,不确定是否会出现兼容问题.由于jquer
Lua的io库函数列表 ronin47 lua io
1、io表调用方式：使用io表，io.open将返回指定文件的描述，并且所有的操作将围绕这个文件描述　　io表同样提供三种预定义的文件描述io.stdin,io.stdout,io.stderr 　　2、文件句柄直接调用方式,即使用file:XXX()函数方式进行操作,其中file为io.open()返回的文件句柄　　多数I/O函数调用失败时返回nil加错误信息,有些函数成功时返回nil
java-26-左旋转字符串 bylijinnan java
public class LeftRotateString { /** * Q 26 左旋转字符串 * 题目：定义字符串的左旋转操作：把字符串前面的若干个字符移动到字符串的尾部。 * 如把字符串abcdef左旋转2位得到字符串cdefab。 * 请实现字符串左旋转的函数。要求时间对长度为n的字符串操作的复杂度为O(n)，辅助内存为O(1)。 */ pu
《vi中的替换艺术》-linux命令五分钟系列之十一 cfyme linux命令
vi方面的内容不知道分类到哪里好，就放到《Linux命令五分钟系列》里吧！今天编程，关于栈的一个小例子，其间我需要把”S.”替换为”S->”(替换不包括双引号)。其实这个不难，不过我觉得应该总结一下vi里的替换技术了，以备以后查阅。 1 所有替换方案都要在冒号“:”状态下书写。 2 如果想将abc替换为xyz，那么就这样 :s/abc/xyz/ 不过要特别
[轨道与计算]新的并行计算架构 comsci 并行计算
我在进行流程引擎循环反馈试验的过程中，发现一个有趣的事情。。。如果我们在流程图的每个节点中嵌入一个双向循环代码段，而整个流程中又充满着很多并行路由，每个并行路由中又包含着一些并行节点，那么当整个流程图开始循环反馈过程的时候，这个流程图的运行过程是否变成一个并行计算的架构呢？
重复执行某段代码 dai_lm android
用handler就可以了 private Handler handler = new Handler(); private Runnable runnable = new Runnable() { public void run() { update(); handler.postDelayed(this, 5000); } }; 开始计时 h
Java实现堆栈（list实现） datageek 数据结构——堆栈
public interface IStack<T> { //元素出栈，并返回出栈元素 public T pop(); //元素入栈 public void push(T element); //获取栈顶元素 public T peek(); //判断栈是否为空 public boolean isEmpty
四大备份MySql数据库方法及可能遇到的问题 dcj3sjt126com DB backup
一：通过备份王等软件进行备份前台进不去？用备份王等软件进行备份是大多老站长的选择，这种方法方便快捷，只要上传备份软件到空间一步步操作就可以，但是许多刚接触备份王软件的客用户来说还原后会出现一个问题：因为新老空间数据库用户名和密码不统一，网站文件打包过来后因没有修改连接文件，还原数据库是好了，可是前台会提示数据库连接错误，网站从而出现打不开的情况。解决方法：学会修改网站配置文件，大多是由co
github做webhooks：[1]钩子触发是否成功测试 dcj3sjt126com github git webhook
转自: http://jingyan.baidu.com/article/5d6edee228c88899ebdeec47.html github和svn一样有钩子的功能，而且更加强大。例如我做的是最常见的push操作触发的钩子操作，则每次更新之后的钩子操作记录都会在github的控制板可以看到！工具/原料 github 方法/步骤
JSP中<base href="<%=basePath%>">的作用 蕃薯耀
JSP中<base href="<%=basePath%>">的作用 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
linux下SAMBA服务安装与配置 hanqunfeng linux
局域网使用的文件共享服务。一.安装包： rpm -qa | grep samba samba-3.6.9-151.el6.x86_64 samba-common-3.6.9-151.el6.x86_64 samba-winbind-3.6.9-151.el6.x86_64 samba-client-3.6.9-151.el6.x86_64 samba-winbind-clients
guava cache IXHONG cache
缓存，在我们日常开发中是必不可少的一种解决性能问题的方法。简单的说，cache 就是为了提升系统性能而开辟的一块内存空间。　　缓存的主要作用是暂时在内存中保存业务系统的数据处理结果，并且等待下次访问使用。在日常开发的很多场合，由于受限于硬盘IO的性能或者我们自身业务系统的数据处理和获取可能非常费时，当我们发现我们的系统这个数据请求量很大的时候，频繁的IO和频繁的逻辑处理会导致硬盘和CPU资源的
Query的开始--全局变量,noconflict和兼容各种js的初始化方法 kvhur JavaScript jquery css
这个是整个jQuery代码的开始，里面包含了对不同环境的js进行的处理，例如普通环境，Nodejs，和requiredJs的处理方法。还有jQuery生成$, jQuery全局变量的代码和noConflict代码详解完整资源： http://www.gbtags.com/gb/share/5640.htm jQuery 源码： (
美国人的福利和中国人的储蓄 nannan408
今天看了篇文章，震动很大，说的是美国的福利。美国医院的无偿入院真的是个好措施。小小的改善，对于社会是大大的信心。小孩，税费等，政府不收反补，真的体现了人文主义。美国这么高的社会保障会不会使人变懒？答案是否定的。正因为政府解决了后顾之忧，人们才得以倾尽精力去做一些有创造力，更造福社会的事情，这竟成了美国社会思想、人
N阶行列式计算(JAVA) qiuwanchi N阶行列式计算
package gaodai; import java.util.List; /** * N阶行列式计算 * @author 邱万迟 * */ public class DeterminantCalculation { public DeterminantCalculation(List<List<Double>> determina
C语言算法之打渔晒网问题 qiufeihu c 算法
如果一个渔夫从2011年1月1日开始每三天打一次渔，两天晒一次网，编程实现当输入2011年1月1日以后任意一天，输出该渔夫是在打渔还是在晒网。代码如下： #include <stdio.h> int leap(int a) /*自定义函数leap()用来指定输入的年份是否为闰年*/ { if((a%4 == 0 && a%100 != 0
XML中DOCTYPE字段的解析 wyzuomumu xml
DTD声明始终以!DOCTYPE开头,空一格后跟着文档根元素的名称,如果是内部DTD,则再空一格出现[],在中括号中是文档类型定义的内容. 而对于外部DTD,则又分为私有DTD与公共DTD,私有DTD使用SYSTEM表示,接着是外部DTD的URL. 而公共DTD则使用PUBLIC,接着是DTD公共名称,接着是DTD的URL. 私有DTD <!DOCTYPErootSYST

tesseract源码Page Layout解读（倾斜矫正）

http://blog.csdn.net/kaelsass/article/details/46874627

http://www.jianshu.com/p/7c63fd62ea28

代码调用

代码附录

Tesseract::SegmentPage[ccmain/pagesegmain.cpp] ->

Tesseract::SetupPageSegAndDetectOrientation[ccmain/pagesegmain.cpp] ->

LineFinder::FindAndRemoveLines[textord/linefind.cpp]

ImageFind::FindImages [textord/imagefind.cpp]

Textord::find_components [textord/tordmain.cpp]

你可能感兴趣的:(Tesseract4.0)