Tesseract学习(四)

本文介绍如何在C#中调用Tesseract-OCR。由于已经有编译好的dll文件,只需在项目中添加对它的引用即可。

dll文件可在此处下载,下载后添加到项目的引用中。另外需要自行下载语言库(注意版本须为3.01),此处使用英文语言库(eng),并将其放到程序启动目录下的 tessdata 文件夹中,否则初始化时找不到语言数据。

与在C++下添加引用不同,此处无需再进行繁琐的配置,所以下面直接贴代码:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using tesseract;//引入命名空间

namespace 图像识别
{
    /// <summary>
    /// Demo form for calling Tesseract-OCR from C#: clicking the picture box
    /// lets the user choose an image file, and clicking the button runs OCR on
    /// the displayed image and shows the recognized text in the text box.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// OCR engine modes; the numeric value is what gets passed to
        /// <c>TesseractProcessor.Init</c>.
        /// </summary>
        public enum TesseractEngineMode : int
        {
            /// <summary>
            /// Run Tesseract only - fastest
            /// </summary>
            TESSERACT_ONLY = 0,

            /// <summary>
            /// Run Cube only - better accuracy, but slower
            /// </summary>
            CUBE_ONLY = 1,

            /// <summary>
            /// Run both and combine results - best accuracy
            /// </summary>
            TESSERACT_CUBE_COMBINED = 2,

            /// <summary>
            /// Specify this mode when calling init_*(),
            /// to indicate that any of the above modes
            /// should be automatically inferred from the
            /// variables in the language-specific config,
            /// command-line configs, or if not specified
            /// in any of the above should be set to the
            /// default OEM_TESSERACT_ONLY.
            /// </summary>
            DEFAULT = 3
        }

        /// <summary>
        /// Page segmentation modes used for the
        /// <c>tessedit_pageseg_mode</c> variable.
        /// </summary>
        public enum TesseractPageSegMode : int
        {
            /// <summary>
            /// Fully automatic page segmentation
            /// </summary>
            PSM_AUTO = 0,

            /// <summary>
            /// Assume a single column of text of variable sizes
            /// </summary>
            PSM_SINGLE_COLUMN = 1,

            /// <summary>
            /// Assume a single uniform block of text (Default)
            /// </summary>
            PSM_SINGLE_BLOCK = 2,

            /// <summary>
            /// Treat the image as a single text line
            /// </summary>
            PSM_SINGLE_LINE = 3,

            /// <summary>
            /// Treat the image as a single word
            /// </summary>
            PSM_SINGLE_WORD = 4,

            /// <summary>
            /// Treat the image as a single character
            /// </summary>
            PSM_SINGLE_CHAR = 5
        }

        // OCR engine wrapper; created and initialized once in the constructor.
        private TesseractProcessor m_tesseract = null;

        // Folder holding the language data: "tessdata" next to the executable.
        private string m_path = Application.StartupPath+@"\tessdata\";

        // Language code handed to Init ("eng" = English language data).
        private string m_lang = "eng";

        /// <summary>
        /// Builds the form's controls and initializes the OCR engine with the
        /// language data found under <c>m_path</c>.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            // Initialization of the OCR engine.
            m_tesseract = new TesseractProcessor();
            m_tesseract.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);

            // NOTE(review): Enum.ToString() yields the member NAME ("PSM_AUTO"),
            // not its numeric value. Confirm that the wrapper accepts a name
            // here; if it expects a number, also verify that this enum's values
            // match the page-segmentation constants of the engine version in use
            // before switching to ((int)...).ToString().
            m_tesseract.SetVariable("tessedit_pageseg_mode", TesseractPageSegMode.PSM_AUTO.ToString());

            // Optional: restrict recognition to digits and upper-case letters.
            //m_tesseract.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        }

        /// <summary>
        /// Runs OCR on a single image.
        /// </summary>
        /// <param name="image">Image to recognize.</param>
        /// <returns>The text returned by <c>TesseractProcessor.Apply</c>.</returns>
        private string Ocr(Image image)
        {
            // Reset the engine before each run (presumably so that results and
            // adaptive state from an earlier image do not carry over - confirm
            // against the wrapper's documentation).
            m_tesseract.Clear();
            m_tesseract.ClearAdaptiveClassifier();
            return m_tesseract.Apply(image);
        }

        /// <summary>
        /// Lets the user pick an image file and displays it in the picture box.
        /// </summary>
        private void pictureBox1_Click(object sender, EventArgs e)
        {
            // The dialog owns native resources, so dispose it deterministically
            // instead of waiting for finalization.
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                // Filter entries are "label|pattern" pairs, so this produces the
                // three choices "*.JPG", "*.PNG" and "*.BMP".
                openFileDialog.Filter = "*.JPG|*.jpg|*.PNG|*.png|*.BMP|*.bmp";

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                // Load the new image first: if loading throws, the currently
                // displayed image stays intact.
                Image previous = pictureBox1.Image;
                pictureBox1.Image = Image.FromFile(openFileDialog.FileName);

                // Release the image that was shown before; otherwise every new
                // selection leaks a GDI+ handle and keeps the old file locked.
                if (previous != null)
                {
                    previous.Dispose();
                }
            }
        }

        /// <summary>
        /// Runs OCR on the displayed image and shows the result in the text box.
        /// Does nothing when no image has been loaded yet.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            if (pictureBox1.Image == null)
            {
                return;
            }

            string result = Ocr(pictureBox1.Image);

            // Defensive: avoid a NullReferenceException should the wrapper
            // return null instead of an empty string.
            if (result == null)
            {
                textBox1.Text = string.Empty;
                return;
            }

            // A multi-line TextBox only breaks lines on "\r\n", so convert the
            // "\n" line ends. Collapsing "\r\n" first keeps existing Windows
            // line ends from turning into "\r\r\n".
            textBox1.Text = result.Replace("\r\n", "\n").Replace("\n", "\r\n");
        }
    }
}



结果:

Tesseract学习(四)_第1张图片

你可能感兴趣的:(api,C#,博客,图像识别)