本文将介绍如何在C#下调用Tesseract-OCR,由于现在已经有编译好的dll文件,所以只需添加引用到项目中即可。
dll文件可在此处下载,下载后将其添加到项目的引用中。另外需要自行下载语言库(注意版本为3.01),此处使用的是英文语言库(eng);语言库文件需放在程序启动目录下的 tessdata 文件夹中,否则初始化时找不到语言数据。
与在C++下添加引用不同,此处无需再进行繁琐的配置,所以下面直接贴代码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using tesseract; // Tesseract OCR wrapper namespace

namespace 图像识别
{
    /// <summary>
    /// Main form of the sample: clicking the picture box loads an image from disk,
    /// clicking the button runs OCR on that image and shows the text in the text box.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// OCR engine modes passed (as an int) to the engine's Init call.
        /// </summary>
        public enum TesseractEngineMode : int
        {
            /// <summary>
            /// Run Tesseract only - fastest
            /// </summary>
            TESSERACT_ONLY = 0,

            /// <summary>
            /// Run Cube only - better accuracy, but slower
            /// </summary>
            CUBE_ONLY = 1,

            /// <summary>
            /// Run both and combine results - best accuracy
            /// </summary>
            TESSERACT_CUBE_COMBINED = 2,

            /// <summary>
            /// Specify this mode when calling init_*(),
            /// to indicate that any of the above modes
            /// should be automatically inferred from the
            /// variables in the language-specific config,
            /// command-line configs, or if not specified
            /// in any of the above should be set to the
            /// default OEM_TESSERACT_ONLY.
            /// </summary>
            DEFAULT = 3
        }

        /// <summary>
        /// Page segmentation modes used for the "tessedit_pageseg_mode" variable.
        /// </summary>
        public enum TesseractPageSegMode : int
        {
            /// <summary>
            /// Fully automatic page segmentation
            /// </summary>
            PSM_AUTO = 0,

            /// <summary>
            /// Assume a single column of text of variable sizes
            /// </summary>
            PSM_SINGLE_COLUMN = 1,

            /// <summary>
            /// Assume a single uniform block of text (Default)
            /// </summary>
            PSM_SINGLE_BLOCK = 2,

            /// <summary>
            /// Treat the image as a single text line
            /// </summary>
            PSM_SINGLE_LINE = 3,

            /// <summary>
            /// Treat the image as a single word
            /// </summary>
            PSM_SINGLE_WORD = 4,

            /// <summary>
            /// Treat the image as a single character
            /// </summary>
            PSM_SINGLE_CHAR = 5
        }

        // OCR engine instance; created and initialized once in the constructor.
        private TesseractProcessor m_tesseract = null;

        // Directory holding the language data: "<startup path>\tessdata\".
        private string m_path = Application.StartupPath + @"\tessdata\";

        // Language code handed to the engine ("eng" = English).
        private string m_lang = "eng";

        /// <summary>
        /// Builds the form and initializes the OCR engine with the configured
        /// data path and language.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            // Initialize the OCR engine.
            m_tesseract = new TesseractProcessor();
            m_tesseract.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);

            // NOTE(review): ToString() on an enum yields the member name ("PSM_AUTO"),
            // not its numeric value. Confirm which form the wrapper's SetVariable
            // expects (and that the numeric values above match the native library
            // version in use) before relying on this setting.
            m_tesseract.SetVariable("tessedit_pageseg_mode", TesseractPageSegMode.PSM_AUTO.ToString());

            // Optional: restrict recognition to digits and upper-case letters.
            //m_tesseract.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        }

        /// <summary>
        /// Runs OCR on the given image and returns whatever the engine's Apply call returns.
        /// </summary>
        /// <param name="image">Image to recognize.</param>
        /// <returns>The recognized text as produced by the engine.</returns>
        private string Ocr(Image image)
        {
            // Clear the engine and its adaptive classifier before every run.
            m_tesseract.Clear();
            m_tesseract.ClearAdaptiveClassifier();
            return m_tesseract.Apply(image);
        }

        /// <summary>
        /// Lets the user pick an image file and shows it in the picture box.
        /// </summary>
        private void pictureBox1_Click(object sender, EventArgs e)
        {
            // The dialog is IDisposable; release it as soon as we are done with it.
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Filter = "*.JPG|*.jpg|*.PNG|*.png|*.BMP|*.bmp";
                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                Image image = Image.FromFile(openFileDialog.FileName);

                // Swap in the new image first, then dispose the one it replaced so
                // repeated loads do not accumulate undisposed images.
                Image previous = pictureBox1.Image;
                pictureBox1.Image = image;
                if (previous != null)
                {
                    previous.Dispose();
                }
            }
        }

        /// <summary>
        /// Runs OCR on the currently displayed image and writes the text to the text box.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            if (pictureBox1.Image == null)
            {
                return;
            }

            // Guard against a null result so the Replace calls below cannot throw.
            string result = Ocr(pictureBox1.Image) ?? string.Empty;

            // The text box only shows line breaks for "\r\n". Normalize first so an
            // existing "\r\n" is not turned into "\r\r\n".
            textBox1.Text = result.Replace("\r\n", "\n").Replace("\n", "\r\n");
        }
    }
}