// C#: Optical Character Recognition

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Drawing;
using System.IO;
using System.Drawing.Imaging;
using MODI;//Microsoft Office Document Imaging
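// Prerequisite: install the "Microsoft Office Document Imaging" (MODI) component from the Office installation media. It is not installed by a default Office setup; adding this feature is all that is needed. Installation instructions: http://support.microsoft.com/kb/982760
// COM reference: Microsoft Office Document Imaging 12.0 Type Library (Office 2007)
// or Microsoft Office Document Imaging 11.0 Type Library (Office 2003)
// Simplified Chinese OCR engine: http://www.microsoft.com/downloads/thankyou.aspx?familyId=dd172063-9517-41d8-82af-29c38f7437b6&displayLang=zh-hk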

namespace ToText
{   
    /// <summary>
    /// Optical Character Recognition (OCR) helpers.
    /// 2014-05-07, Geovin Du (涂聚文)
    /// </summary>
    public static class OCRGetstring
    {
        /// <summary>
        /// Builds the list of OCR languages offered to the caller.
        /// </summary>
        /// <returns>
        /// A new <see cref="DataTable"/> with three columns: <c>ID</c> (int, 1-based row number),
        /// <c>LanguageName</c> (display name) and <c>LanguageLCID</c> (the language code as a string).
        /// </returns>
        public static DataTable getLanguageList()
        {
            // Display name / language code pairs, listed in presentation order.
            string[,] entries =
            {
                { "简体中文", "2052" },   // Simplified Chinese
                { "繁体中文", "1028" },   // Traditional Chinese
                { "英语", "9" },          // English
                { "捷克语", "5" },        // Czech
                { "丹麦语", "6" },        // Danish
                { "德语", "7" },          // German
                { "希腊语", "8" },        // Greek
                { "西班牙语", "10" },     // Spanish
                { "芬兰语", "11" },       // Finnish
                { "法语", "12" },         // French
                { "匈牙利语", "14" },     // Hungarian
                { "意大利语", "16" },     // Italian
                { "日语", "17" },         // Japanese
                { "韩语", "18" },         // Korean
                { "荷兰语", "19" },       // Dutch
                { "挪威语", "20" },       // Norwegian
                { "波兰语", "21" },       // Polish
                { "葡萄牙语", "22" },     // Portuguese
                { "俄语", "25" },         // Russian
                { "瑞典语", "29" },       // Swedish
                { "土耳其语", "31" }      // Turkish
            };

            var table = new DataTable();
            table.Columns.Add("ID", typeof(int));
            table.Columns.Add("LanguageName", typeof(string));
            table.Columns.Add("LanguageLCID", typeof(string));

            // Row IDs are simply the 1-based position in the list above.
            for (int index = 0; index < entries.GetLength(0); index++)
            {
                table.Rows.Add(index + 1, entries[index, 0], entries[index, 1]);
            }

            return table;
        }

        /// <summary>
        /// Lookup from language code string to the matching MODI language constant.
        /// </summary>
        private static readonly Dictionary<string, MODI.MiLANGUAGES> LanguageByCode =
            new Dictionary<string, MODI.MiLANGUAGES>
            {
                { "2052", MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED },
                { "1028", MODI.MiLANGUAGES.miLANG_CHINESE_TRADITIONAL },
                { "5", MODI.MiLANGUAGES.miLANG_CZECH },
                { "6", MODI.MiLANGUAGES.miLANG_DANISH },
                { "7", MODI.MiLANGUAGES.miLANG_GERMAN },
                { "8", MODI.MiLANGUAGES.miLANG_GREEK },
                { "9", MODI.MiLANGUAGES.miLANG_ENGLISH },
                { "10", MODI.MiLANGUAGES.miLANG_SPANISH },
                { "11", MODI.MiLANGUAGES.miLANG_FINNISH },
                { "12", MODI.MiLANGUAGES.miLANG_FRENCH },
                { "14", MODI.MiLANGUAGES.miLANG_HUNGARIAN },
                { "16", MODI.MiLANGUAGES.miLANG_ITALIAN },
                { "17", MODI.MiLANGUAGES.miLANG_JAPANESE },
                { "18", MODI.MiLANGUAGES.miLANG_KOREAN },
                { "19", MODI.MiLANGUAGES.miLANG_DUTCH },
                { "20", MODI.MiLANGUAGES.miLANG_NORWEGIAN },
                { "21", MODI.MiLANGUAGES.miLANG_POLISH },
                { "22", MODI.MiLANGUAGES.miLANG_PORTUGUESE },
                { "25", MODI.MiLANGUAGES.miLANG_RUSSIAN },
                { "29", MODI.MiLANGUAGES.miLANG_SWEDISH },
                { "31", MODI.MiLANGUAGES.miLANG_TURKISH }
            };

        /// <summary>
        /// Translates a language code string into the MODI language constant used for OCR.
        /// </summary>
        /// <param name="sValue">Language code such as "2052" or "9"; matched exactly (no trimming).</param>
        /// <returns>
        /// The matching <see cref="MODI.MiLANGUAGES"/> value, or
        /// <c>miLANG_ENGLISH</c> when the code is null or not in the table.
        /// </returns>
        private static MODI.MiLANGUAGES GetLanuageType(string sValue)
        {
            MODI.MiLANGUAGES match;

            // Dictionary lookups reject null keys, so null is routed to the fallback explicitly.
            if (sValue != null && LanguageByCode.TryGetValue(sValue, out match))
            {
                return match;
            }

            return MODI.MiLANGUAGES.miLANG_ENGLISH;
        }


        /// <summary>
        /// Runs MODI OCR over an in-memory image and returns the recognized text.
        /// </summary>
        /// <remarks>
        /// The image is drawn at its original size in the top-left corner of a canvas that is at
        /// least 1024x768 pixels, saved as a temporary BMP file, and that file is handed to MODI.
        /// Only the first page of the resulting document (<c>Images[0]</c>) is read.
        /// Both temporary files are deleted before the method returns, whether or not OCR succeeds.
        /// </remarks>
        /// <param name="image">Image to recognize. Must not be null.</param>
        /// <param name="language">
        /// Language code string understood by <c>GetLanuageType</c> (for example "2052" or "9");
        /// unrecognized values fall back to English.
        /// </param>
        /// <returns>The text MODI reports for the first page, or an empty string if it reports none.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
        public static string ExtractText(this System.Drawing.Image image,string language)
        {
            // Fail fast before any temp file is created.
            if (image == null)
            {
                throw new ArgumentNullException("image");
            }

            // GetTempFileName creates an empty file on disk; the BMP lives next to it under a
            // ".bmp" suffix, so both paths must be cleaned up.
            var tmpFile = Path.GetTempFileName();
            var bmpFile = tmpFile + ".bmp";
            try
            {
                // Bitmap and Graphics wrap GDI+ handles; dispose them deterministically.
                using (var canvas = new Bitmap(Math.Max(image.Width, 1024), Math.Max(image.Height, 768)))
                {
                    using (var gfx = Graphics.FromImage(canvas))
                    {
                        // Only the image rectangle is painted; the rest of the canvas is left untouched.
                        gfx.DrawImage(image, new Rectangle(0, 0, image.Width, image.Height));
                    }

                    canvas.Save(bmpFile, ImageFormat.Bmp);
                }

                var doc = new MODI.Document();
                try
                {
                    doc.Create(bmpFile);
                    try
                    {
                        doc.OCR(GetLanuageType(language), true, true);  // language selects the recognizer
                        var page = (MODI.Image)doc.Images[0];

                        // Keep returning "" (never null) when no text is reported.
                        return page.Layout.Text ?? string.Empty;
                    }
                    finally
                    {
                        // Close without saving so the BMP can be deleted below.
                        doc.Close(false);
                    }
                }
                finally
                {
                    // NOTE(review): releasing the COM wrapper is the usual way to make MODI let go
                    // of the file promptly - confirm against the MODI version in use.
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                }
            }
            finally
            {
                File.Delete(tmpFile);
                File.Delete(bmpFile);
            }
        }
        /// <summary>
        /// Runs MODI OCR over an image file on disk and returns the recognized text of all its pages.
        /// </summary>
        /// <param name="fileToOCR">Path of the image file to recognize.</param>
        /// <param name="language">
        /// Language code string understood by <c>GetLanuageType</c> (for example "2052" or "9");
        /// unrecognized values fall back to English.
        /// </param>
        /// <returns>
        /// The text of every page concatenated in page order with no separator,
        /// or an empty string when <paramref name="fileToOCR"/> does not exist.
        /// </returns>
        public static string getFileToOCR(string fileToOCR, string language)
        {
            // A missing file (File.Exists is false) yields an empty result rather than an error.
            if (!File.Exists(fileToOCR))
            {
                return string.Empty;
            }

            var text = new StringBuilder();
            var doc = new MODI.Document();
            try
            {
                doc.Create(fileToOCR);
                try
                {
                    doc.OCR(GetLanuageType(language), true, true);

                    // Read the page count once instead of on every iteration.
                    int pageCount = doc.Images.Count;
                    for (int i = 0; i < pageCount; i++)
                    {
                        var page = (MODI.Image)doc.Images[i];
                        text.Append(page.Layout.Text);
                    }
                }
                finally
                {
                    // Always close (without saving), even when OCR throws, so the document
                    // is not left open on the source file.
                    doc.Close(false);
                }
            }
            finally
            {
                // NOTE(review): releasing the COM wrapper is the usual way to make MODI let go
                // of the file promptly - confirm against the MODI version in use.
                System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
            }

            return text.ToString();
        }
    }
}

 

// You may also be interested in: (character)