LumiSoft.Net邮件接收乱码问题解决

今天用LumiSoft.Net这个组件收取邮件时,遇到邮件标题中含有类似于 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 和 =?gbk?Q?=C6=BD=B0=B2=D6=A4=C8=AF*=C3=BF=D6=DC=B1=A8?= 这两种格式的"乱码"。随后Google了一下,原因是邮件头只能传输ASCII字符,标题中的非ASCII文字会按RFC 2047编码成 =?字符集?传输编码?内容?= 的形式,而组件没有把它还原。例如 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 表示邮件内容的字符集是utf-8,传输时采用Base64编码(B);第二个例子中的Q则表示传输编码为Quoted-Printable。
对于这种格式的字符,Google大神帮我搜到了相关的处理代码,然后综合项目,整理出来了,留个记号。

  	/// <summary>
        /// Decodes RFC 2047 encoded-words (<c>=?charset?B?...?=</c> and
        /// <c>=?charset?Q?...?=</c>) found in a mail header value such as the subject.
        /// </summary>
        /// <param name="input">Raw header text; may contain zero or more encoded-words.</param>
        /// <returns>
        /// The text with every decodable encoded-word replaced by its decoded form.
        /// Encoded-words that cannot be decoded (unknown charset, invalid Base64) are
        /// left untouched. A null or empty input is returned as is.
        /// </returns>
        private string GetMailSubject(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            string decoded = input;
            try
            {
                MatchCollection words = MailEncodedWordRegex.Matches(input);
                if (words.Count == 0)
                {
                    return input;
                }

                StringBuilder rebuilt = new StringBuilder(input.Length);
                int cursor = 0;
                bool previousDecoded = false;

                foreach (Match word in words)
                {
                    string gap = input.Substring(cursor, word.Index - cursor);
                    string text = DecodeMailEncodedWord(
                        word.Groups["encode"].Value,
                        word.Groups["type"].Value,
                        word.Groups["body"].Value);
                    bool currentDecoded = text != null;

                    // RFC 2047 section 6.2: whitespace that only separates two adjacent
                    // encoded-words is not part of the text and must be dropped.
                    bool gapIsSeparator = gap.Length > 0 && gap.Trim().Length == 0;
                    if (!(previousDecoded && currentDecoded && gapIsSeparator))
                    {
                        rebuilt.Append(gap);
                    }

                    rebuilt.Append(currentDecoded ? text : word.Value);
                    cursor = word.Index + word.Length;
                    previousDecoded = currentDecoded;
                }

                rebuilt.Append(input, cursor, input.Length - cursor);
                decoded = rebuilt.ToString();

                // Log the original next to the decoded title (the decoded value used to be logged twice).
                FileLogHelper.WriteInfo(string.Format("原邮件标题:[{0}]\r\n解析后标题:[{1}]", input, decoded));
            }
            catch (Exception)
            {
                // Best effort: decoding or logging problems must never prevent the mail
                // from being processed, so fall through with whatever was decoded so far.
            }

            return decoded;
        }

        /// <summary>
        /// Matches one RFC 2047 encoded-word and captures its charset ("encode"),
        /// transfer encoding ("type", B or Q in either case) and payload ("body").
        /// </summary>
        private static readonly Regex MailEncodedWordRegex = new Regex(
            @"=\?(?<encode>[^?\s]+)\?(?<type>[BQ])\?(?<body>.*?)\?=",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Decodes the payload of a single encoded-word.
        /// </summary>
        /// <returns>The decoded text, or null when the charset is unknown or the Base64 payload is invalid.</returns>
        private static string DecodeMailEncodedWord(string charset, string type, string body)
        {
            try
            {
                Encoding encoding = ResolveMailCharset(charset);

                if (string.Equals(type, "Q", StringComparison.OrdinalIgnoreCase))
                {
                    // In the header flavour of quoted-printable an underscore stands for a space.
                    return QuotedPrintable.Decode(body.Replace('_', ' '), encoding);
                }

                return encoding.GetString(Convert.FromBase64String(body));
            }
            catch (ArgumentException)
            {
                return null; // charset name not recognised
            }
            catch (NotSupportedException)
            {
                return null; // charset not available on this platform
            }
            catch (FormatException)
            {
                return null; // payload is not valid Base64
            }
        }

        /// <summary>
        /// Maps a charset label from an encoded-word to an <see cref="Encoding"/>,
        /// tolerating the non-standard spellings "utf8", "euccn" and "euc-cn".
        /// </summary>
        private static Encoding ResolveMailCharset(string charset)
        {
            string name = charset.Trim();

            // RFC 2231 allows a language tag after the charset: "charset*language".
            int languageTag = name.IndexOf('*');
            if (languageTag >= 0)
            {
                name = name.Substring(0, languageTag);
            }

            if (name.IndexOf("utf-8", StringComparison.OrdinalIgnoreCase) >= 0
                || name.IndexOf("utf8", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return Encoding.UTF8;
            }

            if (name.IndexOf("gbk", StringComparison.OrdinalIgnoreCase) >= 0
                || name.IndexOf("euccn", StringComparison.OrdinalIgnoreCase) >= 0
                || name.IndexOf("euc-cn", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return Encoding.GetEncoding("gb2312");
            }

            return Encoding.GetEncoding(name);
        }

 上面的方法针对B(Base64)和Q(Quoted-Printable)两种传输编码分别做了处理,其中Q编码的解码调用了一个Quoted-Printable编码解码类,代码如下:

using System;

using System.Collections;

using System.Text;

namespace Wind.MailRobot.BLL
{
    /// <summary>
    /// Quoted-printable (RFC 2045 section 6.7) encoder and decoder working on
    /// strings in a caller-supplied character encoding.
    /// </summary>
    public class QuotedPrintable
    {
        private const byte EQUALS = 61;

        private const byte CR = 13;

        private const byte LF = 10;

        private const byte SPACE = 32;

        // An encoded line may hold at most 76 characters; one of them is reserved
        // for the '=' that introduces a soft line break.
        private const int MaxLineContent = 75;

        /// <summary>
        /// Encodes a string to quoted-printable.
        /// </summary>
        /// <param name="_ToEncode">String to encode.</param>
        /// <param name="encoding">Character encoding used to turn the string into bytes.</param>
        /// <returns>
        /// The quoted-printable text. CR and LF bytes are copied through unchanged;
        /// soft line breaks ("=" followed by CRLF) keep every output line within 76 characters.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static string Encode(string _ToEncode, Encoding encoding)
        {
            if (_ToEncode == null)
            {
                throw new ArgumentNullException("_ToEncode");
            }

            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            byte[] bytes = encoding.GetBytes(_ToEncode);
            StringBuilder encoded = new StringBuilder(bytes.Length * 3);
            int lineLength = 0; // characters already written to the current output line

            for (int i = 0; i < bytes.Length; i++)
            {
                byte current = bytes[i];

                // Line break bytes are emitted literally and start a new output line.
                if (current == CR || current == LF)
                {
                    encoded.Append((char)current);
                    lineLength = 0;
                    continue;
                }

                bool atLineEnd = i + 1 == bytes.Length || bytes[i + 1] == CR || bytes[i + 1] == LF;

                // '=', control bytes (TAB included) and non-ASCII bytes are always escaped;
                // a space is escaped only when it would otherwise end a line, because
                // trailing whitespace may be stripped in transit.
                bool escape = current == EQUALS
                    || current > 126
                    || (current < 33 && current != SPACE)
                    || (current == SPACE && atLineEnd);

                int width = escape ? 3 : 1;

                // Break before the token so an "=XX" escape is never split across lines.
                if (lineLength + width > MaxLineContent)
                {
                    encoded.Append("=\r\n");
                    lineLength = 0;
                }

                if (escape)
                {
                    encoded.Append('=').Append(current.ToString("X2"));
                }
                else
                {
                    encoded.Append((char)current);
                }

                lineLength += width;
            }

            return encoded.ToString();
        }

        /// <summary>
        /// Decodes a quoted-printable encoded string.
        /// </summary>
        /// <param name="_ToDecode">The encoded string to decode.</param>
        /// <param name="encoding">Character encoding of the decoded bytes.</param>
        /// <returns>
        /// The decoded string. A '=' that is not followed by two hexadecimal digits is
        /// kept as a literal '='. When <paramref name="encoding"/> is null, or the input
        /// contains a character above U+00FF (so it cannot be quoted-printable data),
        /// the input is returned with only its soft line breaks removed. A null input
        /// yields null.
        /// </returns>
        public static string Decode(string _ToDecode, Encoding encoding)
        {
            if (_ToDecode == null)
            {
                return null;
            }

            // Remove soft line breaks first, tolerating bare-LF line endings.
            string text = _ToDecode.Replace("=\r\n", "").Replace("=\n", "");
            if (encoding == null)
            {
                return text;
            }

            byte[] bytes = new byte[text.Length];
            int bytesCount = 0;

            for (int i = 0; i < text.Length; i++)
            {
                char current = text[i];

                if (current == '=' && i + 2 < text.Length)
                {
                    int high = HexValue(text[i + 1]);
                    int low = HexValue(text[i + 2]);
                    if (high >= 0 && low >= 0)
                    {
                        bytes[bytesCount++] = (byte)((high << 4) | low);
                        i += 2;
                        continue;
                    }
                }

                if (current > 0xFF)
                {
                    // Not representable as a single byte: the input is not quoted-printable.
                    return text;
                }

                // Ordinary character, or a malformed '=' that is passed through literally.
                bytes[bytesCount++] = (byte)current;
            }

            return encoding.GetString(bytes, 0, bytesCount);
        }

        /// <summary>
        /// Returns the numeric value of a hexadecimal digit, or -1 when the character is not one.
        /// </summary>
        private static int HexValue(char digit)
        {
            if (digit >= '0' && digit <= '9')
            {
                return digit - '0';
            }

            if (digit >= 'A' && digit <= 'F')
            {
                return digit - 'A' + 10;
            }

            if (digit >= 'a' && digit <= 'f')
            {
                return digit - 'a' + 10;
            }

            return -1;
        }
    }
}

 

你可能感兴趣的:(.net)