C#版 判断字符编码

/// <summary>
/// Decodes a byte buffer to text, choosing the encoding from a leading
/// byte-order mark when present and otherwise from the byte content.
/// </summary>
/// <param name="buff">Raw bytes to decode. May be null or empty.</param>
/// <returns>
/// The decoded text with any byte-order mark removed, or an empty string
/// when <paramref name="buff"/> is null or empty.
/// </returns>
public static string GetText(byte[] buff)
		{
			if (buff == null || buff.Length == 0)
			{
				return string.Empty;
			}

			// Each BOM is tested against the buffer length it actually needs, so
			// short inputs are decoded instead of being silently dropped. The BOM
			// bytes are skipped so the result does not start with U+FEFF.
			if (buff.Length >= 3 && buff[0] == 0xEF && buff[1] == 0xBB && buff[2] == 0xBF)
			{
				// UTF-8 BOM: EF BB BF
				return Encoding.UTF8.GetString(buff, 3, buff.Length - 3);
			}

			if (buff.Length >= 2 && buff[0] == 0xFE && buff[1] == 0xFF)
			{
				// UTF-16 big-endian BOM: FE FF
				return Encoding.BigEndianUnicode.GetString(buff, 2, buff.Length - 2);
			}

			if (buff.Length >= 2 && buff[0] == 0xFF && buff[1] == 0xFE)
			{
				// UTF-16 little-endian BOM: FF FE
				return Encoding.Unicode.GetString(buff, 2, buff.Length - 2);
			}

			// No BOM: use UTF-8 when the bytes are well-formed UTF-8, otherwise
			// fall back to the platform default (ANSI) encoding.
			Encoding detected = isUtf8(buff) ? Encoding.UTF8 : Encoding.Default;
			return detected.GetString(buff);
		}

		/// <summary>
		/// Checks whether the whole buffer is a well-formed UTF-8 byte sequence.
		/// </summary>
		/// <remarks>
		/// Accepted sequence shapes:
		///   0XXXXXXX
		///   110XXXXX, 10XXXXXX
		///   1110XXXX, 10XXXXXX, 10XXXXXX
		///   11110XXX, 10XXXXXX, 10XXXXXX, 10XXXXXX
		/// Pure ASCII input is valid UTF-8 and therefore returns true. A sequence
		/// that is cut off at the end of the buffer returns false.
		/// </remarks>
		/// <param name="buff">Bytes to validate; must not be null.</param>
		/// <returns>True when every byte belongs to a valid UTF-8 sequence.</returns>
		private static bool isUtf8(byte[] buff)
		{
			int i = 0;
			while (i < buff.Length)
			{
				byte lead = buff[i];
				int trailing;

				if (lead < 0x80)
				{
					trailing = 0;   // single-byte ASCII
				}
				else if (lead >= 0xC2 && lead <= 0xDF)
				{
					trailing = 1;   // 0xC0 and 0xC1 would only encode overlong forms
				}
				else if ((lead & 0xF0) == 0xE0)
				{
					trailing = 2;
				}
				else if (lead >= 0xF0 && lead <= 0xF4)
				{
					trailing = 3;   // 0xF5 and above would exceed U+10FFFF
				}
				else
				{
					// Stray continuation byte or a byte that never occurs in UTF-8.
					return false;
				}

				// The sequence must fit inside the buffer; this also keeps the
				// indexing below in range.
				if (i + trailing >= buff.Length)
				{
					return false;
				}

				// Every trailing byte must have the form 10xxxxxx.
				for (int k = 1; k <= trailing; k++)
				{
					if ((buff[i + k] & 0xC0) != 0x80)
					{
						return false;
					}
				}

				if (trailing >= 2)
				{
					byte second = buff[i + 1];
					bool overlong = (lead == 0xE0 && second < 0xA0) || (lead == 0xF0 && second < 0x90);
					bool surrogate = lead == 0xED && second > 0x9F;     // U+D800..U+DFFF
					bool beyondRange = lead == 0xF4 && second > 0x8F;   // above U+10FFFF
					if (overlong || surrogate || beyondRange)
					{
						return false;
					}
				}

				// Step over the whole sequence so trailing bytes are not
				// re-examined as lead bytes.
				i += trailing + 1;
			}

			return true;
		}

		// news.sohu.com
		/// <summary>
		/// Placeholder for GBK detection; it currently reports false for every input.
		/// </summary>
		/// <param name="buff">Bytes to inspect; not read by the current implementation.</param>
		/// <returns>Always <c>false</c>.</returns>
		private static bool isGBK(byte[] buff) => false;

你可能感兴趣的:(C#版 判断字符编码)