如何自动识别判断url中的中文参数是GB2312还是Utf-8编码

先看下面两个 Url,它们传递的参数一样吗?
aaa.aspx?tag=.net%bc%bc%ca%f5
aaa.aspx?tag=.net%e6%8a%80%e6%9c%af

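看起来好像不一样,其实它们都是对".net技术"进行 UrlEncode 的结果,只不过一个用的是 GB2312 编码,另一个用的是 UTF-8 编码。下面的代码先按 ISO-8859-1 把参数还原成原始字节(每个字节对应一个字符),再用正则判断这些字节是否符合 UTF-8 的字节结构:符合就按 UTF-8 解码,否则按 GB2312 解码。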

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web;
using System.Text.RegularExpressions;

namespace ConsoleApplication2 {

    /// <summary>
    /// Console demo that guesses whether the percent-encoded bytes of a URL
    /// parameter are UTF-8 or GB2312 and decodes them with the guessed encoding.
    /// </summary>
    class Program {

        // Decoding with ISO-8859-1 turns every percent-encoded byte into the
        // char with the same numeric value, so a regex over chars can inspect
        // the raw byte sequence.
        private const string RawByteEncodingName = "iso-8859-1";

        // Short pattern: only ASCII bytes and 3-byte sequences (lead byte
        // E0-EF followed by two bytes in 80-BF). Use it when Latin, Greek and
        // other non-CJK scripts do not need to be recognized.
        // The escapes must be backslashes (\xNN); written with forward slashes
        // the class contains the reversed range "0-/" and Regex throws.
        private static readonly Regex AsciiOrThreeByteUtf8 = new Regex(
            @"^(?:[\x00-\x7f]|[\xe0-\xef][\x80-\xbf]{2})+$");

        // Long pattern: ASCII plus every multi-byte form from 2 to 6 bytes.
        // Because it also accepts 2-byte sequences (lead byte C0-DF), some
        // GB2312 byte pairs pass as UTF-8; Main demonstrates such a case.
        private static readonly Regex AnyUtf8Sequence = new Regex(
            @"^(?:[\x00-\x7f]|[\xfc-\xff][\x80-\xbf]{5}|[\xf8-\xfb][\x80-\xbf]{4}|[\xf0-\xf7][\x80-\xbf]{3}|[\xe0-\xef][\x80-\xbf]{2}|[\xc0-\xdf][\x80-\xbf])+$");

        /// <summary>
        /// Decodes <paramref name="uriString"/> as UTF-8 when its raw bytes
        /// match <paramref name="utf8Shape"/>, otherwise as GB2312.
        /// </summary>
        /// <param name="uriString">Percent-encoded text to decode.</param>
        /// <param name="utf8Shape">Pattern the raw bytes must match to be treated as UTF-8.</param>
        /// <returns>The decoded text; null or empty input is returned unchanged.</returns>
        private static string DecodeWithDetection(string uriString, Regex utf8Shape) {
            // Nothing to detect; also avoids handing a null string to the regex.
            if (string.IsNullOrEmpty(uriString)) {
                return uriString;
            }

            string rawBytes = HttpUtility.UrlDecode(uriString, Encoding.GetEncoding(RawByteEncodingName));

            // NOTE(review): on .NET Core / .NET 5+ "GB2312" is presumably only
            // available after registering CodePagesEncodingProvider - confirm
            // against the target runtime.
            string encodingName = utf8Shape.IsMatch(rawBytes) ? "UTF-8" : "GB2312";
            return HttpUtility.UrlDecode(uriString, Encoding.GetEncoding(encodingName));
        }

        /// <summary>
        /// Decodes a percent-encoded string, treating it as UTF-8 only when it
        /// consists solely of ASCII bytes and 3-byte sequences; anything else
        /// is decoded as GB2312.
        /// </summary>
        /// <param name="uriString">Percent-encoded text to decode.</param>
        /// <returns>The decoded text; null or empty input is returned unchanged.</returns>
        public static string DecodeURL2(String uriString) {
            return DecodeWithDetection(uriString, AsciiOrThreeByteUtf8);
        }

        /// <summary>
        /// Decodes a percent-encoded string, treating it as UTF-8 when its
        /// bytes match the full 1- to 6-byte pattern; anything else is decoded
        /// as GB2312.
        /// </summary>
        /// <param name="uriString">Percent-encoded text to decode.</param>
        /// <returns>The decoded text; null or empty input is returned unchanged.</returns>
        public static string DecodeURL(String uriString) {
            return DecodeWithDetection(uriString, AnyUtf8Sequence);
        }

        /// <summary>
        /// Runs both decoders over sample GB2312 and UTF-8 inputs, prints the
        /// results, then waits for a key press.
        /// </summary>
        public static void Main(string[] args) {
            Console.WriteLine("----------------------------------------------");
            Console.WriteLine(DecodeURL(".net%bc%bc%ca%f5"));
            Console.WriteLine(DecodeURL(".net%e6%8a%80%e6%9c%af"));

            Console.WriteLine("----------------------------------------------");
            Console.WriteLine(DecodeURL("%B8%A7%CB%B3%C7%E0%CB%C9%D2%A9%D2%B5"));
            Console.WriteLine(DecodeURL("%E6%8A%9A%E9%A1%BA%E9%9D%92%E6%9D%BE%E8%8D%AF%E4%B8%9A"));

            Console.WriteLine("------------------↓↓↓下面的出问题↓↓↓------------------");

            Console.WriteLine(DecodeURL("%E8%81%94%E9%80%9A")); // decoded correctly
            Console.WriteLine(DecodeURL("%C1%AA%CD%A8")); // encoding is misdetected

            // There is no good fix for this misdetection: UTF-8 and GBK byte
            // sequences overlap, and Notepad is known to show the same problem.

            Console.WriteLine("------------------↑↑↑上面的出问题↑↑↑------------------");

            Console.WriteLine(DecodeURL2("%E8%81%94%E9%80%9A")); // decoded correctly
            Console.WriteLine(DecodeURL2("%C1%AA%CD%A8")); // not misdetected
            Console.WriteLine("----------------------------------------------");

            Console.ReadKey();
        }
    }
}

你可能感兴趣的:(c,.net,String,url,Class)