如何自动判断 URL 中汉字的编码格式

参考了 http://topic.csdn.net/u/20091105/15/0d54b7b2-38fe-4cdf-ae1b-5a1f07c26ea0.html 帖子 18 楼的代码。

 

在其基础上稍作修改，满足了我实际的项目需求，代码如下：

using  System;
using  System.Collections.Generic;
using  System.Linq;
using  System.Text;
using  System.Web;

namespace  testProgram
{
    
/// <summary>
/// Decodes percent-encoded URLs whose non-ASCII characters may have been escaped as
/// either UTF-8 or GB2312. The encoding is guessed by checking whether the escaped
/// bytes form structurally valid UTF-8; anything else is treated as GB2312.
/// </summary>
class MyEncoding
{
    /// <summary>
    /// Demo entry point: decodes one GB2312-escaped and one UTF-8-escaped URL and
    /// prints the results, then waits for Enter.
    /// </summary>
    static void Main()
    {
        MyEncoding myEncoding = new MyEncoding();

        // GB2312
        string gb2312 = "http://www.baidu.com/s?wd=%B9%A4%B3%A7%B9%A9%B5%E7";

        // utf8
        string utf8 = "http://www.google.com.hk/search?hl=zh-CN&newwindow=1&safe=strict&q=%25abc%E4%B8%AD%E5%9B%BD%2C%3B&btnG=Google+%E6%90%9C%E7%B4%A2&aq=f&aqi=&aql=&oq=&gs_rfai=";

        string ss = myEncoding.UrlDecode(gb2312);
        string ss1 = myEncoding.UrlDecode(utf8);

        Console.WriteLine(ss);
        Console.WriteLine(ss1);

        Console.ReadLine();
    }

    /// <summary>
    /// URL-decodes <paramref name="url"/>, using UTF-8 when its escaped bytes look like
    /// UTF-8 and GB2312 otherwise.
    /// </summary>
    /// <param name="url">The percent-encoded URL; may be null or empty.</param>
    /// <returns>The decoded text, or <paramref name="url"/> itself when it is null or empty.</returns>
    private string UrlDecode(string url)
    {
        // Nothing to decode; also avoids dereferencing null in the byte scan below.
        if (string.IsNullOrEmpty(url))
        {
            return url;
        }

        byte[] buf = GetUrlCodingToBytes(url);
        if (IsUTF8(buf))
        {
            return HttpUtility.UrlDecode(url, Encoding.UTF8);
        }

        // NOTE(review): on .NET Core / .NET 5+ the "GB2312" code page is presumably only
        // available after registering CodePagesEncodingProvider - confirm for the target runtime.
        return HttpUtility.UrlDecode(url, Encoding.GetEncoding("GB2312"));
    }

    /// <summary>
    /// Collects the byte values of every well-formed <c>%XX</c> escape in
    /// <paramref name="url"/>, in order of appearance.
    /// </summary>
    /// <param name="url">The text to scan; may be null.</param>
    /// <returns>
    /// The escaped bytes; an empty array when there are none. A '%' that is not followed
    /// by two hex digits is skipped instead of raising a FormatException.
    /// </returns>
    private byte[] GetUrlCodingToBytes(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return new byte[0];
        }

        List<byte> bytes = new List<byte>();
        int i = 0;
        while (i < url.Length)
        {
            if (url[i] == '%' && i + 2 < url.Length)
            {
                int hi = HexValue(url[i + 1]);
                int lo = HexValue(url[i + 2]);
                if (hi >= 0 && lo >= 0)
                {
                    bytes.Add((byte)((hi << 4) | lo));
                    i += 3;
                    continue;
                }
            }

            // Ordinary character or malformed escape: step over a single character so
            // that an input like "%%41" still yields the byte 0x41.
            i++;
        }

        return bytes.ToArray();
    }

    /// <summary>Returns the numeric value of a hex digit, or -1 when <paramref name="c"/> is not one.</summary>
    private static int HexValue(char c)
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }

        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }

        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }

        return -1;
    }

    /// <summary>
    /// Checks whether <paramref name="buf"/> is structurally valid UTF-8 that contains
    /// at least one multi-byte sequence. The buffer is only read, never modified.
    /// </summary>
    /// <param name="buf">The bytes to inspect; may be null.</param>
    /// <returns>
    /// True when every non-ASCII byte belongs to a complete 2- to 4-byte sequence and at
    /// least one such sequence exists. False for null, empty or pure-ASCII input, for
    /// truncated sequences, and for bytes that can never start a UTF-8 sequence.
    /// </returns>
    private bool IsUTF8(byte[] buf)
    {
        if (buf == null)
        {
            return false;
        }

        bool sawMultiByte = false;
        int pending = 0; // continuation bytes still expected for the current character

        foreach (byte b in buf)
        {
            if (pending > 0)
            {
                // Continuation bytes must look like 10xxxxxx.
                if ((b & 0xC0) != 0x80)
                {
                    return false;
                }

                pending--;
                continue;
            }

            if (b < 0x80)
            {
                continue; // plain ASCII
            }

            // 0xC0/0xC1 can only start overlong encodings and 0xF5..0xFF are outside the
            // Unicode range, so none of them is a legal lead byte. Rejecting them keeps
            // GB2312 pairs such as C0 B4 from being mistaken for UTF-8.
            if (b >= 0xC2 && b <= 0xDF)
            {
                pending = 1;
            }
            else if (b >= 0xE0 && b <= 0xEF)
            {
                pending = 2;
            }
            else if (b >= 0xF0 && b <= 0xF4)
            {
                pending = 3;
            }
            else
            {
                // Stray continuation byte (0x80..0xBF) or an invalid lead byte.
                return false;
            }

            sawMultiByte = true;
        }

        // A sequence cut off at the end of the buffer is not valid UTF-8, and pure ASCII
        // gives no evidence for UTF-8 over GB2312.
        return pending == 0 && sawMultiByte;
    }
}
}

 

 

你可能感兴趣的:(编码格式)