Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)

为了一个gb2312下post中文参数到utf-8页面的程序,随闷的难受,查了一下午资料,大部分是讲Gb2312传到Gb2312页面的UrlEncode,没有提供到Utf-8页面的UrlEncode,后来找到Unicode转Utf-8码的资料,参考之下,终于写出了Utf-8的UrlEncode,这里整理下各种UrlEncode方法,供有需要的朋友参考。

详细Utf-8编码规则请百度一下。

Unicode 与 Utf-8码间的内码规则模板为:
原始码(16进制) UTF-8编码(二进制)
--------------------------------------------
0000 - 007F       0xxxxxxx
0080 - 07FF       110xxxxx 10xxxxxx
0800 - FFFF       1110xxxx 10xxxxxx 10xxxxxx   (中文字在此区间)
……
--------------------------------------------

例如:
百度中查询“中国人”,会将中文URL参数转为Gb2312码的16进制表示,一个中文字用2个字节
http://www.baidu.com/s?wd=%D6%D0%B9%FA%C8%CB
Google中查询“中国人”,会将中文URL参数转为Utf-8编码的16进制表示,一个中文字用3个字节
http://www.google.cn/search?client=opera&rls=en&q=%E4%B8%AD%E5%9B%BD%E4%BA%BA&sourceid=opera&ie=utf-8&oe=utf-8

 






' Url编码,Gb2312页面之间传递参数
Function  URLEncode_Gb(ByVal str)
    
Dim  i,s
    
Dim  B,bCode,gb,Hight8b,Low8b
    s 
=   ""
    
For  i  =   1   To   Len (str)
        B 
=   Mid (str,i, 1 )
        bCode
= Abs ( Asc (B))
        
If  (bCode >= 48   And  bCode <= 57 Or  (bCode >= 65   And  bCode <= 90 Or  (bCode >= 97   And  bCode <= 122 Or  bCode = 42   Or  bCode = 45   Or  bCode = 46   Or  bCode = 64   Or  bCode = 95   Then  
            
' 48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
             ' 42代表*;46代表.;64代表@;45代表-;95代表_ 
            s = &  B
        
ElseIf  bCode = 32   Then   ' 空格转成+
            s = &   " + "
        
ElseIf  bCode < 128   Then      ' 低于128的Ascii转成1个字节
            s = &   " % "   &   Right ( " 00 "   &   Hex (bCode), 2 )
        
Else  
            gb 
=   Asc (B)
            
If  gb  <   0   Then
                gb 
=  gb  +   & H10000     ' gb编码为负数,要加上65536
             End   If
            Hight8b 
=  (gb   And   & HFF00)  /   & H100     ' 二进制高8位
            Low8b  =  gb  And   & HFF     ' 二进制低8位
            s  =  s  &   " % "   &   Hex (Hight8b)  &    " % "   &   Hex (Low8b)
        
End   If   
    
Next
    URLEncode_Gb 
=  s
End Function

' Url解码,Gb2312页面之间传递参数
Function  URLDecode_Gb(ByVal str)
    
Dim  i,s
    
Dim  B,bCode,gb,Hight8b,Low8b
    s 
=   ""
    
For  i  =   1   To   Len (str)
        B 
=   Mid (str,i, 1 )
        
Select   Case  B
            
Case   " + "
                s
= &   "   "
            
Case   " % "
                gb
= Mid (str,i + 1 , 2 )
                bCode
= CInt ( " &H "   &  gb)
                
If  bCode < 128   Then
                    i
= i + 2
                
Else
                    bCode
= CInt ( " &H "   &  gb  &   Mid (str,i + 4 , 2 ))
                    i
= i + 5
                
End   If  
                s
= &   Chr (bCode)
            
Case   Else
                s
= &  B
        
End   Select  
    
Next
    URLDecode_Gb 
=  s
End Function

' URL编码,Gb2312页面提交到Utf-8页面
Function  UrlEncode_GBToUtf8(ByVal str)
    
Dim  B                     ' 单个字符
     Dim  ub                   ' 中文字的Unicode码(2字节)
     Dim  High8b, Low8b        ' Unicode码的高低位字节
     Dim  UtfB1, UtfB2, UtfB3  ' Utf-8码的三个字节
     Dim  i, s
    
For  i  =   1   To   Len (str)
        B
= Mid (str, i,  1 )
        ub 
=  AscW(B)
        
If  (ub >= 48   And  ub <= 57 Or  (ub >= 65   And  ub <= 90 Or  (ub >= 97   And  ub <= 122 Or  ub = 42   Or  ub = 45   Or  ub = 46   Or  ub = 64   Or  ub = 95   Then  
            
' 48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
             ' 42代表*;46代表.;64代表@;45代表-;95代表_ 
            s = &  B
        
ElseIf  ub = 32   Then   ' 空格转成+
            s = &   " + "
        
ElseIf  ub < 128   Then      ' 低于128的Ascii转成1个字节
            s = &   " % "   &   Right ( " 00 "   &   Hex (ub), 2 )
        
Else
            High8b 
=  (ub  And   & HFF00)  /   & H100  ' Unicode码高位
            Low8b  =  ub  And   & HFF  ' Unicode码低位
            UtfB1  =  (High8b  And   & HF0)  /   & H10  Or   & HE0  ' 取Unicode高位字节的二进制的前4位 + 11100000
            UtfB2  =  ((High8b  And   & HF)  *   & H4  +  (Low8b  And   & HC0)  /   & H40)  Or   & H80  ' 取Unicode高位字节的后4位及低位字节的前2位 +10000000
            UtfB3  =  (Low8b  And   & H3F)  Or   & H80  ' 取Unicode低位字节的二进制后6位 + 10000000
            s  =  s  &   " % "   &   Hex (UtfB1)  &   " % "   &   Hex (UtfB2)  &   " % "   &   Hex (UtfB3)
        
End   If
    
Next
    UrlEncode_GBToUtf8 
=  s
End Function



' “汉”-AscW("汉")=27721(十进制)    01101100 01001001(二进制)     6C49(十六进制)
'
将Gb2312码转成Utf-8码(十六进制表示)的方法为,先用AscW将Gb2312转为Unicode码(2字节),再'将Unicode码的二进制中的位按utf-8(3字节)模板规则填充 x 位:

' URL解码,Gb2312页面提交到Utf-8页面
Function  UrlDecode_GBToUtf8(ByVal str)
    
Dim  B,ub     ' 中文字的Unicode码(2字节)
     Dim  UtfB     ' Utf-8单个字节
     Dim  UtfB1, UtfB2, UtfB3  ' Utf-8码的三个字节
     Dim  i, n, s
    n
= 0
    ub
= 0
    
For  i  =   1   To   Len (str)
        B
= Mid (str, i,  1 )
        
Select   Case  B
            
Case   " + "
                s
= &   "   "
            
Case   " % "
                ub
= Mid (str, i  +   1 2 )
                UtfB 
=   CInt ( " &H "   &  ub)
                
If  UtfB < 128   Then  
                    i
= i + 2
                    s
= &  ChrW(UtfB)
                
Else  
                    UtfB1
= (UtfB  And   & H0F)  *   & H1000     ' 取第1个Utf-8字节的二进制后4位
                    UtfB2 = ( CInt ( " &H "   &   Mid (str, i  +   4 2 ))  And   & H3F)  *   & H40         ' 取第2个Utf-8字节的二进制后6位
                    UtfB3 = CInt ( " &H "   &   Mid (str, i  +   7 2 ))  And   & H3F         ' 取第3个Utf-8字节的二进制后6位
                    s = &  ChrW(UtfB1  Or  UtfB2  Or  UtfB3)
                    i
= i + 8
                
End   If  
            
Case   Else      ' Ascii码
                s = &  B
        
End   Select  
    
Next
    UrlDecode_GBToUtf8 
=  s
End Function

' URL编码,Gb2312页面提交到Utf-8页面,另一种位计算方法
Private   Function  UrlEncode_GBToUtf8_V2(szInput) 
    
Dim  wch, uch, szRet 
    
Dim  x 
    
Dim  nAsc, nAsc2, nAsc3 
    
If  szInput  =   ""   Then  
        UrlEncode_GBToUtf8_V2
=  szInput 
        
Exit   Function  
    
End   If  
    
For  x  =   1   To   Len (szInput) 
        wch 
=   Mid (szInput, x,  1
        nAsc 
=  AscW(wch) 
        
If  nAsc  <   0   Then  nAsc  =  nAsc  +   65536  
        
If  (nAsc  And   & HFF80)  =   0   Then  
            szRet 
=  szRet  &  wch 
        
Else  
            
If  (nAsc  And   & HF000)  =   0   Then  
                uch 
=   " % "   &   Hex (((nAsc    2   ^   6 ))  Or   & HC0)  &   Hex (nAsc  And   & H3F  Or   & H80) 
                szRet 
=  szRet  &  uch 
            
Else  
                uch 
=   " % "   &   Hex ((nAsc    2   ^   12 Or   & HE0)  &   " % "   &  _ 
                
Hex ((nAsc    2   ^   6 And   & H3F  Or   & H80)  &   " % "   &  _ 
                
Hex (nAsc  And   & H3F  Or   & H80) 
                szRet 
=  szRet  &  uch 
            
End   If  
        
End   If  
    
Next  
    UrlEncode_GBToUtf8_V2
=  szRet 
End Function



' VB下用API方法的Unicode转Utf-8方法:
Private  Declare  Function  WideCharToMultiByte Lib  " kernel32 "  (ByVal CodePage  As   Long , ByVal dwFlags  As   Long , ByVal lpWideCharStr  As   Long , ByVal cchWideChar  As   Long , ByRef lpMultiByteStr  As  Any, ByVal cchMultiByte  As   Long , ByVal lpDefaultChar  As   String , ByVal lpUsedDefaultChar  As   Long As   Long
Private  Declare  Function  MultiByteToWideChar Lib  " kernel32 "  (ByVal CodePage  As   Long , ByVal dwFlags  As   Long , ByVal lpMultiByteStr  As   Long , ByVal cchMultiByte  As   Long , ByVal lpWideCharStr  As   Long , ByVal cchWideChar  As   Long As   Long
Private   Const  CP_UTF8  =   65001


Function  Utf8ToUnicode(ByRef Utf()  As   Byte As   String
    
Dim  lRet  As   Long
    
Dim  lLength  As   Long
    
Dim  lBufferSize  As   Long
    lLength 
=   UBound (Utf)  -   LBound (Utf)  +   1
    
If  lLength  <=   0   Then   Exit   Function
    lBufferSize 
=  lLength  *   2
    Utf8ToUnicode 
=   String $(lBufferSize,  Chr ( 0 ))
    lRet 
=  MultiByteToWideChar(CP_UTF8,  0 , VarPtr(Utf( 0 )), lLength, StrPtr(Utf8ToUnicode), lBufferSize)
    
If  lRet  <>   0   Then
        Utf8ToUnicode 
=   Left (Utf8ToUnicode, lRet)
    
End   If
End Function

Function  UnicodeToUtf8(ByVal UCS  As   String As   Byte ()
    
Dim  lLength  As   Long
    
Dim  lBufferSize  As   Long
    
Dim  lResult  As   Long
    
Dim  abUTF8()  As   Byte
    lLength 
=   Len (UCS)
    
If  lLength  =   0   Then   Exit   Function
    lBufferSize 
=  lLength  *   3   +   1
    
ReDim  abUTF8(lBufferSize  -   1 )
    lResult 
=  WideCharToMultiByte(CP_UTF8,  0 , StrPtr(UCS), lLength, abUTF8( 0 ), lBufferSize, vbNullString,  0 )
    
If  lResult  <>   0   Then
    lResult 
=  lResult  -   1
    
ReDim  Preserve abUTF8(lResult)
    UnicodeToUtf8 
=  abUTF8
    
End   If
End Function


你可能感兴趣的:(VB)