char *str = "这是一个7 8吗";
用站长工具对该字符串进行URL编码,可以得到两种形式的编码结果:
UTF-8(汉字由3个字节表示):%e8%bf%99%e6%98%af%e4%b8%80%e4%b8%aa7+8%e5%90%97
GBK (汉字由2个字节表示):%d5%e2%ca%c7%d2%bb%b8%f67+8%c2%f0
----------------------------------------------------------------------------------
然而,程序中字符串字面量的编码格式由源文件本身保存时所用的文件编码决定(下面的例子中源文件以GBK保存),运行结果如下:
[[email protected]]/home/term/mengfh/other>>./url_encode
源数据src(GBK) =[这是一个7 8吗]
urlencode(str, strlen(str), &des_len)= %D5%E2%CA%C7%D2%BB%B8%F67+8%C2%F0
urldecode(desBuf, des_len)=这是一个7 8吗
conv_charset("UTF-8","GBK", str, strlen(str), tmpBuf, sizeof(tmpBuf)) = 杩..涓涓. 8?
urlencode(tmpBuf, strlen(tmpBuf)= %E8%BF%99%E6%98%AF%E4%B8%80%E4%B8%AA7+8%E5%90%97
urldecode(desBuf, des_len)=杩..涓涓. 8?
conv_charset("GBK", "UTF-8", desBuf, strlen(desBuf), tmpBuf, sizeof(tmpBuf))=这是一个7 8吗
代码如下:
/*
 * url_encode.c
 *
 * Created on: 2015-12-01
 *     Author: mengfh
 *
 * URL-encoding / URL-decoding helpers, an iconv-based charset converter and a
 * small demo program.
 *
 * The byte encoding of the string literals in this program is whatever
 * encoding this source file is saved in (e.g. a GBK file yields GBK bytes).
 * Convert explicitly with conv_charset() when another encoding is needed.
 *
 * Define URL_ENCODE_NO_MAIN to compile the helpers without the demo main(),
 * e.g. when embedding them in another program or a test harness.
 */
#include <ctype.h>
#include <iconv.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns non-zero for the bytes urlencode() copies through unchanged:
 * ASCII letters, ASCII digits, '-', '.' and '_'. */
static int is_unreserved(unsigned char c)
{
    return (c >= '0' && c <= '9')
        || (c >= 'A' && c <= 'Z')
        || (c >= 'a' && c <= 'z')
        || c == '-' || c == '.' || c == '_';
}

/* Numeric value of a hexadecimal digit; the caller must have checked it with
 * isxdigit(). Relies on 'a'..'f' being contiguous, as the decoder always did. */
static int hex_value(unsigned char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    return tolower(c) - 'a' + 10;
}

/**
 * URL-encode a byte string.
 *
 * A space becomes '+'; ASCII letters, digits, '-', '.' and '_' are copied
 * unchanged; every other byte (including '~' and all bytes >= 0x80) becomes
 * "%XX" with upper-case hexadecimal digits.
 *
 * @param s          bytes to encode (need not be NUL-terminated)
 * @param len        number of bytes to read from s
 * @param new_length if not NULL, receives the encoded length (excluding the
 *                   terminating NUL); left untouched on failure
 * @return newly allocated NUL-terminated encoded string, or NULL when len is
 *         negative, s is NULL with len > 0, the encoded length would not fit
 *         in an int, or memory allocation fails
 * @note   the result is allocated with malloc(); the caller must free() it
 */
char *urlencode(char const *s, int len, int *new_length)
{
    static const char hexchars[] = "0123456789ABCDEF";

    /* Worst case every byte expands to three; the result length is reported
     * through an int, so 3 * len must not overflow it. */
    if (len < 0 || len > (INT_MAX - 1) / 3 || (s == NULL && len > 0)) {
        return NULL;
    }

    unsigned char *start = malloc(3 * (size_t) len + 1);
    if (start == NULL) {
        return NULL;
    }

    const unsigned char *from = (const unsigned char *) s;
    unsigned char *to = start;

    for (int i = 0; i < len; i++) {
        unsigned char c = from[i];

        if (c == ' ') {
            *to++ = '+';
        } else if (is_unreserved(c)) {
            *to++ = c;
        } else {
            *to++ = '%';
            *to++ = (unsigned char) hexchars[c >> 4];
            *to++ = (unsigned char) hexchars[c & 15];
        }
    }
    *to = '\0';

    if (new_length != NULL) {
        *new_length = (int) (to - start);
    }
    return (char *) start;
}

/**
 * URL-decode a buffer in place.
 *
 * '+' becomes a space and "%XX" (two hexadecimal digits, either case) becomes
 * the byte with that value. A '%' that is not followed by two hexadecimal
 * digits inside the first len bytes is copied through unchanged.
 *
 * @param str buffer holding the encoded text; it is overwritten with the
 *            decoded bytes followed by a NUL, so it must have room for
 *            len + 1 bytes
 * @param len number of encoded bytes in str; values <= 0 decode to ""
 * @return length of the decoded data (excluding the NUL); 0 when str is NULL
 */
int urldecode(char *str, int len)
{
    if (str == NULL) {
        return 0;
    }

    /* Bytes are read as unsigned char so that values >= 0x80 are never passed
     * to <ctype.h> functions as negative ints. */
    const unsigned char *in = (const unsigned char *) str;
    char *out = str;
    int remaining = len;

    /* The write position never overtakes the read position, so decoding in
     * place is safe. */
    while (remaining > 0) {
        unsigned char c = *in;

        if (c == '%' && remaining >= 3 && isxdigit(in[1]) && isxdigit(in[2])) {
            *out++ = (char) (hex_value(in[1]) * 16 + hex_value(in[2]));
            in += 3;
            remaining -= 3;
            continue;
        }

        *out++ = (c == '+') ? ' ' : (char) c;
        in++;
        remaining--;
    }

    *out = '\0';
    return (int) (out - str);
}

/**
 * Convert a buffer from one character set to another using iconv.
 *
 * @param dest   name of the target charset (passed to iconv_open)
 * @param src    name of the source charset (passed to iconv_open)
 * @param input  bytes to convert; not modified by this function
 * @param ilen   number of bytes in input
 * @param output receives the converted bytes; the whole buffer is zeroed
 *               first and the last byte is never written, so the result is
 *               always NUL-terminated
 * @param olen   size of output in bytes (must be at least 1)
 * @return 0 on success; -1 on invalid arguments, when the conversion is not
 *         available, or when iconv reports an error (invalid or incomplete
 *         input, or output too small)
 */
int conv_charset(const char *dest, const char *src, char *input, size_t ilen,
                 char *output, size_t olen)
{
    if (dest == NULL || src == NULL || output == NULL || olen == 0
        || (input == NULL && ilen > 0)) {
        return -1;
    }

    memset(output, 0, olen);

    iconv_t conv = iconv_open(dest, src);
    if (conv == (iconv_t) -1) {
        return -1;
    }

    char *in_ptr = input;
    char *out_ptr = output;
    size_t in_left = ilen;
    size_t out_left = olen - 1; /* keep one zeroed byte as the terminator */
    int rc = 0;

    /* iconv() signals failure only with (size_t)-1; any other value is the
     * count of non-reversible conversions and is not an error. */
    if (iconv(conv, &in_ptr, &in_left, &out_ptr, &out_left) == (size_t) -1) {
        rc = -1;
    } else if (iconv(conv, NULL, NULL, &out_ptr, &out_left) == (size_t) -1) {
        /* Flushing the shift state (needed by stateful encodings) failed. */
        rc = -1;
    }

    /* Always release the descriptor, including on the error paths. */
    iconv_close(conv);
    return rc;
}

#ifndef URL_ENCODE_NO_MAIN

/* URL-encodes the NUL-terminated string src into buf (capacity cap bytes),
 * storing the encoded length in *out_len. Returns 0 on success, -1 when
 * encoding fails or the result (plus NUL) does not fit in buf. */
static int encode_to_buffer(const char *src, char *buf, size_t cap, int *out_len)
{
    char *encoded = urlencode(src, (int) strlen(src), out_len);
    if (encoded == NULL) {
        return -1;
    }

    int rc = -1;
    if ((size_t) *out_len < cap) {
        memcpy(buf, encoded, (size_t) *out_len + 1);
        rc = 0;
    }
    free(encoded);
    return rc;
}

/*
 * Demo: encodes and decodes the sample text as it is stored in this source
 * file, then converts it to UTF-8, encodes/decodes that, and converts back.
 * The "GBK" labels and conversions assume this file is saved as GBK.
 */
int main(void)
{
    char str[] = "这是一个7 8吗";
    char desBuf[1024], tmpBuf[1024];
    int des_len = 0;
    int decoded_len;

    /* =============== GBK =============== */
    fprintf(stderr, " 源数据src(GBK) =[%s]\n", str);

    if (encode_to_buffer(str, desBuf, sizeof(desBuf), &des_len) != 0) {
        fprintf(stderr, "urlencode error \n");
        return -1;
    }
    fprintf(stderr, "urlencode(str, strlen(str), &des_len)= %s\n", desBuf);

    (void) urldecode(desBuf, des_len);
    fprintf(stderr, "urldecode(desBuf, des_len)=%s\n", desBuf);

    /* =============== UTF-8 =============== */
    if (conv_charset("UTF-8", "GBK", str, strlen(str), tmpBuf, sizeof(tmpBuf)) != 0) {
        fprintf(stderr, "conv_charset error \n");
        return -1;
    }
    fprintf(stderr, "conv_charset(\"UTF-8\",\"GBK\", str, strlen(str), tmpBuf, sizeof(tmpBuf)) = %s\n", tmpBuf);

    if (encode_to_buffer(tmpBuf, desBuf, sizeof(desBuf), &des_len) != 0) {
        fprintf(stderr, "urlencode error \n");
        return -1;
    }
    fprintf(stderr, "urlencode(tmpBuf, strlen(tmpBuf)= %s\n", desBuf);

    decoded_len = urldecode(desBuf, des_len);
    fprintf(stderr, "urldecode(desBuf, des_len)=%s\n", desBuf);

    if (conv_charset("GBK", "UTF-8", desBuf, (size_t) decoded_len, tmpBuf, sizeof(tmpBuf)) != 0) {
        fprintf(stderr, "conv_charset error \n");
        return -1;
    }
    fprintf(stderr, "conv_charset(\"GBK\", \"UTF-8\", desBuf, strlen(desBuf), tmpBuf, sizeof(tmpBuf))=%s\n", tmpBuf);

    return 0;
}

#endif /* URL_ENCODE_NO_MAIN */