C++ URL编码实操

直接上代码

#include <cstring>
#include <iostream>
#include <string>

using namespace std;

// Converts a nibble value to its uppercase hexadecimal ASCII digit.
// Values 0..9 are shifted by 48 ('0'), larger values by 55 so that
// 10..15 land on 'A'..'F'. The input is not range-checked.
unsigned char CharToHex(unsigned char x) {
    const int offset = (x <= 9) ? 48 : 55;
    return static_cast<unsigned char>(x + offset);
}

// Returns true when `c` is an ASCII letter (either case) or an ASCII digit.
// The ranges are compared explicitly, so the result does not depend on the
// current locale.
bool IsAlphaNumber(unsigned char c) {
    const bool is_lower = ('a' <= c) && (c <= 'z');
    const bool is_upper = ('A' <= c) && (c <= 'Z');
    const bool is_digit = ('0' <= c) && (c <= '9');
    return is_lower || is_upper || is_digit;
}

// URL encode
//
// Appends the URL-encoded form of `src` to `str_encode` (the output string is
// appended to, not cleared).
//
// Encoding rules:
//   - a space becomes '+';
//   - ASCII letters, digits and the punctuation in kPassThrough are copied
//     unchanged;
//   - every other byte becomes "%XX" with uppercase hex digits.
//
// A literal '+' is percent-encoded as "%2B". Because a space is written as
// '+', copying a literal '+' through unchanged would make the two
// indistinguishable in the output.
//
// The input is processed byte by byte over its full size, so an embedded NUL
// is encoded as "%00" instead of silently ending the conversion.
void UrlEncode(const std::string &src, std::string &str_encode) {
    static const char kPassThrough[] =
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789"
        "-_.~!*'();:@&=$,/?#[]";
    static const char kHexDigits[] = "0123456789ABCDEF";

    for (std::string::size_type i = 0; i < src.size(); ++i) {
        const char c = src[i];
        if (c == ' ') {
            str_encode += '+';
            continue;
        }

        // Search with an explicit length so that a NUL byte never matches the
        // table's own terminator.
        const bool pass_through =
            std::char_traits<char>::find(kPassThrough, sizeof(kPassThrough) - 1, c) != 0;
        if (pass_through) {
            str_encode += c;
            continue;
        }

        // Go through unsigned char so bytes >= 0x80 index the table correctly.
        const unsigned char byte = static_cast<unsigned char>(c);
        str_encode += '%';
        str_encode += kHexDigits[byte >> 4];
        str_encode += kHexDigits[byte & 0x0F];
    }
}

// Returns the value (0..15) of the hexadecimal digit `c`, or -1 when `c` is
// not a hexadecimal digit. Both upper- and lowercase digits are accepted.
static int HexDigitValue(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
}

// URL decode
//
// Appends the decoded form of `src` to `str_decode` (the output string is
// appended to, not cleared).
//
// Decoding rules:
//   - '+' becomes a space (the inverse of the space -> '+' rule used when
//     encoding);
//   - "%XX" with two hexadecimal digits becomes the byte 0xXX;
//   - every other character is copied unchanged.
//
// A '%' that is not followed by two hexadecimal digits (truncated input such
// as "100%" or "%4", or a non-hex sequence such as "%zz") is copied through
// literally, so the function never reads past the end of `src`.
void UrlDecode(const std::string &src, std::string &str_decode) {
    const std::string::size_type len = src.size();
    for (std::string::size_type i = 0; i < len; ++i) {
        const char c = src[i];
        if (c == '+') {
            str_decode += ' ';
            continue;
        }
        // Only treat '%' as an escape when both digit positions exist.
        if (c == '%' && i + 2 < len) {
            const int hi = HexDigitValue(src[i + 1]);
            const int lo = HexDigitValue(src[i + 2]);
            if (hi >= 0 && lo >= 0) {
                str_decode += static_cast<char>(hi * 16 + lo);
                i += 2;  // skip the two digits just consumed
                continue;
            }
        }
        str_decode += c;
    }
}

// Demo: URL-encodes a sample string, decodes the result again, and prints the
// encoded form followed by the decoded form, one per line.
int main() {
    const std::string sample = "你好";

    std::string encoded;
    UrlEncode(sample, encoded);

    std::string decoded;
    UrlDecode(encoded, decoded);

    std::cout << encoded << std::endl
              << decoded << std::endl;
    return 0;
}

编译后运行输出如下

src:你好
src_encode:%C4%E3%BA%C3
str_decode:你好

C4E3是“你”的GBK编码,BAC3是“好”的GBK编码。因为源文件编码为GBK,平台为win10,对应的终端编码也是GBK,所以能正常显示。
但若将源文件编码改为UTF8后再编译运行,输出如下

src:浣犲ソ
src_encode:%E4%BD%A0%E5%A5%BD
str_decode:浣犲ソ

E4BDA0是“你”的UTF8编码,E5A5BD是“好”的UTF8编码,但由于平台终端编码为GBK,所以显示乱码。
接下来将UTF8转为GBK

string UTF8ToGBK(const string &str_utf8) {
    int len = MultiByteToWideChar(CP_UTF8, 0, str_utf8.c_str(), -1, NULL, 0);
    wchar_t *wsz_gbk = new wchar_t[len + 1];
    memset(wsz_gbk, 0, (len + 1) * sizeof(WCHAR));
    MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str_utf8.c_str(), -1, wsz_gbk, len);
    len = WideCharToMultiByte(CP_ACP, 0, wsz_gbk, -1, NULL, 0, NULL, NULL);
    char *sz_gbk = new char[len + 1];
    memset(sz_gbk, 0, len + 1);
    WideCharToMultiByte(CP_ACP, 0, wsz_gbk, -1, sz_gbk, len, NULL, NULL);
    string str_gbk(sz_gbk);
    delete[] sz_gbk;
    delete[] wsz_gbk;
    return str_gbk;
}

输出如下

src:浣犲ソ
src_encode:%E4%BD%A0%E5%A5%BD
str_decode:浣犲ソ
str_gbk: 你好

相关文章

URL编码
字符编码1
字符编码2

参考

HTTP URL 字符转义 字符编码 、 RFC 3986编码规范

你可能感兴趣的:(C++ URL编码实操)