C++:Unicode 字符的 UTF-8 编码与码点(code point)值互转示例

#include <cassert>
#include <codecvt>
#include <locale>
#include <stdexcept>
#include <string>
#include <vector>


/*
 * Converter between UTF-8 encoded byte strings (std::string) and fixed-width
 * 4-byte UCS-4 strings (std::u32string).
 *
 * The converter has to be
 *     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>
 * because only a fixed 4-byte element type guarantees that every char32_t
 * element of the resulting std::u32string represents one complete Unicode
 * character; casting such an element to int therefore yields its code point.
 *
 * NOTE: std::wstring_convert and std::codecvt_utf8 are deprecated since C++17,
 * so newer compilers may emit deprecation warnings for this alias.
 */
using WstringConvertType =
        std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;


std::u32string
StringToWideString(const std::string& s,
                   WstringConvertType* convert = nullptr) {
    if (nullptr == convert) {
        WstringConvertType conv;
        try {
            return conv.from_bytes(s);
        } catch (std::range_error& ex) {
            assert(false);
            return std::u32string();
        }
    } else {
        try {
            return convert->from_bytes(s);
        } catch (std::range_error& ex) {
            assert(false);
            return std::u32string();
        }
    }
}

std::string
WideStringToString(const std::u32string& wide_string,
                   WstringConvertType* convert = nullptr) {
    if (nullptr == convert) {
        WstringConvertType conv;
        try {
            return conv.to_bytes(wide_string);
        } catch (std::range_error& ex) {
            assert(false);
            return std::string();
        }
    } else {
        try {
            return convert->to_bytes(wide_string);
        } catch (std::range_error& ex) {
            assert(false);
            return std::string();
        }
    }
}


std::vector
StringToCodePoints(const std::string& s,
                   WstringConvertType* convert = nullptr) {
    std::u32string ws = StringToWideString(s, convert);
    std::vector res;
    res.reserve(ws.size());
    for (char32_t & c : ws) {
        res.push_back((int)c);
    }
    return res;
}

std::string
CodePointsToString(const std::vector& code_points,
                   WstringConvertType* convert = nullptr) {
    std::u32string ws;
    ws.reserve(code_points.size());
    for (const int& cp : code_points) {
        ws.push_back((char32_t)cp);
    }
    return WideStringToString(ws, convert);
}



/*
 * Round-trip demonstration:
 *   1. byte string -> std::u32string -> byte string must reproduce the input;
 *   2. the code points of the original and of the round-tripped string must
 *      be equal;
 *   3. code points -> byte string must reproduce the input.
 * Every step is checked with assert(), so the checks vanish when assertions
 * are compiled out.
 */
int main(int argc, char** argv) {
    std::string s = "一伦红日冉冉升起,O(∩_∩)O哈哈~";
    std::u32string ws = StringToWideString(s);

    std::string s2 = WideStringToString(ws);
    assert(s == s2);

    // Element type spelled out so the code does not depend on C++17 class
    // template argument deduction.
    std::vector<int> code_points1 = StringToCodePoints(s);
    std::vector<int> code_points2 = StringToCodePoints(s2);
    assert(code_points1 == code_points2);

    std::string s3 = CodePointsToString(code_points1);
    assert(s == s3);

    return 0;
}

你可能感兴趣的:(C++-Unicode字符utf8编码与码点code point值互转编码示例)