判断字符串是否为 UTF-8 编码(C 语言)

先上代码,实测有效:

/*
 * Check whether a byte buffer is well-formed UTF-8 as defined by RFC 3629.
 *
 * str    - buffer to validate; it does not have to be NUL-terminated, and
 *          embedded 0x00 bytes are treated as ordinary ASCII.
 * length - number of bytes of str to examine.
 *
 * Returns true when every byte in [str, str + length) belongs to a complete,
 * well-formed sequence of 1 to 4 bytes. Returns false for stray continuation
 * bytes, truncated sequences, overlong encodings, UTF-16 surrogates
 * (U+D800..U+DFFF), code points above U+10FFFF and the obsolete 5/6-byte
 * forms. An empty buffer is valid; a NULL str is accepted only when length
 * is 0.
 */
bool utf8_check(const char* str,size_t length)
{
    /* Work on unsigned bytes so values >= 0x80 compare as expected even when
       plain char is signed. */
    const unsigned char *bytes = (const unsigned char *)str;
    size_t i = 0;

    if(str == NULL)
    {
        return length == 0;
    }

    while(i < length)
    {
        unsigned char lead = bytes[i];
        size_t trail = 0;           /* continuation bytes that must follow */
        unsigned char lo = 0x80;    /* allowed range of the FIRST continuation */
        unsigned char hi = 0xBF;    /* byte; narrowed for some lead bytes      */
        size_t k;

        if(lead < 0x80)
        {
            /* Single-byte ASCII. */
            i ++;
            continue;
        }
        else if((lead >= 0xC2) && (lead <= 0xDF))
        {
            /* 0xC0 and 0xC1 could only start overlong 2-byte forms. */
            trail = 1;
        }
        else if((lead >= 0xE0) && (lead <= 0xEF))
        {
            trail = 2;
            if(lead == 0xE0)
            {
                lo = 0xA0;          /* below this would be overlong */
            }
            else if(lead == 0xED)
            {
                hi = 0x9F;          /* above this would be a surrogate */
            }
        }
        else if((lead >= 0xF0) && (lead <= 0xF4))
        {
            trail = 3;
            if(lead == 0xF0)
            {
                lo = 0x90;          /* below this would be overlong */
            }
            else if(lead == 0xF4)
            {
                hi = 0x8F;          /* above this would exceed U+10FFFF */
            }
        }
        else
        {
            /* Stray continuation byte (0x80..0xBF), overlong lead
               (0xC0, 0xC1) or a lead byte beyond U+10FFFF (0xF5..0xFF). */
            return false;
        }

        /* i < length holds here, so the subtraction cannot wrap. */
        if((length - i - 1) < trail)
        {
            return false;           /* sequence is cut off by the buffer end */
        }

        if((bytes[i + 1] < lo) || (bytes[i + 1] > hi))
        {
            return false;
        }
        for(k = 2; k <= trail; k ++)
        {
            if((bytes[i + k] & 0xC0) != 0x80)
            {
                return false;
            }
        }

        i += trail + 1;
    }
    return true;
}

详述字符编码:

https://blog.csdn.net/caixiaobai_1/article/details/103876783

你可能感兴趣的:(Linux,C语言)