UTF-8编码与Unicode UCS-2的转换

/* Convert a UTF-8 string into a UCS-2 array.
   `str' specifies the NUL-terminated UTF-8 string to decode.
   `ary' specifies the array that receives the UCS-2 code units.  Each loop iteration consumes
   at least one input byte and stores at most one unit, so an array with as many elements as
   `str' has bytes is always large enough.
   `np' specifies the variable into which the number of stored code units is assigned.
   Only one-, two- and three-byte sequences are decoded.  A lead byte that is not followed by
   the required continuation bytes (10xxxxxx) is dropped and decoding resumes at the next
   byte.  Stray continuation bytes and the lead bytes of longer sequences (0xF0 and above)
   are dropped as well, so characters outside the 16-bit range produce no output. */
void tcstrutftoucs(const char *str, uint16_t *ary, int *np){
  assert(str && ary && np);
  const unsigned char *rp = (const unsigned char *)str;
  unsigned int wi = 0;
  while(*rp != '\0'){
    unsigned int c = *rp;
    if(c < 0x80){
      /* 0xxxxxxx: single byte maps to itself */
      ary[wi++] = c;
    } else if(c < 0xc0){
      /* 10xxxxxx in lead position: stray continuation byte, skipped below */
    } else if(c < 0xe0){
      /* 110xxxxx 10xxxxxx; a NUL in rp[1] fails the mask test, so the terminator is kept */
      if((rp[1] & 0xc0) == 0x80){
        ary[wi++] = ((c & 0x1f) << 6) | (rp[1] & 0x3f);
        rp++;
      }
    } else if(c < 0xf0){
      /* 1110xxxx 10xxxxxx 10xxxxxx; && short-circuits, so rp[2] is only read when rp[1]
         is a continuation byte and therefore not the terminator */
      if((rp[1] & 0xc0) == 0x80 && (rp[2] & 0xc0) == 0x80){
        ary[wi++] = ((c & 0x0f) << 12) | ((rp[1] & 0x3f) << 6) | (rp[2] & 0x3f);
        rp += 2;
      }
    }
    /* lead bytes 0xf0..0xff match no branch and are skipped here too */
    rp++;
  }
  *np = wi;
}





/* Convert a UCS-2 array into a UTF-8 string.
   `ary' specifies the array of UCS-2 code units to encode.
   `num' specifies the number of code units in the array.
   `str' specifies the buffer that receives the UTF-8 bytes followed by a terminating NUL.
   Each unit produces one to three bytes, so the buffer needs room for up to `num' * 3 + 1
   bytes.
   The return value is the number of bytes written, not counting the terminating NUL. */
int tcstrucstoutf(const uint16_t *ary, int num, char *str){
  assert(ary && num >= 0 && str);
  unsigned char *out = (unsigned char *)str;
  const uint16_t *end = ary + num;
  for(const uint16_t *in = ary; in != end; in++){
    unsigned int unit = *in;
    if(unit >= 0x800){
      /* 16-bit value: 1110xxxx 10xxxxxx 10xxxxxx */
      out[0] = 0xe0 | (unit >> 12);
      out[1] = 0x80 | ((unit >> 6) & 0x3f);
      out[2] = 0x80 | (unit & 0x3f);
      out += 3;
    } else if(unit >= 0x80){
      /* 11-bit value: 110xxxxx 10xxxxxx */
      out[0] = 0xc0 | (unit >> 6);
      out[1] = 0x80 | (unit & 0x3f);
      out += 2;
    } else {
      /* 7-bit value is stored as is */
      *out++ = unit;
    }
  }
  *out = '\0';
  return (char *)out - str;
}

 

你可能感兴趣的:(unicode)