libjson支持中文显示,但如果JSON字符串里使用了\uXXXX形式的Unicode转义(而不是直接写UTF-8字符),解析就会出问题。百度、谷歌了很久都没找到解决方法。后来找到了一篇文章,里面提到了解决方法,但我没有试成功。既然知道了问题所在,就自己写吧。
libjson可以处理UNICODE,但使用的是wchar_t,而开发当中大多使用char,这就造成了很多的不兼容。如果不开启UNICODE,遇到"\uXXXX"之类的转义字符时,libjson会舍弃4位十六进制数字中的前2位(即高字节)。
问题出在JSONWorker.cpp的SpecialChar函数里面
1
2
3
4
5
6
7
8
9
|
case JSON_TEXT('u'):
    // \uXXXX escape: narrow (non-JSON_UNICODE) builds go through UTF8,
    // wide builds go through UTF.
#ifndef JSON_UNICODE
    UTF8(pos,res, end);
#else
    UTF(pos, res, end);
#endif
    break;
|
UTF8函数
1
2
3
4
5
6
7
8
9
10
11
12
|
/**
 * Reads the hex digits of a \uXXXX escape and returns them as one json_uchar.
 *
 * @param pos  Cursor into the JSON text; advanced in place by this function.
 *             NOTE(review): presumably it points at the 'u' on entry -- confirm
 *             against the SpecialChar caller.
 * @param end  One past the last readable character; used only for the bounds
 *             check.
 * @return With JSON_UNICODE: the value of two Hex() reads combined as
 *         (first << 8) | second. Without JSON_UNICODE: the value of a single
 *         Hex() read taken after skipping three characters, so the two leading
 *         hex digits of the escape are not part of the result.
 *         Returns JSON_TEXT('\0') when the bounds check fails and
 *         JSON_ASSERT_SAFE is active.
 *
 * NOTE(review): Hex() is defined elsewhere; it presumably decodes two hex
 * digits starting at pos -- confirm before relying on the exact final
 * position of pos.
 */
json_uchar JSONWorker::UTF8(const json_char * & pos, const json_char * const end) json_nothrow {
    // Compare in characters via pointer subtraction. Casting the pointers to
    // long truncates on platforms where long is narrower than a pointer and
    // measures bytes rather than characters for wide json_char.
    JSON_ASSERT_SAFE((end - pos) > 4, JSON_TEXT("UTF will go out of bounds"), return JSON_TEXT('\0'););
#ifdef JSON_UNICODE
    ++pos;
    json_uchar temp = Hex(pos) << 8;
    ++pos;
    return temp | Hex(pos);
#else
    // Skip ahead so that only the low byte of the escape is decoded.
    pos += 3;
    return Hex(pos);
#endif
}
|
这里可以看到,如果不开启JSON_UNICODE,"\uXXXX"中的前2位十六进制数字会被忽略掉。对ASCII字符来说结果当然是正确的,但非ASCII字符就会出问题了。
于是我增加了几个函数:Hex2UTF8、UTF83B、String2Hex、Char2Short、DChar2Short
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
std::string Hex2UTF8(
const
json_char* & pos,
const
json_char *
const
end)
{
JSON_ASSERT(*(pos + 1) == JSON_TEXT(
'0'
), JSON_TEXT(
"wide utf character (hihi)"
));
JSON_ASSERT(*(pos + 2) == JSON_TEXT(
'0'
), JSON_TEXT(
"wide utf character (hilo)"
));
//截取字符串
++pos;
int
len=4;
char
* str=(
char
*)
malloc
(len+1);
bzero(str, len+1);
memcpy
(str, pos, len);
str[len]=0;
pos+=3;
//保留原始指针
char
* pstr=str;
std::string rstr;
if
(*pstr==JSON_TEXT(
'u'
)) {
//u
pstr++;
}
else
if
(*pstr==JSON_TEXT(
'\\'
)){
//\u
pstr+=2;
}
char
hexStr[5]={0};
memcpy
(hexStr, pstr, 4);
unsigned
short
hex=String2Hex(hexStr);
if
(hex<0x0800 ||hex>0xFFFF) {
JSON_ASSERT(1, JSON_TEXT(
"now only support hex >0x0800 's utf8 char"
));
return
""
;
}
rstr+=UTF83B(hex);
free
(str);
str=NULL;
return
rstr;
}
|
1
2
3
4
5
6
7
8
9
10
11
12
|
/**
 * Packs a 16-bit value into the three-byte UTF-8 layout
 * 1110xxxx 10xxxxxx 10xxxxxx.
 *
 * @param hex  Value to encode. All 16 bits are used: the top 4 go into the
 *             first byte, the next 6 into the second, the last 6 into the third.
 * @return A three-byte string. No range check is done here.
 */
std::string UTF83B(unsigned short hex)
{
    std::string encoded;
    encoded += static_cast<char>(0xE0 | ((hex >> 12) & 0x0F));  // top 4 bits
    encoded += static_cast<char>(0x80 | ((hex >> 6) & 0x3F));   // middle 6 bits
    encoded += static_cast<char>(0x80 | (hex & 0x3F));          // low 6 bits
    return encoded;
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
/**
 * Converts a NUL-terminated string of hex digits into a 16-bit value.
 *
 * @param str  Two digits give the low byte only; four digits give the high
 *             byte followed by the low byte.
 * @return The combined value. Any other length leaves both bytes at zero,
 *         so the result is 0.
 */
unsigned short String2Hex(const json_char* str)
{
    unsigned short highByte = 0;
    unsigned short lowByte = 0;

    const unsigned short digitCount = strlen(str);
    switch (digitCount) {
        case 4:
            highByte = DChar2Short(str);
            lowByte = DChar2Short(str + 2);
            break;
        case 2:
            lowByte = DChar2Short(str);
            break;
        default:
            break;
    }

    return ((highByte << 8) & 0xFF00) | (lowByte & 0x00FF);
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
/**
 * Maps one hex digit character to its numeric value.
 *
 * @param c  'A'-'F' and 'a'-'f' map to 10-15.
 * @return For every other character, c - '0' converted to unsigned short.
 *         That is 0-9 for decimal digits; no validation is done for anything
 *         else.
 */
unsigned short Char2Short(json_char c)
{
    if (c >= 'A' && c <= 'F') {
        return 10 + (c - 'A');
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + (c - 'a');
    }
    return c - '0';
}
|
1
2
3
4
5
6
7
8
9
10
11
|
/**
 * Converts the first two characters of str, read as hex digits, into a value.
 *
 * @param str  Must have at least two readable characters; str[0] is the high
 *             nibble and str[1] the low nibble.
 * @return Char2Short(str[0]) * 16 + Char2Short(str[1]).
 */
unsigned short DChar2Short(const json_char* str)
{
    const unsigned short highNibble = Char2Short(str[0]);
    const unsigned short lowNibble = Char2Short(str[1]);
    return highNibble * 16 + lowNibble;
}
|
然后,在SpecialChar函数里面修改
1
2
3
4
5
6
7
|
case
JSON_TEXT(
'u'
):
//utf character
#ifdef JSON_UNICODE
UTF(pos, res, end);
#
else
res += Hex2UTF8(pos, end);
#endif
break
;
|
还有就是不能开启JSON_ESCAPE_WRITES,不然会失效。
注:
我只测试了解析方向(把\uXXXX转义解析成UTF-8字符),没有测试写出方向(把原字符串转换成\uXXXX转义)。