1
#include
"
VAUTF8.h
"
2
#include
<
windows.h
>
3
4
bool
UTF8_Unicode_Possible
=
true
;
5
// The three bytes EF BB BF (the UTF-8 byte-order mark) followed by a NUL
// terminator, so the array can be used as a C string.
char cUTF8Hdr[] = {
    static_cast<char>(0xEF),
    static_cast<char>(0xBB),
    static_cast<char>(0xBF),
    0
};
6
7
/// Returns the total length, in bytes, of the UTF-8 sequence introduced by
/// the lead byte `in`.
///
/// @param in  First byte of a candidate UTF-8 sequence.
/// @return 1..4 for a byte that can start a sequence, or -1 when `in` is a
///         continuation byte (0x80..0xBF) or a byte that can never start a
///         sequence (0xF8..0xFF).
///
/// Bytes 0xF8..0xFF used to be reported as 5/6/7/8-byte leads. RFC 3629
/// limits UTF-8 to four bytes and 0xFE/0xFF never occur in UTF-8 at all, so
/// they are now rejected like any other invalid lead byte.
int UTF8CharLen(char in)
{
    // Work on the unsigned value so bytes >= 0x80 compare as expected.
    const unsigned char lead = static_cast<unsigned char>(in);

    if (lead < 0x80)
        return 1;   // 0xxxxxxx: ASCII
    if (lead < 0xC0)
        return -1;  // 10xxxxxx: continuation byte, not a lead
    if (lead < 0xE0)
        return 2;   // 110xxxxx
    if (lead < 0xF0)
        return 3;   // 1110xxxx
    if (lead < 0xF8)
        return 4;   // 11110xxx

    return -1;      // 11111xxx: not a valid lead byte
}
37
int
IsUTF8(
const
char
*
src, size_t max_source_len)
38
{
39
if
(max_source_len
<
0
)
40
return
0
;
41
42
if
(max_source_len
==
0
)
43
return
1
;
44
45
while
(
*
src
&&
max_source_len
--
)
46
{
47
int
bytes
=
UTF8CharLen(
*
src
++
);
48
if
(bytes
<
0
)
49
return
0
;
50
if
(static_cast
<
int
>
(max_source_len)
<
--
bytes)
51
return
0
;
52
while
(bytes
--
) {
53
if
((
*
src
++
&
0xC0
)
!=
0x80
)
54
return
0
;
55
}
56
}
57
58
return
1
;
59
}
60
61
//
===================================================================================================
62
// Wide-character (UTF-16) string to UTF-8 conversion
63
/// Converts a NUL-terminated wide-character string (passed as a byte
/// pointer) to UTF-8 in a newly malloc'd buffer.
///
/// @param source  Wide-character string reinterpreted as `const char*`;
///                may be null.
/// @param dest    Receives the malloc'd UTF-8 buffer, to be released with
///                free(). Set to null when `source` is null.
/// @return -1 when `dest` is null; 1 when `source` is null; otherwise the
///         value returned by WideCharToMultiByte (bytes written including
///         the terminator, 0 on failure).
int _stdcall WStr2UTF8(const char* source, char** dest)
{
    if (!dest)
        return -1;

    // Decide this before allocating: the old code malloc'd first and then
    // overwrote the pointer with null, leaking the block.
    if (!source) {
        *dest = 0;
        return 1;
    }

    const int len = WStr2UTF8(source, NULL, 0);   // size query

    // Always allocate at least one byte so a failed conversion can still
    // hand back a valid empty string.
    *dest = (char*)malloc(len > 0 ? len : 1);
    if (!*dest)
        return 0;

    if (len <= 0) {
        (*dest)[0] = 0;
        return 0;
    }

    return WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)source, -1,
                               *dest, len, NULL, NULL);
}
80
81
/// Typed convenience overload: forwards a wchar_t string to the
/// byte-pointer overload WStr2UTF8(const char*, char**) and returns its
/// result unchanged.
int _stdcall WStr2UTF8(const wchar_t* source, char** dest)
{
    // The worker overload takes the wide string as a plain byte pointer.
    char* const as_bytes = (char*)source;
    return WStr2UTF8(as_bytes, dest);
}
85
86
/// Converts a NUL-terminated wide-character string (passed as a byte
/// pointer) to UTF-8 in a caller-supplied buffer.
///
/// @param source   Wide-character string reinterpreted as `const char*`.
/// @param dest     Output buffer. Null requests a size query. May equal
///                 `source`, in which case the conversion goes through a
///                 temporary buffer.
/// @param max_len  Capacity of `dest` in bytes.
/// @return Bytes required (size query) or written, terminator included;
///         0 on failure, including an in-place conversion whose result does
///         not fit in `max_len`.
int _stdcall WStr2UTF8(const char* source, char* dest, int max_len)
{
    LPCWSTR wide = (LPCWSTR)source;

    // Size query only.
    if (!dest)
        return WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);

    // Distinct buffers: convert straight into the destination.
    if (source != dest)
        return WideCharToMultiByte(CP_UTF8, 0, wide, -1, dest, max_len, NULL, NULL);

    // In-place conversion: build the result in a temporary, then copy it
    // back only when it fits. The previous strcpy_s call triggered the CRT
    // invalid-parameter handler on a too-small buffer.
    char* converted = NULL;
    const int converted_size = WStr2UTF8(source, &converted);
    if (!converted)
        return 0;

    int result = 0;
    if (converted_size > 0 && converted_size <= max_len) {
        memcpy(dest, converted, converted_size);
        result = converted_size;
    }

    free(converted);
    return result;
}
108
//
===================================================================================================
109
110
//
===================================================================================================
111
// Multi-byte (ANSI code page) string to wide-character string conversion
112
/// Converts a NUL-terminated string in the thread's ANSI code page to wide
/// characters in a caller-supplied buffer.
///
/// @param source   ANSI string; null is treated as an empty string.
/// @param dest     Output buffer (receives wide characters). Null requests
///                 a size query. May equal `source`, in which case a
///                 temporary buffer is used.
/// @param max_len  Capacity of `dest` in BYTES.
/// @return Size in bytes (2 per wide character, terminator included) that
///         was written or would be required; 0 on failure.
int _stdcall Str2WStr(const char* source, char* dest, int max_len)
{
    if (!source)
    {
        // Empty result is a lone wide terminator. Guard the write: dest is
        // null when the caller is only asking for the size.
        if (dest)
            memset(dest, 0, 2);
        return 2;
    }

    // Size query only.
    if (!dest)
        return 2 * MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1, NULL, 0);

    // Distinct buffers: convert straight into the destination.
    if (source != dest)
        return 2 * MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1,
                                       (LPWSTR)dest, max_len / 2);

    // In-place conversion: size the temporary from the real requirement and
    // refuse when the destination cannot hold it.
    const int needed = MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1, NULL, 0);
    if (needed <= 0 || needed > max_len / 2)
        return 0;

    wchar_t* scratch = new wchar_t[needed];
    const int written = 2 * MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1,
                                                scratch, needed);
    memcpy(dest, scratch, written);
    delete[] scratch;
    return written;
}
138
139
/// Converts a NUL-terminated string in the thread's ANSI code page to wide
/// characters in a newly allocated buffer.
///
/// @param source  ANSI string; null yields an empty wide string.
/// @param dest    Receives the buffer. It is always allocated with calloc,
///                so callers release it with free().
/// @return -1 when `dest` is null; otherwise the size in bytes (2 per wide
///         character, terminator included), or 0 on failure.
int _stdcall Str2WStr(const char* source, char** dest)
{
    if (!dest)
        return -1;

    if (!source)
    {
        // Use calloc here too: this path used new[], which does not match
        // the free() callers apply to the result of the normal path.
        *dest = (char*)calloc(1, 2);
        return 2;
    }

    const int dest_len = Str2WStr(source, NULL, 0);   // size query, in bytes

    // Allocate at least one wide terminator so a failed conversion still
    // leaves a valid empty string behind.
    *dest = (char*)calloc(1, dest_len > 0 ? dest_len : 2);
    if (!*dest)
        return 0;

    return 2 * MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1,
                                   (LPWSTR)*dest, dest_len / 2);
}
151
//
===================================================================================================
152
153
154
//
===================================================================================================
155
// Wide-character string to multi-byte (ANSI code page) string conversion
156
/// Converts a NUL-terminated wide-character string (passed as a byte
/// pointer) to the thread's ANSI code page in a caller-supplied buffer.
///
/// @param source   Wide-character string reinterpreted as `const char*`.
/// @param dest     Output buffer.
/// @param max_len  Capacity of `dest` in bytes.
/// @return Whatever WideCharToMultiByte returns for this conversion.
int _stdcall WStr2Str(const char* source, char* dest, int max_len)
{
    LPCWSTR wide_source = (LPCWSTR)source;
    LPSTR narrow_dest = (LPSTR)dest;

    return WideCharToMultiByte(CP_THREAD_ACP, 0, wide_source, -1,
                               narrow_dest, max_len, NULL, NULL);
}
162
163
/// Converts a NUL-terminated wide-character string (passed as a byte
/// pointer) to the thread's ANSI code page in a newly allocated buffer.
///
/// @param source  Wide-character string reinterpreted as `const char*`;
///                null yields an empty string.
/// @param dest    Receives the buffer, to be released with free().
/// @return -1 when `dest` is null; 1 for a null `source` (just the
///         terminator, matching the other converters in this file);
///         otherwise the bytes written including the terminator, or 0 on
///         failure.
int _stdcall WStr2Str(const char* source, char** dest)
{
    if (!dest)
        return -1;

    // Null input used to leave *dest pointing at one uninitialised byte.
    if (!source) {
        *dest = (char*)calloc(1, 1);
        return 1;
    }

    LPCWSTR wide = (LPCWSTR)source;
    const int len = WideCharToMultiByte(CP_THREAD_ACP, 0, wide, -1, NULL, 0, 0, 0);

    // Zero-filled and at least one byte, so a failed conversion still
    // yields a valid empty string.
    *dest = (char*)calloc(1, len > 0 ? len : 1);
    if (!*dest || len <= 0)
        return 0;

    return WideCharToMultiByte(CP_THREAD_ACP, 0, wide, -1, *dest, len, 0, 0);
}
172
//
===================================================================================================
173
174
175
//
===================================================================================================
176
// Multi-byte (ANSI code page) string to UTF-8 string conversion
177
/// Converts a NUL-terminated string in the thread's ANSI code page to UTF-8
/// in a caller-supplied buffer.
///
/// When UTF8_Unicode_Possible is false no conversion happens; the source
/// bytes are copied (and truncated to fit) instead.
///
/// @param source   ANSI string; null is treated as an empty string.
/// @param dest     Output buffer; null requests a size query.
/// @param max_len  Capacity of `dest` in bytes.
/// @return Bytes required or written, terminator included; 0 when `max_len`
///         is negative or the conversion fails. In the non-Unicode path the
///         return value is always strlen(source) + 1, even when the copy
///         was truncated.
int _stdcall Str2UTF8(const char* source, char* dest, int max_len)
{
    if (!source)
    {
        // dest is null for a size query, so only write when there is a buffer.
        if (dest)
            *dest = 0;
        return 1;
    }

    if (max_len < 0)
        return 0;

    if (!UTF8_Unicode_Possible)
    {
        const size_t source_len = strlen(source) + 1;   // with terminator

        if (dest && max_len > 0)
        {
            if (source_len <= (size_t)max_len)
                strcpy_s(dest, max_len, source);
            else
                // Copy max_len - 1 characters so the terminator still fits.
                // Passing max_len as the count made strncpy_s fail instead
                // of truncating.
                strncpy_s(dest, max_len, source, max_len - 1);
        }

        return (int)source_len;
    }

    // Step 1: ANSI -> wide characters, sized from the real requirement.
    const int wide_len = MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1, NULL, 0);
    if (wide_len <= 0)
        return 0;

    wchar_t* wide = new wchar_t[wide_len];
    MultiByteToWideChar(CP_THREAD_ACP, 0, source, -1, wide, wide_len);

    // Step 2: wide characters -> UTF-8. A zero capacity makes the call a
    // size query, which is what a null dest asks for.
    const int result = WideCharToMultiByte(CP_UTF8, 0, wide, -1,
                                           dest, dest ? max_len : 0, 0, 0);
    delete[] wide;
    return result;
}
228
229
/// Converts a NUL-terminated string in the thread's ANSI code page to UTF-8
/// in a newly allocated buffer stored in `*dest`.
///
/// @param source  ANSI string; null yields a one-byte empty string.
/// @param dest    Receives the allocated buffer.
/// @return -1 when `dest` is null; 1 for a null `source`; otherwise the
///         result of WStr2UTF8 (Unicode path) or strlen(source) + 1
///         (plain copy path).
int _stdcall Str2UTF8(const char* source, char** dest)
{
    if (!dest)
        return -1;

    if (!source) {
        *dest = (char*)calloc(1, 1);
        return 1;
    }

    // Plain copy when Unicode conversion is switched off.
    if (!UTF8_Unicode_Possible) {
        *dest = _strdup(source);
        return (int)(1 + strlen(source));
    }

    // ANSI -> wide characters -> UTF-8, releasing the intermediate buffer.
    unsigned short* wide = NULL;
    Str2WStr(source, (char**)&wide);
    const int converted = WStr2UTF8((char*)wide, dest);
    free(wide);
    return converted;
}
250
//
===================================================================================================
251
252
253
254
//
===================================================================================================
255
// UTF-8 string to multi-byte (ANSI code page) string conversion
256
/// Converts a NUL-terminated UTF-8 string to the thread's ANSI code page in
/// a newly allocated buffer stored in `*dest` (release with free()).
///
/// When UTF8_Unicode_Possible is false the source is simply duplicated.
///
/// @param source  UTF-8 string; null yields a one-byte empty string.
/// @param dest    Receives the allocated buffer.
/// @return -1 when `dest` is null; 1 for a null `source`; otherwise bytes
///         written including the terminator, or 0 on failure (in which case
///         `*dest` is an empty string or null if allocation failed).
int _stdcall UTF82Str(const char* source, char** dest)
{
    if (!dest) {
        return -1;
    }

    if (!source) {
        *dest = (char*)calloc(1, 1);
        return 1;
    }

    if (!UTF8_Unicode_Possible) {
        *dest = _strdup(source);
        return *dest ? (int)strlen(*dest) + 1 : 0;
    }

    // Step 1: UTF-8 -> wide characters (malloc-family buffer).
    unsigned short* wide = NULL;
    UTF82WStr(source, (char**)&wide);
    if (!wide) {
        *dest = (char*)calloc(1, 1);
        return 0;
    }

    // Step 2: wide characters -> ANSI. Allocate at least one zeroed byte so
    // a failed size query still leaves a valid empty string in *dest.
    const int dest_len = WideCharToMultiByte(CP_THREAD_ACP, 0, (LPCWSTR)wide, -1,
                                             0, 0, 0, 0);
    *dest = (char*)calloc(1, dest_len > 0 ? dest_len : 1);

    int written = 0;
    if (*dest && dest_len > 0)
        written = WideCharToMultiByte(CP_THREAD_ACP, 0, (LPCWSTR)wide, -1,
                                      *dest, dest_len, 0, 0);

    free(wide);
    return written;
}
288
289
/// Converts a NUL-terminated UTF-8 string to the thread's ANSI code page in
/// a caller-supplied buffer.
///
/// When UTF8_Unicode_Possible is false the source is copied unchanged.
///
/// @param source   UTF-8 string; null is treated as an empty string.
/// @param dest     Output buffer; null requests a size query.
/// @param max_len  Capacity of `dest` in bytes.
/// @return Bytes required or written, terminator included; 0 on failure.
int _stdcall UTF82Str(const char* source, char* dest, int max_len)
{
    if (!source) {
        if (dest)
            *dest = 0;
        return 1;
    }

    if (!UTF8_Unicode_Possible) {
        if (dest)
            strcpy_s(dest, max_len, source);

        // Count the terminator, like the null-source and Unicode paths do;
        // this branch used to report one byte less than the others.
        return (int)strlen(source) + 1;
    }

    // Step 1: UTF-8 -> wide characters.
    unsigned short* wide = NULL;
    UTF82WStr(source, (char**)&wide);
    if (!wide)
        return 0;

    // Step 2: wide characters -> ANSI. A zero capacity turns the call into
    // a size query, which is what a null dest asks for.
    const int written = WideCharToMultiByte(CP_THREAD_ACP, 0, (LPCWSTR)wide, -1,
                                            dest, dest ? max_len : 0, 0, 0);

    // UTF82WStr allocates with the malloc family, so release with free();
    // the old delete[] here was an allocator mismatch.
    free(wide);
    return written;
}
320
//
===================================================================================================
321
322
//
===================================================================================================
323
// UTF-8 string to wide-character string conversion
324
/// Converts a NUL-terminated UTF-8 string to wide characters in a newly
/// allocated buffer.
///
/// @param source  UTF-8 string; null yields an empty wide string.
/// @param dest    Receives the buffer (release with free()); null requests
///                a size query.
/// @return Size in bytes, terminator included, that was written or would be
///         required; 0 on failure (`*dest` is then an empty wide string, or
///         null if allocation failed).
int _stdcall UTF82WStr(const char* source, char** dest)
{
    // Handle null before touching the string: the old code called strlen()
    // on it first.
    if (!source) {
        if (dest)
            *dest = (char*)calloc(1, sizeof(wchar_t));
        return (int)sizeof(wchar_t);
    }

    const int wide_count = MultiByteToWideChar(CP_UTF8, 0, source, -1, 0, 0);

    if (!dest)
        return (int)sizeof(wchar_t) * wide_count;

    // Zero-filled and never empty, so a failed conversion still leaves a
    // valid terminated string for the caller.
    *dest = (char*)calloc(wide_count > 0 ? wide_count : 1, sizeof(wchar_t));
    if (!*dest || wide_count <= 0)
        return 0;

    return (int)sizeof(wchar_t) * MultiByteToWideChar(CP_UTF8, 0, source, -1,
                                                      (LPWSTR)*dest, wide_count);
}
340
341
342
/// Converts a NUL-terminated UTF-8 string to wide characters in a
/// caller-supplied buffer.
///
/// @param source   UTF-8 string; null makes the function return 0.
/// @param dest     Output buffer (receives wide characters). Null requests
///                 a size query. May equal `source`, in which case a
///                 temporary buffer is used.
/// @param max_len  Capacity of `dest` in BYTES.
/// @return Size in bytes, terminator included, that was written or would be
///         required; 0 on failure.
int _stdcall UTF82WStr(const char* source, char* dest, int max_len)
{
    if (!source)
        return 0;

    const int wchar_bytes = (int)sizeof(wchar_t);

    // Size query only.
    if (!dest)
        return wchar_bytes * MultiByteToWideChar(CP_UTF8, 0, source, -1, 0, 0);

    // Signed division, so a negative max_len cannot wrap to a huge capacity.
    const int capacity = max_len / wchar_bytes;

    // Distinct buffers: convert straight into the destination.
    if (source != dest)
        return wchar_bytes * MultiByteToWideChar(CP_UTF8, 0, source, -1,
                                                 (LPWSTR)dest, capacity);

    // In-place conversion: convert into a correctly sized temporary and
    // copy back only when the destination can hold the result. Previously a
    // zero capacity left the temporary unwritten and its uninitialised
    // contents were copied into dest.
    const int needed = MultiByteToWideChar(CP_UTF8, 0, source, -1, 0, 0);
    if (needed <= 0 || needed > capacity)
        return 0;

    wchar_t* scratch = (wchar_t*)malloc(needed * sizeof(wchar_t));
    if (!scratch)
        return 0;

    const int written = wchar_bytes * MultiByteToWideChar(CP_UTF8, 0, source, -1,
                                                          scratch, needed);
    memcpy(dest, scratch, written);
    free(scratch);
    return written;
}
367
368
//
===================================================================================================
369
370
371
int
StringConvert(
const
char
*
source, nsVAUTF8::eCharacterEncodingMode source_format,
/*
int max_source_len,
*/
char
**
dest, nsVAUTF8::eCharacterEncodingMode dest_format )
372
{
373
char
*
_source
=
(
char
*
)source;
374
switch
(source_format)
375
{
376
case
nsVAUTF8::ANSI:
377
switch
(dest_format) {
378
case
nsVAUTF8::ANSI:
*
dest
=
_strdup(_source);
break
;
379
case
nsVAUTF8::UTF8: Str2UTF8(_source, dest);
break
;
380
case
nsVAUTF8::UTF16LE: Str2WStr(_source, dest);
break
;
381
}
382
break
;
383
case
nsVAUTF8::UTF8:
384
switch
(dest_format) {
385
case
nsVAUTF8::ANSI: UTF82Str(_source, dest);
break
;
386
case
nsVAUTF8::UTF8:
*
dest
=
_strdup(_source);
break
;
387
case
nsVAUTF8::UTF16LE: UTF82WStr(_source, dest);
break
;
388
}
389
break
;
390
case
nsVAUTF8::UTF16LE:
391
switch
(dest_format) {
392
case
nsVAUTF8::ANSI:
393
WStr2Str(_source, dest);
394
break
;
395
case
nsVAUTF8::UTF8:
396
WStr2UTF8(_source, dest);
397
break
;
398
case
nsVAUTF8::UTF16LE:
399
*
dest
=
(
char
*
)_wcsdup((wchar_t
*
)_source);
400
break
;
401
}
402
break
;
403
}
404
return
1
;
405
}
406
407
int
FromUTF8(
const
char
*
source, wchar_t
**
dest)
408
{
409
return
StringConvert(source, nsVAUTF8::UTF8,
410
(
char
**
)dest, nsVAUTF8::UTF16LE);
411
}
412
413
int
FromUTF8(
const
char
*
source,
char
**
dest)
414
{
415
return
StringConvert(source, nsVAUTF8::UTF8,
416
(
char
**
)dest, nsVAUTF8::ANSI);
417
}
418
419
int
ToUTF8(
const
char
*
source,
char
**
dest)
420
{
421
return
StringConvert(source, nsVAUTF8::ANSI,
422
(
char
**
)dest, nsVAUTF8::UTF8);
423
}
424
425
int
ToUTF8(
const
wchar_t
*
source,
char
**
dest)
426
{
427
return
StringConvert((
char
*
)source, nsVAUTF8::UTF16LE,
428
(
char
**
)dest, nsVAUTF8::UTF8);
429
}
430
431
void
utf8_EnableRealUnicode(
bool
bEnabled )
432
{
433
UTF8_Unicode_Possible
=
bEnabled;
434
}
435
436
bool
utf8_IsUnicodeEnabled()
437
{
438
return
UTF8_Unicode_Possible;
439
}
440
/// Builds the object from a narrow string whose encoding the caller states.
///
/// @param pSrc      Source string; a null pointer leaves the object as it
///                  was default-initialised.
/// @param Encoding  nsVAUTF8::UTF8 stores the text as UTF-8; any other
///                  value stores it as ANSI.
VAUTF8::VAUTF8(const char* pSrc, int Encoding)
{
    if (!pSrc)
        return;

    if (Encoding != nsVAUTF8::UTF8)
        m_sANSI = pSrc;
    else
        m_sUTF8 = pSrc;

    // Derive the remaining representations from the one just stored.
    Complete();
}
457
458
459
/// Builds the object from a narrow string, using IsUTF8() to decide whether
/// the bytes are stored as UTF-8 or as ANSI.
///
/// @param pSrc  Source string; a null pointer leaves the object as it was
///              default-initialised.
VAUTF8::VAUTF8(const char* pSrc)
{
    if (!pSrc)
        return;

    const bool looks_like_utf8 = IsUTF8(pSrc, strlen(pSrc)) != 0;
    if (looks_like_utf8)
        m_sUTF8 = pSrc;
    else
        m_sANSI = pSrc;

    // Derive the remaining representations from the one just stored.
    Complete();
}
475
476
/// Builds the object from a wide-character string.
///
/// @param pSrc  Source string; a null pointer leaves the object as it was
///              default-initialised.
VAUTF8::VAUTF8(const wchar_t* pSrc)
{
    if (!pSrc)
        return;

    m_sUNICODE = pSrc;

    // Derive the ANSI and UTF-8 representations from the wide string.
    Complete();
}
484
485
/// Builds the object from a narrow string object, using IsUTF8() to decide
/// whether its bytes are stored as UTF-8 or as ANSI.
VAUTF8::VAUTF8(const EncodingStirngA& src)
{
    const bool looks_like_utf8 = IsUTF8(src.c_str(), src.size()) != 0;

    if (!looks_like_utf8)
        m_sANSI = src;
    else
        m_sUTF8 = src;

    // Derive the remaining representations from the one just stored.
    Complete();
}
498
499
500
/// Builds the object from a wide string object.
VAUTF8::VAUTF8(const EncodingStirngW& src)
{
    this->m_sUNICODE = src;

    // Derive the ANSI and UTF-8 representations from the wide string.
    this->Complete();
}
505
506
/// Copy constructor: implemented in terms of the copy-assignment operator.
VAUTF8::VAUTF8(const VAUTF8& other)
{
    operator=(other);
}
510
511
512
/// Copy assignment: makes this object hold the same UTF-8, ANSI and wide
/// representations as `rhs`.
///
/// The previous version copied only m_sUTF8 and then called Complete().
/// Complete() looks at m_sANSI first, so when the target already held text
/// its stale ANSI value won and overwrote the UTF-8 string just assigned;
/// the assignment silently kept the old content. Copying all three
/// members avoids that and needs no re-conversion.
///
/// @param rhs  Object to copy from.
/// @return Reference to this object.
VAUTF8& VAUTF8::operator=(const VAUTF8& rhs)
{
    if (this != &rhs)
    {
        m_sUTF8 = rhs.m_sUTF8;
        m_sANSI = rhs.m_sANSI;
        m_sUNICODE = rhs.m_sUNICODE;
    }

    return *this;
}
518
519
void
VAUTF8::Complete()
520
{
521
char
*
p
=
NULL;
522
523
if
(
!
m_sANSI.empty())
524
{
525
Str2UTF8(m_sANSI.c_str(),
&
p);
526
m_sUTF8
=
p;
527
free(p);
528
529
Str2WStr(m_sANSI.c_str(),
&
p);
530
m_sUNICODE
=
(wchar_t
*
)p;
531
free(p);
532
}
533
else
534
{
535
if
(
!
m_sUTF8.empty())
536
{
537
UTF82Str((
char
*
)m_sUTF8.c_str(),
&
p);
538
m_sANSI
=
p;
539
free(p);
540
541
UTF82WStr((
char
*
)m_sUTF8.c_str(),
&
p);
542
m_sUNICODE
=
(wchar_t
*
)p;
543
free(p);
544
}
545
else
546
{
547
if
(
!
m_sUNICODE.empty())
548
{
549
WStr2Str((
char
*
)m_sUNICODE.c_str(),
&
p);
550
m_sANSI
=
p;
551
free(p);
552
553
WStr2UTF8((
char
*
)m_sUNICODE.c_str(),
&
p);
554
m_sUTF8
=
p;
555
free(p);
556
}
557
}
558
}
559
}
560