关于下面的char *参数是必须为unsigned char* static const char trailingBytesForUTF8[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; static int isLegalUTF8( const unsigned char *source, int length ) { unsigned char a; const unsigned char *srcptr = source + length; switch ( length ) { default: return 0; /* Everything else falls through when "true"... */ case 4: if ( ( a = ( *--srcptr ) ) < 0x80 || a > 0xBF ) return 0; case 3: if ( ( a = ( *--srcptr ) ) < 0x80 || a > 0xBF ) return 0; case 2: if ( ( a = ( *--srcptr ) ) > 0xBF ) return 0; switch ( *source ) { /* no fall-through in this inner switch */ case 0xE0: if ( a < 0xA0 ) return 0; break; case 0xF0: if ( a < 0x90 ) return 0; break; case 0xF4: if ( a > 0x8F ) return 0; break; default: if ( a < 0x80 ) return 0; } case 1: if ( *source >= 0x80 && *source < 0xC2 ) return 0; if ( *source > 0xF4 ) return 0; } return 1; } static int bson_validate_string( const unsigned char *string, const int length ) { int position = 0; int sequence_length = 1; while ( position < length ) { sequence_length = trailingBytesForUTF8[*( string + position )] + 1; if ( ( position + sequence_length ) > length ) { return ERROR; } if ( !isLegalUTF8( string + position, sequence_length ) ) { return ERROR; } position += sequence_length; return OK; }