Java 字符串编码类型获取

阅读更多
 
 

原创作品,允许转载,转载时请务必以超链接形式标明文章 原始出处 、作者信息和本声明。否则将追究法律责任。http://cping1982.blog.51cto.com/601635/129912 

汉字编码是一项较为麻烦的事情,弄不好就会造出些谁都看不懂的乱码。比如我想做个针对汉字网站的爬虫系统,需要对非特定的页面进行数据解析处理,而此时我所访问的页面编码格式未知,如果不能正确处理页面编码,则很难获得我们理想中的数据。

通常这时候可能有几种选择:

一是根据 response 的 ContentType 获得:如果服务器支持,此项中会返回 charset 值,解析即可。但对不返回 charset 或者不支持此项的服务器则无能为力。

二是使用正则或自定义解析函数截取页面中‘charset=’后的数据,采取死盯战术;但万一采集的页面中没有此项或者此项有错,也就回天乏术。

三就是老老实实的解析全文,最后返回一个符合的编码格式。

此例中我演示了几种较常见编码的识别方法:通过统计文本属于各指定编码的或然率(即概率),而后返回可能性最高的编码方式。在无法获得确切编码之时,这可以说是唯一的选择。

这种识别方式主要是针对汉字编码设计的,所以对应页面中的汉字数目越多,统计结果就越准确;反之,汉字越少就越难识别出正确结果。



package org.loon.test.encoding;

import java.io.InputStream;
import java.net.URL;

/**
 * 

* Title: LoonFramework *

*

* Description: *

*

* Copyright: Copyright (c) 2008 *

*

* Company: LoonFramework *

*

* License: http://www.apache.org/licenses/LICENSE-2.0 *

* * @author chenpeng * @email:[email protected] * @version 0.1 */ abstract class Encode extends Encoding { /* ps:EUC为双字节编码,超过10000个字符 */ int GB2312format[][]; // gb int GBKformat[][]; // gbk int Big5format[][]; // big5编码 int EUC_KRformat[][]; // euc-kr int JPformat[][]; // jp void init() { int i, j; for (i = 0; i < 94; i++) { for (j = 0; j < 94; j++) { GB2312format[i][j] = 0; } } for (i = 0; i < 126; i++) { for (j = 0; j < 191; j++) { GBKformat[i][j] = 0; } } for (i = 0; i < 94; i++) { for (j = 0; j < 158; j++) { Big5format[i][j] = 0; } } for (i = 0; i < 94; i++) { for (j = 0; j < 94; j++) { JPformat[i][j] = 0; } } GB2312format[20][35] = 599; GB2312format[49][26] = 598; GB2312format[41][38] = 597; GB2312format[17][26] = 596; GB2312format[32][42] = 595; GB2312format[39][42] = 594; GB2312format[45][49] = 593; GB2312format[51][57] = 592; GB2312format[50][47] = 591; GB2312format[42][90] = 590; GB2312format[52][65] = 589; GB2312format[53][47] = 588; GB2312format[19][82] = 587; GB2312format[31][19] = 586; GB2312format[40][46] = 585; GB2312format[24][89] = 584; GB2312format[23][85] = 583; GB2312format[20][28] = 582; GB2312format[42][20] = 581; GB2312format[34][38] = 580; GB2312format[45][9] = 579; GB2312format[54][50] = 578; GB2312format[25][44] = 577; GB2312format[35][66] = 576; GB2312format[20][55] = 575; GB2312format[18][85] = 574; GB2312format[20][31] = 573; GB2312format[49][17] = 572; GB2312format[41][16] = 571; GB2312format[35][73] = 570; GB2312format[20][34] = 569; GB2312format[29][44] = 568; GB2312format[35][38] = 567; GB2312format[49][9] = 566; GB2312format[46][33] = 565; GB2312format[49][51] = 564; GB2312format[40][89] = 563; GB2312format[26][64] = 562; GB2312format[54][51] = 561; GB2312format[54][36] = 560; GB2312format[39][4] = 559; GB2312format[53][13] = 558; GB2312format[24][92] = 557; GB2312format[27][49] = 556; GB2312format[48][6] = 555; GB2312format[21][51] = 554; GB2312format[30][40] = 553; GB2312format[42][92] = 552; GB2312format[31][78] = 551; 
GB2312format[25][82] = 550; GB2312format[47][0] = 549; GB2312format[34][19] = 548; GB2312format[47][35] = 547; GB2312format[21][63] = 546; GB2312format[43][75] = 545; GB2312format[21][87] = 544; GB2312format[35][59] = 543; GB2312format[25][34] = 542; GB2312format[21][27] = 541; GB2312format[39][26] = 540; GB2312format[34][26] = 539; GB2312format[39][52] = 538; GB2312format[50][57] = 537; GB2312format[37][79] = 536; GB2312format[26][24] = 535; GB2312format[22][1] = 534; GB2312format[18][40] = 533; GB2312format[41][33] = 532; GB2312format[53][26] = 531; GB2312format[54][86] = 530; GB2312format[20][16] = 529; GB2312format[46][74] = 528; GB2312format[30][19] = 527; GB2312format[45][35] = 526; GB2312format[45][61] = 525; GB2312format[30][9] = 524; GB2312format[41][53] = 523; GB2312format[41][13] = 522; GB2312format[50][34] = 521; GB2312format[53][86] = 520; GB2312format[47][47] = 519; GB2312format[22][28] = 518; GB2312format[50][53] = 517; GB2312format[39][70] = 516; GB2312format[38][15] = 515; GB2312format[42][88] = 514; GB2312format[16][29] = 513; GB2312format[27][90] = 512; GB2312format[29][12] = 511; GB2312format[44][22] = 510; GB2312format[34][69] = 509; GB2312format[24][10] = 508; GB2312format[44][11] = 507; GB2312format[39][92] = 506; GB2312format[49][48] = 505; GB2312format[31][46] = 504; GB2312format[19][50] = 503; GB2312format[21][14] = 502; GB2312format[32][28] = 501; GB2312format[18][3] = 500; GB2312format[53][9] = 499; GB2312format[34][80] = 498; GB2312format[48][88] = 497; GB2312format[46][53] = 496; GB2312format[22][53] = 495; GB2312format[28][10] = 494; GB2312format[44][65] = 493; GB2312format[20][10] = 492; GB2312format[40][76] = 491; GB2312format[47][8] = 490; GB2312format[50][74] = 489; GB2312format[23][62] = 488; GB2312format[49][65] = 487; GB2312format[28][87] = 486; GB2312format[15][48] = 485; GB2312format[22][7] = 484; GB2312format[19][42] = 483; GB2312format[41][20] = 482; GB2312format[26][55] = 481; GB2312format[21][93] = 480; 
GB2312format[31][76] = 479; GB2312format[34][31] = 478; GB2312format[20][66] = 477; GB2312format[51][33] = 476; GB2312format[34][86] = 475; GB2312format[37][67] = 474; GB2312format[53][53] = 473; GB2312format[40][88] = 472; GB2312format[39][10] = 471; GB2312format[24][3] = 470; GB2312format[27][25] = 469; GB2312format[26][15] = 468; GB2312format[21][88] = 467; GB2312format[52][62] = 466; GB2312format[46][81] = 465; GB2312format[38][72] = 464; GB2312format[17][30] = 463; GB2312format[52][92] = 462; GB2312format[34][90] = 461; GB2312format[21][7] = 460; GB2312format[36][13] = 459; GB2312format[45][41] = 458; GB2312format[32][5] = 457; GB2312format[26][89] = 456; GB2312format[23][87] = 455; GB2312format[20][39] = 454; GB2312format[27][23] = 453; GB2312format[25][59] = 452; GB2312format[49][20] = 451; GB2312format[54][77] = 450; GB2312format[27][67] = 449; GB2312format[47][33] = 448; GB2312format[41][17] = 447; GB2312format[19][81] = 446; GB2312format[16][66] = 445; GB2312format[45][26] = 444; GB2312format[49][81] = 443; GB2312format[53][55] = 442; GB2312format[16][26] = 441; GB2312format[54][62] = 440; GB2312format[20][70] = 439; GB2312format[42][35] = 438; GB2312format[20][57] = 437; GB2312format[34][36] = 436; GB2312format[46][63] = 435; GB2312format[19][45] = 434; GB2312format[21][10] = 433; GB2312format[52][93] = 432; GB2312format[25][2] = 431; GB2312format[30][57] = 430; GB2312format[41][24] = 429; GB2312format[28][43] = 428; GB2312format[45][86] = 427; GB2312format[51][56] = 426; GB2312format[37][28] = 425; GB2312format[52][69] = 424; GB2312format[43][92] = 423; GB2312format[41][31] = 422; GB2312format[37][87] = 421; GB2312format[47][36] = 420; GB2312format[16][16] = 419; GB2312format[40][56] = 418; GB2312format[24][55] = 417; GB2312format[17][1] = 416; GB2312format[35][57] = 415; GB2312format[27][50] = 414; GB2312format[26][14] = 413; GB2312format[50][40] = 412; GB2312format[39][19] = 411; GB2312format[19][89] = 410; GB2312format[29][91] = 409; 
GB2312format[17][89] = 408; GB2312format[39][74] = 407; GB2312format[46][39] = 406; GB2312format[40][28] = 405; GB2312format[45][68] = 404; GB2312format[43][10] = 403; GB2312format[42][13] = 402; GB2312format[44][81] = 401; GB2312format[41][47] = 400; GB2312format[48][58] = 399; GB2312format[43][68] = 398; GB2312format[16][79] = 397; GB2312format[19][5] = 396; GB2312format[54][59] = 395; GB2312format[17][36] = 394; GB2312format[18][0] = 393; GB2312format[41][5] = 392; GB2312format[41][72] = 391; GB2312format[16][39] = 390; GB2312format[54][0] = 389; GB2312format[51][16] = 388; GB2312format[29][36] = 387; GB2312format[47][5] = 386; GB2312format[47][51] = 385; GB2312format[44][7] = 384; GB2312format[35][30] = 383; GB2312format[26][9] = 382; GB2312format[16][7] = 381; GB2312format[32][1] = 380; GB2312format[33][76] = 379; GB2312format[34][91] = 378; GB2312format[52][36] = 377; GB2312format[26][77] = 376; GB2312format[35][48] = 375; GB2312format[40][80] = 374; GB2312format[41][92] = 373; GB2312format[27][93] = 372; GB2312format[15][17] = 371; GB2312format[16][76] = 370; GB2312format[51][12] = 369; GB2312format[18][20] = 368; GB2312format[15][54] = 367; GB2312format[50][5] = 366; GB2312format[33][22] = 365; GB2312format[37][57] = 364; GB2312format[28][47] = 363; GB2312format[42][31] = 362; GB2312format[18][2] = 361; GB2312format[43][64] = 360; GB2312format[23][47] = 359; GB2312format[28][79] = 358; GB2312format[25][45] = 357; GB2312format[23][91] = 356; GB2312format[22][19] = 355; GB2312format[25][46] = 354; GB2312format[22][36] = 353; GB2312format[54][85] = 352; GB2312format[46][20] = 351; GB2312format[27][37] = 350; GB2312format[26][81] = 349; GB2312format[42][29] = 348; GB2312format[31][90] = 347; GB2312format[41][59] = 346; GB2312format[24][65] = 345; GB2312format[44][84] = 344; GB2312format[24][90] = 343; GB2312format[38][54] = 342; GB2312format[28][70] = 341; GB2312format[27][15] = 340; GB2312format[28][80] = 339; GB2312format[29][8] = 338; GB2312format[45][80] = 
337; GB2312format[53][37] = 336; GB2312format[28][65] = 335; GB2312format[23][86] = 334; GB2312format[39][45] = 333; GB2312format[53][32] = 332; GB2312format[38][68] = 331; GB2312format[45][78] = 330; GB2312format[43][7] = 329; GB2312format[46][82] = 328; GB2312format[27][38] = 327; GB2312format[16][62] = 326; GB2312format[24][17] = 325; GB2312format[22][70] = 324; GB2312format[52][28] = 323; GB2312format[23][40] = 322; GB2312format[28][50] = 321; GB2312format[42][91] = 320; GB2312format[47][76] = 319; GB2312format[15][42] = 318; GB2312format[43][55] = 317; GB2312format[29][84] = 316; GB2312format[44][90] = 315; GB2312format[53][16] = 314; GB2312format[22][93] = 313; GB2312format[34][10] = 312; GB2312format[32][53] = 311; GB2312format[43][65] = 310; GB2312format[28][7] = 309; GB2312format[35][46] = 308; GB2312format[21][39] = 307; GB2312format[44][18] = 306; GB2312format[40][10] = 305; GB2312format[54][53] = 304; GB2312format[38][74] = 303; GB2312format[28][26] = 302; GB2312format[15][13] = 301; GB2312format[39][34] = 300; GB2312format[39][46] = 299; GB2312format[42][66] = 298; GB2312format[33][58] = 297; GB2312format[15][56] = 296; GB2312format[18][51] = 295; GB2312format[49][68] = 294; GB2312format[30][37] = 293; GB2312format[51][84] = 292; GB2312format[51][9] = 291; GB2312format[40][70] = 290; GB2312format[41][84] = 289; GB2312format[28][64] = 288; GB2312format[32][88] = 287; GB2312format[24][5] = 286; GB2312format[53][23] = 285; GB2312format[42][27] = 284; GB2312format[22][38] = 283; GB2312format[32][86] = 282; GB2312format[34][30] = 281; GB2312format[38][63] = 280; GB2312format[24][59] = 279; GB2312format[22][81] = 278; GB2312format[32][11] = 277; GB2312format[51][21] = 276; GB2312format[54][41] = 275; GB2312format[21][50] = 274; GB2312format[23][89] = 273; GB2312format[19][87] = 272; GB2312format[26][7] = 271; GB2312format[30][75] = 270; GB2312format[43][84] = 269; GB2312format[51][25] = 268; GB2312format[16][67] = 267; GB2312format[32][9] = 266; 
GB2312format[48][51] = 265; GB2312format[39][7] = 264; GB2312format[44][88] = 263; GB2312format[52][24] = 262; GB2312format[23][34] = 261; GB2312format[32][75] = 260; GB2312format[19][10] = 259; GB2312format[28][91] = 258; GB2312format[32][83] = 257; GB2312format[25][75] = 256; GB2312format[53][45] = 255; GB2312format[29][85] = 254; GB2312format[53][59] = 253; GB2312format[16][2] = 252; GB2312format[19][78] = 251; GB2312format[15][75] = 250; GB2312format[51][42] = 249; GB2312format[45][67] = 248; GB2312format[15][74] = 247; GB2312format[25][81] = 246; GB2312format[37][62] = 245; GB2312format[16][55] = 244; GB2312format[18][38] = 243; GB2312format[23][23] = 242; GB2312format[38][30] = 241; GB2312format[17][28] = 240; GB2312format[44][73] = 239; GB2312format[23][78] = 238; GB2312format[40][77] = 237; GB2312format[38][87] = 236; GB2312format[27][19] = 235; GB2312format[38][82] = 234; GB2312format[37][22] = 233; GB2312format[41][30] = 232; GB2312format[54][9] = 231; GB2312format[32][30] = 230; GB2312format[30][52] = 229; GB2312format[40][84] = 228; GB2312format[53][57] = 227; GB2312format[27][27] = 226; GB2312format[38][64] = 225; GB2312format[18][43] = 224; GB2312format[23][69] = 223; GB2312format[28][12] = 222; GB2312format[50][78] = 221; GB2312format[50][1] = 220; GB2312format[26][88] = 219; GB2312format[36][40] = 218; GB2312format[33][89] = 217; GB2312format[41][28] = 216; GB2312format[31][77] = 215; GB2312format[46][1] = 214; GB2312format[47][19] = 213; GB2312format[35][55] = 212; GB2312format[41][21] = 211; GB2312format[27][10] = 210; GB2312format[32][77] = 209; GB2312format[26][37] = 208; GB2312format[20][33] = 207; GB2312format[41][52] = 206; GB2312format[32][18] = 205; GB2312format[38][13] = 204; GB2312format[20][18] = 203; GB2312format[20][24] = 202; GB2312format[45][19] = 201; GB2312format[18][53] = 200; /* * GB2312format[39][0] = 199; GB2312format[40][71] = 198; * GB2312format[41][27] = 197; GB2312format[15][69] = 196; * GB2312format[42][10] = 195; 
GB2312format[31][89] = 194; * GB2312format[51][28] = 193; GB2312format[41][22] = 192; * GB2312format[40][43] = 191; GB2312format[38][6] = 190; * GB2312format[37][11] = 189; GB2312format[39][60] = 188; * GB2312format[48][47] = 187; GB2312format[46][80] = 186; * GB2312format[52][49] = 185; GB2312format[50][48] = 184; * GB2312format[25][1] = 183; GB2312format[52][29] = 182; * GB2312format[24][66] = 181; GB2312format[23][35] = 180; * GB2312format[49][72] = 179; GB2312format[47][45] = 178; * GB2312format[45][14] = 177; GB2312format[51][70] = 176; * GB2312format[22][30] = 175; GB2312format[49][83] = 174; * GB2312format[26][79] = 173; GB2312format[27][41] = 172; * GB2312format[51][81] = 171; GB2312format[41][54] = 170; * GB2312format[20][4] = 169; GB2312format[29][60] = 168; * GB2312format[20][27] = 167; GB2312format[50][15] = 166; * GB2312format[41][6] = 165; GB2312format[35][34] = 164; * GB2312format[44][87] = 163; GB2312format[46][66] = 162; * GB2312format[42][37] = 161; GB2312format[42][24] = 160; * GB2312format[54][7] = 159; GB2312format[41][14] = 158; * GB2312format[39][83] = 157; GB2312format[16][87] = 156; * GB2312format[20][59] = 155; GB2312format[42][12] = 154; * GB2312format[47][2] = 153; GB2312format[21][32] = 152; * GB2312format[53][29] = 151; GB2312format[22][40] = 150; * GB2312format[24][58] = 149; GB2312format[52][88] = 148; * GB2312format[29][30] = 147; GB2312format[15][91] = 146; * GB2312format[54][72] = 145; GB2312format[51][75] = 144; * GB2312format[33][67] = 143; GB2312format[41][50] = 142; * GB2312format[27][34] = 141; GB2312format[46][17] = 140; * GB2312format[31][74] = 139; GB2312format[42][67] = 138; * GB2312format[54][87] = 137; GB2312format[27][14] = 136; * GB2312format[16][63] = 135; GB2312format[16][5] = 134; * GB2312format[43][23] = 133; GB2312format[23][13] = 132; * GB2312format[31][12] = 131; GB2312format[25][57] = 130; * GB2312format[38][49] = 129; GB2312format[42][69] = 128; * GB2312format[23][80] = 127; GB2312format[29][0] = 126; * 
GB2312format[28][2] = 125; GB2312format[28][17] = 124; * GB2312format[17][27] = 123; GB2312format[40][16] = 122; * GB2312format[45][1] = 121; GB2312format[36][33] = 120; * GB2312format[35][23] = 119; GB2312format[20][86] = 118; * GB2312format[29][53] = 117; GB2312format[23][88] = 116; * GB2312format[51][87] = 115; GB2312format[54][27] = 114; * GB2312format[44][36] = 113; GB2312format[21][45] = 112; * GB2312format[53][52] = 111; GB2312format[31][53] = 110; * GB2312format[38][47] = 109; GB2312format[27][21] = 108; * GB2312format[30][42] = 107; GB2312format[29][10] = 106; * GB2312format[35][35] = 105; GB2312format[24][56] = 104; * GB2312format[41][29] = 103; GB2312format[18][68] = 102; * GB2312format[29][24] = 101; GB2312format[25][84] = 100; * GB2312format[35][47] = 99; GB2312format[29][56] = 98; * GB2312format[30][44] = 97; GB2312format[53][3] = 96; * GB2312format[30][63] = 95; GB2312format[52][52] = 94; * GB2312format[54][1] = 93; GB2312format[22][48] = 92; * GB2312format[54][66] = 91; GB2312format[21][90] = 90; * GB2312format[52][47] = 89; GB2312format[39][25] = 88; * GB2312format[39][39] = 87; GB2312format[44][37] = 86; * GB2312format[44][76] = 85; GB2312format[46][75] = 84; * GB2312format[18][37] = 83; GB2312format[47][42] = 82; * GB2312format[19][92] = 81; GB2312format[51][27] = 80; * GB2312format[48][83] = 79; GB2312format[23][70] = 78; * GB2312format[29][9] = 77; GB2312format[33][79] = 76; * GB2312format[52][90] = 75; GB2312format[53][6] = 74; * GB2312format[24][36] = 73; GB2312format[25][25] = 72; * GB2312format[44][26] = 71; GB2312format[25][36] = 70; * GB2312format[29][87] = 69; GB2312format[48][0] = 68; * GB2312format[15][40] = 67; GB2312format[17][45] = 66; * GB2312format[30][14] = 65; GB2312format[48][38] = 64; * GB2312format[23][19] = 63; GB2312format[40][42] = 62; * GB2312format[31][63] = 61; GB2312format[16][23] = 60; * GB2312format[26][21] = 59; GB2312format[32][76] = 58; * GB2312format[23][58] = 57; GB2312format[41][37] = 56; * GB2312format[30][43] 
= 55; GB2312format[47][38] = 54; * GB2312format[21][46] = 53; GB2312format[18][33] = 52; * GB2312format[52][37] = 51; GB2312format[36][8] = 50; * GB2312format[49][24] = 49; GB2312format[15][66] = 48; * GB2312format[35][77] = 47; GB2312format[27][58] = 46; * GB2312format[35][51] = 45; GB2312format[24][69] = 44; * GB2312format[20][54] = 43; GB2312format[24][41] = 42; * GB2312format[41][0] = 41; GB2312format[33][71] = 40; * GB2312format[23][52] = 39; GB2312format[29][67] = 38; * GB2312format[46][51] = 37; GB2312format[46][90] = 36; * GB2312format[49][33] = 35; GB2312format[33][28] = 34; * GB2312format[37][86] = 33; GB2312format[39][22] = 32; * GB2312format[37][37] = 31; GB2312format[29][62] = 30; * GB2312format[29][50] = 29; GB2312format[36][89] = 28; * GB2312format[42][44] = 27; GB2312format[51][82] = 26; * GB2312format[28][83] = 25; GB2312format[15][78] = 24; * GB2312format[46][62] = 23; GB2312format[19][69] = 22; * GB2312format[51][23] = 21; GB2312format[37][69] = 20; * GB2312format[25][5] = 19; GB2312format[51][85] = 18; * GB2312format[48][77] = 17; GB2312format[32][46] = 16; * GB2312format[53][60] = 15; GB2312format[28][57] = 14; * GB2312format[54][82] = 13; GB2312format[54][15] = 12; * GB2312format[49][54] = 11; GB2312format[53][87] = 10; * GB2312format[27][16] = 9; GB2312format[29][34] = 8; * GB2312format[20][44] = 7; GB2312format[42][73] = 6; * GB2312format[47][71] = 5; GB2312format[29][37] = 4; * GB2312format[25][50] = 3; GB2312format[18][84] = 2; * GB2312format[50][45] = 1; GB2312format[48][46] = 0; */ // GB2312format[43][89] = -1; GB2312format[54][68] = -2; Big5format[9][89] = 600; Big5format[11][15] = 599; Big5format[3][66] = 598; Big5format[6][121] = 597; Big5format[3][0] = 596; Big5format[5][82] = 595; Big5format[3][42] = 594; Big5format[5][34] = 593; Big5format[3][8] = 592; Big5format[3][6] = 591; Big5format[3][67] = 590; Big5format[7][139] = 589; Big5format[23][137] = 588; Big5format[12][46] = 587; Big5format[4][8] = 586; Big5format[4][41] = 585; 
Big5format[18][47] = 584; Big5format[12][114] = 583; Big5format[6][1] = 582; Big5format[22][60] = 581; Big5format[5][46] = 580; Big5format[11][79] = 579; Big5format[3][23] = 578; Big5format[7][114] = 577; Big5format[29][102] = 576; Big5format[19][14] = 575; Big5format[4][133] = 574; Big5format[3][29] = 573; Big5format[4][109] = 572; Big5format[14][127] = 571; Big5format[5][48] = 570; Big5format[13][104] = 569; Big5format[3][132] = 568; Big5format[26][64] = 567; Big5format[7][19] = 566; Big5format[4][12] = 565; Big5format[11][124] = 564; Big5format[7][89] = 563; Big5format[15][124] = 562; Big5format[4][108] = 561; Big5format[19][66] = 560; Big5format[3][21] = 559; Big5format[24][12] = 558; Big5format[28][111] = 557; Big5format[12][107] = 556; Big5format[3][112] = 555; Big5format[8][113] = 554; Big5format[5][40] = 553; Big5format[26][145] = 552; Big5format[3][48] = 551; Big5format[3][70] = 550; Big5format[22][17] = 549; Big5format[16][47] = 548; Big5format[3][53] = 547; Big5format[4][24] = 546; Big5format[32][120] = 545; Big5format[24][49] = 544; Big5format[24][142] = 543; Big5format[18][66] = 542; Big5format[29][150] = 541; Big5format[5][122] = 540; Big5format[5][114] = 539; Big5format[3][44] = 538; Big5format[10][128] = 537; Big5format[15][20] = 536; Big5format[13][33] = 535; Big5format[14][87] = 534; Big5format[3][126] = 533; Big5format[4][53] = 532; Big5format[4][40] = 531; Big5format[9][93] = 530; Big5format[15][137] = 529; Big5format[10][123] = 528; Big5format[4][56] = 527; Big5format[5][71] = 526; Big5format[10][8] = 525; Big5format[5][16] = 524; Big5format[5][146] = 523; Big5format[18][88] = 522; Big5format[24][4] = 521; Big5format[20][47] = 520; Big5format[5][33] = 519; Big5format[9][43] = 518; Big5format[20][12] = 517; Big5format[20][13] = 516; Big5format[5][156] = 515; Big5format[22][140] = 514; Big5format[8][146] = 513; Big5format[21][123] = 512; Big5format[4][90] = 511; Big5format[5][62] = 510; Big5format[17][59] = 509; Big5format[10][37] = 508; 
Big5format[18][107] = 507; Big5format[14][53] = 506; Big5format[22][51] = 505; Big5format[8][13] = 504; Big5format[5][29] = 503; Big5format[9][7] = 502; Big5format[22][14] = 501; Big5format[8][55] = 500; Big5format[33][9] = 499; Big5format[16][64] = 498; Big5format[7][131] = 497; Big5format[34][4] = 496; Big5format[7][101] = 495; Big5format[11][139] = 494; Big5format[3][135] = 493; Big5format[7][102] = 492; Big5format[17][13] = 491; Big5format[3][20] = 490; Big5format[27][106] = 489; Big5format[5][88] = 488; Big5format[6][33] = 487; Big5format[5][139] = 486; Big5format[6][0] = 485; Big5format[17][58] = 484; Big5format[5][133] = 483; Big5format[9][107] = 482; Big5format[23][39] = 481; Big5format[5][23] = 480; Big5format[3][79] = 479; Big5format[32][97] = 478; Big5format[3][136] = 477; Big5format[4][94] = 476; Big5format[21][61] = 475; Big5format[23][123] = 474; Big5format[26][16] = 473; Big5format[24][137] = 472; Big5format[22][18] = 471; Big5format[5][1] = 470; Big5format[20][119] = 469; Big5format[3][7] = 468; Big5format[10][79] = 467; Big5format[15][105] = 466; Big5format[3][144] = 465; Big5format[12][80] = 464; Big5format[15][73] = 463; Big5format[3][19] = 462; Big5format[8][109] = 461; Big5format[3][15] = 460; Big5format[31][82] = 459; Big5format[3][43] = 458; Big5format[25][119] = 457; Big5format[16][111] = 456; Big5format[7][77] = 455; Big5format[3][95] = 454; Big5format[24][82] = 453; Big5format[7][52] = 452; Big5format[9][151] = 451; Big5format[3][129] = 450; Big5format[5][87] = 449; Big5format[3][55] = 448; Big5format[8][153] = 447; Big5format[4][83] = 446; Big5format[3][114] = 445; Big5format[23][147] = 444; Big5format[15][31] = 443; Big5format[3][54] = 442; Big5format[11][122] = 441; Big5format[4][4] = 440; Big5format[34][149] = 439; Big5format[3][17] = 438; Big5format[21][64] = 437; Big5format[26][144] = 436; Big5format[4][62] = 435; Big5format[8][15] = 434; Big5format[35][80] = 433; Big5format[7][110] = 432; Big5format[23][114] = 431; 
Big5format[3][108] = 430; Big5format[3][62] = 429; Big5format[21][41] = 428; Big5format[15][99] = 427; Big5format[5][47] = 426; Big5format[4][96] = 425; Big5format[20][122] = 424; Big5format[5][21] = 423; Big5format[4][157] = 422; Big5format[16][14] = 421; Big5format[3][117] = 420; Big5format[7][129] = 419; Big5format[4][27] = 418; Big5format[5][30] = 417; Big5format[22][16] = 416; Big5format[5][64] = 415; Big5format[17][99] = 414; Big5format[17][57] = 413; Big5format[8][105] = 412; Big5format[5][112] = 411; Big5format[20][59] = 410; Big5format[6][129] = 409; Big5format[18][17] = 408; Big5format[3][92] = 407; Big5format[28][118] = 406; Big5format[3][109] = 405; Big5format[31][51] = 404; Big5format[13][116] = 403; Big5format[6][15] = 402; Big5format[36][136] = 401; Big5format[12][74] = 400; Big5format[20][88] = 399; Big5format[36][68] = 398; Big5format[3][147] = 397; Big5format[15][84] = 396; Big5format[16][32] = 395; Big5format[16][58] = 394; Big5format[7][66] = 393; Big5format[23][107] = 392; Big5format[9][6] = 391; Big5format[12][86] = 390; Big5format[23][112] = 389; Big5format[37][23] = 388; Big5format[3][138] = 387; Big5format[20][68] = 386; Big5format[15][116] = 385; Big5format[18][64] = 384; Big5format[12][139] = 383; Big5format[11][155] = 382; Big5format[4][156] = 381; Big5format[12][84] = 380; Big5format[18][49] = 379; Big5format[25][125] = 378; Big5format[25][147] = 377; Big5format[15][110] = 376; Big5format[19][96] = 375; Big5format[30][152] = 374; Big5format[6][31] = 373; Big5format[27][117] = 372; Big5format[3][10] = 371; Big5format[6][131] = 370; Big5format[13][112] = 369; Big5format[36][156] = 368; Big5format[4][60] = 367; Big5format[15][121] = 366; Big5format[4][112] = 365; Big5format[30][142] = 364; Big5format[23][154] = 363; Big5format[27][101] = 362; Big5format[9][140] = 361; Big5format[3][89] = 360; Big5format[18][148] = 359; Big5format[4][69] = 358; Big5format[16][49] = 357; Big5format[6][117] = 356; Big5format[36][55] = 355; Big5format[5][123] 
= 354; Big5format[4][126] = 353; Big5format[4][119] = 352; Big5format[9][95] = 351; Big5format[5][24] = 350; Big5format[16][133] = 349; Big5format[10][134] = 348; Big5format[26][59] = 347; Big5format[6][41] = 346; Big5format[6][146] = 345; Big5format[19][24] = 344; Big5format[5][113] = 343; Big5format[10][118] = 342; Big5format[34][151] = 341; Big5format[9][72] = 340; Big5format[31][25] = 339; Big5format[18][126] = 338; Big5format[18][28] = 337; Big5format[4][153] = 336; Big5format[3][84] = 335; Big5format[21][18] = 334; Big5format[25][129] = 333; Big5format[6][107] = 332; Big5format[12][25] = 331; Big5format[17][109] = 330; Big5format[7][76] = 329; Big5format[15][15] = 328; Big5format[4][14] = 327; Big5format[23][88] = 326; Big5format[18][2] = 325; Big5format[6][88] = 324; Big5format[16][84] = 323; Big5format[12][48] = 322; Big5format[7][68] = 321; Big5format[5][50] = 320; Big5format[13][54] = 319; Big5format[7][98] = 318; Big5format[11][6] = 317; Big5format[9][80] = 316; Big5format[16][41] = 315; Big5format[7][43] = 314; Big5format[28][117] = 313; Big5format[3][51] = 312; Big5format[7][3] = 311; Big5format[20][81] = 310; Big5format[4][2] = 309; Big5format[11][16] = 308; Big5format[10][4] = 307; Big5format[10][119] = 306; Big5format[6][142] = 305; Big5format[18][51] = 304; Big5format[8][144] = 303; Big5format[10][65] = 302; Big5format[11][64] = 301; Big5format[11][130] = 300; Big5format[9][92] = 299; Big5format[18][29] = 298; Big5format[18][78] = 297; Big5format[18][151] = 296; Big5format[33][127] = 295; Big5format[35][113] = 294; Big5format[10][155] = 293; Big5format[3][76] = 292; Big5format[36][123] = 291; Big5format[13][143] = 290; Big5format[5][135] = 289; Big5format[23][116] = 288; Big5format[6][101] = 287; Big5format[14][74] = 286; Big5format[7][153] = 285; Big5format[3][101] = 284; Big5format[9][74] = 283; Big5format[3][156] = 282; Big5format[4][147] = 281; Big5format[9][12] = 280; Big5format[18][133] = 279; Big5format[4][0] = 278; Big5format[7][155] = 277; 
Big5format[9][144] = 276; Big5format[23][49] = 275; Big5format[5][89] = 274; Big5format[10][11] = 273; Big5format[3][110] = 272; Big5format[3][40] = 271; Big5format[29][115] = 270; Big5format[9][100] = 269; Big5format[21][67] = 268; Big5format[23][145] = 267; Big5format[10][47] = 266; Big5format[4][31] = 265; Big5format[4][81] = 264; Big5format[22][62] = 263; Big5format[4][28] = 262; Big5format[27][39] = 261; Big5format[27][54] = 260; Big5format[32][46] = 259; Big5format[4][76] = 258; Big5format[26][15] = 257; Big5format[12][154] = 256; Big5format[9][150] = 255; Big5format[15][17] = 254; Big5format[5][129] = 253; Big5format[10][40] = 252; Big5format[13][37] = 251; Big5format[31][104] = 250; Big5format[3][152] = 249; Big5format[5][22] = 248; Big5format[8][48] = 247; Big5format[4][74] = 246; Big5format[6][17] = 245; Big5format[30][82] = 244; Big5format[4][116] = 243; Big5format[16][42] = 242; Big5format[5][55] = 241; Big5format[4][64] = 240; Big5format[14][19] = 239; Big5format[35][82] = 238; Big5format[30][139] = 237; Big5format[26][152] = 236; Big5format[32][32] = 235; Big5format[21][102] = 234; Big5format[10][131] = 233; Big5format[9][128] = 232; Big5format[3][87] = 231; Big5format[4][51] = 230; Big5format[10][15] = 229; Big5format[4][150] = 228; Big5format[7][4] = 227; Big5format[7][51] = 226; Big5format[7][157] = 225; Big5format[4][146] = 224; Big5format[4][91] = 223; Big5format[7][13] = 222; Big5format[17][116] = 221; Big5format[23][21] = 220; Big5format[5][106] = 219; Big5format[14][100] = 218; Big5format[10][152] = 217; Big5format[14][89] = 216; Big5format[6][138] = 215; Big5format[12][157] = 214; Big5format[10][102] = 213; Big5format[19][94] = 212; Big5format[7][74] = 211; Big5format[18][128] = 210; Big5format[27][111] = 209; Big5format[11][57] = 208; Big5format[3][131] = 207; Big5format[30][23] = 206; Big5format[30][126] = 205; Big5format[4][36] = 204; Big5format[26][124] = 203; Big5format[4][19] = 202; Big5format[9][152] = 201; GBKformat[52][132] = 600; 
GBKformat[73][135] = 599; GBKformat[49][123] = 598; GBKformat[77][146] = 597; GBKformat[81][123] = 596; GBKformat[82][144] = 595; GBKformat[51][179] = 594; GBKformat[83][154] = 593; GBKformat[71][139] = 592; GBKformat[64][139] = 591; GBKformat[85][144] = 590; GBKformat[52][125] = 589; GBKformat[88][25] = 588; GBKformat[81][106] = 587; GBKformat[81][148] = 586; GBKformat[62][137] = 585; GBKformat[94][0] = 584; GBKformat[1][64] = 583; GBKformat[67][163] = 582; GBKformat[20][190] = 581; GBKformat[57][131] = 580; GBKformat[29][169] = 579; GBKformat[72][143] = 578; GBKformat[0][173] = 577; GBKformat[11][23] = 576; GBKformat[61][141] = 575; GBKformat[60][123] = 574; GBKformat[81][114] = 573; GBKformat[82][131] = 572; GBKformat[67][156] = 571; GBKformat[71][167] = 570; GBKformat[20][50] = 569; GBKformat[77][132] = 568; GBKformat[84][38] = 567; GBKformat[26][29] = 566; GBKformat[74][187] = 565; GBKformat[62][116] = 564; GBKformat[67][135] = 563; GBKformat[5][86] = 562; GBKformat[72][186] = 561; GBKformat[75][161] = 560; GBKformat[78][130] = 559; GBKformat[94][30] = 558; GBKformat[84][72] = 557; GBKformat[1][67] = 556; GBKformat[75][172] = 555; GBKformat[74][185] = 554; GBKformat[53][160] = 553; GBKformat[123][14] = 552; GBKformat[79][97] = 551; GBKformat[85][110] = 550; GBKformat[78][171] = 549; GBKformat[52][131] = 548; GBKformat[56][100] = 547; GBKformat[50][182] = 546; GBKformat[94][64] = 545; GBKformat[106][74] = 544; GBKformat[11][102] = 543; GBKformat[53][124] = 542; GBKformat[24][3] = 541; GBKformat[86][148] = 540; GBKformat[53][184] = 539; GBKformat[86][147] = 538; GBKformat[96][161] = 537; GBKformat[82][77] = 536; GBKformat[59][146] = 535; GBKformat[84][126] = 534; GBKformat[79][132] = 533; GBKformat[85][123] = 532; GBKformat[71][101] = 531; GBKformat[85][106] = 530; GBKformat[6][184] = 529; GBKformat[57][156] = 528; GBKformat[75][104] = 527; GBKformat[50][137] = 526; GBKformat[79][133] = 525; GBKformat[76][108] = 524; GBKformat[57][142] = 523; GBKformat[84][130] 
= 522; GBKformat[52][128] = 521; GBKformat[47][44] = 520; GBKformat[52][152] = 519; GBKformat[54][104] = 518; GBKformat[30][47] = 517; GBKformat[71][123] = 516; GBKformat[52][107] = 515; GBKformat[45][84] = 514; GBKformat[107][118] = 513; GBKformat[5][161] = 512; GBKformat[48][126] = 511; GBKformat[67][170] = 510; GBKformat[43][6] = 509; GBKformat[70][112] = 508; GBKformat[86][174] = 507; GBKformat[84][166] = 506; GBKformat[79][130] = 505; GBKformat[57][141] = 504; GBKformat[81][178] = 503; GBKformat[56][187] = 502; GBKformat[81][162] = 501; GBKformat[53][104] = 500; GBKformat[123][35] = 499; GBKformat[70][169] = 498; GBKformat[69][164] = 497; GBKformat[109][61] = 496; GBKformat[73][130] = 495; GBKformat[62][134] = 494; GBKformat[54][125] = 493; GBKformat[79][105] = 492; GBKformat[70][165] = 491; GBKformat[71][189] = 490; GBKformat[23][147] = 489; GBKformat[51][139] = 488; GBKformat[47][137] = 487; GBKformat[77][123] = 486; GBKformat[86][183] = 485; GBKformat[63][173] = 484; GBKformat[79][144] = 483; GBKformat[84][159] = 482; GBKformat[60][91] = 481; GBKformat[66][187] = 480; GBKformat[73][114] = 479; GBKformat[85][56] = 478; GBKformat[71][149] = 477; GBKformat[84][189] = 476; GBKformat[104][31] = 475; GBKformat[83][82] = 474; GBKformat[68][35] = 473; GBKformat[11][77] = 472; GBKformat[15][155] = 471; GBKformat[83][153] = 470; GBKformat[71][1] = 469; GBKformat[53][190] = 468; GBKformat[50][135] = 467; GBKformat[3][147] = 466; GBKformat[48][136] = 465; GBKformat[66][166] = 464; GBKformat[55][159] = 463; GBKformat[82][150] = 462; GBKformat[58][178] = 461; GBKformat[64][102] = 460; GBKformat[16][106] = 459; GBKformat[68][110] = 458; GBKformat[54][14] = 457; GBKformat[60][140] = 456; GBKformat[91][71] = 455; GBKformat[54][150] = 454; GBKformat[78][177] = 453; GBKformat[78][117] = 452; GBKformat[104][12] = 451; GBKformat[73][150] = 450; GBKformat[51][142] = 449; GBKformat[81][145] = 448; GBKformat[66][183] = 447; GBKformat[51][178] = 446; GBKformat[75][107] = 445; 
GBKformat[65][119] = 444; GBKformat[69][176] = 443; GBKformat[59][122] = 442; GBKformat[78][160] = 441; GBKformat[85][183] = 440; GBKformat[105][16] = 439; GBKformat[73][110] = 438; GBKformat[104][39] = 437; GBKformat[119][16] = 436; GBKformat[76][162] = 435; GBKformat[67][152] = 434; GBKformat[82][24] = 433; GBKformat[73][121] = 432; GBKformat[83][83] = 431; GBKformat[82][145] = 430; GBKformat[49][133] = 429; GBKformat[94][13] = 428; GBKformat[58][139] = 427; GBKformat[74][189] = 426; GBKformat[66][177] = 425; GBKformat[85][184] = 424; GBKformat[55][183] = 423; GBKformat[71][107] = 422; GBKformat[11][98] = 421; GBKformat[72][153] = 420; GBKformat[2][137] = 419; GBKformat[59][147] = 418; GBKformat[58][152] = 417; GBKformat[55][144] = 416; GBKformat[73][125] = 415; GBKformat[52][154] = 414; GBKformat[70][178] = 413; GBKformat[79][148] = 412; GBKformat[63][143] = 411; GBKformat[50][140] = 410; GBKformat[47][145] = 409; GBKformat[48][123] = 408; GBKformat[56][107] = 407; GBKformat[84][83] = 406; GBKformat[59][112] = 405; GBKformat[124][72] = 404; GBKformat[79][99] = 403; GBKformat[3][37] = 402; GBKformat[114][55] = 401; GBKformat[85][152] = 400; GBKformat[60][47] = 399; GBKformat[65][96] = 398; GBKformat[74][110] = 397; GBKformat[86][182] = 396; GBKformat[50][99] = 395; GBKformat[67][186] = 394; GBKformat[81][74] = 393; GBKformat[80][37] = 392; GBKformat[21][60] = 391; GBKformat[110][12] = 390; GBKformat[60][162] = 389; GBKformat[29][115] = 388; GBKformat[83][130] = 387; GBKformat[52][136] = 386; GBKformat[63][114] = 385; GBKformat[49][127] = 384; GBKformat[83][109] = 383; GBKformat[66][128] = 382; GBKformat[78][136] = 381; GBKformat[81][180] = 380; GBKformat[76][104] = 379; GBKformat[56][156] = 378; GBKformat[61][23] = 377; GBKformat[4][30] = 376; GBKformat[69][154] = 375; GBKformat[100][37] = 374; GBKformat[54][177] = 373; GBKformat[23][119] = 372; GBKformat[71][171] = 371; GBKformat[84][146] = 370; GBKformat[20][184] = 369; GBKformat[86][76] = 368; 
GBKformat[74][132] = 367; GBKformat[47][97] = 366; GBKformat[82][137] = 365; GBKformat[94][56] = 364; GBKformat[92][30] = 363; GBKformat[19][117] = 362; GBKformat[48][173] = 361; GBKformat[2][136] = 360; GBKformat[7][182] = 359; GBKformat[74][188] = 358; GBKformat[14][132] = 357; GBKformat[62][172] = 356; GBKformat[25][39] = 355; GBKformat[85][129] = 354; GBKformat[64][98] = 353; GBKformat[67][127] = 352; GBKformat[72][167] = 351; GBKformat[57][143] = 350; GBKformat[76][187] = 349; GBKformat[83][181] = 348; GBKformat[84][10] = 347; GBKformat[55][166] = 346; GBKformat[55][188] = 345; GBKformat[13][151] = 344; GBKformat[62][124] = 343; GBKformat[53][136] = 342; GBKformat[106][57] = 341; GBKformat[47][166] = 340; GBKformat[109][30] = 339; GBKformat[78][114] = 338; GBKformat[83][19] = 337; GBKformat[56][162] = 336; GBKformat[60][177] = 335; GBKformat[88][9] = 334; GBKformat[74][163] = 333; GBKformat[52][156] = 332; GBKformat[71][180] = 331; GBKformat[60][57] = 330; GBKformat[72][173] = 329; GBKformat[82][91] = 328; GBKformat[51][186] = 327; GBKformat[75][86] = 326; GBKformat[75][78] = 325; GBKformat[76][170] = 324; GBKformat[60][147] = 323; GBKformat[82][75] = 322; GBKformat[80][148] = 321; GBKformat[86][150] = 320; GBKformat[13][95] = 319; GBKformat[0][11] = 318; GBKformat[84][190] = 317; GBKformat[76][166] = 316; GBKformat[14][72] = 315; GBKformat[67][144] = 314; GBKformat[84][44] = 313; GBKformat[72][125] = 312; GBKformat[66][127] = 311; GBKformat[60][25] = 310; GBKformat[70][146] = 309; GBKformat[79][135] = 308; GBKformat[54][135] = 307; GBKformat[60][104] = 306; GBKformat[55][132] = 305; GBKformat[94][2] = 304; GBKformat[54][133] = 303; GBKformat[56][190] = 302; GBKformat[58][174] = 301; GBKformat[80][144] = 300; GBKformat[85][113] = 299; EUC_KRformat[31][43] = 600; EUC_KRformat[19][56] = 599; EUC_KRformat[38][46] = 598; EUC_KRformat[3][3] = 597; EUC_KRformat[29][77] = 596; EUC_KRformat[19][33] = 595; EUC_KRformat[30][0] = 594; EUC_KRformat[29][89] = 593; 
EUC_KRformat[31][26] = 592; EUC_KRformat[31][38] = 591; EUC_KRformat[32][85] = 590; EUC_KRformat[15][0] = 589; EUC_KRformat[16][54] = 588; EUC_KRformat[15][76] = 587; EUC_KRformat[31][25] = 586; EUC_KRformat[23][13] = 585; EUC_KRformat[28][34] = 584; EUC_KRformat[18][9] = 583; EUC_KRformat[29][37] = 582; EUC_KRformat[22][45] = 581; EUC_KRformat[19][46] = 580; EUC_KRformat[16][65] = 579; EUC_KRformat[23][5] = 578; EUC_KRformat[26][70] = 577; EUC_KRformat[31][53] = 576; EUC_KRformat[27][12] = 575; EUC_KRformat[30][67] = 574; EUC_KRformat[31][57] = 573; EUC_KRformat[20][20] = 572; EUC_KRformat[30][31] = 571; EUC_KRformat[20][72] = 570; EUC_KRformat[15][51] = 569; EUC_KRformat[3][8] = 568; EUC_KRformat[32][53] = 567; EUC_KRformat[27][85] = 566; EUC_KRformat[25][23] = 565; EUC_KRformat[15][44] = 564; EUC_KRformat[32][3] = 563; EUC_KRformat[31][68] = 562; EUC_KRformat[30][24] = 561; EUC_KRformat[29][49] = 560; EUC_KRformat[27][49] = 559; EUC_KRformat[23][23] = 558; EUC_KRformat[31][91] = 557; EUC_KRformat[31][46] = 556; EUC_KRformat[19][74] = 555; EUC_KRformat[27][27] = 554; EUC_KRformat[3][17] = 553; EUC_KRformat[20][38] = 552; EUC_KRformat[21][82] = 551; EUC_KRformat[28][25] = 550; EUC_KRformat[32][5] = 549; EUC_KRformat[31][23] = 548; EUC_KRformat[25][45] = 547; EUC_KRformat[32][87] = 546; EUC_KRformat[18][26] = 545; EUC_KRformat[24][10] = 544; EUC_KRformat[26][82] = 543; EUC_KRformat[15][89] = 542; EUC_KRformat[28][36] = 541; EUC_KRformat[28][31] = 540; EUC_KRformat[16][23] = 539; EUC_KRformat[16][77] = 538; EUC_KRformat[19][84] = 537; EUC_KRformat[23][72] = 536; EUC_KRformat[38][48] = 535; EUC_KRformat[23][2] = 534; EUC_KRformat[30][20] = 533; EUC_KRformat[38][47] = 532; EUC_KRformat[39][12] = 531; EUC_KRformat[23][21] = 530; EUC_KRformat[18][17] = 529; EUC_KRformat[30][87] = 528; EUC_KRformat[29][62] = 527; EUC_KRformat[29][87] = 526; EUC_KRformat[34][53] = 525; EUC_KRformat[32][29] = 524; EUC_KRformat[35][0] = 523; EUC_KRformat[24][43] = 522; EUC_KRformat[36][44] 
= 521; EUC_KRformat[20][30] = 520; EUC_KRformat[39][86] = 519; EUC_KRformat[22][14] = 518; EUC_KRformat[29][39] = 517; EUC_KRformat[28][38] = 516; EUC_KRformat[23][79] = 515; EUC_KRformat[24][56] = 514; EUC_KRformat[29][63] = 513; EUC_KRformat[31][45] = 512; EUC_KRformat[23][26] = 511; EUC_KRformat[15][87] = 510; EUC_KRformat[30][74] = 509; EUC_KRformat[24][69] = 508; EUC_KRformat[20][4] = 507; EUC_KRformat[27][50] = 506; EUC_KRformat[30][75] = 505; EUC_KRformat[24][13] = 504; EUC_KRformat[30][8] = 503; EUC_KRformat[31][6] = 502; EUC_KRformat[25][80] = 501; EUC_KRformat[36][8] = 500; EUC_KRformat[15][18] = 499; EUC_KRformat[39][23] = 498; EUC_KRformat[16][24] = 497; EUC_KRformat[31][89] = 496; EUC_KRformat[15][71] = 495; EUC_KRformat[15][57] = 494; EUC_KRformat[30][11] = 493; EUC_KRformat[15][36] = 492; EUC_KRformat[16][60] = 491; EUC_KRformat[24][45] = 490; EUC_KRformat[37][35] = 489; EUC_KRformat[24][87] = 488; EUC_KRformat[20][45] = 487; EUC_KRformat[31][90] = 486; EUC_KRformat[32][21] = 485; EUC_KRformat[19][70] = 484; EUC_KRformat[24][15] = 483; EUC_KRformat[26][92] = 482; EUC_KRformat[37][13] = 481; EUC_KRformat[39][2] = 480; EUC_KRformat[23][70] = 479; EUC_KRformat[27][25] = 478; EUC_KRformat[15][69] = 477; EUC_KRformat[19][61] = 476; EUC_KRformat[31][58] = 475; EUC_KRformat[24][57] = 474; EUC_KRformat[36][74] = 473; EUC_KRformat[21][6] = 472; EUC_KRformat[30][44] = 471; EUC_KRformat[15][91] = 470; EUC_KRformat[27][16] = 469; EUC_KRformat[29][42] = 468; EUC_KRformat[33][86] = 467; EUC_KRformat[29][41] = 466; EUC_KRformat[20][68] = 465; EUC_KRformat[25][47] = 464; EUC_KRformat[22][0] = 463; EUC_KRformat[18][14] = 462; EUC_KRformat[31][28] = 461; EUC_KRformat[15][2] = 460; EUC_KRformat[23][76] = 459; EUC_KRformat[38][32] = 458; EUC_KRformat[29][82] = 457; EUC_KRformat[21][86] = 456; EUC_KRformat[24][62] = 455; EUC_KRformat[31][64] = 454; EUC_KRformat[38][26] = 453; EUC_KRformat[32][86] = 452; EUC_KRformat[22][32] = 451; EUC_KRformat[19][59] = 450; 
EUC_KRformat[34][18] = 449; EUC_KRformat[18][54] = 448; EUC_KRformat[38][63] = 447; EUC_KRformat[36][23] = 446; EUC_KRformat[35][35] = 445; EUC_KRformat[32][62] = 444; EUC_KRformat[28][35] = 443; EUC_KRformat[27][13] = 442; EUC_KRformat[31][59] = 441; EUC_KRformat[29][29] = 440; EUC_KRformat[15][64] = 439; EUC_KRformat[26][84] = 438; EUC_KRformat[21][90] = 437; EUC_KRformat[20][24] = 436; EUC_KRformat[16][18] = 435; EUC_KRformat[22][23] = 434; EUC_KRformat[31][14] = 433; EUC_KRformat[15][1] = 432; EUC_KRformat[18][63] = 431; EUC_KRformat[19][10] = 430; EUC_KRformat[25][49] = 429; EUC_KRformat[36][57] = 428; EUC_KRformat[20][22] = 427; EUC_KRformat[15][15] = 426; EUC_KRformat[31][51] = 425; EUC_KRformat[24][60] = 424; EUC_KRformat[31][70] = 423; EUC_KRformat[15][7] = 422; EUC_KRformat[28][40] = 421; EUC_KRformat[18][41] = 420; EUC_KRformat[15][38] = 419; EUC_KRformat[32][0] = 418; EUC_KRformat[19][51] = 417; EUC_KRformat[34][62] = 416; EUC_KRformat[16][27] = 415; EUC_KRformat[20][70] = 414; EUC_KRformat[22][33] = 413; EUC_KRformat[26][73] = 412; EUC_KRformat[20][79] = 411; EUC_KRformat[23][6] = 410; EUC_KRformat[24][85] = 409; EUC_KRformat[38][51] = 408; EUC_KRformat[29][88] = 407; EUC_KRformat[38][55] = 406; EUC_KRformat[32][32] = 405; EUC_KRformat[27][18] = 404; EUC_KRformat[23][87] = 403; EUC_KRformat[35][6] = 402; EUC_KRformat[34][27] = 401; EUC_KRformat[39][35] = 400; EUC_KRformat[30][88] = 399; EUC_KRformat[32][92] = 398; EUC_KRformat[32][49] = 397; EUC_KRformat[24][61] = 396; EUC_KRformat[18][74] = 395; EUC_KRformat[23][77] = 394; EUC_KRformat[23][50] = 393; EUC_KRformat[23][32] = 392; EUC_KRformat[23][36] = 391; EUC_KRformat[38][38] = 390; EUC_KRformat[29][86] = 389; EUC_KRformat[36][15] = 388; EUC_KRformat[31][50] = 387; EUC_KRformat[15][86] = 386; EUC_KRformat[39][13] = 385; EUC_KRformat[34][26] = 384; EUC_KRformat[19][34] = 383; EUC_KRformat[16][3] = 382; EUC_KRformat[26][93] = 381; EUC_KRformat[19][67] = 380; EUC_KRformat[24][72] = 379; 
EUC_KRformat[29][17] = 378; EUC_KRformat[23][24] = 377; EUC_KRformat[25][19] = 376; EUC_KRformat[18][65] = 375; EUC_KRformat[30][78] = 374; EUC_KRformat[27][52] = 373; EUC_KRformat[22][18] = 372; EUC_KRformat[16][38] = 371; EUC_KRformat[21][26] = 370; EUC_KRformat[34][20] = 369; EUC_KRformat[15][42] = 368; EUC_KRformat[16][71] = 367; EUC_KRformat[17][17] = 366; EUC_KRformat[24][71] = 365; EUC_KRformat[18][84] = 364; EUC_KRformat[15][40] = 363; EUC_KRformat[31][62] = 362; EUC_KRformat[15][8] = 361; EUC_KRformat[16][69] = 360; EUC_KRformat[29][79] = 359; EUC_KRformat[38][91] = 358; EUC_KRformat[31][92] = 357; EUC_KRformat[20][77] = 356; EUC_KRformat[3][16] = 355; EUC_KRformat[27][87] = 354; EUC_KRformat[16][25] = 353; EUC_KRformat[36][33] = 352; EUC_KRformat[37][76] = 351; EUC_KRformat[30][12] = 350; EUC_KRformat[26][75] = 349; EUC_KRformat[25][14] = 348; EUC_KRformat[32][26] = 347; EUC_KRformat[23][22] = 346; EUC_KRformat[20][90] = 345; EUC_KRformat[19][8] = 344; EUC_KRformat[38][41] = 343; EUC_KRformat[34][2] = 342; EUC_KRformat[39][4] = 341; EUC_KRformat[27][89] = 340; EUC_KRformat[28][41] = 339; EUC_KRformat[28][44] = 338; EUC_KRformat[24][92] = 337; EUC_KRformat[34][65] = 336; EUC_KRformat[39][14] = 335; EUC_KRformat[21][38] = 334; EUC_KRformat[19][31] = 333; EUC_KRformat[37][39] = 332; EUC_KRformat[33][41] = 331; EUC_KRformat[38][4] = 330; EUC_KRformat[23][80] = 329; EUC_KRformat[25][24] = 328; EUC_KRformat[37][17] = 327; EUC_KRformat[22][16] = 326; EUC_KRformat[22][46] = 325; EUC_KRformat[33][91] = 324; EUC_KRformat[24][89] = 323; EUC_KRformat[30][52] = 322; EUC_KRformat[29][38] = 321; EUC_KRformat[38][85] = 320; EUC_KRformat[15][12] = 319; EUC_KRformat[27][58] = 318; EUC_KRformat[29][52] = 317; EUC_KRformat[37][38] = 316; EUC_KRformat[34][41] = 315; EUC_KRformat[31][65] = 314; EUC_KRformat[29][53] = 313; EUC_KRformat[22][47] = 312; EUC_KRformat[22][19] = 311; EUC_KRformat[26][0] = 310; EUC_KRformat[37][86] = 309; EUC_KRformat[35][4] = 308; 
EUC_KRformat[36][54] = 307; EUC_KRformat[20][76] = 306; EUC_KRformat[30][9] = 305; EUC_KRformat[30][33] = 304; EUC_KRformat[23][17] = 303; EUC_KRformat[23][33] = 302; EUC_KRformat[38][52] = 301; EUC_KRformat[15][19] = 300; EUC_KRformat[28][45] = 299; EUC_KRformat[29][78] = 298; EUC_KRformat[23][15] = 297; EUC_KRformat[33][5] = 296; EUC_KRformat[17][40] = 295; EUC_KRformat[30][83] = 294; EUC_KRformat[18][1] = 293; EUC_KRformat[30][81] = 292; EUC_KRformat[19][40] = 291; EUC_KRformat[24][47] = 290; EUC_KRformat[17][56] = 289; EUC_KRformat[39][80] = 288; EUC_KRformat[30][46] = 287; EUC_KRformat[16][61] = 286; EUC_KRformat[26][78] = 285; EUC_KRformat[26][57] = 284; EUC_KRformat[20][46] = 283; EUC_KRformat[25][15] = 282; EUC_KRformat[25][91] = 281; EUC_KRformat[21][83] = 280; EUC_KRformat[30][77] = 279; EUC_KRformat[35][30] = 278; EUC_KRformat[30][34] = 277; EUC_KRformat[20][69] = 276; EUC_KRformat[35][10] = 275; EUC_KRformat[29][70] = 274; EUC_KRformat[22][50] = 273; EUC_KRformat[18][0] = 272; EUC_KRformat[22][64] = 271; EUC_KRformat[38][65] = 270; EUC_KRformat[22][70] = 269; EUC_KRformat[24][58] = 268; EUC_KRformat[19][66] = 267; EUC_KRformat[30][59] = 266; EUC_KRformat[37][14] = 265; EUC_KRformat[16][56] = 264; EUC_KRformat[29][85] = 263; EUC_KRformat[31][15] = 262; EUC_KRformat[36][84] = 261; EUC_KRformat[39][15] = 260; EUC_KRformat[39][90] = 259; EUC_KRformat[18][12] = 258; EUC_KRformat[21][93] = 257; EUC_KRformat[24][66] = 256; EUC_KRformat[27][90] = 255; EUC_KRformat[25][90] = 254; EUC_KRformat[22][24] = 253; EUC_KRformat[36][67] = 252; EUC_KRformat[33][90] = 251; EUC_KRformat[15][60] = 250; EUC_KRformat[23][85] = 249; EUC_KRformat[34][1] = 248; EUC_KRformat[39][37] = 247; EUC_KRformat[21][18] = 246; EUC_KRformat[34][4] = 245; EUC_KRformat[28][33] = 244; EUC_KRformat[15][13] = 243; EUC_KRformat[32][22] = 242; EUC_KRformat[30][76] = 241; EUC_KRformat[20][21] = 240; EUC_KRformat[38][66] = 239; EUC_KRformat[32][55] = 238; EUC_KRformat[32][89] = 237; 
EUC_KRformat[25][26] = 236; EUC_KRformat[16][80] = 235; EUC_KRformat[15][43] = 234; EUC_KRformat[38][54] = 233; EUC_KRformat[39][68] = 232; EUC_KRformat[22][88] = 231; EUC_KRformat[21][84] = 230; EUC_KRformat[21][17] = 229; EUC_KRformat[20][28] = 228; EUC_KRformat[32][1] = 227; EUC_KRformat[33][87] = 226; EUC_KRformat[38][71] = 225; EUC_KRformat[37][47] = 224; EUC_KRformat[18][77] = 223; EUC_KRformat[37][58] = 222; EUC_KRformat[34][74] = 221; EUC_KRformat[32][54] = 220; EUC_KRformat[27][33] = 219; EUC_KRformat[32][93] = 218; EUC_KRformat[23][51] = 217; EUC_KRformat[20][57] = 216; EUC_KRformat[22][37] = 215; EUC_KRformat[39][10] = 214; EUC_KRformat[39][17] = 213; EUC_KRformat[33][4] = 212; EUC_KRformat[32][84] = 211; EUC_KRformat[34][3] = 210; EUC_KRformat[28][27] = 209; EUC_KRformat[15][79] = 208; EUC_KRformat[34][21] = 207; EUC_KRformat[34][69] = 206; EUC_KRformat[21][62] = 205; EUC_KRformat[36][24] = 204; EUC_KRformat[16][89] = 203; EUC_KRformat[18][48] = 202; EUC_KRformat[38][15] = 201; EUC_KRformat[36][58] = 200; EUC_KRformat[21][56] = 199; EUC_KRformat[34][48] = 198; EUC_KRformat[21][15] = 197; EUC_KRformat[39][3] = 196; EUC_KRformat[16][44] = 195; EUC_KRformat[18][79] = 194; EUC_KRformat[25][13] = 193; EUC_KRformat[29][47] = 192; EUC_KRformat[38][88] = 191; EUC_KRformat[20][71] = 190; EUC_KRformat[16][58] = 189; EUC_KRformat[35][57] = 188; EUC_KRformat[29][30] = 187; EUC_KRformat[29][23] = 186; EUC_KRformat[34][93] = 185; EUC_KRformat[30][85] = 184; EUC_KRformat[15][80] = 183; EUC_KRformat[32][78] = 182; EUC_KRformat[37][82] = 181; EUC_KRformat[22][40] = 180; EUC_KRformat[21][69] = 179; EUC_KRformat[26][85] = 178; EUC_KRformat[31][31] = 177; EUC_KRformat[28][64] = 176; EUC_KRformat[38][13] = 175; EUC_KRformat[25][2] = 174; EUC_KRformat[22][34] = 173; EUC_KRformat[28][28] = 172; EUC_KRformat[24][91] = 171; EUC_KRformat[33][74] = 170; EUC_KRformat[29][40] = 169; EUC_KRformat[15][77] = 168; EUC_KRformat[32][80] = 167; EUC_KRformat[30][41] = 166; 
EUC_KRformat[23][30] = 165; EUC_KRformat[24][63] = 164; EUC_KRformat[30][53] = 163; EUC_KRformat[39][70] = 162; EUC_KRformat[23][61] = 161; EUC_KRformat[37][27] = 160; EUC_KRformat[16][55] = 159; EUC_KRformat[22][74] = 158; EUC_KRformat[26][50] = 157; EUC_KRformat[16][10] = 156; EUC_KRformat[34][63] = 155; EUC_KRformat[35][14] = 154; EUC_KRformat[17][7] = 153; EUC_KRformat[15][59] = 152; EUC_KRformat[27][23] = 151; EUC_KRformat[18][70] = 150; EUC_KRformat[32][56] = 149; EUC_KRformat[37][87] = 148; EUC_KRformat[17][61] = 147; EUC_KRformat[18][83] = 146; EUC_KRformat[23][86] = 145; EUC_KRformat[17][31] = 144; EUC_KRformat[23][83] = 143; EUC_KRformat[35][2] = 142; EUC_KRformat[18][64] = 141; EUC_KRformat[27][43] = 140; EUC_KRformat[32][42] = 139; EUC_KRformat[25][76] = 138; EUC_KRformat[19][85] = 137; EUC_KRformat[37][81] = 136; EUC_KRformat[38][83] = 135; EUC_KRformat[35][7] = 134; EUC_KRformat[16][51] = 133; EUC_KRformat[27][22] = 132; EUC_KRformat[16][76] = 131; EUC_KRformat[22][4] = 130; EUC_KRformat[38][84] = 129; EUC_KRformat[17][83] = 128; EUC_KRformat[24][46] = 127; EUC_KRformat[33][15] = 126; EUC_KRformat[20][48] = 125; EUC_KRformat[17][30] = 124; EUC_KRformat[30][93] = 123; EUC_KRformat[28][11] = 122; EUC_KRformat[28][30] = 121; EUC_KRformat[15][62] = 120; EUC_KRformat[17][87] = 119; EUC_KRformat[32][81] = 118; EUC_KRformat[23][37] = 117; EUC_KRformat[30][22] = 116; EUC_KRformat[32][66] = 115; EUC_KRformat[33][78] = 114; EUC_KRformat[21][4] = 113; EUC_KRformat[31][17] = 112; EUC_KRformat[39][61] = 111; EUC_KRformat[18][76] = 110; EUC_KRformat[15][85] = 109; EUC_KRformat[31][47] = 108; EUC_KRformat[19][57] = 107; EUC_KRformat[23][55] = 106; EUC_KRformat[27][29] = 105; EUC_KRformat[29][46] = 104; EUC_KRformat[33][0] = 103; EUC_KRformat[16][83] = 102; EUC_KRformat[39][78] = 101; EUC_KRformat[32][77] = 100; EUC_KRformat[36][25] = 99; EUC_KRformat[34][19] = 98; EUC_KRformat[38][49] = 97; EUC_KRformat[19][25] = 96; EUC_KRformat[23][53] = 95; EUC_KRformat[28][43] = 
94; EUC_KRformat[31][44] = 93; EUC_KRformat[36][34] = 92; EUC_KRformat[16][34] = 91; EUC_KRformat[35][1] = 90; EUC_KRformat[19][87] = 89; EUC_KRformat[18][53] = 88; EUC_KRformat[29][54] = 87; EUC_KRformat[22][41] = 86; EUC_KRformat[38][18] = 85; EUC_KRformat[22][2] = 84; EUC_KRformat[20][3] = 83; EUC_KRformat[39][69] = 82; EUC_KRformat[30][29] = 81; EUC_KRformat[28][19] = 80; EUC_KRformat[29][90] = 79; EUC_KRformat[17][86] = 78; EUC_KRformat[15][9] = 77; EUC_KRformat[39][73] = 76; EUC_KRformat[15][37] = 75; EUC_KRformat[35][40] = 74; EUC_KRformat[33][77] = 73; EUC_KRformat[27][86] = 72; EUC_KRformat[36][79] = 71; EUC_KRformat[23][18] = 70; EUC_KRformat[34][87] = 69; EUC_KRformat[39][24] = 68; EUC_KRformat[26][8] = 67; EUC_KRformat[33][48] = 66; EUC_KRformat[39][30] = 65; EUC_KRformat[33][28] = 64; EUC_KRformat[16][67] = 63; EUC_KRformat[31][78] = 62; EUC_KRformat[32][23] = 61; EUC_KRformat[24][55] = 60; EUC_KRformat[30][68] = 59; EUC_KRformat[18][60] = 58; EUC_KRformat[15][17] = 57; EUC_KRformat[23][34] = 56; EUC_KRformat[20][49] = 55; EUC_KRformat[15][78] = 54; EUC_KRformat[24][14] = 53; EUC_KRformat[19][41] = 52; EUC_KRformat[31][55] = 51; EUC_KRformat[21][39] = 50; EUC_KRformat[35][9] = 49; EUC_KRformat[30][15] = 48; EUC_KRformat[20][52] = 47; EUC_KRformat[35][71] = 46; EUC_KRformat[20][7] = 45; EUC_KRformat[29][72] = 44; EUC_KRformat[37][77] = 43; EUC_KRformat[22][35] = 42; EUC_KRformat[20][61] = 41; EUC_KRformat[31][60] = 40; EUC_KRformat[20][93] = 39; EUC_KRformat[27][92] = 38; EUC_KRformat[28][16] = 37; EUC_KRformat[36][26] = 36; EUC_KRformat[18][89] = 35; EUC_KRformat[21][63] = 34; EUC_KRformat[22][52] = 33; EUC_KRformat[24][65] = 32; EUC_KRformat[31][8] = 31; EUC_KRformat[31][49] = 30; EUC_KRformat[33][30] = 29; EUC_KRformat[37][15] = 28; EUC_KRformat[18][18] = 27; EUC_KRformat[25][50] = 26; EUC_KRformat[29][20] = 25; EUC_KRformat[35][48] = 24; EUC_KRformat[38][75] = 23; EUC_KRformat[26][83] = 22; EUC_KRformat[21][87] = 21; EUC_KRformat[27][71] = 20; 
EUC_KRformat[32][91] = 19; EUC_KRformat[25][73] = 18; EUC_KRformat[16][84] = 17; EUC_KRformat[25][31] = 16; EUC_KRformat[17][90] = 15; EUC_KRformat[18][40] = 14; EUC_KRformat[17][77] = 13; EUC_KRformat[17][35] = 12; EUC_KRformat[23][52] = 11; EUC_KRformat[23][35] = 10; EUC_KRformat[16][5] = 9; EUC_KRformat[23][58] = 8; EUC_KRformat[19][60] = 7; EUC_KRformat[30][32] = 6; EUC_KRformat[38][34] = 5; EUC_KRformat[23][4] = 4; EUC_KRformat[23][1] = 3; EUC_KRformat[27][57] = 2; EUC_KRformat[39][38] = 1; EUC_KRformat[32][33] = 0; JPformat[3][74] = 600; JPformat[3][45] = 599; JPformat[3][3] = 598; JPformat[3][24] = 597; JPformat[3][30] = 596; JPformat[3][42] = 595; JPformat[3][46] = 594; JPformat[3][39] = 593; JPformat[3][11] = 592; JPformat[3][37] = 591; JPformat[3][38] = 590; JPformat[3][31] = 589; JPformat[3][41] = 588; JPformat[3][5] = 587; JPformat[3][10] = 586; JPformat[3][75] = 585; JPformat[3][65] = 584; JPformat[3][72] = 583; JPformat[37][91] = 582; JPformat[0][27] = 581; JPformat[3][18] = 580; JPformat[3][22] = 579; JPformat[3][61] = 578; JPformat[3][14] = 577; JPformat[24][80] = 576; JPformat[4][82] = 575; JPformat[17][80] = 574; JPformat[30][44] = 573; JPformat[3][73] = 572; JPformat[3][64] = 571; JPformat[38][14] = 570; JPformat[33][70] = 569; JPformat[3][1] = 568; JPformat[3][16] = 567; JPformat[3][35] = 566; JPformat[3][40] = 565; JPformat[4][74] = 564; JPformat[4][24] = 563; JPformat[42][59] = 562; JPformat[3][7] = 561; JPformat[3][71] = 560; JPformat[3][12] = 559; JPformat[15][75] = 558; JPformat[3][20] = 557; JPformat[4][39] = 556; JPformat[34][69] = 555; JPformat[3][28] = 554; JPformat[35][24] = 553; JPformat[3][82] = 552; JPformat[28][47] = 551; JPformat[3][67] = 550; JPformat[37][16] = 549; JPformat[26][93] = 548; JPformat[4][1] = 547; JPformat[26][85] = 546; JPformat[31][14] = 545; JPformat[4][3] = 544; JPformat[4][72] = 543; JPformat[24][51] = 542; JPformat[27][51] = 541; JPformat[27][49] = 540; JPformat[22][77] = 539; JPformat[27][10] = 538; 
JPformat[29][68] = 537; JPformat[20][35] = 536; JPformat[41][11] = 535; JPformat[24][70] = 534; JPformat[36][61] = 533; JPformat[31][23] = 532; JPformat[43][16] = 531; JPformat[23][68] = 530; JPformat[32][15] = 529; JPformat[3][32] = 528; JPformat[19][53] = 527; JPformat[40][83] = 526; JPformat[4][14] = 525; JPformat[36][9] = 524; JPformat[4][73] = 523; JPformat[23][10] = 522; JPformat[3][63] = 521; JPformat[39][14] = 520; JPformat[3][78] = 519; JPformat[33][47] = 518; JPformat[21][39] = 517; JPformat[34][46] = 516; JPformat[36][75] = 515; JPformat[41][92] = 514; JPformat[37][93] = 513; JPformat[4][34] = 512; JPformat[15][86] = 511; JPformat[46][1] = 510; JPformat[37][65] = 509; JPformat[3][62] = 508; JPformat[32][73] = 507; JPformat[21][65] = 506; JPformat[29][75] = 505; JPformat[26][51] = 504; JPformat[3][34] = 503; JPformat[4][10] = 502; JPformat[30][22] = 501; JPformat[35][73] = 500; JPformat[17][82] = 499; JPformat[45][8] = 498; JPformat[27][73] = 497; JPformat[18][55] = 496; JPformat[25][2] = 495; JPformat[3][26] = 494; JPformat[45][46] = 493; JPformat[4][22] = 492; JPformat[4][40] = 491; JPformat[18][10] = 490; JPformat[32][9] = 489; JPformat[26][49] = 488; JPformat[3][47] = 487; JPformat[24][65] = 486; JPformat[4][76] = 485; JPformat[43][67] = 484; JPformat[3][9] = 483; JPformat[41][37] = 482; JPformat[33][68] = 481; JPformat[43][31] = 480; JPformat[19][55] = 479; JPformat[4][30] = 478; JPformat[27][33] = 477; JPformat[16][62] = 476; JPformat[36][35] = 475; JPformat[37][15] = 474; JPformat[27][70] = 473; JPformat[22][71] = 472; JPformat[33][45] = 471; JPformat[31][78] = 470; JPformat[43][59] = 469; JPformat[32][19] = 468; JPformat[17][28] = 467; JPformat[40][28] = 466; JPformat[20][93] = 465; JPformat[18][15] = 464; JPformat[4][23] = 463; JPformat[3][23] = 462; JPformat[26][64] = 461; JPformat[44][92] = 460; JPformat[17][27] = 459; JPformat[3][56] = 458; JPformat[25][38] = 457; JPformat[23][31] = 456; JPformat[35][43] = 455; JPformat[4][54] = 454; 
JPformat[35][19] = 453; JPformat[22][47] = 452; JPformat[42][0] = 451; JPformat[23][28] = 450; JPformat[46][33] = 449; JPformat[36][85] = 448; JPformat[31][12] = 447; JPformat[3][76] = 446; JPformat[4][75] = 445; JPformat[36][56] = 444; JPformat[4][64] = 443; JPformat[25][77] = 442; JPformat[15][52] = 441; JPformat[33][73] = 440; JPformat[3][55] = 439; JPformat[43][82] = 438; JPformat[27][82] = 437; JPformat[20][3] = 436; JPformat[40][51] = 435; JPformat[3][17] = 434; JPformat[27][71] = 433; JPformat[4][52] = 432; JPformat[44][48] = 431; JPformat[27][2] = 430; JPformat[17][39] = 429; JPformat[31][8] = 428; JPformat[44][54] = 427; JPformat[43][18] = 426; JPformat[43][77] = 425; JPformat[4][61] = 424; JPformat[19][91] = 423; JPformat[31][13] = 422; JPformat[44][71] = 421; JPformat[20][0] = 420; JPformat[23][87] = 419; JPformat[21][14] = 418; JPformat[29][13] = 417; JPformat[3][58] = 416; JPformat[26][18] = 415; JPformat[4][47] = 414; JPformat[4][18] = 413; JPformat[3][53] = 412; JPformat[26][92] = 411; JPformat[21][7] = 410; JPformat[4][37] = 409; JPformat[4][63] = 408; JPformat[36][51] = 407; JPformat[4][32] = 406; JPformat[28][73] = 405; JPformat[4][50] = 404; JPformat[41][60] = 403; JPformat[23][1] = 402; JPformat[36][92] = 401; JPformat[15][41] = 400; JPformat[21][71] = 399; JPformat[41][30] = 398; JPformat[32][76] = 397; JPformat[17][34] = 396; JPformat[26][15] = 395; JPformat[26][25] = 394; JPformat[31][77] = 393; JPformat[31][3] = 392; JPformat[46][34] = 391; JPformat[27][84] = 390; JPformat[23][8] = 389; JPformat[16][0] = 388; JPformat[28][80] = 387; JPformat[26][54] = 386; JPformat[33][18] = 385; JPformat[31][20] = 384; JPformat[31][62] = 383; JPformat[30][41] = 382; JPformat[33][30] = 381; JPformat[45][45] = 380; JPformat[37][82] = 379; JPformat[15][33] = 378; JPformat[20][12] = 377; JPformat[18][5] = 376; JPformat[28][86] = 375; JPformat[30][19] = 374; JPformat[42][43] = 373; JPformat[36][31] = 372; JPformat[17][93] = 371; JPformat[4][15] = 370; 
JPformat[21][20] = 369; JPformat[23][21] = 368; JPformat[28][72] = 367; JPformat[4][20] = 366; JPformat[26][55] = 365; JPformat[21][5] = 364; JPformat[19][16] = 363; JPformat[23][64] = 362; JPformat[40][59] = 361; JPformat[37][26] = 360; JPformat[26][56] = 359; JPformat[4][12] = 358; JPformat[33][71] = 357; JPformat[32][39] = 356; JPformat[38][40] = 355; JPformat[22][74] = 354; JPformat[3][25] = 353; JPformat[15][48] = 352; JPformat[41][82] = 351; JPformat[41][9] = 350; JPformat[25][48] = 349; JPformat[31][71] = 348; JPformat[43][29] = 347; JPformat[26][80] = 346; JPformat[4][5] = 345; JPformat[18][71] = 344; JPformat[29][0] = 343; JPformat[43][43] = 342; JPformat[23][81] = 341; JPformat[4][42] = 340; JPformat[44][28] = 339; JPformat[23][93] = 338; JPformat[17][81] = 337; JPformat[25][25] = 336; JPformat[41][23] = 335; JPformat[34][35] = 334; JPformat[4][53] = 333; JPformat[28][36] = 332; JPformat[4][41] = 331; JPformat[25][60] = 330; JPformat[23][20] = 329; JPformat[3][43] = 328; JPformat[24][79] = 327; JPformat[29][41] = 326; JPformat[30][83] = 325; JPformat[3][50] = 324; JPformat[22][18] = 323; JPformat[18][3] = 322; JPformat[39][30] = 321; JPformat[4][28] = 320; JPformat[21][64] = 319; JPformat[4][68] = 318; JPformat[17][71] = 317; JPformat[27][0] = 316; JPformat[39][28] = 315; JPformat[30][13] = 314; JPformat[36][70] = 313; JPformat[20][82] = 312; JPformat[33][38] = 311; JPformat[44][87] = 310; JPformat[34][45] = 309; JPformat[4][26] = 308; JPformat[24][44] = 307; JPformat[38][67] = 306; JPformat[38][6] = 305; JPformat[30][68] = 304; JPformat[15][89] = 303; JPformat[24][93] = 302; JPformat[40][41] = 301; JPformat[38][3] = 300; JPformat[28][23] = 299; JPformat[26][17] = 298; JPformat[4][38] = 297; JPformat[22][78] = 296; JPformat[15][37] = 295; JPformat[25][85] = 294; JPformat[4][9] = 293; JPformat[4][7] = 292; JPformat[27][53] = 291; JPformat[39][29] = 290; JPformat[41][43] = 289; JPformat[25][62] = 288; JPformat[4][48] = 287; JPformat[28][28] = 286; 
JPformat[21][40] = 285; JPformat[36][73] = 284; JPformat[26][39] = 283; JPformat[22][54] = 282; JPformat[33][5] = 281; JPformat[19][21] = 280; JPformat[46][31] = 279; JPformat[20][64] = 278; JPformat[26][63] = 277; JPformat[22][23] = 276; JPformat[25][81] = 275; JPformat[4][62] = 274; JPformat[37][31] = 273; JPformat[40][52] = 272; JPformat[29][79] = 271; JPformat[41][48] = 270; JPformat[31][57] = 269; JPformat[32][92] = 268; JPformat[36][36] = 267; JPformat[27][7] = 266; JPformat[35][29] = 265; JPformat[37][34] = 264; JPformat[34][42] = 263; JPformat[27][15] = 262; JPformat[33][27] = 261; JPformat[31][38] = 260; JPformat[19][79] = 259; JPformat[4][31] = 258; JPformat[4][66] = 257; JPformat[17][32] = 256; JPformat[26][67] = 255; JPformat[16][30] = 254; JPformat[26][46] = 253; JPformat[24][26] = 252; JPformat[35][10] = 251; JPformat[18][37] = 250; JPformat[3][19] = 249; JPformat[33][69] = 248; JPformat[31][9] = 247; JPformat[45][29] = 246; JPformat[3][15] = 245; JPformat[18][54] = 244; JPformat[3][44] = 243; JPformat[31][29] = 242; JPformat[18][45] = 241; JPformat[38][28] = 240; JPformat[24][12] = 239; JPformat[35][82] = 238; JPformat[17][43] = 237; JPformat[28][9] = 236; JPformat[23][25] = 235; JPformat[44][37] = 234; JPformat[23][75] = 233; JPformat[23][92] = 232; JPformat[0][24] = 231; JPformat[19][74] = 230; JPformat[45][32] = 229; JPformat[16][72] = 228; JPformat[16][93] = 227; JPformat[45][13] = 226; JPformat[24][8] = 225; JPformat[25][47] = 224; JPformat[28][26] = 223; JPformat[43][81] = 222; JPformat[32][71] = 221; JPformat[18][41] = 220; JPformat[26][62] = 219; JPformat[41][24] = 218; JPformat[40][11] = 217; JPformat[43][57] = 216; JPformat[34][53] = 215; JPformat[20][32] = 214; JPformat[34][43] = 213; JPformat[41][91] = 212; JPformat[29][57] = 211; JPformat[15][43] = 210; JPformat[22][89] = 209; JPformat[33][83] = 208; JPformat[43][20] = 207; JPformat[25][58] = 206; JPformat[30][30] = 205; JPformat[4][56] = 204; JPformat[17][64] = 203; JPformat[23][0] = 
202; JPformat[44][12] = 201; JPformat[25][37] = 200; JPformat[35][13] = 199; JPformat[20][30] = 198; JPformat[21][84] = 197; JPformat[29][14] = 196; JPformat[30][5] = 195; JPformat[37][2] = 194; JPformat[4][78] = 193; JPformat[29][78] = 192; JPformat[29][84] = 191; JPformat[32][86] = 190; JPformat[20][68] = 189; JPformat[30][39] = 188; JPformat[15][69] = 187; JPformat[4][60] = 186; JPformat[20][61] = 185; JPformat[41][67] = 184; JPformat[16][35] = 183; JPformat[36][57] = 182; JPformat[39][80] = 181; JPformat[4][59] = 180; JPformat[4][44] = 179; JPformat[40][54] = 178; JPformat[30][8] = 177; JPformat[44][30] = 176; JPformat[31][93] = 175; JPformat[31][47] = 174; JPformat[16][70] = 173; JPformat[21][0] = 172; JPformat[17][35] = 171; JPformat[21][67] = 170; JPformat[44][18] = 169; JPformat[36][29] = 168; JPformat[18][67] = 167; JPformat[24][28] = 166; JPformat[36][24] = 165; JPformat[23][5] = 164; JPformat[31][65] = 163; JPformat[26][59] = 162; JPformat[28][2] = 161; JPformat[39][69] = 160; JPformat[42][40] = 159; JPformat[37][80] = 158; JPformat[15][66] = 157; JPformat[34][38] = 156; JPformat[28][48] = 155; JPformat[37][77] = 154; JPformat[29][34] = 153; JPformat[33][12] = 152; JPformat[4][65] = 151; JPformat[30][31] = 150; JPformat[27][92] = 149; JPformat[4][2] = 148; JPformat[4][51] = 147; JPformat[23][77] = 146; JPformat[4][35] = 145; JPformat[3][13] = 144; JPformat[26][26] = 143; JPformat[44][4] = 142; JPformat[39][53] = 141; JPformat[20][11] = 140; JPformat[40][33] = 139; JPformat[45][7] = 138; JPformat[4][70] = 137; JPformat[3][49] = 136; JPformat[20][59] = 135; JPformat[21][12] = 134; JPformat[33][53] = 133; JPformat[20][14] = 132; JPformat[37][18] = 131; JPformat[18][17] = 130; JPformat[36][23] = 129; JPformat[18][57] = 128; JPformat[26][74] = 127; JPformat[35][2] = 126; JPformat[38][58] = 125; JPformat[34][68] = 124; JPformat[29][81] = 123; JPformat[20][69] = 122; JPformat[39][86] = 121; JPformat[4][16] = 120; JPformat[16][49] = 119; JPformat[15][72] = 118; 
JPformat[26][35] = 117; JPformat[32][14] = 116; JPformat[40][90] = 115; JPformat[33][79] = 114; JPformat[35][4] = 113; JPformat[23][33] = 112; JPformat[19][19] = 111; JPformat[31][41] = 110; JPformat[44][1] = 109; JPformat[22][56] = 108; JPformat[31][27] = 107; JPformat[32][18] = 106; JPformat[27][32] = 105; JPformat[37][39] = 104; JPformat[42][11] = 103; JPformat[29][71] = 102; JPformat[32][58] = 101; JPformat[46][10] = 100; JPformat[17][30] = 99; JPformat[38][15] = 98; JPformat[29][60] = 97; JPformat[4][11] = 96; JPformat[38][31] = 95; JPformat[40][79] = 94; JPformat[28][49] = 93; JPformat[28][84] = 92; JPformat[26][77] = 91; JPformat[22][32] = 90; JPformat[33][17] = 89; JPformat[23][18] = 88; JPformat[32][64] = 87; JPformat[4][6] = 86; JPformat[33][51] = 85; JPformat[44][77] = 84; JPformat[29][5] = 83; JPformat[46][25] = 82; JPformat[19][58] = 81; JPformat[4][46] = 80; JPformat[15][71] = 79; JPformat[18][58] = 78; JPformat[26][45] = 77; JPformat[45][66] = 76; JPformat[34][10] = 75; JPformat[19][37] = 74; JPformat[33][65] = 73; JPformat[44][52] = 72; JPformat[16][38] = 71; JPformat[36][46] = 70; JPformat[20][26] = 69; JPformat[30][37] = 68; JPformat[4][58] = 67; JPformat[43][2] = 66; JPformat[30][18] = 65; JPformat[19][35] = 64; JPformat[15][68] = 63; JPformat[3][36] = 62; JPformat[35][40] = 61; JPformat[36][32] = 60; JPformat[37][14] = 59; JPformat[17][11] = 58; JPformat[19][78] = 57; JPformat[37][11] = 56; JPformat[28][63] = 55; JPformat[29][61] = 54; JPformat[33][3] = 53; JPformat[41][52] = 52; JPformat[33][63] = 51; JPformat[22][41] = 50; JPformat[4][19] = 49; JPformat[32][41] = 48; JPformat[24][4] = 47; JPformat[31][28] = 46; JPformat[43][30] = 45; JPformat[17][3] = 44; JPformat[43][70] = 43; JPformat[34][19] = 42; JPformat[20][77] = 41; JPformat[18][83] = 40; JPformat[17][15] = 39; JPformat[23][61] = 38; JPformat[40][27] = 37; JPformat[16][48] = 36; JPformat[39][78] = 35; JPformat[41][53] = 34; JPformat[40][91] = 33; JPformat[40][72] = 32; JPformat[18][52] 
= 31; JPformat[35][66] = 30; JPformat[39][93] = 29; JPformat[19][48] = 28; JPformat[26][36] = 27; JPformat[27][25] = 26; JPformat[42][71] = 25; JPformat[42][85] = 24; JPformat[26][48] = 23; JPformat[28][15] = 22; JPformat[3][66] = 21; JPformat[25][24] = 20; JPformat[27][43] = 19; JPformat[27][78] = 18; JPformat[45][43] = 17; JPformat[27][72] = 16; JPformat[40][29] = 15; JPformat[41][0] = 14; JPformat[19][57] = 13; JPformat[15][59] = 12; JPformat[29][29] = 11; JPformat[4][25] = 10; JPformat[21][42] = 9; JPformat[23][35] = 8; JPformat[33][1] = 7; JPformat[4][57] = 6; JPformat[17][60] = 5; JPformat[25][19] = 4; JPformat[22][65] = 3; JPformat[42][29] = 2; JPformat[27][66] = 1; JPformat[26][89] = 0; } abstract public String getEncoding(String path); abstract public String getEncoding(InputStream in); abstract public String getEncoding(byte[] buffer); abstract public String getEncoding(URL url); }


package org.loon.test.encoding;

/**
 * Catalogue of the character encodings known to the detector, together with
 * three parallel name tables (Java name, human-readable name, HTML charset
 * name) indexed by the encoding constants declared here.
 *
 * <p>
 * Title: LoonFramework<br>
 * Description: basic set of encoding types<br>
 * Copyright: Copyright (c) 2008<br>
 * Company: LoonFramework<br>
 * License: http://www.apache.org/licenses/LICENSE-2.0
 * </p>
 *
 * @author chenpeng
 * @version 0.1
 */
public class Encoding {

    // Supported encodings. Each value is an index into the name tables below.
    public static final int GB2312 = 0;

    public static final int GBK = 1;

    public static final int BIG5 = 2;

    public static final int UTF8 = 3;

    public static final int UNICODE = 4;

    public static final int EUC_KR = 5;

    public static final int SJIS = 6;

    public static final int EUC_JP = 7;

    public static final int ASCII = 8;

    public static final int UNKNOWN = 9;

    /** Number of encoding constants, i.e. the length of every name table. */
    public static final int TOTALT = 10;

    public final static int SIMP = 0;

    public final static int TRAD = 1;

    /** Names used when handing the encoding to Java APIs. */
    public static final String[] javaname = new String[TOTALT];

    /** Human-readable names. */
    public static final String[] nicename = new String[TOTALT];

    /** Charset names as written in HTML. */
    public static final String[] htmlname = new String[TOTALT];

    // The tables are filled once, when the class is loaded, so they are usable
    // through static access even if no Encoding instance has been created and
    // are not reallocated each time an instance is constructed.
    static {
        javaname[GB2312] = "GB2312";
        javaname[GBK] = "GBK";
        javaname[BIG5] = "BIG5";
        javaname[UTF8] = "UTF8";
        javaname[UNICODE] = "Unicode";
        javaname[EUC_KR] = "EUC_KR";
        javaname[SJIS] = "SJIS";
        javaname[EUC_JP] = "EUC_JP";
        javaname[ASCII] = "ASCII";
        javaname[UNKNOWN] = "ISO8859_1";

        htmlname[GB2312] = "GB2312";
        htmlname[GBK] = "GBK";
        htmlname[BIG5] = "BIG5";
        htmlname[UTF8] = "UTF-8";
        htmlname[UNICODE] = "UTF-16";
        htmlname[EUC_KR] = "EUC-KR";
        htmlname[SJIS] = "Shift_JIS";
        htmlname[EUC_JP] = "EUC-JP";
        htmlname[ASCII] = "ASCII";
        htmlname[UNKNOWN] = "ISO8859-1";

        nicename[GB2312] = "GB-2312";
        nicename[GBK] = "GBK";
        nicename[BIG5] = "Big5";
        nicename[UTF8] = "UTF-8";
        nicename[UNICODE] = "Unicode";
        nicename[EUC_KR] = "EUC-KR";
        nicename[SJIS] = "Shift-JIS";
        nicename[EUC_JP] = "EUC-JP";
        nicename[ASCII] = "ASCII";
        nicename[UNKNOWN] = "UNKNOWN";
    }

    /**
     * Creates an instance. The name tables are already populated by the static
     * initializer, so construction has no side effects.
     */
    public Encoding() {
    }

    /**
     * Returns the three names of an encoding joined by commas, in the order
     * Java name, readable name, HTML name.
     *
     * @param type
     *            one of the encoding constants of this class
     * @return the interned string {@code "javaname,nicename,htmlname"}
     * @throws ArrayIndexOutOfBoundsException
     *             if {@code type} is not a valid table index
     */
    public String toEncoding(final int type) {
        // intern() is kept so the returned reference behaves as it did before.
        return (javaname[type] + "," + nicename[type] + "," + htmlname[type])
                .intern();
    }
}




package org.loon.test.encoding;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * 

* Title: LoonFramework *

*

* Description: *

*

* Copyright: Copyright (c) 2008 *

*

* Company: LoonFramework *

*

* License: http://www.apache.org/licenses/LICENSE-2.0 *

* * @author chenpeng * @email:[email protected] * @version 0.1 */ public class ParseEncoding extends Encode { public ParseEncoding() { super(); GB2312format = new int[94][94]; GBKformat = new int[126][191]; Big5format = new int[94][158]; EUC_KRformat = new int[94][94]; JPformat = new int[94][94]; // 初始化编码格式 init(); } public String getEncoding(final String path) { return check(getEncodeValue(path)); } public String getEncoding(final InputStream in) { return check(getEncodeValue(in)); } public String getEncoding(final byte[] buffer) { return check(getEncodeValue(buffer)); } public String getEncoding(final URL url) { return check(getEncodeValue(url)); } private String check(final int result) { if (result == -1) { return nicename[UNKNOWN]; } return nicename[result]; } /** * 解析指定字符串路径编码所用格式 * * @param path * @return */ private int getEncodeValue(String path) { int express = UNKNOWN; if (path.startsWith("http://")) { try { express = getEncodeValue(new URL(path)); } catch (MalformedURLException e) { express = -1; } } else { express = getEncodeValue(new File(path)); } return express; } /** * * 解析指定InputStream所用编码,返回或然率最高的编码类型数值 * * @param in * @return */ public int getEncodeValue(InputStream in) { byte[] rawtext = new byte[8192]; int bytesread = 0, byteoffset = 0; int express = UNKNOWN; InputStream stream = in; try { while ((bytesread = stream.read(rawtext, byteoffset, rawtext.length - byteoffset)) > 0) { byteoffset += bytesread; } ; stream.close(); express = getEncodeValue(rawtext); } catch (Exception e) { express = -1; } return express; } /** * 解析指定url下数据所用编码,返回或然率最高的编码类型数值 * * @param url * @return */ public int getEncodeValue(URL url) { InputStream stream; try { stream = url.openStream(); } catch (IOException e) { stream = null; } return getEncodeValue(stream); } /** * 解析指定file所用编码,返回或然率最高的编码类型数值 * * @param file * @return */ public int getEncodeValue(File file) { byte[] buffer; try { buffer = read(new FileInputStream(file)); } catch (FileNotFoundException e) { buffer = 
null; } return getEncodeValue(buffer); } /** * 将inputstream转为byte[] * * @param inputStream * @return */ private final byte[] read(final InputStream inputStream) { byte[] arrayByte = null; ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); byte[] bytes = new byte[8192]; try { bytes = new byte[inputStream.available()]; int read; while ((read = inputStream.read(bytes)) >= 0) { byteArrayOutputStream.write(bytes, 0, read); } arrayByte = byteArrayOutputStream.toByteArray(); } catch (IOException e) { return null; } return arrayByte; } /** * 解析指定byte[]所用编码,返回或然率最高的数值类型 * * @param content * @return */ public int getEncodeValue(byte[] content) { if (content == null) return -1; int[] scores; int index, maxscore = 0; int encoding = UNKNOWN; scores = new int[TOTALT]; // 分配或然率 scores[GB2312] = gb2312probability(content); scores[GBK] = gbkprobability(content); scores[BIG5] = big5probability(content); scores[UTF8] = utf8probability(content); scores[UNICODE] = utf16probability(content); scores[EUC_KR] = euc_krprobability(content); scores[ASCII] = asciiprobability(content); scores[SJIS] = sjisprobability(content); scores[EUC_JP] = euc_jpprobability(content); scores[UNKNOWN] = 0; // 概率比较 for (index = 0; index < TOTALT; index++) { if (scores[index] > maxscore) { // 索引 encoding = index; // 最大几率 maxscore = scores[index]; } } // 返回或然率大于50%的数据 if (maxscore <= 50) { encoding = UNKNOWN; } return encoding; } /** * gb2312数据或然率计算 * * @param content * @return */ private int gb2312probability(byte[] content) { int i, rawtextlen = 0; int dbchars = 1, gbchars = 1; long gbformat = 0, totalformat = 1; float rangeval = 0, formatval = 0; int row, column; // 检查是否在亚洲汉字范围内 rawtextlen = content.length; for (i = 0; i < rawtextlen - 1; i++) { if (content[i] >= 0) { } else { dbchars++; // 汉字GB码由两个字节组成,每个字节的范围是0xA1 ~ 0xFE if ((byte) 0xA1 <= content[i] && content[i] <= (byte) 0xF7 && (byte) 0xA1 <= content[i + 1] && content[i + 1] <= (byte) 0xFE) { gbchars++; totalformat += 500; row = 
content[i] + 256 - 0xA1; column = content[i + 1] + 256 - 0xA1; if (GB2312format[row][column] != 0) { gbformat += GB2312format[row][column]; } else if (15 <= row && row < 55) { // 在gb编码范围 gbformat += 200; } } i++; } } rangeval = 50 * ((float) gbchars / (float) dbchars); formatval = 50 * ((float) gbformat / (float) totalformat); return (int) (rangeval + formatval); } /** * gb2312或然率计算 * * @param content * @return */ private int gbkprobability(byte[] content) { int i, rawtextlen = 0; int dbchars = 1, gbchars = 1; long gbformat = 0, totalformat = 1; float rangeval = 0, formatval = 0; int row, column; rawtextlen = content.length; for (i = 0; i < rawtextlen - 1; i++) { if (content[i] >= 0) { } else { dbchars++; if ((byte) 0xA1 <= content[i] && content[i] <= (byte) 0xF7 && // gb范围 (byte) 0xA1 <= content[i + 1] && content[i + 1] <= (byte) 0xFE) { gbchars++; totalformat += 500; row = content[i] + 256 - 0xA1; column = content[i + 1] + 256 - 0xA1; if (GB2312format[row][column] != 0) { gbformat += GB2312format[row][column]; } else if (15 <= row && row < 55) { gbformat += 200; } } else if ((byte) 0x81 <= content[i] && content[i] <= (byte) 0xFE && // gb扩展区域 (((byte) 0x80 <= content[i + 1] && content[i + 1] <= (byte) 0xFE) || ((byte) 0x40 <= content[i + 1] && content[i + 1] <= (byte) 0x7E))) { gbchars++; totalformat += 500; row = content[i] + 256 - 0x81; if (0x40 <= content[i + 1] && content[i + 1] <= 0x7E) { column = content[i + 1] - 0x40; } else { column = content[i + 1] + 256 - 0x40; } if (GBKformat[row][column] != 0) { gbformat += GBKformat[row][column]; } } i++; } } rangeval = 50 * ((float) gbchars / (float) dbchars); formatval = 50 * ((float) gbformat / (float) totalformat); return (int) (rangeval + formatval) - 1; } /** * 解析为big5的或然率 * * @param content * @return */ private int big5probability(byte[] content) { int i, rawtextlen = 0; int dbchars = 1, bfchars = 1; float rangeval = 0, formatval = 0; long bfformat = 0, totalformat = 1; int row, column; rawtextlen = 
content.length; for (i = 0; i < rawtextlen - 1; i++) { if (content[i] >= 0) { } else { dbchars++; if ((byte) 0xA1 <= content[i] && content[i] <= (byte) 0xF9 && (((byte) 0x40 <= content[i + 1] && content[i + 1] <= (byte) 0x7E) || ((byte) 0xA1 <= content[i + 1] && content[i + 1] <= (byte) 0xFE))) { bfchars++; totalformat += 500; row = content[i] + 256 - 0xA1; if (0x40 <= content[i + 1] && content[i + 1] <= 0x7E) { column = content[i + 1] - 0x40; } else { column = content[i + 1] + 256 - 0x61; } if (Big5format[row][column] != 0) { bfformat += Big5format[row][column]; } else if (3 <= row && row <= 37) { bfformat += 200; } } i++; } } rangeval = 50 * ((float) bfchars / (float) dbchars); formatval = 50 * ((float) bfformat / (float) totalformat); return (int) (rangeval + formatval); } /** * 在utf-8中的或然率 * * @param content * @return */ private int utf8probability(byte[] content) { int score = 0; int i, rawtextlen = 0; int goodbytes = 0, asciibytes = 0; // 检查是否为汉字可接受范围 rawtextlen = content.length; for (i = 0; i < rawtextlen; i++) { if ((content[i] & (byte) 0x7F) == content[i]) { asciibytes++; } else if (-64 <= content[i] && content[i] <= -33 && i + 1 < rawtextlen && -128 <= content[i + 1] && content[i + 1] <= -65) { goodbytes += 2; i++; } else if (-32 <= content[i] && content[i] <= -17 && i + 2 < rawtextlen && -128 <= content[i + 1] && content[i + 1] <= -65 && -128 <= content[i + 2] && content[i + 2] <= -65) { goodbytes += 3; i += 2; } } if (asciibytes == rawtextlen) { return 0; } score = (int) (100 * ((float) goodbytes / (float) (rawtextlen - asciibytes))); // 如果不高于98则减少到零 if (score > 98) { return score; } else if (score > 95 && goodbytes > 30) { return score; } else { return 0; } } /** * 检查为utf-16的或然率 * * @param content * @return */ private int utf16probability(byte[] content) { if (content.length > 1 && ((byte) 0xFE == content[0] && (byte) 0xFF == content[1]) || ((byte) 0xFF == content[0] && (byte) 0xFE == content[1])) { return 100; } return 0; } /** * 检查为ascii的或然率 * * 
@param content * @return */ private int asciiprobability(byte[] content) { int score = 75; int i, rawtextlen; rawtextlen = content.length; for (i = 0; i < rawtextlen; i++) { if (content[i] < 0) { score = score - 5; } else if (content[i] == (byte) 0x1B) { // ESC (used by ISO 2022) score = score - 5; } if (score <= 0) { return 0; } } return score; } /** * 检查为euc_kr的或然率 * * @param content * @return */ private int euc_krprobability(byte[] content) { int i, rawtextlen = 0; int dbchars = 1, krchars = 1; long krformat = 0, totalformat = 1; float rangeval = 0, formatval = 0; int row, column; rawtextlen = content.length; for (i = 0; i < rawtextlen - 1; i++) { if (content[i] >= 0) { } else { dbchars++; if ((byte) 0xA1 <= content[i] && content[i] <= (byte) 0xFE && (byte) 0xA1 <= content[i + 1] && content[i + 1] <= (byte) 0xFE) { krchars++; totalformat += 500; row = content[i] + 256 - 0xA1; column = content[i + 1] + 256 - 0xA1; if (EUC_KRformat[row][column] != 0) { krformat += EUC_KRformat[row][column]; } else if (15 <= row && row < 55) { krformat += 0; } } i++; } } rangeval = 50 * ((float) krchars / (float) dbchars); formatval = 50 * ((float) krformat / (float) totalformat); return (int) (rangeval + formatval); } private int euc_jpprobability(byte[] content) { int i, rawtextlen = 0; int dbchars = 1, jpchars = 1; long jpformat = 0, totalformat = 1; float rangeval = 0, formatval = 0; int row, column; rawtextlen = content.length; for (i = 0; i < rawtextlen - 1; i++) { if (content[i] >= 0) { } else { dbchars++; if ((byte) 0xA1 <= content[i] && content[i] <= (byte) 0xFE && (byte) 0xA1 <= content[i + 1] && content[i + 1] <= (byte) 0xFE) { jpchars++; totalformat += 500; row = content[i] + 256 - 0xA1; column = content[i + 1] + 256 - 0xA1; if (JPformat[row][column] != 0) { jpformat += JPformat[row][column]; } else if (15 <= row && row < 55) { jpformat += 0; } } i++; } } rangeval = 50 * ((float) jpchars / (float) dbchars); formatval = 50 * ((float) jpformat / (float) totalformat); 
return (int) (rangeval + formatval); } private int sjisprobability(byte[] content) { int i, rawtextlen = 0; int dbchars = 1, jpchars = 1; long jpformat = 0, totalformat = 1; float rangeval = 0, formatval = 0; int row, column, adjust; rawtextlen = content.length; for (i = 0; i < rawtextlen - 1; i++) { if (content[i] >= 0) { } else { dbchars++; if (i + 1 < content.length && (((byte) 0x81 <= content[i] && content[i] <= (byte) 0x9F) || ((byte) 0xE0 <= content[i] && content[i] <= (byte) 0xEF)) && (((byte) 0x40 <= content[i + 1] && content[i + 1] <= (byte) 0x7E) || ((byte) 0x80 <= content[i + 1] && content[i + 1] <= (byte) 0xFC))) { jpchars++; totalformat += 500; row = content[i] + 256; column = content[i + 1] + 256; if (column < 0x9f) { adjust = 1; if (column > 0x7f) { column -= 0x20; } else { column -= 0x19; } } else { adjust = 0; column -= 0x7e; } if (row < 0xa0) { row = ((row - 0x70) << 1) - adjust; } else { row = ((row - 0xb0) << 1) - adjust; } row -= 0x20; column = 0x20; if (row < JPformat.length && column < JPformat[row].length && JPformat[row][column] != 0) { jpformat += JPformat[row][column]; } i++; } else if ((byte) 0xA1 <= content[i] && content[i] <= (byte) 0xDF) { } } } rangeval = 50 * ((float) jpchars / (float) dbchars); formatval = 50 * ((float) jpformat / (float) totalformat); return (int) (rangeval + formatval) - 1; } }



package org.loon.test.encoding;
/**
 * Console demo for {@code ParseEncoding}: prints the encoding detected for a
 * few sample strings and web sites, grouped by region.
 *
 * <p>
 * Title: LoonFramework<br>
 * Copyright: Copyright (c) 2008<br>
 * Company: LoonFramework<br>
 * License: http://www.apache.org/licenses/LICENSE-2.0
 * </p>
 *
 * @author chenpeng
 * @version 0.1
 */
public class EncodingTest {

    /**
     * Runs the demo. The sample strings are converted with the platform
     * default charset and the sites are fetched over the network.
     *
     * @param argc
     *            command line arguments (unused)
     */
    public static void main(String[] argc) {
        ParseEncoding detector = new ParseEncoding();

        System.out.println("中国大陆:");
        showText(detector, "百度");
        showSite(detector, "测试站点,编码格式=", "http://www.baidu.com");
        System.out.println();

        System.out.println("中国台湾:");
        showText(detector, "い地チ瓣");
        showSite(detector, "测试站点,编码格式=", "http://tw.yahoo.com/");
        showSite(detector, "测试站点(繁体字,UTF编码),编码格式=", "http://www.javaworld.com.tw/jute");
        System.out.println();

        System.out.println("日本:");
        showText(detector, "その機能");
        showSite(detector, "测试站点,编码格式=", "http://www.4gamer.net");
        System.out.println();

        System.out.println("自称蚩尤后代那群……:");
        showSite(detector, "测试站点,编码格式=", "http://www.easyjava.co.kr/");
    }

    /** Prints the encoding detected for the platform-charset bytes of a sample string. */
    private static void showText(ParseEncoding detector, String sample) {
        String detected = detector.getEncoding(sample.getBytes());
        System.out.println("测试字符串,编码格式=" + detected);
    }

    /** Prints the given label followed by the encoding detected for a site. */
    private static void showSite(ParseEncoding detector, String label, String address) {
        String detected = detector.getEncoding(address);
        System.out.println(label + detected);
    }
}

你可能感兴趣的:(java字符串编码类型获取)