扫盲时间:
星际译王(StarDict)是使用GTK(GIMP Toolkit)开发的国际化、跨平台的自由桌面词典软件。它本身并不包含词典文件,使用者须自行下载词典后配合使用。它可以运行于多种不同的平台,如Linux、Microsoft Windows、FreeBSD及Solaris,并以GPL授权发布。
星际译王项目:http://stardict.sourceforge.net/
星际译王词典:
下载:http://stardict.sourceforge.net/Dictionaries.php
好,言归正传:
有个学长用C+Python写了个web版的在线词典:C做词典服务器,Python做CGI服务器,两者通过FIFO通信,词典用的是星际译王格式的词典,很有意思。本人模仿学长的思路,仅为唤醒内心深处对纯C的美好回忆-_-。当然这是在业余时间写的,不过由于工作中需要学的东西渐渐多了一些,就很少有时间继续做了。现在先公布一些代码给大家分享,免得以后忘光光-_-
最关键的就是对星际译王词典文件的解析,对于其格式,读者可以自己安装一个,在其安装目录里有个文件专门介绍了词典格式,英文的。
我简单说一下:一个词典分为三个文件。ifo:描述词典信息;idx:存放单词的索引,每条记录的格式为:单词(以'\0'结尾的字符串)+偏移值(4字节)+长度(4字节),偏移值和长度均按大端序(高位字节在前)存储;dict:可以被压缩,只存放解释。
思路:根据输入的单词,在idx中查找其在dict文件中的偏移值和长度,直接取出,很简单。
以下代码使用emacs NT在winXP+MinGW环境下编译通过:

词典信息头文件(dict_info.h):
1
/*
2
* dict_info.h
3
* Author:shoru
4
* 2009-08-23 12:53
5
*/
6
7
#ifndef _DICT_IFO_H
8
#define
_DICT_IFO_H
9
10
/*
 * Test switch: while this is defined, the self-test main() that the
 * source file guards with #ifdef DEBUG is compiled in.
 */
#define DEBUG

/*
 * Size in bytes of the line buffer used when reading the info file.
 */
#define BUFFER_SIZE 500

/*
 * File name suffix of the dictionary info file.
 */
#define IFO_EXT ".ifo"
24
25
/*
 * In-memory form of a dictionary info (.ifo) file.
 */
typedef struct
{
    char version[100];          /* value of the "version" line */
    int  wordcount;             /* number of words in the dictionary */
    int  idxfilesize;           /* size of the index file */
    char bookname[100];         /* name of the dictionary */
    char sametypesequence[10];  /* value of the "sametypesequence" line */
    char other_info[1000];      /* remaining lines we do not care about */
} DICT_INFO;
37
38
/*
39
* 解析词典,返回一个DICT_INFO结构体指针
40
*/
41
DICT_INFO
*
get_dict_info(
char
*
file);
42
43
/*
44
* 解析每行
45
*/
46
static
void
parse_line(
char
*
line, DICT_INFO
*
dict_info);
47
48
#endif
/* _DICT_IFO_H */
词典信息源码(dict_info.c):
1
/*
2
* dict_info.c
3
* Author:shoru
4
* 2009-08-23 12:54
5
*/
6
7
#include
<
stdio.h
>
8
#include
<
stdlib.h
>
9
#include
<
string
.h
>
10
#include
<
errno.h
>
11
#include
"
dict_info.h
"
12
13
/*
14
* 将词典的信息文件装入结构体,并返回该结构体指针
15
* 失败返回NULL
16
*/
17
DICT_INFO
*
get_dict_info(
char
*
info_file)
18
{
19
FILE
*
ifo;
20
char
*
line;
21
char
buffer[BUFFER_SIZE];
22
23
DICT_INFO
*
dict_info
=
(DICT_INFO
*
)malloc(
sizeof
(DICT_INFO));
24
25
ifo
=
fopen(info_file,
"
r
"
);
26
if
(ifo
==
NULL)
27
{
28
fprintf(stderr,
"
%s
"
,strerror(errno));
29
return
NULL;
30
}
31
32
while
((line
=
fgets(buffer,BUFFER_SIZE,ifo))
!=
NULL)
33
{
34
parse_line(line,dict_info);
35
}
36
fclose(ifo);
37
38
return
dict_info;
39
}
40
41
/*
42
* 逐行解析文件,将信息装入特定字段
43
*/
44
static
void
parse_line(
char
*
line,DICT_INFO
*
dict_info)
45
{
46
char
*
idx;
47
48
if
((idx
=
strchr(line,
'
=
'
))
!=
NULL)
49
{
50
if
(strstr(line,
"
version
"
)
!=
NULL)
51
{
52
strcpy(dict_info
->
version,idx
+
1
);
53
}
else
if
(strstr(line,
"
wordcount
"
)
!=
NULL)
54
{
55
dict_info
->
wordcount
=
atoi(idx
+
1
);
56
}
else
if
(strstr(line,
"
idxfilesize
"
)
!=
NULL)
57
{
58
dict_info
->
idxfilesize
=
atoi(idx
+
1
);
59
}
else
if
(strstr(line,
"
bookname
"
)
!=
NULL)
60
{
61
strcpy(dict_info
->
bookname,idx
+
1
);
62
}
else
if
(strstr(line,
"
sametypesequence
"
)
!=
NULL)
63
{
64
strcpy(dict_info
->
sametypesequence,idx
+
1
);
65
}
else
{
66
strcat(dict_info
->
other_info,line);
67
}
68
}
69
}
70
71
72
#ifdef DEBUG
73
74
int
main(
int
argc,
char
**
argv)
75
{
76
DICT_INFO
*
tmp
=
get_dict_info(
"
../dict/oxford-gb/oxford-gb-formated.ifo
"
);
77
if
(tmp
==
NULL)
78
{
79
printf(
"
error\n
"
);
80
exit(EXIT_FAILURE);
81
}
else
{
82
83
}
84
printf(
"
version:%s
"
,tmp
->
version);
85
printf(
"
bookname:%s
"
,tmp
->
bookname);
86
printf(
"
wordcount:%d\n
"
,tmp
->
wordcount);
87
printf(
"
idxfilesize:%d\n
"
,tmp
->
idxfilesize);
88
printf(
"
sts:%s\n
"
,tmp
->
sametypesequence);
89
printf(
"
%s
"
,tmp
->
other_info);
90
free(tmp);
91
return
EXIT_SUCCESS;
92
}
93
#endif
/* DEBUG */
词典索引头文件(dict_idx.h):
1
/*
2
* dict_idx.h
3
* Author:shoru
4
* 2009-09-09 12:27
5
*/
6
7
#ifndef _DICT_IDX_H
8
#define
_DICT_IDX_H
9
10
#include
"
dict_info.h
"
11
/*
 * Test switch: while this is defined, the self-test main() that the
 * source file guards with #ifdef DEBUG is compiled in.
 */
#define DEBUG

/* Truth value, used as the condition of open-ended loops. */
#define TRUE 1

/*
 * File name suffix of the index file.
 * NOTE(review): unlike IFO_EXT (".ifo") this has no leading dot; confirm
 * which form the code that builds file names expects.
 */
#define IDX_EXT "idx"
21
22
/*
 * One entry of the index (.idx) file: a word together with the position
 * of its explanation in the .dict file.
 */
typedef struct
{
    char word[100];  /* the word, as a NUL-terminated string */
    int  offset;     /* byte offset of the explanation in the .dict file */
    int  length;     /* length in bytes of the explanation */
} WORD_IDX;
31
32
/*
33
* Get a OFF_LEN struct of a word.
34
*/
35
static
void
*
get_words(
char
*
filename, DICT_INFO
*
dict_info, WORD_IDX
*
word_idx);
36
37
/*
38
* Binary search for word's idx information.
39
*/
40
WORD_IDX
*
get_idx(
char
*
word,WORD_IDX
*
word_idx, DICT_INFO
*
dict_info0);
41
inline
static
int
to_int(unsigned
char
*
from_int);
42
#endif
/* _DICT_IDX_H */
词典索引源码(dict_idx.c):
1
/*
2
* dict_idx.c
3
* Author:shoru
4
* 2009-09-09 12:27
5
*/
6
7
#include
<
stdlib.h
>
8
#include
<
stdio.h
>
9
#include
<
string
.h
>
10
#include
"
dict_idx.h
"
11
#include
"
dict_info.h
"
12
13
/*
 * Read the index file `filename` and store its entries in word_idx.
 *
 * filename:  path of the .idx file.
 * dict_info: supplies idxfilesize (bytes to read) and wordcount (number
 *            of entries expected).
 * word_idx:  caller-provided array with room for wordcount entries; it
 *            is filled starting at index 0.
 *
 * Each record in the file is a NUL-terminated word followed by a 4-byte
 * offset and a 4-byte length. Words longer than the word field are
 * truncated.
 *
 * Returns word_idx when all wordcount entries were read, or NULL when an
 * argument is invalid, the file cannot be opened, memory cannot be
 * allocated, or the data ends before wordcount entries were found.
 */
static void *get_words(char *filename, DICT_INFO *dict_info, WORD_IDX *word_idx)
{
    FILE *fp;
    unsigned char *buffer;
    unsigned char *pos;
    unsigned char *end;
    size_t size;
    size_t nread;
    int total = 0;

    if (filename == NULL || dict_info == NULL || word_idx == NULL
        || dict_info->idxfilesize <= 0 || dict_info->wordcount <= 0)
    {
        return NULL;
    }

    fp = fopen(filename, "rb");
    if (fp == NULL)
    {
        return NULL;
    }

    /* The whole index is read at once; keep it on the heap, not the stack. */
    size = (size_t)dict_info->idxfilesize;
    buffer = malloc(size);
    if (buffer == NULL)
    {
        fclose(fp);
        return NULL;
    }

    /* Item size 1 so that nread is the number of bytes actually read. */
    nread = fread(buffer, 1, size, fp);
    fclose(fp);

    pos = buffer;
    end = buffer + nread;
    while (total < dict_info->wordcount && pos < end)
    {
        unsigned char *nul = memchr(pos, '\0', (size_t)(end - pos));
        size_t len;

        /* Stop on a record whose terminator or 8 trailing bytes are missing. */
        if (nul == NULL || (size_t)(end - nul) < 1 + 8)
        {
            break;
        }

        len = (size_t)(nul - pos);
        if (len > sizeof word_idx[total].word - 1)
        {
            len = sizeof word_idx[total].word - 1;
        }
        memcpy(word_idx[total].word, pos, len);
        word_idx[total].word[len] = '\0';
        word_idx[total].offset = to_int(nul + 1);
        word_idx[total].length = to_int(nul + 5);

        total++;
        pos = nul + 1 + 8;  /* skip the terminator, offset and length */
    }

    free(buffer);
    return total == dict_info->wordcount ? word_idx : NULL;
}
47
48
/*
 * Combine the 4 bytes at from_int, most significant byte first, into an
 * int.
 *
 * The bytes are combined in unsigned arithmetic: shifting a promoted
 * (signed) int left by 24 is undefined when the byte is 0x80 or above.
 */
inline static int to_int(unsigned char *from_int)
{
    unsigned long value = ((unsigned long)from_int[0] << 24)
                        | ((unsigned long)from_int[1] << 16)
                        | ((unsigned long)from_int[2] << 8)
                        |  (unsigned long)from_int[3];

    return (int)value;
}
52
53
/*
 * Binary search for the index entry of `word`.
 *
 * word:      word to look up (compared case-insensitively).
 * word_idx:  array of index entries, ordered consistently with
 *            strcasecmp().
 * dict_info: supplies wordcount, the number of entries searched.
 *
 * Returns a pointer to the matching entry, or NULL when any argument is
 * NULL or the word is not in the array.
 *
 * Note: strcasecmp() is declared in <strings.h> on POSIX systems.
 */
WORD_IDX *get_idx(char *word, WORD_IDX *word_idx, DICT_INFO *dict_info)
{
    int low;
    int high;

    if (word == NULL || word_idx == NULL || dict_info == NULL)
    {
        return NULL;
    }

    /* Search the half-open range [low, high); it shrinks on every pass. */
    low = 0;
    high = dict_info->wordcount;
    while (low < high)
    {
        int mid = low + (high - low) / 2;
        int cmp = strcasecmp(word, word_idx[mid].word);

        if (cmp == 0)
        {
            return &word_idx[mid];
        }
        if (cmp < 0)
        {
            high = mid;
        }
        else
        {
            low = mid + 1;
        }
    }

    return NULL;  /* not found */
}
77
78
79
80
81
#ifdef DEBUG
82
83
int
main(
int
argc,
char
**
argv)
84
{
85
char
*
filename
=
"
../dict/oxford-gb/oxford-gb-formated.idx
"
;
86
char
*
dictname
=
"
../dict/oxford-gb/oxford-gb-formated.dict
"
;
87
88
DICT_INFO dict_info;
89
dict_info.wordcount
=
39429
;
90
dict_info.idxfilesize
=
721264
;
91
WORD_IDX
*
idx
=
(WORD_IDX
*
)malloc(
sizeof
(WORD_IDX)
*
dict_info.wordcount);
92
get_words(filename,
&
dict_info,idx);
93
94
WORD_IDX
*
word
=
get_idx(
"
a
"
,idx,
&
dict_info);
95
96
printf(
"
%s,%d,%d\n
"
,word
->
word,word
->
offset,word
->
length);
97
98
FILE
*
dict
=
fopen(dictname,
"
r
"
);
99
if
(dict
==
NULL)
100
{
101
printf(
"
dict error\n
"
);
102
return
-
1
;
103
}
104
if
(
0
!=
fseek(dict,word
->
offset,SEEK_SET)){
105
printf(
"
seek error\n
"
);
106
return
-
1
;
107
}
108
109
char
explain[word
->
length
+
1
];
110
memset(explain,
'
\0
'
,word
->
length
+
1
);
111
fread(explain,word
->
length,
1
,dict);
112
113
printf(
"
%s\n
"
,explain);
114
free(idx);
115
return
EXIT_SUCCESS;
116
}
117
118
#endif
/* DEBUG */