LZW 编码解码代码
从 libtiff 4.0.2 中提取出来并稍加修改的 lzw 的代码,符合 TIFF6 标准中的 LZW 部分。本人目前对开源协议还不太清楚,不知是否存在侵权问题,如果有,请告知。
代码:
1
/*
2 codec_lzw.h
3
4 LZW 编码解码。
5 基于 TIFF6 标准,不考虑与之前 TIFF 版本兼容。
6 以字节为字符单位进行编码。
7 */
8
9
10 #ifndef __CODEC_LZW_H_INCLUDED__
11 #define __CODEC_LZW_H_INCLUDED__
12
13
14 #define MAXCODE(n) ((1L<<(n))-1)
15
16 #define BITS_MIN 9
17 #define BITS_MAX 12
18
19 #define CODE_CLEAR 256
20 #define CODE_EOI 257
21 #define CODE_FIRST 258
22 #define CODE_MAX MAXCODE(BITS_MAX)
23
24
25 /*
26 编码函数
27
28 输入参数 input :输入数据,即待编码数据
29 输入参数 inputSize :输入数据量,单位 字节
30 输出参数 output :输出缓冲,编码所得数据置于其中
31 输出数据以 CODE_CLEAR 开始
32 输出数据以 CODE_EOI 结束
33 若输出缓冲太小,则编码所得数据将其填满
34 输入参数 outputLimit : 输出缓冲大小,单位 字节
35 输出参数 *outputSize :编码输出数据量,单位 字节
36 若输出缓冲太小,则返回实际完成编码可得到的数据量
37
38 返回值:
39 非负整数,编码成功,其值表示编码所得数据量,与 (*outputSize) 相同
40 -1 表示出错失败
41 -2 表示输出缓冲太小
42 */
43 int LZWencode( const unsigned char * input, int inputSize,
44 unsigned char * output, int outputLimit,
45 int * outputSize );
46
47 /*
48 解码函数
49
50 输入参数 input :输入数据,即待解码数据
51 认为输入数据以 CODE_CLEAR 开始
52 认为输入数据以 CODE_EOI 结束
53 输入参数 inputSize :输入数据量,单位 字节
54 输出参数 output :输出缓冲,解码所得数据置于其中
55 若输出缓冲太小,则解码所得数据将其填满
56 输入参数 outputLimit : 输出缓冲大小,单位 字节
57 输出参数 *outputSize :解码输出数据量,单位 字节
58 若输出缓冲太小,则返回实际完成解码可得到的数据量
59
60 返回值:
61 非负整数,解码成功,其值表示解码所得数据量,与 (*outputSize) 相同
62 -1 表示出错失败
63 -2 表示输出缓冲太小
64 */
65 int LZWdecode( const unsigned char * input, int inputSize,
66 unsigned char * output, int outputLimit,
67 int * outputSize );
68
69
70 #endif /* __CODEC_LZW_H_INCLUDED__ */
71
2 codec_lzw.h
3
4 LZW 编码解码。
5 基于 TIFF6 标准,不考虑与之前 TIFF 版本兼容。
6 以字节为字符单位进行编码。
7 */
8
9
10 #ifndef __CODEC_LZW_H_INCLUDED__
11 #define __CODEC_LZW_H_INCLUDED__
12
13
14 #define MAXCODE(n) ((1L<<(n))-1)
15
16 #define BITS_MIN 9
17 #define BITS_MAX 12
18
19 #define CODE_CLEAR 256
20 #define CODE_EOI 257
21 #define CODE_FIRST 258
22 #define CODE_MAX MAXCODE(BITS_MAX)
23
24
25 /*
26 编码函数
27
28 输入参数 input :输入数据,即待编码数据
29 输入参数 inputSize :输入数据量,单位 字节
30 输出参数 output :输出缓冲,编码所得数据置于其中
31 输出数据以 CODE_CLEAR 开始
32 输出数据以 CODE_EOI 结束
33 若输出缓冲太小,则编码所得数据将其填满
34 输入参数 outputLimit : 输出缓冲大小,单位 字节
35 输出参数 *outputSize :编码输出数据量,单位 字节
36 若输出缓冲太小,则返回实际完成编码可得到的数据量
37
38 返回值:
39 非负整数,编码成功,其值表示编码所得数据量,与 (*outputSize) 相同
40 -1 表示出错失败
41 -2 表示输出缓冲太小
42 */
43 int LZWencode( const unsigned char * input, int inputSize,
44 unsigned char * output, int outputLimit,
45 int * outputSize );
46
47 /*
48 解码函数
49
50 输入参数 input :输入数据,即待解码数据
51 认为输入数据以 CODE_CLEAR 开始
52 认为输入数据以 CODE_EOI 结束
53 输入参数 inputSize :输入数据量,单位 字节
54 输出参数 output :输出缓冲,解码所得数据置于其中
55 若输出缓冲太小,则解码所得数据将其填满
56 输入参数 outputLimit : 输出缓冲大小,单位 字节
57 输出参数 *outputSize :解码输出数据量,单位 字节
58 若输出缓冲太小,则返回实际完成解码可得到的数据量
59
60 返回值:
61 非负整数,解码成功,其值表示解码所得数据量,与 (*outputSize) 相同
62 -1 表示出错失败
63 -2 表示输出缓冲太小
64 */
65 int LZWdecode( const unsigned char * input, int inputSize,
66 unsigned char * output, int outputLimit,
67 int * outputSize );
68
69
70 #endif /* __CODEC_LZW_H_INCLUDED__ */
71
1
/*
2 codec_lzw.c
3
4 LZW 编码解码。
5 */
6
7
8 #include " codec_lzw.h "
9
10 #include < stdlib.h >
11 #include < string .h >
12
13
14 #define HSIZE 9001L /* 91% occupancy */
15 #define HSHIFT (13-8)
16 #define CSIZE (MAXCODE(BITS_MAX)+1L)
17 #define CHECK_GAP 10000
18
19 typedef unsigned short hcode_t; /* codes fit in 16 bits */
20 typedef struct {
21 int hash;
22 hcode_t code;
23 } hash_t;
24
25 typedef struct code_ent {
26 struct code_ent * next;
27 unsigned short length; /* string len, including this token */
28 unsigned char value; /* data value */
29 unsigned char firstchar; /* first token of string */
30 } code_t;
31
32 /*
33 * Reset encoding hash table.
34 */
35 static void cl_hash( hash_t * hashtab ) {
36 hash_t * hp = & hashtab[HSIZE - 1 ];
37 int i = HSIZE - 8 ;
38 do {
39 i -= 8 ;
40 hp[ - 7 ].hash = - 1 ;
41 hp[ - 6 ].hash = - 1 ;
42 hp[ - 5 ].hash = - 1 ;
43 hp[ - 4 ].hash = - 1 ;
44 hp[ - 3 ].hash = - 1 ;
45 hp[ - 2 ].hash = - 1 ;
46 hp[ - 1 ].hash = - 1 ;
47 hp[ 0 ].hash = - 1 ;
48 hp -= 8 ;
49 } while (i >= 0 );
50 for (i += 8 ; i > 0 ; i -- , hp -- )
51 hp -> hash = - 1 ;
52 }
53
54
55 int LZWencode( const unsigned char * input, int inputSize,
56 unsigned char * output, int outputLimit,
57 int * outputSize ) {
58 int fcode;
59 hash_t * hp;
60 int h, c;
61 hcode_t ent = (hcode_t)( - 1 );
62 int disp;
63 int incount = 0 ;
64 int outcount = 0 ;
65 int checkpoint = CHECK_GAP;
66 int nextdata = 0 ;
67 int nextbits = 0 ;
68 int free_ent = CODE_FIRST;
69 int maxcode = MAXCODE(BITS_MIN);
70 int nbits = BITS_MIN;
71 int enc_ratio = 0 ;
72 const unsigned char * bp = input;
73 int cc = inputSize;
74 unsigned char * op = output;
75 unsigned char * limit = output + outputLimit;
76 hash_t * hashtab;
77
78 if ( (NULL == input) || ( 0 > inputSize) ||
79 (NULL == output) || ( 1 > outputLimit) ) {
80 return ( - 1 );
81 }
82
83 hashtab = (hash_t * )malloc(HSIZE * sizeof (hash_t));
84 if ( hashtab == NULL ) {
85 return ( - 1 );
86 }
87 cl_hash( hashtab );
88
89 #define CALCRATIO(rat) { \
90 if (incount > 0x007fffff ) { /* NB: shift will overflow */ \
91 rat = outcount >> 8 ; \
92 rat = (rat == 0 ? 0x7fffffff : incount / rat); \
93 } else \
94 rat = (incount << 8 ) / outcount; \
95 }
96
97 #define PutNextCode(op, c) { \
98 nextdata = (nextdata << nbits) | c; \
99 nextbits += nbits; \
100 if (op < limit) \
101 * op = (unsigned char )(nextdata >> (nextbits - 8 )); \
102 op = op + 1 ; \
103 nextbits -= 8 ; \
104 if (nextbits >= 8 ) { \
105 if (op < limit) \
106 * op = (unsigned char )(nextdata >> (nextbits - 8 ));\
107 op = op + 1 ; \
108 nextbits -= 8 ; \
109 } \
110 outcount += nbits; \
111 }
112
113 PutNextCode(op, CODE_CLEAR);
114 if (cc > 0 ) {
115 ent = * bp ++ ; cc -- ; incount ++ ;
116 }
117 while (cc > 0 ) {
118 c = * bp ++ ; cc -- ; incount ++ ;
119 fcode = (( int )c << BITS_MAX) + ent;
120 h = (c << HSHIFT) ^ ent; /* xor hashing */
121 /*
122 * Check hash index for an overflow.
123 */
124 if (h >= HSIZE)
125 h -= HSIZE;
126
127 hp = & hashtab[h];
128 if (hp -> hash == fcode) {
129 ent = hp -> code;
130 continue ;
131 }
132 if (hp -> hash >= 0 ) {
133 /*
134 * Primary hash failed, check secondary hash.
135 */
136 disp = HSIZE - h;
137 if (h == 0 )
138 disp = 1 ;
139 do {
140 /*
141 * Avoid pointer arithmetic 'cuz of
142 * wraparound problems with segments.
143 */
144 if ((h -= disp) < 0 )
145 h += HSIZE;
146 hp = & hashtab[h];
147 if (hp -> hash == fcode) {
148 ent = hp -> code;
149 goto hit;
150 }
151 } while (hp -> hash >= 0 );
152 }
153 /*
154 * New entry, emit code and add to table.
155 */
156 PutNextCode(op, ent);
157 ent = c;
158 hp -> code = free_ent ++ ;
159 hp -> hash = fcode;
160 if (free_ent == CODE_MAX - 1 ) {
161 /* table is full, emit clear code and reset */
162 cl_hash(hashtab);
163 enc_ratio = 0 ;
164 incount = 0 ;
165 outcount = 0 ;
166 free_ent = CODE_FIRST;
167 PutNextCode(op, CODE_CLEAR);
168 nbits = BITS_MIN;
169 maxcode = MAXCODE(BITS_MIN);
170 } else {
171 /*
172 * If the next entry is going to be too big for
173 * the code size, then increase it, if possible.
174 */
175 if (free_ent > maxcode) {
176 nbits ++ ;
177 maxcode = ( int ) MAXCODE(nbits);
178 } else if (incount >= checkpoint) {
179 int rat;
180 /*
181 * Check compression ratio and, if things seem
182 * to be slipping, clear the hash table and
183 * reset state. The compression ratio is a
184 * 24+8-bit fractional number.
185 */
186 checkpoint = incount + CHECK_GAP;
187 CALCRATIO(rat);
188 if (rat <= enc_ratio) {
189 cl_hash(hashtab);
190 enc_ratio = 0 ;
191 incount = 0 ;
192 outcount = 0 ;
193 free_ent = CODE_FIRST;
194 PutNextCode(op, CODE_CLEAR);
195 nbits = BITS_MIN;
196 maxcode = MAXCODE(BITS_MIN);
197 } else
198 enc_ratio = rat;
199 }
200 }
201 hit:
202 ;
203 }
204 if (ent != (hcode_t) - 1 ) {
205 PutNextCode(op, ent);
206 }
207 PutNextCode(op, CODE_EOI);
208 if (nextbits > 0 ) {
209 if (op < limit)
210 * op = (unsigned char )(nextdata << ( 8 - nextbits));
211 op = op + 1 ;
212 }
213
214 if ( outputSize != NULL ) {
215 * outputSize = op - output;
216 }
217
218 return ((op <= limit) ? (op - output) : ( - 2 ));
219 }
220
221
222
223
224 int LZWdecode( const unsigned char * input, int inputSize,
225 unsigned char * output, int outputLimit,
226 int * outputSize ) {
227 const unsigned char * bp = input;
228 unsigned char * op = output;
229 int occ = outputLimit;
230 int cc = inputSize;
231
232 unsigned char * tp;
233 hcode_t code;
234 int len;
235 int i;
236
237 int nbits = BITS_MIN;
238 int nextbits = 0 ;
239 int nextdata = 0 ;
240 int nbitsmask = MAXCODE(BITS_MIN);
241
242 code_t * codep;
243 code_t * free_entp;
244 code_t * maxcodep;
245 code_t * oldcodep;
246 code_t * codetab;
247
248 if ( (NULL == input) || ( 1 > inputSize) ||
249 (NULL == output) || ( 1 > outputLimit) ) {
250 return ( - 1 );
251 }
252
253 codetab = (code_t * )malloc(CSIZE * sizeof (code_t));
254 if ( NULL == codetab ) {
255 return ( - 1 );
256 }
257
258 i = 255 ;
259 do {
260 codetab[i].value = i;
261 codetab[i].firstchar = i;
262 codetab[i].length = 1 ;
263 codetab[i].next = NULL;
264 } while (i -- );
265 memset( & codetab[CODE_CLEAR], 0 ,
266 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
267
268 #define DecFail do { free( codetab ); return (-1); } while (0)
269
270 #define GetNextCode(bp, code) { \
271 if (cc <= 0 ) DecFail; \
272 nextdata = (nextdata << 8 ) | * (bp) ++ ; \
273 nextbits += 8 ; \
274 -- cc; \
275 if (nextbits < nbits) { \
276 if (cc <= 0 ) DecFail; \
277 nextdata = (nextdata << 8 ) | * (bp) ++ ; \
278 nextbits += 8 ; \
279 -- cc; \
280 } \
281 code = (hcode_t)((nextdata >> (nextbits - nbits)) & nbitsmask); \
282 nextbits -= nbits; \
283 }
284
285 GetNextCode(bp, code);
286 if (code != CODE_CLEAR) {
287 DecFail;
288 }
289 while (code != CODE_EOI) {
290 if (code == CODE_CLEAR) {
291 free_entp = codetab + CODE_FIRST;
292 memset(free_entp, 0 ,
293 (CSIZE - CODE_FIRST) * sizeof (code_t));
294 nbits = BITS_MIN;
295 nbitsmask = MAXCODE(BITS_MIN);
296 maxcodep = codetab + nbitsmask - 1 ;
297 GetNextCode(bp, code);
298 if (code == CODE_EOI)
299 break ;
300 if (code >= CODE_CLEAR) {
301 DecFail;
302 }
303 if ( occ -- > 0 ) {
304 * op ++ = (unsigned char )code;
305 }
306 oldcodep = codetab + code;
307 GetNextCode(bp, code);
308 continue ;
309 }
310
311 codep = codetab + code;
312
313 if (free_entp < & codetab[ 0 ] ||
314 free_entp >= & codetab[CSIZE]) {
315 DecFail;
316 }
317
318 free_entp -> next = oldcodep;
319 if (free_entp -> next < & codetab[ 0 ] ||
320 free_entp -> next >= & codetab[CSIZE]) {
321 DecFail;
322 }
323
324 free_entp -> firstchar = free_entp -> next -> firstchar;
325 free_entp -> length = free_entp -> next -> length + 1 ;
326 free_entp -> value = (codep < free_entp) ?
327 codep -> firstchar :
328 free_entp -> firstchar;
329 if ( ++ free_entp > maxcodep) {
330 if ( ++ nbits > BITS_MAX) /* should not happen */
331 DecFail;
332 nbitsmask = MAXCODE(nbits);
333 maxcodep = codetab + nbitsmask - 1 ;
334 }
335 oldcodep = codep;
336
337 if (code >= 256 ) {
338 if (codep -> length == 0 ) {
339 DecFail;
340 }
341 i = len = codep -> length;
342 while ( (len > 0 ) && (len > occ) && codep ) {
343 codep = codep -> next;
344 -- len;
345 }
346 tp = op + len;
347 while ( codep && (tp > op) ) {
348 * ( -- tp) = codep -> value;
349 codep = codep -> next;
350 }
351 op += len;
352 occ -= i;
353 }
354 else if ( occ -- > 0 ) {
355 * op ++ = (unsigned char )code;
356 }
357
358 GetNextCode(bp, code);
359 }
360
361 free( codetab );
362 if ( outputSize != NULL ) {
363 * outputSize = outputLimit - occ;
364 }
365 return ((occ >= 0 ) ? (outputLimit - occ) : ( - 2 ));
366 }
367
2 codec_lzw.c
3
4 LZW 编码解码。
5 */
6
7
8 #include " codec_lzw.h "
9
10 #include < stdlib.h >
11 #include < string .h >
12
13
14 #define HSIZE 9001L /* 91% occupancy */
15 #define HSHIFT (13-8)
16 #define CSIZE (MAXCODE(BITS_MAX)+1L)
17 #define CHECK_GAP 10000
18
19 typedef unsigned short hcode_t; /* codes fit in 16 bits */
20 typedef struct {
21 int hash;
22 hcode_t code;
23 } hash_t;
24
25 typedef struct code_ent {
26 struct code_ent * next;
27 unsigned short length; /* string len, including this token */
28 unsigned char value; /* data value */
29 unsigned char firstchar; /* first token of string */
30 } code_t;
31
32 /*
33 * Reset encoding hash table.
34 */
35 static void cl_hash( hash_t * hashtab ) {
36 hash_t * hp = & hashtab[HSIZE - 1 ];
37 int i = HSIZE - 8 ;
38 do {
39 i -= 8 ;
40 hp[ - 7 ].hash = - 1 ;
41 hp[ - 6 ].hash = - 1 ;
42 hp[ - 5 ].hash = - 1 ;
43 hp[ - 4 ].hash = - 1 ;
44 hp[ - 3 ].hash = - 1 ;
45 hp[ - 2 ].hash = - 1 ;
46 hp[ - 1 ].hash = - 1 ;
47 hp[ 0 ].hash = - 1 ;
48 hp -= 8 ;
49 } while (i >= 0 );
50 for (i += 8 ; i > 0 ; i -- , hp -- )
51 hp -> hash = - 1 ;
52 }
53
54
55 int LZWencode( const unsigned char * input, int inputSize,
56 unsigned char * output, int outputLimit,
57 int * outputSize ) {
58 int fcode;
59 hash_t * hp;
60 int h, c;
61 hcode_t ent = (hcode_t)( - 1 );
62 int disp;
63 int incount = 0 ;
64 int outcount = 0 ;
65 int checkpoint = CHECK_GAP;
66 int nextdata = 0 ;
67 int nextbits = 0 ;
68 int free_ent = CODE_FIRST;
69 int maxcode = MAXCODE(BITS_MIN);
70 int nbits = BITS_MIN;
71 int enc_ratio = 0 ;
72 const unsigned char * bp = input;
73 int cc = inputSize;
74 unsigned char * op = output;
75 unsigned char * limit = output + outputLimit;
76 hash_t * hashtab;
77
78 if ( (NULL == input) || ( 0 > inputSize) ||
79 (NULL == output) || ( 1 > outputLimit) ) {
80 return ( - 1 );
81 }
82
83 hashtab = (hash_t * )malloc(HSIZE * sizeof (hash_t));
84 if ( hashtab == NULL ) {
85 return ( - 1 );
86 }
87 cl_hash( hashtab );
88
89 #define CALCRATIO(rat) { \
90 if (incount > 0x007fffff ) { /* NB: shift will overflow */ \
91 rat = outcount >> 8 ; \
92 rat = (rat == 0 ? 0x7fffffff : incount / rat); \
93 } else \
94 rat = (incount << 8 ) / outcount; \
95 }
96
97 #define PutNextCode(op, c) { \
98 nextdata = (nextdata << nbits) | c; \
99 nextbits += nbits; \
100 if (op < limit) \
101 * op = (unsigned char )(nextdata >> (nextbits - 8 )); \
102 op = op + 1 ; \
103 nextbits -= 8 ; \
104 if (nextbits >= 8 ) { \
105 if (op < limit) \
106 * op = (unsigned char )(nextdata >> (nextbits - 8 ));\
107 op = op + 1 ; \
108 nextbits -= 8 ; \
109 } \
110 outcount += nbits; \
111 }
112
113 PutNextCode(op, CODE_CLEAR);
114 if (cc > 0 ) {
115 ent = * bp ++ ; cc -- ; incount ++ ;
116 }
117 while (cc > 0 ) {
118 c = * bp ++ ; cc -- ; incount ++ ;
119 fcode = (( int )c << BITS_MAX) + ent;
120 h = (c << HSHIFT) ^ ent; /* xor hashing */
121 /*
122 * Check hash index for an overflow.
123 */
124 if (h >= HSIZE)
125 h -= HSIZE;
126
127 hp = & hashtab[h];
128 if (hp -> hash == fcode) {
129 ent = hp -> code;
130 continue ;
131 }
132 if (hp -> hash >= 0 ) {
133 /*
134 * Primary hash failed, check secondary hash.
135 */
136 disp = HSIZE - h;
137 if (h == 0 )
138 disp = 1 ;
139 do {
140 /*
141 * Avoid pointer arithmetic 'cuz of
142 * wraparound problems with segments.
143 */
144 if ((h -= disp) < 0 )
145 h += HSIZE;
146 hp = & hashtab[h];
147 if (hp -> hash == fcode) {
148 ent = hp -> code;
149 goto hit;
150 }
151 } while (hp -> hash >= 0 );
152 }
153 /*
154 * New entry, emit code and add to table.
155 */
156 PutNextCode(op, ent);
157 ent = c;
158 hp -> code = free_ent ++ ;
159 hp -> hash = fcode;
160 if (free_ent == CODE_MAX - 1 ) {
161 /* table is full, emit clear code and reset */
162 cl_hash(hashtab);
163 enc_ratio = 0 ;
164 incount = 0 ;
165 outcount = 0 ;
166 free_ent = CODE_FIRST;
167 PutNextCode(op, CODE_CLEAR);
168 nbits = BITS_MIN;
169 maxcode = MAXCODE(BITS_MIN);
170 } else {
171 /*
172 * If the next entry is going to be too big for
173 * the code size, then increase it, if possible.
174 */
175 if (free_ent > maxcode) {
176 nbits ++ ;
177 maxcode = ( int ) MAXCODE(nbits);
178 } else if (incount >= checkpoint) {
179 int rat;
180 /*
181 * Check compression ratio and, if things seem
182 * to be slipping, clear the hash table and
183 * reset state. The compression ratio is a
184 * 24+8-bit fractional number.
185 */
186 checkpoint = incount + CHECK_GAP;
187 CALCRATIO(rat);
188 if (rat <= enc_ratio) {
189 cl_hash(hashtab);
190 enc_ratio = 0 ;
191 incount = 0 ;
192 outcount = 0 ;
193 free_ent = CODE_FIRST;
194 PutNextCode(op, CODE_CLEAR);
195 nbits = BITS_MIN;
196 maxcode = MAXCODE(BITS_MIN);
197 } else
198 enc_ratio = rat;
199 }
200 }
201 hit:
202 ;
203 }
204 if (ent != (hcode_t) - 1 ) {
205 PutNextCode(op, ent);
206 }
207 PutNextCode(op, CODE_EOI);
208 if (nextbits > 0 ) {
209 if (op < limit)
210 * op = (unsigned char )(nextdata << ( 8 - nextbits));
211 op = op + 1 ;
212 }
213
214 if ( outputSize != NULL ) {
215 * outputSize = op - output;
216 }
217
218 return ((op <= limit) ? (op - output) : ( - 2 ));
219 }
220
221
222
223
224 int LZWdecode( const unsigned char * input, int inputSize,
225 unsigned char * output, int outputLimit,
226 int * outputSize ) {
227 const unsigned char * bp = input;
228 unsigned char * op = output;
229 int occ = outputLimit;
230 int cc = inputSize;
231
232 unsigned char * tp;
233 hcode_t code;
234 int len;
235 int i;
236
237 int nbits = BITS_MIN;
238 int nextbits = 0 ;
239 int nextdata = 0 ;
240 int nbitsmask = MAXCODE(BITS_MIN);
241
242 code_t * codep;
243 code_t * free_entp;
244 code_t * maxcodep;
245 code_t * oldcodep;
246 code_t * codetab;
247
248 if ( (NULL == input) || ( 1 > inputSize) ||
249 (NULL == output) || ( 1 > outputLimit) ) {
250 return ( - 1 );
251 }
252
253 codetab = (code_t * )malloc(CSIZE * sizeof (code_t));
254 if ( NULL == codetab ) {
255 return ( - 1 );
256 }
257
258 i = 255 ;
259 do {
260 codetab[i].value = i;
261 codetab[i].firstchar = i;
262 codetab[i].length = 1 ;
263 codetab[i].next = NULL;
264 } while (i -- );
265 memset( & codetab[CODE_CLEAR], 0 ,
266 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
267
268 #define DecFail do { free( codetab ); return (-1); } while (0)
269
270 #define GetNextCode(bp, code) { \
271 if (cc <= 0 ) DecFail; \
272 nextdata = (nextdata << 8 ) | * (bp) ++ ; \
273 nextbits += 8 ; \
274 -- cc; \
275 if (nextbits < nbits) { \
276 if (cc <= 0 ) DecFail; \
277 nextdata = (nextdata << 8 ) | * (bp) ++ ; \
278 nextbits += 8 ; \
279 -- cc; \
280 } \
281 code = (hcode_t)((nextdata >> (nextbits - nbits)) & nbitsmask); \
282 nextbits -= nbits; \
283 }
284
285 GetNextCode(bp, code);
286 if (code != CODE_CLEAR) {
287 DecFail;
288 }
289 while (code != CODE_EOI) {
290 if (code == CODE_CLEAR) {
291 free_entp = codetab + CODE_FIRST;
292 memset(free_entp, 0 ,
293 (CSIZE - CODE_FIRST) * sizeof (code_t));
294 nbits = BITS_MIN;
295 nbitsmask = MAXCODE(BITS_MIN);
296 maxcodep = codetab + nbitsmask - 1 ;
297 GetNextCode(bp, code);
298 if (code == CODE_EOI)
299 break ;
300 if (code >= CODE_CLEAR) {
301 DecFail;
302 }
303 if ( occ -- > 0 ) {
304 * op ++ = (unsigned char )code;
305 }
306 oldcodep = codetab + code;
307 GetNextCode(bp, code);
308 continue ;
309 }
310
311 codep = codetab + code;
312
313 if (free_entp < & codetab[ 0 ] ||
314 free_entp >= & codetab[CSIZE]) {
315 DecFail;
316 }
317
318 free_entp -> next = oldcodep;
319 if (free_entp -> next < & codetab[ 0 ] ||
320 free_entp -> next >= & codetab[CSIZE]) {
321 DecFail;
322 }
323
324 free_entp -> firstchar = free_entp -> next -> firstchar;
325 free_entp -> length = free_entp -> next -> length + 1 ;
326 free_entp -> value = (codep < free_entp) ?
327 codep -> firstchar :
328 free_entp -> firstchar;
329 if ( ++ free_entp > maxcodep) {
330 if ( ++ nbits > BITS_MAX) /* should not happen */
331 DecFail;
332 nbitsmask = MAXCODE(nbits);
333 maxcodep = codetab + nbitsmask - 1 ;
334 }
335 oldcodep = codep;
336
337 if (code >= 256 ) {
338 if (codep -> length == 0 ) {
339 DecFail;
340 }
341 i = len = codep -> length;
342 while ( (len > 0 ) && (len > occ) && codep ) {
343 codep = codep -> next;
344 -- len;
345 }
346 tp = op + len;
347 while ( codep && (tp > op) ) {
348 * ( -- tp) = codep -> value;
349 codep = codep -> next;
350 }
351 op += len;
352 occ -= i;
353 }
354 else if ( occ -- > 0 ) {
355 * op ++ = (unsigned char )code;
356 }
357
358 GetNextCode(bp, code);
359 }
360
361 free( codetab );
362 if ( outputSize != NULL ) {
363 * outputSize = outputLimit - occ;
364 }
365 return ((occ >= 0 ) ? (outputLimit - occ) : ( - 2 ));
366 }
367
测试程序:
1
/*
2 测试 LZW 编码解码,
3 并未做到 100% 测试。
4 */
5
6 #include < stdio.h >
7 #include < string .h >
8 #include < time.h >
9 #include < stdlib.h >
10
11 #include " codec_lzw.h "
12
13 /* 测试字符串最大长度,确保使字典超出大小 */
14 #define L (4096+4096)
15
16 char a[ L + L ];
17 int na;
18
19 char b[ L + L ];
20 int nb;
21
22 int res;
23
24 /* 小数据 */
25 char * data[] =
26 {
27 "" , /* 空串 */
28 " a " ,
29 " aaa " ,
30 " abcdefgabcdefgabcdefg " ,
31 " wabba_wabba_wabba_wabba_woo_woo_woo " ,
32 " abababababababab " , /* 解码时使用未构造好的字典条目 */
33 };
34
35 /* 使字典超出大小的数据 */
36 #define DAL 5
37 char da[ L + 3 ];
38 void initDa( int i ) {
39 int j;
40 switch ( i ) {
41 case 0 :
42 for ( j = 0 ; j < L; ++ j ) {
43 da[ j ] = ' a ' ;
44 }
45 break ;
46 case 1 :
47 for ( j = 0 ; j < L; ++ j ) {
48 da[ j ] = ((j & 1 ) ? ' b ' : ' a ' );
49 }
50 break ;
51 case 2 :
52 for ( j = 0 ; j < L; ++ j ) {
53 da[ j ] = (rand() % 26 ) + ' a ' ;
54 }
55 break ;
56 case 3 :
57 for ( j = 0 ; j < L; ++ j ) {
58 da[ j ] = (j % 26 ) + ' a ' ;
59 }
60 break ;
61 case 4 :
62 for ( j = 0 ; j < L; ++ j ) {
63 da[ j ] = rand() & 0xFF ;
64 }
65 break ;
66 }
67 }
68
69 char * tobin( char d ) {
70 static char tmp[ 512 ];
71 int i;
72 for ( i = 7 ; i >= 0 ; -- i ) {
73 tmp[ 7 - i ] = ' 0 ' + ((d >> i) & 0x1 );
74 }
75 tmp[ 8 ] = ' \0 ' ;
76 return tmp;
77 }
78
79
80 int main() {
81 printf( " \n ---- test ---- \n " );
82 int tc = sizeof (data) / sizeof (data[ 0 ]);
83 for ( tc = 0 ; tc < sizeof (data) / sizeof (data[ 0 ]); ++ tc ) {
84 int i;
85 printf( " \n*********************\ntest case %d:\n " , tc );
86 printf( " text : \n\ " % s\ " \n " , data[tc] );
87 res = LZWencode( data[tc], strlen(data[tc]), a, L, & na );
88 printf( " encode : \nres = %d, len = %d\n " , res, na );
89 for ( i = 0 ; i < na; ++ i ) {
90 printf( " %s " , tobin(a[i]) );
91 }
92 res = LZWdecode( a, na, b, L, & nb );
93 printf( " \ndecode : \nres = %d, len = %d\n\ "" , res, nb );
94 for ( i = 0 ; i < nb; ++ i ) {
95 printf( " %c " , b[i] );
96 }
97 printf( " \ " \n " );
98 }
99
100 printf( " \n\n ---- huge ---- \n " );
101 srand( time(NULL) );
102 for ( tc = 0 ; tc < DAL; ++ tc ) {
103 printf( " \n*********************\nhuge case %d:\n " , tc );
104 initDa( tc );
105 res = LZWencode( da, L, a, L + L, & na );
106 printf( " encode : \nres = %d, len = %d\n " , res, na );
107 res = LZWdecode( a, na, b, L + L, & nb );
108 printf( " decode : \nres = %d, len = %d\n " , res, nb );
109 printf( ( 0 == memcmp(da, b, nb)) ? " yes\n " : " no !!!!\n " );
110 }
111
112
113 printf( " \n\n ---- error ---- \n " );
114 for ( tc = 0 ; tc < DAL; ++ tc ) {
115 printf( " \n*********************\nerror case %d:\n " , tc );
116 initDa( tc );
117 res = LZWencode( da, L, a, L / 3 , & na );
118 printf( " encode : \nres = %d, len = %d\n " , res, na );
119 if ( - 2 == res ) {
120 res = LZWencode( da, L, a, na, & na );
121 printf( " encode : \nres = %d, len = %d\n " , res, na );
122 }
123 res = LZWdecode( a, na, b, L / 3 , & nb );
124 printf( " decode : \nres = %d, len = %d\n " , res, nb );
125 if ( - 2 == res ) {
126 printf( ( 0 == memcmp(da, b, L / 3 )) ? " partly yes\n " : " partly no\n " );
127 res = LZWdecode( a, na, b, nb, & nb );
128 printf( " decode : \nres = %d, len = %d\n " , res, nb );
129 }
130 printf( ( 0 == memcmp(da, b, nb)) ? " yes\n " : " no !!!!\n " );
131 }
132 return 0 ;
133 }
134
2 测试 LZW 编码解码,
3 并未做到 100% 测试。
4 */
5
6 #include < stdio.h >
7 #include < string .h >
8 #include < time.h >
9 #include < stdlib.h >
10
11 #include " codec_lzw.h "
12
13 /* 测试字符串最大长度,确保使字典超出大小 */
14 #define L (4096+4096)
15
16 char a[ L + L ];
17 int na;
18
19 char b[ L + L ];
20 int nb;
21
22 int res;
23
24 /* 小数据 */
25 char * data[] =
26 {
27 "" , /* 空串 */
28 " a " ,
29 " aaa " ,
30 " abcdefgabcdefgabcdefg " ,
31 " wabba_wabba_wabba_wabba_woo_woo_woo " ,
32 " abababababababab " , /* 解码时使用未构造好的字典条目 */
33 };
34
35 /* 使字典超出大小的数据 */
36 #define DAL 5
37 char da[ L + 3 ];
38 void initDa( int i ) {
39 int j;
40 switch ( i ) {
41 case 0 :
42 for ( j = 0 ; j < L; ++ j ) {
43 da[ j ] = ' a ' ;
44 }
45 break ;
46 case 1 :
47 for ( j = 0 ; j < L; ++ j ) {
48 da[ j ] = ((j & 1 ) ? ' b ' : ' a ' );
49 }
50 break ;
51 case 2 :
52 for ( j = 0 ; j < L; ++ j ) {
53 da[ j ] = (rand() % 26 ) + ' a ' ;
54 }
55 break ;
56 case 3 :
57 for ( j = 0 ; j < L; ++ j ) {
58 da[ j ] = (j % 26 ) + ' a ' ;
59 }
60 break ;
61 case 4 :
62 for ( j = 0 ; j < L; ++ j ) {
63 da[ j ] = rand() & 0xFF ;
64 }
65 break ;
66 }
67 }
68
69 char * tobin( char d ) {
70 static char tmp[ 512 ];
71 int i;
72 for ( i = 7 ; i >= 0 ; -- i ) {
73 tmp[ 7 - i ] = ' 0 ' + ((d >> i) & 0x1 );
74 }
75 tmp[ 8 ] = ' \0 ' ;
76 return tmp;
77 }
78
79
80 int main() {
81 printf( " \n ---- test ---- \n " );
82 int tc = sizeof (data) / sizeof (data[ 0 ]);
83 for ( tc = 0 ; tc < sizeof (data) / sizeof (data[ 0 ]); ++ tc ) {
84 int i;
85 printf( " \n*********************\ntest case %d:\n " , tc );
86 printf( " text : \n\ " % s\ " \n " , data[tc] );
87 res = LZWencode( data[tc], strlen(data[tc]), a, L, & na );
88 printf( " encode : \nres = %d, len = %d\n " , res, na );
89 for ( i = 0 ; i < na; ++ i ) {
90 printf( " %s " , tobin(a[i]) );
91 }
92 res = LZWdecode( a, na, b, L, & nb );
93 printf( " \ndecode : \nres = %d, len = %d\n\ "" , res, nb );
94 for ( i = 0 ; i < nb; ++ i ) {
95 printf( " %c " , b[i] );
96 }
97 printf( " \ " \n " );
98 }
99
100 printf( " \n\n ---- huge ---- \n " );
101 srand( time(NULL) );
102 for ( tc = 0 ; tc < DAL; ++ tc ) {
103 printf( " \n*********************\nhuge case %d:\n " , tc );
104 initDa( tc );
105 res = LZWencode( da, L, a, L + L, & na );
106 printf( " encode : \nres = %d, len = %d\n " , res, na );
107 res = LZWdecode( a, na, b, L + L, & nb );
108 printf( " decode : \nres = %d, len = %d\n " , res, nb );
109 printf( ( 0 == memcmp(da, b, nb)) ? " yes\n " : " no !!!!\n " );
110 }
111
112
113 printf( " \n\n ---- error ---- \n " );
114 for ( tc = 0 ; tc < DAL; ++ tc ) {
115 printf( " \n*********************\nerror case %d:\n " , tc );
116 initDa( tc );
117 res = LZWencode( da, L, a, L / 3 , & na );
118 printf( " encode : \nres = %d, len = %d\n " , res, na );
119 if ( - 2 == res ) {
120 res = LZWencode( da, L, a, na, & na );
121 printf( " encode : \nres = %d, len = %d\n " , res, na );
122 }
123 res = LZWdecode( a, na, b, L / 3 , & nb );
124 printf( " decode : \nres = %d, len = %d\n " , res, nb );
125 if ( - 2 == res ) {
126 printf( ( 0 == memcmp(da, b, L / 3 )) ? " partly yes\n " : " partly no\n " );
127 res = LZWdecode( a, na, b, nb, & nb );
128 printf( " decode : \nres = %d, len = %d\n " , res, nb );
129 }
130 printf( ( 0 == memcmp(da, b, nb)) ? " yes\n " : " no !!!!\n " );
131 }
132 return 0 ;
133 }
134
测试结果:
---- test ----
*********************
test case 0 :
text :
""
encode :
res = 3 , len = 3
10000000 01000000 01000000
decode :
res = 0 , len = 0
""
*********************
test case 1 :
text :
" a "
encode :
res = 4 , len = 4
10000000 00011000 01100000 00100000
decode :
res = 1 , len = 1
" a "
*********************
test case 2 :
text :
" aaa "
encode :
res = 5 , len = 5
10000000 00011000 01100000 01010000 00010000
decode :
res = 3 , len = 3
" aaa "
*********************
test case 3 :
text :
" abcdefgabcdefgabcdefg "
encode :
res = 18 , len = 18
10000000 00011000 01001100 01000110 00110011 00100001 10010100 11001100 01100111 10000001 01000001 00100000 11010000 10001000 00011100 00010110 00001111 00000001
decode :
res = 21 , len = 21
" abcdefgabcdefgabcdefg "
*********************
test case 4 :
text :
" wabba_wabba_wabba_wabba_woo_woo_woo "
encode :
res = 26 , len = 26
10000000 00011101 11001100 00100110 00100011 00010001 10000100 10111111 00000010 10000010 01000001 10100001 00010000 01011000 00111100 00001110 00011000 01110111 00110111 10011011 11100000 11110001 00011000 10011001 10111110 00000010
decode :
res = 35 , len = 35
" wabba_wabba_wabba_wabba_woo_woo_woo "
*********************
test case 5 :
text :
" abababababababab "
encode :
res = 11 , len = 11
10000000 00011000 01001100 01010000 00101000 00100100 00001110 00001101 00000101 10000000 10000000
decode :
res = 16 , len = 16
" abababababababab "
---- huge ----
*********************
huge case 0 :
encode :
res = 147 , len = 147
decode :
res = 8192 , len = 8192
yes
*********************
huge case 1 :
encode :
res = 206 , len = 206
decode :
res = 8192 , len = 8192
yes
*********************
huge case 2 :
encode :
res = 6219 , len = 6219
decode :
res = 8192 , len = 8192
yes
*********************
huge case 3 :
encode :
res = 771 , len = 771
decode :
res = 8192 , len = 8192
yes
*********************
huge case 4 :
encode :
res = 11176 , len = 11176
decode :
res = 8192 , len = 8192
yes
---- error ----
*********************
error case 0 :
encode :
res = 147 , len = 147
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes
*********************
error case 1 :
encode :
res = 206 , len = 206
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes
*********************
error case 2 :
encode :
res = - 2 , len = 6204
encode :
res = 6204 , len = 6204
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes
*********************
error case 3 :
encode :
res = 771 , len = 771
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes
*********************
error case 4 :
encode :
res = - 2 , len = 11161
encode :
res = 11161 , len = 11161
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes