LZW 编码解码代码

从 libtiff 4.0.2 中提取出来并稍加修改的 lzw 的代码，符合 TIFF6 标准中的 LZW 部分。

本人目前对开源协议还不太清楚，不知是否存在侵权问题，如果有，请告知。

代码：

1 /*
2 codec_lzw.h
3
4 LZW 编码解码。
5 基于 TIFF6 标准，不考虑与之前 TIFF 版本兼容。
6 以字节为字符单位进行编码。
7 */
8
9
10 #ifndef  __CODEC_LZW_H_INCLUDED__
11 #define   __CODEC_LZW_H_INCLUDED__
12
13
14 #define MAXCODE(n)      ((1L<<(n))-1)
15
16 #define BITS_MIN        9
17 #define BITS_MAX        12
18
19 #define CODE_CLEAR      256
20 #define CODE_EOI        257
21 #define CODE_FIRST      258
22 #define CODE_MAX        MAXCODE(BITS_MAX)
23
24
25          /*
26         编码函数
27
28         输入参数 input       ：输入数据，即待编码数据
29         输入参数 inputSize   ：输入数据量，单位字节
30         输出参数 output      ：输出缓冲，编码所得数据置于其中
31                 输出数据以 CODE_CLEAR 开始
32                 输出数据以 CODE_EOI   结束
33                 若输出缓冲太小，则编码所得数据将其填满
34         输入参数 outputLimit : 输出缓冲大小，单位字节
35         输出参数 *outputSize ：编码输出数据量，单位字节
36                 若输出缓冲太小，则返回实际完成编码可得到的数据量
37
38         返回值：
39                 非负整数，编码成功，其值表示编码所得数据量，与 (*outputSize) 相同
40                 -1 表示出错失败
41                 -2 表示输出缓冲太小
42          */
43 int   LZWencode( const unsigned char * input, int inputSize,
44                         unsigned char * output, int outputLimit,
45                          int * outputSize );
46
47          /*
48         解码函数
49
50         输入参数 input       ：输入数据，即待解码数据
51                 认为输入数据以 CODE_CLEAR 开始
52                 认为输入数据以 CODE_EOI   结束
53         输入参数 inputSize   ：输入数据量，单位字节
54         输出参数 output      ：输出缓冲，解码所得数据置于其中
55                 若输出缓冲太小，则解码所得数据将其填满
56         输入参数 outputLimit : 输出缓冲大小，单位字节
57         输出参数 *outputSize ：解码输出数据量，单位字节
58                 若输出缓冲太小，则返回实际完成解码可得到的数据量
59
60         返回值：
61                 非负整数，解码成功，其值表示解码所得数据量，与 (*outputSize) 相同
62                 -1 表示出错失败
63                 -2 表示输出缓冲太小
64          */
65 int   LZWdecode( const unsigned char * input, int inputSize,
66                         unsigned char * output, int outputLimit,
67                          int * outputSize );
68
69
70 #endif   /* __CODEC_LZW_H_INCLUDED__ */
71

  1 /*
  2 codec_lzw.c
  3
  4 LZW 编码解码。
  5 */
  6
  7
  8 #include " codec_lzw.h "
  9
10 #include < stdlib.h >
11 #include < string .h >
12
13
14 #define HSIZE           9001L                   /* 91% occupancy */
15 #define HSHIFT          (13-8)
16 #define CSIZE           (MAXCODE(BITS_MAX)+1L)
17 #define CHECK_GAP        10000
18
19 typedef unsigned short hcode_t;                  /* codes fit in 16 bits */
20 typedef struct {
21          int         hash;
22         hcode_t    code;
23 } hash_t;
24
25 typedef struct code_ent {
26          struct code_ent      * next;
27         unsigned short       length;            /* string len, including this token */
28         unsigned char        value;             /* data value */
29         unsigned char        firstchar;         /* first token of string */
30 } code_t;
31
32 /*
33 * Reset encoding hash table.
34 */
35 static void cl_hash( hash_t * hashtab ) {
36         hash_t * hp = & hashtab[HSIZE - 1 ];
37          int     i    = HSIZE - 8 ;
38          do {
39                 i -= 8 ;
40                 hp[ - 7 ].hash = - 1 ;
41                 hp[ - 6 ].hash = - 1 ;
42                 hp[ - 5 ].hash = - 1 ;
43                 hp[ - 4 ].hash = - 1 ;
44                 hp[ - 3 ].hash = - 1 ;
45                 hp[ - 2 ].hash = - 1 ;
46                 hp[ - 1 ].hash = - 1 ;
47                 hp[ 0 ].hash = - 1 ;
48                 hp -= 8 ;
49         } while (i >= 0 );
50          for (i += 8 ; i > 0 ; i -- , hp -- )
51                 hp -> hash = - 1 ;
52 }
53
54
55 int   LZWencode( const unsigned char * input, int inputSize,
56                         unsigned char * output, int outputLimit,
57                          int * outputSize ) {
58          int       fcode;
59         hash_t    * hp;
60          int       h, c;
61         hcode_t  ent            = (hcode_t)( - 1 );
62          int       disp;
63          int       incount        = 0 ;
64          int       outcount       = 0 ;
65          int       checkpoint     = CHECK_GAP;
66          int       nextdata       = 0 ;
67          int       nextbits       = 0 ;
68          int       free_ent       = CODE_FIRST;
69          int       maxcode        = MAXCODE(BITS_MIN);
70          int       nbits          = BITS_MIN;
71          int       enc_ratio      = 0 ;
72          const unsigned char * bp = input;
73          int            cc        = inputSize;
74         unsigned char * op       = output;
75         unsigned char * limit    = output + outputLimit;
76         hash_t         * hashtab;
77
78          if ( (NULL == input) || ( 0 > inputSize) ||
79                         (NULL == output) || ( 1 > outputLimit) ) {
80                  return ( - 1 );
81         }
82
83         hashtab = (hash_t * )malloc(HSIZE * sizeof (hash_t));
84          if ( hashtab == NULL ) {
85                  return ( - 1 );
86         }
87         cl_hash( hashtab );
88
89 #define CALCRATIO(rat) {                                            \
90          if (incount > 0x007fffff ) { /* NB: shift will overflow */    \
91                 rat = outcount >> 8 ;                                \
92                 rat = (rat == 0 ? 0x7fffffff : incount / rat);        \
93         } else                                                       \
94                 rat = (incount << 8 ) / outcount;                      \
95 }
96
97 #define PutNextCode(op, c) {                                           \
98         nextdata = (nextdata << nbits) | c;                            \
99         nextbits += nbits;                                             \
100          if (op < limit)                                                \
101                  * op = (unsigned char )(nextdata >> (nextbits - 8 ));       \
102         op = op + 1 ;                                                   \
103         nextbits -= 8 ;                                                 \
104          if (nextbits >= 8 ) {                                           \
105                  if (op < limit)                                        \
106                         * op = (unsigned char )(nextdata >> (nextbits - 8 ));\
107                 op = op + 1 ;                                           \
108                 nextbits -= 8 ;                                         \
109         }                                                              \
110         outcount += nbits;                                             \
111 }
112
113         PutNextCode(op, CODE_CLEAR);
114          if (cc > 0 ) {
115                 ent = * bp ++ ; cc -- ; incount ++ ;
116         }
117          while (cc > 0 ) {
118                 c = * bp ++ ; cc -- ; incount ++ ;
119                 fcode = (( int )c << BITS_MAX) + ent;
120                 h = (c << HSHIFT) ^ ent;         /* xor hashing */
121                  /*
122                  * Check hash index for an overflow.
123                   */
124                  if (h >= HSIZE)
125                         h -= HSIZE;
126
127                 hp = & hashtab[h];
128                  if (hp -> hash == fcode) {
129                         ent = hp -> code;
130                          continue ;
131                 }
132                  if (hp -> hash >= 0 ) {
133                          /*
134                          * Primary hash failed, check secondary hash.
135                           */
136                         disp = HSIZE - h;
137                          if (h == 0 )
138                                 disp = 1 ;
139                          do {
140                                  /*
141                                  * Avoid pointer arithmetic 'cuz of
142                                  * wraparound problems with segments.
143                                   */
144                                  if ((h -= disp) < 0 )
145                                         h += HSIZE;
146                                 hp = & hashtab[h];
147                                  if (hp -> hash == fcode) {
148                                         ent = hp -> code;
149                                          goto hit;
150                                 }
151                         } while (hp -> hash >= 0 );
152                 }
153                  /*
154                  * New entry, emit code and add to table.
155                   */
156                 PutNextCode(op, ent);
157                 ent = c;
158                 hp -> code = free_ent ++ ;
159                 hp -> hash = fcode;
160                  if (free_ent == CODE_MAX - 1 ) {
161                          /* table is full, emit clear code and reset */
162                         cl_hash(hashtab);
163                         enc_ratio = 0 ;
164                         incount = 0 ;
165                         outcount = 0 ;
166                         free_ent = CODE_FIRST;
167                         PutNextCode(op, CODE_CLEAR);
168                         nbits = BITS_MIN;
169                         maxcode = MAXCODE(BITS_MIN);
170                 } else {
171                          /*
172                          * If the next entry is going to be too big for
173                          * the code size, then increase it, if possible.
174                           */
175                          if (free_ent > maxcode) {
176                                 nbits ++ ;
177                                 maxcode = ( int ) MAXCODE(nbits);
178                         } else if (incount >= checkpoint) {
179                                  int rat;
180                                  /*
181                                  * Check compression ratio and, if things seem
182                                  * to be slipping, clear the hash table and
183                                  * reset state.  The compression ratio is a
184                                  * 24+8-bit fractional number.
185                                   */
186                                 checkpoint = incount + CHECK_GAP;
187                                 CALCRATIO(rat);
188                                  if (rat <= enc_ratio) {
189                                         cl_hash(hashtab);
190                                         enc_ratio = 0 ;
191                                         incount = 0 ;
192                                         outcount = 0 ;
193                                         free_ent = CODE_FIRST;
194                                         PutNextCode(op, CODE_CLEAR);
195                                         nbits = BITS_MIN;
196                                         maxcode = MAXCODE(BITS_MIN);
197                                 } else
198                                         enc_ratio = rat;
199                         }
200                 }
201         hit:
202                 ;
203         }
204          if (ent != (hcode_t) - 1 ) {
205                 PutNextCode(op, ent);
206         }
207         PutNextCode(op, CODE_EOI);
208          if (nextbits > 0 )  {
209                  if (op < limit)
210                          * op = (unsigned char )(nextdata << ( 8 - nextbits));
211                 op = op + 1 ;
212         }
213
214          if ( outputSize != NULL ) {
215                  * outputSize = op - output;
216         }
217
218          return ((op <= limit) ? (op - output) : ( - 2 ));
219 }
220
221
222
223
224 int   LZWdecode( const unsigned char * input, int inputSize,
225                         unsigned char * output, int outputLimit,
226                          int * outputSize ) {
227          const unsigned char * bp = input;
228         unsigned char         * op = output;
229          int                  occ = outputLimit;
230          int                  cc   = inputSize;
231
232         unsigned char * tp;
233         hcode_t       code;
234          int            len;
235          int            i;
236
237          int   nbits      = BITS_MIN;
238          int   nextbits   = 0 ;
239          int   nextdata   = 0 ;
240          int   nbitsmask = MAXCODE(BITS_MIN);
241
242         code_t * codep;
243         code_t * free_entp;
244         code_t * maxcodep;
245         code_t * oldcodep;
246         code_t * codetab;
247
248          if ( (NULL == input) || ( 1 > inputSize) ||
249                         (NULL == output) || ( 1 > outputLimit) ) {
250                  return ( - 1 );
251         }
252
253         codetab = (code_t * )malloc(CSIZE * sizeof (code_t));
254          if ( NULL == codetab ) {
255                  return ( - 1 );
256         }
257
258         i = 255 ;
259          do {
260                 codetab[i].value = i;
261                 codetab[i].firstchar = i;
262                 codetab[i].length = 1 ;
263                 codetab[i].next = NULL;
264         } while (i -- );
265         memset( & codetab[CODE_CLEAR], 0 ,
266                 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
267
268 #define DecFail   do { free( codetab ); return (-1); } while (0)
269
270 #define GetNextCode(bp, code) {                                         \
271          if (cc <= 0 ) DecFail;                                           \
272         nextdata = (nextdata << 8 ) | * (bp) ++ ;                             \
273         nextbits += 8 ;                                                  \
274          -- cc;                                                           \
275          if (nextbits < nbits) {                                         \
276                  if (cc <= 0 ) DecFail;                                   \
277                 nextdata = (nextdata << 8 ) | * (bp) ++ ;                     \
278                 nextbits += 8 ;                                          \
279                  -- cc;                                                   \
280         }                                                               \
281         code = (hcode_t)((nextdata >> (nextbits - nbits)) & nbitsmask);   \
282         nextbits -= nbits;                                              \
283 }
284
285         GetNextCode(bp, code);
286          if (code != CODE_CLEAR) {
287                 DecFail;
288         }
289          while (code != CODE_EOI) {
290                  if (code == CODE_CLEAR) {
291                         free_entp = codetab + CODE_FIRST;
292                         memset(free_entp, 0 ,
293                                     (CSIZE - CODE_FIRST) * sizeof (code_t));
294                         nbits      = BITS_MIN;
295                         nbitsmask = MAXCODE(BITS_MIN);
296                         maxcodep   = codetab + nbitsmask - 1 ;
297                         GetNextCode(bp, code);
298                          if (code == CODE_EOI)
299                                  break ;
300                          if (code >= CODE_CLEAR) {
301                                 DecFail;
302                         }
303                          if ( occ -- > 0 ) {
304                                  * op ++ = (unsigned char )code;
305                         }
306                         oldcodep = codetab + code;
307                         GetNextCode(bp, code);
308                          continue ;
309                 }
310
311                 codep = codetab + code;
312
313                  if (free_entp < & codetab[ 0 ] ||
314                     free_entp >= & codetab[CSIZE]) {
315                         DecFail;
316                 }
317
318                 free_entp -> next = oldcodep;
319                  if (free_entp -> next < & codetab[ 0 ] ||
320                     free_entp -> next >= & codetab[CSIZE]) {
321                         DecFail;
322                 }
323
324                 free_entp -> firstchar = free_entp -> next -> firstchar;
325                 free_entp -> length     = free_entp -> next -> length + 1 ;
326                 free_entp -> value      = (codep < free_entp) ?
327                                                 codep -> firstchar :
328                                                 free_entp -> firstchar;
329                  if ( ++ free_entp > maxcodep) {
330                          if ( ++ nbits > BITS_MAX)                 /* should not happen */
331                                 DecFail;
332                         nbitsmask = MAXCODE(nbits);
333                         maxcodep   = codetab + nbitsmask - 1 ;
334                 }
335                 oldcodep = codep;
336
337                  if (code >= 256 ) {
338                          if (codep -> length == 0 ) {
339                                 DecFail;
340                         }
341                         i = len = codep -> length;
342                          while ( (len > 0 ) && (len > occ) && codep ) {
343                                 codep = codep -> next;
344                                  -- len;
345                         }
346                         tp = op + len;
347                          while ( codep && (tp > op) ) {
348                                  * ( -- tp) = codep -> value;
349                                 codep    = codep -> next;
350                         }
351                         op   += len;
352                         occ -= i;
353                 }
354                  else if ( occ -- > 0 ) {
355                          * op ++ = (unsigned char )code;
356                 }
357
358                 GetNextCode(bp, code);
359         }
360
361         free( codetab );
362          if ( outputSize != NULL ) {
363                  * outputSize = outputLimit - occ;
364         }
365          return ((occ >= 0 ) ? (outputLimit - occ) : ( - 2 ));
366 }
367

测试程序：

  1 /*
  2 测试 LZW 编码解码，
  3 并未做到 100％测试。
  4 */
  5
  6 #include < stdio.h >
  7 #include < string .h >
  8 #include < time.h >
  9 #include < stdlib.h >
10
11 #include " codec_lzw.h "
12
13          /* 测试字符串最大长度，确保使字典超出大小 */
14 #define   L  (4096+4096)
15
16 char a[ L + L ];
17 int   na;
18
19 char b[ L + L ];
20 int   nb;
21
22 int res;
23
24 /* 小数据 */
25 char * data[] =
26 {
27          "" ,        /* 空串 */
28          " a " ,
29          " aaa " ,
30          " abcdefgabcdefgabcdefg " ,
31          " wabba_wabba_wabba_wabba_woo_woo_woo " ,
32          " abababababababab " ,    /* 解码时使用未构造好的字典条目 */
33 };
34
35 /* 使字典超出大小的数据 */
36 #define   DAL  5
37 char da[ L + 3 ];
38 void initDa( int i ) {
39          int j;
40          switch ( i ) {
41          case 0 :
42                  for ( j = 0 ; j < L; ++ j ) {
43                         da[ j ] = ' a ' ;
44                 }
45                  break ;
46          case 1 :
47                  for ( j = 0 ; j < L; ++ j ) {
48                         da[ j ] = ((j & 1 ) ? ' b ' : ' a ' );
49                 }
50                  break ;
51          case 2 :
52                  for ( j = 0 ; j < L; ++ j ) {
53                         da[ j ] = (rand() % 26 ) + ' a ' ;
54                 }
55                  break ;
56          case 3 :
57                  for ( j = 0 ; j < L; ++ j ) {
58                         da[ j ] = (j % 26 ) + ' a ' ;
59                 }
60                  break ;
61          case 4 :
62                  for ( j = 0 ; j < L; ++ j ) {
63                         da[ j ] = rand() & 0xFF ;
64                 }
65                  break ;
66         }
67 }
68
69 char * tobin( char d ) {
70          static char tmp[ 512 ];
71          int i;
72          for ( i = 7 ; i >= 0 ; -- i ) {
73                 tmp[ 7 - i ] = ' 0 ' + ((d >> i) & 0x1 );
74         }
75         tmp[ 8 ] = ' \0 ' ;
76          return tmp;
77 }
78
79
80 int main() {
81         printf( " \n ---- test ---- \n " );
82          int tc = sizeof (data) / sizeof (data[ 0 ]);
83          for ( tc = 0 ; tc < sizeof (data) / sizeof (data[ 0 ]); ++ tc ) {
84                  int i;
85                 printf( " \n*********************\ntest case %d:\n " , tc );
86                 printf( " text : \n\ " % s\ " \n " , data[tc] );
87                 res = LZWencode( data[tc], strlen(data[tc]), a, L, & na );
88                 printf( " encode : \nres = %d, len = %d\n " , res, na );
89                  for ( i = 0 ; i < na; ++ i ) {
90                         printf( " %s " , tobin(a[i]) );
91                 }
92                 res = LZWdecode( a, na, b, L, & nb );
93                 printf( " \ndecode : \nres = %d, len = %d\n\ "" , res, nb );
94                  for ( i = 0 ; i < nb; ++ i ) {
95                         printf( " %c " , b[i] );
96                 }
97                 printf( " \ " \n " );
98         }
99
100         printf( " \n\n ---- huge ---- \n " );
101         srand( time(NULL) );
102          for ( tc = 0 ; tc < DAL; ++ tc ) {
103                 printf( " \n*********************\nhuge case %d:\n " , tc );
104                 initDa( tc );
105                 res = LZWencode( da, L, a, L + L, & na );
106                 printf( " encode : \nres = %d, len = %d\n " , res, na );
107                 res = LZWdecode( a, na, b, L + L, & nb );
108                 printf( " decode : \nres = %d, len = %d\n " , res, nb );
109                 printf( ( 0 == memcmp(da, b, nb)) ? " yes\n " : "      no !!!!\n " );
110         }
111
112
113         printf( " \n\n ---- error ---- \n " );
114          for ( tc = 0 ; tc < DAL; ++ tc ) {
115                 printf( " \n*********************\nerror case %d:\n " , tc );
116                 initDa( tc );
117                 res = LZWencode( da, L, a, L / 3 , & na );
118                 printf( " encode : \nres = %d, len = %d\n " , res, na );
119                  if ( - 2 == res ) {
120                         res = LZWencode( da, L, a, na, & na );
121                         printf( " encode : \nres = %d, len = %d\n " , res, na );
122                 }
123                 res = LZWdecode( a, na, b, L / 3 , & nb );
124                 printf( " decode : \nres = %d, len = %d\n " , res, nb );
125                  if ( - 2 == res ) {
126                         printf( ( 0 == memcmp(da, b, L / 3 )) ? " partly yes\n " : " partly no\n " );
127                         res = LZWdecode( a, na, b, nb, & nb );
128                         printf( " decode : \nres = %d, len = %d\n " , res, nb );
129                 }
130                 printf( ( 0 == memcmp(da, b, nb)) ? " yes\n " : "      no !!!!\n " );
131         }
132          return 0 ;
133 }
134

测试结果：

---- test ----

*********************
test case 0 :
text :
""
encode :
res = 3 , len = 3
10000000 01000000 01000000
decode :
res = 0 , len = 0
""

*********************
test case 1 :
text :
" a "
encode :
res = 4 , len = 4
10000000 00011000 01100000 00100000
decode :
res = 1 , len = 1
" a "

*********************
test case 2 :
text :
" aaa "
encode :
res = 5 , len = 5
10000000 00011000 01100000 01010000 00010000
decode :
res = 3 , len = 3
" aaa "

*********************
test case 3 :
text :
" abcdefgabcdefgabcdefg "
encode :
res = 18 , len = 18
10000000 00011000 01001100 01000110 00110011 00100001 10010100 11001100 01100111 10000001 01000001 00100000 11010000 10001000 00011100 00010110 00001111 00000001
decode :
res = 21 , len = 21
" abcdefgabcdefgabcdefg "

*********************
test case 4 :
text :
" wabba_wabba_wabba_wabba_woo_woo_woo "
encode :
res = 26 , len = 26
10000000 00011101 11001100 00100110 00100011 00010001 10000100 10111111 00000010 10000010 01000001 10100001 00010000 01011000 00111100 00001110 00011000 01110111 00110111 10011011 11100000 11110001 00011000 10011001 10111110 00000010
decode :
res = 35 , len = 35
" wabba_wabba_wabba_wabba_woo_woo_woo "

*********************
test case 5 :
text :
" abababababababab "
encode :
res = 11 , len = 11
10000000 00011000 01001100 01010000 00101000 00100100 00001110 00001101 00000101 10000000 10000000
decode :
res = 16 , len = 16
" abababababababab "

---- huge ----

*********************
huge case 0 :
encode :
res = 147 , len = 147
decode :
res = 8192 , len = 8192
yes

*********************
huge case 1 :
encode :
res = 206 , len = 206
decode :
res = 8192 , len = 8192
yes

*********************
huge case 2 :
encode :
res = 6219 , len = 6219
decode :
res = 8192 , len = 8192
yes

*********************
huge case 3 :
encode :
res = 771 , len = 771
decode :
res = 8192 , len = 8192
yes

*********************
huge case 4 :
encode :
res = 11176 , len = 11176
decode :
res = 8192 , len = 8192
yes

---- error ----

*********************
error case 0 :
encode :
res = 147 , len = 147
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes

*********************
error case 1 :
encode :
res = 206 , len = 206
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes

*********************
error case 2 :
encode :
res = - 2 , len = 6204
encode :
res = 6204 , len = 6204
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes

*********************
error case 3 :
encode :
res = 771 , len = 771
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes

*********************
error case 4 :
encode :
res = - 2 , len = 11161
encode :
res = 11161 , len = 11161
decode :
res = - 2 , len = 8192
partly yes
decode :
res = 8192 , len = 8192
yes

LZW 编码解码代码

LZW 编码解码代码

你可能感兴趣的:(LZW 编码解码代码)