lzw 压缩算法源代码:
/*
*******************************************************************
**
** Copyright (c) 1989 Mark R. Nelson
**
** LZW data compression/expansion demonstration program.
**
** April 13, 1989
**
** Minor mods made 7/19/2006 to conform with ANSI-C - prototypes, casting,
** and argument agreement.
**
****************************************************************************
*/
#include
<
stdio.h
>
#include
<
stdlib.h
>
#include
<
string
.h
>
/*
** BITS is the width of every LZW code.  Setting the number of bits to
** 12, 13 or 14 affects several constants.  Note that MS-DOS machines need
** to compile their code in large model if 14 bits are selected.
*/
#define BITS 12

/* Left shift applied to the character when find_match() builds its hash. */
#define HASHING_SHIFT (BITS - 8)

/*
** MAX_VALUE is the largest BITS-wide value; compress() writes it as the
** end-of-data code and expand() stops when it reads it.  MAX_CODE is the
** highest code that is ever assigned to a string.  Both expansions are
** fully parenthesized so they can be used safely inside any expression.
*/
#define MAX_VALUE ((1 << BITS) - 1)
#define MAX_CODE (MAX_VALUE - 1)

/*
** The string table size needs to be a prime number that is somewhat
** larger than 2**BITS.
*/
#if BITS == 14
#define TABLE_SIZE 18041
#elif BITS == 13
#define TABLE_SIZE 9029
#elif BITS <= 12
#define TABLE_SIZE 5021
#else
#error "No TABLE_SIZE is defined for this BITS setting (use 12, 13 or 14)"
#endif
/*
** Global work areas.  The three table pointers are allocated in main();
** compress() and find_match() use all three as a hash table, while
** expand() and decode_string() index prefix_code and append_character
** directly by code number.
*/
int *code_value;                  /* This is the code value array         */
unsigned int *prefix_code;        /* This array holds the prefix codes    */
unsigned char *append_character;  /* This array holds the appended chars  */

/*
** This array holds the decoded string (built in reverse by
** decode_string()).
** NOTE(review): 4000 bytes appears to cover the longest string possible
** with 12-bit codes; larger BITS settings may need more room - confirm.
*/
unsigned char decode_stack[4000];
/*
** Prototypes for the routines defined further down in this file.
*/

/* Whole-stream drivers: each reads `input` and writes `output`. */
void expand(FILE *input, FILE *output);
void compress(FILE *input, FILE *output);

/* String table helpers. */
unsigned char *decode_string(unsigned char *buffer, unsigned int code);
int find_match(int hash_prefix, unsigned int hash_character);

/* Bit-level code I/O. */
unsigned int input_code(FILE *input);
void output_code(FILE *output, unsigned int code);
/*
********************************************************************
**
** This program gets a file name from the command line (or prompts for
** one when none is given).  It compresses the file, placing its output
** in a file named test.lzw.  It then expands test.lzw into test.out.
** Test.out should then be an exact duplicate of the input file.
**
** Exits with -1 if the tables cannot be allocated, no file name can be
** read, or the compression files cannot be opened; exits with -2 if the
** expansion files cannot be opened.  Returns 0 otherwise.
**
*************************************************************************
*/
int main(int argc, char *argv[])
{
    FILE *input_file;
    FILE *output_file;
    FILE *lzw_file;
    char input_file_name[81];
    const char *input_path;

    /*
    ** The three buffers are needed for the compression phase.
    */
    code_value = malloc(TABLE_SIZE * sizeof *code_value);
    prefix_code = malloc(TABLE_SIZE * sizeof *prefix_code);
    append_character = malloc(TABLE_SIZE * sizeof *append_character);
    if (code_value == NULL || prefix_code == NULL || append_character == NULL) {
        printf("Fatal error allocating table space!");
        exit(-1);
    }

    /*
    ** Get the file name, open it up, and open up the lzw output file.
    ** A command line argument is used in place (no copy, so its length
    ** is not limited); a name typed at the prompt is limited by the
    ** field width to what fits in input_file_name.
    */
    if (argc > 1) {
        input_path = argv[1];
    } else {
        printf("Input file name?");
        fflush(stdout);
        if (scanf("%80s", input_file_name) != 1) {
            /* Nothing could be read, so there is no file to open. */
            printf("Fatal error opening files.");
            exit(-1);
        }
        input_path = input_file_name;
    }
    input_file = fopen(input_path, "rb");
    lzw_file = fopen("test.lzw", "wb");
    if (input_file == NULL || lzw_file == NULL) {
        printf("Fatal error opening files.");
        exit(-1);
    }

    /*
    ** Compress the file.  code_value is only used by the compression
    ** phase, so it is released as soon as that phase is done.
    */
    compress(input_file, lzw_file);
    fclose(input_file);
    fclose(lzw_file);
    free(code_value);
    code_value = NULL;

    /*
    ** Now open the files for the expansion.
    */
    lzw_file = fopen("test.lzw", "rb");
    output_file = fopen("test.out", "wb");
    if (lzw_file == NULL || output_file == NULL) {
        printf("Fatal error opening files.");
        exit(-2);
    }

    /*
    ** Expand the file.
    */
    expand(lzw_file, output_file);
    fclose(lzw_file);
    fclose(output_file);
    free(prefix_code);
    free(append_character);
    return 0;
}
/*
** This is the compression routine.  It reads bytes from `input` until
** end of file and writes the corresponding LZW codes to `output` through
** output_code(), followed by the MAX_VALUE end-of-data code and a zero
** code that pushes the remaining buffered bits out.
**
** An empty input produces just the end-of-data code and the flush code;
** no string code is written for it.
*/
void compress(FILE *input, FILE *output)
{
    unsigned int next_code = 256;  /* Next available string code */
    unsigned int string_code;
    int character;                 /* int so that EOF stays distinguishable */
    int index;
    int ticks = 0;
    int i;

    /* Clear out the string table before starting. */
    for (i = 0; i < TABLE_SIZE; i++)
        code_value[i] = -1;

    printf("Compressing...");

    character = getc(input);       /* Get the first code */
    if (character != EOF) {
        string_code = (unsigned int) character;

        /*
        ** This is the main loop where it all happens.  This loop runs
        ** until all of the input has been exhausted.  Note that it stops
        ** adding codes to the table after all of the possible codes have
        ** been defined.
        */
        while ((character = getc(input)) != EOF) {
            /* Print a * every 1000 input characters; just a pacifier. */
            if (++ticks == 1000) {
                ticks = 0;
                printf("*");
            }

            /* See if the string is in the table. */
            index = find_match(string_code, character);
            if (code_value[index] != -1) {
                /* It is: keep extending the current string. */
                string_code = code_value[index];
            } else {
                /* It is not: try to add it, as long as codes remain. */
                if (next_code <= (MAX_CODE)) {
                    code_value[index] = next_code++;
                    prefix_code[index] = string_code;
                    append_character[index] = character;
                }
                /* Output the string seen so far and restart from the
                ** character that broke the match. */
                output_code(output, string_code);
                string_code = character;
            }
        }

        output_code(output, string_code);  /* Output the last code */
    }

    output_code(output, (MAX_VALUE));  /* Output the end of buffer code */
    output_code(output, 0);            /* This code flushes the output buffer */
    printf("\n");                      /* End the progress line */
}
/*
** Hashing routine for the compressor's string table.  Looks for the
** entry whose prefix code is hash_prefix and whose appended character is
** hash_character.  Returns the index of that entry when it exists;
** otherwise returns the index of the first unused slot (code_value of
** -1) met while probing, which is where the caller may store the string.
**
** The starting slot is the character shifted left by HASHING_SHIFT and
** XORed with the prefix.  On a collision the probe steps backwards by a
** fixed distance derived from the starting slot, wrapping around the
** table.
*/
int find_match(int hash_prefix, unsigned int hash_character)
{
    int slot = (hash_character << HASHING_SHIFT) ^ hash_prefix;
    int step = (slot == 0) ? 1 : TABLE_SIZE - slot;

    /* Keep probing while the slot is occupied by some other string. */
    while (code_value[slot] != -1 &&
           (prefix_code[slot] != hash_prefix ||
            append_character[slot] != hash_character)) {
        slot -= step;
        if (slot < 0)
            slot += TABLE_SIZE;
    }
    return slot;
}
/*
** This is the expansion routine.  It takes an LZW format file, and
** expands it to an output file.  Codes are read with input_code() until
** the MAX_VALUE end-of-data code is seen; each one is decoded into
** decode_stack by decode_string() and written to `output`.
**
** A stream whose very first code is the end-of-data code is treated as
** empty and produces no output bytes.
*/
void expand(FILE *input, FILE *output)
{
    unsigned int next_code = 256;  /* The next available code to define */
    unsigned int new_code;
    unsigned int old_code;
    int character;
    int counter = 0;               /* Counter is used as a pacifier */
    unsigned char *string;

    printf("Expanding...");

    /* Read in the first code; stop right away if the stream is empty. */
    old_code = input_code(input);
    if (old_code == (MAX_VALUE)) {
        printf("\n");
        return;
    }

    /* Initialize the character variable, and send the first code to the
    ** output file. */
    character = old_code;
    putc(old_code, output);

    /*
    ** This is the main expansion loop.  It reads in codes from the LZW
    ** file until it sees the special code used to indicate the end of
    ** the data.
    */
    while ((new_code = input_code(input)) != (MAX_VALUE)) {
        /* Print an asterisk every 1000 codes; it is just a pacifier. */
        if (++counter == 1000) {
            counter = 0;
            printf("*");
        }

        if (new_code >= next_code) {
            /*
            ** This checks for the special
            ** STRING+CHARACTER+STRING+CHARACTER+STRING case which
            ** generates an undefined code.  It handles it by decoding
            ** the last code, and adding a single character to the end of
            ** the decode string.
            */
            *decode_stack = character;
            string = decode_string(decode_stack + 1, old_code);
        } else {
            /* Otherwise we do a straight decode of the new code. */
            string = decode_string(decode_stack, new_code);
        }

        /*
        ** Now we output the decoded string in reverse order.  The walk
        ** stops on the first element instead of stepping below it, so
        ** the pointer never leaves decode_stack.
        */
        character = *string;
        for (;;) {
            putc(*string, output);
            if (string == decode_stack)
                break;
            string--;
        }

        /* Finally, if possible, add a new code to the string table. */
        if (next_code <= (MAX_CODE)) {
            prefix_code[next_code] = old_code;
            append_character[next_code] = character;
            next_code++;
        }
        old_code = new_code;
    }
    printf("\n");  /* End the progress line */
}
/*
** This routine simply decodes a string from the string table, storing
** it in a buffer. The buffer can then be output in reverse order by
** the expansion program.
*/
unsigned
char
*
decode_string(unsigned
char
*
buffer,unsigned
int
code)
{
int
i;
i
=
0
;
while
(code
>
255
)
{
*
buffer
++
=
append_character[code];
code
=
prefix_code[code];
if
(i
++>=
MAX_CODE)
{
printf(
"
Fatal error during code expansion.
"
);
exit(
-
3
);
}
}
*
buffer
=
code;
return
(buffer);
}
/*
** The following two routines read and write BITS-wide codes packed into
** a byte stream.  They are written strictly for clarity, and are not
** particularly efficient.
**
** input_code() returns the next BITS-wide code from `input`.  Bytes are
** collected, most significant first, in a 32-bit window kept in static
** variables between calls, so all calls are expected to read from the
** same stream.
**
** The window is explicitly masked to 32 bits after each shift, so the
** result is the same whether unsigned long is 32 or 64 bits wide.  Once
** the file is exhausted the window is filled with 1 bits; a stream that
** ends early therefore soon yields MAX_VALUE, the all-ones code that
** expand() treats as end of data.
*/
unsigned int input_code(FILE *input)
{
    static int input_bit_count = 0;
    static unsigned long input_bit_buffer = 0L;
    unsigned int return_value;

    /* Top the window up one byte at a time. */
    while (input_bit_count <= 24) {
        int next_byte = getc(input);

        if (next_byte == EOF)
            next_byte = 0xFF;
        input_bit_buffer |= (unsigned long) next_byte << (24 - input_bit_count);
        input_bit_count += 8;
    }

    /* The code is the top BITS bits of the 32-bit window. */
    return_value = (unsigned int) (input_bit_buffer >> (32 - BITS));

    /* Drop the bits just consumed, along with anything pushed above
    ** bit 31 on platforms where unsigned long is wider than 32 bits. */
    input_bit_buffer = (input_bit_buffer << BITS) & 0xFFFFFFFFUL;
    input_bit_count -= BITS;
    return return_value;
}
/*
** Appends one BITS-wide code to `output`.  The code is placed just below
** the bits still pending in a 32-bit window, and then every complete
** byte is written out from the top of that window.  Bits that do not
** fill a whole byte stay in the static window until a later call, which
** is why compress() finishes with an extra zero code.
*/
void output_code(FILE *output, unsigned int code)
{
    static int pending_bits = 0;
    static unsigned long bit_window = 0L;

    bit_window |= (unsigned long) code << (32 - BITS - pending_bits);

    /* Account for the new code, then drain whole bytes from the top. */
    for (pending_bits += BITS; pending_bits >= 8; pending_bits -= 8) {
        putc(bit_window >> 24, output);
        bit_window <<= 8;
    }
}