package com.anywhere;
import java.io.*;
/** 一个lzw 压缩算法的 编码 和译码 的实现程序
* 压缩一个已有文件(sourcefile)到目标文件(targetfile) ,然后读取压缩的码;
* 此程序采用12位压缩码,词典作多可以存储2^12个词条;
* 生成的压缩码 经过解压缩,可以恢复为原先文件;
*对文本文件的压缩率,大约为60%,尚不能支持中文的文件输入:)
* @author Lai Yongxuan 2003.3.12
* @version 1.0
*/
public class lzwCode
{
/**
@see Dictionary
*/
Dictionary dic=new Dictionary();
/**count1: the bytes of input file,count2:the bytes of output file
*/
int count1=0,count2=0;
/** the max number of the dictionary;
*this number can be add to the codebuf[] if
* the file has only odd words to be treated ;
*/
/** the input file : character file or coding file
*/
BufferedInputStream in;
/** the output file: character file or coding file
*/
BufferedOutputStream out;
final short END=4095;
/**the entry of the class,and check the arguments first
@param args array of string arguments
-c sourceFile [targetFile] 建立一个压缩文件
-d sourceFile [targetFile] 解压缩一个文件
@return No return value
@exception No ecceptions thrown
*/
public static void main(String []args)
{
if ( args.length<=1 || args.length>4 )
{
System.out.println("-c sourceFile [targetFile] [-dic] 建立一个压
缩文件\n");
System.out.println("-d sourceFile [targetFile] [-dic] 解压缩一个
文件\n");
}
else if(! ( args[0].equals(new String("-c") )||args[0].equals(new
String("-d") ) ) )
{
System.out.println("-c sourceFile [targetFile] 建立一个压缩文件\
n");
System.out.println("-d sourceFile [targetFile] 解压缩一个文件\n"
);
}
else if(args.length>=2)
{
lzwCode a=new lzwCode(args);
a.run(args);
}
return ;
}
/** the constuctor of the class of "lzwCode "
*@param args array of string arguments input at the main()
*
*
*/
public lzwCode(String []args)
{
try{
String f=new String();
in =new BufferedInputStream(
new FileInputStream(
new File(args[1])));
if(args.length==3 && !args[2].equals(new String("-dic")))
{
f=args[2];
}
else
{
int i=args[1].lastIndexOf(new String(".") );
f=args[1].substring(0,i)+((args[0].equals("-c")
)?".lzw":".dlzw");
}
out=new BufferedOutputStream(
new FileOutputStream(
new File(f)));
}//try
catch(FileNotFoundException e )
{
System.err.println(e);
return;
}
catch(IOException e )
{
System.err.println(e);
return;
}
}
/** the entry of the process;
@param Srring args[]: array of string arguments input at the main()
BufferedInputStream in: the input charstream file
BufferedOutputStream out:the output code stream file
* @return No return value
*/
public void run(String args[] )
{
if(args[0].equals(new String("-c")) )
{
code(in,out);
}
else
{
decode(in,out);
}
if(args[args.length-1].equals(new String("-dic") ))
System.out.println(dic.toString ());
}
/** input the charstream from a file,and output the code stream to anpther
file
* @param BufferedInputStream in: the input charstream file
BufferedOutputStream out:the output code stream file
* @return No return value
*
*/
public void code(BufferedInputStream in,BufferedOutputStream out)
{
System.out.println("coding...\n"+ ".......\n");
//a:the buffer byte read from the input file,then to be converted to
String
//buf: the codestream to store in the code file
//prefix :the pre_String of the dictory
// the indexbuf[] is the index of dictionary to be converted in
// the code file
//str: the current charecter of the character input Stream
byte a[]=new byte[1],buf[]=new byte[3];
String prefix="",cur="";
byte i=0;
short indexbuf[]=new short[2];
String str=null;
try{
short m=0;
while( (a[0]=(byte)in.read() ) != -1 )
{
cur=new String(a);// be converted
count1++; // the number of bytes of input file
str=prefix;
str=str.concat(cur);
m=(short)dic.indexOf(str);
if( m!=-1)//the prefix is in the dictionary,
{
prefix=str;
}
else//
{
if(i==0)//the first indexbuf,store in codebuf[]
{
indexbuf[0]=(short)dic.indexOf(prefix);
i=1;
}
else// now have 2 index number,then ouput to the code file
{
indexbuf[1]=(short)dic.indexOf(prefix);
zipOutput(out,indexbuf);
count2+=3;//3 bytes stored to the code file
i=0;
}
dic.add(str);
prefix=cur;
}//else
}//while
// System.out.println("i="+i);
if(i==(byte)1) //this is the case that the
//input file has only odd index number to store
{
indexbuf[1]=END;//put a special index number
//(the max number of the dictionary) END to the
code file
zipOutput(out,indexbuf);
count2+=3;
}
dic.add(str);
in.close ();
out.close ();
System.out.println("zip rate:"+(float)count2*100/count1+"% ");
}catch(IOException e )
{
System.err.println(e);
return;
}
catch(OutDictionaryException e)
{
System.err.println(e);
return;
}
}
/** input the code stream from a file,and output the char stream to anpther
file
* @param BufferedInputStream in: the input code file
BufferedOutputStream out:the output charstream stream file
* @return No return value
* @exception No return Exception
*
*
*/ public void decode(BufferedInputStream in,BufferedOutputStream out)
{
System.out.println("decoding...\n"+".......\n");
short precode=0,curcode=0;
String prefix=null;
short i=0;
short bufcode[]=new short[2];//2 code read from the code file
boolean more=true;//indicate the end of the file or some error while
input the file
// DataOutputStream out2=new DataOutputStream(out);
try{
more=zipInput(in,bufcode);//first input 2 code
if(more)
{
curcode=bufcode[0];
// out2.writeChars(dic.getString(curcode));
stringOut(out,dic.getString(curcode) );
}
else
System.out.println("error in the beginning...");
while(more)
{
precode=curcode;
if(i==0)
{
curcode=bufcode[1];
i=1;
}
else
{
more=zipInput(in,bufcode);
curcode=bufcode[0];
if(bufcode[1]==END)
{
stringOut(out,dic.getString (bufcode[0] ));
break;
}
i=0;
}
if(curcode<dic.length())//if the prefix string can be found in the
dictory
{
// out2.writeChars(dic.getString(curcode));
stringOut(out,dic.getString(curcode) );
prefix=dic.getString(precode);
prefix+=(dic.getString(curcode)).substring(0,1);
dic.add(prefix);
}
else
{
prefix=dic.getString(precode);
prefix+=prefix.substring(0,1);
// out2.writeChars(prefix);
stringOut(out,prefix );
dic.add(prefix);
}//else
}//while
in.close ();
out.close ();
}catch( OutDictionaryException e )
{
System.err.println(e);
return;
}
catch(IOException e)
{
System.err.println(e);
return;
}
}
/** output the index number of the dictionary to the code stream;
ecah index is converted to 12 bit ;and output 2 short numbers at a
time
* @param BufferedOutputStream out:the output charstream stream file
short index[]:the 2 short array to be converted to code form
* @return No return value
* @exception No return Exception
*
*
*/
private void zipOutput(BufferedOutputStream out,short index[])
{
try{
byte buf[]=new byte[3];
buf[1]=(byte)(index[0]<<4);
buf[0]=(byte)(index[0]>>4);
buf[2]=(byte)index[1];
buf[1]+=(byte)(index[1]>>8);
out.write(buf,0,3);
//out put the decoding
// System.out.println(index[0]+"\t"+index[1]+"\t");
/* short codebuf[]=new short[2];
//codebuf[0]=(short)(buf[0]<<4);
codebuf[0]=toRight(buf[0],4);
codebuf[0]+=(short)(toRight(buf[1],0)>>4);
//codebuf[1]=(short)buf[2];
codebuf[1]=toRight(buf[2],0);
//codebuf[1]=(byte)(buf[1]<<4);
byte temp=(byte)(toRight(buf[1],4));
codebuf[1]+=toRight(temp,4);
// codebuf[1]+=(short)(buf[1]<<4);
System.out.println("\t"+codebuf[0]+"\t"+codebuf[1]);
*/
}catch( IOException e )
{
System.err.println(e);
return;
}
}
/** convert the code stream to the file in the original way;
* each time deel with 3 bytes,and return 2 index number
* @param BufferedOutputStream in :the input code stream file
short index[]:the 2 short array buffer of index of dictionary
* @return return loolean value:if not the end of file and the converted
code
is right ,return true;else ,return false
* @exception No return Exception
*
*
*/
private boolean zipInput(BufferedInputStream in,short codebuf[])
{
byte buf[]=new byte[3],temp;
//int intbuf[]=new int[3],temp;
short le=(short)dic.length();
try{
if(in.read(buf,0,3)!=3)
{
System.out.println("the end of the file!");
return false;
}
//codebuf[0]=(short)(buf[0]<<4);
codebuf[0]=toRight(buf[0],4);
codebuf[0]+=(short)(toRight(buf[1],0)>>4);
//codebuf[1]=(short)buf[2];
codebuf[1]=toRight(buf[2],0);
//codebuf[1]=(byte)(buf[1]<<4);
temp=(byte)(toRight(buf[1],4));
codebuf[1]+=toRight(temp,4);
// System.out.println(codebuf[0]+"\t"+codebuf[1]);
if(codebuf[0]<-1 ||codebuf[1]<-1)
{
System.out.println("erroring while getting the code
:"+codebuf[0]+"\t"+codebuf[1]);
System.out.println(dic);
return false;
}
//System.out.println(codebuf[0]+"\t"+codebuf[1]);
}
catch(IOException e )
{
System.err.println(e);
return false;
}
return true;
}
/**converte a byte number,to the short form;and
* shift a byte n bits to the right;and reglect whether
*&the byte is positive or negective
*@param byte:the byte you want to shift
* int :the bits you shift to the right
*@return int :the result of the shifted
*/ private short toRight(byte buf,int n)
{
short s=0;
for(short i=7;i>=0;i--)
{
if( ( (1L<<i)&buf )!=0 )
s+=(short)(1L<<(i+n));
}
return s;
}
/**output the String to a file,but in a form of "byte" way;
* in order to be ecactly as the oririnal file ,i deel with
* the file in bytes form
*@param BufferedOutputStream out:the output file
* String str:the buf of String to be output
*/ private void stringOut(BufferedOutputStream out,String str)
{
byte a[]=str.getBytes();
try{
out.write(a,0,str.length());
}
catch(IOException e )
{
System.err.println(e);
}
}
}
//Dictionary.java
package com.anywhere;
import java.util.*;
/**
 * Thrown when a word is added to a dictionary that is already full.
 */
class OutDictionaryException extends Exception
{
    private static final long serialVersionUID = 1L;

    /** Returns the default exception text followed by a fixed explanation. */
    @Override
    public String toString()
    {
        return (super.toString ()+"out of the dictionary size!!");
    }
}

/**
 * A dictionary of at most 2^12 words. It is initialised with the 128 ASCII
 * characters, can be searched by word or by index, can be extended, and
 * reports its size.
 *
 * @author Lai Yongxuan 2002.3.10
 * @version 1.0
 */
public class Dictionary
{
    /** The largest number of words the dictionary may hold (2^12). */
    private static final int MAX_WORDS = 4096;

    /**
     * The words in insertion order; a word's position is its index.
     * Change it only through {@link #add(String)} so that the lookup table
     * stays consistent with it.
     * @see java.util.ArrayList
     */
    final ArrayList<String> ar = new ArrayList<String>();

    /** Maps each word to the position of its first occurrence in {@link #ar}. */
    private final Map<String, Integer> positions = new HashMap<String, Integer>();

    /**
     * Creates a dictionary holding the 128 ASCII characters, each as a
     * one-character word at the index equal to its character code.
     */
    public Dictionary()
    {
        for (char c = 0; c < 128; c++)
        {
            append(String.valueOf(c));
        }
    }

    /**
     * Returns the index of a word.
     *
     * @param a the word to look up
     * @return the index of the first occurrence of the word, or -1 if the
     *         dictionary does not contain it
     */
    public int indexOf(String a)
    {
        Integer position = positions.get(a);
        return (position == null) ? -1 : position.intValue();
    }

    /**
     * Appends a word to the dictionary.
     *
     * @param a the word to add
     * @throws OutDictionaryException if the dictionary already holds
     *         4096 (2^12) words
     */
    public void add (String a) throws OutDictionaryException
    {
        if (length() < MAX_WORDS)
        {
            append(a);
        }
        else
        {
            throw new OutDictionaryException();
        }
    }

    /**
     * Returns the number of words in the dictionary.
     */
    public int length()
    {
        return ar.size();
    }

    /**
     * Returns the size followed by every entry as {@code index: word},
     * entries separated by tabs.
     */
    @Override
    public String toString()
    {
        int size = length();
        StringBuilder text = new StringBuilder("size of the dictionary: ");
        text.append(size).append("\n");
        for (int i = 0; i < size; i++)
        {
            text.append(i).append(": ").append(ar.get(i)).append("\t");
        }
        return text.toString();
    }

    /**
     * Returns the word stored at an index.
     *
     * @param i the index of the word
     * @return the word at that index
     * @throws IndexOutOfBoundsException if no word is stored at that index
     */
    public String getString(short i)
    {
        return ar.get(i);
    }

    /**
     * Prints a freshly initialised dictionary; only used to test the class.
     *
     * @param args ignored
     */
    public static void main(String []args )
    {
        Dictionary a = new Dictionary();
        System.out.println(a);
    }

    /**
     * Stores a word at the next free index. Only the first occurrence of a
     * word is recorded in the lookup table, so {@link #indexOf(String)}
     * keeps returning the lowest index when a word is stored twice.
     */
    private void append(String word)
    {
        Integer position = Integer.valueOf(ar.size());
        ar.add(word);
        if (!positions.containsKey(word))
        {
            positions.put(word, position);
        }
    }
}