哈夫曼树编码

由于哈夫曼树的形状和由它得到的编码都取决于输入数据，压缩后输出的字节可以是任意值，
所以文件不能以文本（ASCII）方式读写。

文本方式读入时，某些字节会被转换或被当作文件结束符（例如 Windows 下的 0x1A 和 "\r\n"），导致数据流不能完整读入，所以要用二进制方式（"rb"/"wb"）打开文件。


下面是压缩的代码

#include<cstring>
#include<cstdio>
#include<cmath>
#include<cstdlib>
#include<iostream>
using namespace std;
// Fixed-capacity binary min-heap stored in two parallel arrays.
// Each entry is a (weight, tag) pair: the heap is ordered by weight only and
// the tag travels along with it. There are no bounds checks: callers must not
// insert more than 22222 entries and must not pop from an empty heap.
struct heap
{
    int ele[22222];   // weights, heap-ordered (smallest at index 0)
    int e[22222];     // tag stored alongside the weight at the same index
    int n;            // number of entries currently in the heap

    heap() : n(0) {}

    // Index arithmetic for the implicit binary tree.
    int pre(int x) { return (x - 1) >> 1; }
    int lson(int x) { return (x << 1) + 1; }
    int rson(int x) { return (x << 1) + 2; }

    // Inserts weight `x` with tag `k`.
    void ins(int x, int k)
    {
        int slot = n;
        n += 1;
        // Slide strictly heavier ancestors down until the new weight fits.
        while (slot > 0 && ele[pre(slot)] > x)
        {
            const int parent = pre(slot);
            ele[slot] = ele[parent];
            e[slot] = e[parent];
            slot = parent;
        }
        ele[slot] = x;
        e[slot] = k;
    }

    // Removes the entry with the smallest weight; returns that weight and
    // stores its tag in `y`.
    int pop(int &y)
    {
        const int top = ele[0];
        y = e[0];

        // After the decrement, index n holds the former last entry, which is
        // the one that has to be re-seated starting from the root.
        n -= 1;
        int hole = 0;
        while (lson(hole) < n)
        {
            int child = lson(hole);
            if (rson(hole) < n && ele[rson(hole)] < ele[child])
                child = rson(hole);
            if (!(ele[child] < ele[n]))
                break;
            ele[hole] = ele[child];
            e[hole] = e[child];
            hole = child;
        }
        ele[hole] = ele[n];
        e[hole] = e[n];
        return top;
    }
} G;
// Number of distinct byte values that occur in the input (counted in main).
int n;
// num[b] is the number of times byte value b occurs in the input.
int num[258];
// next[v][0] and next[v][1] are the two children of internal tree node v.
// Internal nodes are numbered from 257 upwards by main; smaller ids are leaves.
// NOTE(review): with `using namespace std;` in effect an unqualified use of
// `next` can be ambiguous with std::next on C++11 and later compilers.
int next[513][2];
// yes[s][i] is bit i (counted from the root) of the code assigned to byte s.
bool yes[256][256];
// len[s] is the number of bits in the code assigned to byte s.
int len[256];
// Scratch buffer: the bits on the path from the root to the node dfs is visiting.
bool ye[256];
void dfs(int r,int k)
{
    if(r<=256)
    {
        len[r]=k;
        for(int i=0;i<k;++i)
        yes[r][i]=ye[i];
        return;
    }
   ye[k]=0; dfs(next[r][0],k+1);
   ye[k]=1;dfs(next[r][1],k+1);
}
int main()
{
   FILE *p,*ps;
   ps=fopen("data.txt","r");
  p=fopen("data.out","wb");
   n=0;
   unsigned char c;
   int l=0;
   memset(num,0,sizeof(num));
   while(fscanf(ps,"%c",&c)!=EOF)
   {
       num[(int)c]++;
     //  printf("%d\n",(int) c);
   }
   for(int i=0;i<=256;++i)
   if(num[i]!=0) G.ins(num[i],i),++n;
   int y=256;
   for(int i=0;i<n-1;++i)
   {
       int  t1,t2;
       int x1,x2;
       x1=G.pop(t1);x2=G.pop(t2);
       ++y;
       G.ins(x1+x2,y);
       next[y][0]=t1;
       next[y][1]=t2;
   }
     dfs(y,0);
      unsigned char  q=0;int j=0;
     for(int i=0;i<32;++i)
     {
         q+=((n&(1<<(i)))?(1<<(i%8)):0);
         if(i%8==7) {fprintf(p,"%c",q);q=0;}
     }
     for(int i=0;i<=256;++i)
     {
        if(num[i]) {fprintf(p,"%c",(char)i);
        for(int j=0;j<32;++j)
     {
         q+=(num[i]&(1<<(j)))?(1<<(j%8)):0;;
         if(j%8==7) {fprintf(p,"%c",q);q=0;}
     }
     }
     }
     FILE *pp=fopen("data.txt","r");
     q=0;
     char sb;
     while(fscanf(pp,"%c",&sb)==1)
     {
         int r=sb;
         for(int k=0;k<len[r];++k,j=(j+1)%8)
         {
             q+=(1<<j)*yes[r][k];
                if(j==7) {fprintf(p,"%c",(unsigned char )q);q=0;}
         }
     }
   if((int)q!=0) fprintf(p,"%c",(unsigned char )q);
    return 0;
}

下面是解压的代码

#include<cstring>
#include<cstdio>
#include<cmath>
#include<cstdlib>
#include<iostream>
using namespace std;
// Fixed-capacity binary min-heap stored in two parallel arrays.
// Each entry is a (weight, tag) pair: the heap is ordered by weight only and
// the tag travels along with it. There are no bounds checks: callers must not
// insert more than 22222 entries and must not pop from an empty heap.
struct heap
{
    int ele[22222];   // weights, heap-ordered (smallest at index 0)
    int e[22222];     // tag stored alongside the weight at the same index
    int n;            // number of entries currently in the heap

    heap() : n(0) {}

    // Index arithmetic for the implicit binary tree.
    int pre(int x) { return (x - 1) >> 1; }
    int lson(int x) { return (x << 1) + 1; }
    int rson(int x) { return (x << 1) + 2; }

    // Inserts weight `x` with tag `k`.
    void ins(int x, int k)
    {
        int slot = n;
        n += 1;
        // Slide strictly heavier ancestors down until the new weight fits.
        while (slot > 0 && ele[pre(slot)] > x)
        {
            const int parent = pre(slot);
            ele[slot] = ele[parent];
            e[slot] = e[parent];
            slot = parent;
        }
        ele[slot] = x;
        e[slot] = k;
    }

    // Removes the entry with the smallest weight; returns that weight and
    // stores its tag in `y`.
    int pop(int &y)
    {
        const int top = ele[0];
        y = e[0];

        // After the decrement, index n holds the former last entry, which is
        // the one that has to be re-seated starting from the root.
        n -= 1;
        int hole = 0;
        while (lson(hole) < n)
        {
            int child = lson(hole);
            if (rson(hole) < n && ele[rson(hole)] < ele[child])
                child = rson(hole);
            if (!(ele[child] < ele[n]))
                break;
            ele[hole] = ele[child];
            e[hole] = e[child];
            hole = child;
        }
        ele[hole] = ele[n];
        e[hole] = e[n];
        return top;
    }
} G;
// Number of (symbol, count) records in the file header (read in main).
int n;
// Zeroed by main; not otherwise used by this program.
int num[258];
// next[v][0] and next[v][1] are the two children of internal tree node v.
// Internal nodes are numbered from 257 upwards by main; smaller ids are leaves.
// NOTE(review): with `using namespace std;` in effect an unqualified use of
// `next` can be ambiguous with std::next on C++11 and later compilers.
int next[514][2];
// Reads a 32-bit integer stored least-significant byte first from `in`.
//
// Despite its name this does not reposition the stream: it is an overload
// that sits next to the C library's fseek(FILE*, long, int). The name is kept
// because callers in this file use it.
//
// If the stream ends (or a read error occurs) before four bytes are
// available, the missing bytes are taken to be zero; callers that need to
// tell the difference can check feof()/ferror() on `in` afterwards.
int fseek(FILE * in)
{
    // Assemble in an unsigned value so that setting bit 31 is well defined.
    unsigned int res=0;
    for(int j=0;j<4;++j)
    {
        const int byte=fgetc(in);
        if(byte==EOF) break;
        res|=static_cast<unsigned int>(byte)<<(8*j);
    }
    return static_cast<int>(res);
}
int main()
{
    FILE *p;
    p=fopen("data.out","rb");
 //freopen("data1.txt","w",stdout);
unsigned char c;
   int l=0;
   memset(num,0,sizeof(num));
  int ks=0;
  n=fseek(p);
  for(int i=0;i<n;++i)
  {
      int t;
      fscanf(p,"%c",&c);
     t=fseek(p);
      l+=t;
      G.ins(t,(int)c);
      }
   int y=256;
   for(int i=0;i<n-1;++i)
   {
       int  t1,t2;
       int x1,x2;
       x1=G.pop(t1);x2=G.pop(t2);
       ++y;
       G.ins(x1+x2,y);
       next[y][0]=t1;
       next[y][1]=t2;
   }
   int temp=y;
   int nu=0;
 unsigned char sb;
   int xx=0;
  // cout<<l<<endl;
   while(l)
   {
       fscanf(p,"%c",&sb);
       int s=(int) sb;
     for(int i=0;i<8&&l;++i)
       {
           temp=next[temp][(s&(1<<i))==0?0:1];
           if(temp<=256) {putchar((char)temp);++xx;temp=y;--l;}
       }
   }
  // cout<<endl<<l<<" "<<xx<<endl;
    return 0;
}

-。- 由于编码方式的问题，暂时不支持中文的压缩。

你可能感兴趣的:(哈夫曼树编码)