// Rolling checksum algorithm

package com.baidu.test;


import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.TreeSet;
import java.util.zip.Adler32;


/**
 * Small demonstration of a block-matching ("rolling checksum") comparison.
 *
 * <p>The "old" data is described by two parallel lists: one weak checksum (Adler-32) and one
 * strong checksum (MD5 hex string) per fixed-size block. {@link #rollingChecksum} slides a
 * window of {@link #off} bytes over the "new" data, records every window that equals an old
 * block in {@link #hm}, and records every byte that is not covered by such a window in
 * {@link #hs} / {@link #hmd}.
 *
 * <p>All results live in static fields, so the class is not thread-safe.
 */
public class Test {

    /** Scan position inside the new data; reset at the start of every {@link #rollingChecksum} call. */
    static int i = 0;

    /** Block size in bytes; also the step taken after a matched block. */
    static int off = 4;

    /** The window that was examined most recently. */
    static byte[] block;

    /** Matched blocks: index of the old block (position in the checksum lists) to the block bytes. */
    static HashMap<Integer, byte[]> hm = new HashMap<Integer, byte[]>();

    /** Unmatched runs: index (in the new data) of the LAST byte of a run to the bytes of that run. */
    static HashMap<Integer, List<Byte>> hmd = new HashMap<Integer, List<Byte>>();

    /** Indexes (in the new data) of every byte that is not part of a matched block. */
    static HashSet<Integer> hs = new HashSet<Integer>();

    /** Kept so existing references still compile; {@link #Merge} works on a local list instead. */
    static List<Byte> list = new ArrayList<Byte>();

    /**
     * Groups consecutive indexes into runs and stores the bytes of each run in {@link #hmd}.
     *
     * <p>Each run is stored under the index of its last byte. Existing entries of {@link #hmd}
     * are kept; an entry with the same key is overwritten.
     *
     * @param h    indexes into {@code ball} of the bytes to collect
     * @param ball the new data the indexes refer to
     */
    public static void Merge(HashSet<Integer> h, byte[] ball) {
        // A sorted view makes "is the next index present?" the test for the end of a run.
        TreeSet<Integer> sorted = new TreeSet<Integer>(h);
        List<Byte> run = null;
        for (Integer boxed : sorted) {
            int index = boxed.intValue();
            if (run == null) {
                run = new ArrayList<Byte>();
            }
            run.add(ball[index]);
            if (!sorted.contains(index + 1)) {
                // The run ends here: publish it and start a fresh one on the next index.
                hmd.put(index, run);
                run = null;
            }
        }
    }

    /**
     * Computes the strong checksum of a block.
     *
     * @param b the bytes to hash
     * @return the MD5 digest of {@code b} as 32 lower-case hexadecimal characters
     * @throws NoSuchAlgorithmException if the runtime provides no MD5 implementation
     */
    public static String getMD5(byte[] b) throws NoSuchAlgorithmException {
        byte[] digest = MessageDigest.getInstance("MD5").digest(b);
        StringBuilder hex = new StringBuilder(digest.length << 1);
        for (byte value : digest) {
            // Mask after shifting so sign extension of negative bytes is discarded.
            hex.append(Character.forDigit((value >> 4) & 0xf, 16));
            hex.append(Character.forDigit(value & 0xf, 16));
        }
        return hex.toString();
    }

    /**
     * Computes the weak checksum of a block.
     *
     * @param b the bytes to checksum
     * @return the Adler-32 value of {@code b}
     */
    public static long getJavaAdler32Value(byte[] b) {
        Adler32 adler = new Adler32();
        adler.update(b);
        return adler.getValue();
    }

    /**
     * Compares the new data against the checksums of the old blocks.
     *
     * <p>The window starts at offset 0. When the window equals an old block, the block is stored
     * in {@link #hm} under the old block's index and the window jumps forward by {@link #off}
     * bytes. Otherwise the window's first byte is recorded as different and the window moves
     * forward by one byte. Bytes too close to the end to start a full window are recorded as
     * different. Finally the different bytes are grouped into runs by {@link #Merge}.
     *
     * <p>{@link #hm}, {@link #hmd}, {@link #hs} and the scan position are reset on entry, so the
     * method can be called repeatedly.
     *
     * @param lli  weak (Adler-32) checksums of the old blocks
     * @param ball bytes of the new data
     * @param llq  strong (MD5 hex) checksums of the old blocks, in the same order as {@code lli}
     * @throws NoSuchAlgorithmException if the runtime provides no MD5 implementation
     * @throws IllegalStateException    if {@link #off} is not positive
     */
    public static void rollingChecksum(LinkedList<Long> lli, byte[] ball, LinkedList<String> llq)
            throws NoSuchAlgorithmException {
        if (off <= 0) {
            throw new IllegalStateException("block size must be positive: " + off);
        }
        // Start from a clean state; leftovers of an earlier call would index into other data.
        hm.clear();
        hmd.clear();
        hs.clear();
        i = 0;

        while (i <= ball.length - off) {
            block = new byte[off];
            System.arraycopy(ball, i, block, 0, off);
            int oldIndex = findMatchingBlock(lli, llq, block);
            if (oldIndex >= 0) {
                hm.put(oldIndex, block);
                i += off; // the whole block is accounted for
            } else {
                hs.add(i);
                i++; // slide the window by a single byte
            }
        }
        // Whatever is left is shorter than one block and therefore cannot match.
        for (; i < ball.length; i++) {
            hs.add(i);
        }
        Merge(hs, ball);
    }

    /**
     * Looks up a candidate block in the parallel checksum lists.
     *
     * @return the index of the first old block whose weak AND strong checksum both equal the
     *         candidate's, or -1 if there is none
     */
    private static int findMatchingBlock(LinkedList<Long> weakSums, LinkedList<String> strongSums,
            byte[] candidate) throws NoSuchAlgorithmException {
        long weak = getJavaAdler32Value(candidate);
        String strong = null; // computed only after a weak checksum has matched
        Iterator<String> strongIt = strongSums.iterator();
        int index = 0;
        for (Long oldWeak : weakSums) {
            if (!strongIt.hasNext()) {
                break; // no strong checksum left to confirm a weak hit
            }
            String oldStrong = strongIt.next();
            if (oldWeak != null && oldWeak.longValue() == weak) {
                if (strong == null) {
                    strong = getMD5(candidate);
                }
                if (strong.equals(oldStrong)) {
                    return index;
                }
            }
            index++;
        }
        return -1;
    }

    /**
     * Reads a whole file into memory.
     *
     * @param file the file to read; may be {@code null}
     * @return the file content, or {@code null} if {@code file} is {@code null}, the file is
     *         larger than {@code Integer.MAX_VALUE} bytes, or fewer bytes than the reported
     *         length could be read
     * @throws Exception if the file cannot be opened or read
     */
    public static byte[] getByte(File file) throws Exception {
        if (file == null) {
            return null;
        }
        // Compare as long: once narrowed to int the value can never exceed Integer.MAX_VALUE.
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            System.out.println("this file is max ");
            return null;
        }
        byte[] bytes = new byte[(int) size];
        InputStream is = new FileInputStream(file);
        try {
            int offset = 0;
            int numRead;
            while (offset < bytes.length
                    && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
            // Fewer bytes than the reported length means the file changed or the read failed.
            if (offset < bytes.length) {
                System.out.println("file length is error");
                return null;
            }
        } finally {
            is.close();
        }
        return bytes;
    }

    /**
     * Demo: the old data is described by every {@link #off}-byte window of the lower-case
     * alphabet; the new data is a sample string containing pieces of the alphabet. Prints the
     * matched blocks, a separator line, and then the unmatched runs. Strings are converted with
     * the platform default charset.
     */
    public static void main(String[] args) throws Exception {
        String alphabet = "abcdefghijklmnopqrstuvwxyz";
        LinkedList<Long> lli = new LinkedList<Long>();
        LinkedList<String> llq = new LinkedList<String>();
        for (int start = 0; start + off <= alphabet.length(); start++) {
            byte[] oldBlock = alphabet.substring(start, start + off).getBytes();
            lli.add(getJavaAdler32Value(oldBlock));
            llq.add(getMD5(oldBlock));
        }

        String all = "abcde我全国计算机考试0ffic考试你好私は祁です。&&&&&&&*****(((()))ます_____+++++?????]]]]}}}}啊fghijk我是二五年嗯嗯嗯嗯嗯嗯嗯嗯嗯额嗯嗯嗯嗯嗯呐呐嫩嫩嗯嗯嫩嗯嗯嗯嗯嗯嗯嗯嗯嗯的你呢来吧好吗可以啊是吗好事真的吗是lmn3opqr你stuvw哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈哈xyz";
        byte[] ball = all.getBytes();
        Test.rollingChecksum(lli, ball, llq);

        for (byte[] matched : hm.values()) {
            System.out.println(new String(matched));
        }
        System.out.println("--------------------------------------");
        for (List<Byte> run : hmd.values()) {
            byte[] bb = new byte[run.size()];
            for (int pos = 0; pos < bb.length; pos++) {
                bb[pos] = run.get(pos).byteValue();
            }
            System.out.println(new String(bb));
        }
    }

}

// You may also be interested in: (checksum, Rolling)