The following compares Stream-based code with traditional loop/collection code for two tasks: deduplicating a list of 20 million records, and extracting a property from 1 million records:
static class Class1 {
    public String staffId;
    public String staffName;

    public String getStaffId() {
        return staffId;
    }

    public String getStaffName() {
        return staffName;
    }

    public Class1() {
    }

    public Class1(String staffId, String staffName) {
        this.staffId = staffId;
        this.staffName = staffName;
    }

    public String toString() {
        return "{id:" + staffId + ",name:" + staffName + "}";
    }

    // equals()/hashCode() are deliberately left commented out,
    // which is why stream().distinct() cannot deduplicate Class1 instances.
    /*@Override
    public int hashCode() {
        return staffId.hashCode() + staffName.hashCode();
    }*/
    /*
    @Override
    public boolean equals(Object v) {
        Class1 a = (Class1) v;
        return a.staffId.equals(this.staffId) && a.staffName.equals(this.staffName);
    }
    @Override
    public int hashCode() {
        return staffId.hashCode() * staffName.hashCode();
    }*/
}
static class ResultBean {
    public List<Class1> result;
    public long consume;
}

public ResultBean getUnique(List<Class1> pvData, Function<List<Class1>, List<Class1>> pvFun) {
    ResultBean lvRet = new ResultBean();
    long lvTmStart = System.currentTimeMillis();
    lvRet.result = pvFun.apply(pvData);
    lvRet.consume = System.currentTimeMillis() - lvTmStart;
    return lvRet;
}

static class ResultBean2 {
    public List<String> result;
    public long consume;
}

public ResultBean2 getUnique2(List<Class1> pvData, Function<List<Class1>, List<String>> pvFun) {
    ResultBean2 lvRet = new ResultBean2();
    long lvTmStart = System.currentTimeMillis();
    lvRet.result = pvFun.apply(pvData);
    lvRet.consume = System.currentTimeMillis() - lvTmStart;
    return lvRet;
}
@Test
public void testRemoveDuplicate() {
    // distinct() works on String out of the box because String overrides equals()/hashCode()
    List<String> lvList1 = new ArrayList<>();
    lvList1.add("A");
    lvList1.add("B");
    lvList1.add("B");
    lvList1.add("A");
    lvList1.add("C");
    lvList1.add("C");
    lvList1 = lvList1.stream().distinct().collect(Collectors.toList());
    System.out.println(lvList1);

    // Deduplication: build 20,000,000 heavily duplicated records (1,000,000 iterations x 20 adds)
    List<Class1> lvList2 = new ArrayList<>();
    for (int i = 0; i < 1000000; i++) {
        lvList2.add(new Class1("001", "Name001"));
        lvList2.add(new Class1("002", "Name002"));
        lvList2.add(new Class1("003", "Name003"));
        lvList2.add(new Class1("001", "Name001"));
        lvList2.add(new Class1("002", "Name002"));
        lvList2.add(new Class1("003", "Name003"));
        lvList2.add(new Class1("001", "Name001"));
        lvList2.add(new Class1("002", "Name002"));
        lvList2.add(new Class1("003", "Name003"));
        lvList2.add(new Class1("003", "Name003"));
        lvList2.add(new Class1("003", "Name003"));
        lvList2.add(new Class1("004", "Name003"));
        lvList2.add(new Class1("005", "Name003"));
        lvList2.add(new Class1("004", "Name003"));
        lvList2.add(new Class1("005", "Name003"));
        lvList2.add(new Class1("004", "Name003"));
        lvList2.add(new Class1("005", "Name003"));
        lvList2.add(new Class1("003", "Name003"));
        lvList2.add(new Class1("002", "Name003"));
        lvList2.add(new Class1("002", "Name003"));
    }
    System.out.println(lvList2.size());
    // lvList2 = lvList2.stream().distinct().collect(Collectors.toList()); // has no effect:
    // Class1 does not override equals()/hashCode() (see the sketch after this listing)

    // Stream version: deduplicate via a TreeSet keyed on (staffId, staffName)
    ResultBean lvLambdaResult = getUnique(lvList2, p -> {
        return p.stream().collect(Collectors.collectingAndThen(
                Collectors.toCollection(() -> new TreeSet<>(Comparator.comparing(Class1::getStaffId)
                        .thenComparing(Class1::getStaffName))), ArrayList::new));
        // return p.stream().distinct().collect(Collectors.toList());
    });
    System.out.println("Consume:" + lvLambdaResult.consume);
    System.out.println("result:" + lvLambdaResult.result);
    // Traditional version: the same TreeSet deduplication with an explicit Comparator
    ResultBean lvNormalResult = getUnique(lvList2, p -> {
        TreeSet<Class1> lvTree = new TreeSet<>(new Comparator<Class1>() {
            @Override
            public int compare(Class1 class1, Class1 t1) {
                // order by staffId first, then by staffName
                int lvRet = class1.staffId.compareTo(t1.staffId);
                return lvRet != 0 ? lvRet : class1.staffName.compareTo(t1.staffName);
            }
        });
        lvTree.addAll(p);
        return Arrays.asList(lvTree.toArray(new Class1[lvTree.size()]));
    });
    System.out.println("Consume:" + lvNormalResult.consume);
    System.out.println("result:" + lvNormalResult.result);
    // Property extraction: 1,000,000 distinct records, Stream map() vs. a plain for loop
    lvList2.clear();
    for (int i = 0; i < 1000000; i++) {
        lvList2.add(new Class1(i + "", "Name00" + i));
    }
    ResultBean2 lvLambdaResult2 = getUnique2(lvList2, p -> {
        return p.stream().map(i -> i.staffId).collect(Collectors.toList());
    });
    System.out.println("Consume:" + lvLambdaResult2.consume);

    ResultBean2 lvNormalResult2 = getUnique2(lvList2, p -> {
        List<String> lvRet = new ArrayList<>();
        for (Class1 rec : p) {
            lvRet.add(rec.staffId);
        }
        return lvRet;
    });
    System.out.println("Consume:" + lvNormalResult2.consume);
}
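Incidentally, the commented-out distinct() call has no effect on lvList2 because Class1 never overrides equals()/hashCode(), so every instance is considered unique. Below is a minimal sketch of how distinct() behaves once those methods are implemented, roughly as in the commented-out code in Class1. The Staff class is a hypothetical stand-in and is not part of the benchmark above:

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public class DistinctDemo {
    // Hypothetical stand-in for Class1 with equals()/hashCode() enabled
    static class Staff {
        final String staffId;
        final String staffName;

        Staff(String staffId, String staffName) {
            this.staffId = staffId;
            this.staffName = staffName;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (!(o instanceof Staff)) return false;
            Staff s = (Staff) o;
            return staffId.equals(s.staffId) && staffName.equals(s.staffName);
        }

        @Override
        public int hashCode() {
            return Objects.hash(staffId, staffName);
        }

        @Override
        public String toString() {
            return "{id:" + staffId + ",name:" + staffName + "}";
        }
    }

    public static void main(String[] args) {
        List<Staff> data = new ArrayList<>();
        data.add(new Staff("001", "Name001"));
        data.add(new Staff("001", "Name001"));
        data.add(new Staff("002", "Name002"));

        // distinct() relies on equals()/hashCode(), so duplicates are now removed
        List<Staff> unique = data.stream().distinct().collect(Collectors.toList());
        System.out.println(unique); // [{id:001,name:Name001}, {id:002,name:Name002}]
    }
}

With equals()/hashCode() in place, distinct() is usually the simplest option; the TreeSet approaches above remain useful when the class cannot be changed.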
The results are shown below. The Stream/functional versions are somewhat slower than the traditional ones: deduplication is about 13% slower (806 ms vs. 713 ms) and property extraction about 42% slower (30 ms vs. 21 ms), so in code paths with tight performance requirements the Stream form is better avoided. A sketch of a repeated measurement follows the output.
20000000
Consume:806
result:[{id:001,name:Name001}, {id:002,name:Name002}, {id:002,name:Name003}, {id:003,name:Name003}, {id:004,name:Name003}, {id:005,name:Name003}]
Consume:713
result:[{id:001,name:Name001}, {id:002,name:Name002}, {id:002,name:Name003}, {id:003,name:Name003}, {id:004,name:Name003}, {id:005,name:Name003}]
Consume:30
Consume:21
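One caveat about these numbers: each variant is timed with a single pass of System.currentTimeMillis() inside one JVM run, so JIT warm-up and GC can add noise. A standalone sketch of repeating each measurement and keeping the best run is shown below; RepeatTimer and bestOf are illustrative names and are not wired into the getUnique helpers above:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;
import java.util.stream.Collectors;

public class RepeatTimer {
    // Runs the task several times and returns the best (lowest) elapsed time in ms,
    // so JIT warm-up and GC pauses are less likely to distort the comparison.
    static long bestOf(int runs, Supplier<Object> task) {
        long best = Long.MAX_VALUE;
        for (int i = 0; i < runs; i++) {
            long start = System.nanoTime();
            task.get();
            best = Math.min(best, (System.nanoTime() - start) / 1_000_000);
        }
        return best;
    }

    public static void main(String[] args) {
        List<String> data = new ArrayList<>();
        for (int i = 0; i < 1_000_000; i++) {
            data.add("Name00" + i);
        }

        // Extraction-style workload: map with a Stream vs. a plain for loop
        long streamMs = bestOf(5, () -> data.stream().map(String::length).collect(Collectors.toList()));
        long loopMs = bestOf(5, () -> {
            List<Integer> lengths = new ArrayList<>();
            for (String s : data) {
                lengths.add(s.length());
            }
            return lengths;
        });
        System.out.println("stream best: " + streamMs + " ms, loop best: " + loopMs + " ms");
    }
}

For more rigorous comparisons, a dedicated benchmarking harness such as JMH is the usual choice.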