package indi.demo.container.hash;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
/**
 * Simple value holder for a person's name and age.
 *
 * <p>This class does not override {@code equals} or {@code hashCode}, so two
 * instances holding the same name and age are still distinct objects as far
 * as hash-based collections are concerned.
 */
class Person {
    private String name;
    private int age;

    /**
     * Creates a person.
     *
     * @param name the person's name
     * @param age the person's age
     */
    public Person(String name, int age) {
        this.age = age;
        this.name = name;
    }

    //setter and getter

    /** Returns a text form such as {@code Person [name=Jhon, age=23]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Person [name=");
        text.append(name);
        text.append(", age=");
        text.append(age);
        text.append("]");
        return text.toString();
    }
}
/**
 * Demonstrates what happens when objects that do not override
 * {@code equals}/{@code hashCode} are stored in a {@link HashSet}.
 */
public class OddExample {
    /**
     * Adds three {@code Person} objects (two of them with identical field
     * values) to a hash set and prints every element the set ends up holding.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Parameterized type instead of a raw Set, so the compiler checks element types.
        Set<Person> set = new HashSet<>();
        set.add(new Person("Jhon", 23));
        set.add(new Person("Jhon", 23));
        set.add(new Person("Li Gang", 24));
        for (Person person : set) {
            System.out.println(person);
        }
    }
}
程序运行的结果如下:
Person [name=Li Gang, age=24]
Person [name=Jhon, age=23]
Person [name=Jhon, age=23]
刚学Java的时候,这样的代码是信手拈来。很显然这样写是错的,因为Set中是不允许元素重复的。这时集合里面有两个Person对象,它们拥有相同的名字和年龄,应该被视为同一个对象,那么为什么HashSet没有发现它们是同一对象呢?HashSet到底是如何保证元素的唯一性呢?要弄清楚这个问题,我们必须要去Java源码一探究竟(很明显,维护元素唯一性的代码肯定是在add方法中)。下面是HashSet的部分源码:
// Map that backs this set; add() stores each element in it as a key.
private transient HashMap map;
// Dummy value to associate with an Object in the backing Map
private static final Object PRESENT = new Object();
/**
 * Adds e to the set by putting it into the backing map as a key,
 * with the shared PRESENT object as its value.
 *
 * @param e the element to add
 * @return true if map.put returned null (no previous value was associated
 *         with e), false otherwise
 */
public boolean add(E e) {
return map.put(e, PRESENT)==null;
}
可以看到HashSet是用HashMap来实现的,因此我们还得到HashMap的put方法中看看:
/**
 * Associates value with key by delegating to putVal, passing the result of
 * hash(key), onlyIfAbsent = false and evict = true.
 *
 * @param key the key
 * @param value the value to associate with key
 * @return whatever putVal returns
 */
public V put(K key, V value) {
return putVal(hash(key), key, value, false, true);
}
/**
 * Inserts a new node for key, or updates the node that already holds key.
 *
 * @param hash hash of the key; combined with the table length to pick a bucket
 * @param key the key
 * @param value the value to store
 * @param onlyIfAbsent if true, an existing non-null value is left unchanged
 * @param evict passed through to afterNodeInsertion
 * @return the value previously held by an existing node for key, or null
 *         when no such node was found
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node[] tab; Node p; int n, i;
// Table missing or empty: obtain one from resize() and use its length.
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// Bucket index is (n - 1) & hash; an empty bucket receives the new node directly.
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
else {
Node e; K k;
// First node matches when the hashes are equal AND the keys are the same
// reference or key.equals(k) is true.
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
// First node is a TreeNode: hand the work over to putTreeVal.
else if (p instanceof TreeNode)
e = ((TreeNode)p).putTreeVal(this, tab, hash, key, value);
else {
// Otherwise follow the next links through the bucket.
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
// End reached without a match: append a new node (e stays null).
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
// Same match test as for the first node; on a match e refers to that node.
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
if (e != null) { // existing mapping for key
V oldValue = e.value;
// Overwrite unless onlyIfAbsent is set and a non-null value is already stored.
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
// Reached only when no existing node for key was returned above:
// bump modCount and size, and resize once size exceeds threshold.
++modCount;
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}
从上面的代码可以看到,在判断两个Key对象是否相同时,进行了两个主要操作:
1. 先比较两个Key的哈希值(用hash函数计算出)是否一样
2. 在哈希值相同的基础上,再判断两个Key是否为同一个引用(==),若不是,则调用待插入key的equals方法判断两个对象是否相等
因此,我们定义的类要在基于hash的集合中使用时,需要覆写equals()和hashCode()这两个方法,来确保元素的唯一性
所以,上面的类正确的实现方式应该是:
package indi.demo.container.hash;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
/**
 * Demonstrates a class that can be stored safely in a hash-based collection
 * because it overrides both {@code equals} and {@code hashCode}.
 */
public class CorrectDemo {
    /**
     * A person identified by name and age. Two instances with the same name
     * and age are equal and produce the same hash code.
     */
    private static class Person {
        private final String name;
        private final int age;

        /**
         * Creates a person.
         *
         * @param name the person's name; may be {@code null}
         * @param age the person's age
         */
        public Person(String name, int age) {
            this.name = name;
            this.age = age;
        }

        /** Returns the name given at construction. */
        public String getName() {
            return name;
        }

        /** Returns the age given at construction. */
        public int getAge() {
            return age;
        }

        /**
         * Combines {@code age} and {@code name} into a hash code; a
         * {@code null} name contributes 0.
         */
        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + age;
            result = prime * result + ((name == null) ? 0 : name.hashCode());
            return result;
        }

        /**
         * Compares this person with another object.
         *
         * @param obj the object to compare with
         * @return {@code true} if {@code obj} is a {@code Person} of the same
         *         class with an equal name and the same age
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Person other = (Person) obj;
            if (age != other.age) {
                return false;
            }
            // Null-safe comparison, consistent with hashCode(), which also accepts a null name.
            return name == null ? other.name == null : name.equals(other.name);
        }

        /** Returns a text form such as {@code Person [name=Jhon, age=23]}. */
        @Override
        public String toString() {
            return "Person [name=" + name + ", age=" + age + "]";
        }
    }

    /**
     * Adds three {@code Person} objects (two of them equal) to a hash set and
     * prints every element the set ends up holding.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Parameterized type instead of a raw Set, so the compiler checks element types.
        Set<Person> set = new HashSet<>();
        set.add(new Person("Jhon", 23));
        set.add(new Person("Jhon", 23));
        set.add(new Person("Li Gang", 24));
        for (Person person : set) {
            System.out.println(person);
        }
    }
}
运行结果如下:
Person [name=Jhon, age=23]
Person [name=Li Gang, age=24]