今天面试提到了HasmMap,之前也有看过其源代码,了解其原理,不过又忘得差不多,今天就在读下,加深印象。
1、HashMap得内部元素以Entry存在,继承Map.Entry,元素包含,Entry相当于一个LinkedList
final Object key; Object value; final int hash; Entry next; Entry(int i, Object obj, Object obj1, Entry entry) { value = obj1; next = entry; key = obj; hash = i; }
2、HashMap得变量有
static final int DEFAULT_INITIAL_CAPACITY = 16; //默认容量 static final int MAXIMUM_CAPACITY = 1073741824; //最大容量 static final float DEFAULT_LOAD_FACTOR = 0.75F; //默认加载因子 transient Entry table[]; //HashMap得Entry集合 transient int size; //当前数量 int threshold; //容量*加载因子,当size>threshold时就会resize final float loadFactor; //加载因子 volatile transient int modCount; //修改计量,在迭代器中使用,如果迭代中有修改modCount那么会抛出ConcurrentModificationException()异常 static final Object NULL_KEY = new Object(); //默认空值 private static final boolean useNewHash = false; private transient Set entrySet;
3、put方法
public Object put(Object obj, Object obj1) { if(obj == null) return putForNullKey(obj1); //新增一个 newObjedct(),如果有null key,返回一个存在的值 //计算HashCode int i = hash(obj.hashCode()); //计算位置 相当于hashCode mod table.length 使新增的对象能平均分配到各个桶中 int j = indexFor(i, table.length); //首先根据index找到桶,然后循环桶中的Entry for(Entry entry = table[j]; entry != null; entry = entry.next) { Object obj2; //根据hashCode和(equal or ==)比较新增key是否存在,如果存在就覆盖以前的值,并返回 if(entry.hash == i && ((obj2 = entry.key) == obj || obj.equals(obj2))) { Object obj3 = entry.value; entry.value = obj1; entry.recordAccess(this); return obj3; } } modCount++; //新增entry addEntry(i, obj, obj1, j); return null; }
4、addEntry
/** * 新增一个Entry * @param i hashCode * @param obj key * @param obj1 value * @param j index,桶的位置 */ void addEntry(int i, Object obj, Object obj1, int j) { //找到桶的Entry Entry entry = table[j]; //在桶中新增一个Entry,相当于Entry.next = new Entry() table[j] = new Entry(i, obj, obj1, entry); //如果HashMap中的size大于容量*加载因子,就需要扩容,把容量*2, //从这里可以看出,HashMap希望一个Entry就一个值,尽量少的next,如果HashCode分布不均匀,那么可能数据都分配到少数桶中,使HashMap等于一个LinkedList if(size++ >= threshold) resize(2 * table.length); }
5、resize扩容
//HashMap扩容 void resize(int i) { Entry aentry[] = table; int j = aentry.length; if(j == 1073741824) { threshold = 2147483647; return; } else { //重新分配内存 Entry aentry1[] = new Entry[i]; transfer(aentry1); table = aentry1; threshold = (int)((float)i * loadFactor); return; } } void transfer(Entry aentry[]) { Entry aentry1[] = table; int i = aentry.length; //循环桶 for(int j = 0; j < aentry1.length; j++) { Entry entry = aentry1[j]; if(entry == null) continue; aentry1[j] = null; //遍历Entry,重新Hash,从这可以看出,如果可以预见HashMap的数量,那么就最好指定桶的数量,不然随着数量的增多就需要resize() //resize是非常耗性能的步骤,需要把所有的值重新resize,然后分配到各个桶中 do { Entry entry1 = entry.next; int k = indexFor(entry.hash, i); entry.next = aentry[k]; aentry[k] = entry; entry = entry1; } while(entry != null); } }
6、get方法
/** * 这个方法很简单,就是根据hashCode获取桶,然后遍历桶找key */ public Object get(Object obj) { if(obj == null) return getForNullKey(); int i = hash(obj.hashCode()); for(Entry entry = table[indexFor(i, table.length)]; entry != null; entry = entry.next) { Object obj1; if(entry.hash == i && ((obj1 = entry.key) == obj || obj.equals(obj1))) return entry.value; } return null; }
6、迭代器
/** * keyIterator,valueIterator的父类,这里都是内部类,好处是可以随意的使用外部HashMap的变量和属性 * @author Administrator * */ private abstract class HashIterator implements Iterator { /** * 构造函数,特别注意expectedModCount = modCount;这个玩意说明HashMap的Iterator是不安全的,如果在遍历过程中有修改HashMap那么就Exception了 */ HashIterator() { this$0 = HashMap.this; super(); expectedModCount = modCount; HashMap.Entry aentry[] = table; int i = aentry.length; HashMap.Entry entry = null; if(size != 0) while(i > 0 && (entry = aentry[--i]) == null) ; next = entry; index = i; } public boolean hasNext() { return next != null; } /** * * @return */ HashMap.Entry nextEntry() { if(modCount != expectedModCount) throw new ConcurrentModificationException(); HashMap.Entry entry = next; if(entry == null) throw new NoSuchElementException(); HashMap.Entry entry1 = entry.next; HashMap.Entry aentry[] = table; int i; for(i = index; entry1 == null && i > 0; entry1 = aentry[--i]); index = i; next = entry1; return current = entry; } /** * 由这个方法说明可以通过Iterator可以删除HashMap */ public void remove() { if(current == null) throw new IllegalStateException(); if(modCount != expectedModCount) { throw new ConcurrentModificationException(); } else { Object obj = current.key; current = null; removeEntryForKey(obj); expectedModCount = modCount; return; } } HashMap.Entry next; int expectedModCount; int index; HashMap.Entry current; final HashMap this$0; }
总结:
1、HashMap是根据Key的HashCode来存储和获取value的,所以Key对象最好是重载HashCode()和equals()方法,否则会导致put和get出现问题,同时也使性能出现问题
2、如果知道存储HashMap的数量,最好是指定其桶的数量,默认是16(有点小),否则会频繁的resize,每次把当前桶的数量乘以2,然后重新Hash和分配存储位置,这是个非常耗性能的过程。
3、加载因子,默认是0.75,当HashMap.size > 桶数量*加载因子时就会resize(),这个最好不要修改,保留默认值。如果过高会导致性能降低,如果过低,会浪费空间,同时会导致频繁resize()
4、HashMap是允许null的,相当于存储一个new Object();貌似意义不大
5、HashMap的迭代器是不安全的,如果在迭代器外部修改HashMap,比如add(),remove(),都会导致迭代过程中报错。