Growing the map allocates a new bucket array and migrates the data from the old buckets into it. During the migration, old buckets are not removed from oldbuckets right away; each moved bucket is only marked as evacuated. Only after every bucket has been moved from the old array to the new one is oldbuckets released. What if the threshold is exceeded again while a grow is in progress? If the map is already growing, another grow is not started.
The data migration is not completed in one step; it happens gradually as the map is updated, which avoids the long pause that would result if a single update had to trigger and finish a full rehash.
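As a rough illustration (my own example, not runtime code): the program below is just an ordinary insertion loop, but the comments spell out what the runtime does for each write during a grow, which is why no single insert pays for the whole rehash.
package main

import "fmt"

func main() {
	m := make(map[string]int) // starts with a small bucket array and grows several times
	for i := 0; i < 100000; i++ {
		// each assignment may start a grow (hashGrow) and, while a grow is in
		// progress, moves at most two old buckets (growWork), so no single
		// write has to rehash the whole table
		m[fmt.Sprintf("key-%d", i)] = i
	}
	fmt.Println(len(m)) // 100000
}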
Growth conditions
- No grow is already in progress
- And at least one of the following two holds:
  - The number of elements > bucket count * load factor (6.5)
  - There are too many overflow buckets
For the overflow case: if noverflow >= uint16(1)<<(B&15), a grow is triggered (B is capped at 15, so values of B above 15 use 15; in other words, noverflow = 32768 (1<<15) is always enough to trigger a grow). The sketch below makes both checks concrete.
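Here is a simplified, self-contained sketch of the two checks. The constants mirror the runtime (8 slots per bucket, load factor 13/2 = 6.5), but this code is an illustration written for this article, not the actual runtime source.
package main

import "fmt"

const bucketCnt = 8 // slots per bucket, as in the runtime

// overLoadFactor reports whether count elements spread over 1<<B buckets
// exceed an average of 6.5 (13/2) per bucket.
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && uint64(count) > 13*((uint64(1)<<B)/2)
}

// tooManyOverflowBuckets reports whether there are roughly as many overflow
// buckets as regular buckets; B is capped at 15, so noverflow = 1<<15 = 32768
// always triggers a grow.
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
	if B > 15 {
		B = 15
	}
	return noverflow >= uint16(1)<<(B&15)
}

func main() {
	fmt.Println(overLoadFactor(53, 3))        // 53 > 6.5*8, so true: double the buckets
	fmt.Println(tooManyOverflowBuckets(9, 3)) // 9 >= 1<<3, so true: same-size grow
}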
Source code
Source location: $GOROOT/src/runtime/map_faststr.go
We use inserting an element as the example.
When a new element is added, the map checks whether it needs to grow and, if so, starts a grow. The grow is not completed at once; it is carried out gradually as elements are updated.
func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
// some code omitted
again:
bucket := hash & bucketMask(h.B)
if h.growing() {
// once a grow has been started (memory is allocated further down, in hashGrow), the data migration happens here
growWork_faststr(t, h, bucket)
}
// some code omitted
// decide whether to grow:
// not already growing &&
// (too many elements || too many overflow buckets)
if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
hashGrow(t, h) // start the grow
goto again // Growing the table invalidates everything, so try again
}
// some code omitted
}
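A practical note that follows from this check (my own example, not part of the source): if the final size is known, creating the map with a size hint allocates enough buckets up front, so overLoadFactor stays false and the grow path above is skipped during the fill loop.
package main

import "strconv"

// fillGrowing lets the map start small, so it passes through hashGrow several
// times while it fills up.
func fillGrowing(n int) map[string]int {
	m := make(map[string]int)
	for i := 0; i < n; i++ {
		m[strconv.Itoa(i)] = i
	}
	return m
}

// fillPresized requests enough buckets up front, so overLoadFactor stays false
// and the inserts never take the grow path.
func fillPresized(n int) map[string]int {
	m := make(map[string]int, n)
	for i := 0; i < n; i++ {
		m[strconv.Itoa(i)] = i
	}
	return m
}

func main() {
	_ = fillGrowing(1 << 16)
	_ = fillPresized(1 << 16)
}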
Allocating memory
func hashGrow(t *maptype, h *hmap) {
// If we've hit the load factor, get bigger.
// Otherwise, there are too many overflow buckets,
// so keep the same number of buckets and "grow" laterally.
bigger := uint8(1)
// h.count is the current number of elements in the map
// check whether count exceeds bucket count * load factor
// if so, bigger = 1, i.e. grow to twice the original number of buckets; otherwise the same amount of space is requested
// the number of buckets is 1 << (h.B+bigger), so when bigger = 1
// 1 << (h.B+bigger) / 1 << (h.B) = 2, i.e. twice the memory is requested
if !overLoadFactor(h.count+1, h.B) {
bigger = 0
h.flags |= sameSizeGrow
}
oldbuckets := h.buckets
newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)
flags := h.flags &^ (iterator | oldIterator)
if h.flags&iterator != 0 {
flags |= oldIterator
}
// commit the grow (atomic wrt gc)
// set up the new state
h.B += bigger
h.flags = flags
h.oldbuckets = oldbuckets
h.buckets = newbuckets
h.nevacuate = 0
h.noverflow = 0
if h.extra != nil && h.extra.overflow != nil {
// Promote current overflow buckets to the old generation.
if h.extra.oldoverflow != nil {
throw("oldoverflow is not nil")
}
h.extra.oldoverflow = h.extra.overflow
h.extra.overflow = nil
}
if nextOverflow != nil {
if h.extra == nil {
h.extra = new(mapextra)
}
h.extra.nextOverflow = nextOverflow
}
// the actual copying of the hash table data is done incrementally
// by growWork() and evacuate().
// the actual data migration is done in growWork; for how growWork is reached, see the analysis of mapassign_faststr above
}
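The arithmetic in the comments above is easy to verify: the number of buckets is 1 << B, so bigger = 1 doubles it and bigger = 0 (a same-size grow) leaves it unchanged. A tiny standalone illustration, not runtime code:
package main

import "fmt"

// bucketCount mirrors the 1 << B relationship used by hashGrow.
func bucketCount(B uint8) int { return 1 << B }

func main() {
	B := uint8(5)
	fmt.Println(bucketCount(B))                    // 32 buckets before the grow
	fmt.Println(bucketCount(B + 1))                // 64 buckets after a load-factor grow (bigger = 1)
	fmt.Println(bucketCount(B))                    // still 32 buckets after a same-size grow (bigger = 0)
	fmt.Println(bucketCount(B+1) / bucketCount(B)) // the ratio is 2: twice the memory
}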
Data migration
func growWork_faststr(t *maptype, h *hmap, bucket uintptr) {
// make sure we evacuate the oldbucket corresponding
// to the bucket we're about to use
evacuate_faststr(t, h, bucket&h.oldbucketmask())
// evacuate one more oldbucket to make progress on growing
if h.growing() {
evacuate_faststr(t, h, h.nevacuate)
}
}
func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
newbit := h.noldbuckets()
if !evacuated(b) {
// TODO: reuse overflow buckets instead of using new ones, if there
// is no iterator using the old buckets. (If !oldIterator.)
// xy contains the x and y (low and high) evacuation destinations.
var xy [2]evacDst
x := &xy[0]
x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
x.k = add(unsafe.Pointer(x.b), dataOffset)
x.v = add(x.k, bucketCnt*2*sys.PtrSize)
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
// Otherwise GC can see bad pointers.
y := &xy[1]
y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
y.k = add(unsafe.Pointer(y.b), dataOffset)
y.v = add(y.k, bucketCnt*2*sys.PtrSize)
}
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
v := add(k, bucketCnt*2*sys.PtrSize)
for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) {
top := b.tophash[i]
if top == empty {
b.tophash[i] = evacuatedEmpty
continue
}
if top < minTopHash {
throw("bad map state")
}
var useY uint8
if !h.sameSizeGrow() {
// Compute hash to make our evacuation decision (whether we need
// to send this key/value to bucket x or bucket y).
hash := t.key.alg.hash(k, uintptr(h.hash0))
if hash&newbit != 0 {
useY = 1
}
}
b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
dst := &xy[useY] // evacuation destination
if dst.i == bucketCnt {
dst.b = h.newoverflow(t, dst.b)
dst.i = 0
dst.k = add(unsafe.Pointer(dst.b), dataOffset)
dst.v = add(dst.k, bucketCnt*2*sys.PtrSize)
}
dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
// Copy key.
*(*string)(dst.k) = *(*string)(k)
typedmemmove(t.elem, dst.v, v)
dst.i++
// These updates might push these pointers past the end of the
// key or value arrays. That's ok, as we have the overflow pointer
// at the end of the bucket to protect against pointing past the
// end of the bucket.
dst.k = add(dst.k, 2*sys.PtrSize)
dst.v = add(dst.v, uintptr(t.valuesize))
}
}
// Unlink the overflow buckets & clear key/value to help GC.
if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
// Preserve b.tophash because the evacuation
// state is maintained there.
ptr := add(b, dataOffset)
n := uintptr(t.bucketsize) - dataOffset
memclrHasPointers(ptr, n)
}
}
if oldbucket == h.nevacuate {
advanceEvacuationMark(h, t, newbit)
}
}
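To see how the X/Y split distributes keys, here is a small standalone sketch; the hash values are made up, but the index arithmetic is the same as in evacuate_faststr above: a key in old bucket i either stays at new bucket i (X) or moves to i + newbit (Y), decided by the hash bit that the larger bucket mask newly exposes.
package main

import "fmt"

func main() {
	oldB := uint8(3)                      // 8 old buckets before the grow
	newbit := uintptr(1) << oldB          // 8, the number of old buckets (h.noldbuckets())
	hashes := []uintptr{0x2a, 0x1d, 0x93} // made-up hash values

	for _, hash := range hashes {
		oldbucket := hash & (newbit - 1) // index in the old, smaller array
		useY := hash&newbit != 0         // the newly exposed mask bit picks X or Y
		newbucket := oldbucket
		if useY {
			newbucket += newbit // Y destination: oldbucket + newbit
		}
		fmt.Printf("hash %#x: old bucket %d -> new bucket %d\n", hash, oldbucket, newbucket)
	}
}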