go map源码分析(三)-扩容

扩容会建立一个新的buckets,将旧的bucket数据搬迁到新的buckets。在搬迁的过程中,并不会将旧的bucket从oldbuckets中删除,而是在其tophash上加上一个"已搬迁"(evacuated)的标记。

只有当所有的bucket都从旧buckets移到新buckets之后,才会将oldbucket释放掉。 如果扩容过程中,阈值又超了呢?如果正在扩容,那么不会再进行扩容。

数据搬迁不是一次性完成的,而是在后续的数据更新过程中逐步完成,这样就避免了某一次数据更新触发扩容后,因一次性搬迁全部数据而导致的长时间耗时

扩容条件

  • 没有正在扩容中
  • 下面的二者满足之一
    • 元素个数 > bucket_count * 负载因子(6.5)
    • overflow 太多
    if noverflow >= uint16(1)<<(B&15), 进行扩容
    # 其中B的值如果大于15,则按15计算,
    # 也就是当B>=15时,noverflow>=32768(1<<15)一定会发生扩容
    

源码

源码位置: $GOROOT/src/runtime/map_faststr.go
以插入元素为例进行说明
在加入新元素的时候,判断是否需要扩容,如果需要则进行扩容。扩容不是一次完成,而是随着元素的更新来逐渐完成的

// mapassign_faststr is the string-key insert path (abridged excerpt: only the
// parts related to growing are shown). It does evacuation work when a grow is
// already in progress, and starts a new grow when the load factor or the
// overflow-bucket count is too high.
func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
    // ... (part of the source omitted)
    again:
    bucket := hash & bucketMask(h.B)
    if h.growing() {
        // A grow has already been started (hashGrow only allocates the new
        // bucket array); the data is actually moved here.
        growWork_faststr(t, h, bucket)
    }
    // ... (part of the source omitted)

    // Decide whether a grow must be started:
    // no grow is currently in progress, AND
    // either the element count is too high or there are too many overflow buckets.
    if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
        hashGrow(t, h) // start the grow
        goto again // Growing the table invalidates everything, so try again
    }

    // ... (part of the source omitted)
}

分配内存

// hashGrow starts a grow of h: it allocates the new bucket array and installs
// it in h, keeping the previous array in h.oldbuckets. It does not move any
// entries itself.
//
// If overLoadFactor(h.count+1, h.B) holds, h.B is incremented; otherwise the
// sameSizeGrow flag is set and the new array is created with the same B.
func hashGrow(t *maptype, h *hmap) {
    // If we've hit the load factor, get bigger.
    // Otherwise, there are too many overflow buckets,
    // so keep the same number of buckets and "grow" laterally.
    bigger := uint8(1)
    // h.count is the number of elements currently in the map.
    // Check whether count exceeds bucket count * load factor.
    // If it does, bigger stays 1 and the bucket count doubles; otherwise the
    // new array has the same size as the old one.
    // The bucket count is 1 << (h.B+bigger), so with bigger == 1 the ratio
    // (1 << (h.B+bigger)) / (1 << h.B) is 2, i.e. twice the space is requested.
    if !overLoadFactor(h.count+1, h.B) {
        bigger = 0
        h.flags |= sameSizeGrow
    }
    oldbuckets := h.buckets
    newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)

    // Clear both iterator bits, then carry a set iterator bit over as
    // oldIterator.
    flags := h.flags &^ (iterator | oldIterator)
    if h.flags&iterator != 0 {
        flags |= oldIterator
    }
    // commit the grow (atomic wrt gc)
    // Update the map header fields for the new generation.
    h.B += bigger
    h.flags = flags
    h.oldbuckets = oldbuckets
    h.buckets = newbuckets
    h.nevacuate = 0
    h.noverflow = 0

    if h.extra != nil && h.extra.overflow != nil {
        // Promote current overflow buckets to the old generation.
        if h.extra.oldoverflow != nil {
            throw("oldoverflow is not nil")
        }
        h.extra.oldoverflow = h.extra.overflow
        h.extra.overflow = nil
    }
    if nextOverflow != nil {
        if h.extra == nil {
            h.extra = new(mapextra)
        }
        h.extra.nextOverflow = nextOverflow
    }

    // the actual copying of the hash table data is done incrementally
    // by growWork() and evacuate().
    // The actual data movement happens in growWork; see the analysis of
    // mapassign_faststr for how growWork is reached.
}

数据搬迁

// growWork_faststr does one step of incremental evacuation for a map that is
// growing. It evacuates the old bucket that corresponds to bucket and then,
// if the map is still growing, the old bucket at index h.nevacuate.
func growWork_faststr(t *maptype, h *hmap, bucket uintptr) {
    // Evacuate the old bucket that maps onto the bucket the caller is
    // about to use.
    old := bucket & h.oldbucketmask()
    evacuate_faststr(t, h, old)

    // Stop if the grow is no longer in progress after that.
    if !h.growing() {
        return
    }

    // Evacuate one more old bucket so that growing keeps making progress.
    evacuate_faststr(t, h, h.nevacuate)
}

// evacuate_faststr copies every entry of old bucket number oldbucket, and of
// the overflow buckets chained to it, into the new bucket array, and records
// the evacuation state of each old slot in its tophash.
//
// When the map is growing bigger, an entry goes to new bucket oldbucket
// (destination x) or oldbucket+newbit (destination y) depending on whether
// hash&newbit is set; for a same-size grow every entry goes to x.
// If oldbucket equals h.nevacuate, advanceEvacuationMark is called at the end.
func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
    b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
    newbit := h.noldbuckets()
    if !evacuated(b) {
        // TODO: reuse overflow buckets instead of using new ones, if there
        // is no iterator using the old buckets.  (If !oldIterator.)

        // xy contains the x and y (low and high) evacuation destinations.
        var xy [2]evacDst
        x := &xy[0]
        x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
        x.k = add(unsafe.Pointer(x.b), dataOffset)
        // Keys are strings stepped over 2*sys.PtrSize bytes each, so the
        // values begin after bucketCnt such keys.
        x.v = add(x.k, bucketCnt*2*sys.PtrSize)

        if !h.sameSizeGrow() {
            // Only calculate y pointers if we're growing bigger.
            // Otherwise GC can see bad pointers.
            y := &xy[1]
            y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
            y.k = add(unsafe.Pointer(y.b), dataOffset)
            y.v = add(y.k, bucketCnt*2*sys.PtrSize)
        }

        // Walk the old bucket and its overflow chain.
        for ; b != nil; b = b.overflow(t) {
            k := add(unsafe.Pointer(b), dataOffset)
            v := add(k, bucketCnt*2*sys.PtrSize)
            // Visit each slot, advancing the key and value pointers in step.
            for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) {
                top := b.tophash[i]
                if top == empty {
                    // Nothing to move; just mark the slot as evacuated.
                    b.tophash[i] = evacuatedEmpty
                    continue
                }
                if top < minTopHash {
                    throw("bad map state")
                }
                // useY selects the destination: 0 for x, 1 for y.
                var useY uint8
                if !h.sameSizeGrow() {
                    // Compute hash to make our evacuation decision (whether we need
                    // to send this key/value to bucket x or bucket y).
                    hash := t.key.alg.hash(k, uintptr(h.hash0))
                    if hash&newbit != 0 {
                        useY = 1
                    }
                }

                b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap
                dst := &xy[useY]                 // evacuation destination

                // The destination bucket is full: chain a new overflow bucket
                // and restart the slot index and pointers there.
                if dst.i == bucketCnt {
                    dst.b = h.newoverflow(t, dst.b)
                    dst.i = 0
                    dst.k = add(unsafe.Pointer(dst.b), dataOffset)
                    dst.v = add(dst.k, bucketCnt*2*sys.PtrSize)
                }
                dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check

                // Copy key.
                *(*string)(dst.k) = *(*string)(k)

                // Copy value.
                typedmemmove(t.elem, dst.v, v)
                dst.i++
                // These updates might push these pointers past the end of the
                // key or value arrays.  That's ok, as we have the overflow pointer
                // at the end of the bucket to protect against pointing past the
                // end of the bucket.
                dst.k = add(dst.k, 2*sys.PtrSize)
                dst.v = add(dst.v, uintptr(t.valuesize))
            }
        }
        // Unlink the overflow buckets & clear key/value to help GC.
        // Skipped when the oldIterator flag is set or the bucket type
        // contains no pointers.
        if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
            b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
            // Preserve b.tophash because the evacuation
            // state is maintained there.
            ptr := add(b, dataOffset)
            n := uintptr(t.bucketsize) - dataOffset
            memclrHasPointers(ptr, n)
        }
    }

    if oldbucket == h.nevacuate {
        advanceEvacuationMark(h, t, newbit)
    }
}

你可能感兴趣的:(go map源码分析(三)-扩容)