Go gcache 源码分析（图解）

概述

gcache是一个用go实现的并发安全的本地缓存库。他可以实现如下功能：

指定缓存的的大小，初始化之时为cache设置size大小。
支持多种缓存的策略：Simple、LRU、LFU、ARC

Simple：最普通的缓存策略，根据先存入的先淘汰。
LUR：Least Recently Used，意思是最近最少使用。LRU Cache 的替换原则就是将最近最少使用的内容替换掉。
LFU：Least Frequently Used ，意思是最近最不常用。LFU Cache先淘汰一定时间内被访问次数最少的页面。
ARC：Adaptive Replacement Cache，ARC介于 LRU 和 LFU 之间。

支持多个回调函数

LoaderExpireFunc：过期回调函数
EvictedFunc：淘汰回调函数
PurgeVisitorFunc：清除所有key回调函数
AddedFunc：新怎key回调函数
SerializeFunc：对value序列化回调函数
DeserializeFunc：对value反序列化回调函数

支持计数事件

HitCount：命中次数
MissCount：没有命中的次数
LookupCount：查找次数
HitRate：命中率

使用singleflight机制，多个人请求一个key，保证只有一个真正获取数据其余等待结果。

简单使用

其实github上已经有了很详细的例子，其中有简单key/value、设置超时时间、设置淘汰策略、设置回调函数等各种例子。这里简单摘抄一些简单的例子：

简单key/value 设置

package main

import (
  "github.com/bluele/gcache"
  "fmt"
)

func main() {
  gc := gcache.New(20).
    LRU().
    Build()
  gc.Set("key", "ok")
  value, err := gc.Get("key")
  if err != nil {
    panic(err)
  }
  fmt.Println("Get:", value)
}

Get: ok

设置过期时间

package main

import (
  "github.com/bluele/gcache"
  "fmt"
  "time"
)

func main() {
  gc := gcache.New(20).
    LRU().
    Build()
  gc.SetWithExpire("key", "ok", time.Second*10)
  value, _ := gc.Get("key")
  fmt.Println("Get:", value)

  // Wait for value to expire
  time.Sleep(time.Second*10)

  value, err = gc.Get("key")
  if err != nil {
    panic(err)
  }
  fmt.Println("Get:", value)
}

Get: ok
// 10 seconds later, new attempt:
panic: ErrKeyNotFound

使用load回调函数

package main

import (
  "github.com/bluele/gcache"
  "fmt"
)

func main() {
  gc := gcache.New(20).
    LRU().
    LoaderFunc(func(key interface{}) (interface{}, error) {
      return "ok", nil
    }).
    Build()
  value, err := gc.Get("key")
  if err != nil {
    panic(err)
  }
  fmt.Println("Get:", value)
}
Get: ok

源码分析

实体和初始化

builder类

// 缓存builder对象，存放时间、大小和各种回调函数
type CacheBuilder struct {
    clock            Clock
    tp               string
    size             int
    loaderExpireFunc LoaderExpireFunc
    evictedFunc      EvictedFunc
    purgeVisitorFunc PurgeVisitorFunc
    addedFunc        AddedFunc
    expiration       *time.Duration
    deserializeFunc  DeserializeFunc
    serializeFunc    SerializeFunc
}

设置过期时间、策略、回调函数

// 设置策略 设置CacheBuilder的回调函数属性
func (cb *CacheBuilder) LRU() *CacheBuilder {
    return cb.EvictType(TYPE_LRU)
}

// 设置过期时间 设置CacheBuilder的Expiration属性
func (cb *CacheBuilder) Expiration(expiration time.Duration) *CacheBuilder {
    cb.expiration = &expiration
    return cb
}

// 设置驱除回调函数
func (cb *CacheBuilder) EvictedFunc(evictedFunc EvictedFunc) *CacheBuilder {
    cb.evictedFunc = evictedFunc
    return cb
}

build 输出cache对象

// 判断size和类型
func (cb *CacheBuilder) Build() Cache {
    if cb.size <= 0 && cb.tp != TYPE_SIMPLE {
        panic("gcache: Cache size <= 0")
    }

    return cb.build()
}

// 根据type来新建相对应的cache对象
func (cb *CacheBuilder) build() Cache {
    switch cb.tp {
    case TYPE_SIMPLE:
        return newSimpleCache(cb)
    case TYPE_LRU:
        return newLRUCache(cb)
    case TYPE_LFU:
        return newLFUCache(cb)
    case TYPE_ARC:
        return newARC(cb)
    default:
        panic("gcache: Unknown type " + cb.tp)
    }
}

// 举例一个SimpleCache  
func newSimpleCache(cb *CacheBuilder) *SimpleCache {
    c := &SimpleCache{}
    buildCache(&c.baseCache, cb)

    c.init()
    c.loadGroup.cache = c
    return c
}

// init 初始化simple 中的map
func (c *SimpleCache) init() {
    if c.size <= 0 {
        c.items = make(map[interface{}]*simpleItem)
    } else {
        c.items = make(map[interface{}]*simpleItem, c.size)
    }
}

// 初始化回调函数
func buildCache(c *baseCache, cb *CacheBuilder) {
    c.clock = cb.clock
    c.size = cb.size
    c.loaderExpireFunc = cb.loaderExpireFunc
    c.expiration = cb.expiration
    c.addedFunc = cb.addedFunc
    c.deserializeFunc = cb.deserializeFunc
    c.serializeFunc = cb.serializeFunc
    c.evictedFunc = cb.evictedFunc
    c.purgeVisitorFunc = cb.purgeVisitorFunc
    c.stats = &stats{}
}

接口和总体流程

type Cache interface {
    Set(key, value interface{}) error
    SetWithExpire(key, value interface{}, expiration time.Duration) error
    Get(key interface{}) (interface{}, error)
    GetIFPresent(key interface{}) (interface{}, error)
    GetALL(checkExpired bool) map[interface{}]interface{}
    get(key interface{}, onLoad bool) (interface{}, error)
    Remove(key interface{}) bool
    Purge()
    Keys(checkExpired bool) []interface{}
    Len(checkExpired bool) int
    Has(key interface{}) bool

    statsAccessor
}

type statsAccessor interface {
    HitCount() uint64
    MissCount() uint64
    LookupCount() uint64
    HitRate() float64
}


type baseCache struct {
    clock            Clock
    size             int
    loaderExpireFunc LoaderExpireFunc
    evictedFunc      EvictedFunc
    purgeVisitorFunc PurgeVisitorFunc
    addedFunc        AddedFunc
    deserializeFunc  DeserializeFunc
    serializeFunc    SerializeFunc
    expiration       *time.Duration
    mu               sync.RWMutex
    loadGroup        Group
    *stats
}

SimpleCache

SimpleCache是gcache中最简单的一种，其中比较重要的函数就是Get，Set。
在SimpleCache结构体中items保存这simpleItem。simpleItem结构体中保存具体值和过期时间。
Get，Set函数就是通过操作items属性来保存和获取缓存中的值的。下面我们详细看一下代码：

结构体

type SimpleCache struct {
    baseCache
    items map[interface{}]*simpleItem
}

type simpleItem struct {
    clock      Clock
    value      interface{}
    expiration *time.Time
}

Set方法

func (c *SimpleCache) set(key, value interface{}) (interface{}, error) {
    var err error
    // 判断是否有序列化函数 有则执行回调函数
    if c.serializeFunc != nil {
        value, err = c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
    }

    // 检查是否存在key
    item, ok := c.items[key]
    if ok {
        item.value = value
    } else {
        // 检查是否超过设置的大小范围
        if (len(c.items) >= c.size) && c.size > 0 {
            // 如果超过大小则驱逐一个
            c.evict(1)
        }
        // 组成simpleItem对象
        item = &simpleItem{
            clock: c.clock,
            value: value,
        }
        c.items[key] = item
    }
    
    // 判断是否有过期时间
    if c.expiration != nil {
        // 如果有则设置过期时间
        t := c.clock.Now().Add(*c.expiration)
        item.expiration = &t
    }
    // 判断是否有添加函数 有则添加
    if c.addedFunc != nil {
        c.addedFunc(key, value)
    }

    return item, nil
}

// SimpleCache 驱逐方法 
// 驱逐策略则是最简单的淘汰一个，因为map的特性 range访问的是随机的数据。所以驱逐出去的数据也是随机的一个。
func (c *SimpleCache) evict(count int) {
    now := c.clock.Now()
    current := 0
    for key, item := range c.items {
        if current >= count {
            return
        }
        if item.expiration == nil || now.After(*item.expiration) {
            defer c.remove(key)
            current++
        }
    }
}

Get方法

// get函数 从缓存中获取数据
func (c *SimpleCache) get(key interface{}, onLoad bool) (interface{}, error) {
    // 内部方法根据key获取值
    v, err := c.getValue(key, onLoad)
    if err != nil {
        return nil, err
    }
    if c.deserializeFunc != nil {
        return c.deserializeFunc(key, v)
    }
    return v, nil
}

// 内部获取方法
// 1. 加锁
// 2. 判断是否过期 如果过期直接删除数据
// 3. 如果没有过期则返回数据 增加hit基数器 
// 4. 如果没有命中 增加MissCount
func (c *SimpleCache) getValue(key interface{}, onLoad bool) (interface{}, error) {
    c.mu.Lock()
    item, ok := c.items[key]
    if ok {
        if !item.IsExpired(nil) {
            v := item.value
            c.mu.Unlock()
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return v, nil
        }
        c.remove(key)
    }
    c.mu.Unlock()
    if !onLoad {
        c.stats.IncrMissCount()
    }
    return nil, KeyNotFoundError
}

LRUCache

LRU在之前已经介绍过了，意思是最近最少使用。LRU Cache 的替换原则就是将最近最少使用的内容替换掉。
gcache实现的方法是通过链表来实现这个策略。当每次get或者set之后则把这个节点放到链表的头部，当需要超过size时则删除链表尾部的节点数据。这样就实现了最近最少使用的策略。

结构体

type LRUCache struct {
    baseCache
    items     map[interface{}]*list.Element
    evictList *list.List
}

type lruItem struct {
    clock      Clock
    key        interface{}
    value      interface{}
    expiration *time.Time
}

Set方法

// 先加锁防止多线程修改数据，调用内部set方法设置数据。
func (c *LRUCache) Set(key, value interface{}) error {
    c.mu.Lock()
    defer c.mu.Unlock()
    _, err := c.set(key, value)
    return err
}

// 内部设置数据方法
func (c *LRUCache) set(key, value interface{}) (interface{}, error) {
    var err error
    // 判断执行序列化回调函数
    if c.serializeFunc != nil {
        value, err = c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
    }

    // Check for existing item
    var item *lruItem
    // 从items map中获取值 
    if it, ok := c.items[key]; ok {
        // 如果key原本就存在，则重新设置然后移动节点到链表的头部
        c.evictList.MoveToFront(it)
        item = it.Value.(*lruItem)
        item.value = value
    } else {
        // 如果超过size则调用evict函数根据LRU策略去除缓存中的一个数据
        if c.evictList.Len() >= c.size {
            c.evict(1)
        }
        // 创建对象然后放入链表和items中
        item = &lruItem{
            clock: c.clock,
            key:   key,
            value: value,
        }
        c.items[key] = c.evictList.PushFront(item)
    }
    // 判断是否有过期时间 有则设置
    if c.expiration != nil {
        t := c.clock.Now().Add(*c.expiration)
        item.expiration = &t
    }
    // 判断调用 added回调函数
    if c.addedFunc != nil {
        c.addedFunc(key, value)
    }

    return item, nil
}

// 驱逐函数 
func (c *LRUCache) evict(count int) {
    // 循环删除链表尾部的节点
    for i := 0; i < count; i++ {
        ent := c.evictList.Back()
        if ent == nil {
            return
        } else {
            c.removeElement(ent)
        }
    }
}

LFU Cache

LFU：意思是最近最不常用。LFU Cache先淘汰一定时间内被访问次数最少的页面。

源码分析

LFU策略，淘汰的是访问次数最少的，意味着cache需要保存每个缓存数据的访问次数。但如何保存访问次数呢，我们可以看下面的结构体定义。

items map[interface{}]*lfuItem ：保存数据，保证访问时候的高效
lfuItem：保存在map中，其中存放这key、value、过期时间、一个链表节点的地址。这个地址用来方便操作链表中的数据。
freqList：链表结构，保存freqEntry
freqEntry：包含两个字段一个是freq用来保存访问次数，另一个是items map类型用来保存次访问次数的具体数据，可以是多个

gcache的LFU使用一个map来保存数据一个链表（包含次数和map）来保存缓存中数据被访问的次数。初次set时访问次数默认为0。如果淘汰则是淘汰被访问次数最少的，则可以从链表的头部开始扫描，一直找到最少的。

图解

初始化

图一是set5个字符串到cache中，5个字符串不重复。items中的数据我们不看只画了链表中的数据状态。
这个时候链表中只有一个节点，这个节点数据中的freq为0，意味着这个节点中的数据都是没有被访问的。

操作过后的图

图二是经过几次get和一次set操作后的链表数据结果。可以看到链表的每一个节点都代表着一个访问次数并且依次递增。
每次get访问数据时候通过上面提到的lfuItem中的指针获取到节点在链表所在的位置，把数据往后移动一个节点。如果没有节点测创建一个以此类推。那么得到的结果就是越靠近头部的数据访问次数是最少的。如果淘汰则优先淘汰这些数据。

结构体

type LFUCache struct {
    baseCache
    items    map[interface{}]*lfuItem
    freqList *list.List // list for freqEntry
}

type freqEntry struct {
    freq  uint
    items map[*lfuItem]struct{}
}

type lfuItem struct {
    clock       Clock
    key         interface{}
    value       interface{}
    freqElement *list.Element
    expiration  *time.Time
}

Set方法

func (c *LFUCache) Set(key, value interface{}) error {
    c.mu.Lock()
    defer c.mu.Unlock()
    _, err := c.set(key, value)
    return err
}

// set内部方法
func (c *LFUCache) set(key, value interface{}) (interface{}, error) {
    var err error
    if c.serializeFunc != nil {
        value, err = c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
    }

    // 检查key是否存在
    item, ok := c.items[key]
    if ok {
        // 存在则直接赋值
        item.value = value
    } else {
        // 不存在并且数量超出则执行驱逐函数
        if len(c.items) >= c.size {
            c.evict(1)
        }
        // 新建item对象
        item = &lfuItem{
            clock:       c.clock,
            key:         key,
            value:       value,
            freqElement: nil,
        }
        // 把新建的lfuitem对象放到链表第一个节点中
        el := c.freqList.Front()
        fe := el.Value.(*freqEntry)
        fe.items[item] = struct{}{}

        item.freqElement = el
        c.items[key] = item
    }

    if c.expiration != nil {
        t := c.clock.Now().Add(*c.expiration)
        item.expiration = &t
    }

    if c.addedFunc != nil {
        c.addedFunc(key, value)
    }

    return item, nil
}

// 驱逐函数
func (c *LFUCache) evict(count int) {
    // 获取链表第一个节点
    entry := c.freqList.Front()
    // 循环count
    for i := 0; i < count; {
        if entry == nil {
            return
        } else {
            // 循环判断啊链表节点中是否有数据 如果没有则调用next 继续循环
            for item, _ := range entry.Value.(*freqEntry).items {
                if i >= count {
                    return
                }
                c.removeItem(item)
                i++
            }
            entry = entry.Next()
        }
    }
}

Get方法

func (c *LFUCache) get(key interface{}, onLoad bool) (interface{}, error) {
    v, err := c.getValue(key, onLoad)
    if err != nil {
        return nil, err
    }
    if c.deserializeFunc != nil {
        return c.deserializeFunc(key, v)
    }
    return v, nil
}

// 判断是否过期，如果没过期则获取并且执行increment函数操作链表
func (c *LFUCache) getValue(key interface{}, onLoad bool) (interface{}, error) {
    c.mu.Lock()
    item, ok := c.items[key]
    if ok {
        if !item.IsExpired(nil) {
            c.increment(item)
            v := item.value
            c.mu.Unlock()
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return v, nil
        }
        c.removeItem(item)
    }
    c.mu.Unlock()
    if !onLoad {
        c.stats.IncrMissCount()
    }
    return nil, KeyNotFoundError
}

// 将lfuItem 放入下一个节点中的map中，如果没有则创建一个新的lfuItem
func (c *LFUCache) increment(item *lfuItem) {
    currentFreqElement := item.freqElement
    currentFreqEntry := currentFreqElement.Value.(*freqEntry)
    nextFreq := currentFreqEntry.freq + 1
    delete(currentFreqEntry.items, item)

    nextFreqElement := currentFreqElement.Next()
    if nextFreqElement == nil {
        nextFreqElement = c.freqList.InsertAfter(&freqEntry{
            freq:  nextFreq,
            items: make(map[*lfuItem]struct{}),
        }, currentFreqElement)
    }
    nextFreqElement.Value.(*freqEntry).items[item] = struct{}{}
    item.freqElement = nextFreqElement
}

ARC Cache

ARC：Adaptive Replacement Cache，ARC介于 LRU 和 LFU 之间。

源码分析

ARC是介于LRU和LFU之间的算法。也是通过map来存储数据，保证存取的性能。那是如何实现LRU和LFU又是如何平衡两个策略的呢？
结构体可以参看下面的代码：

items: map数据结构保存key，value则是arcItem结构体，其中包含了key、value、过期时间。注意其中没有像LFU的链表指针。
t1：LRU策略，set之后会放入t1中限制数量跟整个cache数量相同。
t2：LFU策略，当get访问之后会从t1移动到t2之中，不过无论访问几次都会在t2之中，不像LFU一样会记录访问次数。
b1：接收t1（LRU）策略淘汰的缓存数据。如果超过size则直接从cache中删除。
b2：接收t2（LFU）策略淘汰的缓存数据。跟b1一样超过size也会从cache中删除。

那每次Set、Get数据又是怎么流动的呢？下面图解：
图一：是初始化并且添加5条数据之后cache内部数据结构。items保存全部数据，因为没有访问数据则所有数据都会放到t1中。

图二：获取了aaa、bbb、ddd、eee4个数据，然后有set了fff到cache中。假设这个cache的size为5。
其中aaa、bbb、ddd、eee被移动到了t2中，剩下的ccc没有访问则会继续保留再t1之中。但是最后一条语句又设置了fff到cache中。发现size已经满则需要淘汰一个数据，则会淘汰t1中的数据ccc移动到b1中。items之中则没有ccc数据了。
最终的数据流动如下图：

结构体

type ARC struct {
    baseCache
    items map[interface{}]*arcItem

    part int
    t1   *arcList
    t2   *arcList
    b1   *arcList
    b2   *arcList
}

type arcItem struct {
    clock      Clock
    key        interface{}
    value      interface{}
    expiration *time.Time
}

type arcList struct {
    l    *list.List
    keys map[interface{}]*list.Element
}

Set方法

func (c *ARC) Set(key, value interface{}) error {
    c.mu.Lock()
    defer c.mu.Unlock()
    _, err := c.set(key, value)
    return err
}

// 1. 判断缓存中是否有数据
// 2. 在b1，b2中查看是否存在，如果存在则删除b1 b2 重新放入到t2中
// 3. 
func (c *ARC) set(key, value interface{}) (interface{}, error) {
    var err error
    if c.serializeFunc != nil {
        value, err = c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
    }

    item, ok := c.items[key]
    if ok {
        item.value = value
    } else {
        item = &arcItem{
            clock: c.clock,
            key:   key,
            value: value,
        }
        c.items[key] = item
    }

    if c.expiration != nil {
        t := c.clock.Now().Add(*c.expiration)
        item.expiration = &t
    }

    defer func() {
        if c.addedFunc != nil {
            c.addedFunc(key, value)
        }
    }()

    if c.t1.Has(key) || c.t2.Has(key) {
        return item, nil
    }

    if elt := c.b1.Lookup(key); elt != nil {
        c.setPart(minInt(c.size, c.part+maxInt(c.b2.Len()/c.b1.Len(), 1)))
        c.replace(key)
        c.b1.Remove(key, elt)
        c.t2.PushFront(key)
        return item, nil
    }

    if elt := c.b2.Lookup(key); elt != nil {
        c.setPart(maxInt(0, c.part-maxInt(c.b1.Len()/c.b2.Len(), 1)))
        c.replace(key)
        c.b2.Remove(key, elt)
        c.t2.PushFront(key)
        return item, nil
    }

    if c.isCacheFull() && c.t1.Len()+c.b1.Len() == c.size {
        if c.t1.Len() < c.size {
            c.b1.RemoveTail()
            c.replace(key)
        } else {
            pop := c.t1.RemoveTail()
            item, ok := c.items[pop]
            if ok {
                delete(c.items, pop)
                if c.evictedFunc != nil {
                    c.evictedFunc(item.key, item.value)
                }
            }
        }
    } else {
        total := c.t1.Len() + c.b1.Len() + c.t2.Len() + c.b2.Len()
        if total >= c.size {
            if total == (2 * c.size) {
                if c.b2.Len() > 0 {
                    c.b2.RemoveTail()
                } else {
                    c.b1.RemoveTail()
                }
            }
            c.replace(key)
        }
    }
    c.t1.PushFront(key)
    return item, nil
}

Get方法

如果t1中存在则从t1移动到t2，如果存在再t2之中则放到t2的头部节点。

func (c *ARC) Get(key interface{}) (interface{}, error) {
    v, err := c.get(key, false)
    if err == KeyNotFoundError {
        return c.getWithLoader(key, true)
    }
    return v, err
}

func (c *ARC) get(key interface{}, onLoad bool) (interface{}, error) {
    v, err := c.getValue(key, onLoad)
    if err != nil {
        return nil, err
    }
    if c.deserializeFunc != nil {
        return c.deserializeFunc(key, v)
    }
    return v, nil
}

func (c *ARC) getValue(key interface{}, onLoad bool) (interface{}, error) {
    c.mu.Lock()
    defer c.mu.Unlock()
    if elt := c.t1.Lookup(key); elt != nil {
        c.t1.Remove(key, elt)
        item := c.items[key]
        if !item.IsExpired(nil) {
            c.t2.PushFront(key)
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return item.value, nil
        } else {
            delete(c.items, key)
            c.b1.PushFront(key)
            if c.evictedFunc != nil {
                c.evictedFunc(item.key, item.value)
            }
        }
    }
    if elt := c.t2.Lookup(key); elt != nil {
        item := c.items[key]
        if !item.IsExpired(nil) {
            c.t2.MoveToFront(elt)
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return item.value, nil
        } else {
            delete(c.items, key)
            c.t2.Remove(key, elt)
            c.b2.PushFront(key)
            if c.evictedFunc != nil {
                c.evictedFunc(item.key, item.value)
            }
        }
    }

    if !onLoad {
        c.stats.IncrMissCount()
    }
    return nil, KeyNotFoundError
}

总结

自此gcache所有的策略都已经分析完了。看完分析可以看出来gcache支持的策略很多，并且使用十分简单。只要在声明的时候确定好策略就可以使用对应的策略。更加支持各种回调函数，让逻辑更加灵活复合各种需求。
写这篇文章也在网上找了一些资料，但是都不是特别的详细所以不停的调试和画图分析出来的结果。希望能对大家能有所帮助。

Go gcache 源码分析（图解）

概述

简单使用

简单key/value 设置

设置过期时间

使用load回调函数

源码分析

实体和初始化

builder类

设置过期时间、策略、回调函数

build 输出cache对象

接口和总体流程

SimpleCache

结构体

Set方法

Get方法

LRUCache

结构体

Set方法

LFU Cache

源码分析

图解

初始化

操作过后的图

结构体

Set方法

Get方法

ARC Cache

源码分析

结构体

Set方法

Get方法

总结

你可能感兴趣的:(源码分析,golang)