长安链DAG并行调度源码解析

区块链中影响性能的地方有很多,这里主要分析在共识算法中如何快速验证交易。长安链实现高性能的特性之一就是交易的并行执行。下面简单介绍一下用到的算法及其作用。

DAG(有向无环图)

DAG的结构能够用来描述节点之间的依赖关系,如果节点之间没有依赖,则就可以并行执行,这里的节点指代就是交易

DAG拓扑排序

当DAG结构创建完成后,可以根据其结构来确定节点顺序

Bitmap位图

位图的应用很多,这里主要用到位图的查找与去重:将交易读写集里的 Key 映射到位图上,用来判断多笔交易的读写集是否冲突。

模拟数据

为了模拟数据,写了一个合约。这个合约只有 2 个方法:一个 add 方法添加金额;一个 sub 方法减去金额,会先判断余额是否充足,不足则提示错误。这么做也是为了观察如何解决双花问题。示例代码如下:

    // Send 4 transactions against the same "amount" key: one deposit of 10
    // followed by three withdrawals of 3 each.
    for i := 0; i < 4; i++ {
        params := map[string]string{"key": "amount"}
        if i == 0 {
            // First tx: deposit 10.
            params["value"] = "10"
            _, _, err = manager.Invoke(contractName, "add", params, false)
        } else {
            // Remaining three txs: each spends 3.
            params["value"] = "3"
            _, _, err = manager.Invoke(contractName, "sub", params, false)
        }
        if err != nil {
            fmt.Println(err)
        }
    }

注意这里发送了4笔交易 1笔为充值10元 其余3笔为消费3元

源码部分

交易调度源码入口在module/core/scheduler/tx_scheduler_impl.go里的func (ts *TxSchedulerImpl) Schedule(block *commonpb.Block, txBatch []*commonpb.Transaction, snapshot protocol.Snapshot)方法,调度算法解决的就是多交易里的读写集冲突问题,核心思路就是如果所有交易没有读写集冲突,可以直接进行交易的并行执行,否则需要将有冲突的交易进行一个排序,所有共识节点都按照该顺序执行,用于保证分布式一致性,解释一下输入参数

  • block 生成的新区块
  • txBatch 本次调度的交易列表,即上面发送的 4 笔交易
  • snapshot 交易快照,记录交易预执行后的交易结果,交易读写集,创建DAG结构等

// Schedule according to a batch of transactions, and generating DAG according to the conflict relationship
func (ts *TxSchedulerImpl) Schedule(block *commonpb.Block, txBatch []*commonpb.Transaction, snapshot protocol.Snapshot) (map[string]*commonpb.TxRWSet, error) {
...
    txBatchSize := len(txBatch)
    // Channel carrying transactions waiting to be executed (or re-executed after a conflict)
    runningTxC := make(chan *commonpb.Transaction, txBatchSize)
    timeoutC := time.After(ScheduleTimeout * time.Second)
    finishC := make(chan bool)
...
    // Third-party high-performance goroutine pool that reuses goroutines under heavy concurrency
    if goRoutinePool, err = ants.NewPool(runtime.NumCPU()*4, ants.WithPreAlloc(true)); err != nil {
        return nil, err
    }
    go func() {
        for {
            select {
            case tx := <-runningTxC:
                // Execute transactions concurrently on the pool
                err := goRoutinePool.Submit(func() {
                    // If snapshot is sealed, no more transaction will be added into snapshot
                    // txSimContext records the contract call and all contract-related operations
                    txSimContext := newTxSimContext(ts.VmManager, snapshot, tx)
                    ...
                    // Run the contract; the tx is stored in the block whether or not it succeeded
                    if txResult, err = ts.runVM(tx, txSimContext); err != nil {
                        runVmSuccess = false
                        tx.Result = txResult
                        txSimContext.SetTxResult(txResult)
                        ts.log.Errorf("failed to run vm for tx id:%s during schedule, tx result:%+v, error:%+v", tx.Header.GetTxId(), txResult, err)
                    } else {
                        tx.Result = txResult
                        txSimContext.SetTxResult(txResult)
                    }
                    // Reorder txs by read-write conflict: a conflicting tx is pushed back
                    // onto runningTxC and its contract is executed again
                    applyResult, applySize := snapshot.ApplyTxSimContext(txSimContext, runVmSuccess)
                    if !applyResult {
                        runningTxC <- tx
                    } else {
                    ...
                    }
                    // If all transactions have been successfully added to dag
                    if applySize >= txBatchSize {
                        finishC <- true
                    }
                })
            case <-timeoutC:
                ts.scheduleFinishC <- true
                ts.log.Debugf("schedule reached time limit")
                return
            case <-finishC:
                ts.log.Debugf("schedule finish")
                ts.scheduleFinishC <- true
                return
            }
        }
    }()
    // Feed every tx of the batch into the channel
    go func() {
        for _, tx := range txBatch {
            runningTxC <- tx
        }
    }()
    // Wait until scheduling completes
    <-ts.scheduleFinishC
    ...
    // Build the DAG structure from the final snapshot order
    block.Dag = snapshot.BuildDAG()
    // The tx table that maps one-to-one onto the DAG vertices
    block.Txs = snapshot.GetTxTable()
    ...
    return txRWSetMap, nil
}

这里有 2 个非常核心的代码片段,依次分析一下。
首先,长安链对于执行出错的交易也会存储到区块中。下面讲解几个核心代码片段。

  1. txSimContext := newTxSimContext(ts.VmManager, snapshot, tx)

    txSimContext实例临时存储交易执行结果,读写集和交易执行的索引,其中最重要的一个属性txExecSeq 记录这笔交易当时运行时的位置

     txSimContextImpl{
         txExecSeq:     snapshot.GetSnapshotSize(), // position in the snapshot at the instant this tx starts running
         tx:            tx,                         // the transaction being simulated
         txReadKeyMap:  make(map[string]*commonpb.TxRead, 8),  // read set accumulated while the contract runs
         txWriteKeyMap: make(map[string]*commonpb.TxWrite, 8), // write set accumulated while the contract runs
         snapshot:      snapshot,
         vmManager:     vmManager,
         gasUsed:       0,
         currentDeep:   0, // current cross-contract call depth
         hisResult:     make([]*callContractResult, 0),
     }
    
  2. snapshot.ApplyTxSimContext(txSimContext, runVmSuccess)

    交易根据是否有读写集冲突进行重新排序,将存在读写集冲突的交易放到runningTxC后面进行重试。

    applyResult, applySize := snapshot.ApplyTxSimContext(txSimContext, runVmSuccess)
    if !applyResult {
       runningTxC <- tx
    }
    

    解释一下就是交易并发时,总会有个先后顺序,如果后执行的交易与已经执行的交易有读写集冲突,则将后执行的交易移动到队列里后进行重试,则最终在快照里生成一个新的交易顺序 来看下ApplyTxSimContext源码

    
    // After the read-write set is generated, add TxSimContext to the snapshot
    // return if apply successfully or not, and current applied tx num
    func (s *SnapshotImpl) ApplyTxSimContext(cache protocol.TxSimContext, runVmSuccess bool) (bool, int) {
     if s.IsSealed() {
         return false, s.GetSnapshotSize()
     }
    
     s.lock.Lock()
     defer s.lock.Unlock()
    
     tx := cache.GetTx()
     //获取并发瞬时时该交易执行的位置
     txExecSeq := cache.GetTxExecSeq()
     var txRWSet *commonPb.TxRWSet
     var txResult *commonPb.Result
    
     // Only when the virtual machine is running normally can the read-write set be saved
     if runVmSuccess {
         txRWSet = cache.GetTxRWSet()
     } else {
         // 交易执行错误 意味这没有读写集 可以并行执行
         txRWSet = &commonPb.TxRWSet{
             TxId:     tx.Header.TxId,
             TxReads:  nil,
             TxWrites: nil,
         }
     }
     //获取交易结果
     txResult = cache.GetTxResult()
     
     // 1.这个判断非常重要因为快照s*SnapshotImpl是记录了所有的交易的结果和顺序
     // 2.记住这里是并发执行的,所以s.txTable是会变的
     // 解析一下,如果当前交易在瞬时执行的位置 正好是快照里的最后一个,那么顺序就定了,可以把它添加到快照交易里
     // 否则 txExecSeq= len(s.txTable) {
         s.apply(tx, txRWSet, txResult)
         return true, len(s.txTable)
     }
     
     // 判断读写冲突 
     // Check whether the dependent state has been modified during the run
     for _, txRead := range txRWSet.TxReads {
         finalKey := constructKey(txRead.ContractName, txRead.Key)
         if sv, ok := s.writeTable[finalKey]; ok {
             // 如果新加入的交易写集 和 当前交易的读集有冲突,则需要启用重试机制
             if sv.seq >= txExecSeq {
                 //log.Debugf("Key Conflicted %+v-%+v", sv.seq, txExecSeq)
                 return false, len(s.txTable)
             }
         }
     }
    
     // 如果只是写写冲突 那么就需要确定一个顺序就行,不需要重试
     s.apply(tx, txRWSet, txResult)
     return true, len(s.txTable)
    }
    
    

snapshot.BuildDAG()

当上面的调度完成后,snapshot 里的 txTable 就是新的交易顺序,并且没有冲突的交易排在前面,有冲突的交易排在后面。但目前还不知道快照里的交易哪些可以并行执行、哪些需要串行执行,这时候就需要构建 DAG 了。来看下如何构建 DAG:

   func (s *SnapshotImpl) BuildDAG() *commonPb.DAG {
   ...
   txCount := len(s.txTable)
   // build read-write bitmap for all transactions
   // Build the read/write-set bitmaps for every tx in the snapshot
   readBitmaps, writeBitmaps := s.buildRWBitmaps()
   // Remember: txs with read-write conflicts were pushed to the back of s.txTable.
   // cumulativeReadBitmap / cumulativeWriteBitmap union each tx's read/write set with
   // everything before it in s.txTable, so the bitmap answers
   // "has this key been touched by an earlier tx?" with a single bitwise test
   cumulativeReadBitmap, cumulativeWriteBitmap := s.buildCumulativeBitmap(readBitmaps, writeBitmaps)

   dag := &commonPb.DAG{}
   if txCount == 0 {
       return dag
   }

   dag.Vertexes = make([]*commonPb.DAG_Neighbor, txCount)

   // build DAG base on read and write bitmaps
   // reachMap describes reachability from tx i to tx j in DAG.
   // For example, if the DAG is tx3 -> tx2 -> tx1 -> begin, the reachMap is
   //      tx1     tx2     tx3
   // tx1  0       0       0
   // tx2  1       0       0
   // tx3  1       1       0
   // Records reachability between txs in the DAG
   reachMap := make([]*bitmap.Bitmap, txCount)
   // Detect read/write-set conflicts via the bitmaps to build the DAG structure
   for i := 0; i < txCount; i++ {
       // 1、get read and write bitmap for tx i
       readBitmapForI := readBitmaps[i]
       writeBitmapForI := writeBitmaps[i]

       // directReach is used to build DAG
       // reach is used to save reachability we have already known
       directReachFromI := &bitmap.Bitmap{}
       reachFromI := &bitmap.Bitmap{}
       reachFromI.Set(i)
       // Check whether tx i conflicts with any earlier tx; if not it can run in parallel
       if i > 0 && s.fastConflicted(readBitmapForI, writeBitmapForI, cumulativeReadBitmap[i-1], cumulativeWriteBitmap[i-1]) {
           // check reachability one by one, then build table
           // Find the concrete dependency edges
           s.buildReach(i, reachFromI, readBitmaps, writeBitmaps, readBitmapForI, writeBitmapForI, directReachFromI, reachMap)
       }
       // Record the reachability computed for tx i
       reachMap[i] = reachFromI

       // build DAG based on directReach bitmap
       dag.Vertexes[i] = &commonPb.DAG_Neighbor{
           Neighbors: make([]int32, 0, 16),
       }
       // Bits set in directReachFromI are txTable indexes of conflicting parent txs;
       // dag.Vertexes[i].Neighbors therefore maps directly onto txTable indexes
       for _, j := range directReachFromI.Pos1() {
           dag.Vertexes[i].Neighbors = append(dag.Vertexes[i].Neighbors, int32(j))
       }
   }
   log.Debugf("build DAG for block %d finished", s.blockHeight)
   return dag
}

s.buildRWBitmaps()根据交易创建读写集位图

// buildRWBitmaps converts the read set and write set of every transaction in
// the snapshot into bitmaps. Each distinct key gets the next free bit
// position, shared between reads and writes, so two transactions touching the
// same key set the same bit and conflicts reduce to bitwise intersection.
func (s *SnapshotImpl) buildRWBitmaps() ([]*bitmap.Bitmap, []*bitmap.Bitmap) {
	// nextBit is the bit position assigned to the next previously-unseen key.
	nextBit := 0
	total := len(s.txTable)
	reads := make([]*bitmap.Bitmap, total)
	writes := make([]*bitmap.Bitmap, total)
	// keyToBit caches the bit position already assigned to each key.
	keyToBit := make(map[string]int, 1024)

	// setBit marks key's bit in bm, allocating a fresh position for new keys.
	setBit := func(bm *bitmap.Bitmap, key []byte) {
		if pos, seen := keyToBit[string(key)]; seen {
			bm.Set(pos)
			return
		}
		keyToBit[string(key)] = nextBit
		bm.Set(nextBit)
		nextBit++
	}

	for i := 0; i < total; i++ {
		// Translate tx i's read set into bitmap form.
		reads[i] = &bitmap.Bitmap{}
		for _, item := range s.txRWSetTable[i].TxReads {
			setBit(reads[i], item.Key)
		}
		// Translate tx i's write set into bitmap form.
		writes[i] = &bitmap.Bitmap{}
		for _, item := range s.txRWSetTable[i].TxWrites {
			setBit(writes[i], item.Key)
		}
	}
	return reads, writes
}

buildCumulativeBitmap()用于快速检索读写集冲突

 // buildCumulativeBitmap returns running prefix-unions of the per-tx read and
 // write bitmaps: entry i is the union of bitmaps 0..i. Later code can then
 // test in one bitwise operation whether any earlier tx touched a given key.
 func (s *SnapshotImpl) buildCumulativeBitmap(readBitmap []*bitmap.Bitmap, writeBitmap []*bitmap.Bitmap) ([]*bitmap.Bitmap, []*bitmap.Bitmap) {
 	// accumulate builds the prefix-union sequence for one bitmap slice.
 	accumulate := func(src []*bitmap.Bitmap) []*bitmap.Bitmap {
 		dst := make([]*bitmap.Bitmap, len(src))
 		for i, b := range src {
 			dst[i] = b.Clone()
 			if i > 0 {
 				// Fold in everything the preceding transactions touched.
 				dst[i].Or(dst[i-1])
 			}
 		}
 		return dst
 	}
 	return accumulate(readBitmap), accumulate(writeBitmap)
 }

s.buildReach(i, reachFromI, readBitmaps, writeBitmaps, readBitmapForI, writeBitmapForI, directReachFromI, reachMap) 在存在读写依赖时,找到出现冲突的交易,并记录其位图位置,源码如下:

// buildReach walks transactions j < i from nearest to farthest, adding a
// direct DAG edge from tx i to each earlier tx whose read/write set conflicts
// with tx i's, and folding that tx's known reachability into reachFromI so
// ancestors already covered by an edge are skipped.
func (s *SnapshotImpl) buildReach(i int, reachFromI *bitmap.Bitmap,
	readBitmaps []*bitmap.Bitmap, writeBitmaps []*bitmap.Bitmap,
	readBitmapForI *bitmap.Bitmap, writeBitmapForI *bitmap.Bitmap,
	directReachFromI *bitmap.Bitmap, reachMap []*bitmap.Bitmap) {

	for prev := i - 1; prev >= 0; prev-- {
		// Already reachable via a closer conflicting tx — nothing to add.
		if reachFromI.Has(prev) {
			continue
		}
		// No read/write conflict with tx prev, so no dependency edge.
		if !s.conflicted(readBitmapForI, writeBitmapForI, readBitmaps[prev], writeBitmaps[prev]) {
			continue
		}
		// Record a direct edge: prev is a parent of i in the DAG...
		directReachFromI.Set(prev)
		// ...and absorb prev's reachability so its ancestors are not re-linked.
		reachFromI.Or(reachMap[prev])
	}
}

源码解析基本结束,总结一下就是DAG用于确定交易可以并发执行时的顺序,最后附上一个手写图希望能加深理解

s.txTable 快照里调度完成后的交易
reachMap 就是代码里的reachMap := make([]*bitmap.Bitmap, txCount)记录交易DAG可达性
DAG索引 就是代码里的dag.Vertexes 也就是最终决定哪些交易能并发执行

共识节点验证交易DAG

共识节点会按照DAG结构进行并发验证交易,验证代码如下


// SimulateWithDag based on the dag in the block, perform scheduling and execution transactions
func (ts *TxSchedulerImpl) SimulateWithDag(block *commonpb.Block, snapshot protocol.Snapshot) (map[string]*commonpb.TxRWSet, map[string]*commonpb.Result, error) {
    ...
    // Index the txs by position so workers can fetch them by DAG vertex index
    txMapping := make(map[int]*commonpb.Transaction)
    for index, tx := range block.Txs {
        txMapping[index] = tx
    }

    // Construct the adjacency list of dag, which describes the subsequent adjacency transactions of all transactions
    dag := block.Dag
    // Convert the DAG into a dictionary for convenient lookup and mutation
    dagRemain := make(map[int]dagNeighbors)
    for txIndex, neighbors := range dag.Vertexes {
        dn := make(dagNeighbors)
        // Record the indexes of the txs this one depends on
        for _, neighbor := range neighbors.Neighbors {
            dn[int(neighbor)] = true
        }
        dagRemain[txIndex] = dn
    }

    txBatchSize := len(block.Dag.Vertexes)
    runningTxC := make(chan int, txBatchSize)
    doneTxC := make(chan int, txBatchSize)

    timeoutC := time.After(ScheduleWithDagTimeout * time.Second)
    finishC := make(chan bool)

    var goRoutinePool *ants.Pool
    var err error
    if goRoutinePool, err = ants.NewPool(runtime.NumCPU()*4, ants.WithPreAlloc(true)); err != nil {
        return nil, nil, err
    }
    defer goRoutinePool.Release()

    go func() {
        for {
            select {
            case txIndex := <-runningTxC:
                // Fetch the tx for this index
                tx := txMapping[txIndex]
                err := goRoutinePool.Submit(func() {
                    ts.log.Debugf("run vm with dag for tx id %s", tx.Header.GetTxId())
                    txSimContext := newTxSimContext(ts.VmManager, snapshot, tx)

                    runVmSuccess := true
                    var txResult *commonpb.Result
                    var err error
                    // Run the contract
                    if txResult, err = ts.runVM(tx, txSimContext); err != nil {
                        runVmSuccess = false
                        txSimContext.SetTxResult(txResult)
                        ts.log.Errorf("failed to run vm for tx id:%s during simulate with dag, tx result:%+v, error:%+v", tx.Header.GetTxId(), txResult, err)
                    } else {
                        //ts.log.Debugf("success to run vm for tx id:%s during simulate with dag, tx result:%+v", tx.Header.GetTxId(), txResult)
                        txSimContext.SetTxResult(txResult)
                    }
                   
                    applyResult, applySize := snapshot.ApplyTxSimContext(txSimContext, runVmSuccess)
                    // On a read-write conflict, push the tx back onto runningTxC to retry
                    if !applyResult {
                        ts.log.Debugf("failed to apply according to dag with tx %s ", tx.Header.TxId)
                        runningTxC <- txIndex
                    } else {
                        // Applied successfully; signal so successors of txIndex can be scheduled
                        ts.log.Debugf("apply to snapshot tx id:%s, result:%+v, apply count:%d", tx.Header.GetTxId(), txResult, applySize)
                        doneTxC <- txIndex
                    }
                    // If all transactions in current batch have been successfully added to dag
                    if applySize >= txBatchSize {
                        finishC <- true
                    }
                })
                if err != nil {
                    ts.log.Warnf("failed to submit tx id %s during simulate with dag, %+v", tx.Header.GetTxId(), err)
                }
            case doneTxIndex := <-doneTxC:
                // Remove the finished tx index from every remaining dependency set
                ts.shrinkDag(doneTxIndex, dagRemain)
                // Fetch the next batch of txs with no remaining dependencies; nil means all done
                txIndexBatch := ts.popNextTxBatchFromDag(dagRemain)
                ts.log.Debugf("pop next tx index batch %v", txIndexBatch)
                // Queue the newly unblocked txs
                for _, tx := range txIndexBatch {
                    runningTxC <- tx
                }
            case <-finishC:
                ts.log.Debugf("schedule with dag finish")
                ts.scheduleFinishC <- true
                return
            case <-timeoutC:
                ts.log.Errorf("schedule with dag timeout")
                ts.scheduleFinishC <- true
                return
            }
        }
    }()
    // Seed the queue with the txs that have no dependencies at all
    txIndexBatch := ts.popNextTxBatchFromDag(dagRemain)

    go func() {
        for _, tx := range txIndexBatch {
            runningTxC <- tx
        }
    }()
    // Wait for all transactions to finish
    <-ts.scheduleFinishC
    ...
    return txRWSetMap, snapshot.GetTxResultMap(), nil
}

// shrinkDag marks txIndex as executed by deleting it from the dependency set
// of every transaction still waiting in dagRemain.
func (ts *TxSchedulerImpl) shrinkDag(txIndex int, dagRemain map[int]dagNeighbors) {
	for idx := range dagRemain {
		delete(dagRemain[idx], txIndex)
	}
}

// popNextTxBatchFromDag performs one round of topological extraction: every
// transaction whose dependency set is empty is removed from dagRemain and its
// index returned. These txs have no unfinished parents and may run in parallel.
func (ts *TxSchedulerImpl) popNextTxBatchFromDag(dagRemain map[int]dagNeighbors) []int {
	var ready []int
	for idx, deps := range dagRemain {
		// Still waiting on at least one parent tx.
		if len(deps) != 0 {
			continue
		}
		ready = append(ready, idx)
		// Deleting during range is safe in Go and prevents re-selection.
		delete(dagRemain, idx)
	}
	return ready
}

你可能感兴趣的:(长安链DAG并行调度源码解析)