Tendermint同步区块-JobPlus

最近在学习Tendermint的代码，记录下同步区块的流程，直接跳过P2P阶段，后续再写一篇文章记录P2P流程吧

blockchain/pool.go的OnStart()新建了goroutine来发起获取区块请求

[plain]

// OnStart launches makeRequestersRoutine in its own goroutine, records the
// current time in pool.startTime, and returns nil.
func (pool *BlockPool) OnStart() error {
// Run the requester-spawning loop in the background so OnStart returns immediately.
go pool.makeRequestersRoutine()
pool.startTime = time.Now()
return nil
}

看下makeRequestersRoutine的代码

[plain]

// makeRequestersRoutine spawns requesters as needed for as long as the pool
// is running.
//
// On every pass it reads the pool status. When either the number of pending
// requests has reached maxPendingRequests or the number of requesters has
// reached maxTotalRequesters, it sleeps for requestIntervalMS milliseconds and
// then removes timed-out peers. Otherwise it creates the next requester.
func (pool *BlockPool) makeRequestersRoutine() {
	for pool.IsRunning() {
		_, pending, total := pool.GetStatus()

		atCapacity := pending >= maxPendingRequests || total >= maxTotalRequesters
		if !atCapacity {
			// Below both limits: request more blocks.
			pool.makeNextRequester()
			continue
		}

		// At capacity: sleep for a bit, then check for timed out peers.
		time.Sleep(requestIntervalMS * time.Millisecond)
		pool.removeTimedoutPeers()
	}
}

这里是一个for循环，获取当前pool的状态

1 如果当前pending的个数大于等于10000个，sleep 100ms，并且remove掉timeout的peer（remove timeout peer的逻辑是当前接收peer的bitrate小于10KB/S）

2 如果当前request的个数大于等于10000个，sleep 100ms并且remove timeout的peer

3 否则（以上两个条件都不满足时），新建一个请求

[plain]

// makeNextRequester creates and starts a requester for the height
// pool.height + pool.requestersLen(). The pool mutex is held for the whole
// call.
//
// The requester is stored in pool.requesters and pool.numPending is
// incremented before the requester is started; a start failure is only
// logged.
func (pool *BlockPool) makeNextRequester() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	height := pool.height + pool.requestersLen()
	bpr := newBPRequester(pool, height)
	// bpr.SetLogger(pool.Logger.With("height", height))

	pool.requesters[height] = bpr
	pool.numPending++

	if err := bpr.Start(); err != nil {
		bpr.Logger.Error("Error starting request", "err", err)
	}
}

新建请求的时候requester和pending的计数都会+1，然后调用request.Start()，这个调用的是blockchain/pool.go中的另外一个OnStart()（即bpRequester的OnStart()）

[plain]

// OnStart launches requestRoutine in its own goroutine and returns nil.
func (bpr *bpRequester) OnStart() error {
go bpr.requestRoutine()
return nil
}

看下requestRoutine的代码

[plain]

// requestRoutine repeatedly picks a peer for bpr.height and sends that peer a
// block request through the pool. The rest of the outer loop body is elided
// ("...") in this excerpt, so what happens after the request is sent is not
// shown here.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
for {
// Pick a peer to send request to.
var peer *bpPeer = nil
PICK_PEER_LOOP:
for {
// Exit the routine once either the requester or its pool has stopped.
if !bpr.IsRunning() || !bpr.pool.IsRunning() {
return
}
peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
if peer == nil {
//log.Info("No peers available", "height", height)
// No peer was returned: wait requestIntervalMS milliseconds and try again.
time.Sleep(requestIntervalMS * time.Millisecond)
continue PICK_PEER_LOOP
}
break PICK_PEER_LOOP
}
// Record the chosen peer's ID under the requester's own mutex.
bpr.mtx.Lock()
bpr.peerID = peer.id
bpr.mtx.Unlock()
// Send request and wait.
bpr.pool.sendRequest(bpr.height, peer.id)
...
}
}

pickIncrAvailablePeer的作用是找到一个拥有该高度（bpr.height）区块的节点

sendRequest其实只是向一个channel（requestsCh）写入了一条BlockRequest

[plain]

// sendRequest publishes a BlockRequest for the given height and peer ID on
// pool.requestsCh. It does nothing when the pool is not running.
func (pool *BlockPool) sendRequest(height int64, peerID string) {
	if !pool.IsRunning() {
		return
	}
	req := BlockRequest{Height: height, PeerID: peerID}
	pool.requestsCh <- req
}

接着找requestsCh读取的地方，就在blockchain/reactor.go中：

[plain]

// poolRoutine is the reactor's fast-sync loop. It runs until either the pool
// reports that it is caught up (after which it hands over to the consensus
// reactor) or bcR.Quit fires. Each iteration handles one of:
//
//   - bcR.requestsCh: a block request, forwarded to the named peer as a
//     bcBlockRequestMessage; dropped if the peer is gone or its send queue
//     is full.
//   - bcR.timeoutsCh: the ID of a timed-out peer, which is stopped with an
//     error if it is still connected.
//   - statusUpdateTicker: broadcasts a status request in a new goroutine.
//   - switchToConsensusTicker: logs pool and peer counts and, if the pool is
//     caught up, stops the pool, calls SwitchToConsensus and exits.
//   - trySyncTicker: verifies, saves and applies up to 10 blocks.
//   - bcR.Quit: exits the loop.
//
// All three tickers are stopped when the routine returns, so they do not
// outlive the loop on either exit path.
func (bcR *BlockchainReactor) poolRoutine() {
	trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond)
	defer trySyncTicker.Stop()
	statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)
	defer statusUpdateTicker.Stop()
	switchToConsensusTicker := time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
	defer switchToConsensusTicker.Stop()

	blocksSynced := 0

	chainID := bcR.initialState.ChainID
	state := bcR.initialState

	// lastHundred marks when the current batch of 100 blocks started;
	// lastRate is a smoothed blocks-per-second figure used only for logging.
	lastHundred := time.Now()
	lastRate := 0.0

FOR_LOOP:
	for {
		select {
		case request := <-bcR.requestsCh: // chan BlockRequest
			peer := bcR.Switch.Peers().Get(request.PeerID)
			if peer == nil {
				continue FOR_LOOP // Peer has since been disconnected.
			}
			msg := &bcBlockRequestMessage{request.Height}
			queued := peer.TrySend(BlockchainChannel, struct{ BlockchainMessage }{msg})
			if !queued {
				// We couldn't make the request, send-queue full.
				// The pool handles timeouts, just let it go.
				continue FOR_LOOP
			}
		case peerID := <-bcR.timeoutsCh: // chan string
			// Peer timed out.
			peer := bcR.Switch.Peers().Get(peerID)
			if peer != nil {
				bcR.Switch.StopPeerForError(peer, errors.New("BlockchainReactor Timeout"))
			}
		case <-statusUpdateTicker.C:
			// ask for status updates
			go bcR.BroadcastStatusRequest() // nolint: errcheck
		case <-switchToConsensusTicker.C:
			height, numPending, lenRequesters := bcR.pool.GetStatus()
			outbound, inbound, _ := bcR.Switch.NumPeers()
			bcR.Logger.Debug("Consensus ticker", "numPending", numPending, "total", lenRequesters,
				"outbound", outbound, "inbound", inbound)
			if bcR.pool.IsCaughtUp() {
				bcR.Logger.Info("Time to switch to consensus reactor!", "height", height)
				bcR.pool.Stop()

				conR := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
				conR.SwitchToConsensus(state, blocksSynced)

				break FOR_LOOP
			}
		case <-trySyncTicker.C: // chan time
			// This loop can be slow as long as it's doing syncing work.
		SYNC_LOOP:
			for i := 0; i < 10; i++ {
				// See if there are any blocks to sync.
				first, second := bcR.pool.PeekTwoBlocks()
				//bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
				if first == nil || second == nil {
					// We need both to sync the first block.
					break SYNC_LOOP
				}
				firstParts := first.MakePartSet(state.ConsensusParams.BlockPartSizeBytes)
				firstPartsHeader := firstParts.Header()
				firstID := types.BlockID{first.Hash(), firstPartsHeader}
				// Finally, verify the first block using the second's commit
				// NOTE: we can probably make this more efficient, but note that calling
				// first.Hash() doesn't verify the tx contents, so MakePartSet() is
				// currently necessary.
				err := state.Validators.VerifyCommit(
					chainID, firstID, first.Height, second.LastCommit)
				if err != nil {
					// Verification failed: ask the pool to redo the request for
					// this height and stop syncing for this tick.
					bcR.Logger.Error("Error in validation", "err", err)
					bcR.pool.RedoRequest(first.Height)
					break SYNC_LOOP
				}

				bcR.pool.PopRequest()

				bcR.store.SaveBlock(first, firstParts, second.LastCommit)

				// NOTE: we could improve performance if we
				// didn't make the app commit to disk every block
				// ... but we would need a way to get the hash without it persisting
				state, err = bcR.blockExec.ApplyBlock(state, firstID, first)
				if err != nil {
					// TODO This is bad, are we zombie?
					cmn.PanicQ(cmn.Fmt("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
				}
				blocksSynced++

				// update the consensus params
				bcR.updateConsensusParams(state.ConsensusParams)

				if blocksSynced%100 == 0 {
					// Exponentially smoothed rate: 90% previous value, 10% the
					// rate observed over the last 100 blocks.
					lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
					bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height,
						"max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
					lastHundred = time.Now()
				}
			}
			continue FOR_LOOP
		case <-bcR.Quit:
			break FOR_LOOP
		}
	}
}

1 收到requestsCh的数据后，发送bcBlockRequestMessage到对端节点（对端节点收到消息后返回对应高度的block）。这些消息是在本文件中的Receive()函数处理的

2 本函数有个trySyncTicker 50ms的timer一直在运行，作用是拿到两个已经从对端peer获取的block，经过VerifyCommit之后，SaveBlock存在tendermint内部，再调用ApplyBlock存到application

3 也有个statusUpdateTicker 10s的timer，作用是broadcast bcStatusRequestMessage去获取对端节点的block高度

4 还有个switchToConsensusTicker 1s的timer也在运行，作用是看本节点的block高度有没有大于等于其他节点的最大高度，如果到达了（也就是同步区块完成了）就调用SwitchToConsensus，从fast sync模式切换到consensus模式

最近在学习Tendermint的代码，记录下同步区块的流程，直接跳过P2P阶段，后续再写一篇文章记录P2P流程吧blockchain/pool.go的OnStart()新建了gorountine来发起获取区块请求[plain] <ol><li>func (pool *BlockPool) OnStart() error {  </li><li>    go pool.makeRequestersRoutine()  </li><li>    pool.startTime = time.Now()  </li><li>    return nil  </li><li>}  </li></ol>看下makeRequestersRoutine的代码[plain]<ol><li>// Run spawns requesters as needed.  </li><li>func (pool *BlockPool) makeRequestersRoutine() {  </li><li>  </li><li>    for {  </li><li>        if !pool.IsRunning() {  </li><li>            break  </li><li>        }  </li><li>  </li><li>        _, numPending, lenRequesters := pool.GetStatus()  </li><li>        if numPending >= maxPendingRequests {  </li><li>            // sleep for a bit.  </li><li>            time.Sleep(requestIntervalMS * time.Millisecond)  </li><li>            // check for timed out peers  </li><li>            pool.removeTimedoutPeers()  </li><li>        } else if lenRequesters >= maxTotalRequesters {  </li><li>            // sleep for a bit.  </li><li>            time.Sleep(requestIntervalMS * time.Millisecond)  </li><li>            // check for timed out peers  </li><li>            pool.removeTimedoutPeers()  </li><li>        } else {  </li><li>            // request for more blocks.  
</li><li>            pool.makeNextRequester()  </li><li>        }  </li><li>    }  </li><li>}  </li></ol>这里是一个for循环，获取当前pool的状态1 如果当前pending的个数大于等于10000个，sleep 100ms，并且remove掉timeout的peer（remove timeout peer的逻辑是当前接收peer的bitrate小于10KB/S2 如果当前request的个数大于等于10000个，sleep 100ms并且remove timeout的peer3 新建一个请求[plain] <ol><li>func (pool *BlockPool) makeNextRequester() {  </li><li>    pool.mtx.Lock()  </li><li>    defer pool.mtx.Unlock()  </li><li>  </li><li>    nextHeight := pool.height + pool.requestersLen()  </li><li>    request := newBPRequester(pool, nextHeight)  </li><li>    // request.SetLogger(pool.Logger.With("height", nextHeight))  </li><li>  </li><li>    pool.requesters[nextHeight] = request  </li><li>    pool.numPending++  </li><li>  </li><li>    err := request.Start()  </li><li>    if err != nil {  </li><li>        request.Logger.Error("Error starting request", "err", err)  </li><li>    }  </li><li>}  </li></ol>新建请求的时候rquester和pending都会+1，然后调用request.Start()，这个调用的是blockchain/pool.go的另外一个OnStart()[plain]<ol><li>func (bpr *bpRequester) OnStart() error {  </li><li>    go bpr.requestRoutine()  </li><li>    return nil  </li><li>}  </li></ol>看下requestRoutine的代码[plain] <ol><li>func (bpr *bpRequester) requestRoutine() {  </li><li>OUTER_LOOP:  </li><li>    for {  </li><li>        // Pick a peer to send request to.  
</li><li>        var peer *bpPeer = nil  </li><li>    PICK_PEER_LOOP:  </li><li>        for {  </li><li>            if !bpr.IsRunning() || !bpr.pool.IsRunning() {  </li><li>                return  </li><li>            }  </li><li>            peer = bpr.pool.pickIncrAvailablePeer(bpr.height)  </li><li>            if peer == nil {  </li><li>                //log.Info("No peers available", "height", height)  </li><li>                time.Sleep(requestIntervalMS * time.Millisecond)  </li><li>                continue PICK_PEER_LOOP  </li><li>            }  </li><li>            break PICK_PEER_LOOP  </li><li>        }  </li><li>        bpr.mtx.Lock()  </li><li>        bpr.peerID = peer.id  </li><li>        bpr.mtx.Unlock()  </li><li>  </li><li>        // Send request and wait.  </li><li>        bpr.pool.sendRequest(bpr.height, peer.id)  </li><li>        ...  </li><li>    }  </li><li>}  </li></ol>pickIncrAvailablePeer的作用是找到有高度（bpr.height）的节点sendRequest其实是写了一个channel[plain] <ol><li>func (pool *BlockPool) sendRequest(height int64, peerID string) {  </li><li>    if !pool.IsRunning() {  </li><li>        return  </li><li>    }  </li><li>    pool.requestsCh <- BlockRequest{height, peerID}  </li><li>}  </li></ol>接着找requestsCh读取的地方，就在blockchain/reactor.go中：[plain] <ol><li>func (bcR *BlockchainReactor) poolRoutine() {  </li><li>  </li><li>    trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond)  </li><li>    statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)  </li><li>    switchToConsensusTicker := time.NewTicker(switchToConsensusIntervalSeconds * time.Second)  </li><li>  </li><li>    blocksSynced := 0  </li><li>  </li><li>    chainID := bcR.initialState.ChainID  </li><li>    state := bcR.initialState  </li><li>  </li><li>    lastHundred := time.Now()  </li><li>    lastRate := 0.0  </li><li>  </li><li>FOR_LOOP:  </li><li>    for {  </li><li>        select {  </li><li>        case request := <-bcR.requestsCh: // chan BlockRequest  
</li><li>            peer := bcR.Switch.Peers().Get(request.PeerID)  </li><li>            if peer == nil {  </li><li>                continue FOR_LOOP // Peer has since been disconnected.  </li><li>            }  </li><li>            msg := &bcBlockRequestMessage{request.Height}  </li><li>            queued := peer.TrySend(BlockchainChannel, struct{ BlockchainMessage }{msg})  </li><li>            if !queued {  </li><li>                // We couldn't make the request, send-queue full.  </li><li>                // The pool handles timeouts, just let it go.  </li><li>                continue FOR_LOOP  </li><li>            }  </li><li>        case peerID := <-bcR.timeoutsCh: // chan string  </li><li>            // Peer timed out.  </li><li>            peer := bcR.Switch.Peers().Get(peerID)  </li><li>            if peer != nil {  </li><li>                bcR.Switch.StopPeerForError(peer, errors.New("BlockchainReactor Timeout"))  </li><li>            }  </li><li>        case <-statusUpdateTicker.C:  </li><li>            // ask for status updates  </li><li>            go bcR.BroadcastStatusRequest() // nolint: errcheck  </li><li>        case <-switchToConsensusTicker.C:  </li><li>            height, numPending, lenRequesters := bcR.pool.GetStatus()  </li><li>            outbound, inbound, _ := bcR.Switch.NumPeers()  </li><li>            bcR.Logger.Debug("Consensus ticker", "numPending", numPending, "total", lenRequesters,  </li><li>                "outbound", outbound, "inbound", inbound)  </li><li>            if bcR.pool.IsCaughtUp() {  </li><li>                bcR.Logger.Info("Time to switch to consensus reactor!", "height", height)  </li><li>                bcR.pool.Stop()  </li><li>  </li><li>                conR := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)  </li><li>                conR.SwitchToConsensus(state, blocksSynced)  </li><li>  </li><li>                break FOR_LOOP  </li><li>            }  </li><li>        case <-trySyncTicker.C: // chan time  
</li><li>            // This loop can be slow as long as it's doing syncing work.  </li><li>        SYNC_LOOP:  </li><li>            for i := 0; i < 10; i++ {  </li><li>                // See if there are any blocks to sync.  </li><li>                first, second := bcR.pool.PeekTwoBlocks()  </li><li>                //bcR.Logger.Info("TrySync peeked", "first", first, "second", second)  </li><li>                if first == nil || second == nil {  </li><li>                    // We need both to sync the first block.  </li><li>                    break SYNC_LOOP  </li><li>                }  </li><li>                firstParts := first.MakePartSet(state.ConsensusParams.BlockPartSizeBytes)  </li><li>                firstPartsHeader := firstParts.Header()  </li><li>                firstID := types.BlockID{first.Hash(), firstPartsHeader}  </li><li>                // Finally, verify the first block using the second's commit  </li><li>                // NOTE: we can probably make this more efficient, but note that calling  </li><li>                // first.Hash() doesn't verify the tx contents, so MakePartSet() is  </li><li>                // currently necessary.  </li><li>                err := state.Validators.VerifyCommit(  </li><li>                    chainID, firstID, first.Height, second.LastCommit)  </li><li>                if err != nil {  </li><li>                    bcR.Logger.Error("Error in validation", "err", err)  </li><li>                    bcR.pool.RedoRequest(first.Height)  </li><li>                    break SYNC_LOOP  </li><li>                } else {  </li><li>                    bcR.pool.PopRequest()  </li><li>  </li><li>                    bcR.store.SaveBlock(first, firstParts, second.LastCommit)  </li><li>  </li><li>                    // NOTE: we could improve performance if we  </li><li>                    // didn't make the app commit to disk every block  </li><li>                    // ... 
but we would need a way to get the hash without it persisting  </li><li>                    var err error  </li><li>                    state, err = bcR.blockExec.ApplyBlock(state, firstID, first)  </li><li>                    if err != nil {  </li><li>                        // TODO This is bad, are we zombie?  </li><li>                        cmn.PanicQ(cmn.Fmt("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))  </li><li>                    }  </li><li>                    blocksSynced += 1  </li><li>  </li><li>                    // update the consensus params  </li><li>                    bcR.updateConsensusParams(state.ConsensusParams)  </li><li>  </li><li>                    if blocksSynced%100 == 0 {  </li><li>                        lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())  </li><li>                        bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height,  </li><li>                            "max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)  </li><li>                        lastHundred = time.Now()  </li><li>                    }  </li><li>                }  </li><li>            }  </li><li>            continue FOR_LOOP  </li><li>        case <-bcR.Quit:  </li><li>            break FOR_LOOP  </li><li>        }  </li><li>    }  </li><li>}  </li></ol>1 收到reqeustCh的数据，发送bcBlockRequestMessage到对端节点（对端节点收到消息后返回高度的block）。是在本文件中的Receive()函数处理的2 本函数有个trySyncTicker 50ms的timer一直在运行，作用是拿到两个已经从对端peer获取的block，经过VerifyCommit之后，SaveBlock存在tendermint内部，再调用ApplyBlock存到application3 也有个statusUpdateTicker 10s的timer，作用是broadcast bcStatusRequestMessage去获取对端节点的block高度4 还有个switchToConsensusTicker 1s的timer也在运行，作用是看本节点的block高度有没有大于等于其他节点的最大高度，如果到达了（也就是同步区块完成了）就SwitchToConsensus切换fast mode到consensus mode