raft算法细节部分的文档还未整理好,之后找时间再更新。
@[TOC]
创建一个etcdserver的实例
etcdserver.NewServer
启动服务
e.Server.Start()
etcd/embed/etcd.go
// StartEtcd (excerpt) creates the EtcdServer with etcdserver.NewServer and then
// starts it with e.Server.Start(). A NewServer failure is returned to the caller
// together with the partially built e.
func StartEtcd(inCfg *Config) (e *Etcd, err error) {
…省略其它代码
if e.Server, err = etcdserver.NewServer(srvcfg); err != nil {
return e, err
}
…省略其它代码
e.Server.Start()
…省略其它代码
}
创建节点startNode
初始化http服务Transport
并且添加其它节点
etcd/etcdserver/server.go
// NewServer (excerpt) obtains a raft node from startNode, wraps it in a raftNode
// inside the new EtcdServer, starts the rafthttp Transport with the server as
// its Raft, and registers every other member with that transport.
func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
...省略其它代码
id, n, s, w = startNode(cfg, cl, nil)
...省略其它代码
srv = &EtcdServer{
readych: make(chan struct{}),
Cfg: cfg,
lgMu: new(sync.RWMutex),
lg: cfg.Logger,
errorc: make(chan error, 1),
v2store: st,
snapshotter: ss,
// Create the raftNode around the raft.Node returned by startNode.
r: *newRaftNode(
raftNodeConfig{
lg: cfg.Logger,
isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
Node: n,
heartbeat: heartbeat,
raftStorage: s,
storage: NewStorage(w, ss),
},
),
id: id,
attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
cluster: cl,
stats: sstats,
lstats: lstats,
SyncTicker: time.NewTicker(500 * time.Millisecond),
peerRt: prt,
reqIDGen: idutil.NewGenerator(uint16(id), time.Now()),
forceVersionC: make(chan struct{}),
AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist},
}
…省略其它代码
// TODO: move transport initialization near the definition of remote
tr := &rafthttp.Transport{
Logger: cfg.Logger,
TLSInfo: cfg.PeerTLSInfo,
DialTimeout: cfg.peerDialTimeout(),
ID: id, // ID of the local node
URLs: cfg.PeerURLs, // URLs the local node uses to talk to the other cluster members
ClusterID: cl.ID(), // ID of the cluster the local node belongs to
Raft: srv, // the EtcdServer itself serves as the transport's Raft
Snapshotter: ss, // manages snapshot files
ServerStats: sstats, // general transport statistics
LeaderStats: lstats, // leader-side statistics about transport to the followers
ErrorC: srv.errorc,
}
if err = tr.Start(); err != nil {
return nil, err
}
// add all remotes into transport
for _, m := range remotes {
if m.ID != id {
tr.AddRemote(m.ID, m.PeerURLs)
}
}
// Every cluster member other than the local node becomes a peer.
for _, m := range cl.Members() {
if m.ID != id {
tr.AddPeer(m.ID, m.PeerURLs)
}
}
srv.r.transport = tr
return srv, nil
}
etcd/etcdserver/raft.go
// startNode (excerpt) chooses how the raft node is created: raft.RestartNode
// when there are no peers, raft.StartNode otherwise.
// NOTE(review): peers and c are not defined in this excerpt — presumably they
// are built in code omitted here; confirm against the full function.
func startNode(cfg ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) {
if len(peers) == 0 {
n = raft.RestartNode(c)
} else {
n = raft.StartNode(c, peers)
}
}
node对外提供了一组基础接口,同时会启动一个goroutine(node.run),通过各个channel接收外部事件并驱动底层raft状态机
etcd/raft/node.go
// StartNode builds a RawNode from c, bootstraps it with peers, wraps it in a
// node and starts the node's run loop in its own goroutine before returning it.
// It panics when peers is empty or when NewRawNode returns an error.
func StartNode(c *Config, peers []Peer) Node {
if len(peers) == 0 {
panic("no peers given; use RestartNode instead")
}
rn, err := NewRawNode(c)
if err != nil {
panic(err)
}
rn.Bootstrap(peers)
n := newNode(rn)
// The event loop runs concurrently with the caller.
go n.run()
return &n
}
// run is the node's event loop. Each iteration offers a pending Ready (unless a
// previous one still awaits Advance), tracks leader changes to decide whether
// proposals are accepted, and then serves exactly one channel event.
func (n *node) run() {
var propc chan msgWithResult
var readyc chan Ready
var advancec chan struct{}
var rd Ready
r := n.rn.raft
lead := None
for {
if advancec != nil {
// A Ready was handed out and has not been advanced yet: offer no new one.
readyc = nil
} else if n.rn.HasReady() {// there is a Ready to hand out
rd = n.rn.readyWithoutAccept()// build the Ready without accepting it yet
readyc = n.readyc
}
// propc is non-nil only while a leader is known. A nil channel never fires
// in the select below, so proposals wait until a leader exists.
if lead != r.lead {
if r.hasLeader() {
if lead == None {
r.logger.Infof("raft.node: %x elected leader %x at term %d", r.id, r.lead, r.Term)
} else {
r.logger.Infof("raft.node: %x changed leader from %x to %x at term %d", r.id, lead, r.lead, r.Term)
}
propc = n.propc
} else {
r.logger.Infof("raft.node: %x lost leader %x at term %d", r.id, lead, r.Term)
propc = nil
}
lead = r.lead
}
select {
case pm := <-propc:// a proposal arrived: stamp it with the local id and pass it to Step
m := pm.m
m.From = r.id
err := r.Step(m)
// Report the Step result to the proposer if it asked for one.
if pm.result != nil {
pm.result <- err
close(pm.result)
}
case m := <-n.recvc:// a message arrived on recvc
// Step it unless it is a response from a node that has no Progress entry.
if pr := r.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) {
r.Step(m)
}
case cc := <-n.confc:// configuration change
_, okBefore := r.prs.Progress[r.id]
cs := r.applyConfChange(cc)
// If this node had a Progress entry before the change, has none after it,
// and is listed in neither Voters nor VotersOutgoing, stop accepting proposals.
if _, okAfter := r.prs.Progress[r.id]; okBefore && !okAfter {
var found bool
for _, sl := range [][]uint64{cs.Voters, cs.VotersOutgoing} {
for _, id := range sl {
if id == r.id {
found = true
}
}
}
if !found {
propc = nil
}
}
// Hand the resulting ConfState back, unless the node is already done.
select {
case n.confstatec <- cs:
case <-n.done:
}
case <-n.tickc:// a tick arrived; forward it to the RawNode
n.rn.Tick()
case readyc <- rd:// the Ready was received
n.rn.acceptReady(rd)
// From now on wait for Advance before offering another Ready.
advancec = n.advancec
case <-advancec:// the receiver is finished with the Ready
n.rn.Advance(rd)
rd = Ready{}
advancec = nil
case c := <-n.status:// status request
c <- getStatus(r)
case <-n.stop:// stop signal
close(n.done)
return
}
}
}
// Tick triggers a clock event. (Body omitted in this excerpt.)
func (n *node) Tick() {
…略
}
// TODO: Propose is the log-submission interface exposed to callers; client write requests use message type pb.MsgProp.
// Propose blocks until the caller's request has been accepted by the raft state
// machine, according to the author's note. (Body omitted in this excerpt.)
func (n *node) Propose(ctx context.Context, data []byte) error {
…略
}
etcd/etcdserver/api/rafthttp/transport.go
// Transport holds the local node's identity, the Raft it forwards messages to,
// and the per-member remotes and peers used to exchange raft messages over HTTP.
type Transport struct {
Logger *zap.Logger
DialTimeout time.Duration // maximum duration before timing out dial of the request
DialRetryFrequency rate.Limit
TLSInfo transport.TLSInfo // TLS information used when creating connection
ID types.ID // local member ID
URLs types.URLs // local peer URLs, used when talking to the other cluster members
ClusterID types.ID // raft cluster ID for request validation
Raft Raft // raft state machine, to which the Transport forwards received messages and reports status
Snapshotter *snap.Snapshotter // manages snapshot files
ServerStats *stats.ServerStats // used to record general transportation statistics
LeaderStats *stats.LeaderStats // leader-side statistics about transport to the followers
ErrorC chan error
streamRt http.RoundTripper // roundTripper used by streams (long-lived HTTP connections)
pipelineRt http.RoundTripper // roundTripper used by pipelines; the connection is closed right after a transfer; meant for large, infrequent messages such as MsgSnap
mu sync.RWMutex // protect the remote and peer map
// A remote wraps only a pipeline; it mainly sends snapshot data so that a newly
// joined member can quickly catch up with the others.
remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
/*
Peer is the local node's abstraction of another cluster member: every other
member has one Peer instance on the local node.
peers maps a member ID to its Peer instance.
*/
peers map[types.ID]Peer // peers map
// Probes whether the pipeline message channel is usable.
pipelineProber probing.Prober
streamProber probing.Prober
}
// TODO: Start is described by the author as starting the HTTP service; its body
// is omitted from this excerpt, so that cannot be confirmed here.
func (t *Transport) Start() error {
…省略其它代码
}
// Handler returns an http.Handler that routes the raft, raft-stream, snapshot
// and probing URL prefixes to their respective handlers.
func (t *Transport) Handler() http.Handler {
// Create the pipeline, stream and snapshot handlers; each is registered on the mux below.
pipelineHandler := newPipelineHandler(t, t.Raft, t.ClusterID)
streamHandler := newStreamHandler(t, t, t.Raft, t.ID, t.ClusterID)
snapHandler := newSnapshotHandler(t, t.Raft, t.Snapshotter, t.ClusterID)
mux := http.NewServeMux()// request multiplexer that maps URL patterns to handlers
mux.Handle(RaftPrefix, pipelineHandler)
mux.Handle(RaftStreamPrefix+"/", streamHandler)
mux.Handle(RaftSnapshotPrefix, snapHandler)
mux.Handle(ProbingPrefix, probing.NewHandler())
return mux
}
…省略其它代码
// TODO: AddPeer registers a remote member as a peer of the local node; in a
// three-node cluster, for example, each node adds the other two.
// It panics if the peers map is nil ("transport stopped") or the URLs are
// invalid, and does nothing if the id is already registered.
func (t *Transport) AddPeer(id types.ID, us []string) {
t.mu.Lock()
defer t.mu.Unlock()
if t.peers == nil {
panic("transport stopped")
}
// Already known: nothing to do.
if _, ok := t.peers[id]; ok {
return
}
urls, err := types.NewURLs(us)
if err != nil {
if t.Logger != nil {
t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
} else {
plog.Panicf("newURLs %+v should never fail: %+v", us, err)
}
}
fs := t.LeaderStats.Follower(id.String())
t.peers[id] = startPeer(t, urls, id, fs) //starting peer
// Register the peer with both the pipeline prober and the stream prober.
addPeerToProber(t.Logger, t.pipelineProber, id.String(), us, RoundTripperNameSnapshot, rttSec)
addPeerToProber(t.Logger, t.streamProber, id.String(), us, RoundTripperNameRaftMessage, rttSec)
if t.Logger != nil {
t.Logger.Info(
"added remote peer",
zap.String("local-member-id", t.ID.String()),
zap.String("remote-peer-id", id.String()),
zap.Strings("remote-peer-urls", us),
)
} else {
plog.Infof("added peer %s", id)
}
}
在startPeer中,取出Transport持有的Raft接口实例(t.Raft),并启动goroutine通过channel(recvc、propc)把收到的消息交给它处理
etcd/etcdserver/api/rafthttp/peer.go
// peer is the local node's representation of one remote raft member, bundling
// the writers, readers, pipeline and channels used to talk to that member.
type peer struct {
lg *zap.Logger
localID types.ID // ID of the local node
// id of the remote raft peer node
id types.ID // ID of the remote member this peer instance stands for
r Raft
status *peerStatus
/*
A member may expose several URLs; when one of them fails, another should be
tried. urlPicker's main job is to switch between those URLs.
*/
picker *urlPicker
msgAppV2Writer *streamWriter
writer *streamWriter // writes messages to the stream message channel
pipeline *pipeline // pipeline message channel
snapSender *snapshotSender // snapshot sender to send v3 snapshot messages
msgAppV2Reader *streamReader
msgAppReader *streamReader // reads messages from the stream message channel
recvc chan raftpb.Message // messages read from the stream are passed through this channel to the Raft interface, which hands them to the underlying etcd-raft module
propc chan raftpb.Message // MsgProp messages read from the stream are passed through this channel to the Raft interface, which hands them to the underlying etcd-raft module
mu sync.Mutex
paused bool // whether sending messages to the other member is paused
cancel context.CancelFunc // cancel pending works in go routine created by peer.
stopc chan struct{}
}
// startPeer builds and starts everything needed to talk to one remote member:
// a pipeline, two stream writers, a snapshot sender, two stream readers, and
// two goroutines that feed messages from recvc and propc into r.Process.
func startPeer(t *Transport, urls types.URLs, peerID types.ID, fs *stats.FollowerStats) *peer {
if t.Logger != nil {
t.Logger.Info("starting remote peer", zap.String("remote-peer-id", peerID.String()))
} else {
plog.Infof("starting peer %s...", peerID)
}
// Log completion when the function returns.
defer func() {
if t.Logger != nil {
t.Logger.Info("started remote peer", zap.String("remote-peer-id", peerID.String()))
} else {
plog.Infof("started peer %s", peerID)
}
}()
status := newPeerStatus(t.Logger, t.ID, peerID)// status record for this peer
picker := newURLPicker(urls)// urlPicker built from the URLs the member exposes
errorc := t.ErrorC
r := t.Raft // the Raft implementation held by the Transport
pipeline := &pipeline{
peerID: peerID,
tr: t,
picker: picker,
status: status,
followerStats: fs,
raft: r,
errorc: errorc,
}
pipeline.start() // starts the pipeline's handler goroutines
p := &peer{
lg: t.Logger,
localID: t.ID,
id: peerID,
r: r,
status: status,
picker: picker,
msgAppV2Writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),// create and start a streamWriter
writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),
pipeline: pipeline,
snapSender: newSnapshotSender(t, picker, peerID, status),
recvc: make(chan raftpb.Message, recvBufSize),// create the recvc channel
propc: make(chan raftpb.Message, maxPendingProposals),// create the propc channel
stopc: make(chan struct{}),
}
// Start a dedicated goroutine that reads from recvc — the messages sent by the
// remote member — and hands each one to the underlying Raft for processing.
ctx, cancel := context.WithCancel(context.Background())
p.cancel = cancel
go func() {
for {
select {
case mm := <-p.recvc:// a message read from the connection
// TODO: call Process
if err := r.Process(ctx, mm); err != nil {// hand the message to the underlying Raft
if t.Logger != nil {
t.Logger.Warn("failed to process Raft message", zap.Error(err))
} else {
plog.Warningf("failed to process raft message (%v)", err)
}
}
case <-p.stopc:
return
}
}
}()
// r.Process might block for processing proposal when there is no leader.
// Thus propc must be put into a separate routine with recvc to avoid blocking
// processing other raft messages.
// NOTE(review): unlike the recvc goroutine above, this one logs through plog
// unconditionally, even when t.Logger is set.
go func() {
for {
select {
case mm := <-p.propc:// a MsgProp message taken from propc
if err := r.Process(ctx, mm); err != nil {
plog.Warningf("failed to process raft message (%v)", err)
}
case <-p.stopc:
return
}
}
}()
// Create and start the streamReaders, which read messages from the stream
// message channel into recvc and propc.
p.msgAppV2Reader = &streamReader{
lg: t.Logger,
peerID: peerID,
typ: streamTypeMsgAppV2,
tr: t,
picker: picker,
status: status,
recvc: p.recvc,
propc: p.propc,
rl: rate.NewLimiter(t.DialRetryFrequency, 1),
}
p.msgAppReader = &streamReader{
lg: t.Logger,
peerID: peerID,
typ: streamTypeMessage,
tr: t,
picker: picker,
status: status,
recvc: p.recvc,
propc: p.propc,
rl: rate.NewLimiter(t.DialRetryFrequency, 1),
}
p.msgAppV2Reader.start()
p.msgAppReader.start()
return p
}
上面创建了一个pipeline,并调用了它的start方法;pipeline负责把待发送的消息通过HTTP发送给对端节点,并把发送结果报告给底层raft状态机
// pipeline sends the messages queued on msgc to one remote member and reports
// the outcome to raft.
type pipeline struct {
peerID types.ID // ID of the member this pipeline talks to
tr *Transport // the owning rafthttp.Transport
picker *urlPicker // selects a usable URL
status *peerStatus // status of the peer
raft Raft
errorc chan error
// deprecate when we deprecate v2 API
followerStats *stats.FollowerStats
msgc chan raftpb.Message // the pipeline takes messages to send from this channel
// wait for the handling routines
wg sync.WaitGroup // tracks the connPerPipeline handler goroutines (4 by default, per the author) so they can all be waited for before the pipeline is really closed
stopc chan struct{}
}
// start creates the stop and message channels, launches connPerPipeline handler
// goroutines, and logs that pipelining to the peer has started.
func (p *pipeline) start() {
p.stopc = make(chan struct{})
p.msgc = make(chan raftpb.Message, pipelineBufSize)// buffered message queue (64 by default, per the author)
p.wg.Add(connPerPipeline)
for i := 0; i < connPerPipeline; i++ {// connPerPipeline handlers (4 by default, per the author) drain msgc
go p.handle()// each one sends messages to the remote member
}
if p.tr != nil && p.tr.Logger != nil {
p.tr.Logger.Info(
"started HTTP pipelining with remote peer",
zap.String("local-member-id", p.tr.ID.String()),
zap.String("remote-peer-id", p.peerID.String()),
)
} else {
plog.Infof("started HTTP pipelining with peer %s", p.peerID)
}
}
…省略其它代码
// Message sending starts here.
// handle loops over msgc, posts each message with pipeline.post, and afterwards
// reports the outcome through the Raft interface (unreachable / snapshot status).
// It returns when stopc fires.
func (p *pipeline) handle() {
defer p.wg.Done()
for {
select {
case m := <-p.msgc:// next message to send (of any type)
start := time.Now()
err := p.post(pbutil.MustMarshal(&m))// marshal the message and send it as an HTTP request
end := time.Now()
if err != nil {
// Mark the peer status inactive.
p.status.deactivate(failureType{source: pipelineMsg, action: "write"}, err.Error())
if m.Type == raftpb.MsgApp && p.followerStats != nil {
p.followerStats.Fail()
}
p.raft.ReportUnreachable(m.To)// tell the etcd-raft module that the target node cannot be reached
if isMsgSnap(m) {// for a snapshot message, also report the failed send
p.raft.ReportSnapshot(m.To, raft.SnapshotFailure)
}
sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
continue
}
p.status.activate()// the connection is considered active
if m.Type == raftpb.MsgApp && p.followerStats != nil {
p.followerStats.Succ(end.Sub(start))
}
if isMsgSnap(m) {// for a snapshot message, report the successful send
p.raft.ReportSnapshot(m.To, raft.SnapshotFinish)
}
sentBytes.WithLabelValues(types.ID(m.To).String()).Add(float64(m.Size()))
case <-p.stopc:
return
}
}
}
// post POSTs a data payload to a url. Returns nil if the POST succeeds,
// error on any failure. On every failure path the picked URL is marked
// unreachable; errMemberRemoved is additionally reported to errorc.
func (p *pipeline) post(data []byte) (err error) {
u := p.picker.pick()// URL exposed by the remote member
// Build the HTTP POST request.
req := createPostRequest(u, RaftPrefix, bytes.NewBuffer(data), "application/protobuf", p.tr.URLs, p.tr.ID, p.tr.ClusterID)
done := make(chan struct{}, 1)// tells the goroutine below that the round trip has finished
ctx, cancel := context.WithCancel(context.Background())
req = req.WithContext(ctx)
go func() {// watches for the need to cancel the request
select {
case <-done:
case <-p.stopc:// the pipeline was stopped while the request was in flight
waitSchedule()
cancel()// cancel the request
}
}()
resp, err := p.tr.pipelineRt.RoundTrip(req)// send the POST request and obtain the response
done <- struct{}{}// notify the goroutine above that the round trip is over
if err != nil {
p.picker.unreachable(u)
return err
}
defer resp.Body.Close()
b, err := ioutil.ReadAll(resp.Body)// read the whole response body
if err != nil {
p.picker.unreachable(u)// on error, mark this URL as unusable
return err
}
err = checkPostResponse(resp, b, req, p.peerID)// validate the response
if err != nil {
p.picker.unreachable(u)
// errMemberRemoved is a critical error since a removed member should
// always be stopped. So we use reportCriticalError to report it to errorc.
if err == errMemberRemoved {
reportCriticalError(err, p.errorc)
}
return err
}
return nil
}
…省略其它代码
// newPipelineHandler returns the http.Handler built from the transport, the
// Raft and the cluster ID. (Body omitted in this excerpt.)
func newPipelineHandler(t *Transport, r Raft, cid types.ID) http.Handler {
…省略其它代码
}
// ServeHTTP (excerpt) hands the received message m to the Raft via Process.
func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
…省略其它代码
// TODO: call Process so the message reaches the raft state machine.
if err := h.r.Process(context.TODO(), m); err != nil {
…省略其它代码
}
…省略其它代码
}
Raft接口
etcd/etcdserver/api/rafthttp/transport.go
// Raft is what the transport needs from the raft side: a way to deliver
// received messages and to report connectivity and snapshot outcomes.
type Raft interface {
Process(ctx context.Context, m raftpb.Message) error // passes the given message to the etcd-raft module for processing
IsIDRemoved(id uint64) bool // reports whether the member with the given ID has been removed from the cluster
ReportUnreachable(id uint64) // tells the etcd-raft module that the given node cannot be reached from the local node
ReportSnapshot(id uint64, status raft.SnapshotStatus) // tells the etcd-raft module whether the snapshot was sent successfully
}
2.EtcdServer是核心结构体,实现了transport里面的Raft接口,然后再通过自己实现的Process方法调用底层raft状态机的Step方法(整体的逻辑都在这个结构体中,具体功能再交给其它子模块来实现)
etcd/etcdserver/server.go
// Start (excerpt) calls the unexported start, followed by code omitted here.
func (s *EtcdServer) Start() {
s.start()
…省略其它代码
}
// start (excerpt) launches the server's run loop in its own goroutine.
func (s *EtcdServer) start() {
…省略其它代码
go s.run()
}
// run (excerpt) is where the raftNode is started, via s.r.start(rh).
func (s *EtcdServer) run() {
…省略其它代码
// This is where raft is actually started.
s.r.start(rh)
…省略其它代码
}
// Process (excerpt) forwards the message to the raft node's Step and returns
// its result.
func (s *EtcdServer) Process(ctx context.Context, m raftpb.Message) error {
…省略其它代码
return s.r.Step(ctx, m)
}
// IsIDRemoved delegates to the cluster's IsIDRemoved for the given member ID.
func (s *EtcdServer) IsIDRemoved(id uint64) bool { return s.cluster.IsIDRemoved(types.ID(id)) }
// ReportUnreachable forwards the report to the raft node.
func (s *EtcdServer) ReportUnreachable(id uint64) { s.r.ReportUnreachable(id) }
etcd/raft/raft.go
// TODO: node voting flow.
// Step first reconciles the message term with the local term (possibly stepping
// down to follower or dropping the message), then dispatches on the message
// type: MsgHup starts a campaign, MsgVote/MsgPreVote are answered here, and
// every other type goes to the role-specific r.step function.
func (r *raft) Step(m pb.Message) error {
// Handle the message term, which may result in our stepping down to a follower.
switch {
case m.Term == 0:
// local message
case m.Term > r.Term: // e.g. a campaigning node carries a higher term than a node that is not campaigning
// Whatever its current role, a node that receives a higher-term message
// ends up in the default branch below: MsgApp, MsgHeartbeat and MsgSnap
// call becomeFollower(m.Term, m.From), other types becomeFollower(m.Term, None).
// Vote and pre-vote requests are filtered first.
if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote {
// The Context field tells whether this MsgPreVote/MsgVote was produced by a
// leadership transfer; if so the node is forced to take part in the (pre-)election.
force := bytes.Equal(m.Context, []byte(campaignTransfer))
// inLease: CheckQuorum is enabled, a leader is known, and the election
// timer has not yet reached the election timeout.
inLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout
if !force && inLease { // in that case the node does not take part in this election
return nil
}
}
switch { // decide by message type whether the node changes state
case m.Type == pb.MsgPreVote: // receiving a MsgPreVote never changes the local state
// Never change our term in response to a PreVote
case m.Type == pb.MsgPreVoteResp && !m.Reject:
default:
r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d]",
r.id, r.Term, m.Type, m.From, m.Term)
if m.Type == pb.MsgApp || m.Type == pb.MsgHeartbeat || m.Type == pb.MsgSnap {
r.becomeFollower(m.Term, m.From)
} else {
r.becomeFollower(m.Term, None)
}
}
case m.Term < r.Term:
// Lower-term heartbeats/appends get an empty MsgAppResp when checkQuorum or
// preVote is on; lower-term pre-votes are rejected; everything else is ignored.
if (r.checkQuorum || r.preVote) && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) {
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp})
} else if m.Type == pb.MsgPreVote {
r.send(pb.Message{To: m.From, Term: r.Term, Type: pb.MsgPreVoteResp, Reject: true})
} else {
// ignore other cases
r.logger.Infof("%x [term: %d] ignored a %s message with lower term from %x [term: %d]",
r.id, r.Term, m.Type, m.From, m.Term)
}
return nil
}
switch m.Type {
case pb.MsgHup: // kicks off an election (the message that makes a Follower become a (Pre)Candidate)
if r.state != StateLeader { // only a non-leader handles MsgHup
// A node that is not promotable may not campaign; the MsgHup is ignored.
if !r.promotable() {
r.logger.Warningf("%x is unpromotable and can not campaign; ignoring MsgHup", r.id)
return nil
}
// Fetch the entries that are committed but not yet applied.
ents, err := r.raftLog.slice(r.raftLog.applied+1, r.raftLog.committed+1, noLimit)
if err != nil {
r.logger.Panicf("unexpected error getting unapplied entries (%v)", err)
}
// If any of them is a configuration change still waiting to be applied,
// give up this campaign.
if n := numOfPendingConf(ents); n != 0 && r.raftLog.committed > r.raftLog.applied {
r.logger.Warningf("%x cannot campaign at term %d since there are still %d pending configuration changes to apply", r.id, r.Term, n)
return nil
}
// Start the election.
r.logger.Infof("%x is starting a new election at term %d", r.id, r.Term)
if r.preVote {
// With PreVote enabled, campaign as a pre-election first.
r.campaign(campaignPreElection)
} else {
r.campaign(campaignElection)
}
} else { // already leader: only emit a debug log
r.logger.Debugf("%x ignoring MsgHup because already leader", r.id)
}
case pb.MsgVote, pb.MsgPreVote: // handle vote and pre-vote requests
// We can vote if this is a repeat of a vote we've already cast...
// First decide whether a vote is possible at all:
// 1. the recorded Vote equals the sender, i.e. this is a repeated request;
// 2. no vote has been cast yet and no leader is known;
// 3. this is a PreVote for a future term.
canVote := r.Vote == m.From ||
(r.Vote == None && r.lead == None) ||
(m.Type == pb.MsgPreVote && m.Term > r.Term)
// isUpToDate compares the candidate's (Index, LogTerm) with the local log.
if canVote && r.raftLog.isUpToDate(m.Index, m.LogTerm) {
// TODO: send the vote.
// Grant the vote by replying to the candidate; for a real vote also
// record Vote and reset the election timer.
r.send(pb.Message{To: m.From, Term: m.Term, Type: voteRespMsgType(m.Type)})
if m.Type == pb.MsgVote { // MsgVote only
// Only record real votes.
r.electionElapsed = 0
r.Vote = m.From
}
} else {
// Otherwise reply with a rejection.
r.send(pb.Message{To: m.From, Term: r.Term, Type: voteRespMsgType(m.Type), Reject: true})
}
default:
// All other message types go to the role-specific step function.
err := r.step(r, m)
if err != nil {
return err
}
}
return nil
}
参照
https://raft.github.io/
https://blog.csdn.net/skh2015java/category_9284671.html