Source code: https://github.com/etcd-io/etcd
scripts/build.sh
#!/usr/bin/env bash
# This script builds the etcd binaries
# To build the tools, run `build_tools.sh`
source ./scripts/test_lib.sh
source ./scripts/build_lib.sh
# only build when called directly, not sourced
if echo "$0" | grep -E "build(.sh)?$" >/dev/null; then
run_build etcd_build
fi
scripts/build_lib.sh
run_build() {
echo Running "$1"
if $1; then
log_success "SUCCESS: $1 (GOARCH=${GOARCH})"
else
log_error "FAIL: $1 (GOARCH=${GOARCH})"
exit 2
fi
}
etcd_build() {
out="bin"
if [[ -n "${BINDIR}" ]]; then out="${BINDIR}"; fi
run rm -f "${out}/etcd"
(
cd ./server
# Static compilation is useful when etcd is run in a container. $GO_BUILD_FLAGS is OK
# shellcheck disable=SC2086
run env "${GO_BUILD_ENV[@]}" go build $GO_BUILD_FLAGS \
-trimpath \
-installsuffix=cgo \
"-ldflags=${GO_LDFLAGS[*]}" \
-o="../${out}/etcd" . || return 2
) || return 2
run rm -f "${out}/etcdutl"
# shellcheck disable=SC2086
(
cd ./etcdutl
run env GO_BUILD_FLAGS="${GO_BUILD_FLAGS}" "${GO_BUILD_ENV[@]}" go build $GO_BUILD_FLAGS \
-trimpath \
-installsuffix=cgo \
"-ldflags=${GO_LDFLAGS[*]}" \
-o="../${out}/etcdutl" . || return 2
) || return 2
run rm -f "${out}/etcdctl"
}
As the build script shows, the server's entry point lives under ./server.
server/main.go
func main() {
etcdmain.Main(os.Args)
}
etcdmain/main.go
func Main(args []string) {
checkSupportArch()
if len(args) > 1 {
cmd := args[1]
switch cmd {
case "gateway", "grpc-proxy":
if err := rootCmd.Execute(); err != nil {
fmt.Fprint(os.Stderr, err)
os.Exit(1)
}
return
}
}
startEtcdOrProxyV2(args)
}
If the first argument is "gateway" or "grpc-proxy", the process starts in proxy mode via rootCmd; otherwise it continues into startEtcdOrProxyV2.
func startEtcdOrProxyV2(args []string) {
grpc.EnableTracing = false
cfg := newConfig()
// initialize logging, cluster information, etc. from the config
defaultInitialCluster := cfg.ec.InitialCluster
....
....
var stopped <-chan struct{}
var errc <-chan error
// validate the data directory type; a member data dir and a v2 proxy dir must not both be present
which := identifyDataDirOrDie(cfg.ec.GetLogger(), cfg.ec.Dir)
if which != dirEmpty {
lg.Info(
"server has already been initialized",
zap.String("data-dir", cfg.ec.Dir),
zap.String("dir-type", string(which)),
)
switch which {
case dirMember:
// start etcd
stopped, errc, err = startEtcd(&cfg.ec)
case dirProxy:
lg.Panic("v2 http proxy has already been deprecated in 3.6", zap.String("dir-type", string(which)))
default:
lg.Panic(
"unknown directory type",
zap.String("dir-type", string(which)),
)
}
} else {
lg.Info(
"Initialize and start etcd server",
zap.String("data-dir", cfg.ec.Dir),
zap.String("dir-type", string(which)),
)
stopped, errc, err = startEtcd(&cfg.ec)
}
// handle errors and shutdown
...
osutil.Exit(0)
}
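A note on identifyDataDirOrDie, which is not shown in the excerpt: conceptually it classifies the data directory by which well-known subdirectory ("member" or "proxy") already exists. Below is a simplified sketch of that idea only, not the actual implementation; the real function also inspects unexpected entries and terminates on read errors.
package main

import (
    "fmt"
    "os"
    "path/filepath"
)

// dirType mirrors the values used above (dirMember, dirProxy, dirEmpty).
type dirType string

const (
    dirMember = dirType("member")
    dirProxy  = dirType("proxy")
    dirEmpty  = dirType("empty")
)

// identifyDataDir is an illustrative stand-in for identifyDataDirOrDie:
// classify the data dir by which well-known subdirectory is present.
func identifyDataDir(dir string) dirType {
    exists := func(p string) bool {
        _, err := os.Stat(p)
        return err == nil
    }
    switch {
    case exists(filepath.Join(dir, "member")):
        return dirMember
    case exists(filepath.Join(dir, "proxy")):
        return dirProxy
    default:
        return dirEmpty
    }
}

func main() {
    fmt.Println(identifyDataDir("default.etcd"))
}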
Once initialization is done, the actual startup flow begins.
etcdmain/etcd.go
// startEtcd runs StartEtcd in addition to the hooks needed for a standalone etcd.
func startEtcd(cfg *embed.Config) (<-chan struct{}, <-chan error, error) {
e, err := embed.StartEtcd(cfg)
if err != nil {
return nil, nil, err
}
osutil.RegisterInterruptHandler(e.Close)
select {
case <-e.Server.ReadyNotify(): // wait for e.Server to join the cluster
case <-e.Server.StopNotify(): // publish aborted from 'ErrStopped'
case <-time.After(cfg.ExperimentalWaitClusterReadyTimeout):
e.GetLogger().Warn("startEtcd: timed out waiting for the ready notification")
}
return e.Server.StopNotify(), e.Err(), nil
}
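startEtcd is a thin wrapper over the public embed package, so the same flow can be reproduced directly in your own program. A minimal sketch of embedding etcd and waiting on ReadyNotify; the data directory name and the 60-second timeout are arbitrary illustrative values, not values taken from the source above.
package main

import (
    "log"
    "time"

    "go.etcd.io/etcd/server/v3/embed"
)

func main() {
    cfg := embed.NewConfig()
    cfg.Dir = "default.etcd" // illustrative data directory

    e, err := embed.StartEtcd(cfg)
    if err != nil {
        log.Fatal(err)
    }
    defer e.Close()

    select {
    case <-e.Server.ReadyNotify(): // the server has joined the cluster and is ready
        log.Println("etcd is ready")
    case <-time.After(60 * time.Second): // give up, mirroring the timeout in startEtcd
        e.Server.Stop()
        log.Fatal("etcd took too long to become ready")
    }

    // Block until the server stops or reports an error, as e.Err() does above.
    log.Fatal(<-e.Err())
}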
embed/etcd.go
// StartEtcd launches the etcd server and HTTP handlers for client/server communication. The returned Etcd.Server is not guaranteed to have joined the cluster; wait on the Etcd.Server.ReadyNotify() channel to know when it is ready for use.
func StartEtcd(inCfg *Config) (e *Etcd, err error) {
// validate the configuration
if err = inCfg.Validate(); err != nil {
return nil, err
}
serving := false
e = &Etcd{cfg: *inCfg, stopc: make(chan struct{})}
cfg := &e.cfg
defer func() {
if e == nil || err == nil {
return
}
if !serving {
// errored before starting gRPC server for serveCtx.serversC
for _, sctx := range e.sctxs {
close(sctx.serversC)
}
}
e.Close()
e = nil
}()
if !cfg.SocketOpts.Empty() {
cfg.logger.Info(
"configuring socket options",
zap.Bool("reuse-address", cfg.SocketOpts.ReuseAddress),
zap.Bool("reuse-port", cfg.SocketOpts.ReusePort),
)
}
e.cfg.logger.Info(
"configuring peer listeners",
zap.Strings("listen-peer-urls", e.cfg.getLPURLs()),
)
// start the peer listeners that receive messages from other members (default port 2380)
if e.Peers, err = configurePeerListeners(cfg); err != nil {
return e, err
}
e.cfg.logger.Info(
"configuring client listeners",
zap.Strings("listen-client-urls", e.cfg.getLCURLs()),
)
// start the client listeners (default port 2379)
if e.sctxs, err = configureClientListeners(cfg); err != nil {
return e, err
}
for _, sctx := range e.sctxs {
e.Clients = append(e.Clients, sctx.l)
}
var (
urlsmap types.URLsMap
token string
)
memberInitialized := true
// use the existence of WAL files to determine whether this member has already been initialized
if !isMemberInitialized(cfg) {
memberInitialized = false
urlsmap, token, err = cfg.PeerURLsMapAndToken("etcd")
if err != nil {
return e, fmt.Errorf("error setting up initial cluster: %v", err)
}
}
// AutoCompactionRetention defaults to "0" if not set.
if len(cfg.AutoCompactionRetention) == 0 {
cfg.AutoCompactionRetention = "0"
}
// parse the auto-compaction retention: in Revision mode it is the configured revision count; in Periodic mode the configured value is interpreted in hours (value * 1 hour)
autoCompactionRetention, err := parseCompactionRetention(cfg.AutoCompactionMode, cfg.AutoCompactionRetention)
if err != nil {
return e, err
}
// boltdb backend freelist type (array/map); this presumably corresponds to Alibaba's earlier boltdb freelist optimization write-up
backendFreelistType := parseBackendFreelistType(cfg.BackendFreelistType)
srvcfg := config.ServerConfig{
....
}
if e.Server, err = etcdserver.NewServer(srvcfg); err != nil {
return e, err
}
// buffer channel so goroutines on closed connections won't wait forever
e.errc = make(chan error, len(e.Peers)+len(e.Clients)+2*len(e.sctxs))
// newly started member ("memberInitialized==false")
// does not need corruption check
if memberInitialized && srvcfg.InitialCorruptCheck {
if err = e.Server.CorruptionChecker().InitialCheck(); err != nil {
// set "EtcdServer" to nil, so that it does not block on "EtcdServer.Close()"
// (nothing to close since rafthttp transports have not been started)
e.cfg.logger.Error("checkInitialHashKV failed", zap.Error(err))
e.Server.Cleanup()
e.Server = nil
return e, err
}
}
e.Server.Start()
if err = e.servePeers(); err != nil {
return e, err
}
if err = e.serveClients(); err != nil {
return e, err
}
if err = e.serveMetrics(); err != nil {
return e, err
}
e.cfg.logger.Info(
"now serving peer/client/metrics",
zap.String("local-member-id", e.Server.MemberId().String()),
zap.Strings("initial-advertise-peer-urls", e.cfg.getAPURLs()),
zap.Strings("listen-peer-urls", e.cfg.getLPURLs()),
zap.Strings("advertise-client-urls", e.cfg.getACURLs()),
zap.Strings("listen-client-urls", e.cfg.getLCURLs()),
zap.Strings("listen-metrics-urls", e.cfg.getMetricsURLs()),
)
serving = true
return e, nil
}
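Once serveClients has started, the node accepts ordinary clientv3 traffic on the client listener (2379 by default, as noted above). A minimal sketch of talking to it; the endpoint, key, and timeouts are placeholder values.
package main

import (
    "context"
    "log"
    "time"

    clientv3 "go.etcd.io/etcd/client/v3"
)

func main() {
    cli, err := clientv3.New(clientv3.Config{
        Endpoints:   []string{"127.0.0.1:2379"}, // default client listener
        DialTimeout: 5 * time.Second,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer cli.Close()

    ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
    defer cancel()
    if _, err := cli.Put(ctx, "greeting", "hello"); err != nil {
        log.Fatal(err)
    }
    resp, err := cli.Get(ctx, "greeting")
    if err != nil {
        log.Fatal(err)
    }
    for _, kv := range resp.Kvs {
        log.Printf("%s = %s", kv.Key, kv.Value)
    }
}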
StartEtcd converts the configuration, opens the peer and client listeners, and then calls etcdserver.NewServer. Let's now look at how the new server is created.
server/etcdserver/server.go
// NewServer creates a new EtcdServer from the supplied configuration. The configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) {
b, err := bootstrap(cfg)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
b.Close()
}
}()
sstats := stats.NewServerStats(cfg.Name, b.cluster.cl.String())
lstats := stats.NewLeaderStats(cfg.Logger, b.cluster.nodeID.String())
heartbeat := time.Duration(cfg.TickMs) * time.Millisecond
srv = &EtcdServer{
readych: make(chan struct{}),
Cfg: cfg,
lgMu: new(sync.RWMutex),
lg: cfg.Logger,
errorc: make(chan error, 1),
v2store: b.storage.st,
snapshotter: b.ss,
r: *b.raft.newRaftNode(b.ss, b.storage.wal.w, b.cluster.cl),
memberId: b.cluster.nodeID,
attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
cluster: b.cluster.cl,
stats: sstats,
lstats: lstats,
SyncTicker: time.NewTicker(500 * time.Millisecond),
peerRt: b.prt,
reqIDGen: idutil.NewGenerator(uint16(b.cluster.nodeID), time.Now()),
AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist},
consistIndex: b.storage.backend.ci,
firstCommitInTerm: notify.NewNotifier(),
clusterVersionChanged: notify.NewNotifier(),
}
serverID.With(prometheus.Labels{"server_id": b.cluster.nodeID.String()}).Set(1)
srv.cluster.SetVersionChangedNotifier(srv.clusterVersionChanged)
srv.applyV2 = NewApplierV2(cfg.Logger, srv.v2store, srv.cluster)
srv.be = b.storage.backend.be
srv.beHooks = b.storage.backend.beHooks
minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat
// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
srv.lessor = lease.NewLessor(srv.Logger(), srv.be, srv.cluster, lease.LessorConfig{
MinLeaseTTL: int64(math.Ceil(minTTL.Seconds())),
CheckpointInterval: cfg.LeaseCheckpointInterval,
CheckpointPersist: cfg.LeaseCheckpointPersist,
ExpiredLeasesRetryInterval: srv.Cfg.ReqTimeout(),
})
tp, err := auth.NewTokenProvider(cfg.Logger, cfg.AuthToken,
func(index uint64) <-chan struct{} {
return srv.applyWait.Wait(index)
},
time.Duration(cfg.TokenTTL)*time.Second,
)
if err != nil {
cfg.Logger.Warn("failed to create token provider", zap.Error(err))
return nil, err
}
mvccStoreConfig := mvcc.StoreConfig{
CompactionBatchLimit: cfg.CompactionBatchLimit,
CompactionSleepInterval: cfg.CompactionSleepInterval,
}
srv.kv = mvcc.New(srv.Logger(), srv.be, srv.lessor, mvccStoreConfig)
srv.corruptionChecker = newCorruptionChecker(cfg.Logger, srv, srv.kv.HashStorage())
srv.authStore = auth.NewAuthStore(srv.Logger(), schema.NewAuthBackend(srv.Logger(), srv.be), tp, int(cfg.BcryptCost))
newSrv := srv // since srv == nil in defer if srv is returned as nil
defer func() {
// closing backend without first closing kv can cause
// resumed compactions to fail with closed tx errors
if err != nil {
newSrv.kv.Close()
}
}()
if num := cfg.AutoCompactionRetention; num != 0 {
srv.compactor, err = v3compactor.New(cfg.Logger, cfg.AutoCompactionMode, num, srv.kv, srv)
if err != nil {
return nil, err
}
srv.compactor.Run()
}
if err = srv.restoreAlarms(); err != nil {
return nil, err
}
srv.uberApply = srv.NewUberApplier()
if srv.Cfg.EnableLeaseCheckpoint {
// setting checkpointer enables lease checkpoint feature.
srv.lessor.SetCheckpointer(func(ctx context.Context, cp *pb.LeaseCheckpointRequest) {
srv.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseCheckpoint: cp})
})
}
// Set the hook after EtcdServer finishes the initialization to avoid
// the hook being called during the initialization process.
srv.be.SetTxPostLockInsideApplyHook(srv.getTxPostLockInsideApplyHook())
// TODO: move transport initialization near the definition of remote
tr := &rafthttp.Transport{
Logger: cfg.Logger,
TLSInfo: cfg.PeerTLSInfo,
DialTimeout: cfg.PeerDialTimeout(),
ID: b.cluster.nodeID,
URLs: cfg.PeerURLs,
ClusterID: b.cluster.cl.ID(),
Raft: srv,
Snapshotter: b.ss,
ServerStats: sstats,
LeaderStats: lstats,
ErrorC: srv.errorc,
}
if err = tr.Start(); err != nil {
return nil, err
}
// add all remotes into transport
for _, m := range b.cluster.remotes {
if m.ID != b.cluster.nodeID {
tr.AddRemote(m.ID, m.PeerURLs)
}
}
for _, m := range b.cluster.cl.Members() {
if m.ID != b.cluster.nodeID {
tr.AddPeer(m.ID, m.PeerURLs)
}
}
srv.r.transport = tr
return srv, nil
}
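For intuition on the minTTL computed in NewServer above: heartbeat is TickMs milliseconds and ElectionTicks is the election timeout expressed in ticks. Assuming the common defaults of TickMs=100 and a 1s election timeout (ElectionTicks=10), values used here only for illustration, the lessor's minimum lease TTL works out to 1.5s:
package main

import (
    "fmt"
    "time"
)

func main() {
    // Assumed defaults for illustration: --heartbeat-interval=100 (TickMs)
    // and --election-timeout=1000, i.e. ElectionTicks = 10.
    tickMs := uint(100)
    electionTicks := 10

    heartbeat := time.Duration(tickMs) * time.Millisecond
    minTTL := time.Duration((3*electionTicks)/2) * heartbeat
    fmt.Println(minTTL) // 1.5s: leases shorter than this will not be granted
}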
The general flow of creating the server, starting from the bootstrap call at the top of NewServer, is as follows:
func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) {
if cfg.MaxRequestBytes > recommendedMaxRequestBytes {
cfg.Logger.Warn(
"exceeded recommended request limit",
zap.Uint("max-request-bytes", cfg.MaxRequestBytes),
zap.String("max-request-size", humanize.Bytes(uint64(cfg.MaxRequestBytes))),
zap.Int("recommended-request-bytes", recommendedMaxRequestBytes),
zap.String("recommended-request-size", recommendedMaxRequestBytesString),
)
}
// create the data/member directories, the snapshotter, and the backend store
if terr := fileutil.TouchDirAll(cfg.Logger, cfg.DataDir); terr != nil {
return nil, fmt.Errorf("cannot access data directory: %v", terr)
}
if terr := fileutil.TouchDirAll(cfg.Logger, cfg.MemberDir()); terr != nil {
return nil, fmt.Errorf("cannot access member directory: %v", terr)
}
ss := bootstrapSnapshot(cfg)
prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.PeerDialTimeout())
if err != nil {
return nil, err
}
haveWAL := wal.Exist(cfg.WALDir())
st := v2store.New(StoreClusterPrefix, StoreKeysPrefix)
backend, err := bootstrapBackend(cfg, haveWAL, st, ss)
if err != nil {
return nil, err
}
var bwal *bootstrappedWAL
if haveWAL {
if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
}
bwal = bootstrapWALFromSnapshot(cfg, backend.snapshot)
}
cluster, err := bootstrapCluster(cfg, bwal, prt)
if err != nil {
backend.Close()
return nil, err
}
s, err := bootstrapStorage(cfg, st, backend, bwal, cluster)
if err != nil {
backend.Close()
return nil, err
}
if err = cluster.Finalize(cfg, s); err != nil {
backend.Close()
return nil, err
}
raft := bootstrapRaft(cfg, cluster, s.wal)
return &bootstrappedServer{
prt: prt,
ss: ss,
storage: s,
cluster: cluster,
raft: raft,
}, nil
}