Node will start make the P2P Server and start it:
github.com/ethereum/go-ethereum/p2p.(*Server).Start at server.go:438
github.com/ethereum/go-ethereum/node.(*Node).Start at node.go:220
github.com/ethereum/go-ethereum/cmd/utils.StartNode at cmd.go:67
main.startNode at main.go:331
main.geth at main.go:308
gopkg.in/urfave/cli%2ev1.HandleAction at app.go:490
gopkg.in/urfave/cli%2ev1.(*App).Run at app.go:264
main.main at main.go:248
runtime.main at proc.go:203
runtime.goexit at asm_amd64.s:1357
- Async stack trace
runtime.rt0_go at asm_amd64.s:220
// Start starts running the server.
// Servers can not be re-used after stopping.
func (srv *Server) Start() (err error) {
srv.lock.Lock()
defer srv.lock.Unlock()
if srv.running {
return errors.New("server already running")
}
srv.running = true
srv.log = srv.Config.Logger
if srv.log == nil {
srv.log = log.Root()
}
if srv.clock == nil {
srv.clock = mclock.System{}
}
if srv.NoDial && srv.ListenAddr == "" {
srv.log.Warn("P2P server will be useless, neither dialing nor listening")
}
// static fields
if srv.PrivateKey == nil {
return errors.New("Server.PrivateKey must be set to a non-nil key")
}
if srv.newTransport == nil {
srv.newTransport = newRLPX
}
if srv.listenFunc == nil {
srv.listenFunc = net.Listen
}
srv.quit = make(chan struct{})
srv.delpeer = make(chan peerDrop)
srv.checkpointPostHandshake = make(chan *conn)
srv.checkpointAddPeer = make(chan *conn)
srv.addtrusted = make(chan *enode.Node)
srv.removetrusted = make(chan *enode.Node)
srv.peerOp = make(chan peerOpFunc)
srv.peerOpDone = make(chan struct{})
if err := srv.setupLocalNode(); err != nil {
return err
}
if srv.ListenAddr != "" {
if err := srv.setupListening(); err != nil {
return err
}
}
if err := srv.setupDiscovery(); err != nil {
return err
}
srv.setupDialScheduler()
srv.loopWG.Add(1)
go srv.run()
return nil
}
Some go chan will be created:
Main loop handles messages received from go chan, to manage running peers.
--> Add/Remove TrustedPeer
--> A
// run is the main loop of the server.
func (srv *Server) run() {
...
for {
select {
case <-srv.quit:
// The server was stopped. Run the cleanup logic.
break running
case n := <-srv.addtrusted:
// This channel is used by AddTrustedPeer to add a node
// to the trusted node set.
srv.log.Trace("Adding trusted node", "node", n)
trusted[n.ID()] = true
if p, ok := peers[n.ID()]; ok {
p.rw.set(trustedConn, true)
}
case n := <-srv.removetrusted:
// This channel is used by RemoveTrustedPeer to remove a node
// from the trusted node set.
srv.log.Trace("Removing trusted node", "node", n)
delete(trusted, n.ID())
if p, ok := peers[n.ID()]; ok {
p.rw.set(trustedConn, false)
}
case op := <-srv.peerOp:
// This channel is used by Peers and PeerCount.
op(peers)
srv.peerOpDone <- struct{}{}
case c := <-srv.checkpointPostHandshake:
// A connection has passed the encryption handshake so
// the remote identity is known (but hasn't been verified yet).
if trusted[c.node.ID()] {
// Ensure that the trusted flag is set before checking against MaxPeers.
c.flags |= trustedConn
}
// TODO: track in-progress inbound node IDs (pre-Peer) to avoid dialing them.
c.cont <- srv.postHandshakeChecks(peers, inboundCount, c)
case c := <-srv.checkpointAddPeer:
// At this point the connection is past the protocol handshake.
// Its capabilities are known and the remote identity is verified.
err := srv.addPeerChecks(peers, inboundCount, c)
if err == nil {
// The handshakes are done and it passed all checks.
p := srv.launchPeer(c)
peers[c.node.ID()] = p
srv.log.Debug("Adding p2p peer", "peercount", len(peers), "id", p.ID(), "conn", c.flags, "addr", p.RemoteAddr(), "name", truncateName(c.name))
srv.dialsched.peerAdded(c)
if p.Inbound() {
inboundCount++
}
}
c.cont <- err
case pd := <-srv.delpeer:
// A peer disconnected.
d := common.PrettyDuration(mclock.Now() - pd.created)
delete(peers, pd.ID())
srv.log.Debug("Removing p2p peer", "peercount", len(peers), "id", pd.ID(), "duration", d, "req", pd.requested, "err", pd.err)
srv.dialsched.peerRemoved(pd.rw)
if pd.Inbound() {
inboundCount--
}
}
}
...
}
In Main loop, launchPeer() will be called when a new connection arrived and all checks passed.
The peer will be created from the connection and added to peer set as well as dial scheduler.
case c := <-srv.checkpointAddPeer:
// At this point the connection is past the protocol handshake.
// Its capabilities are known and the remote identity is verified.
err := srv.addPeerChecks(peers, inboundCount, c)
if err == nil {
// The handshakes are done and it passed all checks.
p := srv.launchPeer(c)
peers[c.node.ID()] = p
srv.log.Debug("Adding p2p peer", "peercount", len(peers), "id", p.ID(), "conn", c.flags, "addr", p.RemoteAddr(), "name", truncateName(c.name))
srv.dialsched.peerAdded(c)
if p.Inbound() {
inboundCount++
}
}
c.cont <- err
////////////////////////////////////////////////////////////////////////
func (srv *Server) launchPeer(c *conn) *Peer {
p := newPeer(srv.log, c, srv.Protocols)
if srv.EnableMsgEvents {
// If message events are enabled, pass the peerFeed
// to the peer.
p.events = &srv.peerFeed
}
go srv.runPeer(p)
return p
}
// runPeer runs in its own goroutine for each peer.
func (srv *Server) runPeer(p *Peer) {
if srv.newPeerHook != nil {
srv.newPeerHook(p)
}
srv.peerFeed.Send(&PeerEvent{
Type: PeerEventTypeAdd,
Peer: p.ID(),
RemoteAddress: p.RemoteAddr().String(),
LocalAddress: p.LocalAddr().String(),
})
// Run the per-peer main loop.
remoteRequested, err := p.run()
// Announce disconnect on the main loop to update the peer set.
// The main loop waits for existing peers to be sent on srv.delpeer
// before returning, so this send should not select on srv.quit.
srv.delpeer <- peerDrop{p, err, remoteRequested}
// Broadcast peer drop to external subscribers. This needs to be
// after the send to delpeer so subscribers have a consistent view of
// the peer set (i.e. Server.Peers() doesn't include the peer when the
// event is received.
srv.peerFeed.Send(&PeerEvent{
Type: PeerEventTypeDrop,
Peer: p.ID(),
Error: err.Error(),
RemoteAddress: p.RemoteAddr().String(),
LocalAddress: p.LocalAddr().String(),
})
}
func (p *Peer) run() (remoteRequested bool, err error) {
var (
writeStart = make(chan struct{}, 1)
writeErr = make(chan error, 1)
readErr = make(chan error, 1)
reason DiscReason // sent to the peer
)
p.wg.Add(2)
go p.readLoop(readErr)
go p.pingLoop()
// Start all protocol handlers.
writeStart <- struct{}{}
p.startProtocols(writeStart, writeErr)
// Wait for an error or disconnect.
loop:
for {
select {
case err = <-writeErr:
// A write finished. Allow the next write to start if
// there was no error.
if err != nil {
reason = DiscNetworkError
break loop
}
writeStart <- struct{}{}
case err = <-readErr:
if r, ok := err.(DiscReason); ok {
remoteRequested = true
reason = r
} else {
reason = DiscNetworkError
}
break loop
case err = <-p.protoErr:
reason = discReasonForError(err)
break loop
case err = <-p.disc:
reason = discReasonForError(err)
break loop
}
}
close(p.closed)
p.rw.close(reason)
p.wg.Wait()
return remoteRequested, err
}
A few go routines will started to run the peer:
Below is the main prodecure of protocol manager:
Run: func(p *p2p.Peer, rw p2p.MsgReadWriter) error {
peer := pm.newPeer(int(version), p, rw, pm.txpool.Get)
select {
case pm.newPeerCh <- peer:
pm.wg.Add(1)
defer pm.wg.Done()
return pm.handle(peer)
case <-pm.quitSync:
return p2p.DiscQuitting
}
},
An eth peer will be created from p2p.peer, and passed to newPeerCh of ProtocolManager.
pm.handle(peer) will take over the peer and process eth protocol.
Each p2p.peer will set up a ProtoRW which acts a bridge between P2P readLoop and ProtocolManager for sending/receiving eth messages out/from.
It accepts inbound connection, which is handled by a goroutine ‘go SetupConn’. The first thing is to run handshake with the peer endpoint. Then a message will be sent to mainloop checkpointPostHandshake for validity check. After successfuly check, main loop can send back a message to allow further connection establishment. In the end, a message will be passed to checkpointAddPeer in main loop to indicate a new peer.
// SetupConn runs the handshakes and attempts to add the connection
// as a peer. It returns when the connection has been added as a peer
// or the handshakes have failed.
func (srv *Server) setupConn(c *conn, flags connFlag, dialDest *enode.Node) error {
// If dialing, figure out the remote public key.
var dialPubkey *ecdsa.PublicKey
if dialDest != nil {
dialPubkey = new(ecdsa.PublicKey)
if err := dialDest.Load((*enode.Secp256k1)(dialPubkey)); err != nil {
err = errors.New("dial destination doesn't have a secp256k1 public key")
srv.log.Trace("Setting up connection failed", "addr", c.fd.RemoteAddr(), "conn", c.flags, "err", err)
return err
}
}
// Run the RLPx handshake.
remotePubkey, err := c.doEncHandshake(srv.PrivateKey, dialPubkey)
if err != nil {
srv.log.Trace("Failed RLPx handshake", "addr", c.fd.RemoteAddr(), "conn", c.flags, "err", err)
return err
}
if dialDest != nil {
// For dialed connections, check that the remote public key matches.
if dialPubkey.X.Cmp(remotePubkey.X) != 0 || dialPubkey.Y.Cmp(remotePubkey.Y) != 0 {
return DiscUnexpectedIdentity
}
c.node = dialDest
} else {
c.node = nodeFromConn(remotePubkey, c.fd)
}
clog := srv.log.New("id", c.node.ID(), "addr", c.fd.RemoteAddr(), "conn", c.flags)
err = srv.checkpoint(c, srv.checkpointPostHandshake)
if err != nil {
clog.Trace("Rejected peer", "err", err)
return err
}
// Run the capability negotiation handshake.
phs, err := c.doProtoHandshake(srv.ourHandshake)
if err != nil {
clog.Trace("Failed p2p handshake", "err", err)
return err
}
if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) {
clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID))
return DiscUnexpectedIdentity
}
c.caps, c.name = phs.Caps, phs.Name
err = srv.checkpoint(c, srv.checkpointAddPeer)
if err != nil {
clog.Trace("Rejected peer", "err", err)
return err
}
return nil
}
Note: when setting up connection, RPLX will be created for the tranport of conn.
RPLX implements MsgReader/Writer interfaces for sending/receiving RLP encoded messages.
func newRLPX(fd net.Conn) transport {
fd.SetDeadline(time.Now().Add(handshakeTimeout))
return &rlpx{fd: fd}
}
Ii will setup the FairMix iterator for Dial Scheudler, then start KAD or v5 discovery, or both.
KAD is used to find more nodes by running KAD lookup…
func ListenV4(c UDPConn, ln *enode.LocalNode, cfg Config) (*UDPv4, error) {
closeCtx, cancel := context.WithCancel(context.Background())
t := &UDPv4{
conn: c,
priv: cfg.PrivateKey,
netrestrict: cfg.NetRestrict,
localNode: ln,
db: ln.Database(),
gotreply: make(chan reply),
addReplyMatcher: make(chan *replyMatcher),
closeCtx: closeCtx,
cancelCloseCtx: cancel,
log: cfg.Log,
}
if t.log == nil {
t.log = log.Root()
}
tab, err := newTable(t, ln.Database(), cfg.Bootnodes, t.log)
if err != nil {
return nil, err
}
t.tab = tab
go tab.loop()
t.wg.Add(2)
go t.loop()
go t.readLoop(cfg.Unhandled)
return t, nil
}
Two goroutines will be started:
Loop:
- nodesCh: new node discovered by Discovery
- doneCh: dialingTask done, remove from dialing task set
- addPeerCh: connection established, remove from static task set
- remPeerCh: disconnected, removed
- addStaticCh: add static node, new dialing task
- remStaticCh: remove static node
- historyExp: timer to remove item in d.history
Besides static node, dial scheduler only takes node input from Discovery, FairMix iterator.
case node := <-nodesCh:
if err := d.checkDial(node); err != nil {
d.log.Trace("Discarding dial candidate", "id", node.ID(), "ip", node.IP(), "reason", err)
} else {
d.startDial(newDialTask(node, dynDialedConn))
}
case task := <-d.doneCh:
id := task.dest.ID()
delete(d.dialing, id)
d.updateStaticPool(id)
d.doneSinceLastLog++
case c := <-d.addPeerCh:
if c.is(dynDialedConn) || c.is(staticDialedConn) {
d.dialPeers++
}
id := c.node.ID()
d.peers[id] = c.flags
// Remove from static pool because the node is now connected.
task := d.static[id]
if task != nil && task.staticPoolIndex >= 0 {
d.removeFromStaticPool(task.staticPoolIndex)
}
// TODO: cancel dials to connected peers
case c := <-d.remPeerCh:
if c.is(dynDialedConn) || c.is(staticDialedConn) {
d.dialPeers--
}
delete(d.peers, c.node.ID())
d.updateStaticPool(c.node.ID())
case node := <-d.addStaticCh:
id := node.ID()
_, exists := d.static[id]
d.log.Trace("Adding static node", "id", id, "ip", node.IP(), "added", !exists)
if exists {
continue loop
}
task := newDialTask(node, staticDialedConn)
d.static[id] = task
if d.checkDial(node) == nil {
d.addToStaticPool(task)
}
case node := <-d.remStaticCh:
id := node.ID()
task := d.static[id]
d.log.Trace("Removing static node", "id", id, "ok", task != nil)
if task != nil {
delete(d.static, id)
if task.staticPoolIndex >= 0 {
d.removeFromStaticPool(task.staticPoolIndex)
}
}
case <-historyExp:
d.expireHistory()
case <-d.ctx.Done():
it.Close()
break loop
rlpx is the transport protocol used by actual (non-test) connections. It wraps the frame encoder with locks and read/write deadlines.
Interfaces implemented by rlpx:
type rlpx struct {
fd net.Conn // underlying transport
rmu, wmu sync.Mutex
rw *rlpxFrameRW
}
To set up RLPX:
RLPXFrameRW is actually handling the message framing and ciphering/hashing.
It uses AES-CTR & SHA3 for chiphering & hashing. Note that AES-CBC is used to encypt HMAC after hashing.
func newRLPXFrameRW(conn io.ReadWriter, s secrets) *rlpxFrameRW {
macc, err := aes.NewCipher(s.MAC)
if err != nil {
panic("invalid MAC secret: " + err.Error())
}
encc, err := aes.NewCipher(s.AES)
if err != nil {
panic("invalid AES secret: " + err.Error())
}
// we use an all-zeroes IV for AES because the key used
// for encryption is ephemeral.
iv := make([]byte, encc.BlockSize())
return &rlpxFrameRW{
conn: conn,
enc: cipher.NewCTR(encc, iv),
dec: cipher.NewCTR(encc, iv),
macCipher: macc,
egressMAC: s.EgressMAC,
ingressMAC: s.IngressMAC,
}
}
Snappy compression. It is said that it has good balance between comppresion ratio and speed.
Msg Format is like:
16B(Len+ZeroMsg) + 16B(headerMAC) + Variant(Msg) + 16B(MsgMAC)
Variant(Msg) = Encrypted(rlp.Encode(Msg.Code) + Msg.Payload)
Msg.Payload = SnappyEncoded(Msg.Payload)
func (rw *rlpxFrameRW) WriteMsg(msg Msg) error {
ptype, _ := rlp.EncodeToBytes(msg.Code)
// if snappy is enabled, compress message now
if rw.snappy {
if msg.Size > maxUint24 {
return errPlainMessageTooLarge
}
payload, _ := ioutil.ReadAll(msg.Payload)
payload = snappy.Encode(nil, payload)
msg.Payload = bytes.NewReader(payload)
msg.Size = uint32(len(payload))
}
msg.meterSize = msg.Size
if metrics.Enabled && msg.meterCap.Name != "" { // don't meter non-subprotocol messages
m := fmt.Sprintf("%s/%s/%d/%#02x", egressMeterName, msg.meterCap.Name, msg.meterCap.Version, msg.meterCode)
metrics.GetOrRegisterMeter(m, nil).Mark(int64(msg.meterSize))
}
// write header
headbuf := make([]byte, 32)
fsize := uint32(len(ptype)) + msg.Size
if fsize > maxUint24 {
return errors.New("message size overflows uint24")
}
putInt24(fsize, headbuf) // TODO: check overflow
copy(headbuf[3:], zeroHeader)
rw.enc.XORKeyStream(headbuf[:16], headbuf[:16]) // first half is now encrypted
// write header MAC
copy(headbuf[16:], updateMAC(rw.egressMAC, rw.macCipher, headbuf[:16]))
if _, err := rw.conn.Write(headbuf); err != nil {
return err
}
// write encrypted frame, updating the egress MAC hash with
// the data written to conn.
tee := cipher.StreamWriter{S: rw.enc, W: io.MultiWriter(rw.conn, rw.egressMAC)}
if _, err := tee.Write(ptype); err != nil {
return err
}
if _, err := io.Copy(tee, msg.Payload); err != nil {
return err
}
if padding := fsize % 16; padding > 0 {
if _, err := tee.Write(zero16[:16-padding]); err != nil {
return err
}
}
// write frame MAC. egress MAC hash is up to date because
// frame content was written to it as well.
fmacseed := rw.egressMAC.Sum(nil)
mac := updateMAC(rw.egressMAC, rw.macCipher, fmacseed)
_, err := rw.conn.Write(mac)
return err
}
func (rw *rlpxFrameRW) ReadMsg() (msg Msg, err error) {
// read the header
headbuf := make([]byte, 32)
if _, err := io.ReadFull(rw.conn, headbuf); err != nil {
return msg, err
}
// verify header mac
shouldMAC := updateMAC(rw.ingressMAC, rw.macCipher, headbuf[:16])
if !hmac.Equal(shouldMAC, headbuf[16:]) {
return msg, errors.New("bad header MAC")
}
rw.dec.XORKeyStream(headbuf[:16], headbuf[:16]) // first half is now decrypted
fsize := readInt24(headbuf)
// ignore protocol type for now
// read the frame content
var rsize = fsize // frame size rounded up to 16 byte boundary
if padding := fsize % 16; padding > 0 {
rsize += 16 - padding
}
framebuf := make([]byte, rsize)
if _, err := io.ReadFull(rw.conn, framebuf); err != nil {
return msg, err
}
// read and validate frame MAC. we can re-use headbuf for that.
rw.ingressMAC.Write(framebuf)
fmacseed := rw.ingressMAC.Sum(nil)
if _, err := io.ReadFull(rw.conn, headbuf[:16]); err != nil {
return msg, err
}
shouldMAC = updateMAC(rw.ingressMAC, rw.macCipher, fmacseed)
if !hmac.Equal(shouldMAC, headbuf[:16]) {
return msg, errors.New("bad frame MAC")
}
// decrypt frame content
rw.dec.XORKeyStream(framebuf, framebuf)
// decode message code
content := bytes.NewReader(framebuf[:fsize])
if err := rlp.Decode(content, &msg.Code); err != nil {
return msg, err
}
msg.Size = uint32(content.Len())
msg.meterSize = msg.Size
msg.Payload = content
// if snappy is enabled, verify and decompress message
if rw.snappy {
payload, err := ioutil.ReadAll(msg.Payload)
if err != nil {
return msg, err
}
size, err := snappy.DecodedLen(payload)
if err != nil {
return msg, err
}
if size > int(maxUint24) {
return msg, errPlainMessageTooLarge
}
payload, err = snappy.Decode(nil, payload)
if err != nil {
return msg, err
}
msg.Size, msg.Payload = uint32(size), bytes.NewReader(payload)
}
return msg, nil
}