Kademlia的相关原理见https://blog.csdn.net/doleria/article/details/78685531
1.Kademlia相关数据结构
整体结构是
node(节点)=》bucket(k桶中的行)=》Table(k桶及其他一些实现)
- node
// p2p/discover/node.go
// Node represents a host on the network.
// The fields of Node may not be modified.
type Node struct {
IP net.IP // len 4 for IPv4 or 16 for IPv6
UDP, TCP uint16 // port numbers
ID NodeID // the node's public key
// This is a cached copy of sha3(ID) which is used for node
// distance calculations. This is part of Node in order to make it
// possible to write tests that need a node at a certain distance.
// In those tests, the content of sha will not actually correspond
// with ID.
sha common.Hash
// Time when the node was added to the table.
addedAt time.Time
}
- node中包含了节点的ip, udp,tcp端口
- 还有节点id(NodeID 512bit) ,但Kademlia中的距离计算不是根据NodeID,而是sha3(NodeID),及节点的hash值,32个字节
- addedAt 是指节点被加入到K桶时的时间戳
- bucket, k桶的一行
// bucket contains nodes, ordered by their last activity. the entry
// that was most recently active is the first element in entries.
type bucket struct {
entries []*Node // live entries, sorted by time of last contact
replacements []*Node // recently seen nodes to be used if revalidation fails
ips netutil.DistinctNetSet
}
- entries是一个存活的node数组
- ips是一个存储距离的集合,后面再进行具体分析
- Table k桶的实现
type Table struct {
mutex sync.Mutex // protects buckets, bucket content, nursery, rand
buckets [nBuckets]*bucket // index of known nodes by distance
nursery []*Node // bootstrap nodes
rand *mrand.Rand // source of randomness, periodically reseeded
ips netutil.DistinctNetSet
db *nodeDB // database of known nodes
refreshReq chan chan struct{}
initDone chan struct{}
closeReq chan struct{}
closed chan struct{}
bondmu sync.Mutex
bonding map[NodeID]*bondproc
bondslots chan struct{} // limits total number of active bonding processes
nodeAddedHook func(*Node) // for testing
net transport
self *Node // metadata of the local node
}
- k桶的数据存于buckets中,
hashBits = len(common.Hash{}) * 8
nBuckets = hashBits / 15 // Number of buckets
可以算出k桶一共有 32 * 8 / 15 = 17个bucket
- self 是自身节点
- nursery 是bootnodes节点,由p2p.Server.Start()方法传入
- bondmu,bonding,bondslots,这三个成员的使用的方法调用路径是
func (tab *Table) Lookup(targetID NodeID)
=>
func (tab *Table) lookup(targetID NodeID, refreshIfEmpty bool) []*Node
=>
func (tab *Table) bondall(nodes []*Node) (result []*Node)
=>
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error)
=>
func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16)
至于其作用,稍后详述
2.node的距离计算
看p2p/discover/node.go中node节点间距离计算的相关代码
// distcmp 比较 a<->target and b<->target.的距离
// 如果a更接近target,返回-1
// 如果b更接近target,返回1
// 如果相等返回0
func distcmp(target, a, b common.Hash) int {
for i := range target {
da := a[i] ^ target[i]
db := b[i] ^ target[i]
if da > db {
return 1
} else if da < db {
return -1
}
}
return 0
}
// 这个表描述一个字节(8bits)表示的值(0-255),各有多少个前置的0
//如0的时候有8个,64的时候有1个0
var lzcount = [256]int{
8, 7, 6, 6, 5, 5, 5, 5,
4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
}
// 这个方法计算两个节点之间的逻辑距离
// 计算方法是求两个node id hash之间的异或值 除了前置的0,还有多少位
// int(log2(a ^ b))
func logdist(a, b common.Hash) int {
lz := 0
for i := range a {
x := a[i] ^ b[i]
if x == 0 {
lz += 8
} else {
lz += lzcount[x]
break
}
}
return len(a)*8 - lz
}
// 已知亮点之间的距离,跟其中一点的NodeID的hash值,求另外一点NodeID的hash值
func hashAtDistance(a common.Hash, n int) (b common.Hash) {
if n == 0 {
return a
}
// flip bit at position n, fill the rest with random bits
b = a
pos := len(a) - n/8 - 1
bit := byte(0x01) << (byte(n%8) - 1)
if bit == 0 {
pos++
bit = 0x80
}
b[pos] = a[pos]&^bit | ^a[pos]&bit // TODO: randomize end bits
for i := pos + 1; i < len(a); i++ {
b[i] = byte(rand.Intn(255))
}
return b
}
记住这句话:两点之间的距离的定义是:求两个node id hash之间的异或值,除了前置的0,还有多少位
3.bonding,bondslots的分析
bonding的主要作用是使用table的net完成ping pong过程,实现两个节点间的握手
type Table struct {
mutex sync.Mutex // protects buckets, bucket content, nursery, rand
buckets [nBuckets]*bucket // index of known nodes by distance
nursery []*Node // bootstrap nodes
rand *mrand.Rand // source of randomness, periodically reseeded
ips netutil.DistinctNetSet
db *nodeDB // database of known nodes
refreshReq chan chan struct{}
initDone chan struct{}
closeReq chan struct{}
closed chan struct{}
bondmu sync.Mutex
bonding map[NodeID]*bondproc
bondslots chan struct{} // limits total number of active bonding processes
nodeAddedHook func(*Node) // for testing
net transport
self *Node // metadata of the local node
}
type bondproc struct {
err error
n *Node
done chan struct{}
}
现在来看相关方法
func (tab *Table) bondall(nodes []*Node) (result []*Node) {
rc := make(chan *Node, len(nodes))
for i := range nodes {
go func(n *Node) {
nn, _ := tab.bond(false, n.ID, n.addr(), n.TCP)
rc <- nn
}(nodes[i])
}
for range nodes {
if n := <-rc; n != nil {
result = append(result, n)
}
}
return result
}
这个方法主要是对每个节点循环调用bond,看bond方法
//该方法的主要目的就是保证本地节点跟远程节点完成握手
//握手的过程就是双方都完成ping pong的信令交换
//findnode等消息只能在握手完成后
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
if id == tab.self.ID {
return nil, errors.New("is self")
}
if pinged && !tab.isInitDone() {
return nil, errors.New("still initializing")
}
// Start bonding if we haven't seen this node for a while or if it failed findnode too often.
node, fails := tab.db.node(id), tab.db.findFails(id)
age := time.Since(tab.db.bondTime(id))
var result error
if fails > 0 || age > nodeDBNodeExpiration {
log.Trace("Starting bonding ping/pong", "id", id, "known", node != nil, "failcount", fails, "age", age)
tab.bondmu.Lock()
w := tab.bonding[id]
if w != nil {
// Wait for an existing bonding process to complete.
tab.bondmu.Unlock()
<-w.done
} else {
// Register a new bonding process.
w = &bondproc{done: make(chan struct{})}
tab.bonding[id] = w
tab.bondmu.Unlock()
// Do the ping/pong. The result goes into w.
tab.pingpong(w, pinged, id, addr, tcpPort)
// Unregister the process after it's done.
tab.bondmu.Lock()
delete(tab.bonding, id)
tab.bondmu.Unlock()
}
// Retrieve the bonding results
result = w.err
if result == nil {
node = w.n
}
}
// Add the node to the table even if the bonding ping/pong
// fails. It will be relaced quickly if it continues to be
// unresponsive.
if node != nil {
tab.add(node)
tab.db.updateFindFails(id, 0)
}
return node, result
}
4.pingpong中的底层网络
前面说到 bonding的主要作用是使用table的net完成ping pong过程,实现两个节点间的握手。
而net是以下过程中创建
//p2p/discover/udp.go
func newUDP(c conn, cfg Config) (*Table, *udp, error) {
udp := &udp{
conn: c,
priv: cfg.PrivateKey,
netrestrict: cfg.NetRestrict,
closing: make(chan struct{}),
gotreply: make(chan reply),
addpending: make(chan *pending),
}
realaddr := c.LocalAddr().(*net.UDPAddr)
if cfg.AnnounceAddr != nil {
realaddr = cfg.AnnounceAddr
}
// TODO: separate TCP port
udp.ourEndpoint = makeEndpoint(realaddr, uint16(realaddr.Port))
tab, err := newTable(udp, PubkeyID(&cfg.PrivateKey.PublicKey), realaddr, cfg.NodeDBPath, cfg.Bootnodes)
if err != nil {
return nil, nil, err
}
udp.Table = tab
go udp.loop()
go udp.readLoop(cfg.Unhandled)
return udp.Table, udp, nil
}
- 从p2p.server.Start()方法中得知 c conn参数是一个监听udp端口的conn
- udp struct中包含udp的conn
- udp struct实现了//p2p/discover/table.go中定义的
// transport is implemented by the UDP transport.
// it is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
type transport interface {
ping(NodeID, *net.UDPAddr) error
waitping(NodeID) error
findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
close()
}
接口
所以当在bond方法中调用ping等方法时,执行的是udp接口的方法(p2p/discover/table.go)
以下对这些方法做出分析
5.p2p的网络维护协议
数据协议的相关结构体如下:
//p2p/discover/udp.go
ping struct {
Version uint
From, To rpcEndpoint
Expiration uint64
// Ignore additional fields (for forward compatibility).
Rest []rlp.RawValue `rlp:"tail"`
}
// pong is the reply to ping.
pong struct {
// This field should mirror the UDP envelope address
// of the ping packet, which provides a way to discover the
// the external address (after NAT).
To rpcEndpoint
ReplyTok []byte // This contains the hash of the ping packet.
Expiration uint64 // Absolute timestamp at which the packet becomes invalid.
// Ignore additional fields (for forward compatibility).
Rest []rlp.RawValue `rlp:"tail"`
}
// findnode is a query for nodes close to the given target.
findnode struct {
Target NodeID // doesn't need to be an actual public key
Expiration uint64
// Ignore additional fields (for forward compatibility).
Rest []rlp.RawValue `rlp:"tail"`
}
// reply to findnode
neighbors struct {
Nodes []rpcNode
Expiration uint64
// Ignore additional fields (for forward compatibility).
Rest []rlp.RawValue `rlp:"tail"`
}