环境
ubuntu 16.04
Mellanox Technologies MT26428 [ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE] (rev b0)
omnisky@omnisky:~$ lspci | grep Mell
01:00.0 InfiniBand: Mellanox Technologies MT26428 [ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE] (rev b0)
下载驱动
http://cn.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers
安装驱动
官方文档:http://www.mellanox.com/related-docs/prod_software/Mellanox_OFED_Linux_User_Manual_v4_5.pdf
ISO格式
挂载镜像(下面的ISO文件名仅为示例,请替换为与本机系统版本对应的ISO)
mount -o ro,loop MLNX_OFED_LINUX-2.1-1.0.0-rhel6.4-x86_64.iso /mnt
在/mnt目录下执行如下命令:
./mlnxofedinstall
tgz格式
tar -zxvf MLNX_OFED_LINUX-4.5-1.0.1.0-ubuntu16.04-x86_64.tgz
# 进入解压后的目录 MLNX_OFED_LINUX-4.5-1.0.1.0-ubuntu16.04-x86_64
./mlnxofedinstall --force
#安装完毕,加载驱动
/etc/init.d/openibd restart
配置IB
成功安装IB驱动之后,使用命令ifconfig,可以看见ib0
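vi /etc/network/interfaces
# 在该文件中配置ib0的IP,例如(与下面ifconfig输出中的地址一致):
# auto ib0
# iface ib0 inet static
#     address 11.11.11.11
#     netmask 255.255.255.0
重启网络后,再次执行ifconfig确认ib0已配置IP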
ifconfig
# 显示
ib0 Link encap:UNSPEC HWaddr A0-00-02-20-FE-80-00-00-00-00-00-00-00-00-00-00
inet addr:11.11.11.11 Bcast:11.11.11.255 Mask:255.255.255.0
inet6 addr: fe80::202:c903:52:bdd9/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:2044 Metric:1
RX packets:31 errors:0 dropped:0 overruns:0 frame:0
TX packets:47 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:256
RX bytes:3783 (3.7 KB) TX bytes:5153 (5.1 KB)
启动相关服务
# 主节点启动(主节点同时运行子网管理器opensmd)
/etc/init.d/openibd restart
/etc/init.d/opensmd restart
# 其他节点
/etc/init.d/openibd restart
如果配置成功,则如下所示
root@master:/home/omnisky# ibstat
CA 'mlx4_0'
CA type: MT26428
Number of ports: 1
Firmware version: 2.9.1200
Hardware version: b0
Node GUID: 0x0002c9030052bdd8
System image GUID: 0x0002c9030052bddb
Port 1:
State: Active
Physical state: LinkUp
Rate: 40
Base lid: 1
LMC: 0
SM lid: 1
Capability mask: 0x0251086a
Port GUID: 0x0002c9030052bdd9
Link layer: InfiniBand
测试IB
参考:https://community.mellanox.com/s/article/how-to-create-a-docker-container-with-rdma-accelerated-applications-over-100gb-infiniband-network#jive_content_id_Equipment
# server
ib_write_bw -a -d mlx4_0
# client
ib_write_bw -a -F $server_IP -d mlx4_0 --report_gbits
# 设备名mlx4_0可通过ibstat查询(见上面输出中的 CA 'mlx4_0')
如果正常
[root@mofed-test-pod1 /]# ib_write_bw -a -F 10.244.1.171 -d mlx4_0 --report_gbits
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : mlx4_0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
TX depth : 128
CQ Moderation : 100
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0x01 QPN 0x0218 PSN 0xa65d9c RKey 0x001100 VAddr 0x007f392cbec000
remote address: LID 0x03 QPN 0x0218 PSN 0xdad5f RKey 0x001100 VAddr 0x007ff453bc3000
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[Gb/sec] BW average[Gb/sec] MsgRate[Mpps]
2 5000 0.054750 0.052885 3.305343
4 5000 0.15 0.13 4.134340
8 5000 0.30 0.30 4.673526
16 5000 0.60 0.60 4.654721
32 5000 1.20 1.18 4.594563
64 5000 2.39 2.32 4.526180
128 5000 4.79 4.78 4.669972
256 5000 9.56 9.54 4.658179
512 5000 19.00 18.76 4.580314
1024 5000 23.97 23.93 2.921618
2048 5000 25.29 25.23 1.539684
4096 5000 26.32 26.31 0.803058
8192 5000 26.85 26.84 0.409597
16384 5000 27.09 27.09 0.206665
32768 5000 27.21 27.21 0.103811
65536 5000 27.28 27.28 0.052026
131072 5000 27.37 27.28 0.026016
262144 5000 27.28 27.28 0.013009
524288 5000 27.38 27.30 0.006509
1048576 5000 27.34 27.33 0.003258
2097152 5000 27.34 27.33 0.001629
4194304 5000 27.33 27.32 0.000814
8388608 5000 27.32 27.32 0.000407
---------------------------------------------------------------------------------------
k8s rdma插件安装以及测试
环境
root@master:/home/omnisky# kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master Ready master 48m v1.13.3 192.168.207.122 Ubuntu 16.04.5 LTS 4.15.0-45-generic docker://18.6.1
omnisky Ready 46m v1.13.3 192.168.207.124 Ubuntu 16.04.5 LTS 4.15.0-45-generic docker://18.6.1
安装插件
git clone https://github.com/Mellanox/k8s-rdma-sriov-dev-plugin.git
# 进入克隆下来的文件夹 k8s-rdma-sriov-dev-plugin
# Create config map to describe mode as "hca" mode. This is per node configuration.
kubectl create -f example/hca/rdma-hca-node-config.yaml
# Deploy device plugin
kubectl create -f example/device-plugin.yaml
启动容器测试
# Create test pod which requests 1 vhca resource.
kubectl create -f example/hca/test-hca-pod.yaml
复制文件test-hca-pod.yaml为test_custom.yaml
修改其中的metadata.name,同时指定nodeName,确保两个pod运行在不同节点
apiVersion: v1
kind: Pod
metadata:
  name: mofed-test-pod1
spec:
  restartPolicy: OnFailure
  nodeName: master
  containers:
  - image: mellanox/centos_7_4_mofed_4_2_1_2_0_0_60
    name: mofed-test-ctr
    securityContext:
      capabilities:
        add: [ "IPC_LOCK" ]
    resources:
      limits:
        rdma/hca: 1
    command:
    - sh
    - -c
    - |
      ls -l /dev/infiniband /sys/class/net
      sleep 1000000
kubectl create -f example/hca/test_custom.yaml
# 两个不同节点的pods
root@master:/home/omnisky/ty/k8s-rdma-sriov-dev-plugin/example/hca# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
mofed-test-pod 1/1 Running 0 49m 10.244.1.171 omnisky
mofed-test-pod1 1/1 Running 0 49m 10.244.0.4 master
分别进入两个容器
kubectl exec -it pod_name bash
在容器内执行ibstat,确认结果正常(端口State为Active)
一个做服务端,一个做客户端,测试
# server
ib_write_bw -a -d mlx4_0
# client
ib_write_bw -a -F $server_IP -d mlx4_0 --report_gbits
# 设备名mlx4_0可通过ibstat查询