必要环境
- Docker 19.03+(`--gpus` 参数自该版本起提供)
- nvidia-container-toolkit
安装最新版本docker
卸载原来的docker
# Remove previously installed Docker packages before installing docker-ce.
sudo yum remove \
  docker \
  docker-client \
  docker-client-latest \
  docker-common \
  docker-latest \
  docker-latest-logrotate \
  docker-logrotate \
  docker-engine
安装依赖
# Update installed packages first; install the prerequisites only if that succeeds.
sudo yum update -y \
  && sudo yum install -y yum-utils device-mapper-persistent-data lvm2
添加官方yum库
# Register Docker's CentOS yum repository (docker-ce.repo) with yum.
sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
安装docker
# Install the Docker engine, its CLI, and the containerd.io package.
sudo yum install -y docker-ce docker-ce-cli containerd.io
查看docker版本
# Print the installed Docker version; this guide requires 19.03 or newer.
docker --version
开机启动
# Enable the Docker service at boot and start it right away (--now).
# Managing system units needs root, so use sudo like the install steps do.
sudo systemctl enable --now docker
配置国内镜像源加速
# Open the daemon config as root (the file lives under /etc) and save the JSON
# object below as its content.
# NOTE(review): confirm both registry mirrors listed below are still reachable.
sudo vi /etc/docker/daemon.json
{
"registry-mirrors": [
"https://registry.docker-cn.com",
"http://hub-mirror.c.163.com"
]
}
# Reload systemd and restart Docker so the daemon picks up the new config.
sudo systemctl daemon-reload
sudo systemctl restart docker
NVIDIA 驱动安装/升级
安装或更新驱动
# Install or update the NVIDIA driver from the ELRepo repository.
# Import ELRepo's GPG key before installing its release package.
sudo rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
# Fetch the release package over HTTPS instead of plain HTTP.
# NOTE(review): this pins elrepo-release 7.0-2 for EL7 — confirm it is still published.
sudo rpm -Uvh https://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm
sudo yum install -y kmod-nvidia
# Reboot: the driver only takes effect after a restart.
sudo reboot
检测是否生效
# Run after the reboot to check that the driver has taken effect.
nvidia-smi
安装nvidia-docker
执行安装命令
# Build the distro identifier from os-release (ID followed by VERSION_ID);
# it selects the matching repo file in the URL below.
distribution="$(. /etc/os-release; echo "${ID}${VERSION_ID}")"
# -f makes curl fail on an HTTP error instead of piping the error page into
# the .repo file; -S still prints the error despite -s.
curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.repo" \
  | sudo tee /etc/yum.repos.d/nvidia-docker.repo
# Install the toolkit (root required, as for the other yum steps).
sudo yum install -y nvidia-container-toolkit
# Restart Docker after installing the toolkit.
sudo systemctl restart docker
测试是否安装成功
# The short "11.0-base" tag is no longer published on Docker Hub, so use a
# fully qualified CUDA base tag. --rm removes each test container on exit.
# Start a container with access to all GPUs.
docker run --rm --gpus all nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi
# Start a container limited to a given number of GPUs (here: 1).
docker run --rm --gpus 1 nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi
# Start a container bound to a specific GPU id (here: device 1).
docker run --rm --gpus '"device=1"' nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi