宿主机 安装 driver
# 禁用宿主机 自带 显卡 驱动
lsmod | grep nouveau
nouveau 1949696 0
mxm_wmi 16384 1 nouveau
wmi 32768 2 mxm_wmi,nouveau
video 49152 1 nouveau
i2c_algo_bit 16384 1 nouveau
ttm 106496 2 qxl,nouveau
drm_kms_helper 184320 4 qxl,nouveau
drm 491520 5 drm_kms_helper,qxl,ttm,nouveau
# 首先禁用 nouveau
vi /etc/modprobe.d/blacklist.conf
# 在最后一行添加:blacklist nouveau
# Blacklist the nouveau kernel module in the modprobe configuration.
modprobe_path='/etc/modprobe.d/blacklist.conf'
# Append the entry only when an exact "blacklist nouveau" line is not already
# present, so re-running these steps neither duplicates the entry nor leaves
# behind the empty lines that a blank-then-append approach accumulates.
# -x matches whole lines only; -s keeps grep quiet if the file does not exist yet.
if ! grep -qxs 'blacklist nouveau' "${modprobe_path}"; then
  # The leading newline protects against a file whose last line is unterminated.
  printf '\nblacklist nouveau\n' >> "${modprobe_path}"
fi
sudo update-initramfs -u
# Disable the graphical interface: make multi-user.target the default boot target
systemctl set-default multi-user.target
reboot
lsmod | grep nouveau
# No output means nouveau is no longer loaded, i.e. the blacklist worked
# Use a public DNS resolver. NOTE: this overwrites /etc/resolv.conf.
echo "nameserver 114.114.114.114" > /etc/resolv.conf
# Point the archive and security apt sources at the Huawei Cloud mirror.
sudo sed -i "s@http.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list
sudo sed -i "s@http.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list
apt update -y
# Remove any previously installed NVIDIA packages before installing the new driver.
# The pattern is quoted so the shell passes it to apt-get untouched; unquoted, it
# would be expanded against any file in the current directory matching nvidia*.
sudo apt-get remove 'nvidia*' -y
apt autoremove -y
apt install nvidia-driver-460-server -y
apt install nvidia-cuda-toolkit -y
# Disable the graphical interface
systemctl set-default multi-user.target
# Register the nvidia-docker apt repository for this distribution.
# "distribution" is ID followed by VERSION_ID as read from /etc/os-release.
# curl -f makes an HTTP error produce no output instead of piping the server's
# error page into apt-key or into the sources list file.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}") \
  && curl -fsSL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
  && curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update -y
sudo apt-get install -y nvidia-docker2
# Restart Docker after installing nvidia-docker2, and enable it at boot.
sudo systemctl restart docker
sudo systemctl enable docker
测试 nvidia-smi 驱动
# 国内
sudo docker run --rm --gpus all registry.cn-hangzhou.aliyuncs.com/mkmk/all:nvidia-cuda-11-base nvidia-smi
Thu Apr 8 16:52:50 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce GT 730 Off | 00000000:00:03.0 N/A | N/A |
| 30% 30C P0 N/A / N/A | 0MiB / 2002MiB | N/A Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 1 GeForce GT 730 Off | 00000000:00:04.0 N/A | N/A |
| 30% 27C P0 N/A / N/A | 0MiB / 2002MiB | N/A Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
root@free_cicd:~#
# This step can be skipped at first: test the GPU directly with gpu-burn
# If gpu-burn reports an incompatibility, come back here to downgrade the driver
echo "nameserver 114.114.114.114" > /etc/resolv.conf
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-ubuntu2004-11-1-local_11.1.1-455.32.00-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu2004-11-1-local_11.1.1-455.32.00-1_amd64.deb
sudo apt-key add /var/cuda-repo-ubuntu2004-11-1-local/7fa2af80.pub
sudo apt-get update
sudo apt-get -y install cuda
# Disable the graphical interface
systemctl set-default multi-user.target
reboot
使用 gpu-burn 测试 gpu
docker run -it --gpus=all registry.cn-hangzhou.aliyuncs.com/mkmk/all:gpu-burn-cuda11.1
docker run -it --gpus=all registry.cn-hangzhou.aliyuncs.com/mkmk/all:gpu-burn-cuda11.1 "/app/gpu_burn" "10"
rm compare.ptx
COMPUTE=60 make
gpu-burn 的 Dockerfile
root@free_cicd:~/gpu-burn#
cat Dockerfile
# Build stage: run make inside the CUDA devel image.
FROM nvidia/cuda:11.1.1-devel AS builder
WORKDIR /build
COPY . /build/
RUN make
# Runtime stage: copy only the needed artifacts onto the CUDA runtime image.
FROM nvidia/cuda:11.1.1-runtime
COPY --from=builder /build/gpu_burn /app/
# NOTE(review): upstream gpu-burn appears to build compare.ptx from compare.cu
# and load the .ptx at run time; confirm against the Makefile. The .cu copy
# below is kept so the image contents stay a superset of the previous image.
COPY --from=builder /build/compare.ptx /app/
COPY --from=builder /build/compare.cu /app/
WORKDIR /app
# Default command: ./gpu_burn with the argument 60.
CMD ["./gpu_burn", "60"]
# Build the gpu-burn image from the current directory, then tag and push it
# to the remote registry.
local_image='gpu-burn:cuda11.1'
remote_image='registry.cn-hangzhou.aliyuncs.com/mkmk/all:gpu-burn-cuda11.1'
docker build . -t "${local_image}"
docker tag "${local_image}" "${remote_image}"
docker push "${remote_image}"
需要 合适的 驱动
echo "nameserver 114.114.114.114" > /etc/resolv.conf
ubuntu-drivers devices
Command 'ubuntu-drivers' not found, but can be installed with:
apt install ubuntu-drivers-common -y
ubuntu-drivers devices
== /sys/devices/pci0000:00/0000:00:03.0 ==
modalias : pci:v000010DEd00001287sv00000000sd00000000bc03sc00i00
vendor : NVIDIA Corporation
model : GK208B [GeForce GT 730]
driver : nvidia-driver-390 - distro non-free
driver : nvidia-driver-460 - distro non-free recommended
# Install the 460-series driver and its companion packages, one package per
# line for readability.
sudo apt-get install -y \
  nvidia-driver-460 \
  libnvidia-gl-460 \
  libnvidia-compute-460 \
  libnvidia-extra-460 \
  nvidia-compute-utils-460 \
  libnvidia-decode-460 \
  libnvidia-encode-460 \
  nvidia-utils-460 \
  xserver-xorg-video-nvidia-460 \
  libnvidia-cfg1-460 \
  libnvidia-ifr1-460
systemctl set-default multi-user.target
reboot
nvidia-smi
make clean
make CUDAPATH=/usr/local/cuda
来聊聊啊