(1) 主机上安装好驱动
驱动下载地址:https://www.nvidia.cn/content/DriverDownload-March2009/confirmation.php?url=/XFree86/Linux-x86_64/440.82/NVIDIA-Linux-x86_64-440.82.run&lang=cn&type=TITAN
# Run the NVIDIA 440.82 driver installer with its default (interactive) settings.
./NVIDIA-Linux-x86_64-440.82.run
# If the command above reports an error, try the invocation below instead.
# --ui=none disables the interactive UI and --no-kernel-module skips the kernel
# module. NOTE(review): -a / -N are presumably "accept license" / "no network
# access" - confirm against `./NVIDIA-Linux-x86_64-440.82.run --advanced-options`.
./NVIDIA-Linux-x86_64-440.82.run -a -N --ui=none --no-kernel-module
报错: ERROR: Unable to find the module utility `modprobe`; please make sure you have the package 'module-init-tools' or 'kmod' installed. If you do have 'module-init-tools' or 'kmod' installed, then please check that `modprobe` is in your PATH.
# apt-get install kmod
dyf-ai@ubuntu-ai:~/桌面$ nvidia-smi
Sat Jun 5 09:49:39 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.80 Driver Version: 460.80 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce RTX 206... Off | 00000000:07:00.0 On | N/A |
| 0% 42C P8 6W / 175W | 327MiB / 7979MiB | 2% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
docker安装
上面都可自行百度
(1)在docker上也和前面主机那样,安装一次显卡驱动;
(2)安装cuda
./cuda_10.2.89_440.33.01_linux.run
(3) 添加环境变量
# Lines to append to ~/.bashrc: environment for the CUDA 10.2 toolchain.
# Values are assigned first and exported together at the end.

# Locale override: plain "C" for every locale category.
LC_ALL="C"

# Shared-library search path: CUDA 10.2 runtime libraries, then CUPTI.
LD_LIBRARY_PATH="/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/extras/CUPTI/lib64"

# Executable search path: CUDA 10.2 and ffmpeg binaries ahead of the system
# directories. This replaces PATH outright; it does not extend the old value.
PATH="/usr/local/cuda-10.2/bin:/usr/local/ffmpeg/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

export LC_ALL LD_LIBRARY_PATH PATH
(4) 将下载好的cudnn解压,并将解压出的文件夹重命名为下面命令中使用的名字(例如 cudnn-10.2-linux-x64-v7.6.5.32)
# Install cuDNN 7.6.5 into the CUDA 10.2 toolkit. Run as root from the directory
# that contains the extracted cudnn-10.2-linux-x64-v7.6.5.32 folder.
# NOTE(review): assumes /usr/local/cuda is the installer-created symlink to
# /usr/local/cuda-10.2 - confirm before running.
cp cudnn-10.2-linux-x64-v7.6.5.32/include/cudnn.h /usr/local/cuda/include/
cp cudnn-10.2-linux-x64-v7.6.5.32/lib64/lib* /usr/local/cuda/lib64/

# Plain cp copies symlink targets, so any libcudnn.so / libcudnn.so.7 from the
# archive land as regular files; replace them with a symlink chain onto the
# real 7.6.5 library. Each step is chained on the cd so nothing is removed or
# linked in the wrong directory when the cd fails.
cd /usr/local/cuda/lib64/ &&
  rm -rf libcudnn.so libcudnn.so.7 &&
  ln -snf libcudnn.so.7.6.5 libcudnn.so.7 &&
  ln -snf libcudnn.so.7 libcudnn.so

# Expose the 10.2 runtime and CUPTI libraries under 10.1 file names.
# NOTE(review): presumably needed by a framework built against CUDA 10.1 - confirm.
# -snf (instead of plain -s) keeps both links safe to re-create on a re-run.
cd /usr/local/cuda-10.2/targets/x86_64-linux/lib/ &&
  ln -snf libcudart.so.10.2.89 libcudart.so.10.1
cd /usr/local/cuda-10.2/extras/CUPTI/lib64 &&
  ln -snf libcupti.so.10.2.75 libcupti.so.10.1

# Persist the library path for root and reload it. The line is appended only
# when it is not already present, so re-running does not pile up duplicates.
ld_path_line='export LD_LIBRARY_PATH=/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/extras/CUPTI/lib64'
{ grep -qxF -- "$ld_path_line" /root/.bashrc 2>/dev/null ||
  printf '%s\n' "$ld_path_line" >> /root/.bashrc; } && source /root/.bashrc
unset ld_path_line
# Same procedure for CUDA 11 with cuDNN 8.1.1. Run as root.
# tools_dir must point at the download directory; the :? expansion stops the
# command with a message instead of silently cd-ing to "/CUDA+cuDNN/cuDNN/".
# NOTE(review): assumes /usr/local/cuda now points at /usr/local/cuda-11.3 - confirm.
# cuDNN 8 splits its API across several cudnn_*.h headers that cudnn.h includes,
# so all of them are copied, not just cudnn.h.
cd "${tools_dir:?tools_dir must be set to the download directory}/CUDA+cuDNN/cuDNN/" &&
  cp cudnn-11.2-linux-x64-v8.1.1.33/include/cudnn*.h /usr/local/cuda/include/ &&
  cp cudnn-11.2-linux-x64-v8.1.1.33/lib64/lib* /usr/local/cuda/lib64/

# Rebuild the libcudnn symlink chain onto the real 8.1.1 library; chained on
# the cd so nothing is removed or linked in the wrong directory.
cd /usr/local/cuda/lib64/ &&
  rm -rf libcudnn.so libcudnn.so.8 &&
  ln -snf libcudnn.so.8.1.1 libcudnn.so.8 &&
  ln -snf libcudnn.so.8 libcudnn.so

# libcudart.so.11.3.109 and libcupti.so.2021.1.1 belong to the CUDA 11.3
# toolkit, so the links are created under /usr/local/cuda-11.3 (the original
# notes cd'ed into the cuda-10.2 tree, where these files do not exist).
# -snf keeps both links safe to re-create on a re-run.
cd /usr/local/cuda-11.3/targets/x86_64-linux/lib/ &&
  ln -snf libcudart.so.11.3.109 libcudart.so.11.0
cd /usr/local/cuda-11.3/extras/CUPTI/lib64 &&
  ln -snf libcupti.so.2021.1.1 libcupti.so.11.0

# Persist the library path for root and reload it. The line is appended only
# when it is not already present, so re-running does not pile up duplicates.
ld_path_line='export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64:/usr/local/cuda-11.3/extras/CUPTI/lib64'
{ grep -qxF -- "$ld_path_line" /root/.bashrc 2>/dev/null ||
  printf '%s\n' "$ld_path_line" >> /root/.bashrc; } && source /root/.bashrc
unset ld_path_line
$ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
$ curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
$ curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
$ sudo apt-get update
$ sudo apt-get install -y nvidia-docker2
$ sudo systemctl restart docker
# Three ways to start the container. The first one currently has a bug (CUDA is
# not usable inside it); the other two work.
# docker run -it --privileged -e NVIDIA_VISIBLES=all -v /media:/media ubuntu:latest /bin/bash
# NOTE(review): NVIDIA_VISIBLES is probably a misspelling of NVIDIA_VISIBLE_DEVICES,
# which presumably also needs the NVIDIA runtime selected (e.g. --runtime=nvidia)
# to take effect - likely why the variant above cannot see CUDA; confirm.
# Variant 2: stock Ubuntu image, all GPUs requested via --gpus, /media bind-mounted.
docker run -it --gpus all -v /media:/media ubuntu:latest /bin/bash
# Variant 3: the ubuntu-ai:1.0 image in privileged mode, /media bind-mounted.
docker run -it --privileged=true -v /media:/media ubuntu-ai:1.0 /bin/bash
root@445bf38d82a8:/# python3
Python 3.8.5 (default, May 27 2021, 13:30:53)
[GCC 9.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.cuda.is_available()
True
>>>