sudo lsb_release -a
输出:
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 16.04.6 LTS
Release: 16.04
Codename: xenial
查看显卡型号:
lspci | grep -i nvidia
输出:
00:04.0 3D controller: NVIDIA Corporation GK210GL [Tesla K80] (rev a1)
进入root用户
sudo -iu root
从以下地址下载NVIDIA显卡驱动(版本 418.67):
https://www.nvidia.com/content/DriverDownload-March2009/confirmation.php?url=/tesla/418.67/NVIDIA-Linux-x86_64-418.67.run&lang=us&type=Tesla#
产品型号选择 Tesla K80
执行以下命令禁用X-Window服务,否则无法安装显卡驱动:
sudo service lightdm stop
执行以下三条命令卸载原有显卡驱动:
sudo apt-get remove --purge 'nvidia*'
sudo chmod +x NVIDIA-Linux-x86_64-418.67.run
sudo ./NVIDIA-Linux-x86_64-418.67.run --uninstall
安装新驱动
直接执行驱动文件即可安装新驱动,一直默认即可:
sudo ./NVIDIA-Linux-x86_64-418.67.run
执行以下命令启动X-Window服务
sudo service lightdm start
最后执行重启命令,重启系统即可:
reboot
重启后安装CUDA 10.1(驱动已单独安装,安装选项中可取消勾选Driver):
sudo sh cuda_10.1.168_418.67_linux.run
查看CUDA版本:
cat /usr/local/cuda/version.txt
输出
CUDA Version 10.1.168
从以下地址下载cuDNN(需选择与CUDA 10.1对应的版本):
https://developer.nvidia.com/rdp/cudnn-download
选择cuDNN Library for Linux
cudnn-10.1-linux-x64-v7.6.2.24.tgz
解压缩
tar -zxvf cudnn-10.1-linux-x64-v7.6.2.24.tgz
输出得到
cuda/include/cudnn.h
cuda/NVIDIA_SLA_cuDNN_Support.txt
cuda/lib64/libcudnn.so
cuda/lib64/libcudnn.so.7
cuda/lib64/libcudnn.so.7.6.2
cuda/lib64/libcudnn_static.a
将cuDNN文件复制到CUDA安装目录(-P 用于保留libcudnn.so的符号链接,避免复制成多份完整文件):
sudo cp -P cuda/lib64/* /usr/local/cuda-10.1/lib64/
sudo cp cuda/include/* /usr/local/cuda-10.1/include/
查看cuDNN版本:
cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2
输出
#define CUDNN_MAJOR 7
#define CUDNN_MINOR 6
#define CUDNN_PATCHLEVEL 2
--
#define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)
#include "driver_types.h"
ubuntu 安装anaconda
wget https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh
bash Anaconda3-5.0.1-Linux-x86_64.sh
安装 PyTorch(conda 的 cudatoolkit 自带CUDA运行库,不依赖系统安装的CUDA 10.1,只要显卡驱动版本支持即可):
conda install pytorch torchvision cudatoolkit=9.2 -c pytorch
在 Python 中测试GPU是否可用:
import torch
x= torch.Tensor([1.0])
xx= x.cuda()
print(xx)
输出
tensor([1.], device='cuda:0')