# Show the OS release
cat /etc/issue # Ubuntu 18.04
# Show the machine's IP addresses
ifconfig
# Show the current system status
htop # installing and using htop is strongly recommended
# After installing the NVIDIA driver, show GPU info (driver version, highest supported CUDA version)
nvidia-smi # driver install: https://www.nvidia.cn/geforce/drivers/ ; driver 495.44
apt换源:用 sudo vim /etc/apt/sources.list 修改源列表,然后执行 sudo apt-get update 更新。具体源参见 https://mirrors.tuna.tsinghua.edu.cn/help/ubuntu/
conda换源:修改 ~/.condarc 文件,具体源见 https://mirrors.tuna.tsinghua.edu.cn/help/anaconda/ ,也可以用命令添加,例如:
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
如果大家会科学上网挂代理,就不需要换源处理了,之前偷懒踩过的坑。
# If Docker is already present, purge the old packages and their data directories first
sudo apt-get purge docker-ce docker-ce-cli containerd.io
sudo rm -rf /var/lib/docker /var/lib/containerd
# Install through the downloaded shell script
curl --fail --silent --show-error --location https://get.docker.com --output get-docker.sh
sudo sh get-docker.sh
# Build the distro identifier from /etc/os-release (ID followed by VERSION_ID),
# then register NVIDIA's GPG key and the matching apt repository list.
# Expansions are quoted so the identifier is never word-split or glob-expanded.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}") \
  && curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
  && curl -s -L "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" \
    | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
# Install the nvidia-docker toolkit
sudo apt-get update
sudo apt-get install -y nvidia-docker2
sudo systemctl restart docker
# Verify the installation by running nvidia-smi inside a CUDA container
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
# 稍微解释下这条命令:--rm 表示容器退出后自动删除该容器,--gpus all 表示给容器所有GPU的支持,nvidia/cuda:11.0-base 是镜像名(而不是容器ID),nvidia-smi 是启动容器后执行的命令
docker run -it nvidia/cuda:11.0-base bash:在该镜像的容器中开一个bash,输入nvidia-smi
nvidia-docker run -it nvidia/cuda:11.0-base bash:操作同上
关于Docker更为详细的介绍,可以参见 Docker Tutorial For Beginners 和知乎“Docker简单的命令大全”
# Pull an image
docker pull nvidia/cuda:11.0-base
# List images
docker images
# Remove an image (<image_id> is a placeholder to substitute)
docker rmi <image_id>
# Container-related operations (<command> is a placeholder to substitute)
docker container <command>
# Create a container interactively
docker run -it nvidia/cuda:11.0-base bash
# List containers in every state (running, stopped, exited)
docker ps -a
# Remove every container whose status is "exited".
# `docker container prune` can be used for a similar cleanup.
# The IDs are piped through `xargs -r` (GNU xargs) so that `docker rm` is not
# invoked at all when the list is empty, instead of failing for lack of arguments.
docker ps -a -q -f status=exited | xargs -r docker rm
先来一个例子,看看如何搭建起18年开源的一个强化学习代码库环境源:https://github.com/ShangtongZhang/DeepRL
# Prefix with sudo if you lack permission
git clone https://github.com.cnpmjs.org/ShangtongZhang/DeepRL.git
本来直接执行 pip install . 来使用仓库中的setup.py就可以了,但因为作者限制python3.6版本,而dm_control更新到只维护3.7及以上版本,导致了不兼容,因此要稍微魔改一下Dockerfile,如下:
FROM nvidia/cuda:10.0-base
# Install the build tools and system libraries for the image.
# The apt list cleanup runs inside the same RUN instruction: done in a separate
# RUN it would only hide the files, which would still be stored in this layer.
# Package names that appeared twice in the list are given once.
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y --allow-unauthenticated --no-install-recommends \
    build-essential apt-utils cmake git curl vim ca-certificates \
    libjpeg-dev libpng-dev \
    libgtk3.0 libsm6 ffmpeg pkg-config \
    qtbase5-dev libqt5opengl5-dev libassimp-dev \
    libboost-python-dev libtinyxml-dev bash \
    wget unzip libosmesa6-dev software-properties-common \
    libopenmpi-dev libglew-dev openssh-server \
    libgl1-mesa-glx libgl1-mesa-dev patchelf libglfw3 \
    && rm -rf /var/lib/apt/lists/*
# Build argument: numeric id for the account created below
ARG UID
RUN useradd --uid $UID --create-home user
# Everything from here on runs as that account, inside its home directory
USER user
WORKDIR /home/user
# Fetch the Miniconda installer, run it in batch mode into ./miniconda3, then delete the installer
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && bash Miniconda3-latest-Linux-x86_64.sh -b -p miniconda3 \
    && rm Miniconda3-latest-Linux-x86_64.sh
# Put the Miniconda binaries first on PATH
ENV PATH=/home/user/miniconda3/bin:$PATH
# Download the MuJoCo 1.50 archive and unpack it under .mujoco
RUN mkdir -p .mujoco \
    && wget https://www.roboti.us/download/mjpro150_linux.zip -O mujoco.zip \
    && unzip mujoco.zip -d .mujoco \
    && rm mujoco.zip
# Download the MuJoCo 2.0 archive and unpack it under .mujoco as well
RUN wget https://www.roboti.us/download/mujoco200_linux.zip -O mujoco.zip \
    && unzip mujoco.zip -d .mujoco \
    && rm mujoco.zip
# License key, supplied from the build context
COPY ./mjkey.txt .mujoco/mjkey.txt
ENV LD_LIBRARY_PATH /home/user/.mujoco/mjpro150/bin:${LD_LIBRARY_PATH}
# The 2.0 archive is mujoco200_linux.zip, so the library path uses mujoco200_linux
# rather than mjpro200_linux.
# NOTE(review): confirm the unpacked directory name against the archive contents.
ENV LD_LIBRARY_PATH /home/user/.mujoco/mujoco200_linux/bin:${LD_LIBRARY_PATH}
# Change 1: switch Python to 3.7
RUN conda install -y python=3.7
# -y added for consistency with the line above, so no confirmation is requested during the build
RUN conda install -y mpi4py
COPY requirements.txt requirements.txt
# Change 2: point pip and conda at the Tsinghua (TUNA) mirrors
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
RUN pip install -r requirements.txt
RUN pip install glfw Cython imageio lockfile
RUN pip install mujoco-py==1.50.1.68
# Change 3: dm_control can now be installed directly with pip
RUN pip install dm_control
RUN pip install git+https://github.com.cnpmjs.org/ShangtongZhang/dm_control2gym.git@scalar_fix
# Fetched over HTTPS: GitHub no longer serves the unauthenticated git:// protocol
RUN pip install git+https://github.com/openai/baselines.git@8e56dd#egg=baselines
# Change 4: setup.py cannot be used, so clone the sources and run from the checkout
RUN git clone https://github.com.cnpmjs.org/ShangtongZhang/DeepRL.git deep_rl
WORKDIR /home/user/deep_rl
docker build --build-arg UID=11 -t deep_rl:v1.5 . :制作镜像
nvidia-docker run -it deep_rl:v1.5 bash :创建一个容器,并进入
python examples.py :先运行一次,修正一下由于我们使用了py3.7导致的一些代码错误
python examples.py :修正之后就可以在容器中跑起来了
关于Docker容器的启动方式、数据保存、网络设置、删除退出等相关知识,请自行进一步查阅
遵循的一个原则:越频繁改动的放到最后添加,比如包可能会经常更新、开源库又有一个新功能的commit、代码经常需要修改等
# Use the same network as the host (adjust as needed); -d runs the container in the background
sudo nvidia-docker run --network host --name myrl10 -itd nvidia/cuda:10.0-base bash
docker exec -it myrl10 bash :进入容器中,查看相关信息,然后开始从零配置环境
cat /etc/issue :Ubuntu 18.04.1 LTS
nvidia-smi :看显卡驱动版本、最高支持的cuda版本
查看 /usr/local/cuda :cuda确实是10.0,安心!
# 给apt换源
修改/etc/apt/sources.list如下:
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-security main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-security main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-proposed main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-proposed main restricted universe multiverse
################
# Refresh the package index, install the build tools and system libraries
# non-interactively, then clear the downloaded apt lists.
# A space now precedes every line-continuation backslash: without it "tmux" was
# glued to the first word of the next line, yielding the bogus package name
# "tmuxlibjpeg-dev". Package names that appeared twice are given once.
apt update && DEBIAN_FRONTEND=noninteractive apt install -y --allow-unauthenticated --no-install-recommends \
  build-essential apt-utils cmake git curl vim ca-certificates sudo tmux \
  libjpeg-dev libpng-dev \
  libgtk3.0 libsm6 ffmpeg pkg-config \
  qtbase5-dev libqt5opengl5-dev libassimp-dev \
  libboost-python-dev libtinyxml-dev bash \
  wget unzip libosmesa6-dev software-properties-common \
  libopenmpi-dev libglew-dev openssh-server \
  libgl1-mesa-glx libgl1-mesa-dev patchelf libglfw3 \
  && rm -rf /var/lib/apt/lists/*
# Create the account pamirl with a home directory and bash as its shell
useradd --create-home --shell /bin/bash pamirl
# Add pamirl to the sudo group
adduser pamirl sudo
# Set pamirl's password.
# NOTE(review): 'qwerty' is a hard-coded, easily guessed password - replace it before the image is shared.
echo 'pamirl:qwerty' | chpasswd
# Continue in pamirl's home directory
cd /home/pamirl
# Download the Miniconda installer from the Tsinghua (TUNA) mirror.
# The file fetched here is the x86_64 installer, matching the name executed on the next line.
wget -q https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh -b -p miniconda3
# Single quotes keep $PATH literal in .bashrc, so it is expanded whenever the file
# is sourced instead of freezing the current value into the file.
echo 'export PATH=/home/pamirl/miniconda3/bin:$PATH' >> .bashrc && source .bashrc
wget https://github.com.cnpmjs.org/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz
# Create and extract into the same explicit directory; "$HOME" may not be
# /home/pamirl if these steps are still being run by another account.
mkdir -p /home/pamirl/.mujoco
tar -zxf mujoco210-linux-x86_64.tar.gz -C /home/pamirl/.mujoco
echo 'export LD_LIBRARY_PATH=/home/pamirl/.mujoco/mujoco210/bin:${LD_LIBRARY_PATH}' >> .bashrc && source .bashrc
conda config --describe
# Switch conda to the Tsinghua (TUNA) mirrors, adding the channels one by one in this order
for channel in \
  https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ \
  https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main \
  https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r \
  https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ \
  https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2; do
  conda config --add channels "$channel"
done
# Create the deep_rl environment (Python 3.8), initialise conda for bash, then activate the environment
conda create -y -n deep_rl python=3.8
conda init bash
source .bashrc && conda activate deep_rl
# Switch pip to the Tsinghua mirror (editing ~/.config/pip/pip.conf by hand is an alternative)
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
# See how many pip entries exist under the home directory
find ~ -iname pip
# Check which pip is actually in use - normally the one belonging to the deep_rl environment's Python
pip -V
pip install -U 'mujoco-py<2.2,>=2.1'
conda install pytorch=1.10.1 torchvision torchaudio cudatoolkit=10.2
pip install -q dm_control gym
pip install -U git+https://github.com.cnpmjs.org/martinseilair/dm_control2gym.git
# Save container myrl10 as the image dm_control:py38_mujoco210 (-m: commit message, -a: author)
docker commit -m "dm_control built from cuda10.0" -a "Nemo" myrl10 dm_control:py38_mujoco210
这里给出一个配置最新基于mujoco物理引擎210的dm_control强化环境。cuda版本10.2 python版本3.8 pytorch版本1.10.1 cudnn版本7.6.5
FROM nvidia/cuda:10.0-base
# Ship the replacement apt source list with the build context
COPY ./change_apt.txt /
# Back up the stock sources.list, overwrite it with the replacement list,
# drop the temporary copy and refresh the package index
RUN cp /etc/apt/sources.list /etc/apt/sources_init.list \
    && cp /change_apt.txt /etc/apt/sources.list \
    && rm /change_apt.txt \
    && apt update
# Install the build tools and system libraries, then clear the apt lists in the same layer.
# A space now precedes every line-continuation backslash: without it "tmux" was
# joined to the first word of the next line, yielding the bogus package name
# "tmuxlibjpeg-dev". Package names that appeared twice are given once.
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y --allow-unauthenticated --no-install-recommends \
    build-essential apt-utils cmake git curl vim ca-certificates sudo tmux \
    libjpeg-dev libpng-dev \
    libgtk3.0 libsm6 ffmpeg pkg-config \
    qtbase5-dev libqt5opengl5-dev libassimp-dev \
    libboost-python-dev libtinyxml-dev bash \
    wget unzip libosmesa6-dev software-properties-common \
    libopenmpi-dev libglew-dev openssh-server \
    libgl1-mesa-glx libgl1-mesa-dev patchelf libglfw3 \
    && rm -rf /var/lib/apt/lists/*
# Create the account pamirl (home directory, bash shell), add it to the sudo group
# and set its password.
# NOTE(review): 'qwerty' is a hard-coded, easily guessed password - replace it before the image is shared.
# The trailing `su pamirl` was dropped: a user switch inside one RUN does not carry
# over to later instructions; the USER instruction below is what changes the account.
RUN useradd --create-home --shell /bin/bash pamirl \
    && adduser pamirl sudo \
    && echo 'pamirl:qwerty' | chpasswd
USER pamirl
WORKDIR /home/pamirl
# Fetch the Miniconda installer from the TUNA mirror, run it in batch mode into ./miniconda3, then delete it
RUN wget --quiet https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && bash Miniconda3-latest-Linux-x86_64.sh -b -p miniconda3 \
    && rm Miniconda3-latest-Linux-x86_64.sh
# Put the Miniconda binaries first on PATH
ENV PATH=/home/pamirl/miniconda3/bin:$PATH
# Download MuJoCo 2.1.0, print the current user and directory to the build output,
# unpack the archive into $HOME/.mujoco and remove the tarball
RUN mkdir -p .mujoco \
    && wget https://github.com.cnpmjs.org/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz \
    && whoami \
    && pwd \
    && tar -zxf mujoco210-linux-x86_64.tar.gz -C "$HOME/.mujoco" \
    && rm mujoco210-linux-x86_64.tar.gz
ENV LD_LIBRARY_PATH=/home/pamirl/.mujoco/mujoco210/bin:${LD_LIBRARY_PATH}
# Copy the conda channel configuration from the build context into the working directory
COPY ./change_conda.txt ./
# Print conda's configuration description, install the copied file as .condarc,
# remove the copy and create the dm_control environment with Python 3.8
RUN conda config --describe \
&& cat ./change_conda.txt > .condarc \
&& rm change_conda.txt \
&& conda create -y -n dm_control python=3.8
# Append the environment activation command to ~/.bashrc
RUN echo "conda activate dm_control" >> ~/.bashrc
# Run the following RUN instructions through a bash login shell
SHELL ["/bin/bash", "--login", "-c"]
# Point pip at the TUNA index, activate dm_control, print .condarc to the build
# output, then install the Python packages
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple \
&& source activate dm_control \
&& cat .condarc \
&& pip install -q dm_control gym \
&& pip install -U 'mujoco-py<2.2,>=2.1' \
&& conda install -y pytorch=1.10.1 torchvision torchaudio cudatoolkit=10.2 \
&& pip install glfw Cython imageio lockfile \
&& pip install git+https://github.com.cnpmjs.org/martinseilair/dm_control2gym.git
# Add Your Code Below
在如下文件中,运行:docker build -t dm_control:py38_torch1.10_mujoco210 .
其中change_apt.txt为:
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-security main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-security main restricted universe multiverse
deb https://mirrors.ustc.edu.cn/ubuntu/ bionic-proposed main restricted universe multiverse
deb-src https://mirrors.ustc.edu.cn/ubuntu/ bionic-proposed main restricted universe multiverse
change_conda.txt为:
# conda configuration that points the channels at the Tsinghua (TUNA) mirrors.
# The entries under custom_channels are indented so they are parsed as members of
# that mapping rather than as unrelated top-level keys.
channels:
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
  - defaults
show_channel_urls: true
custom_channels:
  conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
  simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud
这仅仅是强化环境!!!如果想使用强化的算法库,那么版本兼容的任务则更为艰巨了。
下一步计划介绍通用的强化算法库,然后把它们接入到这些配好的环境镜像中运行
https://github.com/openai/gym ↩︎
https://github.com/deepmind/dm_control ↩︎
https://github.com/bulletphysics/bullet3 ↩︎