Docker环境安装请参考[鲲鹏920-arm64][Euler OS 20.03 LTS] Docker安装一文。
基础镜像gromacs_euler_os:compile_base的制作请查看[鲲鹏920-arm64][Euler OS] gromacs源码编译一文2.1节。
# Build on top of the GROMACS compile image.
FROM gromacs_euler_os:compile_base

# Ascend NNAE release and CPU architecture. The install path and the default
# download URLs below are derived from these two values, so overriding them
# with --build-arg keeps everything consistent.
ARG NNAE_VERSION=20.0.RC1
ARG NNAE_ARCH=arm64

# Root of the Ascend installation; used later to build LD_LIBRARY_PATH
# entries for the add-ons and driver libraries.
ARG HOST_ASCEND_BASE=/usr/local/Ascend
# Directory the NNAE package is installed into inside the image.
ARG CNT_ASCEND_BASE=/usr/local/Ascend/nnae/$NNAE_VERSION/$NNAE_ARCH-linux_gcc7.3.0

# NOTE(review): the default URLs point at 127.0.0.1, which is presumably a
# file server on the build host; that is only reachable from a build
# container when building with host networking - confirm.
ARG ASCEND_NNAE_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/Ascend-NNAE-${NNAE_VERSION}-${NNAE_ARCH}-linux_gcc7.3.0.run"
ARG ASCEND_TF_PLUG_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/Ascend-TFPlugin-${NNAE_VERSION}-${NNAE_ARCH}-linux_gcc7.3.0.run"
ARG PYTHON_DOWNLOAD_URL="https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz"

# Self-built TensorFlow wheel; a prebuilt 1.15.4 package is available in the
# resources section of this article.
ARG TENSORFLOW_PKG_NAME="tensorflow-1.15.4-cp37-cp37m-linux_aarch64.whl"
ARG TENSORFLOW_PKG_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/${TENSORFLOW_PKG_NAME}"

# The next two files live on a host that has the Ascend 910 driver installed.
# To keep the number of image layers low they were copied out of their
# original locations and are downloaded during the build.
# ascend_install.info is /etc/ascend_install.info on the host.
ARG ASCEND_INSTALL_INFO_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/ascend_install.info"
# version.info is /usr/local/Ascend/driver/version.info on the host.
ARG ASCEND_VERSION_INFO_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/version.info"

WORKDIR /root
# Install system packages.
# `yum update` needs -y as well: without it the confirmation prompt cannot be
# answered during a non-interactive build and the step aborts whenever
# updates are available. The package cache is removed in the same layer so
# it does not end up in the image.
# NOTE(review): `g++` is kept from the original package list; on RPM-based
# distributions the C++ compiler package is usually named `gcc-c++` -
# confirm that `g++` resolves on this base image.
RUN yum update -y && \
    yum install -y curl wget g++ blas blas-devel gcc-gfortran libgfortran libffi-devel && \
    yum clean all && \
    rm -rf /var/cache/yum
# Build Python 3.7.5 from source into /usr/local/python3.7.5, then expose the
# shared library, the interpreter and pip under /usr/lib64 and /usr/bin.
# The source tree is deleted in the same layer to keep the image small.
RUN py_prefix=/usr/local/python3.7.5 && \
    src_root=/data/python && \
    mkdir -p "${src_root}" && \
    cd "${src_root}" && \
    wget ${PYTHON_DOWNLOAD_URL} && \
    tar -xf Python-3.7.5.tgz && \
    cd Python-3.7.5 && \
    ./configure --prefix="${py_prefix}" --enable-shared && \
    make && \
    make install && \
    cd / && \
    rm -rf "${src_root}" && \
    # Replace any existing libpython with the freshly built shared library.
    rm -rf /usr/lib64/libpython3.7m.so.1.0 && \
    ln -s "${py_prefix}/lib/libpython3.7m.so.1.0" /usr/lib64 && \
    # Interpreter aliases.
    ln -s "${py_prefix}/bin/python3" /usr/bin/python3.7 && \
    ln -s "${py_prefix}/bin/python3" /usr/bin/python3.7.5 && \
    ln -s "${py_prefix}/bin/python3" /usr/bin/python && \
    # pip aliases.
    ln -s "${py_prefix}/bin/pip3" /usr/bin/pip3.7 && \
    ln -s "${py_prefix}/bin/pip3" /usr/bin/pip3.7.5 && \
    # Use the Tencent Cloud PyPI mirror for all later pip installs.
    pip3.7 config set global.index-url https://mirrors.cloud.tencent.com/pypi/simple
# Create the HwHiAiUser account.
# Note: the uid and gid of HwHiAiUser must match the host. If they differ,
# processes are denied access at startup because of permissions, and since
# /usr/local/Ascend is read-only inside the container the permissions cannot
# be fixed from within it. Run `cat /etc/passwd` on the host to look up the
# uid and gid of HwHiAiUser.
# On the host used in this article the uid is 1001, so both uid and gid are
# set to 1001 here to avoid permission problems.
RUN umask 0022 && \
    groupadd --gid 1001 HwHiAiUser && \
    useradd --uid 1001 --gid HwHiAiUser --create-home --home-dir /home/HwHiAiUser HwHiAiUser
# Install Python packages.
# TensorFlow 1.15 requires numpy < 1.19.0 at build time, so the prebuilt
# wheel needs a numpy below that version as well.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3.7 install --no-cache-dir 'numpy<1.19.0' decorator sympy==1.4 cffi==1.12.3 pyyaml pathlib2 grpcio grpcio-tools protobuf scipy requests
# Install the Ascend NNAE package and the TensorFlow plugin.
# ascend_install.info and version.info are downloaded before the installers
# run and removed again at the end of this layer; the compose file in this
# article bind-mounts the host copies to the same paths at runtime.
# curl runs with --fail (-f) so an HTTP error stops the build instead of
# saving the server's error page as the downloaded file; -S still prints the
# error and -L follows redirects.
RUN curl -fSL -o /etc/ascend_install.info ${ASCEND_INSTALL_INFO_DOWNLOAD_URL} && \
    chmod 644 /etc/ascend_install.info && \
    mkdir -p /usr/local/Ascend/driver && \
    curl -fSL -o /usr/local/Ascend/driver/version.info ${ASCEND_VERSION_INFO_DOWNLOAD_URL} && \
    chmod 440 /usr/local/Ascend/driver/version.info && \
    mkdir -p /data/tmp && \
    cd /data/tmp && \
    curl -fSL -o ascend_nnae.run ${ASCEND_NNAE_DOWNLOAD_URL} && \
    chmod +x ascend_nnae.run && \
    ./ascend_nnae.run --install-path=/usr/local/Ascend/ --install --quiet && \
    curl -fSL -o ascend_tf_plug.run ${ASCEND_TF_PLUG_DOWNLOAD_URL} && \
    chmod +x ascend_tf_plug.run && \
    ./ascend_tf_plug.run --install-path=/usr/local/Ascend/ --install --quiet && \
    rm -rf /etc/ascend_install.info /usr/local/Ascend/driver/version.info && \
    rm -rf /data/tmp
# Companion Python wheels (topi, te, hccl) from the NNAE fwkacllib directory,
# installed one at a time in this order.
RUN whl_dir=${CNT_ASCEND_BASE}/fwkacllib/lib64 && \
    pip3.7 install ${whl_dir}/topi-*-py3-none-any.whl && \
    pip3.7 install ${whl_dir}/te-*-py3-none-any.whl && \
    pip3.7 install ${whl_dir}/hccl-*-py3-none-any.whl
# TensorFlow installation.
# HDF5 was built and installed by hand when TensorFlow was compiled, so it
# has to be built by hand here too before the TF wheel is installed.
# Cython must be installed before h5py; installing them in one command or in
# the wrong order fails.
# ${LD_LIBRARY_PATH:+:...} appends the previous value only when it is set, so
# the variable never ends with a bare ':' (an empty entry makes the loader
# search the current directory).
ENV LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/hdf5/serial:${HOST_ASCEND_BASE}/add-ons:${CNT_ASCEND_BASE}/fwkacllib/lib64:${HOST_ASCEND_BASE}/driver/lib64/common:${HOST_ASCEND_BASE}/driver/lib64/driver${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# curl uses --fail so an HTTP error aborts the build instead of saving an
# error page; pip uses --no-cache-dir so its cache is not stored in the layer.
RUN mkdir -p /data/depends && cd /data/depends && \
    curl -fSL -o hdf5-1.10.5.tar.gz https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.5/src/hdf5-1.10.5.tar.gz && \
    tar xf hdf5-1.10.5.tar.gz && \
    cd hdf5-1.10.5 && \
    ./configure --prefix=/usr/include/hdf5 && \
    make install && \
    # CPATH is exported for the h5py build in this RUN and also persisted in
    # root's ~/.bashrc for later interactive shells.
    export CPATH="/usr/include/hdf5/include/:/usr/include/hdf5/lib/" && \
    echo 'export CPATH="/usr/include/hdf5/include/:/usr/include/hdf5/lib/"' >> ~/.bashrc && \
    ln -s /usr/include/hdf5/lib/libhdf5.so /usr/lib/libhdf5.so && \
    ln -s /usr/include/hdf5/lib/libhdf5_hl.so /usr/lib/libhdf5_hl.so && \
    pip3.7.5 install --no-cache-dir Cython && \
    pip3.7.5 install --no-cache-dir h5py==2.8.0 && \
    cd /data && rm -rf /data/depends && \
    mkdir -p /data/tensorflow && cd /data/tensorflow && \
    curl -fSL -o ${TENSORFLOW_PKG_NAME} ${TENSORFLOW_PKG_DOWNLOAD_URL} && \
    pip3.7.5 install --no-cache-dir ${TENSORFLOW_PKG_NAME} && \
    cd / && rm -rf /data/tensorflow
# Runtime environment variables.
# These stay as separate ENV instructions on purpose: PYTHONPATH reads
# TBE_IMPL_PATH, and a variable set in an ENV instruction is only visible to
# the instructions that follow it.
ENV GLOG_v=2
ENV TBE_IMPL_PATH=${CNT_ASCEND_BASE}/opp/op_impl/built-in/ai_core/tbe
ENV PATH=${CNT_ASCEND_BASE}/fwkacllib/ccec_compiler/bin/:${PATH}
# Append the previous PYTHONPATH only when it is set, so the value does not
# end with a bare ':' (an empty entry puts the current directory on sys.path).
ENV PYTHONPATH=${TBE_IMPL_PATH}${PYTHONPATH:+:${PYTHONPATH}}
# Create the log directories.
# According to Huawei's official documentation, if these two directories are
# missing the slogd daemon cannot write its logs and fails to start.
RUN mkdir -p /var/log/npu/slog /usr/slog/ && \
    chown -R HwHiAiUser:HwHiAiUser /var/log/npu && \
    chown HwHiAiUser:HwHiAiUser /usr/slog
因为一直记不住docker run命令的写法,所以通常情况下会选择使用docker-compose工具来拉起容器。假设2.1节创建好的docker镜像名为ascend:euler_os_20.09_kunpeng920_ascend910_base。
version: "3.4"
services:
  gromacs_kunpeng_920_ascend910:
    restart: "no"
    network_mode: "host"
    image: ascend:euler_os_20.09_kunpeng920_ascend910_base
    volumes:
      # As required by the official documentation, map the host's slog.conf,
      # version.info and ascend_install.info into the container.
      - /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf
      - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
      - /etc/ascend_install.info:/etc/ascend_install.info
      # Map npu-smi so Ascend accelerator information can be queried inside the container.
      - /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi
      # Map the toolkit into the container; the host path depends on where it was installed.
      - /usr/local/Ascend/ascend-toolkit:/usr/local/Ascend/ascend-toolkit
    environment:
      # Device index as reported by `npu-smi info`; separate several cards with commas.
      - ASCEND_VISIBLE_DEVICES=0
    # The start command writes the environment variables to .bashrc for both
    # HwHiAiUser and the current user, starts the slogd daemon, and then
    # blocks; adjust it to your own situation.
    # Every `$` meant for the container shell is written as `$$`: Compose
    # interpolates a single `${VAR}` from the host environment when it parses
    # this file, which would replace ${LD_LIBRARY_PATH} with the host value
    # (usually empty) instead of leaving it for the container to expand.
    command:
      - /bin/bash
      - -c
      - |
        echo "export ASCEND_VISIBLE_DEVICES=$${ASCEND_VISIBLE_DEVICES}" >> /home/HwHiAiUser/.bashrc && \
        echo 'export LD_LIBRARY_PATH=$${LD_LIBRARY_PATH}:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/add-ons:/usr/local/Ascend/nnae/20.0.RC1/arm64-linux_gcc7.3.0/fwkacllib/lib64' >> /home/HwHiAiUser/.bashrc && \
        echo "export ASCEND_VISIBLE_DEVICES=$${ASCEND_VISIBLE_DEVICES}" >> ~/.bashrc && \
        echo 'export LD_LIBRARY_PATH=$${LD_LIBRARY_PATH}:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/add-ons:/usr/local/Ascend/nnae/20.0.RC1/arm64-linux_gcc7.3.0/fwkacllib/lib64' >> ~/.bashrc && \
        source ~/.bashrc && \
        su HwHiAiUser --command "/usr/local/Ascend/driver/tools/slogd &" && \
        tail -f /etc/hosts
// <cstdio> provides fprintf and stdout, which the logging macros below use.
// (The header name on the first #include line was missing in the original.)
#include <cstdio>
#include "acl/acl.h"

// printf-style logging helpers. Each one prefixes the message with its
// severity tag, appends a newline and writes to stdout. `fmt` must be a
// string literal because it is concatenated with the tag at compile time.
// The named variadic parameter (`args...`) and `##args` are GNU extensions.
#define INFO_LOG(fmt,args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
// The parameter is named `fmt` to match the expansion; it was declared as
// `FMT`, which left `fmt` undefined at every use of WARN_LOG.
#define WARN_LOG(fmt,args...) fprintf(stdout, "[WARN] " fmt "\n", ##args)
#define ERROR_LOG(fmt,args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args)
using namespace std;
// Smoke test for the ACL runtime: initialise ACL with the "acl.json" found in
// the working directory, finalise it again, and log the outcome of each step.
// Returns 0 when both calls succeed and 1 when either of them fails, so
// scripts can detect a broken environment from the exit status.
int main() {
    INFO_LOG("ACL hello World.");
    int exitCode = 0;

    // ACL init
    const char *aclConfigPath = "acl.json";
    aclError ret = aclInit(aclConfigPath);
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("acl init failed. ret: %d", static_cast<int>(ret));
        exitCode = 1;
    } else {
        INFO_LOG("acl init succeed");
    }

    // Finalize is attempted even if init failed, so the log always shows
    // both results.
    ret = aclFinalize();
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("finalize acl failed. ret: %d", static_cast<int>(ret));
        exitCode = 1;
    } else {
        INFO_LOG("finalize acl succeed");
    }
    INFO_LOG("end to finalize acl");
    return exitCode;
}
cmake_minimum_required(VERSION 3.5.1)
project(AscendHelloWorld)

# Compile options
add_compile_options(-std=c++11)
# Relative path: the executable is written to <build dir>/out.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "out")
set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")

# Locations inside the Ascend installation; each one is echoed at configure
# time to make a wrong path easy to spot.
set(ASCEND_HOME "/usr/local/Ascend")
message(STATUS "ASCEND_HOME: ${ASCEND_HOME}")
set(ASCEND_TOOLKIT_HOME "${ASCEND_HOME}/ascend-toolkit/20.0")
message(STATUS "ASCEND_TOOLKIT_HOME: ${ASCEND_TOOLKIT_HOME}")
set(ACLLIB_HOME "${ASCEND_TOOLKIT_HOME}/acllib_linux.arm64")
message(STATUS "ACLLIB_HOME: ${ACLLIB_HOME}")
set(ACLLIB_INC_PATH "${ACLLIB_HOME}/include")
message(STATUS "ACLLIB_INC_PATH: ${ACLLIB_INC_PATH}")
set(ACLLIB_LIB_PATH "${ACLLIB_HOME}/lib64/stub")
message(STATUS "ACLLIB_LIB_PATH: ${ACLLIB_LIB_PATH}")
set(ATC_LIB_HOME "${ASCEND_TOOLKIT_HOME}/atc")
message(STATUS "ATC_LIB_HOME: ${ATC_LIB_HOME}")
set(ATC_LIB_PATH "${ATC_LIB_HOME}/lib64")
message(STATUS "ATC_LIB_PATH: ${ATC_LIB_PATH}")
set(ADD_ONS_LIB_PATH "${ASCEND_HOME}/add-ons")
message(STATUS "ADD_ONS_LIB_PATH: ${ADD_ONS_LIB_PATH}")

# Header path
include_directories(
    ${ACLLIB_INC_PATH}
)

# NOTE(review): `target` is not set anywhere in this file; presumably it is
# passed on the command line with -Dtarget=... - confirm.
if (target STREQUAL "Simulator_Function")
    add_compile_options(-DFUNC_SIM)
endif()

# add host lib path
# ATC_LIB_PATH (the lib64 directory) is used here rather than ATC_LIB_HOME,
# the package root, which was listed before while ATC_LIB_PATH went unused.
link_directories(
    ${ACLLIB_LIB_PATH}
    ${ATC_LIB_PATH}
    ${ADD_ONS_LIB_PATH}
)

add_executable(main
    main.cpp)

# Copy the ACL config next to the executable; main.cpp opens "acl.json" from
# its working directory.
configure_file(conf/acl.json out/acl.json COPYONLY)

if (target STREQUAL "Simulator_Function")
    target_link_libraries(main funcsim)
else()
    target_link_libraries(main ascendcl stdc++)
endif()

install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
# Create an empty conf/acl.json; CMakeLists.txt copies it to out/acl.json and
# main.cpp passes "acl.json" to aclInit.
mkdir -p conf && touch conf/acl.json
HwHiAiUser@ascend:~/workspace/ascend_study_projects/AscendHelloWorld$ tree
.
├── CMakeLists.txt
├── conf
│ └── acl.json
└── main.cpp
1 directory, 4 files
# Configure and build out-of-source. The `cd build` step was missing, so
# `cmake ../` would have been run from the project directory and pointed at
# its parent.
mkdir -p build
cd build
cmake ../ -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE
make
# The executable and acl.json are placed in build/out.
cd out
./main
# The following output indicates success:
# [INFO] ACL hello World.
# [INFO] acl init succeed
# [INFO] finalize acl succeed
# [INFO] end to finalize acl
若运行出现问题,请查看位于/var/log/npu路径下的日志。