[鲲鹏920-arm64][Euler OS]配置ascend docker开发环境

文章目录

  • 1 docker环境安装
  • 2 制作docker镜像
    • 2.1 编写dockerfile文件
    • 2.2 编写docker-compose文件
  • 3 HelloWorld测试
    • 3.1 创建main.cpp文件
    • 3.2 创建CMakeLists.txt
    • 3.3 创建空白acl.json文件
    • 3.4 文件夹结构
    • 3.5 编译并运行
  • 资源
  • 参考

1 docker环境安装

docker环境安装请参考[鲲鹏920-arm64][Euler OS 20.03 LTS] Docker安装一文

2 制作docker镜像

2.1 编写dockerfile文件

基础镜像gromacs_euler_os:compile_base请查看[鲲鹏920-arm64][Euler OS] gromacs源码编译一文2.1节

# Image definition for an Ascend development environment on top of the gromacs compile base image.
FROM gromacs_euler_os:compile_base

# NNAE release and architecture; both are used to derive CNT_ASCEND_BASE below.
ARG NNAE_VERSION=20.0.RC1
ARG NNAE_ARCH=arm64
# Ascend root used for the driver and add-ons entries of LD_LIBRARY_PATH.
ARG HOST_ASCEND_BASE=/usr/local/Ascend
# NNAE directory inside the container (fwkacllib, opp, ... are referenced relative to it).
ARG CNT_ASCEND_BASE=/usr/local/Ascend/nnae/$NNAE_VERSION/$NNAE_ARCH-linux_gcc7.3.0
# Download locations of the NNAE and TFPlugin .run installers and of the Python source tarball.
ARG ASCEND_NNAE_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/Ascend-NNAE-20.0.RC1-arm64-linux_gcc7.3.0.run"
ARG ASCEND_TF_PLUG_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/Ascend-TFPlugin-20.0.RC1-arm64-linux_gcc7.3.0.run"
ARG PYTHON_DOWNLOAD_URL="https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz"
# Self-built TensorFlow wheel; a prebuilt 1.15.4 wheel is offered in the resources section of this article.
ARG TENSORFLOW_PKG_NAME="tensorflow-1.15.4-cp37-cp37m-linux_aarch64.whl"
ARG TENSORFLOW_PKG_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/${TENSORFLOW_PKG_NAME}"
# The next two files live on the host that has the Ascend 910 driver installed. To keep the
# number of image layers low they were copied out of their original location and are downloaded instead.
# ascend_install.info is /etc/ascend_install.info on the host.
ARG ASCEND_INSTALL_INFO_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/ascend_install.info"
# version.info is /usr/local/Ascend/driver/version.info on the host.
ARG ASCEND_VERSION_INFO_DOWNLOAD_URL="http://127.0.0.1:56000/chfs/shared/tmp/version.info"

# Relative paths in later instructions resolve against /root.
WORKDIR /root

# Install system packages.
# `-y` is needed on `yum update` too: a docker build has no terminal to answer
# the confirmation prompt, so without it the step aborts when updates are pending.
# NOTE(review): on yum-based systems the C++ compiler package is usually named
# gcc-c++ — confirm that "g++" resolves in this base image.
RUN yum update -y && \
    yum install -y curl wget g++ blas blas-devel gcc-gfortran libgfortran libffi-devel

# Build and install Python 3.7.5 from source.
# Steps, in order: download and unpack the tarball under /data/python, configure a
# shared-library build with prefix /usr/local/python3.7.5, make and install, delete the
# build tree, replace /usr/lib64/libpython3.7m.so.1.0 with a symlink to the new library,
# expose the interpreter and pip as python3.7 / pip3.7 / python3.7.5 / pip3.7.5 / python
# in /usr/bin, and finally point pip at the Tencent Cloud PyPI mirror.
RUN mkdir -p /data/python && \
    cd /data/python && \
    wget ${PYTHON_DOWNLOAD_URL} && \
    tar xf Python-3.7.5.tgz && \
    cd Python-3.7.5 && \
    ./configure --prefix=/usr/local/python3.7.5 --enable-shared && \
    make && make install && \
    cd / && rm -rf /data/python && \
    rm -rf /usr/lib64/libpython3.7m.so.1.0 && \
    ln -s /usr/local/python3.7.5/lib/libpython3.7m.so.1.0 /usr/lib64 && \
    ln -s /usr/local/python3.7.5/bin/python3 /usr/bin/python3.7 && \
    ln -s /usr/local/python3.7.5/bin/pip3 /usr/bin/pip3.7 && \
    ln -s /usr/local/python3.7.5/bin/python3 /usr/bin/python3.7.5 && \
    ln -s /usr/local/python3.7.5/bin/pip3 /usr/bin/pip3.7.5 && \
    ln -s /usr/local/python3.7.5/bin/python3 /usr/bin/python && \
    pip3.7 config set global.index-url https://mirrors.cloud.tencent.com/pypi/simple

# Create the HwHiAiUser group and user (home directory /home/HwHiAiUser).
# Note: the uid and gid of HwHiAiUser must match those on the host. If they differ, processes
# are denied access at start-up because of permission problems, and since /usr/local/Ascend is
# read-only inside the container the permissions cannot be changed from within it.
# The host values can be looked up with `cat /etc/passwd`.
# On the host used in this article HwHiAiUser has uid=1001, so uid and gid are both set to 1001
# here to avoid permission problems.
RUN umask 0022 && \
    groupadd -g 1001 HwHiAiUser && \
    useradd -u 1001 -g HwHiAiUser -m -d /home/HwHiAiUser HwHiAiUser

# Install Python packages.
# TensorFlow 1.15 requires numpy < 1.19.0 when it is compiled, so the prebuilt wheel needs a
# numpy below that version at install time as well.
RUN pip3.7 install 'numpy<1.19.0' decorator sympy==1.4 cffi==1.12.3 pyyaml pathlib2 grpcio grpcio-tools protobuf scipy requests

# Install the Ascend NNAE package and the TensorFlow plugin.
# ascend_install.info and version.info are fetched only for the duration of the
# two installers and are deleted again at the end of this layer, together with
# the downloaded .run files in /data/tmp.
# `curl -f` makes an HTTP error (e.g. 404) fail the build at the download step
# instead of saving the server's error page under the target file name.
RUN curl -f -o /etc/ascend_install.info "${ASCEND_INSTALL_INFO_DOWNLOAD_URL}" && chmod 644 /etc/ascend_install.info && \
    mkdir -p /usr/local/Ascend/driver && curl -f -o /usr/local/Ascend/driver/version.info "${ASCEND_VERSION_INFO_DOWNLOAD_URL}" && chmod 440 /usr/local/Ascend/driver/version.info && \
    mkdir -p /data/tmp && \
    cd /data/tmp && \
    curl -f -o ascend_nnae.run "${ASCEND_NNAE_DOWNLOAD_URL}" && \
    chmod +x ascend_nnae.run && \
    ./ascend_nnae.run --install-path=/usr/local/Ascend/ --install --quiet && \
    curl -f -o ascend_tf_plug.run "${ASCEND_TF_PLUG_DOWNLOAD_URL}" && \
    chmod +x ascend_tf_plug.run && \
    ./ascend_tf_plug.run --install-path=/usr/local/Ascend/ --install --quiet && \
    rm -rf /etc/ascend_install.info /usr/local/Ascend/driver/version.info && \
    rm -rf /data/tmp


# Companion Python wheels (topi, te, hccl) installed from the NNAE fwkacllib/lib64 directory.
# The globs match whatever version the NNAE installer placed there.
RUN pip3.7 install ${CNT_ASCEND_BASE}/fwkacllib/lib64/topi-*-py3-none-any.whl && \
    pip3.7 install ${CNT_ASCEND_BASE}/fwkacllib/lib64/te-*-py3-none-any.whl && \
    pip3.7 install ${CNT_ASCEND_BASE}/fwkacllib/lib64/hccl-*-py3-none-any.whl

# Install TensorFlow.
# hdf5 was built and installed by hand when the wheel was compiled, so it is
# built by hand here as well before the wheel is installed.
# Cython must be installed before h5py in its own pip invocation; installing
# them together or in the wrong order fails.
# `curl -fL` follows redirects and makes an HTTP error fail the build at the
# download step instead of saving an error page as the tarball or wheel.
ENV LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/hdf5/serial:${HOST_ASCEND_BASE}/add-ons:${CNT_ASCEND_BASE}/fwkacllib/lib64:${HOST_ASCEND_BASE}/driver/lib64/common:${HOST_ASCEND_BASE}/driver/lib64/driver:${LD_LIBRARY_PATH}
RUN mkdir /data/depends && cd /data/depends && \
    curl -fL -o hdf5-1.10.5.tar.gz https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.5/src/hdf5-1.10.5.tar.gz && \
    tar xf hdf5-1.10.5.tar.gz && \
    cd hdf5-1.10.5 && \
    ./configure --prefix=/usr/include/hdf5 && \
    make install && \
    export CPATH="/usr/include/hdf5/include/:/usr/include/hdf5/lib/" && \
    echo 'export CPATH="/usr/include/hdf5/include/:/usr/include/hdf5/lib/"' >> ~/.bashrc && \
    ln -s /usr/include/hdf5/lib/libhdf5.so  /usr/lib/libhdf5.so && \
    ln -s /usr/include/hdf5/lib/libhdf5_hl.so  /usr/lib/libhdf5_hl.so && \
    pip3.7.5 install Cython && \
    pip3.7.5 install h5py==2.8.0 && \
    cd /data && rm -rf /data/depends && \
    mkdir -p /data/tensorflow && cd /data/tensorflow && \
    curl -fL -o "${TENSORFLOW_PKG_NAME}" "${TENSORFLOW_PKG_DOWNLOAD_URL}" && \
    pip3.7.5 install "${TENSORFLOW_PKG_NAME}" && \
    cd / && rm -rf /data/tensorflow

# Runtime environment variables.
# TBE_IMPL_PATH points into the NNAE opp tree and is also prepended to PYTHONPATH;
# the ccec_compiler bin directory is prepended to PATH.
ENV GLOG_v=2
ENV TBE_IMPL_PATH=${CNT_ASCEND_BASE}/opp/op_impl/built-in/ai_core/tbe
ENV PATH=${CNT_ASCEND_BASE}/fwkacllib/ccec_compiler/bin/:${PATH}
ENV PYTHONPATH=${TBE_IMPL_PATH}:${PYTHONPATH}

# Create the log directories and hand them to HwHiAiUser.
# According to Huawei's documentation, if the two directories below do not exist the slogd
# daemon cannot write its logs and fails to start.
RUN mkdir -p /var/log/npu/slog && \
    chown HwHiAiUser:HwHiAiUser -R /var/log/npu && \
    mkdir -p /usr/slog/ && chown HwHiAiUser:HwHiAiUser /usr/slog

2.2 编写docker-compose文件

因为一直记不住docker run命令的写法,所以通常情况会选择使用docker-compose工具来拉起容器。假设2.1节创建好的docker镜像名为ascend:euler_os_20.09_kunpeng920_ascend910_base

# Compose file that starts a container from the image built in section 2.1.
version: "3.4"
services:
  gromacs_kunpeng_920_ascend910:
    restart: "no"
    network_mode: "host"
    image: ascend:euler_os_20.09_kunpeng920_ascend910_base
    volumes:
      # As required by the official documentation, map the host's slog.conf, version.info and ascend_install.info into the container.
      - /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf
      - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
      - /etc/ascend_install.info:/etc/ascend_install.info
      - /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi  # map npu-smi so the Ascend accelerator cards can be inspected from inside the container
      - /usr/local/Ascend/ascend-toolkit:/usr/local/Ascend/ascend-toolkit # map the toolkit in; the host path depends on where it was installed
    environment:
      - ASCEND_VISIBLE_DEVICES=0  # index reported by `npu-smi info`; separate several cards with commas
    command:
      # The start command sets environment variables, launches the slogd daemon and then blocks; adapt it to your own needs.
      # `$$` is Compose's escape for a literal `$`: without it Compose would substitute the
      # variable from the host environment when the file is parsed, instead of leaving it
      # for the shell inside the container.
      - /bin/bash
      - -c
      - |
        echo "export ASCEND_VISIBLE_DEVICES=$${ASCEND_VISIBLE_DEVICES}" >> /home/HwHiAiUser/.bashrc && \
        echo 'export LD_LIBRARY_PATH=$${LD_LIBRARY_PATH}:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/add-ons:/usr/local/Ascend/nnae/20.0.RC1/arm64-linux_gcc7.3.0/fwkacllib/lib64' >> /home/HwHiAiUser/.bashrc && \
        echo "export ASCEND_VISIBLE_DEVICES=$${ASCEND_VISIBLE_DEVICES}" >> ~/.bashrc && \
        echo 'export LD_LIBRARY_PATH=$${LD_LIBRARY_PATH}:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/add-ons:/usr/local/Ascend/nnae/20.0.RC1/arm64-linux_gcc7.3.0/fwkacllib/lib64' >> ~/.bashrc && \
        source ~/.bashrc && \
        su HwHiAiUser --command "/usr/local/Ascend/driver/tools/slogd &" && \
        tail -f /etc/hosts

3 HelloWorld测试

3.1 创建main.cpp文件

// Minimal AscendCL smoke test: initialise ACL with acl.json, finalise it again,
// and report each step on stdout.
#include <cstdio>
#include "acl/acl.h"

// printf-style logging helpers; every message goes to stdout with a level tag.
#define INFO_LOG(fmt,args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
#define WARN_LOG(fmt,args...) fprintf(stdout, "[WARN] " fmt "\n", ##args)
#define ERROR_LOG(fmt,args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args)

using namespace std;

// Returns 0 when both aclInit and aclFinalize succeed, 1 otherwise.
int main() {
    int exitCode = 0;
    INFO_LOG("ACL hello World.");
    // ACL init; the config path is relative to the working directory.
    const char *aclConfigPath = "acl.json";
    aclError ret = aclInit(aclConfigPath);
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("acl init failed. ret: %d", ret);
        exitCode = 1;
    } else {
        INFO_LOG("acl init succeed");
    }
    // Finalize is attempted even after a failed init, and its result is reported too.
    ret = aclFinalize();
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("finalize acl failed. ret: %d", ret);
        exitCode = 1;
    } else {
        INFO_LOG("finalize acl succeed");
    }
    INFO_LOG("end to finalize acl");
    // Let callers and scripts detect a failed run through the exit status.
    return exitCode;
}

3.2 创建CMakeLists.txt

# Build script for the AscendCL hello-world executable `main`.
cmake_minimum_required(VERSION 3.5.1)
project(AscendHelloWorld)

# Compile options
add_compile_options(-std=c++11)

# The executable is placed in out/ below the build directory.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "out")
set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")

# Locations inside the Ascend installation; each one is echoed at configure time
# so a wrong path is easy to spot.
set(ASCEND_HOME "/usr/local/Ascend")
message(STATUS "ASCEND_HOME: ${ASCEND_HOME}")
set(ASCEND_TOOLKIT_HOME "${ASCEND_HOME}/ascend-toolkit/20.0")
message(STATUS "ASCEND_TOOLKIT_HOME: ${ASCEND_TOOLKIT_HOME}")
set(ACLLIB_HOME "${ASCEND_TOOLKIT_HOME}/acllib_linux.arm64")
message(STATUS "ACLLIB_HOME: ${ACLLIB_HOME}")
set(ACLLIB_INC_PATH "${ACLLIB_HOME}/include")
message(STATUS "ACLLIB_INC_PATH: ${ACLLIB_INC_PATH}")

set(ACLLIB_LIB_PATH "${ACLLIB_HOME}/lib64/stub")
message(STATUS "ACLLIB_LIB_PATH: ${ACLLIB_LIB_PATH}")

set(ATC_LIB_HOME "${ASCEND_TOOLKIT_HOME}/atc")
message(STATUS "ATC_LIB_HOME: ${ATC_LIB_HOME}")
set(ATC_LIB_PATH "${ATC_LIB_HOME}/lib64")
message(STATUS "ATC_LIB_PATH: ${ATC_LIB_PATH}")

set(ADD_ONS_LIB_PATH "${ASCEND_HOME}/add-ons")
message(STATUS "ADD_ONS_LIB_PATH: ${ADD_ONS_LIB_PATH}")

# Header path
include_directories(
        ${ACLLIB_INC_PATH}
)

# `target` is not set in this file; presumably it is passed as -Dtarget=... on
# the cmake command line to select the function simulator build.
if (target STREQUAL "Simulator_Function")
    add_compile_options(-DFUNC_SIM)
endif()

# add host lib path
# The ATC entry is the lib64 directory (ATC_LIB_PATH), not the ATC root.
link_directories(
        ${ACLLIB_LIB_PATH}
        ${ATC_LIB_PATH}
        ${ADD_ONS_LIB_PATH}
)

add_executable(main
        main.cpp)

# Copy the ACL config next to the executable so ./main finds acl.json in its working directory.
configure_file(conf/acl.json out/acl.json COPYONLY)

if (target STREQUAL "Simulator_Function")
    target_link_libraries(main funcsim)
else()
    target_link_libraries(main ascendcl stdc++)
endif()

install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})

3.3 创建空白acl.json文件

# Create the conf directory and, if it does not exist yet, an empty conf/acl.json.
mkdir -p conf && touch conf/acl.json

3.4 文件夹结构

HwHiAiUser@ascend:~/workspace/ascend_study_projects/AscendHelloWorld$ tree
.
├── CMakeLists.txt
├── conf
│   └── acl.json
└── main.cpp

1 directory, 3 files

3.5 编译并运行

# Configure and build out-of-source in ./build, then run the resulting binary.
mkdir -p build
cd build  # `cmake ../` below must run from inside build/ to find the project
cmake ../ -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE
make

cd out
./main
# The following output indicates success:
# [INFO] ACL hello World.
# [INFO] acl init succeed
# [INFO] finalize acl succeed
# [INFO] end to finalize acl

若运行出现问题,请查看位于/var/log/npu路径下的日志

资源

  • 适用于Euler OS 20.09 aarch版的bazel
  • 适用于鲲鹏920的aarch版tensorflow

参考

  • 配置昇腾环境
  • 指定uid和gid创建用户和组
  • 编译tensorflow aarch版
    • 解决gettid冲突
    • 提示无法从nullptr转换
    • 提示bfloat编译失败
    • 提示api:keras_python_api_gen failed
    • 提示invalid bdist_wheel
  • 昇腾应用开发课程
  • 应用软件开发指南

你可能感兴趣的:(鲲鹏&昇腾,linux,docker,arm)