sudo apt install gcc
sudo apt install git git-lfs
sudo apt install zip unzip
sudo apt install cargo ccache cmake lib64gcc-12-dev libgcc-12-dev
sudo apt install cuda-toolkit-12-1 cuda cuda-keyring
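# If apt cannot find the cuda-* packages, the NVIDIA apt repo is probably missing.
# A minimal sketch to add it, assuming Ubuntu 22.04 on x86_64:
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
sudo apt update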
sudo apt install libglvnd-dev
sudo apt install clang
sudo apt install pciutils
sudo apt install libvulkan1 vulkan-tools mesa-vulkan-drivers
sudo apt install build-essential
# Pick one driver flavor: either the open kernel modules
sudo apt install nvidia-driver-525-open nvidia-headless-525-open
# or the proprietary ones
sudo apt install nvidia-driver-525 nvidia-headless-525
sudo apt install net-tools # netstat -nlpt
sudo apt install llvm-dev llvm-spirv spirv-headers spirv-tools libllvm12 libllvmspirvlib-dev libspirv-cross-c-shared-dev
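# Optional sanity check that the toolchain landed; version numbers will vary.
# nvcc is addressed by full path here because /usr/local/cuda-12.1/bin is only
# added to PATH further below:
gcc --version | head -n1
cmake --version | head -n1
/usr/local/cuda-12.1/bin/nvcc --version | tail -n1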
# This installer may fail; if so, build and install open-gpu-kernel-modules first (below)
wget https://us.download.nvidia.com/XFree86/Linux-x86_64/525.116.04/NVIDIA-Linux-x86_64-525.116.04.run
sudo bash ./NVIDIA-Linux-x86_64-525.116.04.run
wget -O open-gpu-kernel-modules-525.47.26.zip https://codeload.github.com/NVIDIA/open-gpu-kernel-modules/zip/refs/tags/525.47.26
unzip open-gpu-kernel-modules-525.47.26.zip
cd open-gpu-kernel-modules-525.47.26/
make modules -j$(nproc)
Note: this step may error out with:
/lib/modules/5.15.90.1-microsoft-standard-WSL2/build: No such file or directory. Stop.
# WSL can hit this problem; if so, fall back to method 2:
sudo apt install linux-headers-$(uname -r)
# Create a symlink with the WSL kernel's name pointing to the most recent installed kernel headers
ls /lib/modules/
# 5.15.0-69-generic 5.15.0-73-generic 5.15.90.1-microsoft-standard-WSL2
sudo rm -rf /lib/modules/5.15.90.1-microsoft-standard-WSL2
sudo ln -s /lib/modules/5.15.0-73-generic /lib/modules/5.15.90.1-microsoft-standard-WSL2
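# Confirm the link now points at the generic kernel tree:
ls -l /lib/modules/5.15.90.1-microsoft-standard-WSL2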
make modules -j$(nproc)
sudo make modules_install -j$(nproc)
sudo bash NVIDIA-Linux-x86_64-525.116.04.run
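# Optional check that the module is visible; this can still come up empty under
# WSL2, where the driver is normally supplied by the Windows host:
lsmod | grep nvidia
modinfo nvidia | grep ^version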
# Nightly wheels; this walkthrough installs both the cu121 (CUDA 12.1) and the vulkan builds:
# pip install mlc-ai-nightly -f https://mlc.ai/wheels --upgrade
# pip install --pre mlc-ai-nightly-cu118 -f https://mlc.ai/wheels
pip install mlc_ai_nightly_cu121 -f https://mlc.ai/wheels
pip install mlc_ai_nightly_vulkan -f https://mlc.ai/wheels
# pip install tvm
# pip install apache-tvm
pip install torch transformers --upgrade
pip install nvidia-cuda-cupti-cu12 nvidia-cuda-nvrtc-cu12 nvidia-cuda-runtime-cu12 nvidia-nvtx-cu12 nvidia-pyindex pycuda
pip install ninja 'fastapi>=0.93.0' --upgrade # quote the constraint so the shell does not parse >= as a redirection
python -c "import tvm; tvm.support.describe()"
nvidia-smi
#| NVIDIA-SMI 525.105.17 Driver Version: 531.41 CUDA Version: 12.1 |
lspci
#25fc:00:00.0 3D controller: Microsoft Corporation Basic Render Driver
#9ac5:00:00.0 3D controller: Microsoft Corporation Basic Render Driver
PYTHON_USER_SITE=$(python -m site --user-site)
# e.g. /home/username/.local/lib/python3.10/site-packages
HOME_DIR=/mnt/f
MLC_HOME=$HOME_DIR/mlc-llm
export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PYTHON_USER_SITE/tensorrt/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PYTHON_USER_SITE/mlc_ai_nightly_cu121.libs/:$LD_LIBRARY_PATH
#export LD_LIBRARY_PATH=$PYTHON_USER_SITE/tvm:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$PYTHON_USER_SITE/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH
export PATH=$PATH:/usr/local/cuda-12.1/bin
export TVM_HOME=$MLC_HOME/3rdparty/tvm
sudo ldconfig
ldconfig -p | grep cuda
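# The exports above only last for this shell; to persist them, append the same
# lines to ~/.bashrc (paths are the ones used in this walkthrough):
cat >> ~/.bashrc <<'EOF'
export PATH=$PATH:/usr/local/cuda-12.1/bin
export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64:$LD_LIBRARY_PATH
EOF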
python -c "import tensorflow as tf; print (tf.config.list_physical_devices('GPU'))"
cd $HOME_DIR
git clone --recursive https://github.com/mlc-ai/mlc-llm.git
cd $MLC_HOME
git submodule update --init --recursive --remote
# Important: check inside 3rdparty/tvm/ and 3rdparty/tvm/3rdparty to confirm the submodules were fetched recursively.
# If they were not, change the url of `3rdparty/tvm` in `mlc-llm/.gitmodules` from `https://github.com/mlc-ai/relax.git` to `[email protected]:mlc-ai/relax.git`,
# then rerun `git submodule update --init --recursive --remote`.
cd $MLC_HOME
# Create config.cmake interactively
python cmake/gen_cmake_config.py
## Or write it directly:
echo "set(CMAKE_BUILD_TYPE RelWithDebInfo)" > config.cmake
echo "set(USE_CUDA ON)" >> config.cmake
echo "set (USE_VULKAN ON)" >> config.cmake
echo "set (USE_METAL OFF)" >> config.cmake
# Create the build directory
mkdir -p build
rm -rf $MLC_HOME/build/* &&
cd $MLC_HOME/build/ &&
cp ../config.cmake . &&
cmake ..&&
make clean &&
make -j$(nproc) &&
sudo make install
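# Quick check that the build produced the CLI binary and the bundled TVM libraries
# (paths reflect this walkthrough's tree; adjust if your layout differs):
ls -l $MLC_HOME/build/mlc_chat_cli $MLC_HOME/build/tvm/libtvm*.so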
# Install the mlc_chat REST server
cd $MLC_HOME/python
# Symlink the package into site-packages so that `python -m mlc_chat.rest` resolves
ln -s $MLC_HOME/python/mlc_chat $PYTHON_USER_SITE/mlc_chat
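# Verify the module now resolves from anywhere:
python -c "import mlc_chat; print(mlc_chat.__file__)"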
# build.py downloads the model automatically into dist/models/; you can also download one yourself and put it there.
# After building, mlc-llm/dist/dolly-v2-3b-q3f16_0/ contains: debug, dolly-v2-3b-q3f16_0-cuda.so, mod_cache_before_build_cuda.pkl, params
# dolly-v2-3b-q3f16_0-cuda.so is the cuda (or vulkan) library you point the cli or the rest server at later.
cd $MLC_HOME
# Note: when running the python commands below, libtvm.so and libtvm_runtime.so must be on the loader path
sudo cp $PYTHON_USER_SITE/tvm/*.so /usr/local/lib/
python build.py --hf-path=databricks/dolly-v2-3b --target=cuda
python build.py --hf-path=databricks/dolly-v2-12b --target=cuda
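# Confirm the artifacts described above exist:
ls dist/dolly-v2-3b-q3f16_0/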
# Alternatively, download the model manually and then build:
#cd $MLC_HOME/dist/models
#git clone https://huggingface.co/databricks/dolly-v2-3b
# Prebuilt chat models cannot be rebuilt for cuda; build.py fails with: AssertionError: Model path must contain valid config file.
# python build.py --hf-path=mlc-ai/mlc-chat-vicuna-v1-7b-q3f16_0 --target=cuda
cd $MLC_HOME
mkdir -p dist/prebuilt
# Note: mlc-ai/binary-mlc-llm-libs ships only vulkan prebuilds, no cuda
git clone https://github.com/mlc-ai/binary-mlc-llm-libs.git dist/prebuilt/lib
cd dist/prebuilt
git clone https://huggingface.co/mlc-ai/mlc-chat-vicuna-v1-7b-q3f16_0
git clone https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f16_0
git clone https://huggingface.co/mlc-ai/mlc-chat-rwkv-raven-1b5-q8f16_0
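# Confirm which backends the prebuilt libraries actually cover:
ls $MLC_HOME/dist/prebuilt/lib/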
Note that mlc_chat_cli must use the libtvm.so and libtvm_runtime.so produced by the tvm build:
sudo cp $MLC_HOME/build/tvm/libtvm.so $MLC_HOME/build/tvm/libtvm_runtime.so /usr/local/lib/
Running build.py or mlc_chat.rest instead needs the libtvm.so and libtvm_runtime.so from mlc_ai_nightly_cu121 or mlc_ai_nightly_vulkan:
sudo cp $PYTHON_USER_SITE/tvm/*.so /usr/local/lib/
Copying files back and forth like this is clumsy; pointing LD_LIBRARY_PATH at the right set is cleaner, as sketched below.
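# A sketch of the LD_LIBRARY_PATH approach: prepend the directory matching what
# you are about to run, instead of overwriting /usr/local/lib.
export LD_LIBRARY_PATH=$MLC_HOME/build/tvm:$LD_LIBRARY_PATH    # before running mlc_chat_cli
export LD_LIBRARY_PATH=$PYTHON_USER_SITE/tvm:$LD_LIBRARY_PATH  # before running build.py / mlc_chat.rest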
# Test with a prebuilt model; cuda is not supported here, check dist/prebuilt/lib for a matching .so
mlc_chat_cli --model mlc-chat-RedPajama-INCITE-Chat-3B-v1 --device-name=vulkan
# Test with the locally built model; only cuda was compiled here, vulkan/ios/android were not tested
mlc_chat_cli --local-id vicuna-v1-7b-q3f16_0 --device-name=cuda
# After this you can chat directly in the terminal. Note: with vulkan the load hit the CPU, not the GPU; this may be a configuration problem.
# Start the REST server on port 8000
python3 -m mlc_chat.rest --model=mlc-chat-RedPajama-INCITE-Chat-3B-v1
python3 -m mlc_chat.rest --model=dolly-v2-3b
# Note: if rest.py raises an ImportError on supported_models, delete that import from its first line
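# Check that the server is actually listening (net-tools was installed above):
netstat -nlpt | grep 8000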
# Test a conversation
python $MLC_HOME/python/mlc_chat/sample_client.py
zhipeng@zhipeng:/mnt/f/mlc-llm/python/mlc_chat$ python test_client.py
Runtime stats: prefill: -nan tok/s, decode: -nan tok/s
send: {'prompt': 'hello'}
Without streaming: {'message': 'I am a teacher\n\n"People who cannot read can barely read"\n\nPeople who are considered to have severe disabilities have difficulties with many aspects of living a social life. People with intellectual disabilities have many of the same skills and domain of knowledge. In fact, one might consider these people with intellectual disabilities to be the same person, with so many of their needs met with a combination of specialized software and hardware. Many of the services to this same person in this same world, to this same person, are not there at all. ... In the world of this.\n\nIn the world of this.\n\nIn the world of this.'}
Reset chat: <Response [200]>
send: {'prompt': 'whats you name'}
Without streaming: {'message': 'I’m a simulated computer program that can respond to stimuli\nand produce the response\n\n p\nI’m a computer program that can respond to stimuli\nand produce the response\n\n p\n ... In short I’m a computer program that can produce the response\n\n\nIn short I’m a computer program that can'}
Runtime stats: prefill: 19.6 tok/s, decode: 2.5 tok/s
zhipeng@zhipeng:~/mlc-llm$ mlc_chat_cli --local-id vicuna-v1-7b-q3f16_0 --device-name=vulkan
Use MLC config:"/mnt/f/mlc-llm/dist/prebuilt/mlc-chat-vicuna-v1-7b-q3f16_0/mlc-chat-config.json"
Use model weights:"/mnt/f/mlc-llm/dist/prebuilt/mlc-chat-vicuna-v1-7b-q3f16_0/ndarray-cache.json"
Use model library:"/mnt/f/mlc-llm/dist/prebuilt/lib/vicuna-v1-7b-q3f16_0-vulkan.so"
You can use the following special commands:
...
Loading model... Loading finished
Running system prompts...
System prompts finished
USER:hello
ASSISTANT: Hello! How can I help you today?
USER: whats you name
ASSISTANT: My name is Vicuna, and I am a language model developed by Large Model Systems Organization (LMSYS). How may I assist you with your query?
USER:
zhipeng@zhipeng:~/mlc-llm$ mlc_chat_cli --local-id RedPajama-INCITE-Chat-3B-v1-q4f16_0 --device-name=vulkan
Use MLC config:"/mnt/f/mlc-llm/dist/prebuilt/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f16_0/mlc-chat-config.json"
Use model weights:"/mnt/f/mlc-llm/dist/prebuilt/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f16_0/ndarray-cache.json"
Use model library:"/mnt/f/mlc-llm/dist/prebuilt/lib/RedPajama-INCITE-Chat-3B-v1-q4f16_0-vulkan.so"
You can use the following special commands:
...
Loading model...
Loading finished
Running system prompts...
System prompts finished
<human>:hello
<bot>: hi there, how can i help you?
<human>: whats you name
<bot>: My name is Open Assistant, I am an AI assistant. It is a very generic name.
<human>: show demo python code use the requests get google page
<bot>: Sure, here's a simple example using the requests library in Python to get a Google page.
```python
import requests
# Replace 'your app name' with your own name
url='https://www.google.com'
response = requests.get(url)
# Display the contents of the page
print(response.text)
```
<human>: