# Requires Python 3.8+; torch 2.0 pairs with CUDA 11.7/11.8
# Install pip for Python 3 (-y skips the interactive confirmation prompt,
# which the original command would block on)
apt install -y python3-pip
# Fetch the ChatGLM2-6B repository
git clone https://github.com/THUDM/ChatGLM2-6B
# Install Python dependencies (Tsinghua PyPI mirror for faster downloads in China)
cd ChatGLM2-6B
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
# Verify which torch version was installed
pip list | grep torch
------------------------------
torch 2.0.1
------------------------------
torch 2.0.1版本对应的是CUDA 11.7/11.8
https://developer.nvidia.com/cuda-toolkit-archive
# Install build prerequisites for the NVIDIA driver + CUDA toolkit
apt update
apt install -y gcc make
# Disable the open-source nouveau driver (same config on Ubuntu and CentOS).
# Append the blacklist entries non-interactively — the original instructed an
# interactive vi edit followed by bare config lines, which fail when run as a script.
cat >> /etc/modprobe.d/blacklist.conf <<'EOF'
blacklist nouveau
options nouveau modeset=0
EOF
# Rebuild the initramfs so the blacklist takes effect at boot
update-initramfs -u
# Reboot so nouveau is no longer loaded before installing the NVIDIA driver
reboot
# Pick the installer matching your OS version from the CUDA archive:
# https://developer.nvidia.com/cuda-toolkit-archive
wget https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
# Run the combined CUDA toolkit + NVIDIA driver installer
sudo sh cuda_11.7.0_515.43.04_linux.run
---------------------------------------------------------------------------------
输入accept----选择需要安装的驱动包与cuda后选择 install
---------------------------------------------------------------------------------
# Persist the CUDA environment variables by appending them to ~/.bashrc
# (non-interactively, instead of an interactive vi edit).
# BUGFIX: CUDA_HOME must be a single directory path — the original
# "export CUDA_HOME=$CUDA_HOME:/usr/local/cuda" produced a broken
# colon-joined value that tools reading CUDA_HOME cannot use.
cat >> ~/.bashrc <<'EOF'
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
export PATH=$PATH:/usr/local/cuda/bin
export CUDA_HOME=/usr/local/cuda
EOF
# Reload the shell configuration so the new variables take effect now
source ~/.bashrc
# Verify the CUDA compiler is on PATH and report its version
nvcc -V
由于国内网络问题,从hugging face拉取模型文件比较麻烦,所以使用本地下载更为方便
# Create the local model directory (-p creates parent dirs and is idempotent).
# BUGFIX: the original "mkdir /THUDM/chatglm2-6b" did not match the
# model_path "/root/THUDM/chatglm2-6b" used by web_demo.py and api.py below.
mkdir -p /root/THUDM/chatglm2-6b
下载地址:https://cloud.tsinghua.edu.cn/d/674208019e314311ab5c/?p=%2Fchatglm2-6b&mode=list
下载地址:https://huggingface.co/THUDM/chatglm2-6b/tree/main
# Enter the ChatGLM2-6B repository folder and edit the web demo
cd ChatGLM2-6B
vi web_demo.py
----------------------------------------------------------------------------------------------------------------------------------------------------------
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import mdtex2html
from utils import load_model_on_gpus
# Point model_path at the locally downloaded model directory
model_path = "/root/THUDM/chatglm2-6b"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True,local_files_only=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).cuda()
# Change the file's last line to bind to all interfaces on port 7890
demo.queue().launch(share=False, inbrowser=True,server_port=7890, server_name="0.0.0.0")
----------------------------------------------------------------------------------------------------------------------------------------------------------
# Launch the Gradio web demo
python3 web_demo.py
# Enter the ChatGLM2-6B repository folder
cd ChatGLM2-6B
# Edit the API server so the model is loaded from the local path
vi api.py
-----------------------------------------------------------------------------------------------------------------------------------------------------
if __name__ == '__main__':
# Absolute path: point model_path at the locally downloaded model directory
model_path = "/root/THUDM/chatglm2-6b"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True,local_files_only=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).cuda()
-----------------------------------------------------------------------------------------------------------------------------------------------------
# Run the ChatGLM2-6B API server in the foreground
python3 api.py
# Or run it in the background. BUGFIX: added "2>&1" so stderr (startup
# errors, Python tracebacks) is also captured in output.log — the original
# redirection silently discarded it.
nohup python3 api.py > output.log 2>&1 &
# Smoke-test the API with a sample request
curl -X POST "http://127.0.0.1:8000" \
    -H 'Content-Type: application/json' \
    -d '{"prompt": "你好", "history": []}'
# 返回值如下即正常
{
"response":"你好!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。",
"history":[["你好","你好!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。"]],
"status":200,
"time":"2023-10-08 21:38:40"
}
# Remote access URL — replace 本机IP with the server's actual IP: http://本机IP:8000