import sqlite3
import json
import openai
import requests
import os
from tenacity import retry, wait_random_exponential, stop_after_attempt
from termcolor import colored
GPT_MODEL = "gpt-3.5-turbo-1106"
from openai import OpenAI
client = OpenAI(
api_key = os.getenv("OPENAI_API_KEY"),
)
# 1. 定义数据库信息
conn = sqlite3.connect("data/chinook.db")
print("Opened database successfully")
#--------------------------------------------------------
# 定义一个函数chat_completion_request,主要用于发送 聊天补全 请求到OpenAI服务器
def chat_completion_request(messages, tools=None):
if tools is not None:
response = client.chat.completions.create(
model=GPT_MODEL,
messages=messages,
tools=tools,
tool_choice="auto", # auto is default, but we'll be explicit
)
if tools is None:
response = client.chat.completions.create(
model=GPT_MODEL,
messages=messages
)
return response
# 定义一个函数pretty_print_conversation,用于打印消息对话内容
def pretty_print_conversation(messages):
# 为不同角色设置不同的颜色
role_to_color = {
"system": "red",
"user": "green",
"assistant": "blue",
"function": "magenta",
}
# 遍历消息列表
for message in messages:
# 如果消息的角色是"system",则用红色打印“content”
if message["role"] == "system":
print(colored(f"system: {message['content']}\n", role_to_color[message["role"]]))
# 如果消息的角色是"user",则用绿色打印“content”
elif message["role"] == "user":
print(colored(f"user: {message['content']}\n", role_to_color[message["role"]]))
# 如果消息的角色是"assistant",并且消息中包含"function_call",则用蓝色打印"function_call"
elif message["role"] == "assistant" and message.get("function_call"):
print(colored(f"assistant[function_call]: {message['function_call']}\n", role_to_color[message["role"]]))
# 如果消息的角色是"assistant",但是消息中不包含"function_call",则用蓝色打印“content”
elif message["role"] == "assistant" and not message.get("function_call"):
print(colored(f"assistant[content]: {message['content']}\n", role_to_color[message["role"]]))
# 如果消息的角色是"function",则用品红色打印“function”
elif message["role"] == "function":
print(colored(f"function ({message['name']}): {message['content']}\n", role_to_color[message["role"]]))
def get_table_names(conn):
"""返回一个包含所有表名的列表"""
table_names = [] # 创建一个空的表名列表
# 执行SQL查询,获取数据库中所有表的名字
tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
# 遍历查询结果,并将每个表名添加到列表中
for table in tables.fetchall():
table_names.append(table[0])
return table_names # 返回表名列表
def get_column_names(conn, table_name):
"""返回一个给定表的所有列名的列表"""
column_names = [] # 创建一个空的列名列表
# 执行SQL查询,获取表的所有列的信息
columns = conn.execute(f"PRAGMA table_info('{table_name}');").fetchall()
# 遍历查询结果,并将每个列名添加到列表中
for col in columns:
column_names.append(col[1])
return column_names # 返回列名列表
def get_database_info(conn):
"""返回一个字典列表,每个字典包含一个表的名字和列信息"""
table_dicts = [] # 创建一个空的字典列表
# 遍历数据库中的所有表
for table_name in get_table_names(conn):
columns_names = get_column_names(conn, table_name) # 获取当前表的所有列名
# 将表名和列名信息作为一个字典添加到列表中
table_dicts.append({"table_name": table_name, "column_names": columns_names})
return table_dicts # 返回字典列表
#--------------------------------------------------------
# 获取数据库信息,并存储为字典列表
database_schema_dict = get_database_info(conn)
# 将数据库信息转换为字符串格式,方便后续使用
database_schema_string = "\n".join(
[
f"Table: {table['table_name']}\nColumns: {', '.join(table['column_names'])}"
for table in database_schema_dict
]
)
# 2.定义 functions让 GPT 模型帮我们构造一个完整的 SQL 查询
tools = [
{
"type": "function",
"function": {
"name": "ask_database",
"description": "Use this function to answer user questions about music. Output should be a fully formed SQL query.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": f"""
SQL query extracting info to answer the user's question.
SQL should be written using this database schema:{database_schema_string}
The query should be returned in plain text, not in JSON.
""",
}
},
"required": ["query"],
},
},
}
]
# 3. 执行SQL语句
def ask_database(conn, query):
"""使用 query 来查询 SQLite 数据库的函数。"""
try:
results = str(conn.execute(query).fetchall()) # 执行查询,并将结果转换为字符串
except Exception as e: # 如果查询失败,捕获异常并返回错误信息
results = f"query failed with error: {e}"
return results # 返回查询结果
def execute_function_call(message):
"""执行函数调用"""
# 判断功能调用的名称是否为 "ask_database"
tool_calls = assistant_message.tool_calls
function_name = tool_calls[0].function.name
if function_name == "ask_database":
# 如果是,则获取功能调用的参数,这里是 SQL 查询
# query = json.loads(message["function_call"]["arguments"])["query"]
query = json.loads(tool_calls[0].function.arguments)["query"]
# 使用 ask_database 函数执行查询,并获取结果
results = ask_database(conn, query)
print(query)
print(results)
else:
# 如果功能调用的名称不是 "ask_database",则返回错误信息
results = f"Error: function {message['function_call']['name']} does not exist"
return results # 返回结果
# 创建一个空的消息列表
messages = []
# 向消息列表中添加一个系统角色的消息,内容是 "Answer user questions by generating SQL queries against the Chinook Music Database."
messages.append({"role": "system",
"content": "通过针对 Chinook 音乐数据库生成 SQL 查询来回答用户的问题"
})
# "content": "Answer user questions by generating SQL queries against the Chinook Music Database."
# 向消息列表中添加一个用户角色的消息,内容是 "Hi, who are the top 5 artists by number of tracks?"
## 根据曲目数量排名前五的艺术家是谁?
messages.append({"role": "user",
"content": "嗨,歌曲数量排名前五的艺术家是谁?"})
# "Hi, who are the top 5 artists by number of tracks?"
# 使用 chat_completion_request 函数获取聊天响应
chat_response = chat_completion_request(messages, tools)
chat_response
# 从聊天响应中获取助手的消息
assistant_message = chat_response.choices[0].message
assistant_message
# 将助手的消息添加到消息列表中
# messages.append(assistant_message)
messages.append({
"role": assistant_message.role, # 消息的角色是
"content": '' # 消息的内容 assistant_message.content
})
# 如果助手的消息中有功能调用
tool_calls = assistant_message.tool_calls
if tool_calls:
tool_call = tool_calls[0]
# 使用 execute_function_call 函数执行功能调用,并获取结果
results = execute_function_call(assistant_message)
# 将功能的结果作为一个功能角色的消息添加到消息列表中
messages.append({
"tool_call_id": tool_call.id,
"role": "function",
"name": tool_call.function.name,
"content": results})
#再次调用模型,将message对象给大模型
second_response = chat_completion_request(messages)
assistant_message = second_response.choices[0].message
messages.append({
"role": assistant_message.role, # 消息的角色是
"content": assistant_message.content # 消息的内容
})
# 使用 pretty_print_conversation 函数打印对话
pretty_print_conversation(messages)
# ---------------------------------------------------------------------------------
# 向消息列表中添加一个用户的问题,内容是 "What is the name of the album with the most tracks?"
## 哪张专辑的曲目数量最多?
messages.append({"role": "user",
"content": "拥有最多曲目的专辑叫什么名字?"}) # What is the name of the album with the most tracks ?
# 使用 chat_completion_request 函数获取聊天响应
chat_response = chat_completion_request(messages, tools)
# 从聊天响应中获取助手的消息
assistant_message = chat_response.choices[0].message
assistant_message
# 将助手的消息添加到消息列表中
# messages.append(assistant_message)
messages.append({"role": assistant_message.role,
"content": ''})
# 如果助手的消息中有功能调用
if assistant_message.tool_calls:
tool_calls = assistant_message.tool_calls
tool_call = tool_calls[0]
# 使用 execute_function_call 函数执行功能调用,并获取结果
results = execute_function_call(assistant_message)
# 将功能的结果作为一个功能角色的消息添加到消息列表中
messages.append({ "tool_call_id": tool_call.id,
"role": "function",
"name": tool_call.function.name,
"content": results})
#再次调用模型,将message对象给大模型
second_response = chat_completion_request(messages)
assistant_message = second_response.choices[0].message
messages.append({
"role": assistant_message.role, # 消息的角色是
"content": assistant_message.content # 消息的内容
})
# 使用 pretty_print_conversation 函数打印对话
pretty_print_conversation(messages)
最终messages打印的内容:
[{'role': 'system', 'content': '通过针对 Chinook 音乐数据库生成 SQL 查询来回答用户的问题'},
{'role': 'user', 'content': '嗨,歌曲数量排名前五的艺术家是谁?'},
{'role': 'assistant', 'content': ''},
{'tool_call_id': 'call_D8CgPbKLVuYJrFbi6om7zsam',
'role': 'function',
'name': 'ask_database',
'content': "[('Iron Maiden', 213), ('U2', 135), ('Led Zeppelin', 114), ('Metallica', 112), ('Lost', 92)]"},
{'role': 'assistant', 'content': ''},
{'tool_call_id': 'call_D8CgPbKLVuYJrFbi6om7zsam',
'role': 'function',
'name': 'ask_database',
'content': "[('Iron Maiden', 213), ('U2', 135), ('Led Zeppelin', 114), ('Metallica', 112), ('Lost', 92)]"},
{'role': 'assistant',
'content': '歌曲数量排名前五的艺术家分别是 Iron Maiden(213首歌曲)、U2(135首歌曲)、Led Zeppelin(114首歌曲)、Metallica(112首歌曲)和Lost(92首歌曲)。'},
{'role': 'user', 'content': '拥有最多曲目的专辑叫什么名字?'},
{'role': 'assistant', 'content': ''},
{'tool_call_id': 'call_M72g22u0r0dEX5BX3MmZT2lK',
'role': 'function',
'name': 'ask_database',
'content': "[('Greatest Hits', 57)]"},
{'role': 'assistant', 'content': '拥有最多曲目的专辑名字是《Greatest Hits》。'}]
可以看出,第一次和大模型交互时,大模型返回的role是assistant,content是空的,tool_calls是有内容的
执行了tool_call后,将获取内容放到message中,再将message提交给大模型进行第二次交互,role是assistant,content是回答的内容,tool_calls是空的了。
可见大模型重新整理问题和答案后,重新输出了内容,看起来舒服多了。
system: 通过针对 Chinook 音乐数据库生成 SQL 查询来回答用户的问题
user: 嗨,歌曲数量排名前五的艺术家是谁?
assistant[content]:
function (ask_database): [('Iron Maiden', 213), ('U2', 135), ('Led Zeppelin', 114), ('Metallica', 112), ('Lost', 92)]
assistant[content]: 歌曲数量排名前五的艺术家分别是 Iron Maiden(213首歌曲)、U2(135首歌曲)、Led Zeppelin(114首歌曲)、Metallica(112首歌曲)和Lost(92首歌曲)。
user: 拥有最多曲目的专辑叫什么名字?
assistant[content]:
function (ask_database): [('Greatest Hits', 57)]
assistant[content]: 拥有最多曲目的专辑名字是《Greatest Hits》。