# Set the service bind address and port
export OLLAMA_HOST=0.0.0.0:11434
# Allow cross-origin requests (quote the value so the shell does not glob-expand the *)
export OLLAMA_ORIGINS="*"
# Start the service
ollama serve

The API is then reachable at http://localhost:11434.
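A quick way to confirm the server is up before making real calls is to hit the version endpoint. This is a minimal sketch in Python; the /api/version endpoint and the 5-second timeout are the only assumptions here:

import requests

# Minimal health check: GET /api/version returns the server version as JSON.
try:
    r = requests.get("http://localhost:11434/api/version", timeout=5)
    r.raise_for_status()
    print("Ollama is up, version:", r.json().get("version"))
except requests.exceptions.RequestException as e:
    print("Ollama is not reachable:", e)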
# Request example: list locally available models
curl http://localhost:11434/api/tags
# Response example
{
  "models": [
    {
      "name": "llama2:7b",
      "modified_at": "2024-03-20T10:00:00Z",
      "size": 13000000000,
      "digest": "sha256:1234567890"
    }
  ]
}
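The same listing is easy to consume from code. A short sketch with requests, using the field names from the response example above:

import requests

# List local models and print each name with its size in GB.
resp = requests.get("http://localhost:11434/api/tags", timeout=10)
for model in resp.json().get("models", []):
    size_gb = model["size"] / 1e9
    print(f'{model["name"]}: {size_gb:.1f} GB')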
# Request example: pull a model
curl http://localhost:11434/api/pull -d '{
  "name": "llama2:7b"
}'
# Response example (by default this endpoint streams progress objects while downloading; a successful pull ends with a status object like this)
{
  "status": "success",
  "digest": "sha256:1234567890"
}
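Because the pull endpoint streams newline-delimited JSON progress objects, a small loop can display download progress. A sketch assuming the "status", "completed", and "total" fields that the streamed updates carry:

import requests, json

# Stream pull progress; each line is a JSON object with a "status" field
# and, while downloading layers, "completed"/"total" byte counts.
with requests.post(
    "http://localhost:11434/api/pull",
    json={"name": "llama2:7b"},
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if not line:
            continue
        update = json.loads(line)
        status = update.get("status", "")
        if "total" in update:
            pct = 100 * update.get("completed", 0) / update["total"]
            print(f"{status}: {pct:.1f}%")
        else:
            print(status)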
# Request example: non-streaming generation
# (note: Ollama caps output length with the num_predict option, not max_tokens)
curl http://localhost:11434/api/generate -d '{
  "model": "llama2:7b",
  "prompt": "Hello, please introduce yourself",
  "stream": false,
  "options": {
    "temperature": 0.7,
    "top_p": 0.9,
    "num_predict": 1000
  }
}'
# Response example
{
  "response": "Hello! I am an AI assistant based on the Llama2 7B model...",
  "done": true
}
# Request example: streaming generation
curl http://localhost:11434/api/generate -d '{
  "model": "llama2:7b",
  "prompt": "Hello, please introduce yourself",
  "stream": true,
  "options": {
    "temperature": 0.7
  }
}'
# Response example (Ollama streams newline-delimited JSON, one object per line, not SSE "data:" events)
{"response": "Hello", "done": false}
{"response": "! I am an AI", "done": false}
{"response": " assistant based on", "done": false}
{"response": " the Llama2 7B", "done": false}
{"response": " model...", "done": true}
import requests
import json

class OllamaClient:
    def __init__(self, base_url="http://localhost:11434"):
        self.base_url = base_url

    def generate(self, model, prompt, **options):
        url = f"{self.base_url}/api/generate"
        data = {
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": options
        }
        response = requests.post(url, json=data)
        return response.json()

# Usage example
client = OllamaClient()
response = client.generate(
    model="llama2:7b",
    prompt="Hello, please introduce yourself",
    temperature=0.7,
    num_predict=1000
)
print(response["response"])
class OllamaClient {
  constructor(baseUrl = 'http://localhost:11434') {
    this.baseUrl = baseUrl;
  }

  async generate(model, prompt, options = {}) {
    const response = await fetch(`${this.baseUrl}/api/generate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model,
        prompt,
        stream: false,
        options
      })
    });
    return await response.json();
  }
}

// Usage example
const client = new OllamaClient();
client.generate('llama2:7b', 'Hello, please introduce yourself', {
  temperature: 0.7,
  num_predict: 1000
}).then(response => {
  console.log(response.response);
});
class OllamaClient:
    def __init__(self, base_url="http://localhost:11434"):
        self.base_url = base_url
        self.context = []

    def generate_with_context(self, model, prompt, **options):
        # Prepend the accumulated conversation context
        full_prompt = "\n".join(self.context + [prompt])
        # Generate a response (assumes the generate() method from the basic client above)
        response = self.generate(model, full_prompt, **options)
        # Update the context
        self.context.append(prompt)
        self.context.append(response["response"])
        # Cap the context at the last 10 entries (5 exchanges)
        if len(self.context) > 10:
            self.context = self.context[-10:]
        return response
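A quick multi-turn usage sketch, again assuming generate() from the basic client is defined on the same class:

client = OllamaClient()
client.generate_with_context("llama2:7b", "My name is Alice.")
reply = client.generate_with_context("llama2:7b", "What is my name?")
print(reply["response"])  # The model sees both turns via the accumulated context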
class OllamaClient:
    def generate(self, model, prompt, **options):
        try:
            response = requests.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": prompt,
                    "stream": False,
                    "options": options
                },
                timeout=30
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            return None
        except json.JSONDecodeError as e:
            print(f"JSON decoding failed: {e}")
            return None
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

class OllamaClient:
    def __init__(self, base_url="http://localhost:11434"):
        self.base_url = base_url
        self.session = requests.Session()
        # Configure the retry policy: up to 3 retries with exponential backoff
        # on transient server errors
        retry_strategy = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504]
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)
import asyncio

class OllamaClient:
    async def batch_generate(self, model, prompts, **options):
        # generate() is synchronous (requests-based), so run each call in a
        # worker thread and await them all concurrently.
        tasks = [
            asyncio.to_thread(self.generate, model, prompt, **options)
            for prompt in prompts
        ]
        return await asyncio.gather(*tasks)
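Usage sketch, assuming the synchronous generate() from the earlier clients is defined on the same class:

prompts = ["Hello, please introduce yourself", "What is Ollama?"]
client = OllamaClient()
results = asyncio.run(client.batch_generate("llama2:7b", prompts, temperature=0.7))
for r in results:
    print(r["response"])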
class OllamaClient:
    def __init__(self, base_url="https://your-domain.com:11434"):
        self.base_url = base_url
        self.session = requests.Session()
        self.session.verify = True  # Verify the SSL certificate
class OllamaClient:
    def __init__(self, base_url, api_key):
        self.base_url = base_url
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {api_key}"
        })
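Note that Ollama itself does not enforce API keys; this Authorization header is useful when the server sits behind a reverse proxy (nginx, Caddy, etc.) that validates it. A hypothetical usage line, with a placeholder key:

client = OllamaClient("https://your-domain.com:11434", api_key="your-api-key")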
class OllamaClient:
    def generate(self, model, prompt, **options):
        try:
            response = self.session.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": prompt,
                    "stream": False,
                    "options": options
                },
                timeout=(5, 30)  # 5 s connect timeout, 30 s read timeout
            )
            return response.json()
        except requests.exceptions.Timeout:
            print("Request timed out")
            return None
from tenacity import retry, stop_after_attempt, wait_exponential

class OllamaClient:
    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def generate(self, model, prompt, **options):
        response = self.session.post(
            f"{self.base_url}/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": False,
                "options": options
            }
        )
        response.raise_for_status()
        return response.json()
class ChatBot:
    def __init__(self, model="llama2:7b"):
        self.client = OllamaClient()
        self.model = model
        self.context = []

    def chat(self, message):
        # Prepend the conversation history
        full_prompt = "\n".join(self.context + [f"User: {message}"])
        # Generate a response
        response = self.client.generate(
            self.model,
            full_prompt,
            temperature=0.7,
            num_predict=1000
        )
        # Update the conversation history
        self.context.append(f"User: {message}")
        self.context.append(f"Assistant: {response['response']}")
        return response["response"]
class CodeGenerator:
    def __init__(self, model="llama2:7b"):
        self.client = OllamaClient()
        self.model = model

    def generate_code(self, description, language="python"):
        prompt = f"""
Please implement the following functionality in {language}:
{description}
Requirements:
1. Keep the code concise and efficient
2. Add the necessary comments
3. Include example usage
"""
        response = self.client.generate(
            self.model,
            prompt,
            temperature=0.3,  # Lower randomness for more stable code output
            num_predict=2000
        )
        return response["response"]
API development considerations
- Security: bind OLLAMA_HOST deliberately (0.0.0.0 exposes the service on all interfaces), restrict OLLAMA_ORIGINS rather than using "*" in production, serve over HTTPS with certificate verification enabled, and put authentication (for example, a reverse proxy that checks API keys) in front of the server.
- Performance: set separate connect and read timeouts, retry transient 5xx errors with exponential backoff, run independent prompts concurrently in batches, and cap conversation context to keep prompts short.