Understanding the Ollama API
Ollama provides a simple REST API accessible at http://localhost:11434/api. The main endpoints include:
- /api/generate - Generate text from a prompt
- /api/chat - Have a conversation with a model
- /api/embeddings - Generate embeddings for text
- /api/tags - List available models
- /api/pull - Pull a model
Basic API Request: Using curl
curl -X POST http://localhost:11434/api/generate -d '{
"model": "phi",
"prompt": "Explain how a firewall works in simple terms",
"stream": false
}'
Streaming Responses (with "stream": true the output is returned incrementally, token by token)
curl -X POST http://localhost:11434/api/generate -d '{
"model": "phi",
"prompt": "Write a 5-point checklist for securing a web server",
"stream": true
}'
Chat Conversations
curl -X POST http://localhost:11434/api/chat -d '{
"model": "mistral",
"messages": [
{ "role": "system", "content": "You are a helpful cybersecurity assistant." },
{ "role": "user", "content": "What are the OWASP Top 10?" }
]
}'
Creating a Simple API Client in Python
cat > ollama_client.py << 'EOF'
import requests
import json
import sys
def generate_text(model, prompt, temperature=0.7):
    """Generate a completion for *prompt* via the Ollama /api/generate endpoint.

    Args:
        model: Name of a locally installed Ollama model (e.g. "phi").
        prompt: The prompt text to complete.
        temperature: Sampling temperature; higher values give more varied output.

    Returns:
        The generated text as a string.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.RequestException: On connection failure or timeout.
    """
    url = "http://localhost:11434/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
        # Model parameters such as temperature must be nested under
        # "options" — a top-level "temperature" key is ignored by Ollama.
        "options": {"temperature": temperature},
        "stream": False,
    }
    # Generation can be slow; use a short connect timeout but a generous
    # read timeout instead of hanging forever on an unresponsive server.
    response = requests.post(url, json=data, timeout=(5, 300))
    response.raise_for_status()
    return response.json()["response"]
def chat(model, messages):
    """Send a chat history to the Ollama /api/chat endpoint.

    Args:
        model: Name of a locally installed Ollama model (e.g. "mistral").
        messages: List of {"role": ..., "content": ...} dicts, oldest first.

    Returns:
        The assistant's reply text.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.RequestException: On connection failure or timeout.
    """
    url = "http://localhost:11434/api/chat"
    data = {
        "model": model,
        "messages": messages,
        "stream": False,
    }
    # Long read timeout: chat completions can take minutes on CPU-only hosts.
    response = requests.post(url, json=data, timeout=(5, 300))
    response.raise_for_status()
    return response.json()["message"]["content"]
def list_models():
    """Return the names of all models installed in the local Ollama instance.

    Returns:
        A list of model name strings (empty if none are installed).

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.RequestException: On connection failure or timeout.
    """
    url = "http://localhost:11434/api/tags"
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # "models" may be missing on a fresh install; treat that as "no models"
    # instead of raising KeyError.
    return [model["name"] for model in response.json().get("models", [])]
if __name__ == "__main__":
# List available models
print("Available models:")
models = list_models()
for i, model in enumerate(models):
print(f"{i+1}. {model}")
# Select a model
if models:
model_idx = int(input("\nSelect a model (enter number): ")) - 1
model = models[model_idx]
# Choose mode
print("\nChoose mode:")
print("1. Generate text")
print("2. Chat")
mode = int(input("Enter choice: "))
if mode == 1:
# Generate text
prompt = input("\nEnter prompt: ")
print("\nGenerating response...\n")
response = generate_text(model, prompt)
print(response)
elif mode == 2:
# Chat
messages = [{"role": "system", "content": "You are a helpful assistant."}]
print("\nChat mode (type 'exit' to quit)")
while True:
user_input = input("\nYou: ")
if user_input.lower() == 'exit':
break
messages.append({"role": "user", "content": user_input})
print("\nThinking...\n")
response = chat(model, messages)
print(f"Assistant: {response}")
messages.append({"role": "assistant", "content": response})
else:
print("No models available. Pull a model first using 'ollama pull <model>'")
EOF
Run the script:
python3 ollama_client.py
Creating a Web Application with Ollama
pip install flask
cat > flask_ollama_app.py << 'EOF'
from flask import Flask, request, jsonify, render_template_string
import requests
app = Flask(__name__)
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<title>Ollama Chat</title>
<style>
body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
#chat-container { height: 400px; overflow-y: auto; border: 1px solid #ccc; padding: 10px; margin-bottom: 10px; }
#user-input { width: 80%; padding: 8px; }
#send-button { padding: 8px 15px; }
.user-message { background-color: #e6f7ff; padding: 8px; border-radius: 5px; margin: 5px 0; }
.assistant-message { background-color: #f0f0f0; padding: 8px; border-radius: 5px; margin: 5px 0; }
</style>
</head>
<body>
<h1>Ollama Chat</h1>
<div id="chat-container"></div>
<div>
<input type="text" id="user-input" placeholder="Type your message...">
<button id="send-button">Send</button>
</div>
<script>
const chatContainer = document.getElementById('chat-container');
const userInput = document.getElementById('user-input');
const sendButton = document.getElementById('send-button');
let messages = [
{"role": "system", "content": "You are a helpful cybersecurity assistant."}
];
function addMessage(role, content) {
const messageDiv = document.createElement('div');
messageDiv.className = role + '-message';
messageDiv.textContent = content;
chatContainer.appendChild(messageDiv);
chatContainer.scrollTop = chatContainer.scrollHeight;
messages.push({"role": role, "content": content});
}
function sendMessage() {
const content = userInput.value.trim();
if (content) {
addMessage('user', content);
userInput.value = '';
fetch('/chat', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({messages: messages})
})
.then(response => response.json())
.then(data => {
addMessage('assistant', data.response);
})
.catch(error => {
console.error('Error:', error);
addMessage('assistant', 'Sorry, there was an error processing your request.');
});
}
}
sendButton.addEventListener('click', sendMessage);
userInput.addEventListener('keyup', function(event) {
if (event.key === 'Enter') {
sendMessage();
}
});
</script>
</body>
</html>
"""
@app.route('/')
def index():
    """Serve the single-page chat interface."""
    return render_template_string(HTML_TEMPLATE)
@app.route('/chat', methods=['POST'])
def chat():
    """Proxy a chat completion request to the local Ollama server.

    Expects a JSON body of the form {"messages": [...]} and returns
    {"response": "<assistant reply>"} on success. On a malformed request
    it returns HTTP 400; if the upstream Ollama call fails it returns
    HTTP 502 with an {"error": ...} body instead of crashing with a 500.
    """
    # get_json(silent=True) yields None instead of raising on bad JSON.
    data = request.get_json(silent=True) or {}
    messages = data.get('messages')
    if not messages:
        return jsonify({"error": "Missing 'messages' in request body"}), 400
    try:
        upstream = requests.post(
            'http://localhost:11434/api/chat',
            json={
                "model": "mistral",
                "messages": messages,
                "stream": False
            },
            # Generation can be slow; bound it rather than hanging forever.
            timeout=(5, 300)
        )
        upstream.raise_for_status()
        return jsonify({"response": upstream.json()["message"]["content"]})
    except (requests.RequestException, KeyError, ValueError) as exc:
        return jsonify({"error": f"Ollama request failed: {exc}"}), 502
if __name__ == '__main__':
    # Bind to every interface so the app is reachable from other hosts,
    # and keep the Werkzeug debugger disabled for production-like use.
    # Note: port 80 is a privileged port on most systems.
    app.run(host='0.0.0.0', debug=False, port=80)
EOF
python3 flask_ollama_app.py