Vision models
Vision completions enable the processing of images alongside text, allowing for a wide range of applications such as image description, object recognition, and data extraction from visual content. When you send a combination of text prompts and image URLs, the model can provide insightful responses based on the visual input.
Vision Completions
You can provide images in two ways:
- Remote URL: Supply a publicly accessible URL pointing to the image.
- Base64 Encoding: Encode the image in Base64 format and pass it in the `image_url` field as a data URL (for example, `data:image/jpeg;base64,...`).
Remote Image URL
import regolo

# Configure the client once: the API key and the default chat model to use.
regolo.default_key = "YOUR_REGOLO_KEY"
regolo.default_chat_model = "Qwen2.5-VL-32B-Instruct"

# The user turn is made of two content parts: the question and the image reference.
question_part = {
    "type": "text",
    "text": "What’s in this image?",
}
image_part = {
    "type": "image_url",
    "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/de/Colosseo_2020.jpg/960px-Colosseo_2020.jpg"},
    "format": "image/jpeg",
}
user_message = {
    "role": "user",
    "content": [question_part, image_part],
}

# Send the single-message conversation and print whatever the client returns.
print(regolo.static_chat_completions(messages=[user_message]))
import requests

url = "https://api.regolo.ai/v1/chat/completions"

# One user message carrying two content parts: the text prompt and the
# remote image reference.
payload = {
    "model": "Qwen2.5-VL-32B-Instruct",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What’s in this image?",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/de/Colosseo_2020.jpg/960px-Colosseo_2020.jpg"},
                    "format": "image/jpeg",
                },
            ],
        }
    ],
}

headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_REGOLO_KEY",
}

# `json=` serializes the payload for us. The explicit timeout (in seconds)
# keeps the script from hanging forever if the server never answers;
# raise it if your requests legitimately take longer.
response = requests.post(url, json=payload, headers=headers, timeout=60)
print(response.json())
# Send one user message (text prompt + remote image URL) to the chat completions endpoint.
curl --request POST https://api.regolo.ai/v1/chat/completions \
  --header "Content-Type: application/json" \
  --header "Authorization: Bearer YOUR_REGOLO_KEY" \
  --data '{
"model": "Qwen2.5-VL-32B-Instruct",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What’s in this image?"
},
{
"type": "image_url",
"image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/de/Colosseo_2020.jpg/960px-Colosseo_2020.jpg"},
"format": "image/jpeg"
}
]
}
]
}'
Base64 Encoding
This script demonstrates how to encode a local image as Base64 and send it to a multimodal model (text + image) for analysis.
Replace 'YOUR-API-KEY' with your actual API key before running.
import base64
import json
import requests
from pathlib import Path

API_URL = "https://api.regolo.ai/v1/chat/completions"
API_KEY = "YOUR-API-KEY"
MODEL = "gemma-3-27b-it"
IMAGE_PATH = Path("colosseo.jpg")
# Seconds to wait for the server before giving up; without a timeout the
# request could block indefinitely. Raise it if responses take longer.
REQUEST_TIMEOUT = 60

# Fail early with the resolved path so a wrong working directory is easy to spot.
if not IMAGE_PATH.exists():
    raise FileNotFoundError(f"Image not found: {IMAGE_PATH.resolve()}")

# Read the raw bytes and Base64-encode them into text that can be embedded
# in a data URL.
with open(IMAGE_PATH, "rb") as f:
    image_bytes = f.read()
image_b64 = base64.b64encode(image_bytes).decode("utf-8")

# One user message with two content parts: the text prompt and the inline image.
# NOTE: the MIME type is hard-coded as JPEG in both places below; update both
# if IMAGE_PATH points to a different image format.
payload = {
    "model": MODEL,
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What’s in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_b64}",
                        "format": "image/jpeg",
                    },
                },
            ],
        }
    ],
}

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",
}

print("Sending request to Regolo AI API...")
response = requests.post(
    API_URL,
    headers=headers,
    data=json.dumps(payload),
    timeout=REQUEST_TIMEOUT,
)

if response.status_code != 200:
    # Show the status and the raw body so the cause of the failure is visible.
    print(f"Error {response.status_code}:")
    print(response.text)
else:
    try:
        # Both decoding the body and walking the expected structure can fail:
        # ValueError covers a non-JSON body, the others cover a JSON body
        # that does not have the choices -> message -> content shape.
        result = response.json()
        content = result["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        print("Unexpected response format:")
        print(response.text)
    else:
        print("Model response:")
        print(content)