Skip to content

Vision models

Vision completions enable the processing of images alongside text, allowing for a wide range of applications such as image description, object recognition, and data extraction from visual content. By sending a combination of text prompts and image URLs, the model can provide insightful responses based on the visual input.

Note

This API supports only images, PDF files are not supported.

Vision Completions

You can provide images in two ways:

  • Remote URL: Supply a publicly accessible URL pointing to the image.
  • Base64 Encoding: Encode the image in base64 format and pass it in the image_url field.

Remote Image URL

import regolo

regolo.default_key = "YOUR_REGOLO_KEY"
regolo.default_chat_model = "Qwen2.5-VL-32B-Instruct"

print(regolo.static_chat_completions(messages=[{
    "role": "user",
    "content": [
        {
            "type": "text",
            "text": "What’s in this image?"
        },
        {
            "type": "image_url",
            "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/de/Colosseo_2020.jpg/960px-Colosseo_2020.jpg"},
            "format": "image/jpeg"
        }
    ]
}]))
import requests

url = "https://api.regolo.ai/v1/chat/completions"
payload = {
    "model": "Qwen2.5-VL-32B-Instruct",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What’s in this image?"
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/de/Colosseo_2020.jpg/960px-Colosseo_2020.jpg"},
                    "format": "image/jpeg"
                }
            ]
        }
    ]
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_REGOLO_KEY"
}

response = requests.post(url, json=payload, headers=headers)
print(response.json())
curl -X POST https://api.regolo.ai/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_REGOLO_KEY" \
-d '{
    "model": "Qwen2.5-VL-32B-Instruct",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What’s in this image?"
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/de/Colosseo_2020.jpg/960px-Colosseo_2020.jpg"},
                    "format": "image/jpeg"
                }
            ]
        }
    ]
}'

Base64 Encoding

This script demonstrates how to encode a local image as Base64 and send it to a multimodal model (text + image) for analysis.

Replace 'YOUR-API-KEY' with your actual API key before running.

import base64
import json
import requests
from pathlib import Path

API_URL = "https://api.regolo.ai/v1/chat/completions"
API_KEY = "YOUR-API-KEY"
MODEL = "gemma-3-27b-it"

IMAGE_PATH = Path("colosseo.jpg")

if not IMAGE_PATH.exists():
    raise FileNotFoundError(f"Image not found: {IMAGE_PATH.resolve()}")

with open(IMAGE_PATH, "rb") as f:
    image_bytes = f.read()

image_b64 = base64.b64encode(image_bytes).decode("utf-8")

payload = {
    "model": MODEL,
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What’s in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_b64}",
                        "format": "image/jpeg"
                    }
                }
            ]
        }
    ]
}

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}"
}

print("Sending request to Regolo AI API...")
response = requests.post(API_URL, headers=headers, data=json.dumps(payload))

if response.status_code != 200:
    print(f"Error {response.status_code}:")
    print(response.text)
else:
    result = response.json()
    try:
        content = result["choices"][0]["message"]["content"]
        print("Model response:")
        print(content)
    except Exception:
        print("Unexpected response format:")
        print(response.text)