Send image inputs to Claude, GPT, Gemini, and other vision-capable models through TheRouter's unified API.
Vision-capable models at TheRouter include the Claude, GPT, Gemini, Grok, and Llama families; query the /v1/models endpoint and filter for features: ["vision"] to get the current list.
Supported formats: JPEG, PNG, GIF, and WebP, passed either as a public URL or as a base64-encoded data URI.
Note: Maximum image size varies by provider. Claude: 5MB per image. OpenAI: 20MB. Gemini: 20MB.
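If you build data URIs yourself, validating the format and size locally can save a failed API round trip. A minimal sketch in Python (the to_data_uri helper and the provider keys are illustrative, not part of TheRouter's API):

import base64
import mimetypes
import os

# Per-provider limits from the note above, in bytes (illustrative mapping).
MAX_IMAGE_BYTES = {
    "anthropic": 5 * 1024 * 1024,
    "openai": 20 * 1024 * 1024,
    "google": 20 * 1024 * 1024,
}
SUPPORTED_MIME_TYPES = {"image/jpeg", "image/png", "image/gif", "image/webp"}

def to_data_uri(image_path, provider):
    # guess_type maps the file extension to a MIME type; it may return
    # None for extensions it does not know.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type not in SUPPORTED_MIME_TYPES:
        raise ValueError(f"Unsupported image format: {mime_type}")
    size = os.path.getsize(image_path)
    if size > MAX_IMAGE_BYTES[provider]:
        raise ValueError(f"{size} bytes exceeds the {provider} limit")
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"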
cURL (image from a URL):
curl https://api.therouter.ai/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer ${THEROUTER_API_KEY}" \
  -d '{
    "model": "anthropic/claude-sonnet-4.6",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "What is in this image?"
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://example.com/image.jpg"
            }
          }
        ]
      }
    ],
    "max_tokens": 1024
  }'

cURL (image as base64 data URI):
curl https://api.therouter.ai/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer ${THEROUTER_API_KEY}" \
-d '{
"model": "openai/gpt-4.1",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
}
}
]
}
],
"max_tokens": 1024
  }'

Python (OpenAI SDK):
from openai import OpenAI
import base64
client = OpenAI(
    api_key="your_therouter_api_key",
    base_url="https://api.therouter.ai/v1",
)
# From URL
response = client.chat.completions.create(
    model="google/gemini-3-flash-preview",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What is in this image?",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/image.jpg"
                    },
                },
            ],
        }
    ],
    max_tokens=1024,
)
print(response.choices[0].message.content)
# From local file (base64)
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

base64_image = encode_image("path/to/image.jpg")
response = client.chat.completions.create(
    model="anthropic/claude-sonnet-4.6",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Analyze this image",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
            ],
        }
    ],
    max_tokens=1024,
)
print(response.choices[0].message.content)

TypeScript (OpenAI SDK):
import OpenAI from "openai";
import * as fs from "fs";
const client = new OpenAI({
  apiKey: process.env.THEROUTER_API_KEY,
  baseURL: "https://api.therouter.ai/v1",
});
// From URL
const response = await client.chat.completions.create({
  model: "xai/grok-4-1-fast-reasoning",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "What is in this image?",
        },
        {
          type: "image_url",
          image_url: {
            url: "https://example.com/image.jpg",
          },
        },
      ],
    },
  ],
  max_tokens: 1024,
});
console.log(response.choices[0].message.content);
// From local file (base64)
function encodeImage(imagePath: string): string {
  const imageBuffer = fs.readFileSync(imagePath);
  return imageBuffer.toString("base64");
}

const base64Image = encodeImage("path/to/image.jpg");
const responseLocal = await client.chat.completions.create({
  model: "meta/llama-4-maverick",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "Describe this image in detail",
        },
        {
          type: "image_url",
          image_url: {
            url: `data:image/jpeg;base64,${base64Image}`,
          },
        },
      ],
    },
  ],
  max_tokens: 1024,
});
console.log(responseLocal.choices[0].message.content);

Common errors when using vision:
{
  "error": {
    "message": "Model deepseek/deepseek-v3.2 does not support image_url content. Supported models with vision: anthropic/claude-opus-4.6, ...",
    "type": "invalid_request_error",
    "code": "multimodal_not_supported"
  }
}
Solution: Use a vision-capable model. Check the /v1/models endpoint for models with features: ["vision"].
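To do this programmatically, you can filter the model list on that field. A minimal sketch in Python, assuming /v1/models returns an OpenAI-style {"data": [...]} list where each entry carries the features array mentioned above:

import os
import requests

resp = requests.get(
    "https://api.therouter.ai/v1/models",
    headers={"Authorization": f"Bearer {os.environ['THEROUTER_API_KEY']}"},
)
resp.raise_for_status()

# Keep only model IDs whose feature list includes "vision".
vision_models = [
    m["id"] for m in resp.json()["data"] if "vision" in m.get("features", [])
]
print(vision_models)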
{
  "error": {
    "message": "Invalid image format. Supported formats: jpeg, png, gif, webp",
    "type": "invalid_request_error",
    "code": "invalid_image_format"
  }
}
Solution: Convert the image to a supported format (JPEG, PNG, GIF, or WebP).
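If the source file is in an unsupported format, one option is converting it locally with Pillow before encoding. A minimal sketch (Pillow is an assumption here, not part of TheRouter's SDKs):

from PIL import Image  # pip install pillow

# Convert e.g. a TIFF or BMP to JPEG before building the data URI.
img = Image.open("photo.tiff")
img.convert("RGB").save("photo.jpg", format="JPEG", quality=90)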
Image exceeds the provider's size limit:
Solution: Compress or resize the image. Maximum sizes: Claude 5MB, OpenAI 20MB, Gemini 20MB.
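One way to stay under these limits is re-encoding with Pillow and halving the dimensions until the payload fits. A minimal sketch, assuming JPEG output and Claude's 5MB limit as the default (shrink_to_limit is an illustrative helper, not a TheRouter API):

import io
from PIL import Image  # pip install pillow

def shrink_to_limit(image_path, max_bytes=5 * 1024 * 1024):
    """Re-encode as JPEG, halving dimensions until the result fits max_bytes."""
    img = Image.open(image_path).convert("RGB")
    while True:
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=85)
        if buf.tell() <= max_bytes:
            return buf.getvalue()
        # Still too large: halve both dimensions and try again.
        img = img.resize((max(1, img.width // 2), max(1, img.height // 2)))

The resulting bytes can then be base64-encoded exactly as in the examples above.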