Files
dotfiles/machines/desktop/config/aichat/models-override.yaml

1798 lines
45 KiB
YAML

version: 0.30.0
list:
- provider: openai
models:
- name: gpt-5.2
type: chat
max_input_tokens: 400000
input_price: 1.75
output_price: 14.0
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: gpt-5
type: chat
max_input_tokens: 400000
input_price: 1.25
output_price: 10.0
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: gpt-5-mini
type: chat
max_input_tokens: 400000
input_price: 0.25
output_price: 2.0
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: gpt-5-nano
type: chat
max_input_tokens: 400000
input_price: 0.05
output_price: 0.4
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: gpt-4.1
type: chat
max_input_tokens: 1047576
input_price: 2.0
output_price: 8.0
max_output_tokens: 32768
supports_vision: true
supports_function_calling: true
- name: gpt-4o
type: chat
max_input_tokens: 128000
input_price: 2.5
output_price: 10.0
max_output_tokens: 16384
supports_vision: true
supports_function_calling: true
- name: gpt-4-turbo
type: chat
max_input_tokens: 128000
input_price: 10.0
output_price: 30.0
max_output_tokens: 4096
supports_vision: true
supports_function_calling: true
- name: gpt-3.5-turbo
type: chat
max_input_tokens: 16385
input_price: 0.5
output_price: 1.5
max_output_tokens: 4096
supports_function_calling: true
- name: text-embedding-3-large
type: embedding
input_price: 0.13
max_tokens_per_chunk: 8191
default_chunk_size: 2000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
input_price: 0.02
max_tokens_per_chunk: 8191
default_chunk_size: 2000
max_batch_size: 100
- provider: gemini
models:
- name: gemini-2.5-flash
type: chat
max_input_tokens: 1048576
input_price: 0.0
output_price: 0.0
max_output_tokens: 65536
supports_vision: true
supports_function_calling: true
- name: gemini-2.5-pro
type: chat
max_input_tokens: 1048576
input_price: 0.0
output_price: 0.0
max_output_tokens: 65536
supports_vision: true
supports_function_calling: true
- name: gemini-2.5-flash-lite
type: chat
max_input_tokens: 1000000
input_price: 0.0
output_price: 0.0
max_output_tokens: 64000
supports_vision: true
supports_function_calling: true
- name: gemini-3-pro-preview
type: chat
max_input_tokens: 1048576
supports_vision: true
supports_function_calling: true
- name: gemini-3-flash-preview
type: chat
max_input_tokens: 1048576
supports_vision: true
supports_function_calling: true
- name: gemini-2.0-flash
type: chat
max_input_tokens: 1048576
input_price: 0.0
output_price: 0.0
max_output_tokens: 8192
supports_vision: true
supports_function_calling: true
- name: gemini-2.0-flash-lite
type: chat
max_input_tokens: 1048576
input_price: 0.0
output_price: 0.0
max_output_tokens: 8192
supports_vision: true
supports_function_calling: true
- name: gemma-3-27b-it
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
max_output_tokens: 8192
- name: text-embedding-004
type: embedding
input_price: 0.0
max_tokens_per_chunk: 2048
default_chunk_size: 1500
max_batch_size: 100
- provider: claude
models:
- name: claude-sonnet-4-5-20250929
type: chat
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-sonnet-4-5-20250929:thinking
type: chat
real_name: claude-sonnet-4-5-20250929
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-haiku-4-5-20251001
type: chat
max_input_tokens: 200000
input_price: 1.0
output_price: 5.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-haiku-4-5-20251001:thinking
type: chat
real_name: claude-haiku-4-5-20251001
max_input_tokens: 200000
input_price: 1.0
output_price: 5.0
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-opus-4-5-20251101
type: chat
max_input_tokens: 200000
input_price: 5.0
output_price: 25.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-opus-4-5-20251101:thinking
type: chat
real_name: claude-opus-4-5-20251101
max_input_tokens: 200000
input_price: 5.0
output_price: 25.0
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- provider: mistral
models:
- name: mistral-large-latest
type: chat
# 262144 is the context window, so it belongs in max_input_tokens like every
# sibling Mistral chat model (it was stored under max_output_tokens).
max_input_tokens: 262144
input_price: 0.5
output_price: 1.5
supports_vision: true
supports_function_calling: true
- name: mistral-medium-latest
type: chat
max_input_tokens: 131072
input_price: 0.4
output_price: 2.0
supports_vision: true
supports_function_calling: true
- name: mistral-small-latest
type: chat
max_input_tokens: 32768
input_price: 0.1
output_price: 0.3
supports_vision: true
supports_function_calling: true
- name: magistral-medium-latest
type: chat
max_input_tokens: 131072
input_price: 2.0
output_price: 5.0
- name: magistral-small-latest
type: chat
max_input_tokens: 131072
input_price: 0.5
output_price: 1.5
- name: devstral-medium-latest
type: chat
max_input_tokens: 262144
input_price: 0.4
output_price: 2.0
supports_function_calling: true
- name: devstral-small-latest
type: chat
max_input_tokens: 262144
input_price: 0.1
output_price: 0.3
supports_function_calling: true
- name: codestral-latest
type: chat
max_input_tokens: 262144
input_price: 0.3
output_price: 0.9
supports_function_calling: true
- name: ministral-14b-latest
type: chat
max_input_tokens: 262144
input_price: 0.2
output_price: 0.2
supports_function_calling: true
- name: mistral-embed
type: embedding
max_input_tokens: 8092
input_price: 0.1
max_tokens_per_chunk: 8092
default_chunk_size: 2000
- provider: ai21
models:
- name: jamba-large
type: chat
max_input_tokens: 256000
input_price: 2.0
output_price: 8.0
supports_function_calling: true
- name: jamba-mini
type: chat
max_input_tokens: 256000
input_price: 0.2
output_price: 0.4
supports_function_calling: true
- provider: cohere
models:
- name: command-a-03-2025
type: chat
max_input_tokens: 262144
input_price: 2.5
output_price: 10.0
max_output_tokens: 8192
supports_function_calling: true
- name: command-a-reasoning-08-2025
type: chat
max_input_tokens: 262144
input_price: 2.5
output_price: 10.0
max_output_tokens: 32768
- name: command-a-vision-07-2025
type: chat
max_input_tokens: 131072
input_price: 2.5
output_price: 10.0
max_output_tokens: 8192
supports_vision: true
- name: command-r7b-12-2024
type: chat
max_input_tokens: 131072
input_price: 0.0375
output_price: 0.15
max_output_tokens: 4096
- name: embed-v4.0
type: embedding
input_price: 0.12
max_tokens_per_chunk: 2048
default_chunk_size: 2000
max_batch_size: 96
- name: embed-english-v3.0
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: embed-multilingual-v3.0
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: rerank-v3.5
type: reranker
max_input_tokens: 4096
- name: rerank-english-v3.0
type: reranker
max_input_tokens: 4096
- name: rerank-multilingual-v3.0
type: reranker
max_input_tokens: 4096
- provider: xai
models:
- name: grok-4-1-fast-non-reasoning
type: chat
max_input_tokens: 2000000
input_price: 0.2
output_price: 0.5
supports_function_calling: true
- name: grok-4-1-fast-reasoning
type: chat
max_input_tokens: 2000000
input_price: 0.2
output_price: 0.5
supports_function_calling: true
- name: grok-code-fast-1
type: chat
max_input_tokens: 256000
input_price: 0.2
output_price: 1.5
supports_function_calling: true
- provider: perplexity
models:
- name: sonar-pro
type: chat
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
- name: sonar
type: chat
max_input_tokens: 128000
input_price: 1.0
output_price: 1.0
- name: sonar-reasoning-pro
type: chat
max_input_tokens: 128000
input_price: 2.0
output_price: 8.0
- name: sonar-deep-research
type: chat
max_input_tokens: 128000
input_price: 2.0
output_price: 8.0
- provider: groq
models:
- name: openai/gpt-oss-120b
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
supports_function_calling: true
- name: openai/gpt-oss-20b
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
supports_function_calling: true
- name: meta-llama/llama-4-maverick-17b-128e-instruct
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
supports_vision: true
supports_function_calling: true
- name: meta-llama/llama-4-scout-17b-16e-instruct
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
supports_vision: true
supports_function_calling: true
- name: llama-3.3-70b-versatile
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
supports_function_calling: true
- name: moonshotai/kimi-k2-instruct-0905
type: chat
max_input_tokens: 262144
input_price: 0.0
output_price: 0.0
supports_function_calling: true
- name: qwen/qwen3-32b
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
- name: groq/compound
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
- name: groq/compound-mini
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
- provider: vertexai
models:
- name: gemini-2.5-flash
type: chat
max_input_tokens: 1048576
input_price: 0.3
output_price: 2.5
max_output_tokens: 65536
supports_vision: true
supports_function_calling: true
- name: gemini-2.5-pro
type: chat
max_input_tokens: 1048576
input_price: 1.25
output_price: 10.0
max_output_tokens: 65536
supports_vision: true
supports_function_calling: true
- name: gemini-2.5-flash-lite
type: chat
max_input_tokens: 1048576
input_price: 0.3
output_price: 0.4
max_output_tokens: 65536
supports_vision: true
supports_function_calling: true
- name: gemini-3-pro-preview
type: chat
max_input_tokens: 1048576
supports_vision: true
supports_function_calling: true
- name: gemini-3-flash-preview
type: chat
max_input_tokens: 1048576
supports_vision: true
supports_function_calling: true
- name: gemini-2.0-flash-001
type: chat
max_input_tokens: 1048576
input_price: 0.15
output_price: 0.6
max_output_tokens: 8192
supports_vision: true
supports_function_calling: true
- name: gemini-2.0-flash-lite-001
type: chat
max_input_tokens: 1048576
input_price: 0.075
output_price: 0.3
max_output_tokens: 8192
supports_vision: true
supports_function_calling: true
- name: claude-sonnet-4-5@20250929
type: chat
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-sonnet-4-5@20250929:thinking
type: chat
real_name: claude-sonnet-4-5@20250929
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
- name: claude-haiku-4-5@20251001
type: chat
max_input_tokens: 200000
input_price: 1.0
output_price: 5.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-haiku-4-5@20251001:thinking
type: chat
real_name: claude-haiku-4-5@20251001
max_input_tokens: 200000
input_price: 1.0
output_price: 5.0
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
- name: claude-opus-4-5@20251101
type: chat
max_input_tokens: 200000
input_price: 5.0
output_price: 25.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: claude-opus-4-5@20251101:thinking
type: chat
real_name: claude-opus-4-5@20251101
max_input_tokens: 200000
input_price: 5.0
output_price: 25.0
patch:
body:
temperature: null
top_p: null
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
- name: text-embedding-005
type: embedding
max_input_tokens: 20000
input_price: 0.025
max_tokens_per_chunk: 2048
default_chunk_size: 1500
max_batch_size: 5
- name: text-multilingual-embedding-002
type: embedding
max_input_tokens: 20000
input_price: 0.2
max_tokens_per_chunk: 2048
default_chunk_size: 1500
max_batch_size: 5
- provider: bedrock
models:
- name: us.anthropic.claude-sonnet-4-5-20250929-v1:0
type: chat
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: us.anthropic.claude-sonnet-4-5-20250929-v1:0:thinking
type: chat
real_name: us.anthropic.claude-sonnet-4-5-20250929-v1:0
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
patch:
body:
inferenceConfig:
temperature: null
topP: null
additionalModelRequestFields:
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
- name: us.anthropic.claude-haiku-4-5-20251001-v1:0
type: chat
max_input_tokens: 200000
input_price: 1.0
output_price: 5.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: us.anthropic.claude-haiku-4-5-20251001-v1:0:thinking
type: chat
real_name: us.anthropic.claude-haiku-4-5-20251001-v1:0
max_input_tokens: 200000
input_price: 1.0
output_price: 5.0
patch:
body:
inferenceConfig:
temperature: null
topP: null
additionalModelRequestFields:
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
- name: us.anthropic.claude-opus-4-5-20251101-v1:0
type: chat
max_input_tokens: 200000
input_price: 5.0
output_price: 25.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: us.anthropic.claude-opus-4-5-20251101-v1:0:thinking
type: chat
real_name: us.anthropic.claude-opus-4-5-20251101-v1:0
max_input_tokens: 200000
input_price: 5.0
output_price: 25.0
patch:
body:
inferenceConfig:
temperature: null
topP: null
additionalModelRequestFields:
thinking:
type: enabled
budget_tokens: 16000
max_output_tokens: 24000
require_max_tokens: true
supports_vision: true
- name: us.meta.llama4-maverick-17b-instruct-v1:0
type: chat
max_input_tokens: 131072
input_price: 0.24
output_price: 0.97
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: us.meta.llama4-scout-17b-instruct-v1:0
type: chat
max_input_tokens: 131072
input_price: 0.17
output_price: 0.66
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: us.meta.llama3-3-70b-instruct-v1:0
type: chat
max_input_tokens: 131072
input_price: 0.72
output_price: 0.72
max_output_tokens: 8192
require_max_tokens: true
supports_function_calling: true
- name: us.amazon.nova-premier-v1:0
type: chat
max_input_tokens: 300000
input_price: 2.5
output_price: 12.5
max_output_tokens: 5120
- name: us.amazon.nova-pro-v1:0
type: chat
max_input_tokens: 300000
input_price: 0.8
output_price: 3.2
max_output_tokens: 5120
supports_vision: true
- name: us.amazon.nova-lite-v1:0
type: chat
max_input_tokens: 300000
input_price: 0.06
output_price: 0.24
max_output_tokens: 5120
supports_vision: true
- name: us.amazon.nova-micro-v1:0
type: chat
max_input_tokens: 128000
input_price: 0.035
output_price: 0.14
max_output_tokens: 5120
- name: cohere.embed-english-v3
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: cohere.embed-multilingual-v3
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: us.deepseek.r1-v1:0
type: chat
max_input_tokens: 128000
input_price: 1.35
output_price: 5.4
- provider: cloudflare
models:
- name: '@cf/meta/llama-4-scout-17b-16e-instruct'
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
max_output_tokens: 2048
require_max_tokens: true
- name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
max_output_tokens: 2048
require_max_tokens: true
- name: '@cf/qwen/qwen3-30b-a3b-fp8'
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
max_output_tokens: 2048
require_max_tokens: true
- name: '@cf/qwen/qwen2.5-coder-32b-instruct'
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
max_output_tokens: 2048
require_max_tokens: true
- name: '@cf/google/gemma-3-12b-it'
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
max_output_tokens: 2048
require_max_tokens: true
- name: '@cf/mistralai/mistral-small-3.1-24b-instruct'
type: chat
max_input_tokens: 131072
input_price: 0.0
output_price: 0.0
max_output_tokens: 2048
require_max_tokens: true
- name: '@cf/baai/bge-large-en-v1.5'
type: embedding
input_price: 0.0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- provider: ernie
models:
- name: ernie-4.5-turbo-128k
type: chat
max_input_tokens: 131072
input_price: 0.112
output_price: 0.448
- name: ernie-4.5-turbo-vl-32k
type: chat
max_input_tokens: 32768
input_price: 0.42
output_price: 1.26
supports_vision: true
- name: ernie-5.0-thinking-preview
type: chat
max_input_tokens: 131072
input_price: 1.4
output_price: 5.6
- name: ernie-x1.1-preview
type: chat
max_input_tokens: 65536
input_price: 0.14
output_price: 0.56
- name: bge-large-zh
type: embedding
input_price: 0.07
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 16
- name: bge-large-en
type: embedding
input_price: 0.07
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 16
- name: bce-reranker-base
type: reranker
max_input_tokens: 1024
input_price: 0.07
- provider: qianwen
models:
- name: qwen3-max
type: chat
max_input_tokens: 262144
supports_function_calling: true
- name: qwen-plus
type: chat
max_input_tokens: 131072
supports_function_calling: true
- name: qwen-flash
type: chat
max_input_tokens: 1000000
supports_function_calling: true
- name: qwen3-vl-plus
type: chat
max_input_tokens: 262144
supports_vision: true
- name: qwen3-vl-flash
type: chat
max_input_tokens: 262144
supports_vision: true
- name: qwen-coder-plus
type: chat
max_input_tokens: 1000000
- name: qwen-coder-flash
type: chat
max_input_tokens: 1000000
- name: qwen3-next-80b-a3b-instruct
type: chat
max_input_tokens: 131072
input_price: 0.14
output_price: 0.56
supports_function_calling: true
- name: qwen3-next-80b-a3b-thinking
type: chat
max_input_tokens: 131072
input_price: 0.14
output_price: 1.4
- name: qwen3-235b-a22b-instruct-2507
type: chat
max_input_tokens: 131072
input_price: 0.28
output_price: 1.12
supports_function_calling: true
- name: qwen3-235b-a22b-thinking-2507
type: chat
max_input_tokens: 131072
input_price: 0.28
output_price: 2.8
- name: qwen3-30b-a3b-instruct-2507
type: chat
max_input_tokens: 131072
input_price: 0.105
output_price: 0.42
supports_function_calling: true
- name: qwen3-30b-a3b-thinking-2507
type: chat
max_input_tokens: 131072
input_price: 0.105
output_price: 1.05
- name: qwen3-vl-32b-instruct
type: chat
max_input_tokens: 131072
input_price: 0.28
output_price: 1.12
supports_vision: true
- name: qwen3-vl-8b-instruct
type: chat
max_input_tokens: 131072
input_price: 0.07
output_price: 0.28
supports_vision: true
- name: qwen3-coder-480b-a35b-instruct
type: chat
max_input_tokens: 262144
input_price: 1.26
output_price: 5.04
- name: qwen3-coder-30b-a3b-instruct
type: chat
max_input_tokens: 262144
input_price: 0.315
output_price: 1.26
- name: deepseek-v3.2-exp
type: chat
max_input_tokens: 131072
input_price: 0.28
output_price: 0.42
- name: text-embedding-v4
type: embedding
input_price: 0.1
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 10
- name: text-embedding-v3
type: embedding
input_price: 0.1
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 10
- provider: hunyuan
models:
- name: hunyuan-2.0-instruct-20251111
type: chat
max_input_tokens: 131072
input_price: 0.112
output_price: 0.28
supports_function_calling: true
- name: hunyuan-2.0-thinking-20251109
type: chat
max_input_tokens: 131072
input_price: 0.14
output_price: 0.56
supports_function_calling: true
- name: hunyuan-vision-1.5-instruct
type: chat
max_input_tokens: 24576
input_price: 0.42
output_price: 1.26
supports_vision: true
- name: hunyuan-embedding
type: embedding
input_price: 0.01
max_tokens_per_chunk: 1024
default_chunk_size: 1000
max_batch_size: 100
- provider: moonshot
models:
- name: kimi-k2.5
type: chat
max_input_tokens: 262144
input_price: 0.56
output_price: 2.94
supports_vision: true
supports_function_calling: true
- name: kimi-k2-turbo-preview
type: chat
max_input_tokens: 262144
input_price: 1.12
output_price: 8.12
supports_vision: true
supports_function_calling: true
- name: kimi-k2-0905-preview
type: chat
max_input_tokens: 262144
input_price: 0.56
output_price: 2.24
supports_vision: true
supports_function_calling: true
- name: kimi-k2-thinking-turbo
type: chat
max_input_tokens: 262144
input_price: 1.12
output_price: 8.12
supports_vision: true
- name: kimi-k2-thinking
type: chat
max_input_tokens: 262144
input_price: 0.56
output_price: 2.24
supports_vision: true
- provider: deepseek
models:
- name: deepseek-chat
type: chat
max_input_tokens: 64000
input_price: 0.56
output_price: 1.68
max_output_tokens: 8192
supports_function_calling: true
- name: deepseek-reasoner
type: chat
max_input_tokens: 64000
input_price: 0.56
output_price: 1.68
max_output_tokens: 32768
- provider: zhipuai
models:
- name: glm-4.7
type: chat
max_input_tokens: 202752
input_price: 0.56
output_price: 2.24
supports_function_calling: true
- name: glm-4.7:instruct
type: chat
real_name: glm-4.7
max_input_tokens: 202752
input_price: 0.56
output_price: 2.24
patch:
body:
thinking:
type: disabled
supports_function_calling: true
- name: glm-4.7-flash
type: chat
max_input_tokens: 202752
input_price: 0.0
output_price: 0.0
supports_function_calling: true
- name: glm-4.6v
type: chat
max_input_tokens: 65536
input_price: 0.28
output_price: 0.84
supports_vision: true
- name: glm-4.6v-flash
type: chat
max_input_tokens: 65536
input_price: 0.0
output_price: 0.0
supports_vision: true
- name: embedding-3
type: embedding
max_input_tokens: 8192
input_price: 0.07
max_tokens_per_chunk: 8192
default_chunk_size: 2000
- name: rerank
type: reranker
max_input_tokens: 4096
input_price: 0.112
- provider: minimax
models:
- name: minimax-m2.1
type: chat
max_input_tokens: 204800
input_price: 0.294
output_price: 1.176
supports_function_calling: true
- name: minimax-m2.1-lightning
type: chat
max_input_tokens: 204800
input_price: 0.294
output_price: 2.352
supports_function_calling: true
- provider: openrouter
models:
- name: openai/gpt-5.2
type: chat
max_input_tokens: 400000
input_price: 1.75
output_price: 14.0
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: openai/gpt-5
type: chat
max_input_tokens: 400000
input_price: 1.25
output_price: 10.0
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: openai/gpt-5-mini
type: chat
max_input_tokens: 400000
input_price: 0.25
output_price: 2.0
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: openai/gpt-5-nano
type: chat
max_input_tokens: 400000
input_price: 0.05
output_price: 0.4
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4.1
type: chat
max_input_tokens: 1047576
input_price: 2.0
output_price: 8.0
max_output_tokens: 32768
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4o
type: chat
max_input_tokens: 128000
input_price: 2.5
output_price: 10.0
supports_vision: true
supports_function_calling: true
- name: openai/gpt-oss-120b
type: chat
max_input_tokens: 131072
input_price: 0.09
output_price: 0.45
supports_function_calling: true
- name: openai/gpt-oss-20b
type: chat
max_input_tokens: 131072
input_price: 0.04
output_price: 0.16
supports_function_calling: true
- name: google/gemini-2.5-flash
type: chat
max_input_tokens: 1048576
input_price: 0.3
output_price: 2.5
supports_vision: true
supports_function_calling: true
- name: google/gemini-2.5-pro
type: chat
max_input_tokens: 1048576
input_price: 1.25
output_price: 10.0
supports_vision: true
supports_function_calling: true
- name: google/gemini-2.5-flash-lite
type: chat
max_input_tokens: 1048576
input_price: 0.3
output_price: 0.4
supports_vision: true
- name: google/gemini-2.0-flash-001
type: chat
max_input_tokens: 1000000
input_price: 0.15
output_price: 0.6
supports_vision: true
supports_function_calling: true
- name: google/gemini-2.0-flash-lite-001
type: chat
max_input_tokens: 1048576
input_price: 0.075
output_price: 0.3
supports_vision: true
supports_function_calling: true
- name: google/gemma-3-27b-it
type: chat
max_input_tokens: 131072
input_price: 0.1
output_price: 0.2
- name: anthropic/claude-sonnet-4.5
type: chat
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-haiku-4.5
type: chat
max_input_tokens: 200000
input_price: 1.0
output_price: 5.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-opus-4.5
type: chat
max_input_tokens: 200000
input_price: 5.0
output_price: 25.0
max_output_tokens: 8192
require_max_tokens: true
supports_vision: true
supports_function_calling: true
- name: meta-llama/llama-4-maverick
type: chat
max_input_tokens: 1048576
input_price: 0.18
output_price: 0.6
supports_vision: true
supports_function_calling: true
- name: meta-llama/llama-4-scout
type: chat
max_input_tokens: 327680
input_price: 0.08
output_price: 0.3
supports_vision: true
supports_function_calling: true
- name: meta-llama/llama-3.3-70b-instruct
type: chat
max_input_tokens: 131072
input_price: 0.12
output_price: 0.3
- name: mistralai/mistral-large-2512
type: chat
max_input_tokens: 262144
input_price: 0.5
output_price: 1.5
supports_function_calling: true
- name: mistralai/mistral-medium-3.1
type: chat
max_input_tokens: 131072
input_price: 0.4
output_price: 2.0
supports_vision: true
supports_function_calling: true
- name: mistralai/mistral-small-3.2-24b-instruct
type: chat
max_input_tokens: 131072
input_price: 0.1
output_price: 0.3
supports_vision: true
- name: mistralai/devstral-2512
type: chat
max_input_tokens: 262144
# NOTE(review): was 0.5, the only entry in this file with input priced above
# output; 0.05 looks like the intended value - confirm against OpenRouter.
input_price: 0.05
output_price: 0.22
supports_function_calling: true
- name: mistralai/devstral-small
type: chat
max_input_tokens: 131072
input_price: 0.07
output_price: 0.28
supports_function_calling: true
- name: mistralai/codestral-2508
type: chat
max_input_tokens: 256000
input_price: 0.3
output_price: 0.9
supports_function_calling: true
- name: mistralai/ministral-14b-2512
type: chat
max_input_tokens: 262144
input_price: 0.2
output_price: 0.2
supports_function_calling: true
- name: ai21/jamba-large-1.7
type: chat
max_input_tokens: 256000
input_price: 2.0
output_price: 8.0
supports_function_calling: true
- name: ai21/jamba-mini-1.7
type: chat
max_input_tokens: 256000
input_price: 0.2
output_price: 0.4
supports_function_calling: true
- name: cohere/command-a
type: chat
max_input_tokens: 256000
input_price: 2.5
output_price: 10.0
supports_function_calling: true
- name: cohere/command-r7b-12-2024
type: chat
max_input_tokens: 128000
input_price: 0.0375
output_price: 0.15
max_output_tokens: 4096
- name: deepseek/deepseek-v3.2
type: chat
max_input_tokens: 163840
input_price: 0.25
output_price: 0.38
- name: qwen/qwen3-max
type: chat
max_input_tokens: 262144
input_price: 1.2
output_price: 6.0
supports_function_calling: true
- name: qwen/qwen-plus
type: chat
max_input_tokens: 131072
input_price: 0.4
output_price: 1.2
max_output_tokens: 8192
supports_function_calling: true
- name: qwen/qwen3-next-80b-a3b-instruct
type: chat
max_input_tokens: 262144
input_price: 0.1
output_price: 0.8
supports_function_calling: true
- name: qwen/qwen3-next-80b-a3b-thinking
type: chat
max_input_tokens: 262144
input_price: 0.1
output_price: 0.8
# NOTE(review): id previously read qwen/qwen5-235b-a22b-2507; all other
# 235b-a22b entries in this file are qwen3 - confirm against OpenRouter.
- name: qwen/qwen3-235b-a22b-2507
type: chat
max_input_tokens: 262144
input_price: 0.12
output_price: 0.59
supports_function_calling: true
- name: qwen/qwen3-235b-a22b-thinking-2507
type: chat
max_input_tokens: 262144
input_price: 0.118
output_price: 0.118
- name: qwen/qwen3-30b-a3b-instruct-2507
type: chat
max_input_tokens: 131072
input_price: 0.2
output_price: 0.8
- name: qwen/qwen3-30b-a3b-thinking-2507
type: chat
max_input_tokens: 262144
input_price: 0.071
output_price: 0.285
- name: qwen/qwen3-vl-32b-instruct
type: chat
max_input_tokens: 262144
input_price: 0.35
output_price: 1.1
supports_vision: true
- name: qwen/qwen3-vl-8b-instruct
type: chat
max_input_tokens: 262144
input_price: 0.08
output_price: 0.5
supports_vision: true
- name: qwen/qwen3-coder-plus
type: chat
max_input_tokens: 128000
input_price: 1.0
output_price: 5.0
supports_function_calling: true
- name: qwen/qwen3-coder-flash
type: chat
max_input_tokens: 128000
input_price: 0.3
output_price: 1.5
supports_function_calling: true
- name: qwen/qwen3-coder
type: chat
max_input_tokens: 262144
input_price: 0.22
output_price: 0.95
supports_function_calling: true
- name: qwen/qwen3-coder-30b-a3b-instruct
type: chat
max_input_tokens: 262144
input_price: 0.052
output_price: 0.207
supports_function_calling: true
- name: moonshotai/kimi-k2.5
type: chat
max_input_tokens: 262144
input_price: 0.57
output_price: 2.85
supports_vision: true
supports_function_calling: true
- name: moonshotai/kimi-k2-0905
type: chat
max_input_tokens: 262144
input_price: 0.296
output_price: 1.185
supports_vision: true
supports_function_calling: true
- name: moonshotai/kimi-k2-thinking
type: chat
max_input_tokens: 262144
input_price: 0.45
output_price: 2.35
supports_function_calling: true
- name: moonshotai/kimi-dev-72b
type: chat
max_input_tokens: 131072
input_price: 0.29
output_price: 1.15
supports_function_calling: true
- name: x-ai/grok-4.1-fast
type: chat
max_input_tokens: 2000000
input_price: 0.2
output_price: 0.5
supports_function_calling: true
- name: x-ai/grok-code-fast-1
type: chat
max_input_tokens: 256000
input_price: 0.2
output_price: 1.5
supports_function_calling: true
- name: amazon/nova-premier-v1
type: chat
max_input_tokens: 1000000
input_price: 2.5
output_price: 12.5
supports_vision: true
- name: amazon/nova-pro-v1
type: chat
max_input_tokens: 300000
input_price: 0.8
output_price: 3.2
max_output_tokens: 5120
supports_vision: true
- name: amazon/nova-lite-v1
type: chat
max_input_tokens: 300000
input_price: 0.06
output_price: 0.24
max_output_tokens: 5120
supports_vision: true
- name: amazon/nova-micro-v1
type: chat
max_input_tokens: 128000
input_price: 0.035
output_price: 0.14
max_output_tokens: 5120
- name: perplexity/sonar-pro
type: chat
max_input_tokens: 200000
input_price: 3.0
output_price: 15.0
- name: perplexity/sonar
type: chat
max_input_tokens: 127072
input_price: 1.0
output_price: 1.0
- name: perplexity/sonar-reasoning-pro
type: chat
max_input_tokens: 128000
input_price: 2.0
output_price: 8.0
patch:
body:
include_reasoning: true
- name: perplexity/sonar-deep-research
type: chat
max_input_tokens: 200000
input_price: 2.0
output_price: 8.0
patch:
body:
include_reasoning: true
# minimax/* and z-ai/* chat entries. The first three declare
# supports_function_calling only. z-ai/glm-4.6v is the one vision entry here
# and does not declare supports_function_calling.
- name: minimax/minimax-m2.1
type: chat
max_input_tokens: 196608
input_price: 0.12
output_price: 0.48
supports_function_calling: true
- name: z-ai/glm-4.7
type: chat
max_input_tokens: 202752
input_price: 0.16
output_price: 0.8
supports_function_calling: true
- name: z-ai/glm-4.7-flash
type: chat
max_input_tokens: 202752
input_price: 0.07
output_price: 0.4
supports_function_calling: true
- name: z-ai/glm-4.6v
type: chat
max_input_tokens: 131072
input_price: 0.3
output_price: 0.9
supports_vision: true
# Provider: github.
# No entry in this block carries input_price or output_price, unlike the
# priced providers elsewhere in this file.
- provider: github
models:
# gpt-* chat entries; limits match the openai provider's entries of the same
# name.
- name: gpt-5
type: chat
max_input_tokens: 400000
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: gpt-5-mini
type: chat
max_input_tokens: 400000
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: gpt-5-nano
type: chat
max_input_tokens: 400000
max_output_tokens: 128000
supports_vision: true
supports_function_calling: true
- name: gpt-4.1
type: chat
max_input_tokens: 1047576
max_output_tokens: 32768
supports_vision: true
supports_function_calling: true
# NOTE(review): gpt-4o is the only gpt-* chat entry here without
# supports_vision, while the openai provider's gpt-4o entry sets it. Confirm
# the omission is intentional.
- name: gpt-4o
type: chat
max_input_tokens: 128000
max_output_tokens: 16384
supports_function_calling: true
# text-embedding-3-* embedding entries share identical chunking limits.
- name: text-embedding-3-large
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 2000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 2000
max_batch_size: 100
# llama-* chat entries; only the two llama-4 entries set supports_vision.
- name: llama-4-maverick-17b-128e-instruct-fp8
type: chat
max_input_tokens: 1048576
supports_vision: true
- name: llama-4-scout-17b-16e-instruct
type: chat
max_input_tokens: 327680
supports_vision: true
- name: llama-3.3-70b-instruct
type: chat
max_input_tokens: 131072
# mistral-* and codestral chat entries, all with function calling.
- name: mistral-medium-2505
type: chat
max_input_tokens: 131072
supports_function_calling: true
- name: mistral-small-2503
type: chat
max_input_tokens: 131072
supports_function_calling: true
- name: codestral-2501
type: chat
max_input_tokens: 256000
supports_function_calling: true
# cohere-embed-v3-* embedding entries: 512-token chunks, batches of 96.
- name: cohere-embed-v3-english
type: embedding
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: cohere-embed-v3-multilingual
type: embedding
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
# The remaining chat entries specify only a context size: no capability flags
# and no max_output_tokens.
- name: deepseek-r1-0528
type: chat
max_input_tokens: 163840
- name: deepseek-v3-0324
type: chat
max_input_tokens: 163840
- name: mai-ds-r1
type: chat
max_input_tokens: 163840
- name: phi-4
type: chat
max_input_tokens: 16384
- name: phi-4-mini-instruct
type: chat
max_input_tokens: 131072
- name: phi-4-reasoning
type: chat
max_input_tokens: 33792
- name: phi-4-mini-reasoning
type: chat
max_input_tokens: 131072
- name: grok-3
type: chat
max_input_tokens: 131072
- name: grok-3-mini
type: chat
max_input_tokens: 131072
# Provider: deepinfra.
# Chat entries come first, followed by five embedding entries. No chat entry
# in this block sets max_output_tokens.
# NOTE(review): the unit of input_price/output_price is not stated in this
# file — presumably USD per million tokens; confirm against the consumer's
# documentation.
- provider: deepinfra
models:
- name: openai/gpt-oss-120b
type: chat
max_input_tokens: 131072
input_price: 0.09
output_price: 0.45
supports_function_calling: true
- name: openai/gpt-oss-20b
type: chat
max_input_tokens: 131072
input_price: 0.04
output_price: 0.16
supports_function_calling: true
# The two Llama-4 entries declare vision but not function calling.
- name: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
type: chat
max_input_tokens: 1048576
input_price: 0.18
output_price: 0.6
supports_vision: true
- name: meta-llama/Llama-4-Scout-17B-16E-Instruct
type: chat
max_input_tokens: 327680
input_price: 0.08
output_price: 0.3
supports_vision: true
# Qwen entries: each "-Thinking" variant has the same context size and prices
# as its "-Instruct" sibling but omits supports_function_calling.
- name: Qwen/Qwen3-Next-80B-A3B-Instruct
type: chat
max_input_tokens: 262144
input_price: 0.14
output_price: 1.4
supports_function_calling: true
- name: Qwen/Qwen3-Next-80B-A3B-Thinking
type: chat
max_input_tokens: 262144
input_price: 0.14
output_price: 1.4
- name: Qwen/Qwen3-235B-A22B-Instruct-2507
type: chat
max_input_tokens: 131072
input_price: 0.13
output_price: 0.6
supports_function_calling: true
- name: Qwen/Qwen3-235B-A22B-Thinking-2507
type: chat
max_input_tokens: 131072
input_price: 0.13
output_price: 0.6
- name: Qwen/Qwen3-Coder-480B-A35B-Instruct
type: chat
max_input_tokens: 131072
input_price: 0.4
output_price: 1.6
supports_function_calling: true
- name: Qwen/Qwen3-Coder-30B-A3B-Instruct
type: chat
max_input_tokens: 262144
input_price: 0.07
output_price: 0.27
supports_function_calling: true
- name: Qwen/Qwen3-30B-A3B
type: chat
max_input_tokens: 40960
input_price: 0.1
output_price: 0.3
- name: Qwen/Qwen3-VL-8B-Instruct
type: chat
max_input_tokens: 262144
input_price: 0.18
output_price: 0.69
supports_vision: true
- name: deepseek-ai/DeepSeek-V3.2
type: chat
max_input_tokens: 163840
input_price: 0.26
output_price: 0.39
supports_function_calling: true
# These two entries set neither capability flag.
- name: google/gemma-3-27b-it
type: chat
max_input_tokens: 131072
input_price: 0.1
output_price: 0.2
- name: mistralai/Mistral-Small-3.2-24B-Instruct-2506
type: chat
max_input_tokens: 32768
input_price: 0.06
output_price: 0.12
# moonshotai entries: all three declare function calling, none declares vision.
- name: moonshotai/Kimi-K2.5
type: chat
max_input_tokens: 262144
input_price: 0.5
output_price: 2.8
supports_function_calling: true
- name: moonshotai/Kimi-K2-Instruct-0905
type: chat
max_input_tokens: 262144
input_price: 0.5
output_price: 2.0
supports_function_calling: true
- name: moonshotai/Kimi-K2-Thinking
type: chat
max_input_tokens: 262144
input_price: 0.55
output_price: 2.5
supports_function_calling: true
- name: MiniMaxAI/MiniMax-M2.1
type: chat
max_input_tokens: 262144
input_price: 0.28
output_price: 1.2
supports_function_calling: true
- name: zai-org/GLM-4.7
type: chat
max_input_tokens: 202752
input_price: 0.43
output_price: 1.75
supports_function_calling: true
- name: zai-org/GLM-4.6V
type: chat
max_input_tokens: 131072
input_price: 0.3
output_price: 0.9
supports_vision: true
# Embedding entries: all priced at 0.01 with max_batch_size 100. BAAI/bge-m3
# is the only one with 8192-token chunks (default 2000); the rest use 512-token
# chunks (default 1000).
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-m3
type: embedding
input_price: 0.01
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
- name: intfloat/e5-large-v2
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: intfloat/multilingual-e5-large
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-large
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
# Provider: jina.
# Three embedding entries followed by two reranker entries; every entry has
# input_price 0.0.
- provider: jina
models:
- name: jina-embeddings-v3
type: embedding
input_price: 0.0
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
- name: jina-clip-v2
type: embedding
input_price: 0.0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: jina-colbert-v2
type: embedding
input_price: 0.0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: jina-reranker-v2-base-multilingual
type: reranker
max_input_tokens: 8192
input_price: 0.0
# jina-colbert-v2 is listed a second time here, as type reranker (it also
# appears above as type embedding).
# NOTE(review): presumably the consumer distinguishes entries by name and type
# together — confirm that the repeated name is tolerated.
- name: jina-colbert-v2
type: reranker
max_input_tokens: 8192
input_price: 0.0
# Provider: voyageai.
# Three embedding entries followed by two reranker entries.
- provider: voyageai
models:
# Embedding entries share max_tokens_per_chunk 32000 and max_batch_size 128.
# Unlike the embedding entries of the other providers in this part of the
# file, they also set max_input_tokens, and to a value far above the chunk
# limit.
# NOTE(review): presumably max_input_tokens is the total token budget per
# request (across the batch) rather than a per-text limit — confirm.
- name: voyage-3-large
type: embedding
max_input_tokens: 120000
input_price: 0.18
max_tokens_per_chunk: 32000
default_chunk_size: 2000
max_batch_size: 128
- name: voyage-3
type: embedding
max_input_tokens: 320000
input_price: 0.06
max_tokens_per_chunk: 32000
default_chunk_size: 2000
max_batch_size: 128
# voyage-3-lite uses a smaller default_chunk_size (1000) than the two above.
- name: voyage-3-lite
type: embedding
max_input_tokens: 1000000
input_price: 0.02
max_tokens_per_chunk: 32000
default_chunk_size: 1000
max_batch_size: 128
# Reranker entries specify only a context size and an input price.
- name: rerank-2
type: reranker
max_input_tokens: 16000
input_price: 0.05
- name: rerank-2-lite
type: reranker
max_input_tokens: 8000
input_price: 0.02