version: 0.30.0 list: - provider: openai models: - name: gpt-5.2 type: chat max_input_tokens: 400000 input_price: 1.75 output_price: 14.0 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: gpt-5 type: chat max_input_tokens: 400000 input_price: 1.25 output_price: 10.0 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: gpt-5-mini type: chat max_input_tokens: 400000 input_price: 0.25 output_price: 2.0 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: gpt-5-nano type: chat max_input_tokens: 400000 input_price: 0.05 output_price: 0.4 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: gpt-4.1 type: chat max_input_tokens: 1047576 input_price: 2.0 output_price: 8.0 max_output_tokens: 32768 supports_vision: true supports_function_calling: true - name: gpt-4o type: chat max_input_tokens: 128000 input_price: 2.5 output_price: 10.0 max_output_tokens: 16384 supports_vision: true supports_function_calling: true - name: gpt-4-turbo type: chat max_input_tokens: 128000 input_price: 10.0 output_price: 30.0 max_output_tokens: 4096 supports_vision: true supports_function_calling: true - name: gpt-3.5-turbo type: chat max_input_tokens: 16385 input_price: 0.5 output_price: 1.5 max_output_tokens: 4096 supports_function_calling: true - name: text-embedding-3-large type: embedding input_price: 0.13 max_tokens_per_chunk: 8191 default_chunk_size: 2000 max_batch_size: 100 - name: text-embedding-3-small type: embedding input_price: 0.02 max_tokens_per_chunk: 8191 default_chunk_size: 2000 max_batch_size: 100 - provider: gemini models: - name: gemini-2.5-flash type: chat max_input_tokens: 1048576 input_price: 0.0 output_price: 0.0 max_output_tokens: 65536 supports_vision: true supports_function_calling: true - name: gemini-2.5-pro type: chat max_input_tokens: 1048576 input_price: 0.0 output_price: 0.0 max_output_tokens: 65536 supports_vision: true 
supports_function_calling: true - name: gemini-2.5-flash-lite type: chat max_input_tokens: 1000000 input_price: 0.0 output_price: 0.0 max_output_tokens: 64000 supports_vision: true supports_function_calling: true - name: gemini-3-pro-preview type: chat max_input_tokens: 1048576 supports_vision: true supports_function_calling: true - name: gemini-3-flash-preview type: chat max_input_tokens: 1048576 supports_vision: true supports_function_calling: true - name: gemini-2.0-flash type: chat max_input_tokens: 1048576 input_price: 0.0 output_price: 0.0 max_output_tokens: 8192 supports_vision: true supports_function_calling: true - name: gemini-2.0-flash-lite type: chat max_input_tokens: 1048576 input_price: 0.0 output_price: 0.0 max_output_tokens: 8192 supports_vision: true supports_function_calling: true - name: gemma-3-27b-it type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 max_output_tokens: 8192 - name: text-embedding-004 type: embedding input_price: 0.0 max_tokens_per_chunk: 2048 default_chunk_size: 1500 max_batch_size: 100 - provider: claude models: - name: claude-sonnet-4-5-20250929 type: chat max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: claude-sonnet-4-5-20250929:thinking type: chat real_name: claude-sonnet-4-5-20250929 max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 patch: body: temperature: null top_p: null thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true supports_function_calling: true - name: claude-haiku-4-5-20251001 type: chat max_input_tokens: 200000 input_price: 1.0 output_price: 5.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: claude-haiku-4-5-20251001:thinking type: chat real_name: claude-haiku-4-5-20251001 max_input_tokens: 200000 input_price: 1.0 output_price: 
5.0 patch: body: temperature: null top_p: null thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true supports_function_calling: true - name: claude-opus-4-5-20251101 type: chat max_input_tokens: 200000 input_price: 5.0 output_price: 25.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: claude-opus-4-5-20251101:thinking type: chat real_name: claude-opus-4-5-20251101 max_input_tokens: 200000 input_price: 5.0 output_price: 25.0 patch: body: temperature: null top_p: null thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true supports_function_calling: true - provider: mistral models: - name: mistral-large-latest type: chat input_price: 0.5 output_price: 1.5 max_input_tokens: 262144 supports_vision: true supports_function_calling: true - name: mistral-medium-latest type: chat max_input_tokens: 131072 input_price: 0.4 output_price: 2.0 supports_vision: true supports_function_calling: true - name: mistral-small-latest type: chat max_input_tokens: 32768 input_price: 0.1 output_price: 0.3 supports_vision: true supports_function_calling: true - name: magistral-medium-latest type: chat max_input_tokens: 131072 input_price: 2.0 output_price: 5.0 - name: magistral-small-latest type: chat max_input_tokens: 131072 input_price: 0.5 output_price: 1.5 - name: devstral-medium-latest type: chat max_input_tokens: 262144 input_price: 0.4 output_price: 2.0 supports_function_calling: true - name: devstral-small-latest type: chat max_input_tokens: 262144 input_price: 0.1 output_price: 0.3 supports_function_calling: true - name: codestral-latest type: chat max_input_tokens: 262144 input_price: 0.3 output_price: 0.9 supports_function_calling: true - name: ministral-14b-latest type: chat max_input_tokens: 262144 input_price: 0.2 output_price: 0.2 supports_function_calling: true - name: mistral-embed type: 
embedding max_input_tokens: 8092 input_price: 0.1 max_tokens_per_chunk: 8092 default_chunk_size: 2000 - provider: ai21 models: - name: jamba-large type: chat max_input_tokens: 256000 input_price: 2.0 output_price: 8.0 supports_function_calling: true - name: jamba-mini type: chat max_input_tokens: 256000 input_price: 0.2 output_price: 0.4 supports_function_calling: true - provider: cohere models: - name: command-a-03-2025 type: chat max_input_tokens: 262144 input_price: 2.5 output_price: 10.0 max_output_tokens: 8192 supports_function_calling: true - name: command-a-reasoning-08-2025 type: chat max_input_tokens: 262144 input_price: 2.5 output_price: 10.0 max_output_tokens: 32768 - name: command-a-vision-07-2025 type: chat max_input_tokens: 131072 input_price: 2.5 output_price: 10.0 max_output_tokens: 8192 supports_vision: true - name: command-r7b-12-2024 type: chat max_input_tokens: 131072 input_price: 0.0375 output_price: 0.15 max_output_tokens: 4096 - name: embed-v4.0 type: embedding input_price: 0.12 max_tokens_per_chunk: 2048 default_chunk_size: 2000 max_batch_size: 96 - name: embed-english-v3.0 type: embedding input_price: 0.1 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 96 - name: embed-multilingual-v3.0 type: embedding input_price: 0.1 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 96 - name: rerank-v3.5 type: reranker max_input_tokens: 4096 - name: rerank-english-v3.0 type: reranker max_input_tokens: 4096 - name: rerank-multilingual-v3.0 type: reranker max_input_tokens: 4096 - provider: xai models: - name: grok-4-1-fast-non-reasoning type: chat max_input_tokens: 2000000 input_price: 0.2 output_price: 0.5 supports_function_calling: true - name: grok-4-1-fast-reasoning type: chat max_input_tokens: 2000000 input_price: 0.2 output_price: 0.5 supports_function_calling: true - name: grok-code-fast-1 type: chat max_input_tokens: 256000 input_price: 0.2 output_price: 1.5 supports_function_calling: true - provider: perplexity 
models: - name: sonar-pro type: chat max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 - name: sonar type: chat max_input_tokens: 128000 input_price: 1.0 output_price: 1.0 - name: sonar-reasoning-pro type: chat max_input_tokens: 128000 input_price: 2.0 output_price: 8.0 - name: sonar-deep-research type: chat max_input_tokens: 128000 input_price: 2.0 output_price: 8.0 - provider: groq models: - name: openai/gpt-oss-120b type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 supports_function_calling: true - name: openai/gpt-oss-20b type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 supports_function_calling: true - name: meta-llama/llama-4-maverick-17b-128e-instruct type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 supports_vision: true supports_function_calling: true - name: meta-llama/llama-4-scout-17b-16e-instruct type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 supports_vision: true supports_function_calling: true - name: llama-3.3-70b-versatile type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 supports_function_calling: true - name: moonshotai/kimi-k2-instruct-0905 type: chat max_input_tokens: 262144 input_price: 0.0 output_price: 0.0 supports_function_calling: true - name: qwen/qwen3-32b type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 - name: groq/compound type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 - name: groq/compound-mini type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 - provider: vertexai models: - name: gemini-2.5-flash type: chat max_input_tokens: 1048576 input_price: 0.3 output_price: 2.5 max_output_tokens: 65536 supports_vision: true supports_function_calling: true - name: gemini-2.5-pro type: chat max_input_tokens: 1048576 input_price: 1.25 output_price: 10.0 max_output_tokens: 65536 supports_vision: true supports_function_calling: true - name: gemini-2.5-flash-lite 
type: chat max_input_tokens: 1048576 input_price: 0.3 output_price: 0.4 max_output_tokens: 65536 supports_vision: true supports_function_calling: true - name: gemini-3-pro-preview type: chat max_input_tokens: 1048576 supports_vision: true supports_function_calling: true - name: gemini-3-flash-preview type: chat max_input_tokens: 1048576 supports_vision: true supports_function_calling: true - name: gemini-2.0-flash-001 type: chat max_input_tokens: 1048576 input_price: 0.15 output_price: 0.6 max_output_tokens: 8192 supports_vision: true supports_function_calling: true - name: gemini-2.0-flash-lite-001 type: chat max_input_tokens: 1048576 input_price: 0.075 output_price: 0.3 max_output_tokens: 8192 supports_vision: true supports_function_calling: true - name: claude-sonnet-4-5@20250929 type: chat max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: claude-sonnet-4-5@20250929:thinking type: chat real_name: claude-sonnet-4-5@20250929 max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 patch: body: temperature: null top_p: null thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true - name: claude-haiku-4-5@20251001 type: chat max_input_tokens: 200000 input_price: 1.0 output_price: 5.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: claude-haiku-4-5@20251001:thinking type: chat real_name: claude-haiku-4-5@20251001 max_input_tokens: 200000 input_price: 1.0 output_price: 5.0 patch: body: temperature: null top_p: null thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true - name: claude-opus-4-5@20251101 type: chat max_input_tokens: 200000 input_price: 5.0 output_price: 25.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true 
supports_function_calling: true - name: claude-opus-4-5@20251101:thinking type: chat real_name: claude-opus-4-5@20251101 max_input_tokens: 200000 input_price: 5.0 output_price: 25.0 patch: body: temperature: null top_p: null thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true - name: text-embedding-005 type: embedding max_input_tokens: 20000 input_price: 0.025 max_tokens_per_chunk: 2048 default_chunk_size: 1500 max_batch_size: 5 - name: text-multilingual-embedding-002 type: embedding max_input_tokens: 20000 input_price: 0.2 max_tokens_per_chunk: 2048 default_chunk_size: 1500 max_batch_size: 5 - provider: bedrock models: - name: us.anthropic.claude-sonnet-4-5-20250929-v1:0 type: chat max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: us.anthropic.claude-sonnet-4-5-20250929-v1:0:thinking type: chat real_name: us.anthropic.claude-sonnet-4-5-20250929-v1:0 max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 patch: body: inferenceConfig: temperature: null topP: null additionalModelRequestFields: thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true - name: us.anthropic.claude-haiku-4-5-20251001-v1:0 type: chat max_input_tokens: 200000 input_price: 1.0 output_price: 5.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: us.anthropic.claude-haiku-4-5-20251001-v1:0:thinking type: chat real_name: us.anthropic.claude-haiku-4-5-20251001-v1:0 max_input_tokens: 200000 input_price: 1.0 output_price: 5.0 patch: body: inferenceConfig: temperature: null topP: null additionalModelRequestFields: thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true - name: us.anthropic.claude-opus-4-5-20251101-v1:0 type: chat 
max_input_tokens: 200000 input_price: 5.0 output_price: 25.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: us.anthropic.claude-opus-4-5-20251101-v1:0:thinking type: chat real_name: us.anthropic.claude-opus-4-5-20251101-v1:0 max_input_tokens: 200000 input_price: 5.0 output_price: 25.0 patch: body: inferenceConfig: temperature: null topP: null additionalModelRequestFields: thinking: type: enabled budget_tokens: 16000 max_output_tokens: 24000 require_max_tokens: true supports_vision: true - name: us.meta.llama4-maverick-17b-instruct-v1:0 type: chat max_input_tokens: 131072 input_price: 0.24 output_price: 0.97 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: us.meta.llama4-scout-17b-instruct-v1:0 type: chat max_input_tokens: 131072 input_price: 0.17 output_price: 0.66 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: us.meta.llama3-3-70b-instruct-v1:0 type: chat max_input_tokens: 131072 input_price: 0.72 output_price: 0.72 max_output_tokens: 8192 require_max_tokens: true supports_function_calling: true - name: us.amazon.nova-premier-v1:0 type: chat max_input_tokens: 300000 input_price: 2.5 output_price: 12.5 max_output_tokens: 5120 - name: us.amazon.nova-pro-v1:0 type: chat max_input_tokens: 300000 input_price: 0.8 output_price: 3.2 max_output_tokens: 5120 supports_vision: true - name: us.amazon.nova-lite-v1:0 type: chat max_input_tokens: 300000 input_price: 0.06 output_price: 0.24 max_output_tokens: 5120 supports_vision: true - name: us.amazon.nova-micro-v1:0 type: chat max_input_tokens: 128000 input_price: 0.035 output_price: 0.14 max_output_tokens: 5120 - name: cohere.embed-english-v3 type: embedding input_price: 0.1 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 96 - name: cohere.embed-multilingual-v3 type: embedding input_price: 0.1 max_tokens_per_chunk: 512 
default_chunk_size: 1000 max_batch_size: 96 - name: us.deepseek.r1-v1:0 type: chat max_input_tokens: 128000 input_price: 1.35 output_price: 5.4 - provider: cloudflare models: - name: '@cf/meta/llama-4-scout-17b-16e-instruct' type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 max_output_tokens: 2048 require_max_tokens: true - name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast' type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 max_output_tokens: 2048 require_max_tokens: true - name: '@cf/qwen/qwen3-30b-a3b-fp8' type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 max_output_tokens: 2048 require_max_tokens: true - name: '@cf/qwen/qwen2.5-coder-32b-instruct' type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 max_output_tokens: 2048 require_max_tokens: true - name: '@cf/google/gemma-3-12b-it' type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 max_output_tokens: 2048 require_max_tokens: true - name: '@cf/mistralai/mistral-small-3.1-24b-instruct' type: chat max_input_tokens: 131072 input_price: 0.0 output_price: 0.0 max_output_tokens: 2048 require_max_tokens: true - name: '@cf/baai/bge-large-en-v1.5' type: embedding input_price: 0.0 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 100 - provider: ernie models: - name: ernie-4.5-turbo-128k type: chat max_input_tokens: 131072 input_price: 0.112 output_price: 0.448 - name: ernie-4.5-turbo-vl-32k type: chat max_input_tokens: 32768 input_price: 0.42 output_price: 1.26 supports_vision: true - name: ernie-5.0-thinking-preview type: chat max_input_tokens: 131072 input_price: 1.4 output_price: 5.6 - name: ernie-x1.1-preview type: chat max_input_tokens: 65536 input_price: 0.14 output_price: 0.56 - name: bge-large-zh type: embedding input_price: 0.07 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 16 - name: bge-large-en type: embedding input_price: 0.07 max_tokens_per_chunk: 512 default_chunk_size: 
1000 max_batch_size: 16 - name: bce-reranker-base type: reranker max_input_tokens: 1024 input_price: 0.07 - provider: qianwen models: - name: qwen3-max type: chat max_input_tokens: 262144 supports_function_calling: true - name: qwen-plus type: chat max_input_tokens: 131072 supports_function_calling: true - name: qwen-flash type: chat max_input_tokens: 1000000 supports_function_calling: true - name: qwen3-vl-plus type: chat max_input_tokens: 262144 supports_vision: true - name: qwen3-vl-flash type: chat max_input_tokens: 262144 supports_vision: true - name: qwen-coder-plus type: chat max_input_tokens: 1000000 - name: qwen-coder-flash type: chat max_input_tokens: 1000000 - name: qwen3-next-80b-a3b-instruct type: chat max_input_tokens: 131072 input_price: 0.14 output_price: 0.56 supports_function_calling: true - name: qwen3-next-80b-a3b-thinking type: chat max_input_tokens: 131072 input_price: 0.14 output_price: 1.4 - name: qwen3-235b-a22b-instruct-2507 type: chat max_input_tokens: 131072 input_price: 0.28 output_price: 1.12 supports_function_calling: true - name: qwen3-235b-a22b-thinking-2507 type: chat max_input_tokens: 131072 input_price: 0.28 output_price: 2.8 - name: qwen3-30b-a3b-instruct-2507 type: chat max_input_tokens: 131072 input_price: 0.105 output_price: 0.42 supports_function_calling: true - name: qwen3-30b-a3b-thinking-2507 type: chat max_input_tokens: 131072 input_price: 0.105 output_price: 1.05 - name: qwen3-vl-32b-instruct type: chat max_input_tokens: 131072 input_price: 0.28 output_price: 1.12 supports_vision: true - name: qwen3-vl-8b-instruct type: chat max_input_tokens: 131072 input_price: 0.07 output_price: 0.28 supports_vision: true - name: qwen3-coder-480b-a35b-instruct type: chat max_input_tokens: 262144 input_price: 1.26 output_price: 5.04 - name: qwen3-coder-30b-a3b-instruct type: chat max_input_tokens: 262144 input_price: 0.315 output_price: 1.26 - name: deepseek-v3.2-exp type: chat max_input_tokens: 131072 input_price: 0.28 output_price: 
0.42 - name: text-embedding-v4 type: embedding input_price: 0.1 max_tokens_per_chunk: 8192 default_chunk_size: 2000 max_batch_size: 10 - name: text-embedding-v3 type: embedding input_price: 0.1 max_tokens_per_chunk: 8192 default_chunk_size: 2000 max_batch_size: 10 - provider: hunyuan models: - name: hunyuan-2.0-instruct-20251111 type: chat max_input_tokens: 131072 input_price: 0.112 output_price: 0.28 supports_function_calling: true - name: hunyuan-2.0-thinking-20251109 type: chat max_input_tokens: 131072 input_price: 0.14 output_price: 0.56 supports_function_calling: true - name: hunyuan-vision-1.5-instruct type: chat max_input_tokens: 24576 input_price: 0.42 output_price: 1.26 supports_vision: true - name: hunyuan-embedding type: embedding input_price: 0.01 max_tokens_per_chunk: 1024 default_chunk_size: 1000 max_batch_size: 100 - provider: moonshot models: - name: kimi-k2.5 type: chat max_input_tokens: 262144 input_price: 0.56 output_price: 2.94 supports_vision: true supports_function_calling: true - name: kimi-k2-turbo-preview type: chat max_input_tokens: 262144 input_price: 1.12 output_price: 8.12 supports_vision: true supports_function_calling: true - name: kimi-k2-0905-preview type: chat max_input_tokens: 262144 input_price: 0.56 output_price: 2.24 supports_vision: true supports_function_calling: true - name: kimi-k2-thinking-turbo type: chat max_input_tokens: 262144 input_price: 1.12 output_price: 8.12 supports_vision: true - name: kimi-k2-thinking type: chat max_input_tokens: 262144 input_price: 0.56 output_price: 2.24 supports_vision: true - provider: deepseek models: - name: deepseek-chat type: chat max_input_tokens: 64000 input_price: 0.56 output_price: 1.68 max_output_tokens: 8192 supports_function_calling: true - name: deepseek-reasoner type: chat max_input_tokens: 64000 input_price: 0.56 output_price: 1.68 max_output_tokens: 32768 - provider: zhipuai models: - name: glm-4.7 type: chat max_input_tokens: 202752 input_price: 0.56 output_price: 2.24 
supports_function_calling: true - name: glm-4.7:instruct type: chat real_name: glm-4.7 max_input_tokens: 202752 input_price: 0.56 output_price: 2.24 patch: body: thinking: type: disabled supports_function_calling: true - name: glm-4.7-flash type: chat max_input_tokens: 202752 input_price: 0.0 output_price: 0.0 supports_function_calling: true - name: glm-4.6v type: chat max_input_tokens: 65536 input_price: 0.28 output_price: 0.84 supports_vision: true - name: glm-4.6v-flash type: chat max_input_tokens: 65536 input_price: 0.0 output_price: 0.0 supports_vision: true - name: embedding-3 type: embedding max_input_tokens: 8192 input_price: 0.07 max_tokens_per_chunk: 8192 default_chunk_size: 2000 - name: rerank type: reranker max_input_tokens: 4096 input_price: 0.112 - provider: minimax models: - name: minimax-m2.1 type: chat max_input_tokens: 204800 input_price: 0.294 output_price: 1.176 supports_function_calling: true - name: minimax-m2.1-lightning type: chat max_input_tokens: 204800 input_price: 0.294 output_price: 2.352 supports_function_calling: true - provider: openrouter models: - name: openai/gpt-5.2 type: chat max_input_tokens: 400000 input_price: 1.75 output_price: 14.0 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: openai/gpt-5 type: chat max_input_tokens: 400000 input_price: 1.25 output_price: 10.0 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: openai/gpt-5-mini type: chat max_input_tokens: 400000 input_price: 0.25 output_price: 2.0 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: openai/gpt-5-nano type: chat max_input_tokens: 400000 input_price: 0.05 output_price: 0.4 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: openai/gpt-4.1 type: chat max_input_tokens: 1047576 input_price: 2.0 output_price: 8.0 max_output_tokens: 32768 supports_vision: true supports_function_calling: true - name: openai/gpt-4o 
type: chat max_input_tokens: 128000 input_price: 2.5 output_price: 10.0 supports_vision: true supports_function_calling: true - name: openai/gpt-oss-120b type: chat max_input_tokens: 131072 input_price: 0.09 output_price: 0.45 supports_function_calling: true - name: openai/gpt-oss-20b type: chat max_input_tokens: 131072 input_price: 0.04 output_price: 0.16 supports_function_calling: true - name: google/gemini-2.5-flash type: chat max_input_tokens: 1048576 input_price: 0.3 output_price: 2.5 supports_vision: true supports_function_calling: true - name: google/gemini-2.5-pro type: chat max_input_tokens: 1048576 input_price: 1.25 output_price: 10.0 supports_vision: true supports_function_calling: true - name: google/gemini-2.5-flash-lite type: chat max_input_tokens: 1048576 input_price: 0.3 output_price: 0.4 supports_vision: true - name: google/gemini-2.0-flash-001 type: chat max_input_tokens: 1000000 input_price: 0.15 output_price: 0.6 supports_vision: true supports_function_calling: true - name: google/gemini-2.0-flash-lite-001 type: chat max_input_tokens: 1048576 input_price: 0.075 output_price: 0.3 supports_vision: true supports_function_calling: true - name: google/gemma-3-27b-it type: chat max_input_tokens: 131072 input_price: 0.1 output_price: 0.2 - name: anthropic/claude-sonnet-4.5 type: chat max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: anthropic/claude-haiku-4.5 type: chat max_input_tokens: 200000 input_price: 1.0 output_price: 5.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: anthropic/claude-opus-4.5 type: chat max_input_tokens: 200000 input_price: 5.0 output_price: 25.0 max_output_tokens: 8192 require_max_tokens: true supports_vision: true supports_function_calling: true - name: meta-llama/llama-4-maverick type: chat max_input_tokens: 1048576 input_price: 0.18 
output_price: 0.6 supports_vision: true supports_function_calling: true - name: meta-llama/llama-4-scout type: chat max_input_tokens: 327680 input_price: 0.08 output_price: 0.3 supports_vision: true supports_function_calling: true - name: meta-llama/llama-3.3-70b-instruct type: chat max_input_tokens: 131072 input_price: 0.12 output_price: 0.3 - name: mistralai/mistral-large-2512 type: chat max_input_tokens: 262144 input_price: 0.5 output_price: 1.5 supports_function_calling: true - name: mistralai/mistral-medium-3.1 type: chat max_input_tokens: 131072 input_price: 0.4 output_price: 2.0 supports_vision: true supports_function_calling: true - name: mistralai/mistral-small-3.2-24b-instruct type: chat max_input_tokens: 131072 input_price: 0.1 output_price: 0.3 supports_vision: true - name: mistralai/devstral-2512 type: chat max_input_tokens: 262144 input_price: 0.5 output_price: 0.22 supports_function_calling: true - name: mistralai/devstral-small type: chat max_input_tokens: 131072 input_price: 0.07 output_price: 0.28 supports_function_calling: true - name: mistralai/codestral-2508 type: chat max_input_tokens: 256000 input_price: 0.3 output_price: 0.9 supports_function_calling: true - name: mistralai/ministral-14b-2512 type: chat max_input_tokens: 262144 input_price: 0.2 output_price: 0.2 supports_function_calling: true - name: ai21/jamba-large-1.7 type: chat max_input_tokens: 256000 input_price: 2.0 output_price: 8.0 supports_function_calling: true - name: ai21/jamba-mini-1.7 type: chat max_input_tokens: 256000 input_price: 0.2 output_price: 0.4 supports_function_calling: true - name: cohere/command-a type: chat max_input_tokens: 256000 input_price: 2.5 output_price: 10.0 supports_function_calling: true - name: cohere/command-r7b-12-2024 type: chat max_input_tokens: 128000 input_price: 0.0375 output_price: 0.15 max_output_tokens: 4096 - name: deepseek/deepseek-v3.2 type: chat max_input_tokens: 163840 input_price: 0.25 output_price: 0.38 - name: qwen/qwen3-max type: 
chat max_input_tokens: 262144 input_price: 1.2 output_price: 6.0 supports_function_calling: true - name: qwen/qwen-plus type: chat max_input_tokens: 131072 input_price: 0.4 output_price: 1.2 max_output_tokens: 8192 supports_function_calling: true - name: qwen/qwen3-next-80b-a3b-instruct type: chat max_input_tokens: 262144 input_price: 0.1 output_price: 0.8 supports_function_calling: true - name: qwen/qwen3-next-80b-a3b-thinking type: chat max_input_tokens: 262144 input_price: 0.1 output_price: 0.8 - name: qwen/qwen3-235b-a22b-2507 type: chat max_input_tokens: 262144 input_price: 0.12 output_price: 0.59 supports_function_calling: true - name: qwen/qwen3-235b-a22b-thinking-2507 type: chat max_input_tokens: 262144 input_price: 0.118 output_price: 0.118 - name: qwen/qwen3-30b-a3b-instruct-2507 type: chat max_input_tokens: 131072 input_price: 0.2 output_price: 0.8 - name: qwen/qwen3-30b-a3b-thinking-2507 type: chat max_input_tokens: 262144 input_price: 0.071 output_price: 0.285 - name: qwen/qwen3-vl-32b-instruct type: chat max_input_tokens: 262144 input_price: 0.35 output_price: 1.1 supports_vision: true - name: qwen/qwen3-vl-8b-instruct type: chat max_input_tokens: 262144 input_price: 0.08 output_price: 0.5 supports_vision: true - name: qwen/qwen3-coder-plus type: chat max_input_tokens: 128000 input_price: 1.0 output_price: 5.0 supports_function_calling: true - name: qwen/qwen3-coder-flash type: chat max_input_tokens: 128000 input_price: 0.3 output_price: 1.5 supports_function_calling: true - name: qwen/qwen3-coder type: chat max_input_tokens: 262144 input_price: 0.22 output_price: 0.95 supports_function_calling: true - name: qwen/qwen3-coder-30b-a3b-instruct type: chat max_input_tokens: 262144 input_price: 0.052 output_price: 0.207 supports_function_calling: true - name: moonshotai/kimi-k2.5 type: chat max_input_tokens: 262144 input_price: 0.57 output_price: 2.85 supports_vision: true supports_function_calling: true - name: moonshotai/kimi-k2-0905 type: chat 
max_input_tokens: 262144 input_price: 0.296 output_price: 1.185 supports_vision: true supports_function_calling: true - name: moonshotai/kimi-k2-thinking type: chat max_input_tokens: 262144 input_price: 0.45 output_price: 2.35 supports_function_calling: true - name: moonshotai/kimi-dev-72b type: chat max_input_tokens: 131072 input_price: 0.29 output_price: 1.15 supports_function_calling: true - name: x-ai/grok-4.1-fast type: chat max_input_tokens: 2000000 input_price: 0.2 output_price: 0.5 supports_function_calling: true - name: x-ai/grok-code-fast-1 type: chat max_input_tokens: 256000 input_price: 0.2 output_price: 1.5 supports_function_calling: true - name: amazon/nova-premier-v1 type: chat max_input_tokens: 1000000 input_price: 2.5 output_price: 12.5 supports_vision: true - name: amazon/nova-pro-v1 type: chat max_input_tokens: 300000 input_price: 0.8 output_price: 3.2 max_output_tokens: 5120 supports_vision: true - name: amazon/nova-lite-v1 type: chat max_input_tokens: 300000 input_price: 0.06 output_price: 0.24 max_output_tokens: 5120 supports_vision: true - name: amazon/nova-micro-v1 type: chat max_input_tokens: 128000 input_price: 0.035 output_price: 0.14 max_output_tokens: 5120 - name: perplexity/sonar-pro type: chat max_input_tokens: 200000 input_price: 3.0 output_price: 15.0 - name: perplexity/sonar type: chat max_input_tokens: 127072 input_price: 1.0 output_price: 1.0 - name: perplexity/sonar-reasoning-pro type: chat max_input_tokens: 128000 input_price: 2.0 output_price: 8.0 patch: body: include_reasoning: true - name: perplexity/sonar-deep-research type: chat max_input_tokens: 200000 input_price: 2.0 output_price: 8.0 patch: body: include_reasoning: true - name: minimax/minimax-m2.1 type: chat max_input_tokens: 196608 input_price: 0.12 output_price: 0.48 supports_function_calling: true - name: z-ai/glm-4.7 type: chat max_input_tokens: 202752 input_price: 0.16 output_price: 0.8 supports_function_calling: true - name: z-ai/glm-4.7-flash type: chat 
max_input_tokens: 202752 input_price: 0.07 output_price: 0.4 supports_function_calling: true - name: z-ai/glm-4.6v type: chat max_input_tokens: 131072 input_price: 0.3 output_price: 0.9 supports_vision: true - provider: github models: - name: gpt-5 type: chat max_input_tokens: 400000 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: gpt-5-mini type: chat max_input_tokens: 400000 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: gpt-5-nano type: chat max_input_tokens: 400000 max_output_tokens: 128000 supports_vision: true supports_function_calling: true - name: gpt-4.1 type: chat max_input_tokens: 1047576 max_output_tokens: 32768 supports_vision: true supports_function_calling: true - name: gpt-4o type: chat max_input_tokens: 128000 max_output_tokens: 16384 supports_function_calling: true - name: text-embedding-3-large type: embedding max_tokens_per_chunk: 8191 default_chunk_size: 2000 max_batch_size: 100 - name: text-embedding-3-small type: embedding max_tokens_per_chunk: 8191 default_chunk_size: 2000 max_batch_size: 100 - name: llama-4-maverick-17b-128e-instruct-fp8 type: chat max_input_tokens: 1048576 supports_vision: true - name: llama-4-scout-17b-16e-instruct type: chat max_input_tokens: 327680 supports_vision: true - name: llama-3.3-70b-instruct type: chat max_input_tokens: 131072 - name: mistral-medium-2505 type: chat max_input_tokens: 131072 supports_function_calling: true - name: mistral-small-2503 type: chat max_input_tokens: 131072 supports_function_calling: true - name: codestral-2501 type: chat max_input_tokens: 256000 supports_function_calling: true - name: cohere-embed-v3-english type: embedding max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 96 - name: cohere-embed-v3-multilingual type: embedding max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 96 - name: deepseek-r1-0528 type: chat max_input_tokens: 163840 - name: deepseek-v3-0324 type: 
chat max_input_tokens: 163840 - name: mai-ds-r1 type: chat max_input_tokens: 163840 - name: phi-4 type: chat max_input_tokens: 16384 - name: phi-4-mini-instruct type: chat max_input_tokens: 131072 - name: phi-4-reasoning type: chat max_input_tokens: 33792 - name: phi-4-mini-reasoning type: chat max_input_tokens: 131072 - name: grok-3 type: chat max_input_tokens: 131072 - name: grok-3-mini type: chat max_input_tokens: 131072 - provider: deepinfra models: - name: openai/gpt-oss-120b type: chat max_input_tokens: 131072 input_price: 0.09 output_price: 0.45 supports_function_calling: true - name: openai/gpt-oss-20b type: chat max_input_tokens: 131072 input_price: 0.04 output_price: 0.16 supports_function_calling: true - name: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 type: chat max_input_tokens: 1048576 input_price: 0.18 output_price: 0.6 supports_vision: true - name: meta-llama/Llama-4-Scout-17B-16E-Instruct type: chat max_input_tokens: 327680 input_price: 0.08 output_price: 0.3 supports_vision: true - name: Qwen/Qwen3-Next-80B-A3B-Instruct type: chat max_input_tokens: 262144 input_price: 0.14 output_price: 1.4 supports_function_calling: true - name: Qwen/Qwen3-Next-80B-A3B-Thinking type: chat max_input_tokens: 262144 input_price: 0.14 output_price: 1.4 - name: Qwen/Qwen3-235B-A22B-Instruct-2507 type: chat max_input_tokens: 131072 input_price: 0.13 output_price: 0.6 supports_function_calling: true - name: Qwen/Qwen3-235B-A22B-Thinking-2507 type: chat max_input_tokens: 131072 input_price: 0.13 output_price: 0.6 - name: Qwen/Qwen3-Coder-480B-A35B-Instruct type: chat max_input_tokens: 131072 input_price: 0.4 output_price: 1.6 supports_function_calling: true - name: Qwen/Qwen3-Coder-30B-A3B-Instruct type: chat max_input_tokens: 262144 input_price: 0.07 output_price: 0.27 supports_function_calling: true - name: Qwen/Qwen3-30B-A3B type: chat max_input_tokens: 40960 input_price: 0.1 output_price: 0.3 - name: Qwen/Qwen3-VL-8B-Instruct type: chat max_input_tokens: 
262144 input_price: 0.18 output_price: 0.69 supports_vision: true - name: deepseek-ai/DeepSeek-V3.2 type: chat max_input_tokens: 163840 input_price: 0.26 output_price: 0.39 supports_function_calling: true - name: google/gemma-3-27b-it type: chat max_input_tokens: 131072 input_price: 0.1 output_price: 0.2 - name: mistralai/Mistral-Small-3.2-24B-Instruct-2506 type: chat max_input_tokens: 32768 input_price: 0.06 output_price: 0.12 - name: moonshotai/Kimi-K2.5 type: chat max_input_tokens: 262144 input_price: 0.5 output_price: 2.8 supports_function_calling: true - name: moonshotai/Kimi-K2-Instruct-0905 type: chat max_input_tokens: 262144 input_price: 0.5 output_price: 2.0 supports_function_calling: true - name: moonshotai/Kimi-K2-Thinking type: chat max_input_tokens: 262144 input_price: 0.55 output_price: 2.5 supports_function_calling: true - name: MiniMaxAI/MiniMax-M2.1 type: chat max_input_tokens: 262144 input_price: 0.28 output_price: 1.2 supports_function_calling: true - name: zai-org/GLM-4.7 type: chat max_input_tokens: 202752 input_price: 0.43 output_price: 1.75 supports_function_calling: true - name: zai-org/GLM-4.6V type: chat max_input_tokens: 131072 input_price: 0.3 output_price: 0.9 supports_vision: true - name: BAAI/bge-large-en-v1.5 type: embedding input_price: 0.01 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 100 - name: BAAI/bge-m3 type: embedding input_price: 0.01 max_tokens_per_chunk: 8192 default_chunk_size: 2000 max_batch_size: 100 - name: intfloat/e5-large-v2 type: embedding input_price: 0.01 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 100 - name: intfloat/multilingual-e5-large type: embedding input_price: 0.01 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 100 - name: thenlper/gte-large type: embedding input_price: 0.01 max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 100 - provider: jina models: - name: jina-embeddings-v3 type: embedding input_price: 0.0 
max_tokens_per_chunk: 8192 default_chunk_size: 2000 max_batch_size: 100 - name: jina-clip-v2 type: embedding input_price: 0.0 max_tokens_per_chunk: 8192 default_chunk_size: 1500 max_batch_size: 100 - name: jina-colbert-v2 type: embedding input_price: 0.0 max_tokens_per_chunk: 8192 default_chunk_size: 1500 max_batch_size: 100 - name: jina-reranker-v2-base-multilingual type: reranker max_input_tokens: 8192 input_price: 0.0 - name: jina-colbert-v2 type: reranker max_input_tokens: 8192 input_price: 0.0 - provider: voyageai models: - name: voyage-3-large type: embedding max_input_tokens: 120000 input_price: 0.18 max_tokens_per_chunk: 32000 default_chunk_size: 2000 max_batch_size: 128 - name: voyage-3 type: embedding max_input_tokens: 320000 input_price: 0.06 max_tokens_per_chunk: 32000 default_chunk_size: 2000 max_batch_size: 128 - name: voyage-3-lite type: embedding max_input_tokens: 1000000 input_price: 0.02 max_tokens_per_chunk: 32000 default_chunk_size: 1000 max_batch_size: 128 - name: rerank-2 type: reranker max_input_tokens: 16000 input_price: 0.05 - name: rerank-2-lite type: reranker max_input_tokens: 8000 input_price: 0.02