{"cline/claude-3-7-sonnet:1024":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-7-sonnet:16384":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-7-sonnet:high":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-5-sonnet:alpha":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/deepseek-reasoner:alpha":{"maxTokens":8000,"contextWindow":64000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.55,"outputPrice":2.1900000000000004,"description":"Fully open-source model & technical report. Performance on par with OpenAI-o1.","cacheWritesPrice":0.55,"cacheReadsPrice":0.14},"cline/o3-mini:low":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.1,"outputPrice":4.4,"description":"","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"cline/o3-mini:high":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.1,"outputPrice":4.4,"description":"","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"cline/claude-3-7-sonnet":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-7-sonnet:low":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-7-sonnet:medium":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-7-sonnet:max":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/o3-mini:medium":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.1,"outputPrice":4.4,"description":"","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"cline/claude-3-7-sonnet:8192":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-5-sonnet:alpha-v2":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/claude-3-7-sonnet:64000":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"cline/o3-mini":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.1,"outputPrice":4.4,"description":"","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"cline/claude-3-5-sonnet":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"coding/claude-3-7-sonnet":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"coding/claude-4-sonnet":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Claude Sonnet 4 significantly improves on Sonnet 3.7's industry-leading capabilities, excelling in coding with a state-of-the-art 72.7% on SWE-bench. The model balances performance and efficiency for internal and external use cases, with enhanced steerability for greater control over implementations.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"coding/gemini-2.5-flash-preview-05-20":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.15,"outputPrice":0.6,"description":"","cacheWritesPrice":0.6,"cacheReadsPrice":0.15},"coding/gemini-2.5-pro-preview-03-25":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":10,"description":"","cacheWritesPrice":1.625,"cacheReadsPrice":0.31},"coding/gemini-2.5-pro-preview-05-06":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":10,"description":"","cacheWritesPrice":1.625,"cacheReadsPrice":0.31},"anthropic/claude-3-5-sonnet-20241022":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"anthropic/claude-3-5-haiku-latest":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":4,"description":"Anthropic's fastest model. Intelligence at blazing speeds.","cacheWritesPrice":1,"cacheReadsPrice":0.08},"anthropic/claude-3-5-sonnet-latest":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"anthropic/claude-opus-4-20250514":{"maxTokens":32000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":15,"outputPrice":75,"description":"Claude Opus 4 is Anthropic's most powerful model yet and the best coding model in the world, leading on SWE-bench (72.5%) and Terminal-bench (43.2%). It delivers sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours—dramatically outperforming all Sonnet models and significantly expanding what AI agents can accomplish.","cacheWritesPrice":18.75,"cacheReadsPrice":1.5},"anthropic/claude-3-7-sonnet-latest":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"anthropic/claude-3-5-haiku-20241022":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":4,"description":"Anthropic's fastest model. Intelligence at blazing speeds.","cacheWritesPrice":1,"cacheReadsPrice":0.08},"anthropic/claude-3-5-sonnet-20240620":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"anthropic/claude-3-opus-latest":{"maxTokens":4096,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":15,"outputPrice":75,"description":"Powerful model for highly complex tasks. Top-level intelligence, fluency, and understanding.","cacheWritesPrice":18.75,"cacheReadsPrice":1.5},"anthropic/claude-3-sonnet-20240229":{"maxTokens":4096,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Balance of intelligence and speed. Strong utility, balanced for scaled deployments","cacheWritesPrice":3,"cacheReadsPrice":3},"anthropic/claude-3-7-sonnet-20250219":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"anthropic/claude-sonnet-4-20250514":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Claude Sonnet 4 significantly improves on Sonnet 3.7's industry-leading capabilities, excelling in coding with a state-of-the-art 72.7% on SWE-bench. The model balances performance and efficiency for internal and external use cases, with enhanced steerability for greater control over implementations.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"anthropic/claude-3-opus-20240229":{"maxTokens":4096,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":15,"outputPrice":75,"description":"Powerful model for highly complex tasks. Top-level intelligence, fluency, and understanding.","cacheWritesPrice":18.75,"cacheReadsPrice":1.5},"anthropic/claude-3-haiku-20240307":{"maxTokens":4096,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.25,"outputPrice":1.25,"description":"Fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.","cacheWritesPrice":0.3,"cacheReadsPrice":0.03},"vertex/anthropic/claude-3-7-sonnet-latest@europe-west1":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet@us-east5":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet-latest@us-east5":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet@europe-west1":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet-latest@europe-west1":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-4-sonnet-latest":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet-latest@us-east5":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet-20241022@europe-west1":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-4-sonnet":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet@europe-west1":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet@us-east5":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet-20250219@europe-west1":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet-20241022":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet-20241022@us-east5":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet-20250219":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet-latest":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-7-sonnet-20250219@us-east5":{"maxTokens":64000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":true,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"vertex/anthropic/claude-3-5-sonnet-latest":{"maxTokens":8192,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Anthropic's previous most intelligent model. High level of intelligence and capability. Excells in coding.","cacheWritesPrice":3.75,"cacheReadsPrice":0.3},"deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.12,"outputPrice":0.3,"description":"","cacheWritesPrice":0.12,"cacheReadsPrice":0.12},"deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.02,"outputPrice":0.049999999999999996,"description":"","cacheWritesPrice":0.02,"cacheReadsPrice":0.02},"deepinfra/microsoft/phi-4":{"maxTokens":0,"contextWindow":16384,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.07,"outputPrice":0.14,"description":"","cacheWritesPrice":0.07,"cacheReadsPrice":0.07},"deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct":{"maxTokens":4096,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.055,"outputPrice":0.055,"description":"","cacheWritesPrice":0.055,"cacheReadsPrice":0.055},"deepinfra/meta-llama/Llama-3.2-90B-Vision-Instruct":{"maxTokens":4096,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.35,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.35,"cacheReadsPrice":0.35},"deepinfra/meta-llama/Meta-Llama-3.1-405B-Instruct":{"maxTokens":0,"contextWindow":130815,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":0.7999999999999999,"description":"","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"deepinfra/Qwen/Qwen3-235B-A22B":{"maxTokens":4096,"contextWindow":40000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":0.6,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.12,"outputPrice":0.3,"description":"","cacheWritesPrice":0.12,"cacheReadsPrice":0.12},"deepinfra/Qwen/QwQ-32B":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.12,"outputPrice":0.18,"description":"","cacheWritesPrice":0.12,"cacheReadsPrice":0.12},"deepinfra/meta-llama/Llama-3.3-70B-Instruct":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.22999999999999998,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.22999999999999998,"cacheReadsPrice":0.22999999999999998},"deepinfra/microsoft/WizardLM-2-8x22B":{"maxTokens":4096,"contextWindow":65536,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":0.5,"description":"","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"deepinfra/deepseek-ai/DeepSeek-R1":{"maxTokens":8192,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.85,"outputPrice":2.5,"description":"","cacheWritesPrice":0.85,"cacheReadsPrice":0.85},"deepinfra/deepseek-ai/DeepSeek-V3":{"maxTokens":8192,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.85,"outputPrice":0.8999999999999999,"description":"","cacheWritesPrice":0.85,"cacheReadsPrice":0.85},"deepinfra/Qwen/Qwen2.5-72B-Instruct":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.22999999999999998,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.22999999999999998,"cacheReadsPrice":0.22999999999999998},"deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct":{"maxTokens":0,"contextWindow":130815,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.22999999999999998,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.22999999999999998,"cacheReadsPrice":0.22999999999999998},"deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B":{"maxTokens":8192,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.22999999999999998,"outputPrice":0.69,"description":"","cacheWritesPrice":0.22999999999999998,"cacheReadsPrice":0.22999999999999998},"deepinfra/Qwen/Qwen2.5-Coder-32B-Instruct":{"maxTokens":0,"contextWindow":16384,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.07,"outputPrice":0.16,"description":"","cacheWritesPrice":0.07,"cacheReadsPrice":0.07},"deepinfra/Qwen/Qwen3-32B":{"maxTokens":0,"contextWindow":40000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09999999999999999,"outputPrice":0.3,"description":"","cacheWritesPrice":0.09999999999999999,"cacheReadsPrice":0.09999999999999999},"xai/grok-3-mini-beta":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.3,"outputPrice":0.5,"description":"","cacheWritesPrice":0.3,"cacheReadsPrice":0.3},"xai/grok-3-mini-fast-beta:high":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.6,"outputPrice":4,"description":"","cacheWritesPrice":0.6,"cacheReadsPrice":0.6},"xai/grok-3-mini-fast-beta":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.6,"outputPrice":4,"description":"","cacheWritesPrice":0.6,"cacheReadsPrice":0.6},"xai/grok-3-beta":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"","cacheWritesPrice":3,"cacheReadsPrice":3},"xai/grok-3-mini-beta:low":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.3,"outputPrice":0.5,"description":"","cacheWritesPrice":0.3,"cacheReadsPrice":0.3},"xai/grok-3-mini-beta:high":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.3,"outputPrice":0.5,"description":"","cacheWritesPrice":0.3,"cacheReadsPrice":0.3},"xai/grok-3-mini-fast-beta:low":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.6,"outputPrice":4,"description":"","cacheWritesPrice":0.6,"cacheReadsPrice":0.6},"xai/grok-2-1212":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":10,"description":"","cacheWritesPrice":2,"cacheReadsPrice":2},"xai/grok-2-latest":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":10,"description":"","cacheWritesPrice":2,"cacheReadsPrice":2},"xai/grok-3-fast-beta":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":5,"outputPrice":25,"description":"","cacheWritesPrice":5,"cacheReadsPrice":5},"mistral/devstral-small-latest":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09999999999999999,"outputPrice":0.3,"description":"","cacheWritesPrice":0.09999999999999999,"cacheReadsPrice":0.09999999999999999},"mistral/mistral-large-latest":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":6,"description":"","cacheWritesPrice":2,"cacheReadsPrice":2},"mistral/open-mistral-7b":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.25,"outputPrice":0.25,"description":"","cacheWritesPrice":0.25,"cacheReadsPrice":0.25},"mistral/mistral-small-latest":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09999999999999999,"outputPrice":0.3,"description":"","cacheWritesPrice":0.09999999999999999,"cacheReadsPrice":0.09999999999999999},"parasail/parasail-qwen-coder32b-longcontext-128":{"maxTokens":8192,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":0.5,"description":"","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"parasail/parasail-qwen25-vl-72b-instruct":{"maxTokens":8192,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7,"outputPrice":0.7,"description":"","cacheWritesPrice":0.7,"cacheReadsPrice":0.7},"parasail/parasail-mistral-7b-instruct-03":{"maxTokens":8192,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.11,"outputPrice":0.11,"description":"","cacheWritesPrice":0.11,"cacheReadsPrice":0.11},"parasail/parasail-mistral-nemo":{"maxTokens":8192,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.11,"outputPrice":0.11,"description":"","cacheWritesPrice":0.11,"cacheReadsPrice":0.11},"parasail/parasail-mythomax-13b":{"maxTokens":8192,"contextWindow":4000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.11,"outputPrice":0.11,"description":"","cacheWritesPrice":0.11,"cacheReadsPrice":0.11},"parasail/parasail-eva-25-72b-v02-fp8":{"maxTokens":8192,"contextWindow":32000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7,"outputPrice":0.7,"description":"","cacheWritesPrice":0.7,"cacheReadsPrice":0.7},"parasail/meta-llama/Llama-4-Scout-17B-16E-Instruct":{"maxTokens":1048576,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.14,"outputPrice":0.58,"description":"","cacheWritesPrice":0.58,"cacheReadsPrice":0.14},"parasail/parasail-skyfall-36b-v2-fp8":{"maxTokens":8192,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":0.5,"description":"","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"parasail/parasail-anubis-pro":{"maxTokens":8192,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":0.7999999999999999,"description":"","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"parasail/parasail-deepseek-r1":{"maxTokens":8192,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":3,"description":"","cacheWritesPrice":3,"cacheReadsPrice":3},"parasail/parasail-gemma3-27b-it":{"maxTokens":8192,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.3,"outputPrice":0.5,"description":"","cacheWritesPrice":0.5,"cacheReadsPrice":0.3},"parasail/parasail-wayfarer-70b-llama33-fp8":{"maxTokens":8192,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7,"outputPrice":0.7,"description":"","cacheWritesPrice":0.7,"cacheReadsPrice":0.7},"parasail/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8":{"maxTokens":1048576,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.21,"outputPrice":0.85,"description":"","cacheWritesPrice":0.85,"cacheReadsPrice":0.21},"openai/o1-mini:medium":{"maxTokens":65536,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023. o1-mini is a faster and more affordable reasoning model, but OpenAI recommends using the newer o3-mini model that features higher intelligence at the same latency and price as o1-mini.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o1-mini:high":{"maxTokens":65536,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023. o1-mini is a faster and more affordable reasoning model, but OpenAI recommends using the newer o3-mini model that features higher intelligence at the same latency and price as o1-mini.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/gpt-4.1-mini":{"maxTokens":32768,"contextWindow":1047576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.39999999999999997,"outputPrice":1.5999999999999999,"description":"","cacheWritesPrice":0.39999999999999997,"cacheReadsPrice":0.09999999999999999},"openai/gpt-4.1":{"maxTokens":32768,"contextWindow":1047576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":8,"description":"","cacheWritesPrice":2,"cacheReadsPrice":0.5},"openai/o4-mini:medium":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.275},"openai/gpt-4o":{"maxTokens":4096,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2.5,"outputPrice":10,"description":"","cacheWritesPrice":2.5,"cacheReadsPrice":1.25},"openai/o3-2025-04-16":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":10,"outputPrice":40,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":40,"cacheReadsPrice":2.5},"openai/o1-2024-12-17":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":15,"outputPrice":60,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":15,"cacheReadsPrice":7.5},"openai/o1-mini-2024-09-12":{"maxTokens":65536,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023. o1-mini is a faster and more affordable reasoning model, but OpenAI recommends using the newer o3-mini model that features higher intelligence at the same latency and price as o1-mini.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o4-mini:high":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.275},"openai/gpt-4.1-nano-2025-04-14":{"maxTokens":32768,"contextWindow":1047576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09999999999999999,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.09999999999999999,"cacheReadsPrice":0.024999999999999998},"openai/o1:medium":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":15,"outputPrice":60,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":15,"cacheReadsPrice":7.5},"openai/o1-mini":{"maxTokens":65536,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023. o1-mini is a faster and more affordable reasoning model, but OpenAI recommends using the newer o3-mini model that features higher intelligence at the same latency and price as o1-mini.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o3-mini":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o3-mini:medium":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o4-mini":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.275},"openai/gpt-4o-mini-2024-07-18":{"maxTokens":4096,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.15,"outputPrice":0.6,"description":"","cacheWritesPrice":0.15,"cacheReadsPrice":0.075},"openai/gpt-4.5-preview":{"maxTokens":4096,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":75,"outputPrice":150,"description":"","cacheWritesPrice":75,"cacheReadsPrice":37.5},"openai/gpt-4.1-mini-2025-04-14":{"maxTokens":32768,"contextWindow":1047576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.39999999999999997,"outputPrice":1.5999999999999999,"description":"","cacheWritesPrice":0.39999999999999997,"cacheReadsPrice":0.09999999999999999},"openai/o3:flex":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":5,"outputPrice":20,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":20,"cacheReadsPrice":1.25},"openai/o4-mini:flex":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":0.55,"outputPrice":2.2,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":0.55,"cacheReadsPrice":0.1375},"openai/gpt-4o-2024-05-13":{"maxTokens":4096,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2.5,"outputPrice":10,"description":"","cacheWritesPrice":2.5,"cacheReadsPrice":2.5},"openai/gpt-4o-mini":{"maxTokens":4096,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.15,"outputPrice":0.6,"description":"","cacheWritesPrice":0.15,"cacheReadsPrice":0.075},"openai/o1:high":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":15,"outputPrice":60,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":15,"cacheReadsPrice":7.5},"openai/o4-mini-2025-04-16":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.275},"openai/o1:low":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":15,"outputPrice":60,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":15,"cacheReadsPrice":7.5},"openai/gpt-4o-2024-11-20":{"maxTokens":4096,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2.5,"outputPrice":10,"description":"","cacheWritesPrice":2.5,"cacheReadsPrice":1.25},"openai/chatgpt-4o-latest":{"maxTokens":16000,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":5,"outputPrice":15,"description":"","cacheWritesPrice":5,"cacheReadsPrice":5},"openai/o1":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":15,"outputPrice":60,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":15,"cacheReadsPrice":7.5},"openai/gpt-4.1-2025-04-14":{"maxTokens":32768,"contextWindow":1047576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":8,"description":"","cacheWritesPrice":2,"cacheReadsPrice":0.5},"openai/gpt-4.1-nano":{"maxTokens":32768,"contextWindow":1047576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09999999999999999,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.09999999999999999,"cacheReadsPrice":0.024999999999999998},"openai/o3-mini:low":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o3-mini:high":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o1-mini:low":{"maxTokens":65536,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023. o1-mini is a faster and more affordable reasoning model, but OpenAI recommends using the newer o3-mini model that features higher intelligence at the same latency and price as o1-mini.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/gpt-4o-2024-08-06":{"maxTokens":4096,"contextWindow":128000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2.5,"outputPrice":10,"description":"","cacheWritesPrice":2.5,"cacheReadsPrice":1.25},"openai/o3":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":10,"outputPrice":40,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","cacheWritesPrice":40,"cacheReadsPrice":2.5},"openai/o3-mini-2025-01-31":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.55},"openai/o4-mini:low":{"maxTokens":100000,"contextWindow":200000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":true,"inputPrice":1.1,"outputPrice":4.4,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","cacheWritesPrice":1.1,"cacheReadsPrice":0.275},"minimaxi/MiniMax-Text-01":{"maxTokens":0,"contextWindow":1000192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":1.1,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"minimaxi/DeepSeek-R1":{"maxTokens":0,"contextWindow":640000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.55,"outputPrice":2.1900000000000004,"description":"","cacheWritesPrice":2.1900000000000004,"cacheReadsPrice":0.55},"together/meta-llama/Llama-3-70b-chat-hf":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.88,"outputPrice":0.88,"description":"","cacheWritesPrice":0.88,"cacheReadsPrice":0.88},"together/meta-llama/Meta-Llama-3-70B-Instruct-Turbo":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.88,"outputPrice":0.88,"description":"","cacheWritesPrice":0.88,"cacheReadsPrice":0.88},"together/meta-llama/Meta-Llama-3-8B-Instruct-Lite":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09999999999999999,"outputPrice":0.09999999999999999,"description":"","cacheWritesPrice":0.09999999999999999,"cacheReadsPrice":0.09999999999999999},"together/meta-llama/Meta-Llama-3-70B-Instruct-Lite":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.54,"outputPrice":0.54,"description":"","cacheWritesPrice":0.54,"cacheReadsPrice":0.54},"together/Qwen/Qwen2.5-Coder-32B-Instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":0.7999999999999999,"description":"","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"together/deepseek-ai/DeepSeek-R1":{"maxTokens":8192,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":7,"outputPrice":7,"description":"","cacheWritesPrice":7,"cacheReadsPrice":7},"together/Qwen/QwQ-32B-Preview":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.2,"outputPrice":1.2,"description":"","cacheWritesPrice":1.2,"cacheReadsPrice":1.2},"together/deepseek-ai/DeepSeek-V3":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":1.25,"description":"","cacheWritesPrice":1.25,"cacheReadsPrice":1.25},"together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo":{"maxTokens":0,"contextWindow":130815,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3.5,"outputPrice":3.5,"description":"","cacheWritesPrice":3.5,"cacheReadsPrice":3.5},"together/meta-llama/Llama-2-7b-chat-hf":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":0.19999999999999998,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"together/Qwen/Qwen2.5-72B-Instruct-Turbo":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.2,"outputPrice":1.2,"description":"","cacheWritesPrice":1.2,"cacheReadsPrice":1.2},"together/Qwen/Qwen2.5-7B-Instruct-Turbo":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.3,"outputPrice":0.3,"description":"","cacheWritesPrice":0.3,"cacheReadsPrice":0.3},"together/meta-llama/Llama-3-8b-chat-hf":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":0.19999999999999998,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"together/deepseek-llm-67b-chat":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.8999999999999999,"outputPrice":0.8999999999999999,"description":"","cacheWritesPrice":0.8999999999999999,"cacheReadsPrice":0.8999999999999999},"together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.88,"outputPrice":0.88,"description":"","cacheWritesPrice":0.88,"cacheReadsPrice":0.88},"together/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.88,"outputPrice":0.88,"description":"","cacheWritesPrice":0.88,"cacheReadsPrice":0.88},"together/meta-llama/Llama-2-70b-hf":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.8999999999999999,"outputPrice":0.8999999999999999,"description":"","cacheWritesPrice":0.8999999999999999,"cacheReadsPrice":0.8999999999999999},"together/upstage/SOLAR-10.7B-Instruct-v1.0":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.3,"outputPrice":0.3,"description":"","cacheWritesPrice":0.3,"cacheReadsPrice":0.3},"together/meta-llama/Llama-3.3-70B-Instruct-Turbo":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.88,"outputPrice":0.88,"description":"","cacheWritesPrice":0.88,"cacheReadsPrice":0.88},"together/meta-llama/Llama-2-13b-chat-hf":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.22,"outputPrice":0.22,"description":"","cacheWritesPrice":0.22,"cacheReadsPrice":0.22},"together/meta-llama/LlamaGuard-2-8b":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":0.19999999999999998,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.18,"outputPrice":0.18,"description":"","cacheWritesPrice":0.18,"cacheReadsPrice":0.18},"together/meta-llama/Llama-3.2-3B-Instruct-Turbo":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.06,"outputPrice":0.06,"description":"","cacheWritesPrice":0.06,"cacheReadsPrice":0.06},"together/meta-llama/Meta-Llama-Guard-3-8B":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":0.19999999999999998,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"together/Qwen/Qwen2-72B-Instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.8999999999999999,"outputPrice":0.8999999999999999,"description":"","cacheWritesPrice":0.8999999999999999,"cacheReadsPrice":0.8999999999999999},"together/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.6,"outputPrice":0.6,"description":"","cacheWritesPrice":0.6,"cacheReadsPrice":0.6},"google/gemini-1.5-flash":{"maxTokens":8192,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.075,"outputPrice":0.3,"description":"","cacheWritesPrice":0.075,"cacheReadsPrice":0.075},"google/gemini-2.5-pro-preview-06-05":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":10,"description":"","cacheWritesPrice":1.625,"cacheReadsPrice":0.31},"google/gemini-1.5-pro-latest":{"maxTokens":8192,"contextWindow":2097152,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":5,"description":"","cacheWritesPrice":1.25,"cacheReadsPrice":1.25},"google/gemini-1.5-pro":{"maxTokens":8192,"contextWindow":2097152,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":5,"description":"","cacheWritesPrice":1.25,"cacheReadsPrice":1.25},"google/gemini-2.0-flash-001":{"maxTokens":8192,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09999999999999999,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.09999999999999999,"cacheReadsPrice":0.09999999999999999},"google/gemini-2.5-flash-preview-04-17":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.15,"outputPrice":0.6,"description":"","cacheWritesPrice":0.6,"cacheReadsPrice":0.15},"google/gemini-2.5-pro-preview-03-25":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":10,"description":"","cacheWritesPrice":1.625,"cacheReadsPrice":0.31},"google/gemini-1.5-flash-8b":{"maxTokens":8192,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.075,"outputPrice":0.3,"description":"","cacheWritesPrice":0.075,"cacheReadsPrice":0.075},"google/gemini-1.5-flash-latest":{"maxTokens":8192,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.075,"outputPrice":0.3,"description":"","cacheWritesPrice":0.075,"cacheReadsPrice":0.075},"google/gemini-2.5-flash-preview-05-20":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.15,"outputPrice":0.6,"description":"","cacheWritesPrice":0.6,"cacheReadsPrice":0.15},"google/gemini-1.5-flash-8b-latest":{"maxTokens":8192,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.0375,"outputPrice":0.15,"description":"","cacheWritesPrice":0.0375,"cacheReadsPrice":0.0375},"google/gemini-2.5-pro-preview-05-06":{"maxTokens":65535,"contextWindow":1048576,"supportsPromptCache":true,"supportsImages":true,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.25,"outputPrice":10,"description":"","cacheWritesPrice":1.625,"cacheReadsPrice":0.31},"nebius/Qwen/QwQ-32B":{"maxTokens":0,"contextWindow":32000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.15,"outputPrice":0.44999999999999996,"description":"","cacheWritesPrice":0.15,"cacheReadsPrice":0.15},"nebius/Qwen/QwQ-32B-fast":{"maxTokens":0,"contextWindow":32000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":1.5,"description":"","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"nebius/deepseek-ai/DeepSeek-V3-0324":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":1.5,"description":"","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"nebius/meta-llama/Llama-3.3-70B-Instruct":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.13,"outputPrice":0.39999999999999997,"description":"","cacheWritesPrice":0.13,"cacheReadsPrice":0.13},"nebius/deepseek-ai/DeepSeek-R1-0528":{"maxTokens":0,"contextWindow":164000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":2.4,"description":"","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"nebius/deepseek-ai/DeepSeek-R1-fast":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":6,"description":"","cacheWritesPrice":2,"cacheReadsPrice":2},"nebius/deepseek-ai/DeepSeek-V3-0324-fast":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":6,"description":"","cacheWritesPrice":6,"cacheReadsPrice":2},"nebius/deepseek-ai/DeepSeek-R1":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":2.4,"description":"","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"nebius/deepseek-ai/DeepSeek-V3":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":1.5,"description":"","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"alibaba/qwen-max":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.5999999999999999,"outputPrice":6.3999999999999995,"description":"","cacheWritesPrice":1.5999999999999999,"cacheReadsPrice":1.5999999999999999},"alibaba/qwen-plus":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.39999999999999997,"outputPrice":1.2,"description":"","cacheWritesPrice":0.39999999999999997,"cacheReadsPrice":0.39999999999999997},"alibaba/qwen-turbo":{"maxTokens":0,"contextWindow":1000000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.049999999999999996,"outputPrice":0.19999999999999998,"description":"","cacheWritesPrice":0.049999999999999996,"cacheReadsPrice":0.049999999999999996},"netmind/deepseek-ai/DeepSeek-R1-0528":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":true,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":1,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"deepseek/deepseek-chat":{"maxTokens":8000,"contextWindow":64000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.27,"outputPrice":1.1,"description":"DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models.\n\nIt tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.","cacheWritesPrice":0.27,"cacheReadsPrice":0.07},"deepseek/deepseek-reasoner":{"maxTokens":8000,"contextWindow":64000,"supportsPromptCache":true,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.55,"outputPrice":2.1900000000000004,"description":"Fully open-source model & technical report. Performance on par with OpenAI-o1.","cacheWritesPrice":0.55,"cacheReadsPrice":0.14},"groq/qwen-qwq-32b":{"maxTokens":131072,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.29,"outputPrice":0.39,"description":"Qwen/QwQ-32B is a breakthrough 32-billion parameter reasoning model delivering performance comparable to state-of-the-art (SOTA) models 20x larger like DeepSeek-R1 (671B parameters) on complex reasoning and coding tasks. Deployed on Groq's hardware, it provides the world's fastest and cost-efficient reasoning, producing chains and results in seconds. Along with native tool use support, the 128K context window enables processing extensive information while maintaining comprehensive context.","cacheWritesPrice":0.29,"cacheReadsPrice":0.29},"novita/meta-llama/llama-3.3-70b-instruct":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.39,"outputPrice":0.39,"description":"The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.","cacheWritesPrice":0.39,"cacheReadsPrice":0.39},"novita/meta-llama/llama-3-8b-instruct":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.04,"outputPrice":0.04,"description":"Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.","cacheWritesPrice":0.04,"cacheReadsPrice":0.04},"novita/qwen/qwen3-235b-a22b-fp8":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":0.7999999999999999,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"novita/meta-llama/llama-3.2-3b-instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.03,"outputPrice":0.049999999999999996,"description":"The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out)","cacheWritesPrice":0.03,"cacheReadsPrice":0.03},"novita/deepseek/deepseek-r1-turbo":{"maxTokens":0,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7,"outputPrice":2.5,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","cacheWritesPrice":2.5,"cacheReadsPrice":0.7},"novita/qwen/qwen2.5-vl-72b-instruct":{"maxTokens":0,"contextWindow":96000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":0.7999999999999999,"description":"Qwen2 VL 72B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\nSoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\nUnderstanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\nAgent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\nMultilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"novita/meta-llama/llama-3.1-8b-instruct":{"maxTokens":0,"contextWindow":16384,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.049999999999999996,"outputPrice":0.049999999999999996,"description":"Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. The 8B instruct-tuned version is particularly fast and efficient. It has demonstrated strong performance in human evaluations, outperforming several leading closed-source models.","cacheWritesPrice":0.049999999999999996,"cacheReadsPrice":0.049999999999999996},"novita/jondurbin/airoboros-l2-70b":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.5,"outputPrice":0.5,"description":"This is a fine-tuned Llama-2 model designed to support longer and more detailed writing prompts, as well as next-chapter generation. It also includes an experimental role-playing instruction set with multi-round dialogues, character interactions, and varying numbers of participants","cacheWritesPrice":0.5,"cacheReadsPrice":0.5},"novita/meta-llama/llama-3.1-8b-instruct-max":{"maxTokens":0,"contextWindow":16384,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.049999999999999996,"outputPrice":0.049999999999999996,"description":"Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. The 8B instruct-tuned version is particularly fast and efficient. It has demonstrated strong performance in human evaluations, outperforming several leading closed-source models.","cacheWritesPrice":0.049999999999999996,"cacheReadsPrice":0.049999999999999996},"novita/qwen/qwen-2-vl-72b-instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.44999999999999996,"outputPrice":0.44999999999999996,"description":"Qwen2 VL 72B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\nSoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\nUnderstanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\nAgent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\nMultilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.","cacheWritesPrice":0.44999999999999996,"cacheReadsPrice":0.44999999999999996},"novita/meta-llama/llama-3.1-8b-instruct-bf16":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.06,"outputPrice":0.06,"description":"Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. The 8B instruct-tuned version is particularly fast and efficient. It has demonstrated strong performance in human evaluations, \n                   outperforming several leading closed-source models.","cacheWritesPrice":0.06,"cacheReadsPrice":0.06},"novita/meta-llama/llama-3.2-11b-vision-instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.06,"outputPrice":0.06,"description":"Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis. Its ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.","cacheWritesPrice":0.06,"cacheReadsPrice":0.06},"novita/deepseek/deepseek-prover-v2-671b":{"maxTokens":0,"contextWindow":160000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7,"outputPrice":2.5,"description":"","cacheWritesPrice":0.7,"cacheReadsPrice":0.7},"novita/mistralai/mistral-nemo":{"maxTokens":0,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.16999999999999998,"outputPrice":0.16999999999999998,"description":"A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi. It supports function calling and is released under the Apache 2.0 license.","cacheWritesPrice":0.16999999999999998,"cacheReadsPrice":0.16999999999999998},"novita/gryphe/mythomax-l2-13b":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.09,"outputPrice":0.09,"description":"The idea behind this merge is that each layer is composed of several tensors, which are in turn responsible for specific functions. Using MythoLogic-L2's robust understanding as its input and Huginn's extensive writing capability as its output seems to have resulted in a model that exceeds at both, confirming my theory. (More details to be released at a later time).","cacheWritesPrice":0.09,"cacheReadsPrice":0.09},"novita/microsoft/wizardlm-2-8x22b":{"maxTokens":0,"contextWindow":65535,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.62,"outputPrice":0.62,"description":"WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.","cacheWritesPrice":0.62,"cacheReadsPrice":0.62},"novita/openchat/openchat-7b":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.06,"outputPrice":0.06,"description":"OpenChat 7B is a library of open-source language models, fine-tuned with \"C-RLFT (Conditioned Reinforcement Learning Fine-Tuning)\" - a strategy inspired by offline reinforcement learning. It has been trained on mixed-quality data without preference labels.","cacheWritesPrice":0.06,"cacheReadsPrice":0.06},"novita/sao10k/l3-70b-euryale-v2.1":{"maxTokens":0,"contextWindow":16000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.48,"outputPrice":1.48,"description":"The uncensored llama3 model is a powerhouse of creativity, excelling in both roleplay and story writing. It offers a liberating experience during roleplays, free from any restrictions. This model stands out for its immense creativity, boasting a vast array of unique ideas and plots, truly a treasure trove for those seeking originality. Its unrestricted nature during roleplays allows for the full breadth of imagination to unfold, akin to an enhanced, big-brained version of Stheno. Perfect for creative minds seeking a boundless platform for their imaginative expressions, the uncensored llama3 model is an ideal choice","cacheWritesPrice":1.48,"cacheReadsPrice":1.48},"novita/qwen/qwen-2-7b-instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.054,"outputPrice":0.054,"description":"Qwen2 is the newest series in the Qwen large language model family. Qwen2 7B is a transformer-based model that demonstrates exceptional performance in language understanding, multilingual capabilities, programming, mathematics, and reasoning.","cacheWritesPrice":0.054,"cacheReadsPrice":0.054},"novita/deepseek/deepseek-v3-0324":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.39999999999999997,"outputPrice":1.3,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","cacheWritesPrice":0.39999999999999997,"cacheReadsPrice":0.39999999999999997},"novita/meta-llama/llama-3.1-70b-instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.33999999999999997,"outputPrice":0.39,"description":"Meta's latest class of models, Llama 3.1, has launched with a variety of sizes and configurations. The 70B instruct-tuned version is optimized for high-quality dialogue use cases. It has demonstrated strong performance in human evaluations compared to leading closed-source models.","cacheWritesPrice":0.33999999999999997,"cacheReadsPrice":0.33999999999999997},"novita/google/gemma-2-9b-it":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.08,"outputPrice":0.08,"description":"Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.","cacheWritesPrice":0.08,"cacheReadsPrice":0.08},"novita/sophosympatheia/midnight-rose-70b":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":0.7999999999999999,"description":"A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"novita/meta-llama/llama-3.2-1b-instruct":{"maxTokens":0,"contextWindow":131000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.02,"outputPrice":0.02,"description":"The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out).","cacheWritesPrice":0.02,"cacheReadsPrice":0.02},"novita/meta-llama/llama-4-maverick-17b-128e-instruct-fp8":{"maxTokens":1048576,"contextWindow":1048576,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.19999999999999998,"outputPrice":0.85,"description":"","cacheWritesPrice":0.19999999999999998,"cacheReadsPrice":0.19999999999999998},"novita/sao10k/l3-8b-lunaris":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.049999999999999996,"outputPrice":0.049999999999999996,"description":"A generalist / roleplaying model merge based on Llama 3.","cacheWritesPrice":0.049999999999999996,"cacheReadsPrice":0.049999999999999996},"novita/qwen/qwq-32b":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.18,"outputPrice":0.19999999999999998,"description":"","cacheWritesPrice":0.18,"cacheReadsPrice":0.18},"novita/deepseek/deepseek_v3":{"maxTokens":0,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.8899999999999999,"outputPrice":0.8899999999999999,"description":"DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.","cacheWritesPrice":0.8899999999999999,"cacheReadsPrice":0.8899999999999999},"novita/nousresearch/hermes-2-pro-llama-3-8b":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.14,"outputPrice":0.14,"description":"Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.","cacheWritesPrice":0.14,"cacheReadsPrice":0.14},"novita/teknium/openhermes-2.5-mistral-7b":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.16999999999999998,"outputPrice":0.16999999999999998,"description":"OpenHermes 2.5 Mistral 7B is a state of the art Mistral Fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.","cacheWritesPrice":0.16999999999999998,"cacheReadsPrice":0.16999999999999998},"novita/sao10k/l31-70b-euryale-v2.2":{"maxTokens":0,"contextWindow":16000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1.48,"outputPrice":1.48,"description":"Euryale L3.1 70B v2.2 is a model focused on creative roleplay from Sao10k. It is the successor of Euryale L3 70B v2.1.","cacheWritesPrice":1.48,"cacheReadsPrice":1.48},"novita/meta-llama/llama-3-70b-instruct":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.51,"outputPrice":0.74,"description":"Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.","cacheWritesPrice":0.51,"cacheReadsPrice":0.51},"novita/nousresearch/nous-hermes-llama2-13b":{"maxTokens":0,"contextWindow":4096,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.16999999999999998,"outputPrice":0.16999999999999998,"description":"Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions. This model was fine-tuned by Nous Research, with Teknium and Emozilla leading the fine tuning process and dataset curation, Redmond AI sponsoring the compute, and several other contributors.","cacheWritesPrice":0.16999999999999998,"cacheReadsPrice":0.16999999999999998},"novita/deepseek/deepseek-r1":{"maxTokens":0,"contextWindow":64000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":4,"outputPrice":4,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","cacheWritesPrice":4,"cacheReadsPrice":4},"novita/deepseek/deepseek-r1-distill-llama-70b":{"maxTokens":0,"contextWindow":32000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.7999999999999999,"outputPrice":0.7999999999999999,"description":"DeepSeek R1 Distill LLama 70B","cacheWritesPrice":0.7999999999999999,"cacheReadsPrice":0.7999999999999999},"novita/deepseek/deepseek-r1-distill-qwen-14b":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.15,"outputPrice":0.15,"description":"DeepSeek R1 Distill Qwen 14B is a distilled large language model based on Qwen 2.5 14B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\nAIME 2024 pass@1: 69.7\nMATH-500 pass@1: 93.9\nCodeForces Rating: 1481\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.","cacheWritesPrice":0.15,"cacheReadsPrice":0.15},"novita/deepseek/deepseek-r1-distill-qwen-32b":{"maxTokens":0,"contextWindow":12800,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.3,"outputPrice":0.3,"description":"DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\nAIME 2024 pass@1: 72.6\nMATH-500 pass@1: 94.3\nCodeForces Rating: 1691\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.","cacheWritesPrice":0.3,"cacheReadsPrice":0.3},"novita/Sao10K/L3-8B-Stheno-v3.2":{"maxTokens":0,"contextWindow":8192,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.049999999999999996,"outputPrice":0.049999999999999996,"description":"Sao10K/L3-8B-Stheno-v3.2 is a highly skilled actor that excels at fully immersing itself in any role assigned.","cacheWritesPrice":0.049999999999999996,"cacheReadsPrice":0.049999999999999996},"novita/mistralai/mistral-7b-instruct":{"maxTokens":0,"contextWindow":32768,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.059,"outputPrice":0.059,"description":"A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.","cacheWritesPrice":0.059,"cacheReadsPrice":0.059},"novita/deepseek/deepseek-v3-turbo":{"maxTokens":0,"contextWindow":128000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.39999999999999997,"outputPrice":1.3,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","cacheWritesPrice":0.39999999999999997,"cacheReadsPrice":0.39999999999999997},"novita/qwen/qwen-2.5-72b-instruct":{"maxTokens":0,"contextWindow":32000,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":0.38,"outputPrice":0.39999999999999997,"description":"Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.","cacheWritesPrice":0.38,"cacheReadsPrice":0.38},"perplexity/sonar":{"maxTokens":8192,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":1,"outputPrice":1,"description":"Lightweight offering with search grounding, quicker and cheaper than Sonar Pro.","cacheWritesPrice":1,"cacheReadsPrice":1},"perplexity/sonar-pro":{"maxTokens":8192,"contextWindow":204800,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":3,"outputPrice":15,"description":"Premier search offering with search grounding, supporting advanced queries and follow-ups.","cacheWritesPrice":3,"cacheReadsPrice":3},"perplexity/sonar-reasoning-pro":{"maxTokens":8192,"contextWindow":131072,"supportsPromptCache":false,"supportsImages":false,"supportsComputerUse":false,"supportsReasoningBudget":false,"supportsReasoningEffort":false,"inputPrice":2,"outputPrice":8,"description":"Premier reasoning offering powered by DeepSeek R1 with Chain of Thought (CoT).","cacheWritesPrice":2,"cacheReadsPrice":2}}