# SSoT model catalog. Pricing verified 2026-04-28 against official provider
# pages. Per RULE 0.4: source_url + verified_at on every "verified" row.
# Conversion: $1.00/Mtok = 100_000_000 micro-cents/Mtok.
#
# Schema: see _primitives/_rust/kei-model/src/model.rs
# Loader: see _primitives/_rust/kei-model/src/registry.rs
#
# Field notes (derived from the rows in this file):
#   - input_per_mtok_micro / output_per_mtok_micro: for token-priced rows,
#     micro-cents per Mtok using the conversion above
#     (e.g. 500_000_000 = $5.00/Mtok).
#   - fallback: id of another row in this catalog; "" where no fallback is given.
#   - Integer literals use "_" digit grouping for readability only; the values
#     are unchanged.

# ============================================================
# Anthropic — verified 2026-04-28
# Source: claude.com/pricing + benchlm.ai April 2026 confirmation
# ============================================================

[[models]]
id = "claude-opus-4-7"
provider = "anthropic"
display_name = "Claude Opus 4.7"
context_tokens = 200_000
capabilities = ["code", "vision", "streaming", "function-call", "long-context-200k", "system-prompt"]
status = "active"
role_tags = ["expensive-code", "complex-reasoning", "code-implementer", "edit-shared", "kei-architect"]
fallback = "claude-sonnet-4-6"
notes = "Anthropic flagship 2026-04. NOTE: new tokenizer can produce 35% more tokens for same input vs prior versions."

[models.pricing]
input_per_mtok_micro = 500_000_000
output_per_mtok_micro = 2_500_000_000
status = "verified"
source_url = "https://claude.com/pricing"
verified_at = "2026-04-28"

[[models]]
id = "claude-sonnet-4-6"
provider = "anthropic"
display_name = "Claude Sonnet 4.6"
context_tokens = 200_000
capabilities = ["code", "vision", "streaming", "function-call", "long-context-200k", "system-prompt"]
status = "active"
role_tags = ["mid", "edit-local", "kei-researcher"]
fallback = "claude-haiku-4-5"
notes = "Anthropic mid-tier"

[models.pricing]
input_per_mtok_micro = 300_000_000
output_per_mtok_micro = 1_500_000_000
status = "verified"
source_url = "https://claude.com/pricing"
verified_at = "2026-04-28"

[[models]]
id = "claude-haiku-4-5"
provider = "anthropic"
display_name = "Claude Haiku 4.5"
context_tokens = 200_000
capabilities = ["code", "streaming", "function-call", "long-context-200k", "system-prompt"]
status = "active"
role_tags = ["cheap-classifier", "kei-critic", "kei-classifier"]
fallback = "gpt-4o-mini"
notes = "Anthropic cheap-tier (current kei-cortex default)"

[models.pricing]
input_per_mtok_micro = 100_000_000
output_per_mtok_micro = 500_000_000
status = "verified"
source_url = "https://claude.com/pricing"
verified_at = "2026-04-28"

# ============================================================
# OpenAI — verified 2026-04-28
# Source: openai.com/api/pricing
# ============================================================

[[models]]
id = "gpt-4o"
provider = "openai"
display_name = "GPT-4o"
context_tokens = 128_000
capabilities = ["code", "vision", "streaming", "function-call", "system-prompt"]
status = "active"
role_tags = ["mid"]
fallback = "gpt-4o-mini"
notes = "OpenAI mid-tier multimodal"

[models.pricing]
input_per_mtok_micro = 250_000_000
output_per_mtok_micro = 1_000_000_000
status = "verified"
source_url = "https://openai.com/api/pricing/"
verified_at = "2026-04-28"

[[models]]
id = "gpt-4o-mini"
provider = "openai"
display_name = "GPT-4o Mini"
context_tokens = 128_000
capabilities = ["code", "streaming", "function-call", "system-prompt"]
status = "active"
role_tags = ["cheap"]
fallback = "deepseek-chat"
notes = "OpenAI cheap-tier (current kei-router openai default). 16x cheaper than GPT-4o."

[models.pricing]
input_per_mtok_micro = 15_000_000
output_per_mtok_micro = 60_000_000
status = "verified"
source_url = "https://openai.com/api/pricing/"
verified_at = "2026-04-28"

[[models]]
id = "o1"
provider = "openai"
display_name = "OpenAI o1"
context_tokens = 200_000
capabilities = ["code", "function-call", "long-context-200k", "system-prompt"]
status = "active"
role_tags = ["complex-reasoning"]
fallback = "gpt-4o"
notes = "OpenAI reasoning-tier. NOTE: internal 'thinking tokens' inflate actual cost beyond listed rate. o3 has replaced o1 as primary; consider migration."

[models.pricing]
input_per_mtok_micro = 1_500_000_000
output_per_mtok_micro = 6_000_000_000
status = "verified"
source_url = "https://openai.com/api/pricing/"
verified_at = "2026-04-28"

# ============================================================
# Kimi — verified 2026-04-28
# Source: openrouter.ai (Moonshot K2 Thinking listing)
# ============================================================

[[models]]
id = "kimi-k2-thinking"
provider = "kimi"
display_name = "Kimi K2 Thinking"
context_tokens = 256_000
capabilities = ["code", "streaming", "function-call", "long-context-200k", "system-prompt"]
status = "active"
role_tags = ["complex-reasoning", "mid"]
fallback = "deepseek-chat"
notes = "Moonshot Kimi reasoning-tier (current kei-router kimi default). Trillion-param MoE, 32B active. Cached input: 75% off."

[models.pricing]
input_per_mtok_micro = 60_000_000
output_per_mtok_micro = 250_000_000
status = "verified"
source_url = "https://openrouter.ai/moonshotai/kimi-k2-thinking"
verified_at = "2026-04-28"

# ============================================================
# Mistral — verified 2026-04-28
# Source: mistral.ai/pricing
# ============================================================

[[models]]
id = "mistral-large"
provider = "mistral"
display_name = "Mistral Large 3"
context_tokens = 128_000
capabilities = ["code", "streaming", "function-call", "system-prompt"]
status = "active"
role_tags = ["mid"]
fallback = "mistral-small"
notes = "Mistral flagship (Large 3 generation)"

[models.pricing]
input_per_mtok_micro = 200_000_000
output_per_mtok_micro = 600_000_000
status = "verified"
source_url = "https://mistral.ai/pricing"
verified_at = "2026-04-28"

[[models]]
id = "mistral-small"
provider = "mistral"
display_name = "Mistral Small 3"
context_tokens = 128_000
capabilities = ["code", "streaming", "function-call", "system-prompt"]
status = "active"
role_tags = ["cheap"]
fallback = "deepseek-chat"
notes = "Mistral cheap-tier (Small 3 generation)"

[models.pricing]
input_per_mtok_micro = 10_000_000
output_per_mtok_micro = 30_000_000
status = "verified"
source_url = "https://mistral.ai/pricing"
verified_at = "2026-04-28"

# ============================================================
# DeepSeek — verified 2026-04-28
# Source: api-docs.deepseek.com
# NOTE: deepseek-chat / deepseek-reasoner are deprecated names;
# they map to deepseek-v4-flash non-thinking / thinking modes
# respectively. Pricing here reflects v4-flash rates which apply
# to both legacy names.
# ============================================================

[[models]]
id = "deepseek-chat"
provider = "deepseek"
display_name = "DeepSeek Chat (v4-flash compat)"
context_tokens = 128_000
capabilities = ["code", "streaming", "function-call", "system-prompt"]
status = "active"
role_tags = ["cheap"]
fallback = "llama-3-70b-local"
notes = "DeepSeek conversational tier. Legacy name maps to v4-flash non-thinking mode. Migrate to deepseek-v4-flash before deprecation deadline."

[models.pricing]
input_per_mtok_micro = 14_000_000
output_per_mtok_micro = 28_000_000
status = "verified"
source_url = "https://api-docs.deepseek.com/quick_start/pricing"
verified_at = "2026-04-28"

[[models]]
id = "deepseek-reasoner"
provider = "deepseek"
display_name = "DeepSeek Reasoner (v4-flash thinking compat)"
context_tokens = 128_000
capabilities = ["code", "streaming", "function-call", "system-prompt"]
status = "active"
role_tags = ["complex-reasoning", "mid"]
fallback = "deepseek-chat"
notes = "DeepSeek reasoning tier. Legacy name maps to v4-flash thinking mode (75% discount applied per docs). Effective rates after discount."

[models.pricing]
input_per_mtok_micro = 43_500_000
output_per_mtok_micro = 87_000_000
status = "verified"
source_url = "https://api-docs.deepseek.com/quick_start/pricing"
verified_at = "2026-04-28"

# ============================================================
# Local (Ollama / llama.cpp / MLX) — free, marked verified-zero
# ============================================================

[[models]]
id = "llama-3-70b-local"
provider = "local"
display_name = "Llama 3 70B (local inference)"
context_tokens = 8192
capabilities = ["code", "streaming", "system-prompt"]
status = "active"
role_tags = ["free", "cheap"]
fallback = ""
notes = "Local inference (Ollama / llama.cpp / MLX). Zero per-token cost; energy + hardware-amortization tracked separately."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 0
status = "verified"
source_url = "https://ollama.com/library/llama3"
verified_at = "2026-04-28"

# ============================================================
# Google Gemini image-gen (nano-banana CLI) — verified 2026-05-01
# Source: https://fal.ai/models/fal-ai/flux-2-pro (fal.ai catalog)
#         https://fal.ai/pricing
# SCHEMA NOTE: provider="local" is a placeholder — the Provider
# enum does not yet include "google". Follow-up: add Provider::Google
# and Provider::Fal and Provider::Elevenlabs to model.rs + enum parse().
# capabilities=[] because Capability enum has no image-gen / video-gen
# variants yet. Follow-up: extend Capability enum with generation caps.
# Per-image cost stored in output_per_mtok_micro (1 image = 1 unit).
# Unit semantics noted in the notes field (loader does not yet parse "unit").
#
# NOTE(review): the rows in this section list non-empty capabilities
# ("image-gen", "text-to-image", ...) although the note above says
# capabilities=[] — confirm which one the loader expects.
# NOTE(review): the banner says "verified", but both rows in this section
# carry pricing status "needs-verification" (the Pro row with a 0 price) —
# confirm before relying on these rates.
# ============================================================

[[models]]
id = "gemini-3-1-flash-image"
provider = "local"
display_name = "Gemini 3.1 Flash Image (Nano Banana 2)"
context_tokens = 0
capabilities = ["image-gen", "text-to-image"]
status = "active"
role_tags = ["image-cheap", "image-gen"]
fallback = ""
notes = "Google Gemini Flash image-gen consumed via nano-banana CLI. Real provider: google. Per-image cost; 1 unit = 1 image. output_per_mtok_micro stores per-image micro-cents. Pricing unverified on public page — nanobanana aggregator lists $0.0398/image via fal.ai."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 3_980_000
status = "needs-verification"
source_url = "https://fal.ai/pricing"
verified_at = "2026-05-01"

[[models]]
id = "gemini-3-pro-image"
provider = "local"
display_name = "Gemini 3 Pro Image (Nano Banana Pro)"
context_tokens = 0
capabilities = ["image-gen", "text-to-image", "image-edit"]
status = "active"
role_tags = ["image-flagship", "image-gen"]
fallback = "gemini-3-1-flash-image"
notes = "Google Gemini Pro image-gen consumed via nano-banana CLI. Real provider: google. Per-image cost; 1 unit = 1 image. output_per_mtok_micro stores per-image micro-cents. Pricing not on public page — marked needs-verification."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 0
status = "needs-verification"
source_url = "https://fal.ai/pricing"
verified_at = "2026-05-01"

# ============================================================
# fal.ai generation models (image + video) — verified 2026-05-01
# Source: https://fal.ai/models/fal-ai/flux-2-pro
#         https://fal.ai/models/fal-ai/flux-pro/v1.1
#         https://fal.ai/models/fal-ai/kling-video/o3/4k/text-to-video
#         https://fal.ai/models/fal-ai/kling-video/o3/standard/image-to-video
#         https://fal.ai/models/fal-ai/veo3
#         https://fal.ai/models/fal-ai/ideogram/v3
#         https://fal.ai/models/fal-ai/recraft-v3
# SCHEMA NOTE: provider="local" placeholder (real provider: fal).
# capabilities=[] placeholder (enum lacks image-gen/video-gen/etc).
# Image pricing: output_per_mtok_micro = per-image micro-cents (1 unit = 1 image).
# Video pricing: output_per_mtok_micro = per-second micro-cents (1 unit = 1 second).
#
# NOTE(review): the rows in this section list non-empty capabilities
# ("image-gen", "video-gen", ...) although the note above says
# capabilities=[] — confirm which one the loader expects.
# ============================================================

[[models]]
id = "flux-2-pro"
provider = "local"
display_name = "FLUX.2 [pro] Text to Image"
context_tokens = 0
capabilities = ["image-gen", "text-to-image"]
status = "active"
role_tags = ["image-flagship", "image-gen"]
fallback = "flux-pro-1-1"
notes = "fal.ai FLUX.2 [pro]. ZERO-CONFIG: no guidance_scale parameter (model handles internally). Per-megapixel billing: $0.03 first MP + $0.015 per extra MP. 1024x1024 = $0.03, 1920x1080 = $0.045. output_per_mtok_micro = 3000000 (base 1MP price). Real provider: fal."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 3_000_000
status = "verified"
source_url = "https://fal.ai/models/fal-ai/flux-2-pro"
verified_at = "2026-05-01"

[[models]]
id = "flux-pro-1-1"
provider = "local"
display_name = "FLUX.1 [pro] v1.1 Text to Image"
context_tokens = 0
capabilities = ["image-gen", "text-to-image"]
status = "active"
role_tags = ["image-flagship", "image-gen"]
fallback = ""
notes = "fal.ai FLUX Pro 1.1. Per-megapixel billing: $0.04/MP rounded up. Fallback for flux-2-pro. output_per_mtok_micro = 4000000 (1MP base). Real provider: fal."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 4_000_000
status = "verified"
source_url = "https://fal.ai/models/fal-ai/flux-pro/v1.1"
verified_at = "2026-05-01"

[[models]]
id = "kling-o3"
provider = "local"
display_name = "Kling O3 4K Text/Image to Video"
context_tokens = 0
capabilities = ["video-gen", "text-to-video", "image-to-video"]
status = "active"
role_tags = ["video-gen"]
fallback = ""
notes = "fal.ai Kling O3. 4K video generation. Per-second cost: $0.42/s (audio off). Durations 3-15s; default 5s (=$2.10). 2500-char prompt limit (O3 hard limit). Supports elements + voice_ids simultaneously. output_per_mtok_micro = 42000000 (per-second micro-cents). Real provider: fal."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 42_000_000
status = "verified"
source_url = "https://fal.ai/models/fal-ai/kling-video/o3/4k/text-to-video"
verified_at = "2026-05-01"

[[models]]
id = "veo-3"
provider = "local"
display_name = "Google Veo 3 (via fal.ai)"
context_tokens = 0
capabilities = ["video-gen", "text-to-video"]
status = "active"
role_tags = ["video-gen"]
fallback = "kling-o3"
notes = "fal.ai Google Veo 3 partner model. Per-second cost: $0.50/s (audio off) or $0.75/s (audio on). Fast tier: $0.25/s (audio off) or $0.40/s (audio on). output_per_mtok_micro = 50000000 (standard audio-off). Real provider: fal/google."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 50_000_000
status = "verified"
source_url = "https://fal.ai/models/fal-ai/veo3"
verified_at = "2026-05-01"

[[models]]
id = "ideogram-v3"
provider = "local"
display_name = "Ideogram V3 Text to Image"
context_tokens = 0
capabilities = ["image-gen", "text-to-image", "image-edit"]
status = "active"
role_tags = ["image-flagship", "image-gen"]
fallback = ""
notes = "fal.ai Ideogram V3. Per-image tiered: $0.03 TURBO / $0.06 BALANCED / $0.09 QUALITY. Exceptional text/typography rendering. output_per_mtok_micro = 6000000 (BALANCED tier). Real provider: fal."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 6_000_000
status = "verified"
source_url = "https://fal.ai/models/fal-ai/ideogram/v3"
verified_at = "2026-05-01"

[[models]]
id = "recraft-v3"
provider = "local"
display_name = "Recraft V3 Text to Image / Vector"
context_tokens = 0
capabilities = ["image-gen", "text-to-image", "image-edit"]
status = "active"
role_tags = ["image-cheap", "image-gen"]
fallback = ""
notes = "fal.ai Recraft V3. Per-image: $0.04 (raster) or $0.08 (vector_illustration style). Styles: realistic_image, digital_illustration, vector_illustration. Strong text rendering. output_per_mtok_micro = 4000000 (raster base). Real provider: fal."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 4_000_000
status = "verified"
source_url = "https://fal.ai/models/fal-ai/recraft-v3"
verified_at = "2026-05-01"

# ============================================================
# ElevenLabs voice generation — verified 2026-05-01
# Source: https://elevenlabs.io/pricing/api
# SCHEMA NOTE: provider="local" placeholder (real provider: elevenlabs).
# Voice pricing: output_per_mtok_micro = per-1k-char micro-cents
# (1 unit = 1000 characters). input_per_mtok_micro = 0.
# ============================================================

# Voice rows: output_per_mtok_micro holds micro-cents per 1000 characters
# (10_000_000 = $0.10 per 1k chars), as stated in each row's notes.

[[models]]
id = "elevenlabs-v3"
provider = "local"
display_name = "ElevenLabs Turbo v3 (Flash)"
context_tokens = 0
capabilities = ["voice-gen", "text-to-speech", "voice-clone"]
status = "active"
role_tags = ["voice-gen"]
fallback = "elevenlabs-multilingual-v2"
notes = "ElevenLabs high-quality voice model. Per-1k-chars: $0.10. Low latency ~250-300ms. Supports 32 languages. 40k char limit per request. output_per_mtok_micro = 10000000 (per-1k-chars micro-cents). Real provider: elevenlabs. ElevenLabs 3-step pattern: designVoice -> createVoice -> TTS."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 10_000_000
status = "verified"
source_url = "https://elevenlabs.io/pricing/api"
verified_at = "2026-05-01"

[[models]]
id = "elevenlabs-multilingual-v2"
provider = "local"
display_name = "ElevenLabs Multilingual v2"
context_tokens = 0
capabilities = ["voice-gen", "text-to-speech", "voice-clone"]
status = "active"
role_tags = ["voice-gen"]
fallback = ""
notes = "ElevenLabs multilingual voice model (fallback for v3). Per-1k-chars: $0.10. Same pricing tier as v3. Supports 32 languages. output_per_mtok_micro = 10000000. Real provider: elevenlabs."

[models.pricing]
input_per_mtok_micro = 0
output_per_mtok_micro = 10_000_000
status = "verified"
source_url = "https://elevenlabs.io/pricing/api"
verified_at = "2026-05-01"