# Model registry. Layer 2 of 3.
#
# Every model is bound to a provider through `provider_ref`. Prices are
# integers in microcents (1e-6 cent = 1e-8 USD) per 1M tokens, so
# $1.00 per 1M tokens is written as 100_000_000. Compatible with
# kei-ledger.cost_micro_cents and kei-model-router::pricing.rs.
#
# Price sources: the pricing page of the respective provider.
# Every entry carries `verified_at`, the date of the last check. Entries
# with `verified_at = ""` are the ones tagged [UNVERIFIED].
#
# `deprecated_at = ""` ⇒ the model is live (TOML has no null, so the empty
# string stands in for "not deprecated"). After deprecation
# kei-model-router no longer selects the model for new invocations, but
# existing ledger records keep resolving.

# ─── Anthropic ─────────────────────────────────────────────────────────

[[model]]
provider_ref = "anthropic"
id = "claude-haiku-4-5-20251001"
slug = "haiku"
display_name = "Claude Haiku 4.5"
context_window = 200_000
cost_input_per_mtok_micro = 100_000_000 # $1.00
cost_output_per_mtok_micro = 500_000_000 # $5.00
cache_write_5m_per_mtok_micro = 125_000_000 # $1.25
cache_write_1h_per_mtok_micro = 200_000_000 # $2.00
cache_read_per_mtok_micro = 10_000_000 # $0.10
verified_at = "2026-04-30"
deprecated_at = ""
notes = "Cheapest Claude. Single-edit, formatting, lookup."

[[model]]
provider_ref = "anthropic"
id = "claude-sonnet-4-6"
slug = "sonnet"
display_name = "Claude Sonnet 4.6"
context_window = 1_000_000
cost_input_per_mtok_micro = 300_000_000 # $3.00
cost_output_per_mtok_micro = 1_500_000_000 # $15.00
cache_write_5m_per_mtok_micro = 375_000_000 # $3.75
cache_write_1h_per_mtok_micro = 600_000_000 # $6.00
cache_read_per_mtok_micro = 30_000_000 # $0.30
verified_at = "2026-04-30"
deprecated_at = ""
notes = "RULE 0.20 default for code-implementer-* / researcher-*."

[[model]]
provider_ref = "anthropic"
id = "claude-opus-4-7"
slug = "opus"
display_name = "Claude Opus 4.7"
context_window = 1_000_000
cost_input_per_mtok_micro = 500_000_000 # $5.00
cost_output_per_mtok_micro = 2_500_000_000 # $25.00
cache_write_5m_per_mtok_micro = 625_000_000 # $6.25
cache_write_1h_per_mtok_micro = 1_000_000_000 # $10.00
cache_read_per_mtok_micro = 50_000_000 # $0.50
verified_at = "2026-04-30"
deprecated_at = ""
notes = "Architecture, novel reasoning, math derivation. Tokenizer overhead 1.35x vs Sonnet."

# ─── Claude Code (subscription) ──────────────────────────────────────────
# Same Claude models, reached via the `claude` CLI under the user's plan.
# Subscription billing → per-token cost 0 (mirrors the codex convention).
# The model `id` and `slug` values repeat those of the "anthropic" entries;
# only `provider_ref` tells the two apart.

[[model]]
provider_ref = "claude-code"
id = "claude-sonnet-4-6"
slug = "sonnet"
display_name = "Claude Sonnet 4.6 (Claude Code)"
context_window = 1_000_000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = "2026-05-22"
deprecated_at = ""
notes = "Default model. Balanced; RULE 0.20 default for code-implementer-* / researcher-*."

[[model]]
provider_ref = "claude-code"
id = "claude-opus-4-7"
slug = "opus"
display_name = "Claude Opus 4.7 (Claude Code)"
context_window = 1_000_000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = "2026-05-22"
deprecated_at = ""
notes = "Architecture, novel reasoning, math derivation."

[[model]]
provider_ref = "claude-code"
id = "claude-haiku-4-5-20251001"
slug = "haiku"
display_name = "Claude Haiku 4.5 (Claude Code)"
context_window = 200_000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = "2026-05-22"
deprecated_at = ""
notes = "Cheapest Claude. Single-edit, formatting, lookup."

# ─── OpenAI ────────────────────────────────────────────────────────────

# Prices below are placeholders: `verified_at` is empty, so they have not
# been checked against the provider's pricing page.
[[model]]
provider_ref = "openai"
id = "gpt-5"
slug = "gpt-5"
display_name = "GPT-5"
context_window = 400_000
cost_input_per_mtok_micro = 200_000_000 # $2.00 (placeholder, verify on use)
cost_output_per_mtok_micro = 800_000_000 # $8.00
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = ""
deprecated_at = ""
notes = "[UNVERIFIED] — re-fetch pricing page before billing-grade decisions."

# Listed in the OpenAI section but bound to its own provider, "codex".
[[model]]
provider_ref = "codex"
id = "gpt-5-codex"
slug = "codex"
display_name = "GPT-5 Codex (via ChatGPT OAuth)"
context_window = 200_000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = "2026-05-10"
deprecated_at = ""
notes = "RULE 0.23 dual-review. ChatGPT Plus/Pro/Team subscription quota."

# ─── xAI ───────────────────────────────────────────────────────────────

[[model]]
provider_ref = "xai"
id = "grok-4"
slug = "grok-4"
display_name = "Grok 4"
context_window = 256_000
cost_input_per_mtok_micro = 300_000_000 # [UNVERIFIED]
cost_output_per_mtok_micro = 1_500_000_000
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = ""
deprecated_at = ""
notes = "[UNVERIFIED] — verify before billing."

# ─── DeepSeek ──────────────────────────────────────────────────────────

[[model]]
provider_ref = "deepseek"
id = "deepseek-r1"
slug = "r1"
display_name = "DeepSeek R1"
context_window = 64_000
cost_input_per_mtok_micro = 55_000_000 # $0.55 [UNVERIFIED — pricing fluctuates]
cost_output_per_mtok_micro = 219_000_000 # $2.19
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 14_000_000 # $0.14 cache hit
verified_at = ""
deprecated_at = ""
notes = "Cheapest reasoning tier. Long thinking time."

[[model]]
provider_ref = "deepseek"
id = "deepseek-v3"
slug = "v3"
display_name = "DeepSeek V3"
context_window = 64_000
cost_input_per_mtok_micro = 27_000_000 # $0.27 [UNVERIFIED]
cost_output_per_mtok_micro = 110_000_000 # $1.10
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 7_000_000 # $0.07
verified_at = ""
deprecated_at = ""
notes = "Non-reasoning baseline. Cheap bulk inference."

# ─── Google ────────────────────────────────────────────────────────────

[[model]]
provider_ref = "google"
id = "gemini-2.5-pro"
slug = "gemini-pro"
display_name = "Gemini 2.5 Pro"
# NOTE(review): this entry is unverified; confirm the 2M window against the
# vendor's model page (a ~1M-token input limit may be the published figure).
context_window = 2_000_000
cost_input_per_mtok_micro = 125_000_000 # $1.25 [UNVERIFIED]
cost_output_per_mtok_micro = 1_000_000_000 # $10.00
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = ""
deprecated_at = ""
notes = "2M context. Long-document analysis."

# ─── Local (zero per-token cost) ───────────────────────────────────────

[[model]]
provider_ref = "ollama-local"
id = "llama-3.3-70b"
slug = "llama-70b-local"
display_name = "Llama 3.3 70B (Ollama local)"
context_window = 128_000
cost_input_per_mtok_micro = 0
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = "2026-05-13"
deprecated_at = ""
notes = "Local. Compute cost is electricity + opportunity, not per-token."

# Local model: every per-token price is 0.
[[model]]
provider_ref = "mlx-local"
id = "qwen-2.5-coder-32b"
slug = "qwen-coder-local"
display_name = "Qwen 2.5 Coder 32B (MLX)"
context_window = 32_000
cost_input_per_mtok_micro = 0
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
verified_at = "2026-05-13"
deprecated_at = ""
notes = "Code-focused local model. Apple silicon Metal."