kei-registries/models.toml
KeiSei84 b90499311e feat: add Claude Code subscription provider (default for the kit)
KeiSeiKit installs into Claude Code, but the wizard offered no Claude under
`subscription` (only OpenAI Codex) — a Claude user picking subscription got no
Claude. Add provider `claude-code` (transport=subscription, endpoint=
local:claude-cli, auth_env=_, no API key) + 3 claude-code models (sonnet
default, opus, haiku), mirroring the codex subscription convention. Listed
before codex so it's the default pick under subscription.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 12:41:16 +08:00

238 lines
8.5 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Model registry. Layer 2 of three.
#
# Every model is bound to a provider through `provider_ref`. Prices are in
# microcents (1e-6 cent = 1e-8 USD) per 1M tokens, so $1.00 is written as
# 100_000_000 — compatible with kei-ledger.cost_micro_cents and
# kei-model-router::pricing.rs.
#
# Price sources: the pricing page of the corresponding provider.
# Every entry carries `verified_at` — the date of the last check. An empty
# string marks an entry whose numbers have not been verified yet.
#
# `deprecated_at = ""` means the model is live (TOML has no null, so "not
# deprecated" is spelled as the empty string). Once a model is deprecated,
# kei-model-router no longer selects it for new invocations, but existing
# ledger records still resolve.
# ─── Anthropic ─────────────────────────────────────────────────────────
# Claude Haiku 4.5 on the `anthropic` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00).
[[model]]
provider_ref = "anthropic"
id = "claude-haiku-4-5-20251001"
slug = "haiku"
display_name = "Claude Haiku 4.5"
context_window = 200000
cost_input_per_mtok_micro = 100_000_000 # $1.00
cost_output_per_mtok_micro = 500_000_000 # $5.00
cache_write_5m_per_mtok_micro = 125_000_000 # $1.25 (1.25x input)
cache_write_1h_per_mtok_micro = 200_000_000 # $2.00 (2x input)
cache_read_per_mtok_micro = 10_000_000 # $0.10 (0.1x input)
# Date the prices above were last checked against the provider's pricing page.
verified_at = "2026-04-30"
# Empty string = not deprecated.
deprecated_at = ""
notes = "Cheapest Claude. Single-edit, formatting, lookup."
# Claude Sonnet 4.6 on the `anthropic` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00).
[[model]]
provider_ref = "anthropic"
id = "claude-sonnet-4-6"
slug = "sonnet"
display_name = "Claude Sonnet 4.6"
context_window = 1000000
cost_input_per_mtok_micro = 300_000_000 # $3.00
cost_output_per_mtok_micro = 1_500_000_000 # $15.00
cache_write_5m_per_mtok_micro = 375_000_000 # $3.75 (1.25x input)
cache_write_1h_per_mtok_micro = 600_000_000 # $6.00 (2x input)
cache_read_per_mtok_micro = 30_000_000 # $0.30 (0.1x input)
# Date the prices above were last checked against the provider's pricing page.
verified_at = "2026-04-30"
# Empty string = not deprecated.
deprecated_at = ""
notes = "RULE 0.20 default for code-implementer-* / researcher-*."
# Claude Opus 4.7 on the `anthropic` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00).
[[model]]
provider_ref = "anthropic"
id = "claude-opus-4-7"
slug = "opus"
display_name = "Claude Opus 4.7"
context_window = 1000000
cost_input_per_mtok_micro = 500_000_000 # $5.00
cost_output_per_mtok_micro = 2_500_000_000 # $25.00
cache_write_5m_per_mtok_micro = 625_000_000 # $6.25 (1.25x input)
cache_write_1h_per_mtok_micro = 1_000_000_000 # $10.00 (2x input)
cache_read_per_mtok_micro = 50_000_000 # $0.50 (0.1x input)
# Date the prices above were last checked against the provider's pricing page.
verified_at = "2026-04-30"
# Empty string = not deprecated.
deprecated_at = ""
# NOTE(review): the 1.35x tokenizer overhead is stated only in this free-text
# note; whether cost estimation accounts for it is not visible here — confirm.
notes = "Architecture, novel reasoning, math derivation. Tokenizer overhead 1.35x vs Sonnet."
# ─── Claude Code (subscription) ──────────────────────────────────────────
# Same Claude models, reached via the `claude` CLI under the user's plan.
# Subscription billing → per-token cost 0 (mirrors the codex convention).
# Claude Sonnet 4.6 reached through the `claude-code` subscription provider.
# Every cost field is 0 because usage is covered by the subscription rather
# than billed per token.
# NOTE(review): `id` and `slug` are identical to the `anthropic` Sonnet entry;
# only `provider_ref` differs. Confirm lookups are keyed by provider + slug
# (or provider + id) and not by slug alone.
# NOTE(review): the notes call this the default model; how the default is
# selected (entry order vs. explicit config) is not visible here — confirm
# before reordering entries.
[[model]]
provider_ref = "claude-code"
id = "claude-sonnet-4-6"
slug = "sonnet"
display_name = "Claude Sonnet 4.6 (Claude Code)"
context_window = 1000000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Date this entry was last checked.
verified_at = "2026-05-22"
# Empty string = not deprecated.
deprecated_at = ""
notes = "Default model. Balanced; RULE 0.20 default for code-implementer-* / researcher-*."
# Claude Opus 4.7 reached through the `claude-code` subscription provider.
# Every cost field is 0 because usage is covered by the subscription rather
# than billed per token.
# NOTE(review): `id` and `slug` are identical to the `anthropic` Opus entry;
# only `provider_ref` differs. Confirm lookups are keyed by provider + slug
# (or provider + id) and not by slug alone.
[[model]]
provider_ref = "claude-code"
id = "claude-opus-4-7"
slug = "opus"
display_name = "Claude Opus 4.7 (Claude Code)"
context_window = 1000000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Date this entry was last checked.
verified_at = "2026-05-22"
# Empty string = not deprecated.
deprecated_at = ""
notes = "Architecture, novel reasoning, math derivation."
# Claude Haiku 4.5 reached through the `claude-code` subscription provider.
# Every cost field is 0 because usage is covered by the subscription rather
# than billed per token.
# NOTE(review): `id` and `slug` are identical to the `anthropic` Haiku entry;
# only `provider_ref` differs. Confirm lookups are keyed by provider + slug
# (or provider + id) and not by slug alone.
[[model]]
provider_ref = "claude-code"
id = "claude-haiku-4-5-20251001"
slug = "haiku"
display_name = "Claude Haiku 4.5 (Claude Code)"
context_window = 200000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Date this entry was last checked.
verified_at = "2026-05-22"
# Empty string = not deprecated.
deprecated_at = ""
notes = "Cheapest Claude. Single-edit, formatting, lookup."
# ─── OpenAI ────────────────────────────────────────────────────────────
# GPT-5 on the `openai` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00) and are an
# unverified placeholder: `verified_at` is empty, so do not rely on them for
# billing-grade decisions.
# TODO(review): the placeholder looks off versus OpenAI's published gpt-5 list
# price (believed to be $1.25 in / $10.00 out per 1M) — confirm against the
# current pricing page, then set `verified_at`.
[[model]]
provider_ref = "openai"
id = "gpt-5"
slug = "gpt-5"
display_name = "GPT-5"
context_window = 400000
cost_input_per_mtok_micro = 200_000_000 # $2.00 (placeholder, verify on use)
cost_output_per_mtok_micro = 800_000_000 # $8.00
# NOTE(review): cache prices of 0 presumably mean "not modelled" rather than
# "free" — confirm how the router costs cached tokens for this provider.
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Empty = never verified.
verified_at = ""
# Empty string = not deprecated.
deprecated_at = ""
notes = "[UNVERIFIED] — re-fetch pricing page before billing-grade decisions."
# GPT-5 Codex reached through the `codex` provider (ChatGPT OAuth).
# Every cost field is 0 because usage draws on the ChatGPT subscription quota
# rather than being billed per token.
[[model]]
provider_ref = "codex"
id = "gpt-5-codex"
slug = "codex"
display_name = "GPT-5 Codex (via ChatGPT OAuth)"
context_window = 200000
cost_input_per_mtok_micro = 0 # subscription, not per-token
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Date this entry was last checked.
verified_at = "2026-05-10"
# Empty string = not deprecated.
deprecated_at = ""
notes = "RULE 0.23 dual-review. ChatGPT Plus/Pro/Team subscription quota."
# ─── xAI ───────────────────────────────────────────────────────────────
# Grok 4 on the `xai` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00) and are
# unverified: `verified_at` is empty.
[[model]]
provider_ref = "xai"
id = "grok-4"
slug = "grok-4"
display_name = "Grok 4"
context_window = 256000
cost_input_per_mtok_micro = 300_000_000 # $3.00 [UNVERIFIED]
cost_output_per_mtok_micro = 1_500_000_000 # $15.00 [UNVERIFIED]
# NOTE(review): cache prices of 0 presumably mean "not modelled" rather than
# "free" — confirm.
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Empty = never verified.
verified_at = ""
# Empty string = not deprecated.
deprecated_at = ""
notes = "[UNVERIFIED] — verify before billing."
# ─── DeepSeek ──────────────────────────────────────────────────────────
# DeepSeek R1 on the `deepseek` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00) and are
# unverified: `verified_at` is empty.
# NOTE(review): unlike the openai/xai entries, `notes` does not carry the
# "[UNVERIFIED]" tag even though `verified_at` is empty — confirm whether any
# consumer relies on that tag.
[[model]]
provider_ref = "deepseek"
id = "deepseek-r1"
slug = "r1"
display_name = "DeepSeek R1"
context_window = 64000
cost_input_per_mtok_micro = 55_000_000 # $0.55 [UNVERIFIED — pricing fluctuates]
cost_output_per_mtok_micro = 219_000_000 # $2.19
# No separate cache-write price is recorded; only cache hits are priced.
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 14_000_000 # $0.14 cache hit
# Empty = never verified.
verified_at = ""
# Empty string = not deprecated.
deprecated_at = ""
notes = "Cheapest reasoning tier. Long thinking time."
# DeepSeek V3 on the `deepseek` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00) and are
# unverified: `verified_at` is empty.
# NOTE(review): unlike the openai/xai entries, `notes` does not carry the
# "[UNVERIFIED]" tag even though `verified_at` is empty — confirm whether any
# consumer relies on that tag.
[[model]]
provider_ref = "deepseek"
id = "deepseek-v3"
slug = "v3"
display_name = "DeepSeek V3"
context_window = 64000
cost_input_per_mtok_micro = 27_000_000 # $0.27 [UNVERIFIED]
cost_output_per_mtok_micro = 110_000_000 # $1.10
# No separate cache-write price is recorded; only cache hits are priced.
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 7_000_000 # $0.07 cache hit
# Empty = never verified.
verified_at = ""
# Empty string = not deprecated.
deprecated_at = ""
notes = "Non-reasoning baseline. Cheap bulk inference."
# ─── Google ────────────────────────────────────────────────────────────
# Gemini 2.5 Pro on the `google` provider, billed per token.
# Prices are microcents per 1M tokens (100_000_000 = $1.00) and are
# unverified: `verified_at` is empty.
# NOTE(review): Google lists a higher rate for prompts above 200k tokens; the
# flat figures below look like the <=200k tier — confirm how long prompts
# should be costed.
[[model]]
provider_ref = "google"
id = "gemini-2.5-pro"
slug = "gemini-pro"
display_name = "Gemini 2.5 Pro"
# Input token limit for gemini-2.5-pro is 1,048,576. The 2M window belonged to
# Gemini 1.5 Pro; advertising it here would let the router send prompts the
# provider rejects.
context_window = 1_048_576
cost_input_per_mtok_micro = 125_000_000 # $1.25 [UNVERIFIED]
cost_output_per_mtok_micro = 1_000_000_000 # $10.00 [UNVERIFIED]
# NOTE(review): cache prices of 0 presumably mean "not modelled" rather than
# "free" — confirm.
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Empty = never verified.
verified_at = ""
# Empty string = not deprecated.
deprecated_at = ""
notes = "1M context. Long-document analysis."
# ─── Local (zero per-token cost) ───────────────────────────────────────
# Llama 3.3 70B served by the `ollama-local` provider.
# Every cost field is 0: per the notes, local compute costs electricity and
# opportunity, not a per-token fee.
[[model]]
provider_ref = "ollama-local"
id = "llama-3.3-70b"
slug = "llama-70b-local"
display_name = "Llama 3.3 70B (Ollama local)"
context_window = 128000
cost_input_per_mtok_micro = 0
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Date this entry was last checked.
verified_at = "2026-05-13"
# Empty string = not deprecated.
deprecated_at = ""
notes = "Local. Compute cost is electricity + opportunity, not per-token."
# Qwen 2.5 Coder 32B served by the `mlx-local` provider.
# Every cost field is 0, matching the section's "zero per-token cost" rule for
# local models.
[[model]]
provider_ref = "mlx-local"
id = "qwen-2.5-coder-32b"
slug = "qwen-coder-local"
display_name = "Qwen 2.5 Coder 32B (MLX)"
context_window = 32000
cost_input_per_mtok_micro = 0
cost_output_per_mtok_micro = 0
cache_write_5m_per_mtok_micro = 0
cache_write_1h_per_mtok_micro = 0
cache_read_per_mtok_micro = 0
# Date this entry was last checked.
verified_at = "2026-05-13"
# Empty string = not deprecated.
deprecated_at = ""
notes = "Code-focused local model. Apple silicon Metal."