KeiSeiKit-1.0/_blocks/registries/providers.toml
Parfii-bot 3aef8678c0 feat: three-layer agent registries (providers/models/profiles)
Splits agent definition into stable provider + swappable model + role-bound
profile. Adding a new LLM API is one row in providers.toml; new model is
one row in models.toml; agent invocation picks any (provider, model) pair
through agent-profiles.toml default_model_ref.

- providers.toml: 10 providers — anthropic, openai, codex (OAuth), xai,
  deepseek, google, ollama-local, mlx-local, lmstudio-local, litellm-proxy
- models.toml: 11 models with cost_*_per_mtok_micro + context_window +
  verified_at + deprecated_at
- agent-profiles.toml: 18 representative profiles; manifest_path points
  to the canonical .md in ~/.claude/agents/

Three-layer DNA per the new architecture:
  agent-shell::<provider>:<model>:<caps>::<scope8>::<body8>-<nonce8>

This commit only adds registries — kei-model-router still hardcodes
the Claude-only Model enum. Wave 4 will rewire it to read TOML.
2026-05-13 20:51:04 +08:00

131 lines
4.1 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# LLM provider registry. Single source of truth (SSoT) for kei-model-router + keisei-marketplace.
#
# Layer 1 of three: provider -> model -> profile (agent-profiles.toml).
# A provider is stable (endpoint, authentication scheme, billing account);
# the models behind it change — see models.toml.
#
# All pricing/context-window data lives in models.toml. This file holds only transport + limits.
#
# Convention: `id` is kebab-case, with no version numbers. Versions belong to models.
# Anthropic's Messages API. Unlike the bearer-token entries in this registry,
# the key travels in a dedicated header and an API-version header is declared.
[[provider]]
id = "anthropic"
display_name = "Anthropic"
endpoint = "https://api.anthropic.com/v1/messages"
auth_scheme = "x-api-key" # header name; value from env ANTHROPIC_API_KEY
auth_env = "ANTHROPIC_API_KEY"
# Name and value of the version header; Anthropic's API documents
# `anthropic-version` as a required request header.
api_version_header = "anthropic-version"
api_version_value = "2023-06-01"
# Retry/throttle settings. `_ms` is milliseconds; `_rpm` is presumably
# requests per minute — TODO confirm. NOTE(review): whether the backoff is
# fixed or exponential is decided by the consumer and is not visible here.
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 4000
billing_currency = "USD"
# RULE 0.20 is defined outside this file.
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."
# OpenAI Chat Completions over HTTPS with bearer-token auth.
# The ChatGPT-OAuth route mentioned in `notes` is a separate entry (id "codex").
[[provider]]
id = "openai"
display_name = "OpenAI"
endpoint = "https://api.openai.com/v1/chat/completions"
# "bearer" presumably means an `Authorization: Bearer <key>` header, with the
# key read from the environment variable named in `auth_env` — TODO confirm.
auth_scheme = "bearer"
auth_env = "OPENAI_API_KEY"
# `_ms` is milliseconds; `_rpm` is presumably requests per minute.
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 5000
billing_currency = "USD"
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."
# Codex reached through the local `codex` CLI under a ChatGPT subscription
# (OAuth) rather than an HTTP API. Kept apart from the "openai" entry because
# it is billed separately (see `notes`).
[[provider]]
id = "codex"
display_name = "OpenAI Codex (ChatGPT OAuth)"
# Not a URL: the `local:` prefix marks a CLI invocation, so a consumer cannot
# treat every `endpoint` in this file as an HTTP address.
endpoint = "local:codex-cli" # invoked via `codex` CLI, not HTTP
auth_scheme = "oauth-subscription"
# "_" is the placeholder for "no environment variable".
auth_env = "_" # no env — CLI handles auth
# NOTE(review): with zero backoff, confirm whether retry_max = 1 means
# "one attempt in total" or "one retry after the first failure".
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60 # ChatGPT subscription quota
billing_currency = "USD-subscription"
# RULE 0.23 is defined outside this file.
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."
# xAI chat-completions endpoint with bearer-token auth; `notes` describes the
# API as OpenAI-compatible.
[[provider]]
id = "xai"
display_name = "xAI"
endpoint = "https://api.x.ai/v1/chat/completions"
# Key is presumably read from the environment variable named in `auth_env`.
auth_scheme = "bearer"
auth_env = "XAI_API_KEY"
# `_ms` is milliseconds; `_rpm` is presumably requests per minute.
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Grok family. OpenAI-compatible API."
# DeepSeek chat-completions endpoint with bearer-token auth. `notes` positions
# it as the low-cost option for batch reasoning.
[[provider]]
id = "deepseek"
display_name = "DeepSeek"
endpoint = "https://api.deepseek.com/v1/chat/completions"
# Key is presumably read from the environment variable named in `auth_env`.
auth_scheme = "bearer"
auth_env = "DEEPSEEK_API_KEY"
# `_ms` is milliseconds; `_rpm` is presumably requests per minute. The rpm
# value here is far lower than the other hosted HTTP entries in this file.
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 60
billing_currency = "USD" # invoices in USD even if upstream CNY
notes = "Cheapest tier for batch reasoning. R1 reasoner family."
# Google Gemini (Generative Language API). Differs from the other hosted
# entries in two ways: the key is passed as a URL query parameter, and the
# endpoint stops at the `/models` collection.
[[provider]]
id = "google"
display_name = "Google Gemini"
# NOTE(review): this looks like a base path — presumably the caller appends
# the model name and method; confirm against the consumer.
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
# NOTE(review): a key carried in the URL can end up in request logs. The
# Gemini API also accepts the key in an `x-goog-api-key` header — consider
# whether a header-based scheme is preferable.
auth_scheme = "query-key" # ?key=<...> in URL
auth_env = "GEMINI_API_KEY"
# `_ms` is milliseconds; `_rpm` is presumably requests per minute.
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "1M-context window for long-doc analysis."
# Ollama server on the loopback interface. No authentication and no remote
# quota, so the auth and limit fields hold placeholder values.
[[provider]]
id = "ollama-local"
display_name = "Ollama (local)"
endpoint = "http://127.0.0.1:11434/api/chat"
auth_scheme = "none"
# "_" is the placeholder for "no environment variable" (the codex entry
# annotates the same value as "no env").
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# NOTE(review): 0 is used as "no limit"; confirm the consumer does not read
# it as "zero requests allowed".
rate_limit_rpm = 0 # local, no remote limit
billing_currency = "USD-zero" # local compute, opportunity cost only
notes = "Self-hosted. Models loaded via `ollama pull`."
# MLX inference server on the loopback interface (`notes` names
# mlx_lm.server). No authentication; auth and limit fields are placeholders.
[[provider]]
id = "mlx-local"
display_name = "MLX (Apple silicon local)"
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
auth_scheme = "none"
# "_" is the placeholder for "no environment variable" (the codex entry
# annotates the same value as "no env").
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# 0 stands for "no remote limit", as annotated on the ollama-local entry.
# NOTE(review): confirm the consumer does not read 0 as "zero requests allowed".
rate_limit_rpm = 0
# "USD-zero": local compute, per the annotation on the ollama-local entry.
billing_currency = "USD-zero"
notes = "Native Apple-silicon inference. mlx_lm.server."
# LM Studio's local server on the loopback interface; `notes` describes it as
# OpenAI-compatible. No authentication; auth and limit fields are placeholders.
[[provider]]
id = "lmstudio-local"
display_name = "LM Studio (local)"
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
auth_scheme = "none"
# "_" is the placeholder for "no environment variable" (the codex entry
# annotates the same value as "no env").
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# 0 stands for "no remote limit", as annotated on the ollama-local entry.
# NOTE(review): confirm the consumer does not read 0 as "zero requests allowed".
rate_limit_rpm = 0
# "USD-zero": local compute, per the annotation on the ollama-local entry.
billing_currency = "USD-zero"
notes = "Desktop GUI runner. OpenAI-compatible."
# LiteLLM proxy hosted under keisei.app. Per `notes`, one bearer key fronts
# several upstream providers that also have direct entries in this registry.
[[provider]]
id = "litellm-proxy"
display_name = "LiteLLM proxy (keisei.app)"
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
# Key is presumably read from the environment variable named in `auth_env`.
auth_scheme = "bearer"
auth_env = "KEI_LITELLM_KEY"
# `_ms` is milliseconds; `_rpm` is presumably requests per minute.
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
# NOTE(review): "USD-keisei" is not explained in this file — presumably
# spend is billed through the keisei.app account; confirm.
billing_currency = "USD-keisei"
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."