kei-registries/providers.toml
KeiSei84 b90499311e feat: add Claude Code subscription provider (default for the kit)
KeiSeiKit installs into Claude Code, but the wizard offered no Claude under
`subscription` (only OpenAI Codex) — a Claude user picking subscription got no
Claude. Add provider `claude-code` (transport=subscription, endpoint=
local:claude-cli, auth_env=_, no API key) + 3 claude-code models (sonnet
default, opus, haiku), mirroring the codex subscription convention. Listed
before codex so it's the default pick under subscription.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 12:41:16 +08:00

217 lines
7.8 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# LLM provider registry. SSoT (single source of truth) for kei-model-router + keisei-marketplace.
#
# Hierarchy: transport → provider → model → profile (agent-profiles.toml).
# The transport is stable (direct-api / aws-bedrock / azure-openai / google-vertex /
# local / proxy / subscription), the provider is stable (endpoint, auth scheme,
# billing account), and the models behind it change — see models.toml.
#
# All pricing/context data lives in models.toml. This file holds only transport + limits.
#
# The `transport` field selects how the provider is reached:
# - direct-api — direct HTTP to the provider (Anthropic, OpenAI, xAI, etc.)
# - aws-bedrock — via AWS Bedrock (IAM/role, region)
# - azure-openai — via Azure (deployment + Azure key)
# - google-vertex — via GCP Vertex AI (service-account JSON + project/region)
# - local — a local LLM (Ollama, MLX, LM Studio)
# - proxy — an own or third-party proxy (LiteLLM, OpenRouter)
# - subscription — OAuth binding through a subscription (ChatGPT, Claude.ai)
#
# Convention: `id` is kebab-case, with a transport suffix when the transport is not direct-api
# (anthropic, anthropic-bedrock; openai, openai-azure; google, google-vertex).
# Note: some ids in this file are product names without a transport suffix
# (claude-code, codex, openrouter).
[[provider]]
# Anthropic reached over its own HTTP API (transport = direct-api).
id = "anthropic"
display_name = "Anthropic (Direct API)"
transport = "direct-api"
endpoint = "https://api.anthropic.com/v1/messages"
auth_scheme = "x-api-key" # header name; value from env ANTHROPIC_API_KEY
auth_env = "ANTHROPIC_API_KEY"
# Header name/value pair pinning the API version; presumably attached to every
# request by the consumer — TODO confirm in kei-model-router.
api_version_header = "anthropic-version"
api_version_value = "2023-06-01"
retry_max = 3
retry_backoff_ms = 500 # milliseconds, per the key suffix
rate_limit_rpm = 4000 # requests per minute, per the key suffix
billing_currency = "USD"
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."
[[provider]]
# Claude served through AWS Bedrock, for enterprises billed via AWS
# (English gloss of `notes`).
id = "anthropic-bedrock"
display_name = "Anthropic (AWS Bedrock)"
transport = "aws-bedrock"
# `{region}` is a literal placeholder: TOML performs no interpolation, so the
# consumer must substitute it. `notes` says the region comes from AWS_REGION.
endpoint = "https://bedrock-runtime.{region}.amazonaws.com"
auth_scheme = "aws-sigv4"
# Several env var names packed into one comma-separated string.
auth_env = "AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY,AWS_REGION"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0 # no fixed value here: quota is per-region; see AWS console
billing_currency = "USD-aws"
notes = "Claude через AWS Bedrock — для enterprise с AWS-биллингом. Регион в AWS_REGION."
[[provider]]
# OpenAI reached over its own HTTP API (transport = direct-api), chat-completions endpoint.
# The ChatGPT-subscription path is a separate record with id "codex" (see `notes`).
id = "openai"
display_name = "OpenAI (Direct API)"
transport = "direct-api"
endpoint = "https://api.openai.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENAI_API_KEY" # env var presumably holding the bearer token — TODO confirm
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 5000
billing_currency = "USD"
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."
[[provider]]
# OpenAI models served through Azure, for enterprises billed via Azure; requires a
# resource and a deployment (English gloss of `notes`).
id = "openai-azure"
display_name = "OpenAI (Azure)"
transport = "azure-openai"
# `{azure_resource}` and `{deployment}` are literal placeholders; TOML does not expand
# them, so the consumer must substitute both.
# NOTE(review): auth_env has no entry named after `{azure_resource}` — confirm how it is
# derived (possibly from AZURE_OPENAI_ENDPOINT).
endpoint = "https://{azure_resource}.openai.azure.com/openai/deployments/{deployment}"
auth_scheme = "api-key"
# Several env var names packed into one comma-separated string.
auth_env = "AZURE_OPENAI_API_KEY,AZURE_OPENAI_ENDPOINT,AZURE_OPENAI_DEPLOYMENT"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0 # no fixed value here: quota is per-deployment; see Azure portal
billing_currency = "USD-azure"
notes = "OpenAI через Azure — для enterprise с Azure-биллингом. Нужны resource+deployment."
[[provider]]
# Claude Code through the user's subscription: calls go through the local `claude` CLI
# rather than HTTP, and no API key is configured.
id = "claude-code"
display_name = "Claude Code (subscription — your claude CLI, no API key)"
transport = "subscription"
# `local:` pseudo-scheme instead of a URL.
endpoint = "local:claude-cli" # invoked via the `claude` CLI (Claude Code), not HTTP
auth_scheme = "oauth-subscription"
auth_env = "_" # placeholder, no env — Claude Code handles auth (claude.ai Max / Pro)
# NOTE(review): unclear from this file whether retry_max counts attempts or retries — confirm.
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60 # Claude subscription quota
billing_currency = "USD-subscription"
notes = "Default for the Claude-Code-native kit. Uses ambient `claude` CLI OAuth; no API key needed."
[[provider]]
# OpenAI Codex through a ChatGPT subscription: calls go through the local `codex` CLI
# rather than HTTP. Billed separately from the API providers (see `notes`).
id = "codex"
display_name = "OpenAI Codex (ChatGPT OAuth)"
transport = "subscription"
# `local:` pseudo-scheme instead of a URL.
endpoint = "local:codex-cli" # invoked via `codex` CLI, not HTTP
auth_scheme = "oauth-subscription"
auth_env = "_" # placeholder, no env — CLI handles auth
# NOTE(review): unclear from this file whether retry_max counts attempts or retries — confirm.
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60 # ChatGPT subscription quota
billing_currency = "USD-subscription"
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."
[[provider]]
# xAI (Grok family) reached over its own HTTP API; `notes` describes the API as
# OpenAI-compatible.
id = "xai"
display_name = "xAI"
transport = "direct-api"
endpoint = "https://api.x.ai/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "XAI_API_KEY" # env var presumably holding the bearer token — TODO confirm
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Grok family. OpenAI-compatible API."
[[provider]]
# DeepSeek reached over its own HTTP API; `notes` positions it as the cheapest tier
# for batch reasoning.
id = "deepseek"
display_name = "DeepSeek"
transport = "direct-api"
endpoint = "https://api.deepseek.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "DEEPSEEK_API_KEY" # env var presumably holding the bearer token — TODO confirm
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 60
billing_currency = "USD" # invoices in USD even if upstream CNY
notes = "Cheapest tier for batch reasoning. R1 reasoner family."
[[provider]]
# Google Gemini reached over its own HTTP API (transport = direct-api).
id = "google"
display_name = "Google Gemini (Direct API)"
transport = "direct-api"
# Base path ending in `/models`; presumably the consumer appends the model name and
# method — TODO confirm in kei-model-router.
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
# NOTE(review): a key carried in the query string can end up in URL logs — confirm
# that request logging redacts it.
auth_scheme = "query-key" # ?key=<...> in URL
auth_env = "GEMINI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "1M-context window for long-doc analysis."
[[provider]]
# Gemini served through Vertex AI, for enterprises billed via GCP; requires a
# service-account JSON (English gloss of `notes`).
id = "google-vertex"
display_name = "Google Gemini (Vertex AI)"
transport = "google-vertex"
# `{region}` and `{project}` are literal placeholders; TOML does not expand them, so the
# consumer must substitute them — presumably from GCP_REGION / GCP_PROJECT_ID; verify.
endpoint = "https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/google/models"
auth_scheme = "gcp-service-account"
# Several env var names packed into one comma-separated string.
auth_env = "GOOGLE_APPLICATION_CREDENTIALS,GCP_PROJECT_ID,GCP_REGION"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0 # no fixed value here: quota is per-project; see GCP console
billing_currency = "USD-gcp"
notes = "Gemini через Vertex AI — для enterprise с GCP-биллингом. Нужен service-account JSON."
[[provider]]
# Self-hosted Ollama on the loopback interface; no authentication configured.
id = "ollama-local"
display_name = "Ollama (local)"
transport = "local"
endpoint = "http://127.0.0.1:11434/api/chat"
auth_scheme = "none"
auth_env = "_" # placeholder: no env var, since auth_scheme is "none"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0 # local, no remote limit
billing_currency = "USD-zero" # local compute, opportunity cost only
notes = "Self-hosted. Models loaded via `ollama pull`."
[[provider]]
# MLX inference on Apple silicon via a loopback HTTP server (`notes` names
# mlx_lm.server); no authentication configured.
id = "mlx-local"
display_name = "MLX (Apple silicon local)"
transport = "local"
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
auth_scheme = "none"
auth_env = "_" # placeholder: no env var, since auth_scheme is "none"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0 # local endpoint; presumably no remote limit applies
billing_currency = "USD-zero"
notes = "Native Apple-silicon inference. mlx_lm.server."
[[provider]]
# LM Studio desktop runner on the loopback interface; `notes` describes it as
# OpenAI-compatible. No authentication configured.
id = "lmstudio-local"
display_name = "LM Studio (local)"
transport = "local"
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
auth_scheme = "none"
auth_env = "_" # placeholder: no env var, since auth_scheme is "none"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0 # local endpoint; presumably no remote limit applies
billing_currency = "USD-zero"
notes = "Desktop GUI runner. OpenAI-compatible."
[[provider]]
# LiteLLM proxy hosted at keisei.app: per `notes`, one per-user bearer key fronts
# several upstream providers (anthropic/openai/xai/deepseek).
id = "litellm-proxy"
display_name = "LiteLLM proxy (keisei.app)"
transport = "proxy"
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "KEI_LITELLM_KEY" # env var presumably holding the per-user bearer key — TODO confirm
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD-keisei"
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."
[[provider]]
# OpenRouter multi-provider proxy. English gloss of `notes`: switches providers by
# pricing; suited to exploratory tasks.
id = "openrouter"
display_name = "OpenRouter (multi-provider proxy)"
transport = "proxy"
endpoint = "https://openrouter.ai/api/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENROUTER_API_KEY" # env var presumably holding the bearer token — TODO confirm
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Меняет провайдеров по pricing. Подходит для эксплоративных задач."