KeiSeiKit installs into Claude Code, but the wizard offered no Claude under `subscription` (only OpenAI Codex) — a Claude user picking subscription got no Claude. Add provider `claude-code` (transport=subscription, endpoint=local:claude-cli, auth_env=_, no API key) + 3 claude-code models (sonnet default, opus, haiku), mirroring the codex subscription convention. Listed before codex so it's the default pick under subscription. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
217 lines
7.8 KiB
TOML
217 lines
7.8 KiB
TOML
# LLM provider registry. Single source of truth (SSoT) for kei-model-router + keisei-marketplace.
#
# Hierarchy: transport → provider → model → profile (agent-profiles.toml).
# The transport is stable (direct-api / aws-bedrock / azure-openai / google-vertex /
# local / proxy / subscription), the provider is stable (endpoint, authentication
# scheme, billing account), while the models behind it change — see models.toml.
#
# All pricing and context sizes live in models.toml. This file holds only transport + limits.
#
# The `transport` field defines the access method:
#   - direct-api    — direct HTTP to the provider (Anthropic, OpenAI, xAI, etc.)
#   - aws-bedrock   — via AWS Bedrock (IAM/role, region)
#   - azure-openai  — via Azure (deployment + Azure key)
#   - google-vertex — via GCP Vertex AI (service-account JSON + project/region)
#   - local         — local LLM (Ollama, MLX, LM Studio)
#   - proxy         — own or third-party proxy (LiteLLM, OpenRouter)
#   - subscription  — OAuth binding through a subscription (ChatGPT, Claude.ai)
#
# Convention: `id` is kebab-case with a transport suffix when the transport is not direct-api
# (anthropic, anthropic-bedrock; openai, openai-azure; google, google-vertex).

# Anthropic over its direct HTTP API. The key is read from ANTHROPIC_API_KEY and
# sent in the `x-api-key` header, together with a pinned `anthropic-version` header.
[[provider]]
id = "anthropic"
display_name = "Anthropic (Direct API)"
transport = "direct-api"
endpoint = "https://api.anthropic.com/v1/messages"
auth_scheme = "x-api-key"  # header name; value from env ANTHROPIC_API_KEY
auth_env = "ANTHROPIC_API_KEY"
api_version_header = "anthropic-version"
api_version_value = "2023-06-01"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 4000
billing_currency = "USD"
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."

# Claude through AWS Bedrock. `{region}` in the endpoint is a placeholder;
# `auth_env` is a comma-separated list of the AWS variables used for SigV4 signing.
[[provider]]
id = "anthropic-bedrock"
display_name = "Anthropic (AWS Bedrock)"
transport = "aws-bedrock"
endpoint = "https://bedrock-runtime.{region}.amazonaws.com"
auth_scheme = "aws-sigv4"
auth_env = "AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY,AWS_REGION"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0  # per-region quota; see AWS console
billing_currency = "USD-aws"
notes = "Claude через AWS Bedrock — для enterprise с AWS-биллингом. Регион в AWS_REGION."

# OpenAI over its direct HTTP API with a bearer token from OPENAI_API_KEY.
# The ChatGPT-subscription route is a separate provider entry (`codex`).
[[provider]]
id = "openai"
display_name = "OpenAI (Direct API)"
transport = "direct-api"
endpoint = "https://api.openai.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 5000
billing_currency = "USD"
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."

# OpenAI models through Azure. `{azure_resource}` and `{deployment}` in the
# endpoint are placeholders; `auth_env` is a comma-separated list of the Azure variables.
[[provider]]
id = "openai-azure"
display_name = "OpenAI (Azure)"
transport = "azure-openai"
endpoint = "https://{azure_resource}.openai.azure.com/openai/deployments/{deployment}"
auth_scheme = "api-key"
auth_env = "AZURE_OPENAI_API_KEY,AZURE_OPENAI_ENDPOINT,AZURE_OPENAI_DEPLOYMENT"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0  # per-deployment quota; see Azure portal
billing_currency = "USD-azure"
notes = "OpenAI через Azure — для enterprise с Azure-биллингом. Нужны resource+deployment."

# Claude through the local `claude` CLI (Claude Code) instead of HTTP.
# Authentication is left to the CLI, so `auth_env` holds the `_` placeholder.
[[provider]]
id = "claude-code"
display_name = "Claude Code (subscription — your claude CLI, no API key)"
transport = "subscription"
endpoint = "local:claude-cli"  # invoked via the `claude` CLI (Claude Code), not HTTP
auth_scheme = "oauth-subscription"
auth_env = "_"  # no env — Claude Code handles auth (claude.ai Max / Pro)
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60  # Claude subscription quota
billing_currency = "USD-subscription"
notes = "Default for the Claude-Code-native kit. Uses ambient `claude` CLI OAuth; no API key needed."

# OpenAI Codex through the local `codex` CLI instead of HTTP.
# Authentication is left to the CLI, so `auth_env` holds the `_` placeholder.
[[provider]]
id = "codex"
display_name = "OpenAI Codex (ChatGPT OAuth)"
transport = "subscription"
endpoint = "local:codex-cli"  # invoked via `codex` CLI, not HTTP
auth_scheme = "oauth-subscription"
auth_env = "_"  # no env — CLI handles auth
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60  # ChatGPT subscription quota
billing_currency = "USD-subscription"
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."

# xAI over its direct HTTP API with a bearer token from XAI_API_KEY.
[[provider]]
id = "xai"
display_name = "xAI"
transport = "direct-api"
endpoint = "https://api.x.ai/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "XAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Grok family. OpenAI-compatible API."

# DeepSeek over its direct HTTP API with a bearer token from DEEPSEEK_API_KEY.
[[provider]]
id = "deepseek"
display_name = "DeepSeek"
transport = "direct-api"
endpoint = "https://api.deepseek.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "DEEPSEEK_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 60
billing_currency = "USD"  # invoices in USD even if upstream CNY
notes = "Cheapest tier for batch reasoning. R1 reasoner family."

# Google Gemini over its direct HTTP API. The key from GEMINI_API_KEY is passed
# as a URL query parameter rather than in a header.
[[provider]]
id = "google"
display_name = "Google Gemini (Direct API)"
transport = "direct-api"
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
auth_scheme = "query-key"  # ?key=<...> in URL
auth_env = "GEMINI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "1M-context window for long-doc analysis."

# Google Gemini through GCP Vertex AI. `{region}` and `{project}` in the endpoint
# are placeholders; `auth_env` is a comma-separated list of the GCP variables.
[[provider]]
id = "google-vertex"
display_name = "Google Gemini (Vertex AI)"
transport = "google-vertex"
endpoint = "https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/google/models"
auth_scheme = "gcp-service-account"
auth_env = "GOOGLE_APPLICATION_CREDENTIALS,GCP_PROJECT_ID,GCP_REGION"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0  # per-project quota; see GCP console
billing_currency = "USD-gcp"
notes = "Gemini через Vertex AI — для enterprise с GCP-биллингом. Нужен service-account JSON."

# Ollama on the loopback interface. No authentication, so `auth_env` holds
# the `_` placeholder.
[[provider]]
id = "ollama-local"
display_name = "Ollama (local)"
transport = "local"
endpoint = "http://127.0.0.1:11434/api/chat"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0  # local, no remote limit
billing_currency = "USD-zero"  # local compute, opportunity cost only
notes = "Self-hosted. Models loaded via `ollama pull`."

# MLX server on the loopback interface. No authentication, so `auth_env` holds
# the `_` placeholder.
[[provider]]
id = "mlx-local"
display_name = "MLX (Apple silicon local)"
transport = "local"
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0  # local, no remote limit
billing_currency = "USD-zero"  # local compute, opportunity cost only
notes = "Native Apple-silicon inference. mlx_lm.server."

# LM Studio on the loopback interface. No authentication, so `auth_env` holds
# the `_` placeholder.
[[provider]]
id = "lmstudio-local"
display_name = "LM Studio (local)"
transport = "local"
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0  # local, no remote limit
billing_currency = "USD-zero"  # local compute, opportunity cost only
notes = "Desktop GUI runner. OpenAI-compatible."

# LiteLLM proxy at api.keisei.app; bearer token read from KEI_LITELLM_KEY.
[[provider]]
id = "litellm-proxy"
display_name = "LiteLLM proxy (keisei.app)"
transport = "proxy"
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "KEI_LITELLM_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD-keisei"
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."

# OpenRouter multi-provider proxy; bearer token read from OPENROUTER_API_KEY.
[[provider]]
id = "openrouter"
display_name = "OpenRouter (multi-provider proxy)"
transport = "proxy"
endpoint = "https://openrouter.ai/api/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENROUTER_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Меняет провайдеров по pricing. Подходит для эксплоративных задач."