kei-registries/providers.toml
Parfii-bot afe0c6f118 feat: add transport layer to provider hierarchy
Иерархия: transport → provider → model → profile.

Поле `transport`:
  direct-api      — прямой HTTP к провайдеру
  aws-bedrock     — AWS Bedrock (IAM/role + region)
  azure-openai    — Azure OpenAI (deployment + key)
  google-vertex   — GCP Vertex AI (service-account JSON)
  local           — Ollama / MLX / LM Studio
  proxy           — LiteLLM / OpenRouter
  subscription    — ChatGPT OAuth (Codex)

Новые провайдеры:
  - anthropic-bedrock — Claude через AWS
  - openai-azure     — GPT через Azure
  - google-vertex    — Gemini через Vertex
  - openrouter       — multi-provider proxy

Существующие провайдеры получили transport-тег.
Используется install/lib-onboarding.sh для интерактивного мастера
выбора language → transport → provider → model → credentials.

14 провайдеров суммарно: 5 direct-api + 1 aws + 1 azure + 1 vertex
+ 3 local + 2 proxy + 1 subscription.
2026-05-17 02:24:19 +08:00

204 lines
7.2 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Реестр провайдеров LLM. SSoT для kei-model-router + keisei-marketplace.
#
# Иерархия: transport → provider → model → profile (agent-profiles.toml).
# Транспорт стабилен (direct-api / aws-bedrock / azure-openai / google-vertex / local / proxy / subscription),
# провайдер стабилен (endpoint, схема аутентификации, биллинг-аккаунт),
# модели за ним меняются — см. models.toml.
#
# Все цены/контекст — в models.toml. Здесь только транспорт + лимиты.
#
# Поле `transport` задаёт способ доступа:
# - direct-api — прямой HTTP к провайдеру (Anthropic, OpenAI, xAI, etc.)
# - aws-bedrock — через AWS Bedrock (IAM/role, регион)
# - azure-openai — через Azure (deployment + Azure key)
# - google-vertex — через GCP Vertex AI (service-account JSON + project/region)
# - local — локальная LLM (Ollama, MLX, LM Studio)
# - proxy — собственный или сторонний прокси (LiteLLM, OpenRouter)
# - subscription — OAuth-привязка через подписку (ChatGPT, Claude.ai)
#
# Конвенция: `id` — kebab-case с суффиксом транспорта, если транспорт не direct-api
# (anthropic, anthropic-bedrock; openai, openai-azure; google, google-vertex).
# Исключения в текущем реестре: codex и openrouter — без суффикса транспорта.
[[provider]]
# Anthropic over direct HTTP. The API key is read from ANTHROPIC_API_KEY and
# sent in the x-api-key header.
id = "anthropic"
display_name = "Anthropic (Direct API)"
transport = "direct-api"
endpoint = "https://api.anthropic.com/v1/messages"
auth_scheme = "x-api-key" # header name; value from env ANTHROPIC_API_KEY
auth_env = "ANTHROPIC_API_KEY"
# Header name and pinned value for API versioning (presumably sent with every
# request; confirm in the router).
api_version_header = "anthropic-version"
api_version_value = "2023-06-01"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 4000
billing_currency = "USD"
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."
[[provider]]
# Claude reached through AWS Bedrock with SigV4-signed requests.
# The notes (in Russian) read: "Claude via AWS Bedrock, for enterprises billed
# through AWS. Region comes from AWS_REGION."
id = "anthropic-bedrock"
display_name = "Anthropic (AWS Bedrock)"
transport = "aws-bedrock"
# {region} is a template placeholder; per the notes it is taken from AWS_REGION.
endpoint = "https://bedrock-runtime.{region}.amazonaws.com"
auth_scheme = "aws-sigv4"
# Comma-separated list of environment variable names.
auth_env = "AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY,AWS_REGION"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0 # per-region quota; see AWS console
billing_currency = "USD-aws"
notes = "Claude через AWS Bedrock — для enterprise с AWS-биллингом. Регион в AWS_REGION."
[[provider]]
# OpenAI over direct HTTP (chat-completions endpoint) with bearer-token auth.
# The ChatGPT-subscription route is a separate entry with the id "codex".
id = "openai"
display_name = "OpenAI (Direct API)"
transport = "direct-api"
endpoint = "https://api.openai.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 5000
billing_currency = "USD"
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."
[[provider]]
# OpenAI models served from an Azure OpenAI deployment, authenticated with an
# api-key header. The notes (in Russian) read: "OpenAI via Azure, for
# enterprises billed through Azure. Requires resource + deployment."
id = "openai-azure"
display_name = "OpenAI (Azure)"
transport = "azure-openai"
# {azure_resource} and {deployment} are template placeholders.
# NOTE(review): the template stops at the deployment path, unlike the other
# OpenAI-style entries that end in /chat/completions. Presumably the router
# appends the operation path; confirm.
endpoint = "https://{azure_resource}.openai.azure.com/openai/deployments/{deployment}"
auth_scheme = "api-key"
# Comma-separated list of environment variable names.
# NOTE(review): AZURE_OPENAI_ENDPOINT is listed here while the endpoint template
# uses {azure_resource}. Confirm which of the two the router actually consumes.
auth_env = "AZURE_OPENAI_API_KEY,AZURE_OPENAI_ENDPOINT,AZURE_OPENAI_DEPLOYMENT"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0 # per-deployment quota; see Azure portal
billing_currency = "USD-azure"
notes = "OpenAI через Azure — для enterprise с Azure-биллингом. Нужны resource+deployment."
[[provider]]
# Codex through a ChatGPT subscription. It is invoked via the local `codex` CLI
# rather than over HTTP, so the endpoint is a pseudo-URL and no credential
# environment variable is declared.
id = "codex"
display_name = "OpenAI Codex (ChatGPT OAuth)"
transport = "subscription"
endpoint = "local:codex-cli" # invoked via `codex` CLI, not HTTP
auth_scheme = "oauth-subscription"
auth_env = "_" # no env — CLI handles auth
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60 # ChatGPT subscription quota
billing_currency = "USD-subscription"
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."
[[provider]]
# xAI (Grok family) over direct HTTP with bearer-token auth. The notes mark the
# API as OpenAI-compatible.
id = "xai"
display_name = "xAI"
transport = "direct-api"
endpoint = "https://api.x.ai/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "XAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Grok family. OpenAI-compatible API."
[[provider]]
# DeepSeek over direct HTTP with bearer-token auth. Its rate_limit_rpm is far
# lower than the other direct-api entries in this registry.
id = "deepseek"
display_name = "DeepSeek"
transport = "direct-api"
endpoint = "https://api.deepseek.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "DEEPSEEK_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 60
billing_currency = "USD" # invoices in USD even if upstream CNY
notes = "Cheapest tier for batch reasoning. R1 reasoner family."
[[provider]]
# Google Gemini over the direct API. The key travels as a `key` query parameter
# in the URL rather than in a header.
id = "google"
display_name = "Google Gemini (Direct API)"
transport = "direct-api"
# Ends at the models collection; presumably the router appends the model name
# and method (confirm).
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
auth_scheme = "query-key" # ?key=<...> in URL
auth_env = "GEMINI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "1M-context window for long-doc analysis."
[[provider]]
# Google Gemini through GCP Vertex AI, authenticated with a service account.
# The notes (in Russian) read: "Gemini via Vertex AI, for enterprises billed
# through GCP. Requires a service-account JSON."
id = "google-vertex"
display_name = "Google Gemini (Vertex AI)"
transport = "google-vertex"
# {region} and {project} are template placeholders; {region} appears twice
# (host name and locations path).
endpoint = "https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/google/models"
auth_scheme = "gcp-service-account"
# Comma-separated list of environment variable names.
auth_env = "GOOGLE_APPLICATION_CREDENTIALS,GCP_PROJECT_ID,GCP_REGION"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 0 # per-project quota; see GCP console
billing_currency = "USD-gcp"
notes = "Gemini через Vertex AI — для enterprise с GCP-биллингом. Нужен service-account JSON."
[[provider]]
# Self-hosted Ollama on the loopback interface; no authentication.
id = "ollama-local"
display_name = "Ollama (local)"
transport = "local"
endpoint = "http://127.0.0.1:11434/api/chat"
auth_scheme = "none"
# "_" is a placeholder: no credential environment variable (auth_scheme is "none").
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0 # local, no remote limit
billing_currency = "USD-zero" # local compute, opportunity cost only
notes = "Self-hosted. Models loaded via `ollama pull`."
[[provider]]
# MLX inference server (mlx_lm.server, per the notes) on the loopback
# interface; no authentication.
id = "mlx-local"
display_name = "MLX (Apple silicon local)"
transport = "local"
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
auth_scheme = "none"
# "_" is a placeholder: no credential environment variable (auth_scheme is "none").
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# 0 presumably means "no remote limit", as annotated on the Ollama entry (confirm).
rate_limit_rpm = 0
billing_currency = "USD-zero"
notes = "Native Apple-silicon inference. mlx_lm.server."
[[provider]]
# LM Studio desktop runner on the loopback interface; no authentication.
# The notes mark the API as OpenAI-compatible.
id = "lmstudio-local"
display_name = "LM Studio (local)"
transport = "local"
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
auth_scheme = "none"
# "_" is a placeholder: no credential environment variable (auth_scheme is "none").
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# 0 presumably means "no remote limit", as annotated on the Ollama entry (confirm).
rate_limit_rpm = 0
billing_currency = "USD-zero"
notes = "Desktop GUI runner. OpenAI-compatible."
[[provider]]
# LiteLLM proxy hosted under keisei.app. Per the notes, a single per-user
# bearer key (KEI_LITELLM_KEY) fronts several upstream providers.
id = "litellm-proxy"
display_name = "LiteLLM proxy (keisei.app)"
transport = "proxy"
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "KEI_LITELLM_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD-keisei"
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."
[[provider]]
# OpenRouter as a third-party multi-provider proxy with bearer-token auth.
# The notes (in Russian) read: "Switches upstream providers by pricing.
# Suitable for exploratory tasks."
id = "openrouter"
display_name = "OpenRouter (multi-provider proxy)"
transport = "proxy"
endpoint = "https://openrouter.ai/api/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENROUTER_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Меняет провайдеров по pricing. Подходит для эксплоративных задач."