# LLM provider registry. SSoT for kei-model-router + keisei-marketplace.
#
# Hierarchy: transport → provider → model → profile (agent-profiles.toml).
# The transport is stable (direct-api / aws-bedrock / azure-openai / google-vertex / local),
# the provider is stable (endpoint, authentication scheme, billing account),
# while the models behind it change — see models.toml.
#
# All prices/context sizes live in models.toml. This file holds only transport + limits.
#
# The `transport` field selects the access method:
#   - direct-api    — direct HTTP to the provider (Anthropic, OpenAI, xAI, etc.)
#   - aws-bedrock   — via AWS Bedrock (IAM/role, region)
#   - azure-openai  — via Azure (deployment + Azure key)
#   - google-vertex — via GCP Vertex AI (service-account JSON + project/region)
#   - local         — local LLM (Ollama, MLX, LM Studio)
#   - proxy         — own or third-party proxy (LiteLLM, OpenRouter)
#   - subscription  — OAuth binding through a subscription (ChatGPT, Claude.ai)
#
# Convention: `id` is kebab-case, with a transport suffix when the transport is not direct-api
# (anthropic, anthropic-bedrock; openai, openai-azure; google, google-vertex).
# NOTE(review): `claude-code`, `codex` and `openrouter` carry no transport suffix —
# confirm whether the convention is meant to cover subscription/proxy entries.

# Anthropic over its direct HTTP API.
[[provider]]
id = "anthropic"
display_name = "Anthropic (Direct API)"
transport = "direct-api"
endpoint = "https://api.anthropic.com/v1/messages"
# `auth_scheme` is the header name; the value comes from env ANTHROPIC_API_KEY.
auth_scheme = "x-api-key"
auth_env = "ANTHROPIC_API_KEY"
# API version pin, expressed as a header name/value pair.
api_version_header = "anthropic-version"
api_version_value = "2023-06-01"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 4000
billing_currency = "USD"
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."
# Claude models reached through AWS Bedrock.
[[provider]]
id = "anthropic-bedrock"
display_name = "Anthropic (AWS Bedrock)"
transport = "aws-bedrock"
# `{region}` is a placeholder inside the URL template.
endpoint = "https://bedrock-runtime.{region}.amazonaws.com"
auth_scheme = "aws-sigv4"
# Comma-separated list of environment variable names.
auth_env = "AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY,AWS_REGION"
retry_max = 3
retry_backoff_ms = 500
# Quota is per region; see the AWS console.
rate_limit_rpm = 0
billing_currency = "USD-aws"
# English gloss of `notes`: Claude via AWS Bedrock — for enterprises billed
# through AWS. The region is taken from AWS_REGION.
notes = "Claude через AWS Bedrock — для enterprise с AWS-биллингом. Регион в AWS_REGION."

# OpenAI over its direct HTTP API.
[[provider]]
id = "openai"
display_name = "OpenAI (Direct API)"
transport = "direct-api"
endpoint = "https://api.openai.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 5000
billing_currency = "USD"
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."

# OpenAI models reached through an Azure deployment.
[[provider]]
id = "openai-azure"
display_name = "OpenAI (Azure)"
transport = "azure-openai"
# `{azure_resource}` and `{deployment}` are placeholders inside the URL template.
endpoint = "https://{azure_resource}.openai.azure.com/openai/deployments/{deployment}"
auth_scheme = "api-key"
# Comma-separated list of environment variable names.
auth_env = "AZURE_OPENAI_API_KEY,AZURE_OPENAI_ENDPOINT,AZURE_OPENAI_DEPLOYMENT"
retry_max = 3
retry_backoff_ms = 500
# Quota is per deployment; see the Azure portal.
rate_limit_rpm = 0
billing_currency = "USD-azure"
# English gloss of `notes`: OpenAI via Azure — for enterprises billed through
# Azure. Requires resource + deployment.
notes = "OpenAI через Azure — для enterprise с Azure-биллингом. Нужны resource+deployment."

# Claude Code driven through the local `claude` CLI under a subscription.
[[provider]]
id = "claude-code"
display_name = "Claude Code (subscription — your claude CLI, no API key)"
transport = "subscription"
# Invoked via the `claude` CLI (Claude Code), not over HTTP.
endpoint = "local:claude-cli"
auth_scheme = "oauth-subscription"
# No env variable — Claude Code handles auth itself (claude.ai Max / Pro).
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# Claude subscription quota.
rate_limit_rpm = 60
billing_currency = "USD-subscription"
notes = "Default for the Claude-Code-native kit. Uses ambient `claude` CLI OAuth; no API key needed."
# OpenAI Codex driven through the local `codex` CLI under a ChatGPT subscription.
[[provider]]
id = "codex"
display_name = "OpenAI Codex (ChatGPT OAuth)"
transport = "subscription"
# Invoked via the `codex` CLI, not over HTTP.
endpoint = "local:codex-cli"
auth_scheme = "oauth-subscription"
# No env variable — the CLI handles auth itself.
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# ChatGPT subscription quota.
rate_limit_rpm = 60
billing_currency = "USD-subscription"
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."

# xAI over its direct HTTP API.
[[provider]]
id = "xai"
display_name = "xAI"
transport = "direct-api"
endpoint = "https://api.x.ai/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "XAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Grok family. OpenAI-compatible API."

# DeepSeek over its direct HTTP API.
[[provider]]
id = "deepseek"
display_name = "DeepSeek"
transport = "direct-api"
endpoint = "https://api.deepseek.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "DEEPSEEK_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 60
# Invoices are issued in USD even if the upstream currency is CNY.
billing_currency = "USD"
notes = "Cheapest tier for batch reasoning. R1 reasoner family."

# Google Gemini over its direct HTTP API.
[[provider]]
id = "google"
display_name = "Google Gemini (Direct API)"
transport = "direct-api"
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
# The key travels in the URL as `?key=<...>`.
auth_scheme = "query-key"
auth_env = "GEMINI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "1M-context window for long-doc analysis."
# Google Gemini reached through GCP Vertex AI.
[[provider]]
id = "google-vertex"
display_name = "Google Gemini (Vertex AI)"
transport = "google-vertex"
# `{region}` and `{project}` are placeholders inside the URL template.
endpoint = "https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/google/models"
auth_scheme = "gcp-service-account"
# Comma-separated list of environment variable names.
auth_env = "GOOGLE_APPLICATION_CREDENTIALS,GCP_PROJECT_ID,GCP_REGION"
retry_max = 3
retry_backoff_ms = 500
# Quota is per project; see the GCP console.
rate_limit_rpm = 0
billing_currency = "USD-gcp"
# English gloss of `notes`: Gemini via Vertex AI — for enterprises billed
# through GCP. Requires a service-account JSON.
notes = "Gemini через Vertex AI — для enterprise с GCP-биллингом. Нужен service-account JSON."

# Ollama running on the local machine.
[[provider]]
id = "ollama-local"
display_name = "Ollama (local)"
transport = "local"
endpoint = "http://127.0.0.1:11434/api/chat"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# Local, so there is no remote limit.
rate_limit_rpm = 0
# Local compute; opportunity cost only.
billing_currency = "USD-zero"
notes = "Self-hosted. Models loaded via `ollama pull`."

# MLX server running on the local machine.
[[provider]]
id = "mlx-local"
display_name = "MLX (Apple silicon local)"
transport = "local"
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0
billing_currency = "USD-zero"
notes = "Native Apple-silicon inference. mlx_lm.server."

# LM Studio running on the local machine.
[[provider]]
id = "lmstudio-local"
display_name = "LM Studio (local)"
transport = "local"
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0
billing_currency = "USD-zero"
notes = "Desktop GUI runner. OpenAI-compatible."

# LiteLLM proxy hosted at keisei.app.
[[provider]]
id = "litellm-proxy"
display_name = "LiteLLM proxy (keisei.app)"
transport = "proxy"
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "KEI_LITELLM_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD-keisei"
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."
# OpenRouter, a third-party multi-provider proxy.
[[provider]]
id = "openrouter"
display_name = "OpenRouter (multi-provider proxy)"
transport = "proxy"
endpoint = "https://openrouter.ai/api/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENROUTER_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
# English gloss of `notes`: Switches providers based on pricing. Suitable for
# exploratory tasks.
notes = "Меняет провайдеров по pricing. Подходит для эксплоративных задач."