# LLM provider registry. SSoT for kei-model-router + keisei-marketplace.
#
# Layer 1 of three: provider → model → profile (agent-profiles.toml).
# A provider is stable (endpoint, authentication scheme, billing account);
# the models behind it change — see models.toml.
#
# All prices/context sizes live in models.toml. Only transport + limits here.
#
# Convention: `id` is kebab-case, without versions. Versions belong to models.

[[provider]]
id = "anthropic"
display_name = "Anthropic"
endpoint = "https://api.anthropic.com/v1/messages"
auth_scheme = "x-api-key"  # header name; value from env ANTHROPIC_API_KEY
auth_env = "ANTHROPIC_API_KEY"
api_version_header = "anthropic-version"
api_version_value = "2023-06-01"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 4000
billing_currency = "USD"
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."

[[provider]]
id = "openai"
display_name = "OpenAI"
endpoint = "https://api.openai.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "OPENAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 5000
billing_currency = "USD"
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."

[[provider]]
id = "codex"
display_name = "OpenAI Codex (ChatGPT OAuth)"
endpoint = "local:codex-cli"  # invoked via `codex` CLI, not HTTP
auth_scheme = "oauth-subscription"
auth_env = "_"  # no env — CLI handles auth
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60  # ChatGPT subscription quota
billing_currency = "USD-subscription"
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."

[[provider]]
id = "xai"
display_name = "xAI"
endpoint = "https://api.x.ai/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "XAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Grok family. OpenAI-compatible API."
[[provider]]
id = "deepseek"
display_name = "DeepSeek"
endpoint = "https://api.deepseek.com/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "DEEPSEEK_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 60
billing_currency = "USD"  # invoices in USD even if upstream CNY
notes = "Cheapest tier for batch reasoning. R1 reasoner family."

[[provider]]
id = "google"
display_name = "Google Gemini"
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
auth_scheme = "query-key"  # ?key=<...> in URL
auth_env = "GEMINI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "1M-context window for long-doc analysis."

# Local runners below share one shape: auth_scheme = "none", a "_"
# placeholder for auth_env, a single attempt with no backoff, and
# rate_limit_rpm = 0.

[[provider]]
id = "ollama-local"
display_name = "Ollama (local)"
endpoint = "http://127.0.0.1:11434/api/chat"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0  # local, no remote limit
billing_currency = "USD-zero"  # local compute, opportunity cost only
notes = "Self-hosted. Models loaded via `ollama pull`."

[[provider]]
id = "mlx-local"
display_name = "MLX (Apple silicon local)"
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0
billing_currency = "USD-zero"
notes = "Native Apple-silicon inference. mlx_lm.server."

[[provider]]
id = "lmstudio-local"
display_name = "LM Studio (local)"
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
auth_scheme = "none"
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 0
billing_currency = "USD-zero"
notes = "Desktop GUI runner. OpenAI-compatible."

[[provider]]
id = "litellm-proxy"
display_name = "LiteLLM proxy (keisei.app)"
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
auth_scheme = "bearer"
auth_env = "KEI_LITELLM_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD-keisei"
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."