# LLM provider registry. Single source of truth (SSoT) for kei-model-router + keisei-marketplace.
#
# Layer 1 of three: provider → model → profile (agent-profiles.toml).
# A provider is stable (endpoint, auth scheme, billing account);
# the models behind it change — see models.toml.
#
# All pricing/context data lives in models.toml. This file holds only transport + limits.
#
# Convention: `id` is kebab-case, without versions. Versions belong in the models.

# Anthropic: hosted API reached over HTTPS at the Messages endpoint.
[[provider]]
id = "anthropic"
display_name = "Anthropic"
endpoint = "https://api.anthropic.com/v1/messages"
# In this entry auth_scheme holds the name of the request header that carries
# the key; auth_env names the environment variable the key is read from.
auth_scheme = "x-api-key"          # header name; value from env ANTHROPIC_API_KEY
auth_env = "ANTHROPIC_API_KEY"
# Header name/value pair that pins the API version.
# NOTE(review): presumably sent on every request — confirm in the router.
api_version_header = "anthropic-version"
api_version_value = "2023-06-01"
# NOTE(review): units inferred from the key suffixes (_ms = milliseconds,
# _rpm = presumably requests per minute) — confirm against the router.
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 4000
billing_currency = "USD"
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."

# OpenAI: hosted API, Chat Completions endpoint, authenticated with an API key.
# The ChatGPT-OAuth Codex route is a separate entry (id = "codex"), as the
# notes string points out.
[[provider]]
id = "openai"
display_name = "OpenAI"
endpoint = "https://api.openai.com/v1/chat/completions"
# auth_env is the name of the environment variable holding the key.
# NOTE(review): "bearer" presumably means an `Authorization: Bearer` header — confirm.
auth_scheme = "bearer"
auth_env = "OPENAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 5000
billing_currency = "USD"
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."

# OpenAI Codex driven through the local `codex` CLI under a ChatGPT
# subscription, so this entry has neither an HTTP endpoint nor an API key.
[[provider]]
id = "codex"
display_name = "OpenAI Codex (ChatGPT OAuth)"
endpoint = "local:codex-cli"       # invoked via `codex` CLI, not HTTP
auth_scheme = "oauth-subscription"
# "_" is a placeholder meaning "no environment variable".
auth_env = "_"                      # no env — CLI handles auth
# NOTE(review): retry_max = 1 with zero backoff — unclear from this file whether
# that is a single attempt or one retry; confirm how the router counts.
retry_max = 1
retry_backoff_ms = 0
rate_limit_rpm = 60                # ChatGPT subscription quota
# Tagged value rather than a bare currency code; the notes string says billing
# is separate from the API providers.
billing_currency = "USD-subscription"
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."

# xAI: hosted API with bearer-token auth. The notes string describes it as
# OpenAI-compatible, and the endpoint path has the chat-completions shape.
[[provider]]
id = "xai"
display_name = "xAI"
endpoint = "https://api.x.ai/v1/chat/completions"
# auth_env is the name of the environment variable holding the key.
auth_scheme = "bearer"
auth_env = "XAI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "Grok family. OpenAI-compatible API."

# DeepSeek: hosted API with bearer-token auth.
[[provider]]
id = "deepseek"
display_name = "DeepSeek"
endpoint = "https://api.deepseek.com/v1/chat/completions"
# auth_env is the name of the environment variable holding the key.
auth_scheme = "bearer"
auth_env = "DEEPSEEK_API_KEY"
retry_max = 3
retry_backoff_ms = 500
# NOTE(review): far lower than the other hosted API-key providers in this
# file (1000-5000) — confirm the value is intentional.
rate_limit_rpm = 60
billing_currency = "USD"           # invoices in USD even if upstream CNY
notes = "Cheapest tier for batch reasoning. R1 reasoner family."

# Google Gemini: hosted API authenticated with a key passed in the URL.
[[provider]]
id = "google"
display_name = "Google Gemini"
# NOTE(review): unlike the other HTTP entries this is a collection path
# (".../models"), not a per-call URL; presumably the router appends the model
# name and method — confirm.
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
# NOTE(review): a key carried in the query string can end up in URL logs —
# check that the router redacts it.
auth_scheme = "query-key"          # ?key=<...> in URL
auth_env = "GEMINI_API_KEY"
retry_max = 3
retry_backoff_ms = 500
rate_limit_rpm = 1000
billing_currency = "USD"
notes = "1M-context window for long-doc analysis."

# Ollama: self-hosted runner reached over plain HTTP on the loopback address,
# with no authentication.
[[provider]]
id = "ollama-local"
display_name = "Ollama (local)"
endpoint = "http://127.0.0.1:11434/api/chat"
auth_scheme = "none"
# "_" is a placeholder: with auth_scheme = "none" there is no key to read.
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# NOTE(review): 0 is used as a "no limit" sentinel here — confirm the router
# does not interpret it as "zero requests allowed".
rate_limit_rpm = 0                 # local, no remote limit
billing_currency = "USD-zero"      # local compute, opportunity cost only
notes = "Self-hosted. Models loaded via `ollama pull`."

# MLX: local inference server on the loopback address (the notes string names
# mlx_lm.server), with no authentication.
[[provider]]
id = "mlx-local"
display_name = "MLX (Apple silicon local)"
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
auth_scheme = "none"
# "_" is a placeholder: with auth_scheme = "none" there is no key to read.
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# NOTE(review): 0 presumably means "no limit" for a local server — confirm the
# router does not interpret it as "zero requests allowed".
rate_limit_rpm = 0
# Tagged value rather than a bare currency code.
# NOTE(review): presumably marks zero-cost local compute — confirm.
billing_currency = "USD-zero"
notes = "Native Apple-silicon inference. mlx_lm.server."

# LM Studio: desktop runner on the loopback address with no authentication;
# the notes string describes it as OpenAI-compatible.
[[provider]]
id = "lmstudio-local"
display_name = "LM Studio (local)"
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
auth_scheme = "none"
# "_" is a placeholder: with auth_scheme = "none" there is no key to read.
auth_env = "_"
retry_max = 1
retry_backoff_ms = 0
# NOTE(review): 0 presumably means "no limit" for a local server — confirm the
# router does not interpret it as "zero requests allowed".
rate_limit_rpm = 0
# Tagged value rather than a bare currency code.
# NOTE(review): presumably marks zero-cost local compute — confirm.
billing_currency = "USD-zero"
notes = "Desktop GUI runner. OpenAI-compatible."

# LiteLLM proxy hosted under keisei.app: a single bearer key fronts several
# upstream providers (listed in the notes string).
[[provider]]
id = "litellm-proxy"
display_name = "LiteLLM proxy (keisei.app)"
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
# auth_env is the name of the environment variable holding the proxy key;
# the notes string says keys are issued per user.
auth_scheme = "bearer"
auth_env = "KEI_LITELLM_KEY"
retry_max = 3
retry_backoff_ms = 500
# NOTE(review): presumably the limit applied to the proxy itself, separate
# from the upstream providers' own limits — confirm.
rate_limit_rpm = 1000
# Tagged value rather than a bare currency code.
# NOTE(review): presumably billed through the Keisei account — confirm.
billing_currency = "USD-keisei"
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."