chore: registries → submodule from KeiSeiLab/kei-registries
Closes audit HIGH-1 (SSoT drift between KSK and marketplace). _blocks/registries/ now tracks the single canonical kei-registries repo; marketplace consumes the same submodule.
This commit is contained in:
parent
7281e7ecea
commit
d01649b355
5 changed files with 4 additions and 508 deletions
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "_blocks/registries"]
|
||||
path = _blocks/registries
|
||||
url = git@github.com:KeiSeiLab/kei-registries.git
|
||||
1
_blocks/registries
Submodule
1
_blocks/registries
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 14a1e62db6bff685e9b707fd368e6bbe42e9a819
|
||||
|
|
@ -1,186 +0,0 @@
|
|||
# Реестр профилей агентов. Слой 3 из трёх.
|
||||
#
|
||||
# Профиль — это (role + caps + default_model_ref + system_prompt_ref).
|
||||
# Не привязан к конкретному провайдеру: provider+model выбирается
|
||||
# kei-model-router'ом по подсказке caller'а либо берётся default_model_ref.
|
||||
#
|
||||
# Источник истины для существующих 59 манифестов — `.md` файлы в
|
||||
# `~/.claude/agents/`. Этот toml содержит ТОЛЬКО core/default-профили
|
||||
# для marketplace UI и kei-model-router; per-manifest detail остаётся
|
||||
# в `.md` файлах с frontmatter.
|
||||
#
|
||||
# Caps-bundle алфавит:
|
||||
# FS-RO — Read/Glob/Grep only
|
||||
# FS-RW — Read/Write/Edit
|
||||
# BASH — Bash shell
|
||||
# WEB — WebFetch/WebSearch
|
||||
# AGENT — может спавнить sub-агентов (RULE 0.12)
|
||||
# PLAN — Plan Mode allowed
|
||||
#
|
||||
# Стандартные комбинации:
|
||||
# RO-WEB-FS — researcher / validator (read+web)
|
||||
# FS-RW-BASH-PLAN — code-implementer (full + plan)
|
||||
#   RO-FS            — researcher-code (read-only, no web); critic / auditor profiles use RO-FS-WEB
|
||||
# FS-RW-BASH-AGENT — orchestrator-meta (can spawn)
|
||||
|
||||
# ─── Core implementer family ───────────────────────────────────────────
|
||||
|
||||
[[profile]]
|
||||
id = "code-implementer"
|
||||
role = "code-implementer"
|
||||
caps = "FS-RW-BASH-PLAN"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Generic Rust/TS/Python/Go/Swift/Flutter implementer. Constructor Pattern. RULE 0.20 → sonnet default."
|
||||
manifest_path = "~/.claude/agents/code-implementer.md"
|
||||
|
||||
[[profile]]
|
||||
id = "code-implementer-rust"
|
||||
role = "code-implementer"
|
||||
caps = "FS-RW-BASH-PLAN"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Rust specialist. Cargo, traits, async/tokio, rusqlite, tests."
|
||||
manifest_path = "~/.claude/agents/code-implementer-rust.md"
|
||||
|
||||
[[profile]]
|
||||
id = "code-implementer-typescript"
|
||||
role = "code-implementer"
|
||||
caps = "FS-RW-BASH-PLAN"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Next.js 16 / Node / browser. Type-safe API contracts."
|
||||
manifest_path = "~/.claude/agents/code-implementer-typescript.md"
|
||||
|
||||
# ─── Researcher family ─────────────────────────────────────────────────
|
||||
|
||||
[[profile]]
|
||||
id = "researcher"
|
||||
role = "researcher"
|
||||
caps = "RO-WEB-FS"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Web+codebase research. Evidence-Graded findings. Read-only."
|
||||
manifest_path = "~/.claude/agents/researcher.md"
|
||||
|
||||
[[profile]]
|
||||
id = "researcher-web"
|
||||
role = "researcher-web"
|
||||
caps = "WEB"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Web research specialist. WebFetch/WebSearch only."
|
||||
manifest_path = "~/.claude/agents/researcher-web.md"
|
||||
|
||||
[[profile]]
|
||||
id = "researcher-code"
|
||||
role = "researcher-code"
|
||||
caps = "RO-FS"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Codebase research. Glob/Grep/Read only."
|
||||
manifest_path = "~/.claude/agents/researcher-code.md"
|
||||
|
||||
# ─── Critic / auditor family ───────────────────────────────────────────
|
||||
|
||||
[[profile]]
|
||||
id = "critic"
|
||||
role = "critic"
|
||||
caps = "RO-FS-WEB"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Anti-patterns, tech debt, security issues, bugs. Severity-sorted findings."
|
||||
manifest_path = "~/.claude/agents/critic.md"
|
||||
|
||||
[[profile]]
|
||||
id = "security-auditor"
|
||||
role = "security-auditor"
|
||||
caps = "RO-FS-WEB"
|
||||
default_model_ref = "anthropic/claude-opus-4-7"
|
||||
description = "9-point differential security review. Risk-classified findings. Opus default — security needs depth."
|
||||
manifest_path = "~/.claude/agents/security-auditor.md"
|
||||
|
||||
[[profile]]
|
||||
id = "codex-reviewer"
|
||||
role = "codex-reviewer"
|
||||
caps = "RO-FS-WEB"
|
||||
default_model_ref = "codex/gpt-5-codex"
|
||||
description = "RULE 0.23 DUAL REVIEW. Independent second-opinion via OpenAI Codex CLI."
|
||||
manifest_path = "~/.claude/agents/codex-reviewer.md"
|
||||
|
||||
# ─── Infra family ──────────────────────────────────────────────────────
|
||||
|
||||
[[profile]]
|
||||
id = "infra-implementer"
|
||||
role = "infra-implementer"
|
||||
caps = "FS-RW-BASH"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Deploys, CI/CD, secrets, container/IaC. Per-project credential isolation."
|
||||
manifest_path = "~/.claude/agents/infra-implementer.md"
|
||||
|
||||
[[profile]]
|
||||
id = "modal-runner"
|
||||
role = "modal-runner"
|
||||
caps = "FS-RW-BASH"
|
||||
default_model_ref = "anthropic/claude-opus-4-7"
|
||||
description = "Modal compute orchestrator. Cost gating, KILL GUARD. Opus — irreversible $$ actions need depth."
|
||||
manifest_path = "~/.claude/agents/modal-runner.md"
|
||||
|
||||
# ─── ML family ─────────────────────────────────────────────────────────
|
||||
|
||||
[[profile]]
|
||||
id = "ml-implementer"
|
||||
role = "ml-implementer"
|
||||
caps = "FS-RW-BASH-PLAN"
|
||||
default_model_ref = "anthropic/claude-opus-4-7"
|
||||
description = "ML training/inference. Math-First, Pre-Experiment Check, Modal protocol."
|
||||
manifest_path = "~/.claude/agents/ml-implementer.md"
|
||||
|
||||
[[profile]]
|
||||
id = "ml-researcher"
|
||||
role = "ml-researcher"
|
||||
caps = "RO-WEB-FS"
|
||||
default_model_ref = "anthropic/claude-opus-4-7"
|
||||
description = "ML literature, benchmarks, reproducibility. Math-First read-only."
|
||||
manifest_path = "~/.claude/agents/ml-researcher.md"
|
||||
|
||||
# ─── Specialist anchors (project-specialists) ──────────────────────────
|
||||
#
|
||||
# Полный список 59 манифестов остаётся в `~/.claude/agents/*.md`.
|
||||
# Здесь — представительные anchors, marketplace UI обходит directory
|
||||
# и собирает остальные из frontmatter.
|
||||
|
||||
[[profile]]
|
||||
id = "cartoon-studio-specialist"
|
||||
role = "project-specialist"
|
||||
caps = "FS-RW-BASH-AGENT"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "Cartoon Studio AI video generation. Next.js 16 + Drizzle + SQLite. Flux 2 Pro / Kling O3 / ElevenLabs."
|
||||
manifest_path = "~/.claude/agents/cartoon-studio-specialist.md"
|
||||
|
||||
[[profile]]
|
||||
id = "keisei-os-specialist"
|
||||
role = "project-specialist"
|
||||
caps = "FS-RW-BASH-AGENT"
|
||||
default_model_ref = "anthropic/claude-sonnet-4-6"
|
||||
description = "KeiSei OS 4-layer runtime, 13 primitives, 14 Brain Tools."
|
||||
manifest_path = "~/.claude/agents/keisei-os-specialist.md"
|
||||
|
||||
# ─── Validator / cost-guardian (lightweight) ───────────────────────────
|
||||
|
||||
[[profile]]
|
||||
id = "validator"
|
||||
role = "validator"
|
||||
caps = "RO-FS-WEB"
|
||||
default_model_ref = "anthropic/claude-haiku-4-5"
|
||||
description = "RULE 0.4 fact-checker. API existence, version compat, doc claims. Haiku — narrow tasks."
|
||||
manifest_path = "~/.claude/agents/validator.md"
|
||||
|
||||
[[profile]]
|
||||
id = "cost-guardian"
|
||||
role = "cost-guardian"
|
||||
caps = "RO-FS-BASH-WEB"
|
||||
default_model_ref = "anthropic/claude-haiku-4-5"
|
||||
description = "Pre-launch compute cost verification. Dashboard balance, running jobs, head-room."
|
||||
manifest_path = "~/.claude/agents/cost-guardian.md"
|
||||
|
||||
[[profile]]
|
||||
id = "patent-compliance"
|
||||
role = "patent-compliance"
|
||||
caps = "RO-FS-BASH"
|
||||
default_model_ref = "anthropic/claude-haiku-4-5"
|
||||
description = "Pre-filing IP cross-ref scan. Greps for unfiled-patent references."
|
||||
manifest_path = "~/.claude/agents/patent-compliance.md"
|
||||
|
|
@ -1,191 +0,0 @@
|
|||
# Реестр моделей. Слой 2 из трёх.
|
||||
#
|
||||
# Каждая модель привязана к провайдеру через `provider_ref`. Цены в
|
||||
# microcents (1e-8 USD, т.е. 1e-6 цента) на 1M токенов — совместимо с
|
||||
# kei-ledger.cost_micro_cents и kei-model-router::pricing.rs.
|
||||
#
|
||||
# Источники цен: pricing-страница соответствующего провайдера.
|
||||
# Каждая запись помечена `verified_at` — дата последней сверки.
|
||||
#
|
||||
# `deprecated_at = ""` ⇒ модель живая (в TOML нет null). После deprecate kei-model-router
|
||||
# не выбирает её для новых invocation-ов, но старые записи в ledger
|
||||
# продолжают резолвиться.
|
||||
|
||||
# ─── Anthropic ─────────────────────────────────────────────────────────
|
||||
|
||||
[[model]]
|
||||
provider_ref = "anthropic"
|
||||
# TODO: pin to "claude-haiku-4-5-20251001" once router/pricing accept slug as
|
||||
# the lookup key (currently hardcoded by id in pricing.rs/escalate.rs tests).
|
||||
id = "claude-haiku-4-5"
|
||||
slug = "haiku"
|
||||
display_name = "Claude Haiku 4.5"
|
||||
context_window = 200000
|
||||
cost_input_per_mtok_micro = 100_000_000 # $1.00
|
||||
cost_output_per_mtok_micro = 500_000_000 # $5.00
|
||||
cache_write_5m_per_mtok_micro = 125_000_000
|
||||
cache_write_1h_per_mtok_micro = 200_000_000
|
||||
cache_read_per_mtok_micro = 10_000_000
|
||||
verified_at = "2026-04-30"
|
||||
deprecated_at = ""
|
||||
notes = "Cheapest Claude. Single-edit, formatting, lookup."
|
||||
|
||||
[[model]]
|
||||
provider_ref = "anthropic"
|
||||
id = "claude-sonnet-4-6"
|
||||
slug = "sonnet"
|
||||
display_name = "Claude Sonnet 4.6"
|
||||
context_window = 1000000
|
||||
cost_input_per_mtok_micro = 300_000_000 # $3.00
|
||||
cost_output_per_mtok_micro = 1_500_000_000 # $15.00
|
||||
cache_write_5m_per_mtok_micro = 375_000_000
|
||||
cache_write_1h_per_mtok_micro = 600_000_000
|
||||
cache_read_per_mtok_micro = 30_000_000
|
||||
verified_at = "2026-04-30"
|
||||
deprecated_at = ""
|
||||
notes = "RULE 0.20 default for code-implementer-* / researcher-*."
|
||||
|
||||
[[model]]
|
||||
provider_ref = "anthropic"
|
||||
id = "claude-opus-4-7"
|
||||
slug = "opus"
|
||||
display_name = "Claude Opus 4.7"
|
||||
context_window = 1000000
|
||||
cost_input_per_mtok_micro = 500_000_000 # $5.00
|
||||
cost_output_per_mtok_micro = 2_500_000_000 # $25.00
|
||||
cache_write_5m_per_mtok_micro = 625_000_000
|
||||
cache_write_1h_per_mtok_micro = 1_000_000_000
|
||||
cache_read_per_mtok_micro = 50_000_000
|
||||
verified_at = "2026-04-30"
|
||||
deprecated_at = ""
|
||||
notes = "Architecture, novel reasoning, math derivation. Tokenizer overhead 1.35x vs Sonnet."
|
||||
|
||||
# ─── OpenAI ────────────────────────────────────────────────────────────
|
||||
|
||||
[[model]]
|
||||
provider_ref = "openai"
|
||||
id = "gpt-5"
|
||||
slug = "gpt-5"
|
||||
display_name = "GPT-5"
|
||||
context_window = 400000
|
||||
cost_input_per_mtok_micro = 200_000_000 # $2.00 (placeholder, verify on use)
|
||||
cost_output_per_mtok_micro = 800_000_000 # $8.00
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 0
|
||||
verified_at = ""
|
||||
deprecated_at = ""
|
||||
notes = "[UNVERIFIED] — re-fetch pricing page before billing-grade decisions."
|
||||
|
||||
[[model]]
|
||||
provider_ref = "codex"
|
||||
id = "gpt-5-codex"
|
||||
slug = "codex"
|
||||
display_name = "GPT-5 Codex (via ChatGPT OAuth)"
|
||||
context_window = 200000
|
||||
cost_input_per_mtok_micro = 0 # subscription, not per-token
|
||||
cost_output_per_mtok_micro = 0
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 0
|
||||
verified_at = "2026-05-10"
|
||||
deprecated_at = ""
|
||||
notes = "RULE 0.23 dual-review. ChatGPT Plus/Pro/Team subscription quota."
|
||||
|
||||
# ─── xAI ───────────────────────────────────────────────────────────────
|
||||
|
||||
[[model]]
|
||||
provider_ref = "xai"
|
||||
id = "grok-4"
|
||||
slug = "grok-4"
|
||||
display_name = "Grok 4"
|
||||
context_window = 256000
|
||||
cost_input_per_mtok_micro = 300_000_000 # [UNVERIFIED]
|
||||
cost_output_per_mtok_micro = 1_500_000_000
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 0
|
||||
verified_at = ""
|
||||
deprecated_at = ""
|
||||
notes = "[UNVERIFIED] — verify before billing."
|
||||
|
||||
# ─── DeepSeek ──────────────────────────────────────────────────────────
|
||||
|
||||
[[model]]
|
||||
provider_ref = "deepseek"
|
||||
id = "deepseek-r1"
|
||||
slug = "r1"
|
||||
display_name = "DeepSeek R1"
|
||||
context_window = 64000
|
||||
cost_input_per_mtok_micro = 55_000_000 # $0.55 [UNVERIFIED — pricing fluctuates]
|
||||
cost_output_per_mtok_micro = 219_000_000 # $2.19
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 14_000_000 # $0.14 cache hit
|
||||
verified_at = ""
|
||||
deprecated_at = ""
|
||||
notes = "Cheapest reasoning tier. Long thinking time."
|
||||
|
||||
[[model]]
|
||||
provider_ref = "deepseek"
|
||||
id = "deepseek-v3"
|
||||
slug = "v3"
|
||||
display_name = "DeepSeek V3"
|
||||
context_window = 64000
|
||||
cost_input_per_mtok_micro = 27_000_000 # $0.27 [UNVERIFIED]
|
||||
cost_output_per_mtok_micro = 110_000_000 # $1.10
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 7_000_000
|
||||
verified_at = ""
|
||||
deprecated_at = ""
|
||||
notes = "Non-reasoning baseline. Cheap bulk inference."
|
||||
|
||||
# ─── Google ────────────────────────────────────────────────────────────
|
||||
|
||||
[[model]]
|
||||
provider_ref = "google"
|
||||
id = "gemini-2.5-pro"
|
||||
slug = "gemini-pro"
|
||||
display_name = "Gemini 2.5 Pro"
|
||||
context_window = 2000000
|
||||
cost_input_per_mtok_micro = 125_000_000 # $1.25 [UNVERIFIED]
|
||||
cost_output_per_mtok_micro = 1_000_000_000 # $10.00
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 0
|
||||
verified_at = ""
|
||||
deprecated_at = ""
|
||||
notes = "2M context. Long-document analysis."
|
||||
|
||||
# ─── Local (zero per-token cost) ───────────────────────────────────────
|
||||
|
||||
[[model]]
|
||||
provider_ref = "ollama-local"
|
||||
id = "llama-3.3-70b"
|
||||
slug = "llama-70b-local"
|
||||
display_name = "Llama 3.3 70B (Ollama local)"
|
||||
context_window = 128000
|
||||
cost_input_per_mtok_micro = 0
|
||||
cost_output_per_mtok_micro = 0
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 0
|
||||
verified_at = "2026-05-13"
|
||||
deprecated_at = ""
|
||||
notes = "Local. Compute cost is electricity + opportunity, not per-token."
|
||||
|
||||
[[model]]
|
||||
provider_ref = "mlx-local"
|
||||
id = "qwen-2.5-coder-32b"
|
||||
slug = "qwen-coder-local"
|
||||
display_name = "Qwen 2.5 Coder 32B (MLX)"
|
||||
context_window = 32000
|
||||
cost_input_per_mtok_micro = 0
|
||||
cost_output_per_mtok_micro = 0
|
||||
cache_write_5m_per_mtok_micro = 0
|
||||
cache_write_1h_per_mtok_micro = 0
|
||||
cache_read_per_mtok_micro = 0
|
||||
verified_at = "2026-05-13"
|
||||
deprecated_at = ""
|
||||
notes = "Code-focused local model. Apple silicon Metal."
|
||||
|
|
@ -1,131 +0,0 @@
|
|||
# Реестр провайдеров LLM. SSoT для kei-model-router + keisei-marketplace.
|
||||
#
|
||||
# Слой 1 из трёх: provider → model → profile (agent-profiles.toml).
|
||||
# Провайдер стабилен (endpoint, схема аутентификации, биллинг-аккаунт),
|
||||
# модели за ним меняются — см. models.toml.
|
||||
#
|
||||
# Все цены/контекст — в models.toml. Здесь только транспорт + лимиты.
|
||||
#
|
||||
# Конвенция: `id` — kebab-case, без версий. Версии — в моделях.
|
||||
|
||||
[[provider]]
|
||||
id = "anthropic"
|
||||
display_name = "Anthropic"
|
||||
endpoint = "https://api.anthropic.com/v1/messages"
|
||||
auth_scheme = "x-api-key" # header name; value from env ANTHROPIC_API_KEY
|
||||
auth_env = "ANTHROPIC_API_KEY"
|
||||
api_version_header = "anthropic-version"
|
||||
api_version_value = "2023-06-01"
|
||||
retry_max = 3
|
||||
retry_backoff_ms = 500
|
||||
rate_limit_rpm = 4000
|
||||
billing_currency = "USD"
|
||||
notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*."
|
||||
|
||||
[[provider]]
|
||||
id = "openai"
|
||||
display_name = "OpenAI"
|
||||
endpoint = "https://api.openai.com/v1/chat/completions"
|
||||
auth_scheme = "bearer"
|
||||
auth_env = "OPENAI_API_KEY"
|
||||
retry_max = 3
|
||||
retry_backoff_ms = 500
|
||||
rate_limit_rpm = 5000
|
||||
billing_currency = "USD"
|
||||
notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)."
|
||||
|
||||
[[provider]]
|
||||
id = "codex"
|
||||
display_name = "OpenAI Codex (ChatGPT OAuth)"
|
||||
endpoint = "local:codex-cli" # invoked via `codex` CLI, not HTTP
|
||||
auth_scheme = "oauth-subscription"
|
||||
auth_env = "_" # no env — CLI handles auth
|
||||
retry_max = 1
|
||||
retry_backoff_ms = 0
|
||||
rate_limit_rpm = 60 # ChatGPT subscription quota
|
||||
billing_currency = "USD-subscription"
|
||||
notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers."
|
||||
|
||||
[[provider]]
|
||||
id = "xai"
|
||||
display_name = "xAI"
|
||||
endpoint = "https://api.x.ai/v1/chat/completions"
|
||||
auth_scheme = "bearer"
|
||||
auth_env = "XAI_API_KEY"
|
||||
retry_max = 3
|
||||
retry_backoff_ms = 500
|
||||
rate_limit_rpm = 1000
|
||||
billing_currency = "USD"
|
||||
notes = "Grok family. OpenAI-compatible API."
|
||||
|
||||
[[provider]]
|
||||
id = "deepseek"
|
||||
display_name = "DeepSeek"
|
||||
endpoint = "https://api.deepseek.com/v1/chat/completions"
|
||||
auth_scheme = "bearer"
|
||||
auth_env = "DEEPSEEK_API_KEY"
|
||||
retry_max = 3
|
||||
retry_backoff_ms = 500
|
||||
rate_limit_rpm = 60
|
||||
billing_currency = "USD" # invoices in USD even if upstream CNY
|
||||
notes = "Cheapest tier for batch reasoning. R1 reasoner family."
|
||||
|
||||
[[provider]]
|
||||
id = "google"
|
||||
display_name = "Google Gemini"
|
||||
endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
|
||||
auth_scheme = "query-key" # ?key=<...> in URL
|
||||
auth_env = "GEMINI_API_KEY"
|
||||
retry_max = 3
|
||||
retry_backoff_ms = 500
|
||||
rate_limit_rpm = 1000
|
||||
billing_currency = "USD"
|
||||
notes = "1M-context window for long-doc analysis."
|
||||
|
||||
[[provider]]
|
||||
id = "ollama-local"
|
||||
display_name = "Ollama (local)"
|
||||
endpoint = "http://127.0.0.1:11434/api/chat"
|
||||
auth_scheme = "none"
|
||||
auth_env = "_"
|
||||
retry_max = 1
|
||||
retry_backoff_ms = 0
|
||||
rate_limit_rpm = 0 # local, no remote limit
|
||||
billing_currency = "USD-zero" # local compute, opportunity cost only
|
||||
notes = "Self-hosted. Models loaded via `ollama pull`."
|
||||
|
||||
[[provider]]
|
||||
id = "mlx-local"
|
||||
display_name = "MLX (Apple silicon local)"
|
||||
endpoint = "http://127.0.0.1:8080/v1/chat/completions"
|
||||
auth_scheme = "none"
|
||||
auth_env = "_"
|
||||
retry_max = 1
|
||||
retry_backoff_ms = 0
|
||||
rate_limit_rpm = 0
|
||||
billing_currency = "USD-zero"
|
||||
notes = "Native Apple-silicon inference. mlx_lm.server."
|
||||
|
||||
[[provider]]
|
||||
id = "lmstudio-local"
|
||||
display_name = "LM Studio (local)"
|
||||
endpoint = "http://127.0.0.1:1234/v1/chat/completions"
|
||||
auth_scheme = "none"
|
||||
auth_env = "_"
|
||||
retry_max = 1
|
||||
retry_backoff_ms = 0
|
||||
rate_limit_rpm = 0
|
||||
billing_currency = "USD-zero"
|
||||
notes = "Desktop GUI runner. OpenAI-compatible."
|
||||
|
||||
[[provider]]
|
||||
id = "litellm-proxy"
|
||||
display_name = "LiteLLM proxy (keisei.app)"
|
||||
endpoint = "https://api.keisei.app/llm/v1/chat/completions"
|
||||
auth_scheme = "bearer"
|
||||
auth_env = "KEI_LITELLM_KEY"
|
||||
retry_max = 3
|
||||
retry_backoff_ms = 500
|
||||
rate_limit_rpm = 1000
|
||||
billing_currency = "USD-keisei"
|
||||
notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."
|
||||
Loading…
Reference in a new issue