From 3aef8678c099388819d5a7ce4b3f830a49ee2ac8 Mon Sep 17 00:00:00 2001 From: Parfii-bot Date: Wed, 13 May 2026 20:51:04 +0800 Subject: [PATCH] feat: three-layer agent registries (providers/models/profiles) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits agent definition into stable provider + swappable model + role-bound profile. Adding a new LLM API is one row in providers.toml; new model is one row in models.toml; agent invocation picks any (provider, model) pair through agent-profiles.toml default_model_ref. - providers.toml: 10 providers — anthropic, openai, codex (OAuth), xai, deepseek, google, ollama-local, mlx-local, lmstudio-local, litellm-proxy - models.toml: 11 models with cost_*_per_mtok_micro + context_window + verified_at + deprecated_at - agent-profiles.toml: 18 representative profiles; manifest_path points to the canonical .md in ~/.claude/agents/ Three-layer DNA per the new architecture: agent-shell::::::::- This commit only adds registries — kei-model-router still hardcodes the Claude-only Model enum. Wave 4 will rewire it to read TOML. --- _blocks/registries/agent-profiles.toml | 186 +++++++++++++++++++++++++ _blocks/registries/models.toml | 178 +++++++++++++++++++++++ _blocks/registries/providers.toml | 131 +++++++++++++++++ 3 files changed, 495 insertions(+) create mode 100644 _blocks/registries/agent-profiles.toml create mode 100644 _blocks/registries/models.toml create mode 100644 _blocks/registries/providers.toml diff --git a/_blocks/registries/agent-profiles.toml b/_blocks/registries/agent-profiles.toml new file mode 100644 index 0000000..44d2a06 --- /dev/null +++ b/_blocks/registries/agent-profiles.toml @@ -0,0 +1,186 @@ +# Реестр профилей агентов. Слой 3 из трёх. +# +# Профиль — это (role + caps + default_model_ref + system_prompt_ref). +# Не привязан к конкретному провайдеру: provider+model выбирается +# kei-model-router'ом по подсказке caller'а либо берётся default_model_ref. 
+# +# The source of truth for the existing 59 manifests is the `.md` files in +# `~/.claude/agents/`. This toml holds ONLY the core/default profiles +# for the marketplace UI and kei-model-router; per-manifest detail stays +# in the `.md` files' frontmatter. +# +# Caps-bundle alphabet: +# FS-RO — Read/Glob/Grep only (spelled RO-FS / RO-…-FS inside bundles) +# FS-RW — Read/Write/Edit +# BASH — Bash shell +# WEB — WebFetch/WebSearch +# AGENT — may spawn sub-agents (RULE 0.12) +# PLAN — Plan Mode allowed +# +# Standard combinations (spell a bundle exactly as listed here): +# RO-WEB-FS — researcher / validator / critic / auditor (read+web) +# FS-RW-BASH-PLAN — code-implementer (full + plan) +# RO-FS — researcher-code (read-only, no web) +# FS-RW-BASH-AGENT — orchestrator-meta / project-specialist (can spawn) + +# ─── Core implementer family ─────────────────────────────────────────── + +[[profile]] +id = "code-implementer" +role = "code-implementer" +caps = "FS-RW-BASH-PLAN" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Generic Rust/TS/Python/Go/Swift/Flutter implementer. Constructor Pattern. RULE 0.20 → sonnet default." +manifest_path = "~/.claude/agents/code-implementer.md" + +[[profile]] +id = "code-implementer-rust" +role = "code-implementer" +caps = "FS-RW-BASH-PLAN" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Rust specialist. Cargo, traits, async/tokio, rusqlite, tests." +manifest_path = "~/.claude/agents/code-implementer-rust.md" + +[[profile]] +id = "code-implementer-typescript" +role = "code-implementer" +caps = "FS-RW-BASH-PLAN" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Next.js 16 / Node / browser. Type-safe API contracts." +manifest_path = "~/.claude/agents/code-implementer-typescript.md" + +# ─── Researcher family ───────────────────────────────────────────────── + +[[profile]] +id = "researcher" +role = "researcher" +caps = "RO-WEB-FS" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Web+codebase research. Evidence-Graded findings. Read-only." 
+manifest_path = "~/.claude/agents/researcher.md" + +[[profile]] +id = "researcher-web" +role = "researcher-web" +caps = "WEB" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Web research specialist. WebFetch/WebSearch only." +manifest_path = "~/.claude/agents/researcher-web.md" + +[[profile]] +id = "researcher-code" +role = "researcher-code" +caps = "RO-FS" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Codebase research. Glob/Grep/Read only." +manifest_path = "~/.claude/agents/researcher-code.md" + +# ─── Critic / auditor family ─────────────────────────────────────────── + +[[profile]] +id = "critic" +role = "critic" +caps = "RO-WEB-FS" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Anti-patterns, tech debt, security issues, bugs. Severity-sorted findings." +manifest_path = "~/.claude/agents/critic.md" + +[[profile]] +id = "security-auditor" +role = "security-auditor" +caps = "RO-WEB-FS" +default_model_ref = "anthropic/claude-opus-4-7" +description = "9-point differential security review. Risk-classified findings. Opus default — security needs depth." +manifest_path = "~/.claude/agents/security-auditor.md" + +[[profile]] +id = "codex-reviewer" +role = "codex-reviewer" +caps = "RO-WEB-FS" +default_model_ref = "codex/gpt-5-codex" +description = "RULE 0.23 DUAL REVIEW. Independent second-opinion via OpenAI Codex CLI." +manifest_path = "~/.claude/agents/codex-reviewer.md" + +# ─── Infra family ────────────────────────────────────────────────────── + +[[profile]] +id = "infra-implementer" +role = "infra-implementer" +caps = "FS-RW-BASH" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Deploys, CI/CD, secrets, container/IaC. Per-project credential isolation." +manifest_path = "~/.claude/agents/infra-implementer.md" + +[[profile]] +id = "modal-runner" +role = "modal-runner" +caps = "FS-RW-BASH" +default_model_ref = "anthropic/claude-opus-4-7" +description = "Modal compute orchestrator. 
Cost gating, KILL GUARD. Opus — irreversible $$ actions need depth." +manifest_path = "~/.claude/agents/modal-runner.md" + +# ─── ML family ───────────────────────────────────────────────────────── + +[[profile]] +id = "ml-implementer" +role = "ml-implementer" +caps = "FS-RW-BASH-PLAN" +default_model_ref = "anthropic/claude-opus-4-7" +description = "ML training/inference. Math-First, Pre-Experiment Check, Modal protocol." +manifest_path = "~/.claude/agents/ml-implementer.md" + +[[profile]] +id = "ml-researcher" +role = "ml-researcher" +caps = "RO-WEB-FS" +default_model_ref = "anthropic/claude-opus-4-7" +description = "ML literature, benchmarks, reproducibility. Math-First read-only." +manifest_path = "~/.claude/agents/ml-researcher.md" + +# ─── Specialist anchors (project-specialists) ────────────────────────── +# +# The full list of 59 manifests stays in `~/.claude/agents/*.md`. +# Listed here are representative anchors; the marketplace UI walks the directory +# and collects the rest from frontmatter. + +[[profile]] +id = "cartoon-studio-specialist" +role = "project-specialist" +caps = "FS-RW-BASH-AGENT" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Cartoon Studio AI video generation. Next.js 16 + Drizzle + SQLite. Flux 2 Pro / Kling O3 / ElevenLabs." +manifest_path = "~/.claude/agents/cartoon-studio-specialist.md" + +[[profile]] +id = "keisei-os-specialist" +role = "project-specialist" +caps = "FS-RW-BASH-AGENT" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "KeiSei OS 4-layer runtime, 13 primitives, 14 Brain Tools." +manifest_path = "~/.claude/agents/keisei-os-specialist.md" + +# ─── Validator / cost-guardian (lightweight) ─────────────────────────── + +[[profile]] +id = "validator" +role = "validator" +caps = "RO-WEB-FS" +default_model_ref = "anthropic/claude-haiku-4-5" +description = "RULE 0.4 fact-checker. API existence, version compat, doc claims. Haiku — narrow tasks." 
+manifest_path = "~/.claude/agents/validator.md" + +[[profile]] +id = "cost-guardian" +role = "cost-guardian" +caps = "RO-FS-BASH-WEB" +default_model_ref = "anthropic/claude-haiku-4-5" +description = "Pre-launch compute cost verification. Dashboard balance, running jobs, head-room." +manifest_path = "~/.claude/agents/cost-guardian.md" + +[[profile]] +id = "patent-compliance" +role = "patent-compliance" +caps = "RO-FS-BASH" +default_model_ref = "anthropic/claude-haiku-4-5" +description = "Pre-filing IP cross-ref scan. Greps for unfiled-patent references." +manifest_path = "~/.claude/agents/patent-compliance.md" diff --git a/_blocks/registries/models.toml b/_blocks/registries/models.toml new file mode 100644 index 0000000..8be737d --- /dev/null +++ b/_blocks/registries/models.toml @@ -0,0 +1,178 @@ +# Реестр моделей. Слой 2 из трёх. +# +# Каждая модель привязана к провайдеру через `provider_ref`. Цены в +# microcents (1e-8 USD) на 1M токенов — совместимо с +# kei-ledger.cost_micro_cents и kei-model-router::pricing.rs. +# +# Источники цен: pricing-страница соответствующего провайдера. +# Каждая запись помечена `verified_at` — дата последней сверки. +# +# `deprecated_at = ""` ⇒ модель живая. После deprecate kei-model-router +# не выбирает её для новых invocation-ов, но старые записи в ledger +# продолжают резолвиться. + +# ─── Anthropic ───────────────────────────────────────────────────────── + +[[model]] +provider_ref = "anthropic" +id = "claude-haiku-4-5" +slug = "haiku" +display_name = "Claude Haiku 4.5" +context_window = 200000 +cost_input_per_mtok_micro = 100_000_000 # $1.00 +cost_output_per_mtok_micro = 500_000_000 # $5.00 +cache_write_5m_per_mtok_micro = 125_000_000 +cache_read_per_mtok_micro = 10_000_000 +verified_at = "2026-04-30" +deprecated_at = "" +notes = "Cheapest Claude. Single-edit, formatting, lookup." 
+ +[[model]] +provider_ref = "anthropic" +id = "claude-sonnet-4-6" +slug = "sonnet" +display_name = "Claude Sonnet 4.6" +context_window = 1000000 +cost_input_per_mtok_micro = 300_000_000 # $3.00 +cost_output_per_mtok_micro = 1_500_000_000 # $15.00 +cache_write_5m_per_mtok_micro = 375_000_000 # $3.75 +cache_read_per_mtok_micro = 30_000_000 # $0.30 +verified_at = "2026-04-30" +deprecated_at = "" +notes = "RULE 0.20 default for code-implementer-* / researcher-*." + +[[model]] +provider_ref = "anthropic" +id = "claude-opus-4-7" +slug = "opus" +display_name = "Claude Opus 4.7" +context_window = 1000000 +cost_input_per_mtok_micro = 500_000_000 # $5.00 +cost_output_per_mtok_micro = 2_500_000_000 # $25.00 +cache_write_5m_per_mtok_micro = 625_000_000 # $6.25 +cache_read_per_mtok_micro = 50_000_000 # $0.50 +verified_at = "2026-04-30" +deprecated_at = "" +notes = "Architecture, novel reasoning, math derivation. Tokenizer overhead 1.35x vs Sonnet." + +# ─── OpenAI ──────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "openai" +id = "gpt-5" +slug = "gpt-5" +display_name = "GPT-5" +context_window = 400000 +cost_input_per_mtok_micro = 200_000_000 # $2.00 (placeholder, verify on use) +cost_output_per_mtok_micro = 800_000_000 # $8.00 +cache_write_5m_per_mtok_micro = 0 # 0 — presumably unset rather than free; TODO confirm +cache_read_per_mtok_micro = 0 +verified_at = "" # empty — not yet verified (see notes) +deprecated_at = "" +notes = "[UNVERIFIED] — re-fetch pricing page before billing-grade decisions." + +[[model]] +provider_ref = "codex" +id = "gpt-5-codex" +slug = "codex" +display_name = "GPT-5 Codex (via ChatGPT OAuth)" +context_window = 200000 +cost_input_per_mtok_micro = 0 # subscription, not per-token +cost_output_per_mtok_micro = 0 +cache_write_5m_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "2026-05-10" +deprecated_at = "" +notes = "RULE 0.23 dual-review. ChatGPT Plus/Pro/Team subscription quota." 
+ +# ─── xAI ─────────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "xai" +id = "grok-4" +slug = "grok-4" +display_name = "Grok 4" +context_window = 256000 +cost_input_per_mtok_micro = 300_000_000 # $3.00 [UNVERIFIED] +cost_output_per_mtok_micro = 1_500_000_000 # $15.00 +cache_write_5m_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "" +deprecated_at = "" +notes = "[UNVERIFIED] — verify before billing." + +# ─── DeepSeek ────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "deepseek" +id = "deepseek-r1" +slug = "r1" +display_name = "DeepSeek R1" +context_window = 64000 +cost_input_per_mtok_micro = 55_000_000 # $0.55 [UNVERIFIED — pricing fluctuates] +cost_output_per_mtok_micro = 219_000_000 # $2.19 +cache_write_5m_per_mtok_micro = 0 +cache_read_per_mtok_micro = 14_000_000 # $0.14 cache hit +verified_at = "" +deprecated_at = "" +notes = "Cheapest reasoning tier. Long thinking time." + +[[model]] +provider_ref = "deepseek" +id = "deepseek-v3" +slug = "v3" +display_name = "DeepSeek V3" +context_window = 64000 +cost_input_per_mtok_micro = 27_000_000 # $0.27 [UNVERIFIED] +cost_output_per_mtok_micro = 110_000_000 # $1.10 +cache_write_5m_per_mtok_micro = 0 +cache_read_per_mtok_micro = 7_000_000 # $0.07 +verified_at = "" +deprecated_at = "" +notes = "Non-reasoning baseline. Cheap bulk inference." + +# ─── Google ──────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "google" +id = "gemini-2.5-pro" +slug = "gemini-pro" +display_name = "Gemini 2.5 Pro" +context_window = 1048576 # 1M input tokens [UNVERIFIED] — agrees with the "1M-context" note on the google provider +cost_input_per_mtok_micro = 125_000_000 # $1.25 [UNVERIFIED] +cost_output_per_mtok_micro = 1_000_000_000 # $10.00 +cache_write_5m_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "" +deprecated_at = "" +notes = "1M context. Long-document analysis." 
+ +# ─── Local (zero per-token cost) ─────────────────────────────────────── + +[[model]] +provider_ref = "ollama-local" +id = "llama-3.3-70b" +slug = "llama-70b-local" +display_name = "Llama 3.3 70B (Ollama local)" +context_window = 128000 +cost_input_per_mtok_micro = 0 +cost_output_per_mtok_micro = 0 +cache_write_5m_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "2026-05-13" +deprecated_at = "" +notes = "Local. Compute cost is electricity + opportunity, not per-token." + +[[model]] +provider_ref = "mlx-local" +id = "qwen-2.5-coder-32b" +slug = "qwen-coder-local" +display_name = "Qwen 2.5 Coder 32B (MLX)" +context_window = 32000 +cost_input_per_mtok_micro = 0 +cost_output_per_mtok_micro = 0 +cache_write_5m_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "2026-05-13" +deprecated_at = "" +notes = "Code-focused local model. Apple silicon Metal." diff --git a/_blocks/registries/providers.toml b/_blocks/registries/providers.toml new file mode 100644 index 0000000..ac420c4 --- /dev/null +++ b/_blocks/registries/providers.toml @@ -0,0 +1,131 @@ +# LLM provider registry. SSoT for kei-model-router + keisei-marketplace. +# +# Layer 1 of three: provider → model → profile (agent-profiles.toml). +# A provider is stable (endpoint, auth scheme, billing account); +# the models behind it change — see models.toml. +# +# All pricing/context lives in models.toml. Only transport + limits here. +# +# Convention: `id` is kebab-case with no versions. Versions live on models. + +[[provider]] +id = "anthropic" +display_name = "Anthropic" +endpoint = "https://api.anthropic.com/v1/messages" +auth_scheme = "x-api-key" # header name; value from env ANTHROPIC_API_KEY +auth_env = "ANTHROPIC_API_KEY" +api_version_header = "anthropic-version" # only this provider declares an API-version header/value pair +api_version_value = "2023-06-01" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 4000 +billing_currency = "USD" +notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*." 
+ +[[provider]] +id = "openai" +display_name = "OpenAI" +endpoint = "https://api.openai.com/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "OPENAI_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 5000 +billing_currency = "USD" +notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)." + +[[provider]] +id = "codex" +display_name = "OpenAI Codex (ChatGPT OAuth)" +endpoint = "local:codex-cli" # invoked via `codex` CLI, not HTTP +auth_scheme = "oauth-subscription" +auth_env = "_" # no env — CLI handles auth +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 60 # ChatGPT subscription quota +billing_currency = "USD-subscription" +notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers." + +[[provider]] +id = "xai" +display_name = "xAI" +endpoint = "https://api.x.ai/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "XAI_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 1000 +billing_currency = "USD" +notes = "Grok family. OpenAI-compatible API." + +[[provider]] +id = "deepseek" +display_name = "DeepSeek" +endpoint = "https://api.deepseek.com/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "DEEPSEEK_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 60 +billing_currency = "USD" # invoices in USD even if upstream CNY +notes = "Cheapest tier for batch reasoning. R1 reasoner family." + +[[provider]] +id = "google" +display_name = "Google Gemini" +endpoint = "https://generativelanguage.googleapis.com/v1beta/models" # base path only — presumably the router appends the model id and method; TODO confirm +auth_scheme = "query-key" # ?key=<...> in URL — NOTE(review): the key then appears in request URLs/logs; confirm this is acceptable +auth_env = "GEMINI_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 1000 +billing_currency = "USD" +notes = "1M-context window for long-doc analysis." 
+ +[[provider]] +id = "ollama-local" +display_name = "Ollama (local)" +endpoint = "http://127.0.0.1:11434/api/chat" +auth_scheme = "none" +auth_env = "_" # "_" placeholder — auth_scheme is "none" +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 0 # local, no remote limit +billing_currency = "USD-zero" # local compute, opportunity cost only +notes = "Self-hosted. Models loaded via `ollama pull`." + +[[provider]] +id = "mlx-local" +display_name = "MLX (Apple silicon local)" +endpoint = "http://127.0.0.1:8080/v1/chat/completions" +auth_scheme = "none" +auth_env = "_" # "_" placeholder — auth_scheme is "none" +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 0 # local, no remote limit +billing_currency = "USD-zero" # local compute, opportunity cost only +notes = "Native Apple-silicon inference. mlx_lm.server." + +[[provider]] +id = "lmstudio-local" +display_name = "LM Studio (local)" +endpoint = "http://127.0.0.1:1234/v1/chat/completions" +auth_scheme = "none" +auth_env = "_" # "_" placeholder — auth_scheme is "none" +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 0 # local, no remote limit +billing_currency = "USD-zero" # local compute, opportunity cost only +notes = "Desktop GUI runner. OpenAI-compatible." + +[[provider]] +id = "litellm-proxy" +display_name = "LiteLLM proxy (keisei.app)" +endpoint = "https://api.keisei.app/llm/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "KEI_LITELLM_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 1000 +billing_currency = "USD-keisei" # NOTE(review): non-ISO label, like "USD-zero"/"USD-subscription" — presumably a billing-account tag; confirm consumers treat it as opaque +notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."