commit 14a1e62db6bff685e9b707fd368e6bbe42e9a819 Author: Parfii-bot Date: Thu May 14 01:16:45 2026 +0800 chore: kei-registries — three-layer DNA SSoT (initial) diff --git a/README.md b/README.md new file mode 100644 index 0000000..f6aa2f2 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# kei-registries + +Единый источник истины для three-layer DNA: + +- **providers.toml** — LLM-провайдеры (anthropic / openai / xai / deepseek / google + локальные) +- **models.toml** — конкретные модели с ценами и контекстом +- **agent-profiles.toml** — публичные роли (юзер выбирает в marketplace, добавляет свой ключ) + +Подключается как submodule в: +- `KeiSeiLab/KeiSeiKit-1.0` → `_blocks/registries/` (kei-model-router читает) +- `keisei-marketplace` → `_blocks/registries/` (Next.js loader читает) + +## Что НЕ здесь +ml-implementer, ml-researcher, physics-deriver, cartoon-studio, keisei-os, +patent-compliance — приватные/IP-связанные, лежат локально в `~/.claude/agents/`. diff --git a/agent-profiles.toml b/agent-profiles.toml new file mode 100644 index 0000000..3d2d416 --- /dev/null +++ b/agent-profiles.toml @@ -0,0 +1,113 @@ +# Реестр публичных профилей агентов. Слой 3 из трёх. +# +# Не привязан к провайдеру: provider+model выбирается kei-model-router'ом +# по подсказке либо берётся default_model_ref. Юзер потом подключает свой +# ключ (Anthropic/OpenAI/xAI/etc) и реестр становится "его". +# +# Приватные project-specialists (cartoon-studio, keisei-os, vortex, ml-*, +# physics, patent-compliance) живут локально в `~/.claude/agents/*.md` +# и НЕ публикуются в marketplace. + +[[profile]] +id = "code-implementer" +role = "code-implementer" +caps = "FS-RW-BASH-PLAN" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Generic Rust/TS/Python/Go/Swift/Flutter implementer. Constructor Pattern. RULE 0.20 → sonnet default." 
+manifest_path = "~/.claude/agents/code-implementer.md" + +[[profile]] +id = "code-implementer-rust" +role = "code-implementer" +caps = "FS-RW-BASH-PLAN" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Rust specialist. Cargo, traits, async/tokio, rusqlite, tests." +manifest_path = "~/.claude/agents/code-implementer-rust.md" + +[[profile]] +id = "code-implementer-typescript" +role = "code-implementer" +caps = "FS-RW-BASH-PLAN" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Next.js 16 / Node / browser. Type-safe API contracts." +manifest_path = "~/.claude/agents/code-implementer-typescript.md" + +[[profile]] +id = "researcher" +role = "researcher" +caps = "RO-WEB-FS" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Web+codebase research. Evidence-Graded findings. Read-only." +manifest_path = "~/.claude/agents/researcher.md" + +[[profile]] +id = "researcher-web" +role = "researcher-web" +caps = "WEB" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Web research specialist. WebFetch/WebSearch only." +manifest_path = "~/.claude/agents/researcher-web.md" + +[[profile]] +id = "researcher-code" +role = "researcher-code" +caps = "RO-FS" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Codebase research. Glob/Grep/Read only." +manifest_path = "~/.claude/agents/researcher-code.md" + +[[profile]] +id = "critic" +role = "critic" +caps = "RO-FS-WEB" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Anti-patterns, tech debt, security issues, bugs. Severity-sorted findings." +manifest_path = "~/.claude/agents/critic.md" + +[[profile]] +id = "security-auditor" +role = "security-auditor" +caps = "RO-FS-WEB" +default_model_ref = "anthropic/claude-opus-4-7" +description = "9-point differential security review. Risk-classified findings. Opus default — security needs depth." 
+manifest_path = "~/.claude/agents/security-auditor.md" + +[[profile]] +id = "codex-reviewer" +role = "codex-reviewer" +caps = "RO-FS-WEB" +default_model_ref = "codex/gpt-5-codex" +description = "RULE 0.23 DUAL REVIEW. Independent second-opinion via OpenAI Codex CLI." +manifest_path = "~/.claude/agents/codex-reviewer.md" + +[[profile]] +id = "infra-implementer" +role = "infra-implementer" +caps = "FS-RW-BASH" +default_model_ref = "anthropic/claude-sonnet-4-6" +description = "Deploys, CI/CD, secrets, container/IaC. Per-project credential isolation." +manifest_path = "~/.claude/agents/infra-implementer.md" + +[[profile]] +id = "modal-runner" +role = "modal-runner" +caps = "FS-RW-BASH" +default_model_ref = "anthropic/claude-opus-4-7" +description = "Modal compute orchestrator. Cost gating, KILL GUARD. Opus — irreversible $$ actions need depth." +manifest_path = "~/.claude/agents/modal-runner.md" + +[[profile]] +id = "validator" +role = "validator" +caps = "RO-FS-WEB" +default_model_ref = "anthropic/claude-haiku-4-5" +description = "RULE 0.4 fact-checker. API existence, version compat, doc claims. Haiku — narrow tasks." +manifest_path = "~/.claude/agents/validator.md" + +[[profile]] +id = "cost-guardian" +role = "cost-guardian" +caps = "RO-FS-BASH-WEB" +default_model_ref = "anthropic/claude-haiku-4-5" +description = "Pre-launch compute cost verification. Dashboard balance, running jobs, head-room." +manifest_path = "~/.claude/agents/cost-guardian.md" diff --git a/models.toml b/models.toml new file mode 100644 index 0000000..3b7ae5b --- /dev/null +++ b/models.toml @@ -0,0 +1,191 @@ +# Реестр моделей. Слой 2 из трёх. +# +# Каждая модель привязана к провайдеру через `provider_ref`. Цены в +# microcents (1e-6 цента = 1e-8 USD) на 1M токенов — совместимо с +# kei-ledger.cost_micro_cents и kei-model-router::pricing.rs. +# +# Источники цен: pricing-страница соответствующего провайдера. +# Каждая запись помечена `verified_at` — дата последней сверки. 
+# +# `deprecated_at = ""` (пустая строка; в TOML нет null) ⇒ модель живая. После deprecate kei-model-router +# не выбирает её для новых invocation-ов, но старые записи в ledger +# продолжают резолвиться. + +# ─── Anthropic ───────────────────────────────────────────────────────── + +[[model]] +provider_ref = "anthropic" +# TODO: pin to "claude-haiku-4-5-20251001" once router/pricing accept slug as +# the lookup key (currently hardcoded by id in pricing.rs/escalate.rs tests). +id = "claude-haiku-4-5" +slug = "haiku" +display_name = "Claude Haiku 4.5" +context_window = 200000 +cost_input_per_mtok_micro = 100_000_000 # $1.00 +cost_output_per_mtok_micro = 500_000_000 # $5.00 +cache_write_5m_per_mtok_micro = 125_000_000 +cache_write_1h_per_mtok_micro = 200_000_000 +cache_read_per_mtok_micro = 10_000_000 +verified_at = "2026-04-30" +deprecated_at = "" +notes = "Cheapest Claude. Single-edit, formatting, lookup." + +[[model]] +provider_ref = "anthropic" +id = "claude-sonnet-4-6" +slug = "sonnet" +display_name = "Claude Sonnet 4.6" +context_window = 1000000 +cost_input_per_mtok_micro = 300_000_000 # $3.00 +cost_output_per_mtok_micro = 1_500_000_000 # $15.00 +cache_write_5m_per_mtok_micro = 375_000_000 +cache_write_1h_per_mtok_micro = 600_000_000 +cache_read_per_mtok_micro = 30_000_000 +verified_at = "2026-04-30" +deprecated_at = "" +notes = "RULE 0.20 default for code-implementer-* / researcher-*." + +[[model]] +provider_ref = "anthropic" +id = "claude-opus-4-7" +slug = "opus" +display_name = "Claude Opus 4.7" +context_window = 1000000 +cost_input_per_mtok_micro = 500_000_000 # $5.00 +cost_output_per_mtok_micro = 2_500_000_000 # $25.00 +cache_write_5m_per_mtok_micro = 625_000_000 +cache_write_1h_per_mtok_micro = 1_000_000_000 +cache_read_per_mtok_micro = 50_000_000 +verified_at = "2026-04-30" +deprecated_at = "" +notes = "Architecture, novel reasoning, math derivation. Tokenizer overhead 1.35x vs Sonnet." 
+ +# ─── OpenAI ──────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "openai" +id = "gpt-5" +slug = "gpt-5" +display_name = "GPT-5" +context_window = 400000 +cost_input_per_mtok_micro = 200_000_000 # $2.00 (placeholder, verify on use) +cost_output_per_mtok_micro = 800_000_000 # $8.00 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "" +deprecated_at = "" +notes = "[UNVERIFIED] — re-fetch pricing page before billing-grade decisions." + +[[model]] +provider_ref = "codex" +id = "gpt-5-codex" +slug = "codex" +display_name = "GPT-5 Codex (via ChatGPT OAuth)" +context_window = 200000 +cost_input_per_mtok_micro = 0 # subscription, not per-token +cost_output_per_mtok_micro = 0 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "2026-05-10" +deprecated_at = "" +notes = "RULE 0.23 dual-review. ChatGPT Plus/Pro/Team subscription quota." + +# ─── xAI ─────────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "xai" +id = "grok-4" +slug = "grok-4" +display_name = "Grok 4" +context_window = 256000 +cost_input_per_mtok_micro = 300_000_000 # [UNVERIFIED] +cost_output_per_mtok_micro = 1_500_000_000 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "" +deprecated_at = "" +notes = "[UNVERIFIED] — verify before billing." 
+ +# ─── DeepSeek ────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "deepseek" +id = "deepseek-r1" +slug = "r1" +display_name = "DeepSeek R1" +context_window = 64000 +cost_input_per_mtok_micro = 55_000_000 # $0.55 [UNVERIFIED — pricing fluctuates] +cost_output_per_mtok_micro = 219_000_000 # $2.19 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 14_000_000 # $0.14 cache hit +verified_at = "" +deprecated_at = "" +notes = "Cheapest reasoning tier. Long thinking time." + +[[model]] +provider_ref = "deepseek" +id = "deepseek-v3" +slug = "v3" +display_name = "DeepSeek V3" +context_window = 64000 +cost_input_per_mtok_micro = 27_000_000 # $0.27 [UNVERIFIED] +cost_output_per_mtok_micro = 110_000_000 # $1.10 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 7_000_000 +verified_at = "" +deprecated_at = "" +notes = "Non-reasoning baseline. Cheap bulk inference." + +# ─── Google ──────────────────────────────────────────────────────────── + +[[model]] +provider_ref = "google" +id = "gemini-2.5-pro" +slug = "gemini-pro" +display_name = "Gemini 2.5 Pro" +context_window = 2000000 +cost_input_per_mtok_micro = 125_000_000 # $1.25 [UNVERIFIED] +cost_output_per_mtok_micro = 1_000_000_000 # $10.00 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "" +deprecated_at = "" +notes = "2M context. Long-document analysis." + +# ─── Local (zero per-token cost) ─────────────────────────────────────── + +[[model]] +provider_ref = "ollama-local" +id = "llama-3.3-70b" +slug = "llama-70b-local" +display_name = "Llama 3.3 70B (Ollama local)" +context_window = 128000 +cost_input_per_mtok_micro = 0 +cost_output_per_mtok_micro = 0 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "2026-05-13" +deprecated_at = "" +notes = "Local. 
Compute cost is electricity + opportunity, not per-token." + +[[model]] +provider_ref = "mlx-local" +id = "qwen-2.5-coder-32b" +slug = "qwen-coder-local" +display_name = "Qwen 2.5 Coder 32B (MLX)" +context_window = 32000 +cost_input_per_mtok_micro = 0 +cost_output_per_mtok_micro = 0 +cache_write_5m_per_mtok_micro = 0 +cache_write_1h_per_mtok_micro = 0 +cache_read_per_mtok_micro = 0 +verified_at = "2026-05-13" +deprecated_at = "" +notes = "Code-focused local model. Apple silicon Metal." diff --git a/providers.toml b/providers.toml new file mode 100644 index 0000000..ac420c4 --- /dev/null +++ b/providers.toml @@ -0,0 +1,131 @@ +# Реестр провайдеров LLM. SSoT для kei-model-router + keisei-marketplace. +# +# Слой 1 из трёх: provider → model → profile (agent-profiles.toml). +# Провайдер стабилен (endpoint, схема аутентификации, биллинг-аккаунт), +# модели за ним меняются — см. models.toml. +# +# Все цены/контекст — в models.toml. Здесь только транспорт + лимиты. +# +# Конвенция: `id` — kebab-case, без версий. Версии — в моделях. + +[[provider]] +id = "anthropic" +display_name = "Anthropic" +endpoint = "https://api.anthropic.com/v1/messages" +auth_scheme = "x-api-key" # header name; value from env ANTHROPIC_API_KEY +auth_env = "ANTHROPIC_API_KEY" +api_version_header = "anthropic-version" +api_version_value = "2023-06-01" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 4000 +billing_currency = "USD" +notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*." + +[[provider]] +id = "openai" +display_name = "OpenAI" +endpoint = "https://api.openai.com/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "OPENAI_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 5000 +billing_currency = "USD" +notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)." 
+ +[[provider]] +id = "codex" +display_name = "OpenAI Codex (ChatGPT OAuth)" +endpoint = "local:codex-cli" # invoked via `codex` CLI, not HTTP +auth_scheme = "oauth-subscription" +auth_env = "_" # no env — CLI handles auth +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 60 # ChatGPT subscription quota +billing_currency = "USD-subscription" +notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers." + +[[provider]] +id = "xai" +display_name = "xAI" +endpoint = "https://api.x.ai/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "XAI_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 1000 +billing_currency = "USD" +notes = "Grok family. OpenAI-compatible API." + +[[provider]] +id = "deepseek" +display_name = "DeepSeek" +endpoint = "https://api.deepseek.com/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "DEEPSEEK_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 60 +billing_currency = "USD" # invoices in USD even if upstream CNY +notes = "Cheapest tier for batch reasoning. R1 reasoner family." + +[[provider]] +id = "google" +display_name = "Google Gemini" +endpoint = "https://generativelanguage.googleapis.com/v1beta/models" +auth_scheme = "query-key" # ?key=<...> in URL +auth_env = "GEMINI_API_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 1000 +billing_currency = "USD" +notes = "Long-context window for long-doc analysis (exact size per model in models.toml)." + +[[provider]] +id = "ollama-local" +display_name = "Ollama (local)" +endpoint = "http://127.0.0.1:11434/api/chat" +auth_scheme = "none" +auth_env = "_" +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 0 # local, no remote limit +billing_currency = "USD-zero" # local compute, opportunity cost only +notes = "Self-hosted. Models loaded via `ollama pull`." 
+ +[[provider]] +id = "mlx-local" +display_name = "MLX (Apple silicon local)" +endpoint = "http://127.0.0.1:8080/v1/chat/completions" +auth_scheme = "none" +auth_env = "_" +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 0 +billing_currency = "USD-zero" +notes = "Native Apple-silicon inference. mlx_lm.server." + +[[provider]] +id = "lmstudio-local" +display_name = "LM Studio (local)" +endpoint = "http://127.0.0.1:1234/v1/chat/completions" +auth_scheme = "none" +auth_env = "_" +retry_max = 1 +retry_backoff_ms = 0 +rate_limit_rpm = 0 +billing_currency = "USD-zero" +notes = "Desktop GUI runner. OpenAI-compatible." + +[[provider]] +id = "litellm-proxy" +display_name = "LiteLLM proxy (keisei.app)" +endpoint = "https://api.keisei.app/llm/v1/chat/completions" +auth_scheme = "bearer" +auth_env = "KEI_LITELLM_KEY" +retry_max = 3 +retry_backoff_ms = 500 +rate_limit_rpm = 1000 +billing_currency = "USD-keisei" +notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."