diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..9f4f7aa --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "_blocks/registries"] + path = _blocks/registries + url = git@github.com:KeiSeiLab/kei-registries.git diff --git a/_blocks/registries b/_blocks/registries new file mode 160000 index 0000000..14a1e62 --- /dev/null +++ b/_blocks/registries @@ -0,0 +1 @@ +Subproject commit 14a1e62db6bff685e9b707fd368e6bbe42e9a819 diff --git a/_blocks/registries/agent-profiles.toml b/_blocks/registries/agent-profiles.toml deleted file mode 100644 index 44d2a06..0000000 --- a/_blocks/registries/agent-profiles.toml +++ /dev/null @@ -1,186 +0,0 @@ -# Реестр профилей агентов. Слой 3 из трёх. -# -# Профиль — это (role + caps + default_model_ref + system_prompt_ref). -# Не привязан к конкретному провайдеру: provider+model выбирается -# kei-model-router'ом по подсказке caller'а либо берётся default_model_ref. -# -# Источник истины для существующих 59 манифестов — `.md` файлы в -# `~/.claude/agents/`. Этот toml содержит ТОЛЬКО core/default-профили -# для marketplace UI и kei-model-router; per-manifest detail остаётся -# в `.md` файлах с frontmatter. -# -# Caps-bundle алфавит: -# FS-RO — Read/Glob/Grep only -# FS-RW — Read/Write/Edit -# BASH — Bash shell -# WEB — WebFetch/WebSearch -# AGENT — может спавнить sub-агентов (RULE 0.12) -# PLAN — Plan Mode allowed -# -# Стандартные комбинации: -# RO-WEB-FS — researcher / validator (read+web) -# FS-RW-BASH-PLAN — code-implementer (full + plan) -# RO-FS — critic / auditor (read-only no web) -# FS-RW-BASH-AGENT — orchestrator-meta (can spawn) - -# ─── Core implementer family ─────────────────────────────────────────── - -[[profile]] -id = "code-implementer" -role = "code-implementer" -caps = "FS-RW-BASH-PLAN" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Generic Rust/TS/Python/Go/Swift/Flutter implementer. Constructor Pattern. RULE 0.20 → sonnet default." -manifest_path = "~/.claude/agents/code-implementer.md" - -[[profile]] -id = "code-implementer-rust" -role = "code-implementer" -caps = "FS-RW-BASH-PLAN" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Rust specialist. Cargo, traits, async/tokio, rusqlite, tests." -manifest_path = "~/.claude/agents/code-implementer-rust.md" - -[[profile]] -id = "code-implementer-typescript" -role = "code-implementer" -caps = "FS-RW-BASH-PLAN" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Next.js 16 / Node / browser. Type-safe API contracts." -manifest_path = "~/.claude/agents/code-implementer-typescript.md" - -# ─── Researcher family ───────────────────────────────────────────────── - -[[profile]] -id = "researcher" -role = "researcher" -caps = "RO-WEB-FS" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Web+codebase research. Evidence-Graded findings. Read-only." -manifest_path = "~/.claude/agents/researcher.md" - -[[profile]] -id = "researcher-web" -role = "researcher-web" -caps = "WEB" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Web research specialist. WebFetch/WebSearch only." -manifest_path = "~/.claude/agents/researcher-web.md" - -[[profile]] -id = "researcher-code" -role = "researcher-code" -caps = "RO-FS" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Codebase research. Glob/Grep/Read only." -manifest_path = "~/.claude/agents/researcher-code.md" - -# ─── Critic / auditor family ─────────────────────────────────────────── - -[[profile]] -id = "critic" -role = "critic" -caps = "RO-FS-WEB" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Anti-patterns, tech debt, security issues, bugs. Severity-sorted findings." -manifest_path = "~/.claude/agents/critic.md" - -[[profile]] -id = "security-auditor" -role = "security-auditor" -caps = "RO-FS-WEB" -default_model_ref = "anthropic/claude-opus-4-7" -description = "9-point differential security review. Risk-classified findings. Opus default — security needs depth." -manifest_path = "~/.claude/agents/security-auditor.md" - -[[profile]] -id = "codex-reviewer" -role = "codex-reviewer" -caps = "RO-FS-WEB" -default_model_ref = "codex/gpt-5-codex" -description = "RULE 0.23 DUAL REVIEW. Independent second-opinion via OpenAI Codex CLI." -manifest_path = "~/.claude/agents/codex-reviewer.md" - -# ─── Infra family ────────────────────────────────────────────────────── - -[[profile]] -id = "infra-implementer" -role = "infra-implementer" -caps = "FS-RW-BASH" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Deploys, CI/CD, secrets, container/IaC. Per-project credential isolation." -manifest_path = "~/.claude/agents/infra-implementer.md" - -[[profile]] -id = "modal-runner" -role = "modal-runner" -caps = "FS-RW-BASH" -default_model_ref = "anthropic/claude-opus-4-7" -description = "Modal compute orchestrator. Cost gating, KILL GUARD. Opus — irreversible $$ actions need depth." -manifest_path = "~/.claude/agents/modal-runner.md" - -# ─── ML family ───────────────────────────────────────────────────────── - -[[profile]] -id = "ml-implementer" -role = "ml-implementer" -caps = "FS-RW-BASH-PLAN" -default_model_ref = "anthropic/claude-opus-4-7" -description = "ML training/inference. Math-First, Pre-Experiment Check, Modal protocol." -manifest_path = "~/.claude/agents/ml-implementer.md" - -[[profile]] -id = "ml-researcher" -role = "ml-researcher" -caps = "RO-WEB-FS" -default_model_ref = "anthropic/claude-opus-4-7" -description = "ML literature, benchmarks, reproducibility. Math-First read-only." -manifest_path = "~/.claude/agents/ml-researcher.md" - -# ─── Specialist anchors (project-specialists) ────────────────────────── -# -# Полный список 59 манифестов остаётся в `~/.claude/agents/*.md`. -# Здесь — представительные anchors, marketplace UI обходит directory -# и собирает остальные из frontmatter. - -[[profile]] -id = "cartoon-studio-specialist" -role = "project-specialist" -caps = "FS-RW-BASH-AGENT" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "Cartoon Studio AI video generation. Next.js 16 + Drizzle + SQLite. Flux 2 Pro / Kling O3 / ElevenLabs." -manifest_path = "~/.claude/agents/cartoon-studio-specialist.md" - -[[profile]] -id = "keisei-os-specialist" -role = "project-specialist" -caps = "FS-RW-BASH-AGENT" -default_model_ref = "anthropic/claude-sonnet-4-6" -description = "KeiSei OS 4-layer runtime, 13 primitives, 14 Brain Tools." -manifest_path = "~/.claude/agents/keisei-os-specialist.md" - -# ─── Validator / cost-guardian (lightweight) ─────────────────────────── - -[[profile]] -id = "validator" -role = "validator" -caps = "RO-FS-WEB" -default_model_ref = "anthropic/claude-haiku-4-5" -description = "RULE 0.4 fact-checker. API existence, version compat, doc claims. Haiku — narrow tasks." -manifest_path = "~/.claude/agents/validator.md" - -[[profile]] -id = "cost-guardian" -role = "cost-guardian" -caps = "RO-FS-BASH-WEB" -default_model_ref = "anthropic/claude-haiku-4-5" -description = "Pre-launch compute cost verification. Dashboard balance, running jobs, head-room." -manifest_path = "~/.claude/agents/cost-guardian.md" - -[[profile]] -id = "patent-compliance" -role = "patent-compliance" -caps = "RO-FS-BASH" -default_model_ref = "anthropic/claude-haiku-4-5" -description = "Pre-filing IP cross-ref scan. Greps for unfiled-patent references." -manifest_path = "~/.claude/agents/patent-compliance.md" diff --git a/_blocks/registries/models.toml b/_blocks/registries/models.toml deleted file mode 100644 index 3b7ae5b..0000000 --- a/_blocks/registries/models.toml +++ /dev/null @@ -1,191 +0,0 @@ -# Реестр моделей. Слой 2 из трёх. -# -# Каждая модель привязана к провайдеру через `provider_ref`. Цены в -# microcents (1e-6 USD) на 1M токенов — совместимо с -# kei-ledger.cost_micro_cents и kei-model-router::pricing.rs. -# -# Источники цен: pricing-страница соответствующего провайдера. -# Каждая запись помечена `verified_at` — дата последней сверки. -# -# `deprecated_at = null` ⇒ модель живая. После deprecate kei-model-router -# не выбирает её для новых invocation-ов, но старые записи в ledger -# продолжают резолвиться. - -# ─── Anthropic ───────────────────────────────────────────────────────── - -[[model]] -provider_ref = "anthropic" -# TODO: pin to "claude-haiku-4-5-20251001" once router/pricing accept slug as -# the lookup key (currently hardcoded by id in pricing.rs/escalate.rs tests). -id = "claude-haiku-4-5" -slug = "haiku" -display_name = "Claude Haiku 4.5" -context_window = 200000 -cost_input_per_mtok_micro = 100_000_000 # $1.00 -cost_output_per_mtok_micro = 500_000_000 # $5.00 -cache_write_5m_per_mtok_micro = 125_000_000 -cache_write_1h_per_mtok_micro = 200_000_000 -cache_read_per_mtok_micro = 10_000_000 -verified_at = "2026-04-30" -deprecated_at = "" -notes = "Cheapest Claude. Single-edit, formatting, lookup." - -[[model]] -provider_ref = "anthropic" -id = "claude-sonnet-4-6" -slug = "sonnet" -display_name = "Claude Sonnet 4.6" -context_window = 1000000 -cost_input_per_mtok_micro = 300_000_000 # $3.00 -cost_output_per_mtok_micro = 1_500_000_000 # $15.00 -cache_write_5m_per_mtok_micro = 375_000_000 -cache_write_1h_per_mtok_micro = 600_000_000 -cache_read_per_mtok_micro = 30_000_000 -verified_at = "2026-04-30" -deprecated_at = "" -notes = "RULE 0.20 default for code-implementer-* / researcher-*." - -[[model]] -provider_ref = "anthropic" -id = "claude-opus-4-7" -slug = "opus" -display_name = "Claude Opus 4.7" -context_window = 1000000 -cost_input_per_mtok_micro = 500_000_000 # $5.00 -cost_output_per_mtok_micro = 2_500_000_000 # $25.00 -cache_write_5m_per_mtok_micro = 625_000_000 -cache_write_1h_per_mtok_micro = 1_000_000_000 -cache_read_per_mtok_micro = 50_000_000 -verified_at = "2026-04-30" -deprecated_at = "" -notes = "Architecture, novel reasoning, math derivation. Tokenizer overhead 1.35x vs Sonnet." - -# ─── OpenAI ──────────────────────────────────────────────────────────── - -[[model]] -provider_ref = "openai" -id = "gpt-5" -slug = "gpt-5" -display_name = "GPT-5" -context_window = 400000 -cost_input_per_mtok_micro = 200_000_000 # $2.00 (placeholder, verify on use) -cost_output_per_mtok_micro = 800_000_000 # $8.00 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 0 -verified_at = "" -deprecated_at = "" -notes = "[UNVERIFIED] — re-fetch pricing page before billing-grade decisions." - -[[model]] -provider_ref = "codex" -id = "gpt-5-codex" -slug = "codex" -display_name = "GPT-5 Codex (via ChatGPT OAuth)" -context_window = 200000 -cost_input_per_mtok_micro = 0 # subscription, not per-token -cost_output_per_mtok_micro = 0 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 0 -verified_at = "2026-05-10" -deprecated_at = "" -notes = "RULE 0.23 dual-review. ChatGPT Plus/Pro/Team subscription quota." - -# ─── xAI ─────────────────────────────────────────────────────────────── - -[[model]] -provider_ref = "xai" -id = "grok-4" -slug = "grok-4" -display_name = "Grok 4" -context_window = 256000 -cost_input_per_mtok_micro = 300_000_000 # [UNVERIFIED] -cost_output_per_mtok_micro = 1_500_000_000 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 0 -verified_at = "" -deprecated_at = "" -notes = "[UNVERIFIED] — verify before billing." - -# ─── DeepSeek ────────────────────────────────────────────────────────── - -[[model]] -provider_ref = "deepseek" -id = "deepseek-r1" -slug = "r1" -display_name = "DeepSeek R1" -context_window = 64000 -cost_input_per_mtok_micro = 55_000_000 # $0.55 [UNVERIFIED — pricing fluctuates] -cost_output_per_mtok_micro = 219_000_000 # $2.19 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 14_000_000 # $0.14 cache hit -verified_at = "" -deprecated_at = "" -notes = "Cheapest reasoning tier. Long thinking time." - -[[model]] -provider_ref = "deepseek" -id = "deepseek-v3" -slug = "v3" -display_name = "DeepSeek V3" -context_window = 64000 -cost_input_per_mtok_micro = 27_000_000 # $0.27 [UNVERIFIED] -cost_output_per_mtok_micro = 110_000_000 # $1.10 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 7_000_000 -verified_at = "" -deprecated_at = "" -notes = "Non-reasoning baseline. Cheap bulk inference." - -# ─── Google ──────────────────────────────────────────────────────────── - -[[model]] -provider_ref = "google" -id = "gemini-2.5-pro" -slug = "gemini-pro" -display_name = "Gemini 2.5 Pro" -context_window = 2000000 -cost_input_per_mtok_micro = 125_000_000 # $1.25 [UNVERIFIED] -cost_output_per_mtok_micro = 1_000_000_000 # $10.00 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 0 -verified_at = "" -deprecated_at = "" -notes = "2M context. Long-document analysis." - -# ─── Local (zero per-token cost) ─────────────────────────────────────── - -[[model]] -provider_ref = "ollama-local" -id = "llama-3.3-70b" -slug = "llama-70b-local" -display_name = "Llama 3.3 70B (Ollama local)" -context_window = 128000 -cost_input_per_mtok_micro = 0 -cost_output_per_mtok_micro = 0 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 0 -verified_at = "2026-05-13" -deprecated_at = "" -notes = "Local. Compute cost is electricity + opportunity, not per-token." - -[[model]] -provider_ref = "mlx-local" -id = "qwen-2.5-coder-32b" -slug = "qwen-coder-local" -display_name = "Qwen 2.5 Coder 32B (MLX)" -context_window = 32000 -cost_input_per_mtok_micro = 0 -cost_output_per_mtok_micro = 0 -cache_write_5m_per_mtok_micro = 0 -cache_write_1h_per_mtok_micro = 0 -cache_read_per_mtok_micro = 0 -verified_at = "2026-05-13" -deprecated_at = "" -notes = "Code-focused local model. Apple silicon Metal." diff --git a/_blocks/registries/providers.toml b/_blocks/registries/providers.toml deleted file mode 100644 index ac420c4..0000000 --- a/_blocks/registries/providers.toml +++ /dev/null @@ -1,131 +0,0 @@ -# Реестр провайдеров LLM. SSoT для kei-model-router + keisei-marketplace. -# -# Слой 1 из трёх: provider → model → profile (agent-profiles.toml). -# Провайдер стабилен (endpoint, схема аутентификации, биллинг-аккаунт), -# модели за ним меняются — см. models.toml. -# -# Все цены/контекст — в models.toml. Здесь только транспорт + лимиты. -# -# Конвенция: `id` — kebab-case, без версий. Версии — в моделях. - -[[provider]] -id = "anthropic" -display_name = "Anthropic" -endpoint = "https://api.anthropic.com/v1/messages" -auth_scheme = "x-api-key" # header name; value from env ANTHROPIC_API_KEY -auth_env = "ANTHROPIC_API_KEY" -api_version_header = "anthropic-version" -api_version_value = "2023-06-01" -retry_max = 3 -retry_backoff_ms = 500 -rate_limit_rpm = 4000 -billing_currency = "USD" -notes = "Primary provider for Claude family. RULE 0.20 default for code-implementer-*." - -[[provider]] -id = "openai" -display_name = "OpenAI" -endpoint = "https://api.openai.com/v1/chat/completions" -auth_scheme = "bearer" -auth_env = "OPENAI_API_KEY" -retry_max = 3 -retry_backoff_ms = 500 -rate_limit_rpm = 5000 -billing_currency = "USD" -notes = "GPT family + codex (ChatGPT OAuth, see codex provider for dual-review)." - -[[provider]] -id = "codex" -display_name = "OpenAI Codex (ChatGPT OAuth)" -endpoint = "local:codex-cli" # invoked via `codex` CLI, not HTTP -auth_scheme = "oauth-subscription" -auth_env = "_" # no env — CLI handles auth -retry_max = 1 -retry_backoff_ms = 0 -rate_limit_rpm = 60 # ChatGPT subscription quota -billing_currency = "USD-subscription" -notes = "RULE 0.23 DUAL REVIEW. Separate billing from API providers." - -[[provider]] -id = "xai" -display_name = "xAI" -endpoint = "https://api.x.ai/v1/chat/completions" -auth_scheme = "bearer" -auth_env = "XAI_API_KEY" -retry_max = 3 -retry_backoff_ms = 500 -rate_limit_rpm = 1000 -billing_currency = "USD" -notes = "Grok family. OpenAI-compatible API." - -[[provider]] -id = "deepseek" -display_name = "DeepSeek" -endpoint = "https://api.deepseek.com/v1/chat/completions" -auth_scheme = "bearer" -auth_env = "DEEPSEEK_API_KEY" -retry_max = 3 -retry_backoff_ms = 500 -rate_limit_rpm = 60 -billing_currency = "USD" # invoices in USD even if upstream CNY -notes = "Cheapest tier for batch reasoning. R1 reasoner family." - -[[provider]] -id = "google" -display_name = "Google Gemini" -endpoint = "https://generativelanguage.googleapis.com/v1beta/models" -auth_scheme = "query-key" # ?key=<...> in URL -auth_env = "GEMINI_API_KEY" -retry_max = 3 -retry_backoff_ms = 500 -rate_limit_rpm = 1000 -billing_currency = "USD" -notes = "1M-context window for long-doc analysis." - -[[provider]] -id = "ollama-local" -display_name = "Ollama (local)" -endpoint = "http://127.0.0.1:11434/api/chat" -auth_scheme = "none" -auth_env = "_" -retry_max = 1 -retry_backoff_ms = 0 -rate_limit_rpm = 0 # local, no remote limit -billing_currency = "USD-zero" # local compute, opportunity cost only -notes = "Self-hosted. Models loaded via `ollama pull`." - -[[provider]] -id = "mlx-local" -display_name = "MLX (Apple silicon local)" -endpoint = "http://127.0.0.1:8080/v1/chat/completions" -auth_scheme = "none" -auth_env = "_" -retry_max = 1 -retry_backoff_ms = 0 -rate_limit_rpm = 0 -billing_currency = "USD-zero" -notes = "Native Apple-silicon inference. mlx_lm.server." - -[[provider]] -id = "lmstudio-local" -display_name = "LM Studio (local)" -endpoint = "http://127.0.0.1:1234/v1/chat/completions" -auth_scheme = "none" -auth_env = "_" -retry_max = 1 -retry_backoff_ms = 0 -rate_limit_rpm = 0 -billing_currency = "USD-zero" -notes = "Desktop GUI runner. OpenAI-compatible." - -[[provider]] -id = "litellm-proxy" -display_name = "LiteLLM proxy (keisei.app)" -endpoint = "https://api.keisei.app/llm/v1/chat/completions" -auth_scheme = "bearer" -auth_env = "KEI_LITELLM_KEY" -retry_max = 3 -retry_backoff_ms = 500 -rate_limit_rpm = 1000 -billing_currency = "USD-keisei" -notes = "Per-user-keyed proxy. Routes to anthropic/openai/xai/deepseek under one bearer."