Merge branch 'fix/v0.14.1-audit-blockers' — v0.14.1 audit fixes + CI

Wave 1+2 audit: 3 RELEASE BLOCKERS + 3 HIGH + 2 MED + newly-surfaced CI gap all fixed.

Verified:
- Rust workspace tests: 167 (up from 154; +13 new path-traversal + backend-guard + patch-format tests)
- Assembler tests: 20/20 unchanged
- install.sh syntax clean
- GitHub Actions CI covers: rust-assembler, rust-primitives, ts-packages (Node 18/20/22), install-dry-run, shell-lint, genesis-scan — the first four on an Ubuntu+macOS matrix; shell-lint and genesis-scan on Ubuntu only
This commit is contained in:
Parfii-bot 2026-04-22 13:36:59 +08:00
commit 4b0185a3d1
184 changed files with 12541 additions and 294 deletions

87
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,87 @@
name: CI
on:
push:
branches: [main]
pull_request:
jobs:
rust-assembler:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
with:
workspaces: _assembler
- run: cd _assembler && cargo test --release
rust-primitives:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
with:
workspaces: _primitives/_rust
- run: cd _primitives/_rust && cargo test --workspace --release
ts-packages:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
node: ['18', '20', '22']
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node }}
- run: cd _ts_packages && npm ci
- run: cd _ts_packages && npm run build --workspaces
- run: cd _ts_packages && npm test --workspaces --if-present
install-dry-run:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: Install hard deps (Ubuntu)
if: matrix.os == 'ubuntu-latest'
run: sudo apt-get update && sudo apt-get install -y jq pandoc
- name: Install hard deps (macOS)
if: matrix.os == 'macos-latest'
run: brew install jq pandoc
- run: bash -n install.sh
- run: ./install.sh --no-execute --profile=minimal
- run: ./install.sh --no-execute --profile=dev
- run: ./install.sh --no-execute --profile=full
shell-lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: sudo apt-get update && sudo apt-get install -y shellcheck
- name: shellcheck (advisory)
run: find hooks _primitives -name '*.sh' -exec shellcheck -S warning {} +
continue-on-error: true # warnings are advisory initially
genesis-scan:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
with:
workspaces: _primitives/_rust
- run: cd _primitives/_rust && cargo build --release -p genesis-scan
- run: ./_primitives/_rust/target/release/genesis-scan --path . --format=human --exit-on-hit

4
.gitignore vendored
View file

@ -1,3 +1,7 @@
_primitives/_rust/target/
**/target/
.DS_Store
# Agent worktrees — ephemeral orchestrator scratch dirs, never commit.
.claude/worktrees/
**/.claude/worktrees/

View file

@ -28,7 +28,7 @@ Thanks.
## What it is
KeiSeiKit is a comprehensive drop-in toolkit for [Claude Code](https://claude.com/claude-code). It ships a curated set of composable behavioral blocks, a Rust assembler that builds agent `.md` files from TOML manifests deterministically, nine pre-wired hooks (three of them dedicated to RULE 0.14 session self-audit), 35 portable skills (including an interactive `/new-agent` wizard, 10 hub-and-spoke pipelines, and the `/self-audit` retrospective skill), **9 Rust primitive crates**, 13 shell primitives, and 11 cross-tool bridge templates. Everything follows a Constructor Pattern: one file per concern, manifests as single source of truth, and the generated agent files are regenerated on every relevant edit.
KeiSeiKit is a comprehensive drop-in toolkit for [Claude Code](https://claude.com/claude-code). It ships a curated set of composable behavioral blocks, a Rust assembler that builds agent `.md` files from TOML manifests deterministically, 10 pre-wired PreToolUse/PostToolUse hooks (three of them dedicated to RULE 0.14 session self-audit), 38 portable skills (including an interactive `/new-agent` wizard, 10 hub-and-spoke pipelines, and the `/self-audit` retrospective skill), **24 Rust primitive crates**, 13 opt-in shell primitives (plus 3 always-copied sleep-sync helpers), and 11 cross-tool bridge templates. Everything follows a Constructor Pattern: one file per concern, manifests as single source of truth, and the generated agent files are regenerated on every relevant edit.
The kit is MIT-licensed and fully generic — install it on a fresh machine and you get a sane 12-agent fleet (implementers, critics, researchers, cost-guardians, and more — all namespaced under `kei-*` so they won't collide with your own same-named agents), a wizard for spinning up new project specialists, 10 pipeline skills that combine primitives end-to-end (`/compose-solution`, `/site-create`, `/schema-design`, `/observability-setup`, `/auth-setup`, `/api-design`, `/ci-scaffold`, `/test-matrix`, `/docs-scaffold`, `/new-project`, `/vm-provision`), and a build pipeline that keeps every agent derivable from its manifest.
@ -67,7 +67,7 @@ cd KeiSeiKit
5. Builds the Rust assembler (`cargo build --release` in `_assembler/`)
6. If any Rust primitive is in the selected profile: writes a scoped workspace `Cargo.toml` listing ONLY the installed crates, then `cargo build --release`
7. Generates agent `.md` files in-place with `AGENT_ROOT=~/.claude/agents assemble --in-place`
8. Copies the six hooks and 34 skills
8. Copies the 10 hooks and 38 skills
After install, the only remaining step is merging `settings-snippet.json` into your `~/.claude/settings.json` to activate the hooks. You can do this automatically with `./install.sh --activate-hooks` or answer `y` at the end-of-install TTY prompt.
@ -91,7 +91,7 @@ By default `./install.sh` is **minimal** — agents + hooks + skills + bridges,
| `frontend` | 8 site tools: `mock-render`, `visual-diff`, `tokens-sync`, `design-scrape`, `live-preview`, `figma-tokens`, `frontend-inspect`, `screenshot-decode` | ~60s | ~80 MB |
| `ops` | 8 infra tools: `kei-ledger`, `ssh-check`, `firewall-diff`, `provision-hetzner`, `provision-vultr`, `harden-base`, `metrics-scrape`, `log-ship` | ~90s | ~50 MB |
| `dev` | 9 dev tools: `kei-migrate`, `kei-changelog`, `kei-ci-lint`, `kei-docs-scaffold`, `kei-memory`, `kei-conflict-scan`, `kei-refactor-engine`, `kei-graph-check`, `kei-store` | ~60s | ~40 MB |
| `full` | everything (22 primitives) | ~5 min | ~200 MB |
| `full` | everything (37 primitives) | ~5 min | ~200 MB |
Examples:
@ -108,7 +108,7 @@ Examples:
Profile resolution lives in `_primitives/MANIFEST.toml` — one `[primitive.<name>]` entry per primitive plus a `[profile]` block. Edit the manifest to define new profiles without touching `install.sh`.
> **Migrating from a full install:** if you're re-running `install.sh` after an earlier version that installed all 22 primitives unconditionally, the new default (`minimal`) will REMOVE them. To preserve the old behaviour explicitly, pass `--profile=full`.
> **Migrating from a full install:** if you're re-running `install.sh` after an earlier version that installed all primitives unconditionally, the new default (`minimal`) will REMOVE them. To preserve the old behaviour explicitly, pass `--profile=full` (currently 37 primitives).
> **Re-install disclaimer:** `install.sh` is idempotent for clean state but **overwrites kit-owned `_blocks/`, `_primitives/`, `_bridges/`, `_templates/`, `_assembler/`, `hooks/`, and `skills/` on re-run** — local modifications under those directories are backed up to `<dir>.bak-TIMESTAMP/` (or, for shared hook files, to `<file>.bak-TIMESTAMP`). User-owned `_manifests/*.toml` are never overwritten.
@ -116,12 +116,13 @@ Profile resolution lives in `_primitives/MANIFEST.toml` — one `[primitive.<nam
| Category | Count | Examples |
|---|---:|---|
| Behavioral blocks | 73 | `baseline`, `evidence-grading`, `rule-math-first`, `stack-rust-axum`, `stack-react-vite`, `stack-vue-nuxt`, `stack-sveltekit`, `stack-astro`, `deploy-modal`, `api-fal-ai`, ... |
| Behavioral blocks | 73 | `baseline`, `evidence-grading`, `rule-math-first`, `stack-rust-axum`, `stack-react-vite`, `stack-sveltekit`, `stack-astro`, `deploy-modal`, `api-fal-ai`, ... |
| Generic agents (manifests) | 12 | `kei-code-implementer`, `kei-critic`, `kei-validator`, `kei-security-auditor`, `kei-architect`, `kei-researcher`, `kei-ml-implementer`, `kei-cost-guardian`, `kei-modal-runner`, ... |
| Hooks | 6 | `assemble-agents`, `assemble-validate`, `no-hand-edit-agents`, `tomd-preread`, `agent-fork-logger`, `site-wysiwyd-check` |
| Portable skills | 34 | `compose-solution`, `new-agent`, `new-project`, `site-create`, `schema-design`, `observability-setup`, `auth-setup`, `api-design`, `ci-scaffold`, `test-matrix`, `docs-scaffold`, `vm-provision`, ... |
| Primitives (Rust crates, opt-in) | 8 | `kei-ledger`, `kei-migrate`, `kei-changelog`, `ssh-check`, `firewall-diff`, `mock-render`, `visual-diff`, `tokens-sync` |
| Primitives (shell, opt-in) | 13 | `tomd`, `design-scrape`, `live-preview`, `figma-tokens`, `frontend-inspect`, `screenshot-decode`, `metrics-scrape`, `log-ship`, `provision-hetzner`, `provision-vultr`, `harden-base`, `kei-ci-lint`, `kei-docs-scaffold` |
| Hooks (PreToolUse / PostToolUse / Stop) | 10 | `assemble-agents`, `assemble-validate`, `no-hand-edit-agents`, `tomd-preread`, `agent-fork-logger`, `site-wysiwyd-check`, `session-end-dump`, `milestone-commit-hook`, `error-spike-detector`, `git-pre-commit-genesis` |
| Portable skills | 38 | `compose-solution`, `new-agent`, `new-project`, `site-create`, `schema-design`, `observability-setup`, `auth-setup`, `api-design`, `ci-scaffold`, `test-matrix`, `docs-scaffold`, `vm-provision`, ... |
| Primitives (Rust crates, opt-in) | 24 | `kei-ledger`, `kei-migrate`, `kei-changelog`, `ssh-check`, `firewall-diff`, `mock-render`, `visual-diff`, `tokens-sync`, `kei-memory`, `genesis-scan`, `kei-conflict-scan`, `kei-refactor-engine`, `kei-graph-check`, `kei-store`, `kei-router`, `kei-sage`, `kei-task`, `kei-chat-store`, `kei-crossdomain`, `kei-search-core`, `kei-content-store`, `kei-social-store`, `kei-curator`, `kei-auth` |
| Primitives (shell, opt-in via profile) | 13 | `tomd`, `design-scrape`, `live-preview`, `figma-tokens`, `frontend-inspect`, `screenshot-decode`, `metrics-scrape`, `log-ship`, `provision-hetzner`, `provision-vultr`, `harden-base`, `kei-ci-lint`, `kei-docs-scaffold` |
| Shell helpers (always copied) | 3 | `kei-sleep-setup`, `kei-sleep-sync`, `kei-sleep-queue` (dormant until you run `/sleep-setup`) |
| Cross-tool bridges | 11 | Cursor legacy/MDC, Codex, Copilot, Windsurf, Junie, Continue, Gemini, Aider, Replit |
Of the 73 blocks, the **8 base blocks** (`baseline`, `evidence-grading`, `memory-protocol`, `rule-pre-dev-gate`, `rule-test-first`, `rule-error-budget`, `rule-double-audit`, `rule-math-first`) are referenced directly by the 12 shipped manifests. The remaining blocks (`stack-*`, `deploy-*`, `api-*`, `scraper-*`, `domain-*`) are a library consumed by the `/new-agent` wizard and the hub-and-spoke pipeline skills: when you compose a project specialist or spin up a site, the wizard / pipeline picks the appropriate blocks and emits artefacts that reference them.
@ -233,9 +234,11 @@ A third nightly phase — **Phase C** — runs after REM on a user-chosen cadenc
Two output modes, chosen once in `/sleep-setup` Phase 3b:
- **Plan only** (default) — markdown report in `sync-repo/sleep-deep/YYYY-MM-DD-plan.md`. Read in the morning, decide what to merge by hand.
- **Plan + fork** — same plan plus a ready-to-review `deep-sleep/YYYY-MM-DD` branch with `git apply`-ready changes for auto-resolvable conflicts. Graph-check gate aborts the fork (plan is still committed) if any wikilink breaks after the patch.
- **Plan + fork** — same plan plus an auto-resolve review markdown (`YYYY-MM-DD-autoresolve.md`) listing the auto-resolvable conflicts with WHY / EXAMPLE / TRADEOFF per item. You open each file in an editor, apply the suggested change, commit on a `deep-sleep/YYYY-MM-DD` branch, then let the graph-check gate verify the wikilinks still resolve.
**Zero-conflict guarantee:** any conflict the engine marks `requires_human_decision` is EXCLUDED from the generated patch and listed plainly in the plan. No silent auto-apply of ambiguous changes.
> v0.14.1 retraction: earlier README claimed a `git apply`-ready patch. The engine cannot synthesise real unified-diff hunks without reading the source files — that would risk fabricated edits (RULE 0.4). The autoresolve file is now plain markdown reviewed and applied by hand; the "fork" path only automates the rename/move class of ops, not content edits.
**Zero-conflict guarantee:** any conflict the engine marks `requires_human_decision` is EXCLUDED from the auto-resolve markdown and listed plainly in the plan. No silent auto-apply of ambiguous changes.
**Store backends** (picked in Phase 3b, consumed via the new `kei-store` trait):
@ -245,13 +248,13 @@ Two output modes, chosen once in `/sleep-setup` Phase 3b:
| Forgejo self-hosted | production | Same wire protocol as GitHub |
| Gitea self-hosted | production | Same wire protocol |
| Filesystem only | production | Local `.git`; no push; fastest |
| S3 / R2 / MinIO | MVP stub | Manifest-based local-cache; `aws-sdk-s3` integration planned |
| S3 / R2 / MinIO | stub — local only until v0.15 | Manifest-based local cache ONLY; no upload to S3/R2/MinIO yet. Requires `KEI_STORE_ALLOW_S3_STUB=1` (explicit opt-in so you don't accidentally believe your data is in the cloud). `aws-sdk-s3` integration planned for v0.15. |
Requires the new `kei-conflict-scan`, `kei-refactor-engine`, `kei-graph-check`, and `kei-store` primitives (shipped in the `dev` and `full` profiles). Governed by the Phase C extension of RULE 0.15 in `~/.claude/rules/sleep-layer.md`.
## Primitives (Rust)
`_primitives/_rust/` is a Cargo workspace with 14 single-binary crates (v0.13.0 added 4 deep-sleep primitives). `install.sh` builds `--release` and drops binaries at `~/.claude/agents/_primitives/_rust/target/release/<name>`.
`_primitives/_rust/` is a Cargo workspace with 24 single-binary crates (v0.13.0 added 4 deep-sleep primitives; v0.14.0 added 10 LBM-port MCP crates). `install.sh` builds `--release` for the subset selected by the active profile and drops binaries at `~/.claude/agents/_primitives/_rust/target/release/<name>`.
| Crate | Purpose |
|---|---|
@ -266,7 +269,7 @@ Requires the new `kei-conflict-scan`, `kei-refactor-engine`, `kei-graph-check`,
| `kei-memory` | Session retrospective + recurring pattern detector; offline-first analyzer powering RULE 0.14 self-audit |
| `genesis-scan` | Patent-IP leak scanner (term blacklist + exempt-path rules; CI / pre-commit gate) |
| `kei-conflict-scan` | v0.13.0 — deep-sleep conflict scanner across rules/hooks/blocks/orphans/CP violations |
| `kei-refactor-engine` | v0.13.0 — consumes `kei-conflict-scan` JSON; emits plan markdown + `git apply`-ready patch |
| `kei-refactor-engine` | v0.13.0 — consumes `kei-conflict-scan` JSON; emits plan markdown + auto-resolve review markdown (NOT a unified diff; v0.14.1 retraction) |
| `kei-graph-check` | v0.13.0 — post-refactor wikilink + handoff + block-ref resolver gate |
| `kei-store` | v0.13.0 — memory-repo backend abstraction (GitHub / Forgejo / Gitea / Filesystem / S3) |
@ -303,7 +306,7 @@ Requires the new `kei-conflict-scan`, `kei-refactor-engine`, `kei-graph-check`,
Block edit (_blocks/<block>.md) <-- triggers rebuild of ALL agents
```
Six hooks enforce the pipeline:
10 hooks enforce the pipeline (6 pipeline + 3 session-audit + 1 genesis-pre-commit):
- **`assemble-agents`** (PostToolUse, Write/Edit) — rebuilds the affected agent(s) whenever a manifest or a block changes. No manual rebuild needed.
- **`assemble-validate`** (PreToolUse, Bash) — blocks `git commit` inside `~/.claude` if any manifest fails validation. Keeps the repo in a buildable state at all times.
@ -311,6 +314,10 @@ Six hooks enforce the pipeline:
- **`tomd-preread`** (PreToolUse, Read) — auto-converts opaque binary formats (`.docx`, `.doc`, `.xlsx`, `.pptx`, `.csv`) to markdown via the `tomd` primitive and redirects Claude to read the cached `.md` instead.
- **`agent-fork-logger`** (PreToolUse, Agent) — RULE 0.12 advisory: logs every Agent subagent invocation to the `kei-ledger` SQLite DB so the orchestrator can validate the fork bundle. Never blocks; silent no-op if `kei-ledger` is absent.
- **`site-wysiwyd-check`** (PostToolUse, Edit/Write) — on frontend-source edits (`.tsx`, `.vue`, `.svelte`, `.astro`, `.css`, `.html`, `.jsx`, `.ts`) in a project with a live dev server (`.keisei/dev-server.pid`), takes a Playwright screenshot via `mock-render` and diffs against `.keisei/target.png` via `visual-diff`. Advisory-only — drift is reported to stderr, never blocks.
- **`session-end-dump`** (Stop event) — RULE 0.14 self-audit: archives the session JSONL trace and ingests it into `kei-memory`.
- **`milestone-commit-hook`** (PostToolUse, Bash) — RULE 0.14 self-audit: appends a one-line session summary to `~/.claude/memory/audit-backlog.md` on every `feat:`/`refactor:`/merge commit.
- **`error-spike-detector`** (PostToolUse, any tool) — RULE 0.14 self-audit: tags + logs the pattern when 3+ errors occur within the last 20 tool calls.
- **`git-pre-commit-genesis`** (PreToolUse, Bash) — runs `genesis-scan` on staged files to block patent-IP leaks before commit.
## Adding custom blocks

View file

@ -21,7 +21,8 @@ core = ["tomd", "genesis-scan"]
frontend = ["mock-render", "visual-diff", "tokens-sync", "design-scrape", "live-preview", "figma-tokens", "frontend-inspect", "screenshot-decode"]
ops = ["kei-ledger", "ssh-check", "firewall-diff", "provision-hetzner", "provision-vultr", "harden-base", "metrics-scrape", "log-ship"]
dev = ["kei-migrate", "kei-changelog", "kei-ci-lint", "kei-docs-scaffold", "kei-memory", "kei-conflict-scan", "kei-refactor-engine", "kei-graph-check", "kei-store"]
full = ["tomd", "genesis-scan", "kei-ledger", "kei-migrate", "kei-changelog", "ssh-check", "firewall-diff", "mock-render", "visual-diff", "tokens-sync", "design-scrape", "live-preview", "figma-tokens", "frontend-inspect", "screenshot-decode", "provision-hetzner", "provision-vultr", "harden-base", "metrics-scrape", "log-ship", "kei-ci-lint", "kei-docs-scaffold", "kei-memory", "kei-conflict-scan", "kei-refactor-engine", "kei-graph-check", "kei-store"]
mcp = ["kei-router", "kei-sage", "kei-task", "kei-chat-store", "kei-crossdomain", "kei-search-core", "kei-content-store", "kei-social-store", "kei-curator", "kei-auth"]
full = ["tomd", "genesis-scan", "kei-ledger", "kei-migrate", "kei-changelog", "ssh-check", "firewall-diff", "mock-render", "visual-diff", "tokens-sync", "design-scrape", "live-preview", "figma-tokens", "frontend-inspect", "screenshot-decode", "provision-hetzner", "provision-vultr", "harden-base", "metrics-scrape", "log-ship", "kei-ci-lint", "kei-docs-scaffold", "kei-memory", "kei-conflict-scan", "kei-refactor-engine", "kei-graph-check", "kei-store", "kei-router", "kei-sage", "kei-task", "kei-chat-store", "kei-crossdomain", "kei-search-core", "kei-content-store", "kei-social-store", "kei-curator", "kei-auth"]
# --- shell primitives (13) -------------------------------------------------
@ -188,3 +189,65 @@ kind = "rust"
crate = "kei-store"
deps = ["git2 (vendored libgit2)"]
desc = "Memory-repo backend abstraction — GitHub / Forgejo / Gitea / Filesystem / S3 (S3 = MVP stub)"
# --- v0.14 LBM port (10) ---------------------------------------------------
[primitive.kei-router]
kind = "rust"
crate = "kei-router"
deps = ["regex"]
desc = "Natural-language query → tool-call router (LBM pkg/keirouter port, no ML)"
[primitive.kei-sage]
kind = "rust"
crate = "kei-sage"
deps = ["rusqlite bundled (FTS5 enabled)"]
desc = "Obsidian-style knowledge graph with FTS5, BFS, PageRank (LBM internal/sage port)"
[primitive.kei-task]
kind = "rust"
crate = "kei-task"
deps = ["rusqlite bundled (FTS5 enabled)"]
desc = "Task DAG + deps + milestones (LBM internal/task port)"
[primitive.kei-chat-store]
kind = "rust"
crate = "kei-chat-store"
deps = ["rusqlite bundled (FTS5 enabled)"]
desc = "Session persistence for Claude chats (LBM internal/chat port)"
[primitive.kei-crossdomain]
kind = "rust"
crate = "kei-crossdomain"
deps = ["rusqlite bundled"]
desc = "Cross-domain typed-edge store + BFS + auto-link (LBM internal/crossdomain port)"
[primitive.kei-search-core]
kind = "rust"
crate = "kei-search-core"
deps = ["rusqlite bundled"]
desc = "3-wave research engine with budget cap; fetch interface frozen (LBM internal/search port)"
[primitive.kei-content-store]
kind = "rust"
crate = "kei-content-store"
deps = ["rusqlite bundled", "sha2"]
desc = "Asset + prompt + campaign registry (LBM internal/content port)"
[primitive.kei-social-store]
kind = "rust"
crate = "kei-social-store"
deps = ["rusqlite bundled (FTS5 enabled)"]
desc = "People + interaction CRM lite (LBM internal/social port)"
[primitive.kei-curator]
kind = "rust"
crate = "kei-curator"
deps = ["rusqlite bundled"]
desc = "Edge decay + orphan prune for cross-domain graphs (LBM internal/curator port)"
[primitive.kei-auth]
kind = "rust"
crate = "kei-auth"
deps = ["rusqlite bundled", "hmac", "sha2"]
desc = "Multi-tenant session tokens with scopes + HMAC-signed expiry (rewrite, not port)"

View file

@ -909,6 +909,23 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "kei-auth"
version = "0.1.0"
dependencies = [
"anyhow",
"base64 0.22.1",
"chrono",
"clap",
"hmac",
"rand",
"rusqlite",
"serde",
"serde_json",
"sha2",
"tempfile",
]
[[package]]
name = "kei-changelog"
version = "0.1.0"
@ -920,6 +937,20 @@ dependencies = [
"regex",
]
[[package]]
name = "kei-chat-store"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"tempfile",
"uuid",
]
[[package]]
name = "kei-conflict-scan"
version = "0.1.0"
@ -933,6 +964,46 @@ dependencies = [
"walkdir",
]
[[package]]
name = "kei-content-store"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"sha2",
"tempfile",
]
[[package]]
name = "kei-crossdomain"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "kei-curator"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "kei-graph-check"
version = "0.1.0"
@ -996,6 +1067,56 @@ dependencies = [
"tempfile",
]
[[package]]
name = "kei-router"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"regex",
"serde",
"serde_json",
]
[[package]]
name = "kei-sage"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "kei-search-core"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "kei-social-store"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "kei-store"
version = "0.1.0"
@ -1009,6 +1130,19 @@ dependencies = [
"toml",
]
[[package]]
name = "kei-task"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"rusqlite",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@ -2315,6 +2449,17 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
dependencies = [
"getrandom 0.4.2",
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "vcpkg"
version = "0.2.15"

View file

@ -15,6 +15,17 @@ members = [
"kei-refactor-engine",
"kei-graph-check",
"kei-store",
# v0.14 LBM port — 10 new MCP-core primitives
"kei-router",
"kei-sage",
"kei-task",
"kei-chat-store",
"kei-crossdomain",
"kei-search-core",
"kei-content-store",
"kei-social-store",
"kei-curator",
"kei-auth",
]
[workspace.package]

View file

@ -0,0 +1,29 @@
[package]
name = "kei-auth"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Multi-tenant session tokens with scopes + HMAC-signed expiry (SQLite backend)."
[[bin]]
name = "kei-auth"
path = "src/main.rs"
[lib]
name = "kei_auth"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
hmac = "0.12"
sha2 = "0.10"
base64 = "0.22"
rand = "0.8"
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,25 @@
//! HMAC-SHA256 signer for token bodies.
use ::hmac::{Hmac, Mac};
use anyhow::{anyhow, Result};
use base64::Engine;
use sha2::Sha256;
type H = Hmac<Sha256>;
/// Compute the HMAC-SHA256 tag of `body` under `key`.
///
/// The raw tag bytes are returned encoded as URL-safe base64 without padding.
pub fn sign(key: &[u8], body: &[u8]) -> String {
    // Fully qualified so the `Mac` trait's constructor is the one called.
    let mut hasher = <H as Mac>::new_from_slice(key).expect("HMAC accepts any key size");
    hasher.update(body);
    let tag = hasher.finalize().into_bytes();
    base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(tag)
}
/// Check that `mac_b64` is the HMAC-SHA256 tag of `body` under `key`.
///
/// `mac_b64` is expected to be URL-safe base64 without padding.
///
/// # Errors
/// Returns "bad b64 mac: …" when `mac_b64` cannot be decoded and
/// "hmac mismatch" when the decoded tag does not match the computed one.
pub fn verify(key: &[u8], body: &[u8], mac_b64: &str) -> Result<()> {
    let mut hasher = <H as Mac>::new_from_slice(key).expect("HMAC accepts any key size");
    hasher.update(body);
    let supplied = match base64::engine::general_purpose::URL_SAFE_NO_PAD.decode(mac_b64) {
        Ok(raw) => raw,
        Err(e) => return Err(anyhow!("bad b64 mac: {e}")),
    };
    // The comparison itself is delegated to the MAC implementation.
    match hasher.verify_slice(&supplied) {
        Ok(()) => Ok(()),
        Err(_) => Err(anyhow!("hmac mismatch")),
    }
}

View file

@ -0,0 +1,15 @@
//! kei-auth — multi-tenant token auth. Replaces LBM's single LBM_MCP_TOKEN.
//!
//! Cubes:
//! - [`schema`] — SQLite tables for users + tokens
//! - [`hmac`] — HMAC-SHA256 signing helpers
//! - [`tokens`] — issue / verify / revoke / list
//! - [`scopes`] — read / write / admin enum + checks
pub mod hmac;
pub mod schema;
pub mod scopes;
pub mod tokens;
pub use scopes::Scope;
pub use tokens::{issue, revoke, verify, VerifyOutcome};

View file

@ -0,0 +1,80 @@
//! kei-auth CLI — issue/verify/revoke.
//!
//! v0.14.1 security fix: the `--key` CLI flag was removed because it
//! leaked the HMAC signing secret through `/proc/<pid>/cmdline` and
//! shell history. The only supported key source is the `KEI_AUTH_KEY`
//! env var (sourced from `~/.claude/secrets/.env` per RULE 0.8).
use clap::{Parser, Subcommand};
use kei_auth::schema::open;
use kei_auth::scopes::Scope;
use kei_auth::tokens::{issue, revoke, verify};
use std::path::PathBuf;
use std::process::ExitCode;
use std::str::FromStr;
// Top-level command-line arguments of the `kei-auth` binary.
// NOTE: plain `//` comments are used on purpose — clap's derive turns `///`
// doc comments into help text, which would change the CLI output.
#[derive(Parser)]
#[command(name = "kei-auth", version)]
struct Cli {
// Optional `--db` path; when absent, `db_path` chooses the location.
#[arg(long)] db: Option<PathBuf>,
// The subcommand to execute.
#[command(subcommand)] cmd: Cmd,
}
// Subcommands of the `kei-auth` CLI.
// (`//` rather than `///` so clap's derived help text stays unchanged.)
#[derive(Subcommand)]
enum Cmd {
// Issue a token for `--user` in `--project`. `--scope` defaults to "read";
// `--ttl` defaults to 86400 and is forwarded to `issue` as the TTL in seconds.
Issue { #[arg(long)] user: String,
#[arg(long)] project: String,
#[arg(long, default_value = "read")] scope: String,
#[arg(long, default_value_t = 86400)] ttl: i64 },
// Verify a token passed as a positional argument.
Verify { token: String },
// Revoke a token passed as a positional argument.
Revoke { token: String },
}
/// Resolve the SQLite database location.
///
/// Precedence: the explicit `o` argument, then the `KEI_AUTH_DB` environment
/// variable, then `$HOME/.claude/auth/auth.sqlite` (with `.` standing in for
/// `$HOME` when that variable cannot be read).
fn db_path(o: Option<PathBuf>) -> PathBuf {
    o.or_else(|| std::env::var("KEI_AUTH_DB").ok().map(PathBuf::from))
        .unwrap_or_else(|| {
            let home = std::env::var("HOME").unwrap_or_else(|_| ".".into());
            PathBuf::from(home).join(".claude/auth/auth.sqlite")
        })
}
/// Read the HMAC signing key from the `KEI_AUTH_KEY` environment variable.
///
/// The variable's value is used verbatim as the key bytes.
///
/// # Errors
/// Fails when the variable cannot be read, and also when it is set to the
/// empty string: an empty value would otherwise be accepted as an HMAC key
/// that anyone can reproduce.
fn key() -> anyhow::Result<Vec<u8>> {
    let k = std::env::var("KEI_AUTH_KEY").map_err(|_| {
        anyhow::anyhow!(
            "KEI_AUTH_KEY env var not set.\n \
             Set it before running kei-auth:\n \
             export KEI_AUTH_KEY=\"$(openssl rand -hex 32)\"\n \
             Or read from ~/.claude/secrets/.env (RULE 0.8 SSoT).\n \
             The previous --key CLI flag was removed in v0.14.1 because \
             it leaked the secret via /proc/<pid>/cmdline."
        )
    })?;
    // A present-but-empty variable (e.g. `KEI_AUTH_KEY=`) must not be used
    // as a signing key.
    if k.is_empty() {
        return Err(anyhow::anyhow!(
            "KEI_AUTH_KEY is set but empty; refusing to use an empty HMAC signing key.\n \
             Generate one with:\n \
             export KEI_AUTH_KEY=\"$(openssl rand -hex 32)\""
        ));
    }
    Ok(k.into_bytes())
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let conn = open(&db_path(cli.db))?;
let k = key()?;
match cli.cmd {
Cmd::Issue { user, project, scope, ttl } => {
let sc = Scope::from_str(&scope).map_err(|e| anyhow::anyhow!(e))?;
println!("{}", issue(&conn, &user, &project, sc, ttl, &k)?);
}
Cmd::Verify { token } => {
let out = verify(&conn, &token, &k)?;
println!("user={} project={} scope={}", out.user_id, out.project, out.scope);
}
Cmd::Revoke { token } => {
let n = revoke(&conn, &token)?;
println!("revoked {} row(s)", n);
}
}
Ok(())
}
/// Process entry point: run the CLI and translate failure into exit code 1,
/// printing the error chain to stderr.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-auth: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,36 @@
use anyhow::{Context, Result};
use rusqlite::Connection;
use std::path::Path;
pub fn open(path: &Path) -> Result<Connection> {
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let conn = Connection::open(path).context("open sqlite")?;
create_schema(&conn)?;
Ok(conn)
}
/// Open a fresh in-memory SQLite database with the schema already created.
pub fn open_memory() -> Result<Connection> {
    let db = Connection::open_in_memory()?;
    create_schema(&db).map(|()| db)
}
/// Create the `auth_tokens` table and its two indexes if they do not exist.
///
/// Every statement uses `IF NOT EXISTS`, so running this against an already
/// initialised database leaves it unchanged. `token_hash` is UNIQUE, `scope`
/// is constrained to 'read' / 'write' / 'admin', and `revoked_at` defaults
/// to 0.
pub fn create_schema(conn: &Connection) -> Result<()> {
conn.execute_batch(r#"
CREATE TABLE IF NOT EXISTS auth_tokens (
id INTEGER PRIMARY KEY,
token_hash TEXT NOT NULL UNIQUE,
user_id TEXT NOT NULL,
project TEXT NOT NULL,
scope TEXT NOT NULL CHECK(scope IN ('read','write','admin')),
expires_at INTEGER NOT NULL,
created_at INTEGER NOT NULL,
revoked_at INTEGER DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_tok_user ON auth_tokens(user_id);
CREATE INDEX IF NOT EXISTS idx_tok_project ON auth_tokens(project);
"#)?;
Ok(())
}

View file

@ -0,0 +1,45 @@
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::FromStr;
/// Permission level carried by a token.
///
/// The textual forms used by `as_str` / `FromStr` are the lowercase names
/// "read", "write" and "admin".
// NOTE(review): the derived serde representation presumably uses the variant
// identifiers as written ("Read", …), not the lowercase names — confirm
// before relying on it for interchange.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Scope {
/// Read-only access.
Read,
/// Write access.
Write,
/// Administrative access.
Admin,
}
impl Scope {
pub fn as_str(&self) -> &'static str {
match self { Scope::Read => "read", Scope::Write => "write", Scope::Admin => "admin" }
}
/// Admin ⊇ Write ⊇ Read.
pub fn allows(&self, required: Scope) -> bool {
use Scope::*;
match (self, required) {
(Admin, _) => true,
(Write, Read) | (Write, Write) => true,
(Read, Read) => true,
_ => false,
}
}
}
impl fmt::Display for Scope {
    /// Writes the scope's lowercase name, i.e. the value of `as_str`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.as_str())
    }
}
impl FromStr for Scope {
    type Err = String;

    /// Parse one of the exact lowercase names "read", "write" or "admin".
    ///
    /// Matching is case-sensitive and does no trimming; any other input
    /// yields `Err("unknown scope: <input>")`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        const NAMES: [(&str, Scope); 3] = [
            ("read", Scope::Read),
            ("write", Scope::Write),
            ("admin", Scope::Admin),
        ];
        NAMES
            .iter()
            .find(|(name, _)| *name == s)
            .map(|&(_, scope)| scope)
            .ok_or_else(|| format!("unknown scope: {s}"))
    }
}

View file

@ -0,0 +1,124 @@
//! Token issue / verify / revoke.
//!
//! Token layout (URL-safe, no padding):
//! `<b64(payload_json)>.<b64(hmac)>`
//! Payload contains {tid, user_id, project, scope, expires_at}.
//! The db keeps sha256(token) to support revocation and lookup.
use crate::hmac::{sign, verify as verify_mac};
use crate::scopes::Scope;
use anyhow::{anyhow, Result};
use base64::Engine;
use chrono::Utc;
use rand::RngCore;
use rusqlite::{params, Connection};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::str::FromStr;
/// Claims embedded in a token.
///
/// Serialized to JSON by `encode_token` and parsed back by `verify`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Payload {
/// Per-token identifier: 16 random bytes, URL-safe base64 (see `new_payload`).
tid: String,
/// User the token was issued to.
user_id: String,
/// Project the token was issued for.
project: String,
/// Scope in its textual form; parsed back into `Scope` during verification.
scope: String,
/// Expiry timestamp; compared against `Utc::now().timestamp()` in `verify`.
expires_at: i64,
}
/// Identity and permission level extracted from a successfully verified token.
#[derive(Debug)]
pub struct VerifyOutcome {
/// `user_id` claim from the token payload.
pub user_id: String,
/// `project` claim from the token payload.
pub project: String,
/// `scope` claim from the token payload, parsed into a `Scope`.
pub scope: Scope,
}
/// Issue a new token. The returned string is the ONLY copy — DB stores only its sha256.
pub fn issue(
conn: &Connection,
user_id: &str,
project: &str,
scope: Scope,
ttl_secs: i64,
key: &[u8],
) -> Result<String> {
let now = Utc::now().timestamp();
let expires_at = now + ttl_secs;
let payload = new_payload(user_id, project, scope, expires_at);
let token = encode_token(&payload, key)?;
persist_token(conn, &token, user_id, project, scope, expires_at, now)?;
Ok(token)
}
/// Build the claims for a new token, giving it a fresh random `tid`
/// (16 random bytes, URL-safe base64 without padding).
fn new_payload(user_id: &str, project: &str, scope: Scope, expires_at: i64) -> Payload {
    let mut entropy = [0u8; 16];
    rand::thread_rng().fill_bytes(&mut entropy);
    Payload {
        tid: base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(entropy),
        user_id: user_id.to_owned(),
        project: project.to_owned(),
        scope: scope.as_str().to_owned(),
        expires_at,
    }
}
/// Serializes `payload` to JSON, base64url-encodes it, signs the encoded text
/// with `key`, and returns `<encoded>.<signature>`.
fn encode_token(payload: &Payload, key: &[u8]) -> Result<String> {
    let json = serde_json::to_vec(payload)?;
    let encoded = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&json);
    // The signature covers the encoded text, not the raw JSON bytes.
    let mac = sign(key, encoded.as_bytes());
    Ok(format!("{}.{}", encoded, mac))
}
/// Records the token in `auth_tokens`, keyed by the sha256 hex digest of the
/// full token string; the token itself is not written.
fn persist_token(
    conn: &Connection,
    token: &str,
    user_id: &str,
    project: &str,
    scope: Scope,
    expires_at: i64,
    now: i64,
) -> Result<()> {
    let token_hash = sha256_hex(token.as_bytes());
    conn.execute(
        "INSERT INTO auth_tokens (token_hash, user_id, project, scope, expires_at, created_at)
VALUES (?1,?2,?3,?4,?5,?6)",
        params![token_hash, user_id, project, scope.as_str(), expires_at, now],
    )
    .map(|_| ())
    .map_err(Into::into)
}
/// Verify a token: signature valid, not revoked, not expired, returns identity + scope.
pub fn verify(conn: &Connection, token: &str, key: &[u8]) -> Result<VerifyOutcome> {
let (body_b64, sig) = token
.split_once('.')
.ok_or_else(|| anyhow!("malformed token"))?;
verify_mac(key, body_b64.as_bytes(), sig)?;
let body = base64::engine::general_purpose::URL_SAFE_NO_PAD
.decode(body_b64)
.map_err(|e| anyhow!("bad b64 payload: {e}"))?;
let p: Payload = serde_json::from_slice(&body)?;
if p.expires_at < Utc::now().timestamp() {
return Err(anyhow!("token expired"));
}
let hash = sha256_hex(token.as_bytes());
let row: Option<i64> = conn.query_row(
"SELECT revoked_at FROM auth_tokens WHERE token_hash=?1",
params![hash], |r| r.get(0)).ok();
match row {
None => Err(anyhow!("token unknown to server")),
Some(rev) if rev > 0 => Err(anyhow!("token revoked")),
_ => Ok(VerifyOutcome {
user_id: p.user_id,
project: p.project,
scope: Scope::from_str(&p.scope).map_err(|e| anyhow!(e))?,
}),
}
}
/// Mark a token as revoked. Returns number of rows affected (0 = unknown).
///
/// Only rows whose `revoked_at` is still 0 are updated, so revoking the same
/// token a second time also returns 0.
pub fn revoke(conn: &Connection, token: &str) -> Result<usize> {
    let revoked_at = Utc::now().timestamp();
    let token_hash = sha256_hex(token.as_bytes());
    let affected = conn.execute(
        "UPDATE auth_tokens SET revoked_at=?1 WHERE token_hash=?2 AND revoked_at=0",
        params![revoked_at, token_hash],
    )?;
    Ok(affected)
}
/// Returns the SHA-256 digest of `bytes` as lowercase hex.
fn sha256_hex(bytes: &[u8]) -> String {
    let digest = Sha256::digest(bytes);
    format!("{:x}", digest)
}

View file

@ -0,0 +1,52 @@
use kei_auth::schema::open_memory;
use kei_auth::scopes::Scope;
use kei_auth::tokens::{issue, revoke, verify};
// Fixed signing key shared by every test in this file.
const KEY: &[u8] = b"test-key-must-not-be-used-in-production";
/// A freshly issued token verifies and returns the identity it was issued for.
#[test]
fn issue_and_verify() {
    let db = open_memory().unwrap();
    let token = issue(&db, "alice", "kgl", Scope::Write, 3600, KEY).unwrap();
    let outcome = verify(&db, &token, KEY).unwrap();
    assert_eq!(outcome.user_id, "alice");
    assert_eq!(outcome.project, "kgl");
    assert_eq!(outcome.scope, Scope::Write);
}
/// Revoking touches exactly one row, after which verification fails.
#[test]
fn revoke_blocks_verify() {
    let db = open_memory().unwrap();
    let token = issue(&db, "bob", "x", Scope::Read, 3600, KEY).unwrap();
    let revoked_rows = revoke(&db, &token).unwrap();
    assert_eq!(revoked_rows, 1);
    assert!(verify(&db, &token, KEY).is_err());
}
/// A token issued with a negative TTL is already expired and must not verify.
#[test]
fn expired_token_rejected() {
    let db = open_memory().unwrap();
    let token = issue(&db, "carol", "x", Scope::Read, -10, KEY).unwrap();
    assert!(verify(&db, &token, KEY).is_err(), "expired must fail");
}
/// Checks the inclusion order Admin ⊇ Write ⊇ Read in both directions.
#[test]
fn scope_check_admin_implies_write() {
    // Pairs of (held, needed) that must be permitted.
    for (held, needed) in [
        (Scope::Admin, Scope::Write),
        (Scope::Admin, Scope::Read),
        (Scope::Write, Scope::Read),
    ] {
        assert!(held.allows(needed));
    }
    // Pairs of (held, needed) that must be refused.
    for (held, needed) in [(Scope::Read, Scope::Write), (Scope::Write, Scope::Admin)] {
        assert!(!held.allows(needed));
    }
}
/// Changing the final character of the token (part of the signature) must
/// make verification fail.
#[test]
fn tampered_token_rejected() {
    let db = open_memory().unwrap();
    let token = issue(&db, "dave", "x", Scope::Read, 3600, KEY).unwrap();
    let mut tampered = token.clone();
    // Swap the last signature character for a different one.
    let last = tampered.pop().unwrap();
    tampered.push(if last == 'A' { 'B' } else { 'A' });
    assert!(verify(&db, &tampered, KEY).is_err());
}

View file

@ -0,0 +1,26 @@
[package]
name = "kei-chat-store"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Session persistence for Claude conversations. Port of LBM internal/chat."
[[bin]]
name = "kei-chat-store"
path = "src/main.rs"
[lib]
name = "kei_chat_store"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
uuid = { version = "1", features = ["v4"] }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,10 @@
//! kei-chat-store — SQLite + FTS5 session archive for Claude chats.
pub mod schema;
pub mod search;
pub mod sessions;
pub mod stats;
pub mod store;
pub use sessions::{ChatMessage, ChatSession};
pub use store::Store;

View file

@ -0,0 +1,77 @@
//! kei-chat-store CLI.
use clap::{Parser, Subcommand};
use kei_chat_store::search::search;
use kei_chat_store::sessions::{archive_session, save_message, start_session, ChatMessage};
use kei_chat_store::stats::stats;
use kei_chat_store::Store;
use std::path::PathBuf;
use std::process::ExitCode;
// Top-level command line of kei-chat-store.
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Parser)]
#[command(name = "kei-chat-store", version)]
struct Cli {
// Optional database path; when absent `db_path` falls back to KEI_CHAT_DB, then $HOME.
#[arg(long)] db: Option<PathBuf>,
// The subcommand to execute.
#[command(subcommand)] cmd: Cmd,
}
// Subcommands; each variant is handled by one arm of the `match` in `run`.
// Plain `//` comments only: clap's derive would expose `///` comments as help text.
#[derive(Subcommand)]
enum Cmd {
// Create a session and print its id.
Start { #[arg(long)] project: String,
#[arg(long, default_value = "")] title: String,
#[arg(long, default_value = "")] model: String },
// Store one message (positional `content`) in a session and print the message id.
Save { #[arg(long)] session_id: String,
#[arg(long)] role: String,
content: String,
#[arg(long, default_value_t = 0)] tokens_in: i64,
#[arg(long, default_value_t = 0)] tokens_out: i64,
#[arg(long, default_value_t = 0.0)] cost: f64 },
// Full-text search; prints id, role and content separated by tabs.
Search { query: String, #[arg(long, default_value_t = 20)] limit: i64 },
// Mark a session as archived.
Archive { session_id: String },
// Print aggregate statistics as pretty JSON.
Stats,
}
/// Resolves the database location, in priority order: the explicit `--db`
/// value, the `KEI_CHAT_DB` environment variable, then
/// `$HOME/.claude/chat/chat.sqlite` (with `.` standing in for an unset `HOME`).
fn db_path(o: Option<PathBuf>) -> PathBuf {
    o.or_else(|| std::env::var("KEI_CHAT_DB").ok().map(PathBuf::from))
        .unwrap_or_else(|| {
            let home = std::env::var("HOME").unwrap_or_else(|_| ".".into());
            PathBuf::from(home).join(".claude/chat/chat.sqlite")
        })
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let s = Store::open(&db_path(cli.db))?;
match cli.cmd {
Cmd::Start { project, title, model } => {
println!("{}", start_session(&s, &project, &title, &model)?);
}
Cmd::Save { session_id, role, content, tokens_in, tokens_out, cost } => {
let id = save_message(&s, &ChatMessage {
session_id, role, content, tokens_in, tokens_out, cost,
..Default::default()
})?;
println!("{}", id);
}
Cmd::Search { query, limit } => {
for m in search(&s, &query, limit)? {
println!("{}\t{}\t{}", m.id, m.role, m.content);
}
}
Cmd::Archive { session_id } => {
archive_session(&s, &session_id)?;
println!("archived {}", session_id);
}
Cmd::Stats => {
let st = stats(&s)?;
println!("{}", serde_json::to_string_pretty(&st)?);
}
}
Ok(())
}
/// Entry point: runs the CLI and maps any error to exit status 1 after
/// printing it (with its cause chain) to stderr.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-chat-store: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,44 @@
//! Chat SQLite schema.
use rusqlite::{Connection, Result};
// DDL for the relational tables: `chat_sessions` (one row per session, carrying
// running message/token/cost totals) and `chat_messages` (one row per message),
// plus their lookup indexes. Every statement uses IF NOT EXISTS, so running it
// against an existing database is harmless.
// NOTE(review): SQLite enforces `REFERENCES ... ON DELETE CASCADE` only when
// `PRAGMA foreign_keys` is ON; `Store::open` sets only `journal_mode` — confirm
// whether enforcement is intended.
const DDL_MAIN: &str = r#"
CREATE TABLE IF NOT EXISTS chat_sessions (
id TEXT PRIMARY KEY,
project TEXT NOT NULL,
title TEXT DEFAULT '',
model TEXT DEFAULT '',
status TEXT DEFAULT 'active',
message_count INTEGER DEFAULT 0,
total_tokens INTEGER DEFAULT 0,
total_cost REAL DEFAULT 0.0,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_cs_project ON chat_sessions(project);
CREATE INDEX IF NOT EXISTS idx_cs_status ON chat_sessions(status);
CREATE TABLE IF NOT EXISTS chat_messages (
id INTEGER PRIMARY KEY,
session_id TEXT NOT NULL REFERENCES chat_sessions(id) ON DELETE CASCADE,
role TEXT NOT NULL,
content TEXT NOT NULL,
tokens_in INTEGER DEFAULT 0,
tokens_out INTEGER DEFAULT 0,
cost REAL DEFAULT 0.0,
created_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_cm_session ON chat_messages(session_id);
"#;
// DDL for the FTS5 index over message text. `message_id` and `session_id` are
// stored but UNINDEXED; only `content` is searchable, tokenized with
// `porter unicode61`. Rows are written by `save_message` and read by `search`.
const DDL_FTS: &str = r#"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_chat
USING fts5(message_id UNINDEXED, session_id UNINDEXED, content,
tokenize='porter unicode61');
"#;
/// Creates the chat tables, their indexes and the FTS5 table if they do not
/// exist yet. The relational DDL runs first, then the FTS DDL.
pub fn create_schema(conn: &Connection) -> Result<()> {
    for ddl in [DDL_MAIN, DDL_FTS] {
        conn.execute_batch(ddl)?;
    }
    Ok(())
}

View file

@ -0,0 +1,26 @@
//! FTS over messages.
use crate::sessions::ChatMessage;
use crate::store::Store;
use anyhow::Result;
use rusqlite::params;
pub fn search(store: &Store, query: &str, limit: i64) -> Result<Vec<ChatMessage>> {
let lim = if limit <= 0 { 20 } else { limit };
let mut stmt = store.conn().prepare(
"SELECT m.id, m.session_id, m.role, m.content, m.tokens_in, m.tokens_out,
m.cost, m.created_at
FROM fts_chat f
JOIN chat_messages m ON m.id = f.message_id
WHERE fts_chat MATCH ?1 ORDER BY rank LIMIT ?2",
)?;
let rows = stmt.query_map(params![query, lim], |r| {
Ok(ChatMessage {
id: r.get(0)?, session_id: r.get(1)?, role: r.get(2)?, content: r.get(3)?,
tokens_in: r.get(4)?, tokens_out: r.get(5)?, cost: r.get(6)?, created_at: r.get(7)?,
})
})?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}

View file

@ -0,0 +1,94 @@
//! Session + message operations.
use crate::store::Store;
use anyhow::{anyhow, Result};
use chrono::Utc;
use rusqlite::params;
use serde::{Deserialize, Serialize};
/// One row of `chat_sessions`, as loaded by `get_session`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ChatSession {
/// Session id; `start_session` generates it as a v4 UUID string.
pub id: String,
/// Project the session belongs to.
pub project: String,
/// Optional title (empty string when not given).
pub title: String,
/// Optional model name (empty string when not given).
pub model: String,
/// `active` on creation; `archive_session` sets it to `archived`.
pub status: String,
/// Number of messages saved so far; incremented by `save_message`.
pub message_count: i64,
/// Running sum of `tokens_in + tokens_out` over saved messages.
pub total_tokens: i64,
/// Running sum of message costs.
pub total_cost: f64,
/// Creation time, Unix seconds.
pub created_at: i64,
/// Time of the last save or archive, Unix seconds.
pub updated_at: i64,
}
/// One row of `chat_messages`; also the input to `save_message`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ChatMessage {
/// Row id; not read by `save_message`, which returns the id SQLite assigned.
pub id: i64,
/// Id of the session this message belongs to.
pub session_id: String,
/// Free-form role string (the tests use `user`).
pub role: String,
/// Message text; also copied into the FTS index.
pub content: String,
/// Input token count.
pub tokens_in: i64,
/// Output token count.
pub tokens_out: i64,
/// Cost of the message.
pub cost: f64,
/// Unix seconds; a value of 0 makes `save_message` use the current time.
pub created_at: i64,
}
/// Inserts a new `active` session with a freshly generated v4 UUID and returns
/// that id. `created_at` and `updated_at` are both set to the current time.
pub fn start_session(store: &Store, project: &str, title: &str, model: &str) -> Result<String> {
    let session_id = uuid::Uuid::new_v4().to_string();
    let timestamp = Utc::now().timestamp();
    store.conn().execute(
        "INSERT INTO chat_sessions (id, project, title, model, status, created_at, updated_at)
VALUES (?1,?2,?3,?4,'active',?5,?5)",
        params![session_id, project, title, model, timestamp],
    )?;
    Ok(session_id)
}
/// Stores a message, indexes its text for full-text search and updates the
/// owning session's running totals. Returns the new message's row id.
///
/// The three writes run inside one transaction, so a failure part-way through
/// (for example in the FTS insert) leaves no half-saved message behind.
///
/// `msg.created_at == 0` means "now". `msg.id` is ignored.
///
/// # Errors
/// Returns the underlying SQLite error if the transaction cannot be started,
/// any statement fails, or the commit fails; in all of those cases nothing is
/// persisted.
///
/// NOTE(review): a `session_id` with no matching session is not rejected here —
/// the totals UPDATE simply matches zero rows. Confirm whether that is intended.
pub fn save_message(store: &Store, msg: &ChatMessage) -> Result<i64> {
    let now = Utc::now().timestamp();
    let created = if msg.created_at == 0 { now } else { msg.created_at };
    // Dropping `tx` without `commit()` rolls everything back.
    let tx = store.conn().unchecked_transaction()?;
    tx.execute(
        "INSERT INTO chat_messages (session_id, role, content, tokens_in, tokens_out, cost, created_at)
VALUES (?1,?2,?3,?4,?5,?6,?7)",
        params![msg.session_id, msg.role, msg.content, msg.tokens_in,
                msg.tokens_out, msg.cost, created],
    )?;
    // Must be read before the FTS insert, which would overwrite it.
    let id = tx.last_insert_rowid();
    tx.execute(
        "INSERT INTO fts_chat (message_id, session_id, content) VALUES (?1,?2,?3)",
        params![id, msg.session_id, msg.content],
    )?;
    tx.execute(
        "UPDATE chat_sessions SET message_count = message_count + 1,
total_tokens = total_tokens + ?1, total_cost = total_cost + ?2,
updated_at = ?3 WHERE id = ?4",
        params![msg.tokens_in + msg.tokens_out, msg.cost, now, msg.session_id],
    )?;
    tx.commit()?;
    Ok(id)
}
pub fn archive_session(store: &Store, session_id: &str) -> Result<()> {
let n = store.conn().execute(
"UPDATE chat_sessions SET status='archived', updated_at=?1 WHERE id=?2",
params![Utc::now().timestamp(), session_id],
)?;
if n == 0 {
return Err(anyhow!("session {session_id} not found"));
}
Ok(())
}
/// Loads the session with the given id, or `None` when there is no such row.
pub fn get_session(store: &Store, id: &str) -> Result<Option<ChatSession>> {
    let mut stmt = store.conn().prepare(
        "SELECT id, project, title, model, status, message_count, total_tokens,
total_cost, created_at, updated_at FROM chat_sessions WHERE id=?1",
    )?;
    let mut rows = stmt.query(params![id])?;
    match rows.next()? {
        None => Ok(None),
        Some(row) => Ok(Some(ChatSession {
            id: row.get(0)?,
            project: row.get(1)?,
            title: row.get(2)?,
            model: row.get(3)?,
            status: row.get(4)?,
            message_count: row.get(5)?,
            total_tokens: row.get(6)?,
            total_cost: row.get(7)?,
            created_at: row.get(8)?,
            updated_at: row.get(9)?,
        })),
    }
}

View file

@ -0,0 +1,32 @@
//! Aggregate chat stats.
use crate::store::Store;
use anyhow::Result;
use serde::Serialize;
/// Aggregate figures produced by `stats`.
#[derive(Debug, Default, Serialize)]
pub struct Stats {
/// Count of all rows in `chat_sessions`.
pub total_sessions: i64,
/// Count of sessions whose status is `active`.
pub active_sessions: i64,
/// Count of sessions whose status is `archived`.
pub archived_sessions: i64,
/// Count of all rows in `chat_messages`.
pub total_messages: i64,
/// Sum of `total_tokens` over all sessions (0 when there are none).
pub total_tokens: i64,
/// Sum of `total_cost` over all sessions (0 when there are none).
pub total_cost: f64,
}
pub fn stats(store: &Store) -> Result<Stats> {
let mut s = Stats::default();
s.total_sessions = store.conn()
.query_row("SELECT COUNT(*) FROM chat_sessions", [], |r| r.get(0))?;
s.active_sessions = store.conn()
.query_row("SELECT COUNT(*) FROM chat_sessions WHERE status='active'", [], |r| r.get(0))?;
s.archived_sessions = store.conn()
.query_row("SELECT COUNT(*) FROM chat_sessions WHERE status='archived'", [], |r| r.get(0))?;
s.total_messages = store.conn()
.query_row("SELECT COUNT(*) FROM chat_messages", [], |r| r.get(0))?;
s.total_tokens = store.conn()
.query_row("SELECT COALESCE(SUM(total_tokens),0) FROM chat_sessions", [], |r| r.get(0))?;
s.total_cost = store.conn()
.query_row("SELECT COALESCE(SUM(total_cost),0) FROM chat_sessions", [], |r| r.get(0))?;
Ok(s)
}

View file

@ -0,0 +1,30 @@
//! Store open/close helper.
use crate::schema::create_schema;
use anyhow::{Context, Result};
use rusqlite::Connection;
use std::path::Path;
/// Owns the SQLite connection used by all chat-store operations.
pub struct Store {
// Private; callers borrow it through `Store::conn`.
conn: Connection,
}
impl Store {
pub fn open(path: &Path) -> Result<Self> {
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let conn = Connection::open(path).context("open sqlite")?;
conn.pragma_update(None, "journal_mode", "WAL").ok();
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn open_memory() -> Result<Self> {
let conn = Connection::open_in_memory()?;
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn conn(&self) -> &Connection { &self.conn }
}

View file

@ -0,0 +1,59 @@
use kei_chat_store::search::search;
use kei_chat_store::sessions::{archive_session, get_session, save_message, start_session, ChatMessage};
use kei_chat_store::stats::stats;
use kei_chat_store::Store;
/// Fresh in-memory store for a single test.
fn mk() -> Store {
    let store = Store::open_memory();
    store.unwrap()
}
/// Saving one message bumps the session's message count and token total.
#[test]
fn save_and_retrieve() {
    let store = mk();
    let session_id = start_session(&store, "demo", "t", "claude-opus-4").unwrap();
    let message = ChatMessage {
        session_id: session_id.clone(),
        role: "user".into(),
        content: "hello world".into(),
        tokens_in: 3,
        tokens_out: 0,
        cost: 0.001,
        ..Default::default()
    };
    save_message(&store, &message).unwrap();
    let session = get_session(&store, &session_id).unwrap().unwrap();
    assert_eq!(session.message_count, 1);
    assert_eq!(session.total_tokens, 3);
}
/// A word contained in a saved message is found through full-text search.
#[test]
fn fts_search_finds_message() {
    let store = mk();
    let session_id = start_session(&store, "demo", "", "").unwrap();
    let message = ChatMessage {
        session_id,
        role: "user".into(),
        content: "rust async tokio bench".into(),
        ..Default::default()
    };
    save_message(&store, &message).unwrap();
    let hits = search(&store, "tokio", 10).unwrap();
    assert_eq!(hits.len(), 1);
}
/// Archiving flips the stored status to `archived`.
#[test]
fn archive_session_works() {
    let store = mk();
    let session_id = start_session(&store, "p", "", "").unwrap();
    archive_session(&store, &session_id).unwrap();
    let session = get_session(&store, &session_id).unwrap().unwrap();
    assert_eq!(session.status, "archived");
}
/// Three messages of 5+5 tokens each add up to 30 tokens in one session.
#[test]
fn stats_aggregates() {
    let store = mk();
    let session_id = start_session(&store, "p", "", "").unwrap();
    let message = ChatMessage {
        session_id: session_id.clone(),
        role: "user".into(),
        content: "x".into(),
        tokens_in: 5,
        tokens_out: 5,
        cost: 0.01,
        ..Default::default()
    };
    (0..3).for_each(|_| {
        save_message(&store, &message).unwrap();
    });
    let totals = stats(&store).unwrap();
    assert_eq!(totals.total_sessions, 1);
    assert_eq!(totals.total_messages, 3);
    assert_eq!(totals.total_tokens, 30);
}

View file

@ -0,0 +1,26 @@
[package]
name = "kei-content-store"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Asset + prompt + campaign registry. Port of LBM internal/content."
[[bin]]
name = "kei-content-store"
path = "src/main.rs"
[lib]
name = "kei_content_store"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
sha2 = "0.10"
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,52 @@
use crate::store::Store;
use anyhow::Result;
use chrono::Utc;
use rusqlite::params;
use serde::{Deserialize, Serialize};
/// One row of `content_units`; also the input to `register_asset`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Asset {
/// Row id; ignored by `register_asset`, which returns the id SQLite assigned.
pub id: i64,
/// Kind of unit; an empty value is stored as `asset`.
pub unit_type: String,
/// Title of the asset.
pub title: String,
/// Inline content, if any.
pub content: String,
/// Media type string (the tests use `image/png`).
pub media_type: String,
/// Path of the backing file, if any.
pub file_path: String,
/// Hash of the backing file, if any; stored as given.
pub file_hash: String,
/// Provider name, if any.
pub provider: String,
/// Cost in cents.
pub cost_cents: i64,
/// Id of a parent unit; the schema default is 0.
pub parent_id: i64,
/// Creation time, Unix seconds; set by `register_asset`.
pub created_at: i64,
/// Update time, Unix seconds; set equal to `created_at` on insert.
pub updated_at: i64,
}
/// Inserts `a` into `content_units` and returns the new row id.
///
/// An empty `unit_type` is stored as `asset`. Both timestamps are set to the
/// current time; the `id`, `created_at` and `updated_at` fields of `a` are ignored.
pub fn register_asset(store: &Store, a: &Asset) -> Result<i64> {
    let conn = store.conn();
    let timestamp = Utc::now().timestamp();
    let unit_type: &str = match a.unit_type.as_str() {
        "" => "asset",
        explicit => explicit,
    };
    conn.execute(
        "INSERT INTO content_units (unit_type, title, content, media_type,
file_path, file_hash, provider, cost_cents, parent_id, created_at, updated_at)
VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?10)",
        params![unit_type, a.title, a.content, a.media_type, a.file_path,
                a.file_hash, a.provider, a.cost_cents, a.parent_id, timestamp],
    )?;
    Ok(conn.last_insert_rowid())
}
/// Loads the content unit with the given id, or `None` when it does not exist.
pub fn get_asset(store: &Store, id: i64) -> Result<Option<Asset>> {
    let mut stmt = store.conn().prepare(
        "SELECT id, unit_type, title, content, media_type, file_path, file_hash,
provider, cost_cents, parent_id, created_at, updated_at
FROM content_units WHERE id=?1",
    )?;
    let mut rows = stmt.query(params![id])?;
    match rows.next()? {
        None => Ok(None),
        Some(row) => Ok(Some(Asset {
            id: row.get(0)?,
            unit_type: row.get(1)?,
            title: row.get(2)?,
            content: row.get(3)?,
            media_type: row.get(4)?,
            file_path: row.get(5)?,
            file_hash: row.get(6)?,
            provider: row.get(7)?,
            cost_cents: row.get(8)?,
            parent_id: row.get(9)?,
            created_at: row.get(10)?,
            updated_at: row.get(11)?,
        })),
    }
}

View file

@ -0,0 +1,31 @@
use crate::store::Store;
use anyhow::Result;
use chrono::Utc;
use rusqlite::params;
/// Inserts a campaign stamped with the current time and returns its row id.
pub fn create_campaign(store: &Store, name: &str, description: &str) -> Result<i64> {
    let conn = store.conn();
    let created_at = Utc::now().timestamp();
    conn.execute(
        "INSERT INTO campaigns (name, description, created_at) VALUES (?1,?2,?3)",
        params![name, description, created_at],
    )?;
    Ok(conn.last_insert_rowid())
}
/// Links an asset to a campaign. Attaching the same pair again is a no-op
/// because the insert uses `OR IGNORE`.
pub fn attach_asset(store: &Store, campaign_id: i64, asset_id: i64) -> Result<()> {
    store
        .conn()
        .execute(
            "INSERT OR IGNORE INTO campaign_assets (campaign_id, asset_id) VALUES (?1,?2)",
            params![campaign_id, asset_id],
        )
        .map(|_| ())
        .map_err(Into::into)
}
pub fn campaign_assets(store: &Store, campaign_id: i64) -> Result<Vec<i64>> {
let mut stmt = store.conn().prepare(
"SELECT asset_id FROM campaign_assets WHERE campaign_id=?1"
)?;
let rows = stmt.query_map(params![campaign_id], |r| r.get::<_, i64>(0))?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}

View file

@ -0,0 +1,11 @@
//! kei-content-store — assets, prompts, campaigns.
pub mod assets;
pub mod campaigns;
pub mod prompts;
pub mod schema;
pub mod store;
pub use assets::Asset;
pub use prompts::Prompt;
pub use store::Store;

View file

@ -0,0 +1,99 @@
use clap::{Parser, Subcommand};
use kei_content_store::assets::{register_asset, Asset};
use kei_content_store::campaigns::{attach_asset, create_campaign};
use kei_content_store::prompts::{history, register_prompt, Prompt};
use kei_content_store::Store;
use std::path::PathBuf;
use std::process::ExitCode;
// Top-level command line of kei-content-store.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Parser)]
#[command(name = "kei-content-store", version)]
struct Cli {
// Optional database path; when absent `db_path` falls back to KEI_CONTENT_DB, then $HOME.
#[arg(long)] db: Option<PathBuf>,
// The subcommand to execute.
#[command(subcommand)] cmd: Cmd,
}
// Subcommands; `dispatch` routes each variant to its `cmd_*` function.
// Plain `//` comments only: clap's derive would expose `///` comments as help text.
#[derive(Subcommand)]
enum Cmd {
// Register an asset (positional `title`) and print its id.
RegisterAsset { title: String,
#[arg(long, default_value = "")] file_path: String,
#[arg(long, default_value = "")] media_type: String,
#[arg(long, default_value = "")] provider: String },
// Register a prompt (positional `prompt_text`) and print its id.
RegisterPrompt { prompt_text: String,
#[arg(long, default_value = "")] model: String,
#[arg(long, default_value = "")] prompt_type: String },
// Create a campaign and print its id.
CreateCampaign { name: String, #[arg(long, default_value = "")] description: String },
// Attach an asset to a campaign.
AttachAsset { campaign_id: i64, asset_id: i64 },
// Print a prompt and the prompts that name it as parent.
PromptHistory { prompt_id: i64 },
}
/// Resolves the database location, in priority order: the explicit `--db`
/// value, the `KEI_CONTENT_DB` environment variable, then
/// `$HOME/.claude/content/content.sqlite` (with `.` standing in for an unset `HOME`).
fn db_path(o: Option<PathBuf>) -> PathBuf {
    match o {
        Some(explicit) => explicit,
        None => match std::env::var("KEI_CONTENT_DB") {
            Ok(from_env) => PathBuf::from(from_env),
            Err(_) => {
                let home = std::env::var("HOME").unwrap_or_else(|_| ".".into());
                PathBuf::from(home).join(".claude/content/content.sqlite")
            }
        },
    }
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let s = Store::open(&db_path(cli.db))?;
dispatch(&s, cli.cmd)
}
/// Routes a parsed subcommand to the function that implements it.
fn dispatch(s: &Store, cmd: Cmd) -> anyhow::Result<()> {
    match cmd {
        Cmd::RegisterAsset { title, file_path, media_type, provider } => {
            cmd_asset(s, title, file_path, media_type, provider)
        }
        Cmd::RegisterPrompt { prompt_text, model, prompt_type } => {
            cmd_prompt(s, prompt_text, model, prompt_type)
        }
        Cmd::CreateCampaign { name, description } => {
            cmd_campaign(s, name.as_str(), description.as_str())
        }
        Cmd::AttachAsset { campaign_id, asset_id } => cmd_attach(s, campaign_id, asset_id),
        Cmd::PromptHistory { prompt_id } => cmd_history(s, prompt_id),
    }
}
/// Registers an asset of unit type `asset` and prints its id.
fn cmd_asset(
    s: &Store,
    title: String,
    file_path: String,
    media_type: String,
    provider: String,
) -> anyhow::Result<()> {
    let asset = Asset {
        title,
        file_path,
        media_type,
        provider,
        unit_type: "asset".into(),
        ..Default::default()
    };
    let asset_id = register_asset(s, &asset)?;
    println!("{}", asset_id);
    Ok(())
}
/// Registers a prompt and prints the id returned by `register_prompt`.
fn cmd_prompt(
    s: &Store,
    prompt_text: String,
    model: String,
    prompt_type: String,
) -> anyhow::Result<()> {
    let prompt = Prompt {
        prompt_text,
        model,
        prompt_type,
        ..Default::default()
    };
    let prompt_id = register_prompt(s, &prompt)?;
    println!("{}", prompt_id);
    Ok(())
}
/// Creates a campaign and prints its id.
fn cmd_campaign(s: &Store, name: &str, description: &str) -> anyhow::Result<()> {
    let campaign_id = create_campaign(s, name, description)?;
    println!("{}", campaign_id);
    Ok(())
}
/// Attaches an asset to a campaign and prints a confirmation line.
fn cmd_attach(s: &Store, campaign_id: i64, asset_id: i64) -> anyhow::Result<()> {
    attach_asset(s, campaign_id, asset_id).map(|()| {
        println!("attached {} to campaign {}", asset_id, campaign_id);
    })
}
/// Prints one tab-separated line (id, version, text) per prompt returned by `history`.
fn cmd_history(s: &Store, prompt_id: i64) -> anyhow::Result<()> {
    let entries = history(s, prompt_id)?;
    for entry in &entries {
        println!("{}\t{}\t{}", entry.id, entry.version, entry.prompt_text);
    }
    Ok(())
}
/// Entry point: runs the CLI and maps any error to exit status 1 after
/// printing it (with its cause chain) to stderr.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-content-store: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,57 @@
use crate::store::Store;
use anyhow::Result;
use chrono::Utc;
use rusqlite::params;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
/// One row of `prompts`; also the input to `register_prompt`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Prompt {
/// Row id; ignored on insert.
pub id: i64,
/// The prompt text.
pub prompt_text: String,
/// SHA-256 hex of `prompt_text`; `register_prompt` computes it itself and ignores this field.
pub prompt_hash: String,
/// Free-form prompt type.
pub prompt_type: String,
/// Model name; forms the uniqueness key together with the hash.
pub model: String,
/// Version number; 0 is stored as 1.
pub version: i64,
/// Id of the prompt this one derives from; the schema default is 0.
pub parent_id: i64,
/// Creation time, Unix seconds; set by `register_prompt`.
pub created_at: i64,
}
/// Stores a prompt unless one with the same text hash and model already exists,
/// then returns the id of the row with that hash and model.
///
/// A `version` of 0 is stored as 1. Registering identical text for the same
/// model twice therefore returns the same id both times.
pub fn register_prompt(store: &Store, p: &Prompt) -> Result<i64> {
    let conn = store.conn();
    let created_at = Utc::now().timestamp();
    let prompt_hash = hash_prompt(&p.prompt_text);
    let version = if p.version == 0 { 1 } else { p.version };
    conn.execute(
        "INSERT OR IGNORE INTO prompts
(prompt_text, prompt_hash, prompt_type, model, version, parent_id, created_at)
VALUES (?1,?2,?3,?4,?5,?6,?7)",
        params![p.prompt_text, prompt_hash, p.prompt_type, p.model,
                version, p.parent_id, created_at],
    )?;
    // Look the row up instead of using last_insert_rowid: the insert may have been ignored.
    let prompt_id: i64 = conn.query_row(
        "SELECT id FROM prompts WHERE prompt_hash=?1 AND model=?2",
        params![prompt_hash, p.model],
        |row| row.get(0),
    )?;
    Ok(prompt_id)
}
pub fn history(store: &Store, parent_id: i64) -> Result<Vec<Prompt>> {
let mut stmt = store.conn().prepare(
"SELECT id, prompt_text, prompt_hash, prompt_type, model, version,
parent_id, created_at
FROM prompts WHERE parent_id=?1 OR id=?1 ORDER BY created_at",
)?;
let rows = stmt.query_map(params![parent_id], |r| {
Ok(Prompt {
id: r.get(0)?, prompt_text: r.get(1)?, prompt_hash: r.get(2)?,
prompt_type: r.get(3)?, model: r.get(4)?, version: r.get(5)?,
parent_id: r.get(6)?, created_at: r.get(7)?,
})
})?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}
/// Returns the SHA-256 digest of the prompt text as lowercase hex.
fn hash_prompt(s: &str) -> String {
    format!("{:x}", Sha256::digest(s.as_bytes()))
}

View file

@ -0,0 +1,51 @@
use rusqlite::{Connection, Result};
// DDL for the content store: `content_units` (assets and other units),
// `prompts` (unique per prompt hash + model), `campaigns`, and the
// `campaign_assets` link table (one row per campaign/asset pair).
// Every statement uses IF NOT EXISTS, so re-running it is harmless.
// NOTE(review): `campaign_assets` declares no foreign keys, so the ids it holds
// are not checked against `campaigns` or `content_units` — confirm intended.
const DDL: &str = r#"
CREATE TABLE IF NOT EXISTS content_units (
id INTEGER PRIMARY KEY,
unit_type TEXT NOT NULL,
title TEXT NOT NULL,
content TEXT DEFAULT '',
media_type TEXT DEFAULT '',
file_path TEXT DEFAULT '',
file_hash TEXT DEFAULT '',
provider TEXT DEFAULT '',
cost_cents INTEGER DEFAULT 0,
parent_id INTEGER DEFAULT 0,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_cu_type ON content_units(unit_type);
CREATE INDEX IF NOT EXISTS idx_cu_hash ON content_units(file_hash) WHERE file_hash != '';
CREATE TABLE IF NOT EXISTS prompts (
id INTEGER PRIMARY KEY,
prompt_text TEXT NOT NULL,
prompt_hash TEXT NOT NULL,
prompt_type TEXT DEFAULT '',
model TEXT DEFAULT '',
version INTEGER DEFAULT 1,
parent_id INTEGER DEFAULT 0,
created_at INTEGER NOT NULL,
UNIQUE(prompt_hash, model)
);
CREATE TABLE IF NOT EXISTS campaigns (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
description TEXT DEFAULT '',
status TEXT DEFAULT 'draft',
created_at INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS campaign_assets (
campaign_id INTEGER NOT NULL,
asset_id INTEGER NOT NULL,
PRIMARY KEY(campaign_id, asset_id)
);
"#;
/// Creates the content-store tables and indexes if they do not exist yet.
pub fn create_schema(conn: &Connection) -> Result<()> {
    conn.execute_batch(DDL)
}

View file

@ -0,0 +1,24 @@
use crate::schema::create_schema;
use anyhow::{Context, Result};
use rusqlite::Connection;
use std::path::Path;
/// Owns the SQLite connection used by all content-store operations.
pub struct Store { conn: Connection }
impl Store {
pub fn open(path: &Path) -> Result<Self> {
if let Some(parent) = path.parent() { let _ = std::fs::create_dir_all(parent); }
let conn = Connection::open(path).context("open sqlite")?;
conn.pragma_update(None, "journal_mode", "WAL").ok();
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn open_memory() -> Result<Self> {
let conn = Connection::open_in_memory()?;
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn conn(&self) -> &Connection { &self.conn }
}

View file

@ -0,0 +1,48 @@
use kei_content_store::assets::{get_asset, register_asset, Asset};
use kei_content_store::campaigns::{attach_asset, campaign_assets, create_campaign};
use kei_content_store::prompts::{register_prompt, Prompt};
use kei_content_store::Store;
/// Fresh in-memory store for a single test.
fn mk() -> Store {
    let store = Store::open_memory();
    store.unwrap()
}
/// A registered asset can be read back with the title it was given.
#[test]
fn asset_roundtrip() {
    let store = mk();
    let asset = Asset {
        title: "logo.png".into(),
        media_type: "image/png".into(),
        ..Default::default()
    };
    let asset_id = register_asset(&store, &asset).unwrap();
    let loaded = get_asset(&store, asset_id).unwrap().unwrap();
    assert_eq!(loaded.title, "logo.png");
}
/// Registering the same text for the same model twice yields a single id.
#[test]
fn prompt_dedup_by_hash() {
    let store = mk();
    let make_prompt = || Prompt {
        prompt_text: "describe a cat".into(),
        model: "dall-e-3".into(),
        ..Default::default()
    };
    let first = register_prompt(&store, &make_prompt()).unwrap();
    let second = register_prompt(&store, &make_prompt()).unwrap();
    assert_eq!(first, second, "same text+model must collapse");
}
/// Creating a campaign returns a positive row id.
#[test]
fn campaign_creation() {
    let store = mk();
    let campaign_id = create_campaign(&store, "spring", "spring launch").unwrap();
    assert!(campaign_id > 0);
}
/// An attached asset shows up in the campaign's asset list.
#[test]
fn campaign_asset_attach() {
    let store = mk();
    let campaign_id = create_campaign(&store, "launch", "").unwrap();
    let asset = Asset {
        title: "hero.mp4".into(),
        ..Default::default()
    };
    let asset_id = register_asset(&store, &asset).unwrap();
    attach_asset(&store, campaign_id, asset_id).unwrap();
    let attached = campaign_assets(&store, campaign_id).unwrap();
    assert_eq!(attached, vec![asset_id]);
}

View file

@ -0,0 +1,25 @@
[package]
name = "kei-crossdomain"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Typed-edge cross-domain store. Port of LBM internal/crossdomain."
[[bin]]
name = "kei-crossdomain"
path = "src/main.rs"
[lib]
name = "kei_crossdomain"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,60 @@
//! Auto-link heuristic — proposes edges based on URI-name component matching.
//! No-ML: intersect the last path segments (case-insensitive, normalized).
use crate::edges::link;
use crate::store::Store;
use crate::types::extract_domain;
use anyhow::Result;
use rusqlite::params;
/// Proposes `auto_related` edges from `uri` to every URI already present in
/// `cross_edges` that lives in a different domain and ends in the same
/// (case-insensitive) last path segment. Returns how many edges were added.
///
/// Returns 0 without touching the store when `uri` has an empty last segment.
pub fn auto_link(store: &Store, uri: &str) -> Result<usize> {
    let tail = tail_token(uri);
    if !tail.is_empty() {
        let src_domain = extract_domain(uri);
        let candidates = collect_candidates(store, uri, src_domain, &tail)?;
        return commit_candidates(store, uri, &candidates);
    }
    Ok(0)
}
/// Lists every URI appearing on either end of an existing edge that is not
/// `uri` itself, is outside `src_domain`, and whose last segment equals `tail`.
fn collect_candidates(store: &Store, uri: &str, src_domain: &str, tail: &str)
    -> Result<Vec<String>>
{
    let mut stmt = store.conn().prepare(
        "SELECT DISTINCT to_uri FROM cross_edges
UNION SELECT DISTINCT from_uri FROM cross_edges",
    )?;
    let mut matches: Vec<String> = Vec::new();
    for row in stmt.query_map([], |r| r.get::<_, String>(0))? {
        let other = row?;
        let is_foreign = other != uri && extract_domain(&other) != src_domain;
        if is_foreign && tail_token(&other).eq_ignore_ascii_case(tail) {
            matches.push(other);
        }
    }
    Ok(matches)
}
/// Adds an `auto_related` edge (weight 0.5, evidence `E5`) from `uri` to each
/// candidate that is not already linked from `uri`, and returns how many were added.
fn commit_candidates(store: &Store, uri: &str, candidates: &[String]) -> Result<usize> {
    let mut added = 0;
    for candidate in candidates {
        if !edge_exists(store, uri, candidate)? {
            link(store, uri, candidate, "auto_related", 0.5, "E5")?;
            added += 1;
        }
    }
    Ok(added)
}
/// Reports whether any edge, of any type, already runs from `from` to `to`.
fn edge_exists(store: &Store, from: &str, to: &str) -> Result<bool> {
    let edge_count: i64 = store.conn().query_row(
        "SELECT COUNT(*) FROM cross_edges WHERE from_uri=?1 AND to_uri=?2",
        params![from, to],
        |row| row.get(0),
    )?;
    Ok(edge_count > 0)
}
/// Returns the lowercased text after the last `/` in `uri`, or the whole
/// lowercased `uri` when it contains no `/`.
fn tail_token(uri: &str) -> String {
    let tail = match uri.rfind('/') {
        Some(slash) => &uri[slash + 1..],
        None => uri,
    };
    tail.to_lowercase()
}

View file

@ -0,0 +1,44 @@
use crate::store::Store;
use anyhow::Result;
use rusqlite::params;
use serde::Serialize;
use std::collections::{HashSet, VecDeque};
// Upper bound that `clamp` applies to the depth requested from `bfs`.
const MAX_DEPTH: i64 = 5;
/// One node discovered by `bfs`.
#[derive(Debug, Clone, Serialize)]
pub struct Reached {
/// URI of the discovered node.
pub uri: String,
/// Type of the edge through which the node was first reached.
pub edge_type: String,
/// Number of hops from the start node (1 for direct neighbours).
pub depth: i64,
}
/// Breadth-first walk over outgoing edges, starting at `start`.
///
/// Returns every node reached within the clamped depth, in discovery order,
/// each with the type of the edge that first reached it and its hop count.
/// `start` itself is never included. A `depth` of zero or less means 2, and
/// values above `MAX_DEPTH` are capped (see `clamp`).
///
/// The edge lookup statement is prepared once and reused for every visited
/// node instead of being re-prepared on each iteration.
pub fn bfs(store: &Store, start: &str, depth: i64) -> Result<Vec<Reached>> {
    let max_depth = clamp(depth);
    let mut stmt = store.conn().prepare(
        "SELECT to_uri, edge_type FROM cross_edges WHERE from_uri=?1"
    )?;
    let mut seen: HashSet<String> = HashSet::new();
    seen.insert(start.to_owned());
    let mut queue: VecDeque<(String, i64)> = VecDeque::new();
    queue.push_back((start.to_owned(), 0));
    let mut out = Vec::new();
    while let Some((uri, cur)) = queue.pop_front() {
        // Nodes already at the depth limit are reported but not expanded.
        if cur >= max_depth {
            continue;
        }
        let rows = stmt.query_map(params![uri], |r| {
            Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?))
        })?;
        for row in rows {
            let (to, edge_type) = row?;
            // `insert` returns false when the node was already visited.
            if !seen.insert(to.clone()) {
                continue;
            }
            out.push(Reached { uri: to.clone(), edge_type, depth: cur + 1 });
            queue.push_back((to, cur + 1));
        }
    }
    Ok(out)
}
fn clamp(d: i64) -> i64 {
if d <= 0 { 2 } else if d > MAX_DEPTH { MAX_DEPTH } else { d }
}

View file

@ -0,0 +1,51 @@
use crate::store::Store;
use crate::types::CrossEdge;
use anyhow::Result;
use chrono::Utc;
use rusqlite::params;
pub fn link(store: &Store, from: &str, to: &str, edge_type: &str,
weight: f64, evidence: &str) -> Result<i64> {
let now = Utc::now().timestamp();
store.conn().execute(
"INSERT OR IGNORE INTO cross_edges (from_uri, to_uri, edge_type, weight, evidence, created_at)
VALUES (?1,?2,?3,?4,?5,?6)",
params![from, to, edge_type, weight, evidence, now],
)?;
Ok(store.conn().last_insert_rowid())
}
/// Deletes the edges matching `from`, `to` and `edge_type`, returning how many
/// rows were removed (0 when nothing matched).
pub fn unlink(store: &Store, from: &str, to: &str, edge_type: &str) -> Result<usize> {
    let removed = store.conn().execute(
        "DELETE FROM cross_edges WHERE from_uri=?1 AND to_uri=?2 AND edge_type=?3",
        params![from, to, edge_type],
    )?;
    Ok(removed)
}
pub fn query_edges(store: &Store, uri: &str) -> Result<Vec<CrossEdge>> {
let mut stmt = store.conn().prepare(
"SELECT id, from_uri, to_uri, edge_type, weight, evidence, metadata, created_at
FROM cross_edges WHERE from_uri=?1 OR to_uri=?1",
)?;
let rows = stmt.query_map(params![uri], |r| {
Ok(CrossEdge {
id: r.get(0)?, from_uri: r.get(1)?, to_uri: r.get(2)?,
edge_type: r.get(3)?, weight: r.get(4)?, evidence: r.get(5)?,
metadata: r.get(6)?, created_at: r.get(7)?,
})
})?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}
pub fn count_by_type(store: &Store) -> Result<Vec<(String, i64)>> {
let mut stmt = store.conn().prepare(
"SELECT edge_type, COUNT(*) FROM cross_edges GROUP BY edge_type",
)?;
let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?)))?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}

View file

@ -0,0 +1,11 @@
//! kei-crossdomain — SQLite store for domain-to-domain typed edges + BFS.
pub mod auto_link;
pub mod bfs;
pub mod edges;
pub mod schema;
pub mod store;
pub mod types;
pub use store::Store;
pub use types::CrossEdge;

View file

@ -0,0 +1,97 @@
use clap::{Parser, Subcommand};
use kei_crossdomain::auto_link::auto_link;
use kei_crossdomain::bfs::bfs;
use kei_crossdomain::edges::{count_by_type, link, query_edges, unlink};
use kei_crossdomain::Store;
use std::path::PathBuf;
use std::process::ExitCode;
// Command-line arguments of the `kei-crossdomain` binary.
// Plain `//` comments are used on purpose: clap turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Parser)]
#[command(name = "kei-crossdomain", version)]
struct Cli {
// Optional `--db` path of the SQLite file; when absent `db_path` picks the fallback.
#[arg(long)] db: Option<PathBuf>,
// The subcommand to execute.
#[command(subcommand)] cmd: Cmd,
}
// Subcommands of the CLI; `dispatch` maps each variant to its `cmd_*` handler.
// Plain `//` comments are used on purpose: clap turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Subcommand)]
enum Cmd {
// Create the edge `from -> to`. Defaults: edge type "related", weight 1.0, evidence "E4".
Link { from: String, to: String,
#[arg(long, default_value = "related")] edge_type: String,
#[arg(long, default_value_t = 1.0)] weight: f64,
#[arg(long, default_value = "E4")] evidence: String },
// Delete the edge `from -> to` of the given type (default "related").
Unlink { from: String, to: String,
#[arg(long, default_value = "related")] edge_type: String },
// List every edge that has `node` as its source or its target.
Query { node: String },
// Breadth-first traversal starting at `start`; `--depth` defaults to 2.
Graph { start: String, #[arg(long, default_value_t = 2)] depth: i64 },
// Run `auto_link` for `node` and report how many edges it added.
AutoLink { node: String },
// Print the number of edges per edge type.
Stats,
}
/// Resolves the SQLite file to open, in order of precedence:
///
/// 1. the explicit path `o`, when given;
/// 2. the `KEI_CROSS_DB` environment variable, when it can be read;
/// 3. `$HOME/.claude/cross/cross.sqlite`, with `.` standing in for an
///    unreadable `HOME`.
fn db_path(o: Option<PathBuf>) -> PathBuf {
    o.or_else(|| std::env::var("KEI_CROSS_DB").ok().map(PathBuf::from))
        .unwrap_or_else(|| {
            let home = std::env::var("HOME").unwrap_or_else(|_| String::from("."));
            PathBuf::from(home).join(".claude/cross/cross.sqlite")
        })
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let s = Store::open(&db_path(cli.db))?;
dispatch(&s, cli.cmd)
}
/// Routes a parsed subcommand to the handler that implements it and returns
/// that handler's result.
fn dispatch(s: &Store, cmd: Cmd) -> anyhow::Result<()> {
    match cmd {
        Cmd::Stats => cmd_stats(s),
        Cmd::AutoLink { node } => cmd_auto(s, &node),
        Cmd::Graph { start, depth } => cmd_graph(s, &start, depth),
        Cmd::Query { node } => cmd_query(s, &node),
        Cmd::Unlink { from: src, to: dst, edge_type: kind } => {
            cmd_unlink(s, &src, &dst, &kind)
        }
        Cmd::Link { from: src, to: dst, edge_type: kind, weight, evidence } => {
            cmd_link(s, &src, &dst, &kind, weight, &evidence)
        }
    }
}
/// `link` subcommand: stores the edge, then prints a confirmation line.
fn cmd_link(s: &Store, from: &str, to: &str, et: &str, w: f64, ev: &str) -> anyhow::Result<()> {
    link(s, from, to, et, w, ev).map(|_edge_id| println!("linked {from} -> {to}"))
}
/// `unlink` subcommand: deletes the edge and prints how many rows went away.
fn cmd_unlink(s: &Store, from: &str, to: &str, et: &str) -> anyhow::Result<()> {
    let removed = unlink(s, from, to, et)?;
    println!("removed {removed} edge(s)");
    Ok(())
}
/// `query` subcommand: prints one tab-separated line per edge touching
/// `node`, in the form `<id>\t<from> -[<type>]-> <to>`.
fn cmd_query(s: &Store, node: &str) -> anyhow::Result<()> {
    query_edges(s, node)?.iter().for_each(|edge| {
        println!(
            "{}\t{} -[{}]-> {}",
            edge.id, edge.from_uri, edge.edge_type, edge.to_uri
        );
    });
    Ok(())
}
/// `graph` subcommand: runs the breadth-first traversal from `start` and
/// prints every reached URI with its depth and the edge type it was reached by.
fn cmd_graph(s: &Store, start: &str, depth: i64) -> anyhow::Result<()> {
    let reached = bfs(s, start, depth)?;
    for hop in reached {
        println!("{}\t(depth {})\tvia {}", hop.uri, hop.depth, hop.edge_type);
    }
    Ok(())
}
/// `auto-link` subcommand: runs `auto_link` for `node` and prints the count
/// it returned.
fn cmd_auto(s: &Store, node: &str) -> anyhow::Result<()> {
    let added = auto_link(s, node)?;
    println!("proposed+added {added} edges");
    Ok(())
}
/// `stats` subcommand: prints `<count>\t<edge_type>` for every edge type.
fn cmd_stats(s: &Store) -> anyhow::Result<()> {
    count_by_type(s)?
        .into_iter()
        .for_each(|(kind, total)| println!("{total}\t{kind}"));
    Ok(())
}
/// Entry point: runs the CLI and maps any error to exit code 1 after printing
/// the full error chain to stderr.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-crossdomain: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,21 @@
use rusqlite::{Connection, Result};
/// Creates the `cross_edges` table and its three lookup indexes.
///
/// Every statement uses `IF NOT EXISTS`, so calling this on an already
/// initialised database is a no-op.
pub fn create_schema(conn: &Connection) -> Result<()> {
    // One edge per (from_uri, to_uri, edge_type) triple; indexes cover lookups
    // by source, by target and by edge type.
    const DDL: &str = r#"
CREATE TABLE IF NOT EXISTS cross_edges (
id INTEGER PRIMARY KEY,
from_uri TEXT NOT NULL,
to_uri TEXT NOT NULL,
edge_type TEXT NOT NULL,
weight REAL DEFAULT 1.0,
evidence TEXT DEFAULT 'E4',
metadata TEXT DEFAULT '{}',
created_at INTEGER NOT NULL,
UNIQUE(from_uri, to_uri, edge_type)
);
CREATE INDEX IF NOT EXISTS idx_ce_from ON cross_edges(from_uri);
CREATE INDEX IF NOT EXISTS idx_ce_to ON cross_edges(to_uri);
CREATE INDEX IF NOT EXISTS idx_ce_type ON cross_edges(edge_type);
"#;
    conn.execute_batch(DDL)
}

View file

@ -0,0 +1,28 @@
use crate::schema::create_schema;
use anyhow::{Context, Result};
use rusqlite::Connection;
use std::path::Path;
/// Owns the SQLite connection that holds the `cross_edges` table.
pub struct Store {
// Private; handed out read-only through `Store::conn`.
conn: Connection,
}
impl Store {
    /// Opens (creating it if necessary) the database file at `path` and makes
    /// sure the schema exists.
    ///
    /// The parent directory is created on a best-effort basis. A failure
    /// there is not fatal by itself, but it is no longer thrown away: if the
    /// subsequent open fails, the directory error is included in the message
    /// so the root cause is visible.
    ///
    /// # Errors
    /// Fails when SQLite cannot open the file or the schema cannot be created.
    pub fn open(path: &Path) -> Result<Self> {
        // Remember why the directory could not be created, if it could not.
        let dir_failure = path
            .parent()
            .and_then(|dir| std::fs::create_dir_all(dir).err().map(|err| (dir, err)));
        let conn = Connection::open(path).with_context(|| match &dir_failure {
            Some((dir, err)) => format!(
                "open sqlite {} (could not create directory {}: {err})",
                path.display(),
                dir.display()
            ),
            None => format!("open sqlite {}", path.display()),
        })?;
        // WAL is an optimisation only; a database that refuses it is still usable.
        conn.pragma_update(None, "journal_mode", "WAL").ok();
        create_schema(&conn)?;
        Ok(Self { conn })
    }

    /// Opens a fresh in-memory database with the schema already created.
    pub fn open_memory() -> Result<Self> {
        let conn = Connection::open_in_memory()?;
        create_schema(&conn)?;
        Ok(Self { conn })
    }

    /// Borrows the underlying SQLite connection.
    pub fn conn(&self) -> &Connection {
        &self.conn
    }
}

View file

@ -0,0 +1,22 @@
use serde::{Deserialize, Serialize};
/// One row of the `cross_edges` table: a typed, weighted edge between two URIs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossEdge {
/// Row id (`id INTEGER PRIMARY KEY`).
pub id: i64,
/// URI the edge starts at.
pub from_uri: String,
/// URI the edge points to.
pub to_uri: String,
/// Free-form edge label, e.g. "related".
pub edge_type: String,
/// Edge weight; the schema default is 1.0.
pub weight: f64,
/// Evidence tag; the schema default is 'E4'.
pub evidence: String,
/// Metadata text; the schema default is '{}'.
pub metadata: String,
/// Creation time as stored in `created_at` (an integer timestamp).
pub created_at: i64,
}
/// Returns the scheme part of a `domain://…` URI, i.e. everything before the
/// first `://`. Yields `""` when there is no `://` or nothing precedes it.
pub fn extract_domain(uri: &str) -> &str {
    uri.split_once("://").map_or("", |(domain, _rest)| domain)
}

View file

@ -0,0 +1,62 @@
use kei_crossdomain::auto_link::auto_link;
use kei_crossdomain::bfs::bfs;
use kei_crossdomain::edges::{count_by_type, link, query_edges};
use kei_crossdomain::Store;
/// Test helper: a fresh in-memory store, so tests never touch the filesystem.
fn mk() -> Store { Store::open_memory().unwrap() }
/// A freshly linked edge is returned when querying its source URI.
#[test]
fn link_and_query() {
    let store = mk();
    link(&store, "code://a.rs", "note://n1", "documents", 1.0, "E2").unwrap();

    let edges = query_edges(&store, "code://a.rs").unwrap();

    assert_eq!(edges.len(), 1);
    assert_eq!(edges[0].to_uri, "note://n1");
}
/// A depth-2 traversal follows edges across different URI schemes.
#[test]
fn bfs_crosses_domains() {
    let store = mk();
    link(&store, "code://x", "note://y", "refs", 1.0, "E2").unwrap();
    link(&store, "note://y", "task://z", "linked", 1.0, "E2").unwrap();

    let reached = bfs(&store, "code://x", 2).unwrap();
    let was_reached = |uri: &str| reached.iter().any(|hit| hit.uri == uri);

    assert!(was_reached("note://y"));
    assert!(was_reached("task://z"));
}
/// `auto_link` connects `code://a/router` to the `task://` node that shares
/// the "router" name.
#[test]
fn auto_link_cross_domain() {
    let store = mk();
    // Seed edges so that both router URIs exist in the table.
    link(&store, "code://a/router", "note://tmp", "seed", 1.0, "E3").unwrap();
    link(&store, "task://epic/router", "note://tmp2", "seed", 1.0, "E3").unwrap();

    let added = auto_link(&store, "code://a/router").unwrap();
    assert!(added >= 1, "should link router↔router across domains");

    // An auto_related edge into the task:// domain must now exist.
    let has_task_edge = query_edges(&store, "code://a/router")
        .unwrap()
        .iter()
        .any(|edge| edge.edge_type == "auto_related" && edge.to_uri.starts_with("task://"));
    assert!(has_task_edge);
}
/// `count_by_type` groups edges by their type label.
#[test]
fn edge_type_stats() {
    let store = mk();
    for (target, kind) in [("b://y", "refs"), ("b://z", "refs"), ("b://w", "doc")] {
        link(&store, "a://x", target, kind, 1.0, "E2").unwrap();
    }

    let counts = count_by_type(&store).unwrap();
    let refs = counts
        .iter()
        .filter(|(kind, _)| kind == "refs")
        .map(|(_, total)| *total)
        .next()
        .unwrap();

    assert_eq!(refs, 2);
}
/// With depth 2 the traversal reaches the first two hops of a chain but not
/// the third.
#[test]
fn bfs_depth_limit() {
    let store = mk();
    // Chain: a://1 -> b://2 -> c://3 -> d://4
    link(&store, "a://1", "b://2", "r", 1.0, "E2").unwrap();
    link(&store, "b://2", "c://3", "r", 1.0, "E2").unwrap();
    link(&store, "c://3", "d://4", "r", 1.0, "E2").unwrap();

    let reached = bfs(&store, "a://1", 2).unwrap();
    let was_reached = |uri: &str| reached.iter().any(|hit| hit.uri == uri);

    assert!(was_reached("b://2"));
    assert!(was_reached("c://3"));
    assert!(!was_reached("d://4"));
}

View file

@ -0,0 +1,25 @@
[package]
name = "kei-curator"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Edge-decay + orphan-prune graph hygiene. Port of LBM internal/curator."
[[bin]]
name = "kei-curator"
path = "src/main.rs"
[lib]
name = "kei_curator"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,34 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Tunables of the edge-decay pass.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
/// Edges whose decayed weight drops below this value are deleted.
pub prune_threshold: f64,
/// Decay rate used for domains that have no entry in `decay_lambdas`.
pub default_lambda: f64,
/// Per-domain decay rates, keyed by the URI scheme (e.g. "code").
pub decay_lambdas: HashMap<String, f64>,
}
impl Default for Config {
fn default() -> Self {
let mut l = HashMap::new();
// research-backed defaults mirroring LBM internal/curator/types.go
l.insert("threat".into(), 0.1);
l.insert("code".into(), 0.01);
l.insert("protocol".into(), 0.03);
l.insert("finance".into(), 0.08);
l.insert("osint".into(), 0.1);
l.insert("infra".into(), 0.02);
l.insert("sage".into(), 0.005);
Self {
prune_threshold: 0.1,
default_lambda: 0.05,
decay_lambdas: l,
}
}
}
impl Config {
    /// Decay rate for `domain`: its entry in `decay_lambdas` when present,
    /// otherwise `default_lambda`.
    pub fn lambda_for(&self, domain: &str) -> f64 {
        match self.decay_lambdas.get(domain) {
            Some(&lambda) => lambda,
            None => self.default_lambda,
        }
    }
}

View file

@ -0,0 +1,66 @@
//! Exponential decay on cross_edges.
use crate::config::Config;
use anyhow::Result;
use chrono::Utc;
use rusqlite::{params, Connection};
use serde::Serialize;
/// Outcome of one decay pass.
#[derive(Debug, Default, Serialize)]
pub struct DecayReport {
/// Number of edges whose weight was rewritten.
pub updated: usize,
/// Number of edges deleted because their weight fell below the threshold.
pub pruned: usize,
}
/// Runs one decay pass over `cross_edges` using the current wall-clock time:
/// first works out which edges to reweigh or delete, then writes the changes.
pub fn decay_edges(conn: &Connection, cfg: &Config) -> Result<DecayReport> {
    let (updates, deletes) = compute_decay(conn, cfg, Utc::now().timestamp())?;
    apply_decay(conn, &updates, &deletes)
}
/// Read-only half of a decay pass.
///
/// For every edge the new weight is `weight * exp(-lambda * age_days)`, where
/// `lambda` comes from the domain of `from_uri` and `age_days` is the time
/// between `created_at` and `now`. Returns `(updates, deletes)`:
/// * `deletes` — ids whose new weight is below `cfg.prune_threshold`;
/// * `updates` — `(id, new_weight)` for edges that moved by more than 0.001.
///
/// Edges that are not older than `now` are left alone.
///
/// NOTE(review): the age is measured from `created_at` but applied to the
/// weight currently stored, so running the pass repeatedly compounds the
/// decay. Fixing that needs a "last decayed" column and is out of scope here.
fn compute_decay(conn: &Connection, cfg: &Config, now: i64)
    -> Result<(Vec<(i64, f64)>, Vec<i64>)>
{
    const SECS_PER_DAY: f64 = 86_400.0;
    const MIN_DELTA: f64 = 0.001;

    let mut stmt = conn.prepare("SELECT id, from_uri, weight, created_at FROM cross_edges")?;
    let edges = stmt.query_map([], |row| {
        let id: i64 = row.get(0)?;
        let from_uri: String = row.get(1)?;
        let weight: f64 = row.get(2)?;
        let created_at: i64 = row.get(3)?;
        Ok((id, from_uri, weight, created_at))
    })?;

    let mut updates = Vec::new();
    let mut deletes = Vec::new();
    for edge in edges {
        let (id, from_uri, weight, created_at) = edge?;
        let age_days = (now - created_at) as f64 / SECS_PER_DAY;
        if age_days <= 0.0 {
            continue;
        }
        let lambda = cfg.lambda_for(extract_domain(&from_uri));
        let decayed = weight * (-lambda * age_days).exp();
        if decayed < cfg.prune_threshold {
            deletes.push(id);
            continue;
        }
        if (decayed - weight).abs() > MIN_DELTA {
            updates.push((id, decayed));
        }
    }
    Ok((updates, deletes))
}
/// Write half of a decay pass: stores the new weight of every `(id, weight)`
/// pair in `updates` and deletes every id in `deletes`.
///
/// All statements run inside one SQLite savepoint. Compared with issuing each
/// statement on its own this makes the pass atomic (a failure half-way leaves
/// the table untouched instead of partially decayed) and avoids one implicit
/// commit per row. A savepoint is used rather than `BEGIN` so the function
/// also works when the caller already has a transaction open.
///
/// Returns how many update and delete statements were executed.
///
/// # Errors
/// Returns the first SQLite error; the savepoint is rolled back in that case.
fn apply_decay(conn: &Connection, updates: &[(i64, f64)], deletes: &[i64])
    -> Result<DecayReport>
{
    if updates.is_empty() && deletes.is_empty() {
        return Ok(DecayReport::default());
    }
    conn.execute_batch("SAVEPOINT kei_curator_apply_decay")?;
    match write_decay(conn, updates, deletes) {
        Ok(report) => {
            conn.execute_batch("RELEASE kei_curator_apply_decay")?;
            Ok(report)
        }
        Err(err) => {
            // Best effort: the original error is the one worth reporting.
            let _ = conn.execute_batch(
                "ROLLBACK TO kei_curator_apply_decay; RELEASE kei_curator_apply_decay",
            );
            Err(err)
        }
    }
}

/// Executes the individual UPDATE / DELETE statements for `apply_decay`,
/// preparing each statement once and reusing it for every row.
fn write_decay(conn: &Connection, updates: &[(i64, f64)], deletes: &[i64])
    -> Result<DecayReport>
{
    let mut report = DecayReport::default();
    if !updates.is_empty() {
        let mut update = conn.prepare("UPDATE cross_edges SET weight=?1 WHERE id=?2")?;
        for (id, weight) in updates {
            update.execute(params![weight, id])?;
            report.updated += 1;
        }
    }
    if !deletes.is_empty() {
        let mut delete = conn.prepare("DELETE FROM cross_edges WHERE id=?1")?;
        for id in deletes {
            delete.execute(params![id])?;
            report.pruned += 1;
        }
    }
    Ok(report)
}
/// Scheme of a `domain://…` URI: the text before the first `://`, or `""`
/// when the separator is missing or sits at the very start.
fn extract_domain(uri: &str) -> &str {
    uri.find("://").map(|sep| &uri[..sep]).unwrap_or("")
}

View file

@ -0,0 +1,12 @@
//! kei-curator — exponential edge decay + orphan node prune.
//!
//! Operates on a `cross_edges` table compatible with kei-crossdomain.
//! Also usable standalone against any SQLite DB with the expected schema.
pub mod config;
pub mod decay;
pub mod orphans;
pub use config::Config;
pub use decay::{decay_edges, DecayReport};
pub use orphans::prune_orphans;

View file

@ -0,0 +1,45 @@
use clap::{Parser, Subcommand};
use kei_curator::{decay_edges, prune_orphans, Config};
use rusqlite::Connection;
use std::path::PathBuf;
use std::process::ExitCode;
// Command-line arguments of the `kei-curator` binary.
// Plain `//` comments are used on purpose: clap turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Parser)]
#[command(name = "kei-curator", version)]
struct Cli {
// Required `--db` path of the SQLite file to operate on.
#[arg(long)] db: PathBuf,
// The subcommand to execute.
#[command(subcommand)] cmd: Cmd,
}
// Subcommands of the `kei-curator` CLI.
// Plain `//` comments are used on purpose: clap turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Subcommand)]
enum Cmd {
// Run a decay pass; the two flags override the matching `Config` fields
// (fallback lambda, default 0.05; prune threshold, default 0.1).
Decay { #[arg(long, default_value_t = 0.05)] default_lambda: f64,
#[arg(long, default_value_t = 0.1)] threshold: f64 },
// Run `prune_orphans` and print how many edges it removed.
PruneOrphans,
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let conn = Connection::open(&cli.db)?;
match cli.cmd {
Cmd::Decay { default_lambda, threshold } => {
let mut cfg = Config::default();
cfg.default_lambda = default_lambda;
cfg.prune_threshold = threshold;
let r = decay_edges(&conn, &cfg)?;
println!("updated={} pruned={}", r.updated, r.pruned);
}
Cmd::PruneOrphans => {
let n = prune_orphans(&conn)?;
println!("removed {} orphan edges", n);
}
}
Ok(())
}
/// Entry point: runs the CLI and maps any error to exit code 1 after printing
/// the full error chain to stderr.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-curator: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,22 @@
//! Prune orphan URIs — those that appear in `cross_edges` but have no in-edges.
//! Conservative: only removes edges where the tail URI has no other incoming edge.
use anyhow::Result;
use rusqlite::Connection;
/// Runs the orphan-prune `DELETE` against `cross_edges` and returns the
/// number of rows it removed.
///
/// NOTE(review): as written the predicate looks unsatisfiable. The sub-select
/// yields `from_uri` values that never occur as any row's `to_uri`; the outer
/// statement then deletes rows whose `to_uri` is in that set. But a row whose
/// `to_uri` is X is itself a row with `to_uri = X`, so X can never be in the
/// set and the statement appears to always delete 0 rows. Confirm the
/// intended rule before relying on this function to prune anything.
pub fn prune_orphans(conn: &Connection) -> Result<usize> {
// Sub-select: every from_uri that has no incoming edge at all.
// Outer DELETE: edges pointing AT one of those URIs (see NOTE above).
let deleted = conn.execute(
"DELETE FROM cross_edges
WHERE to_uri IN (
SELECT e1.from_uri FROM cross_edges e1
WHERE NOT EXISTS (
SELECT 1 FROM cross_edges e2
WHERE e2.to_uri = e1.from_uri
)
)",
[],
)?;
Ok(deleted)
}

View file

@ -0,0 +1,72 @@
use kei_curator::{decay_edges, prune_orphans, Config};
use rusqlite::{params, Connection};
/// Test helper: an in-memory database holding an empty `cross_edges` table
/// with the column layout the curator expects.
fn mk_db() -> Connection {
    const SCHEMA: &str = r#"
CREATE TABLE cross_edges (
id INTEGER PRIMARY KEY,
from_uri TEXT NOT NULL,
to_uri TEXT NOT NULL,
edge_type TEXT NOT NULL,
weight REAL DEFAULT 1.0,
evidence TEXT DEFAULT 'E4',
metadata TEXT DEFAULT '{}',
created_at INTEGER NOT NULL,
UNIQUE(from_uri, to_uri, edge_type)
);
"#;
    let conn = Connection::open_in_memory().unwrap();
    conn.execute_batch(SCHEMA).unwrap();
    conn
}
/// A 200-day-old `code://` edge is reweighed but not pruned:
/// lambda 0.01 over 200 days gives exp(-2) ≈ 0.135, above the 0.1 threshold.
#[test]
fn decay_updates_old_edges() {
    const AGE_SECS: i64 = 200 * 86_400;
    let db = mk_db();
    let created_at = chrono::Utc::now().timestamp() - AGE_SECS;
    db.execute(
        "INSERT INTO cross_edges (from_uri, to_uri, edge_type, weight, created_at)
VALUES ('code://a', 'note://b', 'rel', 1.0, ?1)",
        params![created_at],
    )
    .unwrap();

    let report = decay_edges(&db, &Config::default()).unwrap();

    assert_eq!(report.updated, 1);
    assert_eq!(report.pruned, 0);
}
/// A 500-day-old `threat://` edge is deleted: lambda 0.1 over 500 days gives
/// exp(-50) ≈ 1.9e-22, far below the 0.1 threshold.
#[test]
fn decay_prunes_below_threshold() {
    const AGE_SECS: i64 = 500 * 86_400;
    let db = mk_db();
    let created_at = chrono::Utc::now().timestamp() - AGE_SECS;
    db.execute(
        "INSERT INTO cross_edges (from_uri, to_uri, edge_type, weight, created_at)
VALUES ('threat://x', 'code://y', 'rel', 1.0, ?1)",
        params![created_at],
    )
    .unwrap();

    let report = decay_edges(&db, &Config::default()).unwrap();
    assert_eq!(report.pruned, 1);

    let remaining: i64 = db
        .query_row("SELECT COUNT(*) FROM cross_edges", [], |row| row.get(0))
        .unwrap();
    assert_eq!(remaining, 0);
}
/// Chain `a -> b -> c` with nothing pointing at `a`.
///
/// Expectation: `b` has an incoming edge (a->b), so b->c is not pruned, and
/// a->b survives as well. The previous assertion (`n <= 2`) could never fail;
/// this version pins that outcome: nothing is removed and both edges remain.
#[test]
fn prune_orphans_removes_dead_ends() {
    let c = mk_db();
    let now = chrono::Utc::now().timestamp();
    c.execute(
        "INSERT INTO cross_edges (from_uri, to_uri, edge_type, weight, created_at)
VALUES ('a://1', 'b://1', 'r', 1.0, ?1)", params![now]).unwrap();
    c.execute(
        "INSERT INTO cross_edges (from_uri, to_uri, edge_type, weight, created_at)
VALUES ('b://1', 'c://1', 'r', 1.0, ?1)", params![now]).unwrap();

    let n = prune_orphans(&c).unwrap();
    assert_eq!(n, 0, "no edge in a connected chain should be pruned");

    let left: i64 = c
        .query_row("SELECT COUNT(*) FROM cross_edges", [], |r| r.get(0))
        .unwrap();
    assert_eq!(left, 2, "both chain edges must survive the prune");
}

View file

@ -1,11 +1,12 @@
//! kei-refactor-engine — library surface.
//!
//! Consumes `kei-conflict-scan` JSON; produces a structured refactor plan
//! (markdown) and, optionally, a patch file for user `git apply` review.
//! (markdown) and, optionally, an auto-resolve review markdown
//! (NOT a unified diff — see patch.rs header, v0.14.1 retraction).
//!
//! Zero-conflict guarantee: any conflict whose `auto_resolvable = false`
//! is included in the plan under "Requires human decision" and EXCLUDED
//! from the generated patch.
//! from the auto-resolve markdown.
pub mod input;
pub mod plan;

View file

@ -3,7 +3,11 @@
//! Usage:
//! kei-refactor-engine --input conflicts.json --plan-only > plan.md
//! kei-refactor-engine --input conflicts.json --apply-to-branch deep-sleep/2026-04-22 \
//! --plan-out plan.md --patch-out changes.patch
//! --plan-out plan.md --patch-out plan-autoresolve.md
//!
//! NOTE (v0.14.1): `--patch-out` writes a MARKDOWN review file, NOT a
//! unified diff. The old claim "git apply-ready patch" was retracted —
//! see `patch.rs` header. The flag name is kept for backwards-compat.
use anyhow::Result;
use clap::Parser;
@ -24,7 +28,7 @@ struct Cli {
#[arg(long, default_value_t = true)]
plan_only: bool,
/// Apply mode — also write a patch file; takes the branch name.
/// Apply mode — also write an auto-resolve review file; takes the branch name.
#[arg(long)]
apply_to_branch: Option<String>,
@ -32,7 +36,8 @@ struct Cli {
#[arg(long)]
plan_out: Option<PathBuf>,
/// Optional explicit path for the patch file.
/// Optional explicit path for the auto-resolve review markdown
/// (NOT a unified diff — see patch.rs header).
#[arg(long)]
patch_out: Option<PathBuf>,
}
@ -54,14 +59,14 @@ fn write_plan(plan: &Plan, branch: Option<&str>, out: Option<&PathBuf>) -> Resul
Ok(())
}
fn maybe_write_patch(
fn maybe_write_autoresolve(
plan: &Plan,
branch: &str,
out: Option<&PathBuf>,
) -> Result<usize> {
let default = PathBuf::from("deep-sleep.patch");
let default = PathBuf::from("plan-autoresolve.md");
let target = out.unwrap_or(&default);
patch::write_patch(plan, branch, target)
patch::write_autoresolve(plan, branch, target)
}
fn run(cli: &Cli) -> Result<ExitCode> {
@ -72,9 +77,10 @@ fn run(cli: &Cli) -> Result<ExitCode> {
write_plan(&plan, branch, cli.plan_out.as_ref())?;
if let Some(br) = branch {
let applied = maybe_write_patch(&plan, br, cli.patch_out.as_ref())?;
let applied = maybe_write_autoresolve(&plan, br, cli.patch_out.as_ref())?;
eprintln!(
"kei-refactor-engine: wrote patch with {} auto-apply item(s); {} human-decision item(s) excluded.",
"kei-refactor-engine: wrote auto-resolve review with {} auto-apply item(s); \
{} human-decision item(s) excluded. Review manually this is NOT a unified diff.",
applied,
plan.manual_items().len(),
);

View file

@ -1,9 +1,19 @@
//! Patch synthesizer — writes a unified-diff file for `git apply` preview.
//! Auto-resolve plan writer.
//!
//! This crate NEVER runs git. Per RULE 0.13 the orchestrator is the only
//! party that commits. We emit `.patch` text the user reads + applies.
//! v0.14.1 retraction: this module used to emit a `*.patch` file with
//! `--- a/<file>` / `+++ b/<file>` headers that *looked* like unified-diff
//! but had no real hunk bodies. `git apply --check` rejects that format.
//! The claim "git apply-ready patch" was incorrect.
//!
//! Only items whose resolution == AutoApply are materialised here; the
//! New behaviour: we write a companion markdown file
//! (`plan-autoresolve.md`) listing the auto-apply candidates so the user
//! can review + apply them manually. File-content diffs would require
//! reading each source file, which is out of scope for this crate and
//! risks hallucinated edits (RULE 0.4). The "applied fork" path in
//! deep-sleep still produces a real branch via rename/move ops — those
//! are performed by the orchestrator, not by this file emitter.
//!
//! Only items whose `resolution == AutoApply` are listed here; the
//! zero-conflict guarantee keeps `requires_human_decision` items out.
use crate::plan::{Plan, PlanItem, Resolution};
@ -11,12 +21,16 @@ use anyhow::Result;
use std::fs;
use std::path::Path;
pub fn write_patch(plan: &Plan, branch: &str, out_file: &Path) -> Result<usize> {
/// Write the auto-resolve review markdown. Returns the count of auto items.
///
/// The file is intentionally NOT a unified diff. It is a markdown
/// summary humans read before applying changes with an editor.
pub fn write_autoresolve(plan: &Plan, branch: &str, out_file: &Path) -> Result<usize> {
let auto = plan.auto_items();
let mut body = String::new();
body.push_str(&header(branch, auto.len(), plan.manual_items().len()));
for item in &auto {
body.push_str(&hunk_for(item));
for (idx, item) in auto.iter().enumerate() {
body.push_str(&entry_for(idx + 1, item));
}
fs::write(out_file, body)?;
Ok(auto.len())
@ -24,25 +38,32 @@ pub fn write_patch(plan: &Plan, branch: &str, out_file: &Path) -> Result<usize>
fn header(branch: &str, auto: usize, manual: usize) -> String {
format!(
"# kei-refactor-engine preview patch\n\
"# AUTO-RESOLVABLE items (review, don't `git apply`)\n\
# Branch intent: {branch}\n\
# Auto-apply items: {auto}\n\
# Human-decision items (NOT in this patch, see plan): {manual}\n\
# Review with `git apply --check <file>` before merging.\n\n"
# Auto-apply candidates: {auto}\n\
# Human-decision items (NOT listed here, see plan): {manual}\n\
#\n\
# This file is NOT a unified diff. Open each FILE below and apply\n\
# the EXAMPLE change by hand. The engine does not read file contents\n\
# and therefore cannot emit real +/- hunks (RULE 0.4: no fabricated\n\
# edits).\n\n"
)
}
fn hunk_for(item: &PlanItem) -> String {
// Conservative: we do not invent file content. We emit an annotated
// comment block per item so the user sees intent, not fabricated code.
fn entry_for(n: usize, item: &PlanItem) -> String {
let files = item.files.join(", ");
format!(
"--- a/{file}\n+++ b/{file}\n# INTENT ({cat}/{sev}): {why}\n# FILES: {files}\n# EXAMPLE: {ex}\n# TRADEOFF: {tr}\n\n",
file = item.files.first().cloned().unwrap_or_else(|| "<unknown>".into()),
"## {n}. [{cat}/{sev}] {first_file}\n\
- FILES: {files}\n\
- WHY: {why}\n\
- EXAMPLE: {ex}\n\
- TRADEOFF: {tr}\n\n",
n = n,
cat = item.category,
sev = item.severity,
why = item.why,
first_file = item.files.first().cloned().unwrap_or_else(|| "<unknown>".into()),
files = files,
why = item.why,
ex = item.example,
tr = item.tradeoff,
)
@ -54,3 +75,57 @@ pub fn excluded_manual(plan: &Plan) -> Vec<&PlanItem> {
.filter(|i| i.resolution == Resolution::RequiresHumanDecision)
.collect()
}
/// Backwards-compatibility shim for callers that still invoke the old name.
///
/// Forwards to `write_autoresolve`: the signature is the same, but the file
/// written is a markdown review document, not a unified diff. New code
/// should call `write_autoresolve` directly.
#[deprecated(note = "renamed to write_autoresolve — output is no longer a unified diff")]
pub fn write_patch(plan: &Plan, branch: &str, out_file: &Path) -> Result<usize> {
write_autoresolve(plan, branch, out_file)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::plan::{Plan, PlanItem, Resolution};

    /// A plan holding a single auto-apply item that touches two files.
    fn sample_plan() -> Plan {
        let item = PlanItem {
            resolution: Resolution::AutoApply,
            category: "blocks".into(),
            severity: "medium".into(),
            files: vec!["_blocks/a.md".into(), "_blocks/b.md".into()],
            why: "75% shingle overlap".into(),
            example: "keep better-cited".into(),
            tradeoff: "deprecation header loses inbound links".into(),
        };
        Plan { items: vec![item] }
    }

    /// Writes the review file for `plan` into a temp file and returns the
    /// reported item count together with the file's contents.
    fn render(plan: &Plan, branch: &str) -> (usize, String) {
        let tmp = tempfile::NamedTempFile::new().unwrap();
        let count = write_autoresolve(plan, branch, tmp.path()).unwrap();
        let body = fs::read_to_string(tmp.path()).unwrap();
        (count, body)
    }

    #[test]
    fn autoresolve_output_is_not_claimed_as_diff() {
        let (n, body) = render(&sample_plan(), "deep-sleep/2026-04-22");
        assert_eq!(n, 1);
        // Must NOT start with unified-diff headers — those are a lie here.
        assert!(!body.starts_with("--- a/"), "output starts with --- a/ (fake diff): {body}");
        assert!(!body.contains("\n--- a/"), "output contains --- a/ (fake diff): {body}");
        assert!(!body.contains("+++ b/"), "output contains +++ b/ (fake diff): {body}");
        // Must be human-readable markdown heading.
        assert!(body.contains("AUTO-RESOLVABLE items"));
    }

    #[test]
    fn autoresolve_includes_files_and_example() {
        let (_, body) = render(&sample_plan(), "x");
        for needle in ["_blocks/a.md", "_blocks/b.md", "keep better-cited"] {
            assert!(body.contains(needle));
        }
    }
}

View file

@ -47,11 +47,11 @@ fn plan_only_prints_markdown() {
}
#[test]
fn manual_items_listed_but_not_in_patch() {
fn manual_items_listed_but_not_in_autoresolve() {
let tmp = TempDir::new().unwrap();
let input = tmp.path().join("c.json");
let plan_out = tmp.path().join("plan.md");
let patch_out = tmp.path().join("p.patch");
let patch_out = tmp.path().join("plan-autoresolve.md");
fs::write(&input, sample_json(true)).unwrap();
let out = std::process::Command::new(bin())
.args(["--input"])
@ -65,10 +65,13 @@ fn manual_items_listed_but_not_in_patch() {
assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr));
let md = fs::read_to_string(&plan_out).unwrap();
assert!(md.contains("Requires human decision"));
let patch = fs::read_to_string(&patch_out).unwrap();
// patch must NOT reference rules/x.md from the manual item
assert!(!patch.contains("rules/x.md"), "patch leaked manual item: {}", patch);
assert!(patch.contains("_blocks/a.md"));
let autoresolve = fs::read_to_string(&patch_out).unwrap();
// autoresolve must NOT reference rules/x.md from the manual item
assert!(!autoresolve.contains("rules/x.md"), "autoresolve leaked manual item: {}", autoresolve);
assert!(autoresolve.contains("_blocks/a.md"));
// And it must NOT claim to be a unified diff.
assert!(!autoresolve.contains("--- a/"));
assert!(!autoresolve.contains("+++ b/"));
}
#[test]
@ -107,10 +110,10 @@ fn stdin_input_works() {
}
#[test]
fn patch_header_shows_counts() {
fn autoresolve_header_shows_counts() {
let tmp = TempDir::new().unwrap();
let input = tmp.path().join("c.json");
let patch_out = tmp.path().join("p.patch");
let patch_out = tmp.path().join("plan-autoresolve.md");
fs::write(&input, sample_json(true)).unwrap();
std::process::Command::new(bin())
.args(["--input"])
@ -119,7 +122,10 @@ fn patch_header_shows_counts() {
.arg(&patch_out)
.output()
.unwrap();
let patch = fs::read_to_string(&patch_out).unwrap();
assert!(patch.contains("Auto-apply items: 1"));
assert!(patch.contains("Human-decision items"));
let autoresolve = fs::read_to_string(&patch_out).unwrap();
assert!(autoresolve.contains("Auto-apply candidates: 1"));
assert!(autoresolve.contains("Human-decision items"));
// Retraction check: no unified-diff headers.
assert!(!autoresolve.contains("--- a/"));
assert!(!autoresolve.contains("+++ b/"));
}

View file

@ -0,0 +1,21 @@
[package]
name = "kei-router"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Natural-language query → tool-call router. Port of LBM pkg/keirouter (ML path dropped)."
[[bin]]
name = "kei-router"
path = "src/main.rs"
[lib]
name = "kei_router"
path = "src/lib.rs"
[dependencies]
regex = "1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
clap = { version = "4", features = ["derive"] }

View file

@ -0,0 +1,167 @@
//! Param extraction — regex scans the raw query for path / limit / id / URI / KV.
//!
//! Ported from LBM pkg/keirouter/extract.go.
use regex::Regex;
use std::collections::HashMap;
use std::sync::OnceLock;
/// Parameters pulled out of a raw query by `extract_params`.
///
/// Fields that were not found keep their `Default` value (empty string,
/// `0`, or an empty map).
#[derive(Debug, Default, Clone)]
pub struct Extracted {
/// First path-like token: an absolute path, else a relative file path,
/// else a `note://vault/…` URI.
pub path: String,
/// Raw text of the first JSON-style array of quoted strings, e.g. `["a","b"]`.
pub paths: String,
/// Number following `limit` / `max` / `top`, or a bare number in 1..=500
/// when neither a limit nor an id was given.
pub limit: i64,
/// Number following `depth`.
pub depth: i64,
/// Number following `id` / `unit`.
pub id: i64,
/// Lower-cased query with the path and the limit/depth/id directives removed.
pub query: String,
/// The trimmed, lower-cased query.
pub text: String,
/// `text` with the first occurrence of the (lower-cased) path blanked out.
pub text_clean: String,
/// First `scheme://…` URI in the query, unless it starts with `note://`.
pub uri: String,
/// Every `key=value` pair found in the query.
pub kv: HashMap<String, String>,
}
/// Compiles one of the router's built-in patterns.
///
/// # Panics
/// Panics when `pat` is not a valid regular expression; the patterns are
/// literals in this file, so that would be a programming error.
fn re(pat: &str) -> Regex {
    let compiled = Regex::new(pat);
    compiled.expect("invalid regex pattern in kei-router")
}
/// Lazily compiled matcher for an absolute path: at the start of the input
/// or after whitespace, one or more `/segment` components (word characters,
/// `.`, `~`, `-`). Capture group 1 holds the path.
fn re_abs_path() -> &'static Regex {
    const PATTERN: &str = r"(?:^|\s)((?:/[\w.~-]+)+(?:\.\w+)?)";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for a relative file path: one or more `dir/`
/// components followed by a file name with an extension. Capture group 1
/// holds the path.
fn re_rel_path() -> &'static Regex {
    const PATTERN: &str = r"(?:^|\s)((?:[\w.-]+/)+[\w.-]+\.\w+)";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for a bracketed list of one or more double-quoted
/// strings, e.g. `["a", "b"]`.
fn re_json_arr() -> &'static Regex {
    const PATTERN: &str = r#"\[(?:\s*"[^"]*"\s*,?\s*)+\]"#;
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for `limit` / `max` / `top` followed by an
/// optional `=` or `:` and a number. Capture group 1 holds the digits.
fn re_number() -> &'static Regex {
    const PATTERN: &str = r"\b(?:limit|max|top)\s*[=:]?\s*(\d+)";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for `depth` followed by an optional `=` or `:`
/// and a number. Capture group 1 holds the digits.
fn re_depth() -> &'static Regex {
    const PATTERN: &str = r"\b(?:depth)\s*[=:]?\s*(\d+)";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for `id` / `unit` followed by an optional `=`,
/// `:` or `#` and a number. Capture group 1 holds the digits.
fn re_id_num() -> &'static Regex {
    const PATTERN: &str = r"\b(?:id|unit)\s*[=:#]?\s*(\d+)";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for a standalone number of one to four digits.
/// Capture group 1 holds the digits.
fn re_bare_num() -> &'static Regex {
    const PATTERN: &str = r"\b(\d{1,4})\b";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for a `note://vault/…` URI (word characters, `/`,
/// `.` and `-` after the prefix).
fn re_vault_uri() -> &'static Regex {
    const PATTERN: &str = r"\bnote://vault/[\w/.\-]+";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for any `scheme://…` URI (word characters, `/`,
/// `.`, `+` and `-` after the separator). Capture group 1 holds the URI.
fn re_domain_uri() -> &'static Regex {
    const PATTERN: &str = r"\b(\w+://[\w/.+\-]+)";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Lazily compiled matcher for a `key=value` pair. Group 1 is the key (word
/// characters), group 2 the value (word characters plus `:`, `/`, `.`, `_`,
/// `+`, `-`).
fn re_kv() -> &'static Regex {
    const PATTERN: &str = r"\b(\w+)=([\w://._+\-]+)";
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| re(PATTERN))
}
/// Parses `s` as a decimal `i64`; anything that does not parse (empty text,
/// non-digits, out-of-range values) yields 0.
fn parse_i64(s: &str) -> i64 {
    match s.parse::<i64>() {
        Ok(value) => value,
        Err(_) => 0,
    }
}
fn extract_paths(query: &str, e: &mut Extracted) {
if let Some(m) = re_json_arr().find(query) {
e.paths = m.as_str().to_string();
}
if let Some(c) = re_abs_path().captures(query) {
if let Some(m) = c.get(1) {
e.path = m.as_str().to_string();
}
}
if e.path.is_empty() {
if let Some(c) = re_rel_path().captures(query) {
if let Some(m) = c.get(1) {
e.path = m.as_str().to_string();
}
}
}
if let Some(m) = re_vault_uri().find(query) {
if e.path.is_empty() {
e.path = m.as_str().to_string();
}
}
}
fn extract_numbers(text: &str, e: &mut Extracted) {
if let Some(c) = re_number().captures(text) {
if let Some(m) = c.get(1) {
e.limit = parse_i64(m.as_str());
}
}
if let Some(c) = re_depth().captures(text) {
if let Some(m) = c.get(1) {
e.depth = parse_i64(m.as_str());
}
}
if let Some(c) = re_id_num().captures(text) {
if let Some(m) = c.get(1) {
e.id = parse_i64(m.as_str());
}
}
if e.limit == 0 && e.id == 0 {
if let Some(c) = re_bare_num().captures(text) {
if let Some(m) = c.get(1) {
let n = parse_i64(m.as_str());
if n > 0 && n <= 500 {
e.limit = n;
}
}
}
}
}
/// Fills `e.uri` and `e.kv` from the raw query.
///
/// `uri` receives the first `scheme://…` match unless it is a `note://` URI.
/// Every `key=value` pair is stored in `kv`; a repeated key keeps its last value.
fn extract_uri_kv(query: &str, e: &mut Extracted) {
    let first_uri = re_domain_uri()
        .find(query)
        .map(|m| m.as_str())
        .filter(|uri| !uri.starts_with("note://"));
    if let Some(uri) = first_uri {
        e.uri = uri.to_string();
    }

    let pairs = re_kv().captures_iter(query).filter_map(|caps| {
        let key = caps.get(1)?.as_str().to_string();
        let value = caps.get(2)?.as_str().to_string();
        Some((key, value))
    });
    e.kv.extend(pairs);
}
/// Derives `e.query` and `e.text_clean` from `e.text` and `e.path`.
///
/// * `query` — `text` without the first occurrence of the lower-cased path
///   and without any limit/depth/id directive, trimmed. Left untouched when
///   nothing remains.
/// * `text_clean` — `text` with the first occurrence of the lower-cased path
///   replaced by a space and trimmed, or `text` itself when there is no path.
fn build_clean_query(e: &mut Extracted) {
    let has_path = !e.path.is_empty();
    // `text` is lower-cased, so the path has to be lower-cased to be found in it.
    let needle = e.path.to_lowercase();

    let without_path = if has_path {
        e.text.replacen(&needle, "", 1)
    } else {
        e.text.clone()
    };
    let stripped = re_number().replace_all(&without_path, "");
    let stripped = re_depth().replace_all(&stripped, "");
    let stripped = re_id_num().replace_all(&stripped, "");
    let cleaned = stripped.trim();
    if !cleaned.is_empty() {
        e.query = cleaned.to_string();
    }

    e.text_clean = if has_path {
        e.text.replacen(&needle, " ", 1).trim().to_string()
    } else {
        e.text.clone()
    };
}
/// Turns a raw natural-language query into structured [`Extracted`] params.
///
/// Paths, URIs and key/value pairs are read from the query as typed (case
/// preserved); numeric directives are read from its trimmed, lower-cased form.
pub fn extract_params(query: &str) -> Extracted {
    let lowered = query.trim().to_lowercase();
    let mut e = Extracted {
        text: lowered.clone(),
        ..Default::default()
    };
    extract_paths(query, &mut e);
    extract_numbers(&lowered, &mut e);
    extract_uri_kv(query, &mut e);
    build_clean_query(&mut e);
    e
}

View file

@ -0,0 +1,24 @@
//! Default keyword tables — aggregated from per-domain cubes.
//!
//! Ordering matters — more-specific multi-word keywords must come before
//! single-word matches on the same tool family.
use crate::kw_tables::{
CHAT_RULES, CODE_RULES, CONTENT_RULES, CROSS_RULES, CURATOR_RULES,
SAGE_RULES, SEARCH_RULES, SOCIAL_RULES, TASK_RULES,
};
use crate::rules::KeywordRule;
/// Builds the router's default rule list by concatenating the per-domain
/// tables.
///
/// The order is significant: tables listed first are consulted first.
pub fn default_rules() -> Vec<KeywordRule> {
    let tables: [&[KeywordRule]; 9] = [
        &SAGE_RULES,
        &CODE_RULES,
        &TASK_RULES,
        &CHAT_RULES,
        &CONTENT_RULES,
        &SOCIAL_RULES,
        &CROSS_RULES,
        &CURATOR_RULES,
        &SEARCH_RULES,
    ];
    let mut rules = Vec::with_capacity(128);
    for table in tables {
        rules.extend_from_slice(table);
    }
    rules
}

View file

@ -0,0 +1,197 @@
//! Per-domain keyword rule tables. Split from `keywords.rs` for Constructor
//! Pattern <200 LOC compliance. Each table is a `const` slice so the whole
//! router is built at compile time — zero allocation hot-path.
use crate::rules::{always, has_any_id_or_query, has_id, has_path, has_paths, KeywordRule};
/// Keyword rules for the knowledge-vault ("sage") tools.
///
/// Keywords are matched by substring and the first matching rule wins, so the
/// order below is significant:
///
/// * `list_units` and `get_unit_graph` come before `get_unit`, because
///   "show units" contains "show unit" and "get_unit_graph" contains
///   "get_unit" — with `get_unit` first those two tools were unreachable
///   through these spellings.
/// * The bare `"unit"` + id catch-all is last, so that more specific rules
///   such as `link_units` or `grade_evidence` are not shadowed whenever the
///   query happens to carry an id.
pub const SAGE_RULES: [KeywordRule; 13] = [
    KeywordRule { tool: "find_related_knowledge",
        keywords: &["related_knowledge", "related knowledge", "vault related"], require: always },
    KeywordRule { tool: "search_knowledge",
        keywords: &["search_knowledge", "search knowledge", "vault search", "find in vault", "knowledge search"], require: always },
    KeywordRule { tool: "list_units",
        keywords: &["list_units", "list units", "show units", "all units"], require: always },
    KeywordRule { tool: "get_unit_graph",
        keywords: &["unit_graph", "unit graph", "knowledge graph", "vault graph"], require: always },
    KeywordRule { tool: "get_unit",
        keywords: &["get_unit", "get unit", "show unit", "read unit"], require: always },
    KeywordRule { tool: "knowledge_stats",
        keywords: &["knowledge_stats", "knowledge stats", "vault stats"], require: always },
    KeywordRule { tool: "add_note",
        keywords: &["add_note", "add note", "create note", "new note"], require: always },
    KeywordRule { tool: "update_note",
        keywords: &["update_note", "update note", "edit note"], require: has_id },
    KeywordRule { tool: "grade_evidence",
        keywords: &["grade_evidence", "grade evidence", "set grade", "evidence grade"], require: has_id },
    KeywordRule { tool: "link_units",
        keywords: &["link_units", "link units", "connect units", "create edge"], require: always },
    KeywordRule { tool: "import_vault",
        keywords: &["import_vault", "import vault", "import obsidian"], require: has_path },
    KeywordRule { tool: "sync_vault",
        keywords: &["sync_vault", "sync vault", "sync obsidian"], require: always },
    // Generic single-word catch-all: must stay after every multi-word "unit" rule.
    KeywordRule { tool: "get_unit", keywords: &["unit"], require: has_id },
];
/// Keyword rules for the code-graph tools. Most rules require an extracted
/// path (`has_path` / `has_paths`); the listing, stats and search rules use
/// `always`.
// NOTE(review): the router matches keywords with `str::contains`, so the short
// keyword "arch" also matches inside "search" / "research" when a path is
// present — confirm this is intended.
pub const CODE_RULES: [KeywordRule; 17] = [
KeywordRule { tool: "get_architecture",
keywords: &["architecture", "arch", "overview", "project overview", "get_architecture"], require: has_path },
KeywordRule { tool: "find_importers",
keywords: &["importer", "importers", "who imports", "depends on", "reverse dep", "find_importers"], require: has_path },
KeywordRule { tool: "find_tests",
keywords: &["test file", "find_tests", "find tests", "test for"], require: has_path },
KeywordRule { tool: "get_change_impact",
keywords: &["impact", "change_impact", "change impact", "refactor impact", "get_change_impact"], require: has_path },
KeywordRule { tool: "get_file_info",
keywords: &["file_info", "file info", "get_file_info"], require: has_path },
KeywordRule { tool: "find_similar",
keywords: &["similar", "find_similar", "like this file"], require: has_path },
KeywordRule { tool: "get_related_files",
keywords: &["related", "get_related", "get_related_files"], require: has_path },
KeywordRule { tool: "get_edges",
keywords: &["edges", "get_edges", "dependencies of"], require: has_path },
KeywordRule { tool: "batch_edges",
keywords: &["batch", "batch_edges"], require: has_paths },
KeywordRule { tool: "check_patterns",
keywords: &["lint", "check_pattern", "check_patterns", "constructor pattern", "loc check"], require: has_path },
KeywordRule { tool: "suggest_files",
keywords: &["suggest", "suggest_files", "next file", "what to open"], require: has_path },
KeywordRule { tool: "hot_files",
keywords: &["hot file", "hottest", "hot_files", "most connected"], require: always },
KeywordRule { tool: "ranked_files",
keywords: &["ranked", "pagerank", "ranked_files", "important files", "central files"], require: always },
KeywordRule { tool: "graph_stats",
keywords: &["graph_stats", "graph stats", "edge count", "code stats"], require: always },
KeywordRule { tool: "add_root",
keywords: &["add_root", "add root", "add scan root"], require: has_path },
KeywordRule { tool: "list_roots",
keywords: &["list_roots", "list roots", "scan roots", "show roots"], require: always },
KeywordRule { tool: "search_code",
keywords: &["search_code", "search code", "find code", "grep", "fts"], require: always },
];
/// Keyword rules for the task-tracking tools. `get_task` and `update_task`
/// only fire when an id was extracted (`has_id`); the rest always apply.
pub const TASK_RULES: [KeywordRule; 9] = [
KeywordRule { tool: "search_tasks",
keywords: &["search_tasks", "search task", "find task", "task search"], require: always },
KeywordRule { tool: "get_task",
keywords: &["get_task", "get task", "task detail"], require: has_id },
KeywordRule { tool: "task_graph",
keywords: &["task_graph", "task graph", "task deps"], require: always },
KeywordRule { tool: "task_stats",
keywords: &["task_stats", "task stats", "task statistics"], require: always },
KeywordRule { tool: "dependency_chain",
keywords: &["dependency_chain", "dep chain", "critical path"], require: always },
KeywordRule { tool: "create_task",
keywords: &["create_task", "create task", "new task", "add task"], require: always },
KeywordRule { tool: "update_task",
keywords: &["update_task", "update task"], require: has_id },
KeywordRule { tool: "add_dependency",
keywords: &["add_dependency", "add dep", "task depends"], require: always },
KeywordRule { tool: "create_milestone",
keywords: &["create_milestone", "create milestone", "new milestone"], require: always },
];
/// Keyword rules for the chat-session tools. `get_session` and `archive_chat`
/// need either an id or a non-empty cleaned query (`has_any_id_or_query`).
pub const CHAT_RULES: [KeywordRule; 9] = [
KeywordRule { tool: "search_chat",
keywords: &["search_chat", "search chat", "find in chat", "chat search"], require: always },
KeywordRule { tool: "get_session",
keywords: &["get_session", "chat session", "get session"], require: has_any_id_or_query },
KeywordRule { tool: "list_sessions",
keywords: &["list_sessions", "list sessions", "list chats", "chat history", "my chats"], require: always },
KeywordRule { tool: "chat_stats",
keywords: &["chat_stats", "chat stats", "chat analytics"], require: always },
KeywordRule { tool: "chat_model_usage",
keywords: &["chat_model_usage", "model usage", "token usage"], require: always },
KeywordRule { tool: "start_chat",
keywords: &["start_chat", "new chat", "start chat", "create session"], require: always },
KeywordRule { tool: "save_message",
keywords: &["save_message", "save message", "log message"], require: always },
KeywordRule { tool: "archive_chat",
keywords: &["archive_chat", "archive chat", "close chat"], require: has_any_id_or_query },
KeywordRule { tool: "link_chat",
keywords: &["link_chat", "link chat", "connect chat"], require: always },
];
/// Keyword rules for the content / asset tools. Only `get_asset` requires an
/// extracted id.
pub const CONTENT_RULES: [KeywordRule; 8] = [
KeywordRule { tool: "search_content",
keywords: &["search_content", "search content", "find content", "content search"], require: always },
KeywordRule { tool: "get_asset",
keywords: &["get_asset", "get asset", "asset detail"], require: has_id },
KeywordRule { tool: "content_lineage",
keywords: &["content_lineage", "content lineage", "asset lineage"], require: always },
KeywordRule { tool: "content_stats",
keywords: &["content_stats", "content stats", "content statistics"], require: always },
KeywordRule { tool: "prompt_history",
keywords: &["prompt_history", "prompt history", "prompt log"], require: always },
KeywordRule { tool: "register_asset",
keywords: &["register_asset", "register asset", "new asset", "add asset"], require: always },
KeywordRule { tool: "register_prompt",
keywords: &["register_prompt", "register prompt", "new prompt", "add prompt"], require: always },
KeywordRule { tool: "create_campaign",
keywords: &["create_campaign", "create campaign", "new campaign", "add campaign"], require: always },
];
/// Keyword rules for the people / organisation tools. Only `get_person`
/// requires an extracted id.
pub const SOCIAL_RULES: [KeywordRule; 8] = [
KeywordRule { tool: "search_people",
keywords: &["search_people", "search people", "find people", "people search"], require: always },
KeywordRule { tool: "get_person",
keywords: &["get_person", "get person", "person detail"], require: has_id },
KeywordRule { tool: "relationship_graph",
keywords: &["relationship_graph", "relationship graph", "social graph"], require: always },
KeywordRule { tool: "social_stats",
keywords: &["social_stats", "social stats", "social statistics"], require: always },
KeywordRule { tool: "add_person",
keywords: &["add_person", "add person", "new person"], require: always },
KeywordRule { tool: "add_org",
keywords: &["add_org", "add org", "new org", "add organization"], require: always },
KeywordRule { tool: "log_interaction",
keywords: &["log_interaction", "log interaction", "record interaction"], require: always },
KeywordRule { tool: "link_people",
keywords: &["link_people", "link people", "connect people"], require: always },
];
/// Keyword rules for the cross-domain tools. None of them has a precondition
/// (`require: always`).
pub const CROSS_RULES: [KeywordRule; 8] = [
KeywordRule { tool: "cross_search",
keywords: &["cross_search", "cross search", "search cross", "cross-domain search"], require: always },
KeywordRule { tool: "cross_graph",
keywords: &["cross_graph", "cross graph", "cross-domain graph", "connected across"], require: always },
KeywordRule { tool: "cross_edges",
keywords: &["cross_edges", "cross edges", "inter-domain edges"], require: always },
KeywordRule { tool: "cross_stats",
keywords: &["cross_stats", "cross stats", "cross-domain stats"], require: always },
KeywordRule { tool: "domain_cooccurrence",
keywords: &["domain_cooccurrence", "cooccurrence", "domain cooccurrence"], require: always },
KeywordRule { tool: "cross_link",
keywords: &["cross_link", "link domain", "cross link"], require: always },
KeywordRule { tool: "cross_unlink",
keywords: &["cross_unlink", "unlink domain", "cross unlink"], require: always },
KeywordRule { tool: "cross_auto_link",
keywords: &["cross_auto_link", "auto link", "discover links"], require: always },
];
/// Keyword rules for the curator tools (status / check / run). None of them
/// has a precondition.
pub const CURATOR_RULES: [KeywordRule; 3] = [
KeywordRule { tool: "curator_status",
keywords: &["curator_status", "curator status", "curation status", "curation"], require: always },
KeywordRule { tool: "curator_check",
keywords: &["curator_check", "curator check", "curator dry-run", "curator preview"], require: always },
KeywordRule { tool: "curator_run",
keywords: &["curator_run", "curator run", "run curator"], require: always },
];
/// Keyword rules for the research tools. Rules that address one specific
/// research record (`get_research`, `research_sources`, `research_claims`,
/// `stop_research`, `research_export`) require an extracted id.
pub const SEARCH_RULES: [KeywordRule; 8] = [
KeywordRule { tool: "search_research",
keywords: &["search_research", "search research", "find research", "past research"], require: always },
KeywordRule { tool: "get_research",
keywords: &["get_research", "get research", "research detail", "show research"], require: has_id },
KeywordRule { tool: "research_sources",
keywords: &["research_sources", "research sources", "sources for research"], require: has_id },
KeywordRule { tool: "research_claims",
keywords: &["research_claims", "research claims", "validated claims", "claims for"], require: has_id },
KeywordRule { tool: "search_stats",
keywords: &["search_stats", "search stats", "research statistics", "research stats"], require: always },
KeywordRule { tool: "run_research",
keywords: &["run_research", "deep research", "research:", "investigate:", "research this"], require: always },
KeywordRule { tool: "stop_research",
keywords: &["stop_research", "stop research", "cancel research"], require: has_id },
KeywordRule { tool: "research_export",
keywords: &["research_export", "export research", "research markdown", "download research"], require: has_id },
];

View file

@ -0,0 +1,21 @@
//! kei-router — NL query to canonical tool-call dispatcher.
//!
//! Constructor Pattern: one cube = one file. Public API:
//! - [`Router::new`] — build with default rules
//! - [`Router::route`] — parse query, return [`RouteResult`]
//! - [`Router::add_dynamic`] — append runtime keyword rules
//!
//! Ported behavior (no ML fallback — upstream ML predictor dropped per task spec):
//! * regex-based param extraction (path / limit / depth / id / URI / KV)
//! * keyword-table dispatch, `require` predicate, first-match wins
//! * fallback to `search_code` (if path seen) else `search_knowledge`
pub mod extract;
pub mod keywords;
pub mod kw_tables;
pub mod router;
pub mod rules;
pub use extract::{extract_params, Extracted};
pub use router::{Method, RouteResult, Router};
pub use rules::{DynRule, KeywordRule};

View file

@ -0,0 +1,35 @@
//! kei-router CLI — print routed tool-call as JSON.
use clap::Parser;
use kei_router::Router;
use std::process::ExitCode;
// Command-line arguments of the `kei-router` binary.
// (Plain `//` comments are used on purpose: `///` comments on clap-derive
// items are turned into help text and would change the CLI output.)
#[derive(Parser)]
#[command(name = "kei-router", version, about = "Route NL query → tool-call JSON")]
struct Cli {
/// The natural-language query.
query: String,
/// Hint remote-MCP forwarding on fallback (adds _forward=true).
#[arg(long)]
forward: bool,
}
/// Route the query given on the command line and print the result as
/// pretty-printed JSON on stdout.
///
/// Exits with status 1 (and a message on stderr) if the result cannot be
/// serialised.
fn main() -> ExitCode {
    let args = Cli::parse();
    let router = Router::new();

    // `--forward` selects the variant that tags fallback results with `_forward`.
    let routed = match args.forward {
        true => router.route_with_hint(&args.query),
        false => router.route(&args.query),
    };

    let json = match serde_json::to_string_pretty(&routed) {
        Ok(json) => json,
        Err(e) => {
            eprintln!("kei-router: json encode failed: {e}");
            return ExitCode::from(1);
        }
    };
    println!("{}", json);
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,157 @@
//! Router — holds keyword rules, dispatches queries to tool calls.
use crate::extract::{extract_params, Extracted};
use crate::keywords::default_rules;
use crate::rules::{always, DynRule, KeywordRule};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
/// How a [`RouteResult`] was produced.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Method {
/// A static or dynamic keyword rule matched the query.
Keyword,
/// No rule matched; the router fell back to a search tool.
Fallback,
/// Not constructed by the router code in this file.
// NOTE(review): presumably reserved for remote-MCP forwarding — confirm.
Remote,
}
/// Canonical route outcome.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RouteResult {
/// Name of the tool the query was routed to.
pub tool: String,
/// Parameters for the tool call, keyed by parameter name.
pub params: BTreeMap<String, serde_json::Value>,
/// Score assigned by the router: 0.9 for static keyword rules, 0.75 for
/// dynamic rules, 0.3 / 0.2 for the two fallbacks.
pub confidence: f64,
/// Which routing stage produced this result.
pub method: Method,
}
/// Router holds the static + dynamic keyword rules.
pub struct Router {
/// Built-in rules, tried first and in order.
rules: Vec<KeywordRule>,
/// Rules added at runtime via `add_dynamic`; tried after the built-in ones.
dynamic: Vec<DynRule>,
}
impl Default for Router {
fn default() -> Self {
Self::new()
}
}
impl Router {
    /// Build a router with the built-in keyword tables and no dynamic rules.
    pub fn new() -> Self {
        Self {
            rules: default_rules(),
            dynamic: Vec::new(),
        }
    }

    /// Append user-supplied rules at runtime (domain extension).
    ///
    /// Keywords are lower-cased before being stored: routing compares them
    /// against the lower-cased query text, so a keyword containing upper-case
    /// letters could otherwise never match. Empty keywords are dropped because
    /// `str::contains("")` is always true, which would make the rule swallow
    /// every query that reaches the dynamic stage.
    pub fn add_dynamic(&mut self, dyn_rules: Vec<DynRule>) {
        self.dynamic.extend(dyn_rules.into_iter().map(|mut rule| {
            rule.keywords.retain(|kw| !kw.is_empty());
            for kw in &mut rule.keywords {
                *kw = kw.to_lowercase();
            }
            rule
        }));
    }

    /// Route a natural language query. Always returns a result — falls back to search tools.
    ///
    /// Stages, in order: built-in keyword rules, dynamic rules, fallback.
    pub fn route(&self, query: &str) -> RouteResult {
        let ext = extract_params(query);
        if let Some(r) = self.keyword_match(&ext) {
            return r;
        }
        if let Some(r) = self.dynamic_match(&ext) {
            return r;
        }
        self.fallback(query, &ext)
    }

    /// Convenience wrapper — useful for remote MCP forwarders that want a hint.
    ///
    /// Identical to [`Router::route`], except that fallback results get an
    /// extra `_forward = true` parameter.
    pub fn route_with_hint(&self, query: &str) -> RouteResult {
        let mut r = self.route(query);
        if r.method == Method::Fallback {
            // Remote-MCP stub: caller may inspect params["_forward"] to decide.
            r.params.insert("_forward".into(), serde_json::Value::Bool(true));
        }
        r
    }

    /// First built-in rule whose `require` predicate holds and one of whose
    /// keywords occurs (as a substring) in the cleaned or the full query text.
    fn keyword_match(&self, ext: &Extracted) -> Option<RouteResult> {
        self.rules
            .iter()
            .filter(|rule| (rule.require)(ext))
            .find(|rule| {
                rule.keywords
                    .iter()
                    .any(|kw| ext.text_clean.contains(*kw) || ext.text.contains(*kw))
            })
            .map(|rule| make_route(rule.tool, ext, Method::Keyword, 0.9))
    }

    /// First dynamic rule with a keyword occurring in the full query text.
    /// Dynamic rules have no `require` predicate and score lower (0.75).
    fn dynamic_match(&self, ext: &Extracted) -> Option<RouteResult> {
        self.dynamic
            .iter()
            .find(|rule| rule.keywords.iter().any(|kw| ext.text.contains(kw.as_str())))
            .map(|rule| make_route(&rule.tool, ext, Method::Keyword, 0.75))
    }

    /// Last resort when no rule matched: `search_code` with the extracted
    /// params if a path was seen, otherwise `search_knowledge` with the raw
    /// query as its only parameter.
    fn fallback(&self, query: &str, ext: &Extracted) -> RouteResult {
        if !ext.path.is_empty() {
            make_route("search_code", ext, Method::Fallback, 0.3)
        } else {
            let mut params = BTreeMap::new();
            params.insert(
                "query".into(),
                serde_json::Value::String(query.to_string()),
            );
            RouteResult {
                tool: "search_knowledge".into(),
                params,
                confidence: 0.2,
                method: Method::Fallback,
            }
        }
    }
}
/// Assemble a [`RouteResult`] for `tool`, taking the parameters from the
/// extracted query data.
fn make_route(tool: &str, ext: &Extracted, method: Method, confidence: f64) -> RouteResult {
    let params = merge_params(ext);
    let tool = tool.to_string();
    RouteResult { tool, params, confidence, method }
}
fn merge_params(ext: &Extracted) -> BTreeMap<String, serde_json::Value> {
let mut m = BTreeMap::new();
// KV pairs first — typed extraction below takes precedence on collisions
// (e.g. "id=42" → kv["id"]="42" string, but ext.id=42 wins as i64).
for (k, v) in &ext.kv {
m.insert(k.clone(), v.clone().into());
}
if !ext.path.is_empty() {
m.insert("path".into(), ext.path.clone().into());
}
if ext.limit > 0 {
m.insert("limit".into(), ext.limit.into());
}
if ext.depth > 0 {
m.insert("depth".into(), ext.depth.into());
}
if ext.id > 0 {
m.insert("id".into(), ext.id.into());
}
if !ext.query.is_empty() {
m.insert("query".into(), ext.query.clone().into());
}
if !ext.uri.is_empty() {
m.insert("uri".into(), ext.uri.clone().into());
}
m
}
// Silence unused import in some build modes: this never-called helper exists
// only so that the `always` import above is referenced somewhere in this file.
#[allow(dead_code)]
fn _always_keep(_e: &Extracted) -> bool {
always(_e)
}

View file

@ -0,0 +1,35 @@
//! Keyword rule type + `require` predicate model.
use crate::extract::Extracted;
/// A dispatch rule: any matching keyword routes to `tool` if `require(extracted)` is true.
#[derive(Clone)]
pub struct KeywordRule {
/// Name of the tool to dispatch to.
pub tool: &'static str,
/// Trigger phrases; any one of them is enough.
pub keywords: &'static [&'static str],
/// Precondition on the extracted params that must hold for the rule to apply.
pub require: fn(&Extracted) -> bool,
}
/// A dynamic (runtime-added) rule — owned strings so caller can build at startup.
/// Unlike [`KeywordRule`] it carries no `require` predicate.
#[derive(Clone, Debug)]
pub struct DynRule {
/// Name of the tool to dispatch to.
pub tool: String,
/// Trigger phrases; any one of them is enough.
pub keywords: Vec<String>,
}
// Predicates mirroring the Go require funcs.
/// Predicate for rules without a precondition: always `true`.
pub fn always(_e: &Extracted) -> bool {
true
}
/// True when a path was extracted from the query (`e.path` is non-empty).
pub fn has_path(e: &Extracted) -> bool {
!e.path.is_empty()
}
/// True when a positive id was extracted from the query.
pub fn has_id(e: &Extracted) -> bool {
e.id > 0
}
/// True when the extracted path list (`e.paths`) is non-empty.
pub fn has_paths(e: &Extracted) -> bool {
!e.paths.is_empty()
}
/// True when either a positive id or a non-empty cleaned query was extracted.
pub fn has_any_id_or_query(e: &Extracted) -> bool {
e.id > 0 || !e.query.is_empty()
}

View file

@ -0,0 +1,76 @@
//! kei-router integration tests — mirror LBM router_test.go semantics.
use kei_router::{DynRule, Method, Router};
/// A query containing the exact phrase "search knowledge" is routed by keyword.
#[test]
fn exact_match_search_knowledge() {
    let router = Router::new();
    let routed = router.route("search knowledge base for rust async");
    assert_eq!(routed.tool, "search_knowledge");
    assert_eq!(routed.method, Method::Keyword);
    assert!(routed.confidence > 0.7);
}
/// "who imports <path>" reaches `find_importers` and carries the path along.
#[test]
fn fuzzy_match_find_importers_with_path() {
    let router = Router::new();
    let routed = router.route("who imports /src/router.rs");
    assert_eq!(routed.tool, "find_importers");
    let path = routed.params.get("path").and_then(|v| v.as_str());
    assert_eq!(path, Some("/src/router.rs"));
}
/// Without any keyword or path the router falls back to `search_knowledge`.
#[test]
fn no_match_fallback_knowledge() {
    let router = Router::new();
    let routed = router.route("hello this is not a routed query");
    assert_eq!(routed.tool, "search_knowledge");
    assert_eq!(routed.method, Method::Fallback);
    assert!(routed.confidence < 0.3);
}
/// Without a keyword but with a path the fallback is `search_code`.
#[test]
fn no_match_fallback_code_with_path() {
    let router = Router::new();
    let routed = router.route("what happened in /tmp/mystery.rs");
    assert_eq!(routed.tool, "search_code");
    assert_eq!(routed.method, Method::Fallback);
}
/// A keyword hit must score higher than a fallback.
#[test]
fn confidence_ranking_keyword_above_fallback() {
    let router = Router::new();
    let keyword_hit = router.route("knowledge stats please");
    let fallback_hit = router.route("asdf zxcv qwer");
    assert!(keyword_hit.confidence > fallback_hit.confidence);
}
/// A rule added at runtime is honoured and reported as a keyword match.
#[test]
fn dynamic_rule_addition() {
    let mut router = Router::new();
    let custom = DynRule {
        tool: "custom_tool".into(),
        keywords: vec!["magic-keyword".into()],
    };
    router.add_dynamic(vec![custom]);
    let routed = router.route("please run magic-keyword now");
    assert_eq!(routed.tool, "custom_tool");
    assert_eq!(routed.method, Method::Keyword);
}
/// `route_with_hint` tags fallback results with `_forward = true`.
#[test]
fn remote_mcp_forward_hint() {
    let router = Router::new();
    let routed = router.route_with_hint("completely novel utterance xyz");
    assert_eq!(routed.method, Method::Fallback);
    let expected = serde_json::json!(true);
    assert_eq!(routed.params.get("_forward"), Some(&expected));
}
/// "id=42" satisfies the id precondition of `get_task` and arrives as an integer.
#[test]
fn id_extraction_for_get_task() {
    let router = Router::new();
    let routed = router.route("get task id=42");
    assert_eq!(routed.tool, "get_task");
    let id = routed.params.get("id").and_then(|v| v.as_i64());
    assert_eq!(id, Some(42));
}

View file

@ -0,0 +1,25 @@
[package]
name = "kei-sage"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Obsidian-style knowledge graph (SQLite + FTS5). Port of LBM internal/sage."
[[bin]]
name = "kei-sage"
path = "src/main.rs"
[lib]
name = "kei_sage"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,50 @@
//! BFS traversal over the edges table, depth-limited, deduplicated.
use crate::edges::list_outgoing;
use crate::store::Store;
use crate::types::Related;
use anyhow::Result;
use std::collections::{HashSet, VecDeque};
// Upper bound on the number of related nodes a single traversal returns.
const MAX_RESULTS: usize = 500;
// Largest traversal depth a caller may request; larger values are clamped.
const MAX_DEPTH: i64 = 5;
/// Breadth-first walk over outgoing edges, starting at `start`.
///
/// Returns every node reached within the (clamped) depth, each reported once
/// together with the type of the edge it was first reached through and its
/// distance from `start`. The start node itself is never reported. The
/// result is capped at `MAX_RESULTS` entries.
///
/// # Errors
/// Propagates any error from reading the edges table.
pub fn bfs(store: &Store, start: &str, max_depth: i64) -> Result<Vec<Related>> {
    let depth_limit = clamp_depth(max_depth);

    let mut seen: HashSet<String> = HashSet::new();
    seen.insert(start.to_string());
    let mut frontier: VecDeque<(String, i64)> = VecDeque::from([(start.to_string(), 0)]);
    let mut found: Vec<Related> = Vec::new();

    while let Some((node, level)) = frontier.pop_front() {
        if found.len() >= MAX_RESULTS {
            break;
        }
        if level >= depth_limit {
            continue;
        }
        let next_level = level + 1;
        for edge in list_outgoing(store, &node)? {
            // Once the cap is hit nothing more can be added from this node.
            if found.len() >= MAX_RESULTS {
                break;
            }
            if seen.contains(&edge.dst_path) {
                continue;
            }
            seen.insert(edge.dst_path.clone());
            found.push(Related {
                path: edge.dst_path.clone(),
                edge_type: edge.edge_type,
                depth: next_level,
            });
            frontier.push_back((edge.dst_path, next_level));
        }
    }
    Ok(found)
}
fn clamp_depth(d: i64) -> i64 {
if d <= 0 {
2
} else if d > MAX_DEPTH {
MAX_DEPTH
} else {
d
}
}

View file

@ -0,0 +1,62 @@
//! Typed-edge CRUD between vault_paths.
use crate::store::Store;
use crate::types::Edge;
use anyhow::Result;
use chrono::Utc;
use rusqlite::params;
/// Insert a typed edge `src -> dst` and return its row id.
///
/// The edge is identified by `(src, dst, edge_type)`. If such an edge already
/// exists the insert is ignored (its weight and timestamp are left untouched)
/// and the id of the existing row is returned.
///
/// # Errors
/// Propagates any SQLite error.
pub fn add_edge(store: &Store, src: &str, dst: &str, edge_type: &str, weight: f64) -> Result<i64> {
    let now = Utc::now().timestamp();
    let conn = store.conn();
    let inserted = conn.execute(
        "INSERT OR IGNORE INTO edges (src_path, dst_path, edge_type, weight, created_at)
VALUES (?1,?2,?3,?4,?5)",
        params![src, dst, edge_type, weight, now],
    )?;
    if inserted > 0 {
        return Ok(conn.last_insert_rowid());
    }
    // The insert was ignored: `last_insert_rowid()` would still report the
    // previous successful insert on this connection (possibly a different
    // row or table), so look the existing edge up explicitly.
    let existing = conn.query_row(
        "SELECT id FROM edges WHERE src_path=?1 AND dst_path=?2 AND edge_type=?3",
        params![src, dst, edge_type],
        |row| row.get::<_, i64>(0),
    )?;
    Ok(existing)
}
/// Delete the edge identified by `(src, dst, edge_type)`.
///
/// Returns the number of rows deleted.
pub fn remove_edge(store: &Store, src: &str, dst: &str, edge_type: &str) -> Result<usize> {
    const DELETE_SQL: &str =
        "DELETE FROM edges WHERE src_path=?1 AND dst_path=?2 AND edge_type=?3";
    let deleted = store.conn().execute(DELETE_SQL, params![src, dst, edge_type])?;
    Ok(deleted)
}
pub fn list_outgoing(store: &Store, src: &str) -> Result<Vec<Edge>> {
let mut stmt = store.conn().prepare(
"SELECT id, src_path, dst_path, edge_type, weight, created_at
FROM edges WHERE src_path=?1",
)?;
let rows = stmt.query_map(params![src], row_to_edge)?;
let mut out = Vec::new();
for r in rows {
out.push(r?);
}
Ok(out)
}
pub fn list_incoming(store: &Store, dst: &str) -> Result<Vec<Edge>> {
let mut stmt = store.conn().prepare(
"SELECT id, src_path, dst_path, edge_type, weight, created_at
FROM edges WHERE dst_path=?1",
)?;
let rows = stmt.query_map(params![dst], row_to_edge)?;
let mut out = Vec::new();
for r in rows {
out.push(r?);
}
Ok(out)
}
/// Convert one result row into an [`Edge`].
///
/// Columns are read by position and must be in the order
/// `id, src_path, dst_path, edge_type, weight, created_at`.
fn row_to_edge(r: &rusqlite::Row) -> rusqlite::Result<Edge> {
    let id = r.get(0)?;
    let src_path = r.get(1)?;
    let dst_path = r.get(2)?;
    let edge_type = r.get(3)?;
    let weight = r.get(4)?;
    let created_at = r.get(5)?;
    Ok(Edge { id, src_path, dst_path, edge_type, weight, created_at })
}

View file

@ -0,0 +1,76 @@
//! Obsidian-style vault import: walk a directory, ingest .md files.
//!
//! Minimal subset of LBM internal/sage/import_obsidian.go — we do NOT parse
//! frontmatter here (the upstream parser used multiple helper files). Port
//! of frontmatter/wikilinks parsing is a later milestone; this cube honours
//! the public interface.
use crate::store::Store;
use crate::types::Unit;
use anyhow::{Context, Result};
use std::fs;
use std::path::{Path, PathBuf};
/// Outcome counters of a vault import.
pub struct ImportStats {
/// Number of `.md` files that were ingested successfully.
pub imported: usize,
/// Number of `.md` files whose ingestion failed (the error is discarded).
pub skipped: usize,
}
/// Import every `.md` file found under `root` into the store.
///
/// A file that fails to ingest is counted as skipped and does not stop the
/// import; a failure while walking the directory tree does.
pub fn import_vault(store: &Store, root: &Path) -> Result<ImportStats> {
    let mut imported = 0;
    let mut skipped = 0;
    for file in walk_md(root)? {
        if ingest_one(store, root, &file).is_ok() {
            imported += 1;
        } else {
            skipped += 1;
        }
    }
    Ok(ImportStats { imported, skipped })
}
/// Collect the paths of all `.md` files below `root`, recursively.
fn walk_md(root: &Path) -> Result<Vec<PathBuf>> {
    let mut found: Vec<PathBuf> = Vec::new();
    walk_recursive(root, &mut found)?;
    Ok(found)
}
/// Depth-first directory walk that appends every file with the exact
/// extension `md` to `out`. A `dir` that is not a directory is ignored.
///
/// # Errors
/// Fails if a directory cannot be listed or an entry cannot be read.
// NOTE(review): `Path::is_dir` follows symlinks, so a symlink cycle inside the
// vault would recurse without bound — confirm whether that needs guarding.
fn walk_recursive(dir: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
    if dir.is_dir() {
        let entries =
            fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))?;
        for entry in entries {
            let candidate = entry?.path();
            if candidate.is_dir() {
                walk_recursive(&candidate, out)?;
                continue;
            }
            let extension = candidate.extension().and_then(|ext| ext.to_str());
            if matches!(extension, Some("md")) {
                out.push(candidate);
            }
        }
    }
    Ok(())
}
/// Read one markdown file and store it as a `note` unit.
///
/// The title is the file stem ("untitled" if it is missing or not valid
/// UTF-8). The vault path is the file's path relative to `root`, falling back
/// to the title when that cannot be computed. Imported notes get evidence
/// grade "E4" and an empty category.
fn ingest_one(store: &Store, root: &Path, path: &Path) -> Result<()> {
    let body = fs::read_to_string(path)?;

    let stem = path
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("untitled");
    let relative = match path.strip_prefix(root) {
        Ok(rel) => rel.to_str(),
        Err(_) => None,
    };
    let vault_path = relative.unwrap_or(stem).to_string();

    let note = Unit {
        unit_type: "note".into(),
        title: stem.to_string(),
        content: body,
        evidence_grade: "E4".into(),
        source_path: path.to_string_lossy().into(),
        vault_path,
        category: String::new(),
        ..Default::default()
    };
    store.add_unit(&note)?;
    Ok(())
}

View file

@ -0,0 +1,15 @@
//! kei-sage — SQLite knowledge-vault with FTS5 + typed edges + BFS + PageRank.
//!
//! Port of LBM internal/sage. Constructor Pattern: one concept per file.
pub mod bfs;
pub mod edges;
pub mod import;
pub mod pagerank;
pub mod schema;
pub mod search;
pub mod store;
pub mod types;
pub use store::Store;
pub use types::{Edge, Related, Unit};

View file

@ -0,0 +1,131 @@
//! kei-sage CLI — import / search / related / rank / add / edit.
use clap::{Parser, Subcommand};
use kei_sage::bfs::bfs;
use kei_sage::edges::add_edge;
use kei_sage::import::import_vault;
use kei_sage::pagerank::pagerank;
use kei_sage::search::fts_search;
use kei_sage::{Store, Unit};
use std::path::PathBuf;
use std::process::ExitCode;
// Top-level command-line arguments of the `kei-sage` binary.
// (Plain `//` comments are used on purpose: `///` comments on clap-derive
// items are turned into help text and would change the CLI output.)
#[derive(Parser)]
#[command(name = "kei-sage", version, about = "Obsidian-style knowledge vault")]
struct Cli {
/// Database path (default: $KEI_VAULT_DB or ~/.claude/sage/vault.sqlite)
#[arg(long)]
db: Option<PathBuf>,
// The subcommand to execute.
#[command(subcommand)]
cmd: Cmd,
}
// Subcommands of `kei-sage`. Each variant is handled by the matching `cmd_*`
// function via `dispatch`.
// (Plain `//` comments are used on purpose: `///` comments on clap-derive
// items are turned into help text and would change the CLI output.)
#[derive(Subcommand)]
enum Cmd {
// Import all `.md` files below a vault directory.
Import { vault: PathBuf },
// Full-text search; prints id, grade and title per hit.
Search { query: String, #[arg(long, default_value_t = 20)] limit: i64 },
// Breadth-first listing of nodes reachable from `key`.
Related { key: String, #[arg(long, default_value_t = 2)] depth: i64 },
// Print the top `limit` nodes by PageRank.
Rank { #[arg(long, default_value_t = 20)] limit: usize },
// Create a new note unit and print its id.
Add {
#[arg(long)] title: String,
#[arg(long, default_value = "")] content: String,
#[arg(long, default_value = "")] vault_path: String,
#[arg(long, default_value = "E4")] grade: String,
},
// Update the given fields of an existing unit.
Edit {
id: i64,
#[arg(long)] title: Option<String>,
#[arg(long)] content: Option<String>,
#[arg(long)] grade: Option<String>,
},
// Add a typed edge between two vault paths.
Link { src: String, dst: String, #[arg(long, default_value = "related")] edge_type: String },
}
/// Resolve the database location.
///
/// Precedence: the `--db` argument, then a non-empty `$KEI_VAULT_DB`, then
/// `$HOME/.claude/sage/vault.sqlite` (relative to `.` when `HOME` is unset).
///
/// The environment is read with `var_os`, so a value that is not valid UTF-8
/// is still honoured instead of being silently ignored. An empty
/// `$KEI_VAULT_DB` is treated as unset: an empty file name would make SQLite
/// open a private temporary database and every write would be lost.
fn db_path(cli_db: Option<PathBuf>) -> PathBuf {
    if let Some(p) = cli_db {
        return p;
    }
    if let Some(from_env) = std::env::var_os("KEI_VAULT_DB").filter(|v| !v.is_empty()) {
        return PathBuf::from(from_env);
    }
    let home = std::env::var_os("HOME").unwrap_or_else(|| std::ffi::OsString::from("."));
    PathBuf::from(home).join(".claude/sage/vault.sqlite")
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let store = Store::open(&db_path(cli.db))?;
dispatch(&store, cli.cmd)
}
/// Hand the parsed subcommand to its `cmd_*` handler.
fn dispatch(store: &Store, cmd: Cmd) -> anyhow::Result<()> {
    match cmd {
        Cmd::Import { vault } => cmd_import(store, &vault),
        Cmd::Search { query, limit } => cmd_search(store, &query, limit),
        Cmd::Related { key, depth } => cmd_related(store, &key, depth),
        Cmd::Rank { limit } => cmd_rank(store, limit),
        Cmd::Add { title, content, vault_path, grade } => {
            cmd_add(store, title, content, vault_path, grade)
        }
        Cmd::Edit { id, title, content, grade } => {
            cmd_edit(store, id, title, content, grade)
        }
        Cmd::Link { src, dst, edge_type } => cmd_link(store, &src, &dst, &edge_type),
    }
}
/// `import`: ingest a vault directory and print the imported / skipped counts.
fn cmd_import(store: &Store, vault: &std::path::Path) -> anyhow::Result<()> {
    let stats = import_vault(store, vault)?;
    let (imported, skipped) = (stats.imported, stats.skipped);
    println!("imported={} skipped={}", imported, skipped);
    Ok(())
}
/// `search`: print one tab-separated line (id, grade, title) per hit.
fn cmd_search(store: &Store, query: &str, limit: i64) -> anyhow::Result<()> {
    let hits = fts_search(store, query, limit)?;
    for unit in &hits {
        println!("{}\t{}\t{}", unit.id, unit.evidence_grade, unit.title);
    }
    Ok(())
}
/// `related`: print every node reachable from `key` with its edge type and depth.
fn cmd_related(store: &Store, key: &str, depth: i64) -> anyhow::Result<()> {
    let reachable = bfs(store, key, depth)?;
    for node in &reachable {
        println!("{}\t{}\t(depth {})", node.edge_type, node.path, node.depth);
    }
    Ok(())
}
/// `rank`: print the `limit` highest-ranked nodes as "score<TAB>path".
fn cmd_rank(store: &Store, limit: usize) -> anyhow::Result<()> {
    let ranked = pagerank(store)?;
    for (path, score) in ranked.iter().take(limit) {
        println!("{:.6}\t{}", score, path);
    }
    Ok(())
}
/// `add`: create a new unit of type "note" and print its id.
fn cmd_add(store: &Store, title: String, content: String,
           vault_path: String, grade: String) -> anyhow::Result<()> {
    let note = Unit {
        unit_type: "note".into(),
        title,
        content,
        vault_path,
        evidence_grade: grade,
        ..Default::default()
    };
    let new_id = store.add_unit(&note)?;
    println!("{}", new_id);
    Ok(())
}
fn cmd_edit(store: &Store, id: i64, title: Option<String>,
content: Option<String>, grade: Option<String>) -> anyhow::Result<()> {
let mut u = store.get_unit(id)?
.ok_or_else(|| anyhow::anyhow!("id {id} not found"))?;
if let Some(t) = title { u.title = t; }
if let Some(c) = content { u.content = c; }
if let Some(g) = grade { u.evidence_grade = g; }
store.update_unit(&u)?;
println!("updated {}", id);
Ok(())
}
/// `link`: add an edge of the given type with weight 1.0 and confirm it.
fn cmd_link(store: &Store, src: &str, dst: &str, edge_type: &str) -> anyhow::Result<()> {
    const DEFAULT_WEIGHT: f64 = 1.0;
    add_edge(store, src, dst, edge_type, DEFAULT_WEIGHT)?;
    println!("linked {} -> {}", src, dst);
    Ok(())
}
/// Entry point: run the CLI and map any error to exit status 1, printing the
/// full error chain on stderr.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-sage: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,60 @@
//! PageRank — power-iteration, 50 iterations, d=0.85. Operates on the edges table.
use crate::store::Store;
use anyhow::Result;
use std::collections::HashMap;
// Damping factor `d`: the share of a node's rank passed on along its edges.
const DAMPING: f64 = 0.85;
// Number of power-iteration rounds; fixed, there is no convergence check.
const ITERATIONS: usize = 50;
/// Compute PageRank over the edges table. Returns [(path, score)] sorted desc.
///
/// Every node starts at `1 / node_count`, then `ITERATIONS` rounds of power
/// iteration are applied. Nodes with equal scores are ordered by path, so the
/// result does not depend on hash-map iteration order. An empty edges table
/// yields an empty list.
///
/// Note: nodes without outgoing edges pass no rank on, so the scores are not
/// guaranteed to sum to 1.
///
/// # Errors
/// Propagates any error from reading the edges table.
pub fn pagerank(store: &Store) -> Result<Vec<(String, f64)>> {
    let (nodes, out_edges) = collect_graph(store)?;
    if nodes.is_empty() {
        return Ok(Vec::new());
    }
    let initial = 1.0 / nodes.len() as f64;
    let mut rank: HashMap<String, f64> =
        nodes.iter().map(|n| (n.clone(), initial)).collect();
    for _ in 0..ITERATIONS {
        rank = one_iteration(&nodes, &out_edges, &rank);
    }
    let mut out: Vec<(String, f64)> = rank.into_iter().collect();
    // Score descending; ties broken by path ascending for a stable order.
    out.sort_by(|a, b| b.1.total_cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    Ok(out)
}
fn collect_graph(store: &Store) -> Result<(Vec<String>, HashMap<String, Vec<String>>)> {
let mut stmt = store.conn().prepare("SELECT src_path, dst_path FROM edges")?;
let rows = stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
let mut nodes: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut out_edges: HashMap<String, Vec<String>> = HashMap::new();
for row in rows {
let (src, dst) = row?;
nodes.insert(src.clone());
nodes.insert(dst.clone());
out_edges.entry(src).or_default().push(dst);
}
Ok((nodes.into_iter().collect(), out_edges))
}
/// One round of PageRank power iteration.
///
/// Every node starts the round at `(1 - DAMPING) / node_count`; each source
/// then hands `DAMPING * rank / out_degree` to each of its destinations.
/// Sources missing from `prev` count as rank 0, and destinations that are not
/// listed in `nodes` are ignored.
fn one_iteration(
    nodes: &[String],
    out_edges: &HashMap<String, Vec<String>>,
    prev: &HashMap<String, f64>,
) -> HashMap<String, f64> {
    let teleport = (1.0 - DAMPING) / nodes.len() as f64;

    let mut next: HashMap<String, f64> = HashMap::with_capacity(nodes.len());
    for node in nodes {
        next.insert(node.clone(), teleport);
    }

    for (src, dsts) in out_edges.iter().filter(|(_, dsts)| !dsts.is_empty()) {
        let src_rank = prev.get(src).copied().unwrap_or(0.0);
        let share = DAMPING * src_rank / dsts.len() as f64;
        for dst in dsts {
            if let Some(slot) = next.get_mut(dst) {
                *slot += share;
            }
        }
    }
    next
}

View file

@ -0,0 +1,57 @@
//! SQLite schema for knowledge-vault. Port of LBM internal/sage/vault_schema.go.
use rusqlite::{Connection, Result};
// DDL for the regular tables and indexes: knowledge units, tags, the
// unit↔tag join table and typed edges. Every statement uses IF NOT EXISTS.
// `vault_path` is unique among units only when it is non-empty (partial
// index); an edge is unique per (src_path, dst_path, edge_type).
const DDL_MAIN: &str = r#"
CREATE TABLE IF NOT EXISTS knowledge_units (
id INTEGER PRIMARY KEY,
unit_type TEXT NOT NULL,
title TEXT NOT NULL,
content TEXT DEFAULT '',
evidence_grade TEXT DEFAULT '',
source_path TEXT DEFAULT '',
vault_path TEXT DEFAULT '',
category TEXT DEFAULT '',
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_ku_type ON knowledge_units(unit_type);
CREATE UNIQUE INDEX IF NOT EXISTS idx_ku_vault
ON knowledge_units(vault_path) WHERE vault_path != '';
CREATE INDEX IF NOT EXISTS idx_ku_grade ON knowledge_units(evidence_grade);
CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY,
name TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS unit_tags (
unit_id INTEGER NOT NULL REFERENCES knowledge_units(id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY (unit_id, tag_id)
);
CREATE TABLE IF NOT EXISTS edges (
id INTEGER PRIMARY KEY,
src_path TEXT NOT NULL,
dst_path TEXT NOT NULL,
edge_type TEXT NOT NULL,
weight REAL DEFAULT 1.0,
created_at INTEGER NOT NULL,
UNIQUE(src_path, dst_path, edge_type)
);
CREATE INDEX IF NOT EXISTS idx_sage_edges_src ON edges(src_path);
CREATE INDEX IF NOT EXISTS idx_sage_edges_dst ON edges(dst_path);
"#;
// FTS5 virtual table used for full-text search. `title` and `content` are
// indexed with the `porter unicode61` tokenizer; `unit_id` is stored but
// UNINDEXED and carries the `knowledge_units.id` the row belongs to.
const DDL_FTS: &str = r#"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_knowledge
USING fts5(unit_id UNINDEXED, title, content, tokenize='porter unicode61');
"#;
/// Creates the relational tables, their indexes and the FTS5 virtual table on `conn`.
///
/// Safe to call repeatedly: every statement is `IF NOT EXISTS`. The main DDL
/// runs first; if it fails the FTS DDL is not attempted.
pub fn create_schema(conn: &Connection) -> Result<()> {
    for ddl in [DDL_MAIN, DDL_FTS] {
        conn.execute_batch(ddl)?;
    }
    Ok(())
}

View file

@ -0,0 +1,33 @@
//! FTS5 search over knowledge_units.
use crate::store::Store;
use crate::types::Unit;
use anyhow::Result;
use rusqlite::params;
// Full-text lookup: matches `?1` against the FTS5 table, joins each hit back to
// its `knowledge_units` row through `unit_id`, orders by FTS5 `rank` and keeps
// at most `?2` rows. The column order must stay in sync with `row_to_unit`.
const SEARCH_SQL: &str =
"SELECT k.id, k.unit_type, k.title, k.content, k.evidence_grade,
k.source_path, k.vault_path, k.category, k.created_at, k.updated_at
FROM fts_knowledge f
JOIN knowledge_units k ON k.id = f.unit_id
WHERE fts_knowledge MATCH ?1
ORDER BY rank LIMIT ?2";
/// Full-text search. Returns matching Units ordered by SQLite FTS5 rank.
pub fn fts_search(store: &Store, query: &str, limit: i64) -> Result<Vec<Unit>> {
let lim = if limit <= 0 { 20 } else { limit };
let mut stmt = store.conn().prepare(SEARCH_SQL)?;
let rows = stmt.query_map(params![query, lim], row_to_unit)?;
let mut out = Vec::new();
for row in rows { out.push(row?); }
Ok(out)
}
/// Decodes one result row into a [`Unit`].
///
/// Columns are read by position (0..=9) in the order selected by `SEARCH_SQL`.
fn row_to_unit(r: &rusqlite::Row) -> rusqlite::Result<Unit> {
    let unit = Unit {
        id: r.get(0)?,
        unit_type: r.get(1)?,
        title: r.get(2)?,
        content: r.get(3)?,
        evidence_grade: r.get(4)?,
        source_path: r.get(5)?,
        vault_path: r.get(6)?,
        category: r.get(7)?,
        created_at: r.get(8)?,
        updated_at: r.get(9)?,
    };
    Ok(unit)
}

View file

@ -0,0 +1,111 @@
//! Knowledge-unit CRUD + FTS indexer.
use crate::schema::create_schema;
use crate::types::Unit;
use anyhow::{Context, Result};
use chrono::Utc;
use rusqlite::{params, Connection};
use std::path::Path;
/// Handle to the knowledge-vault database; owns a single SQLite connection.
pub struct Store {
// Connection with the schema already applied (see `open` / `open_memory`).
conn: Connection,
}
impl Store {
pub fn open(path: &Path) -> Result<Self> {
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let conn = Connection::open(path).context("open sqlite")?;
conn.pragma_update(None, "journal_mode", "WAL").ok();
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn open_memory() -> Result<Self> {
let conn = Connection::open_in_memory()?;
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn conn(&self) -> &Connection {
&self.conn
}
/// Insert a new knowledge unit. Indexes title+content into FTS5. Idempotent by vault_path.
pub fn add_unit(&self, unit: &Unit) -> Result<i64> {
let now = Utc::now().timestamp();
let created = if unit.created_at == 0 { now } else { unit.created_at };
self.conn.execute(
"INSERT OR REPLACE INTO knowledge_units
(unit_type, title, content, evidence_grade, source_path,
vault_path, category, created_at, updated_at)
VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9)",
params![unit.unit_type, unit.title, unit.content, unit.evidence_grade,
unit.source_path, unit.vault_path, unit.category, created, now],
)?;
let id = self.conn.last_insert_rowid();
self.reindex_fts(id, &unit.title, &unit.content)?;
Ok(id)
}
pub fn get_unit(&self, id: i64) -> Result<Option<Unit>> {
let mut stmt = self.conn.prepare(
"SELECT id, unit_type, title, content, evidence_grade, source_path,
vault_path, category, created_at, updated_at
FROM knowledge_units WHERE id=?1",
)?;
let mut rows = stmt.query(params![id])?;
if let Some(r) = rows.next()? {
return Ok(Some(row_to_unit(r)?));
}
Ok(None)
}
pub fn update_unit(&self, unit: &Unit) -> Result<()> {
let now = Utc::now().timestamp();
self.conn.execute(
"UPDATE knowledge_units SET title=?1, content=?2, evidence_grade=?3,
category=?4, updated_at=?5 WHERE id=?6",
params![unit.title, unit.content, unit.evidence_grade,
unit.category, now, unit.id],
)?;
self.reindex_fts(unit.id, &unit.title, &unit.content)?;
Ok(())
}
pub fn delete_unit(&self, id: i64) -> Result<()> {
self.conn.execute("DELETE FROM fts_knowledge WHERE unit_id=?1", params![id])?;
self.conn.execute("DELETE FROM knowledge_units WHERE id=?1", params![id])?;
Ok(())
}
pub fn count_units(&self) -> Result<i64> {
Ok(self.conn.query_row(
"SELECT COUNT(*) FROM knowledge_units", [], |r| r.get(0))?)
}
fn reindex_fts(&self, id: i64, title: &str, content: &str) -> Result<()> {
self.conn.execute("DELETE FROM fts_knowledge WHERE unit_id=?1", params![id])?;
self.conn.execute(
"INSERT INTO fts_knowledge (unit_id, title, content) VALUES (?1,?2,?3)",
params![id, title, content],
)?;
Ok(())
}
}
/// Decodes one `knowledge_units` row into a [`Unit`].
///
/// Columns are read by position, in the order used by the SELECT in `get_unit`.
fn row_to_unit(r: &rusqlite::Row) -> rusqlite::Result<Unit> {
    let id = r.get(0)?;
    let unit_type = r.get(1)?;
    let title = r.get(2)?;
    let content = r.get(3)?;
    let evidence_grade = r.get(4)?;
    let source_path = r.get(5)?;
    let vault_path = r.get(6)?;
    let category = r.get(7)?;
    let created_at = r.get(8)?;
    let updated_at = r.get(9)?;
    Ok(Unit {
        id,
        unit_type,
        title,
        content,
        evidence_grade,
        source_path,
        vault_path,
        category,
        created_at,
        updated_at,
    })
}

View file

@ -0,0 +1,34 @@
//! Shared value types for knowledge units + edges + BFS results.
use serde::{Deserialize, Serialize};
/// One knowledge unit; fields mirror the columns of the `knowledge_units` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Unit {
/// Row id in `knowledge_units`; assigned by the database on insert.
pub id: i64,
/// Free-form kind label (the integration tests use "note").
pub unit_type: String,
/// Title; indexed for full-text search.
pub title: String,
/// Body text; indexed for full-text search.
pub content: String,
/// Evidence grade label (the integration tests use "E2").
pub evidence_grade: String,
/// NOTE(review): presumably the file the unit was imported from — confirm against the importer.
pub source_path: String,
/// Path inside the vault; unique across units whenever it is non-empty.
pub vault_path: String,
/// Free-form category label.
pub category: String,
/// Creation time as a Unix timestamp in seconds; 0 means "set to now" when the unit is added.
pub created_at: i64,
/// Last-modified time as a Unix timestamp in seconds; written by the store.
pub updated_at: i64,
}
/// One directed link; fields mirror the columns of the `edges` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
/// Row id in `edges`.
pub id: i64,
/// Path string of the edge's origin.
pub src_path: String,
/// Path string of the edge's target.
pub dst_path: String,
/// Relation label; (src_path, dst_path, edge_type) is unique in the table.
pub edge_type: String,
/// Edge weight; the column defaults to 1.0.
pub weight: f64,
/// NOTE(review): presumably a Unix timestamp in seconds like `Unit::created_at` — confirm in `edges.rs`.
pub created_at: i64,
}
/// One entry of a BFS result.
///
/// NOTE(review): the traversal that produces these values is not in view; the
/// field descriptions below are inferred from names and tests — confirm in `bfs.rs`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Related {
/// Path of the node that was reached.
pub path: String,
/// Presumably the type of the edge through which the node was reached.
pub edge_type: String,
/// Presumably the hop distance from the start node.
pub depth: i64,
}

View file

@ -0,0 +1,110 @@
//! kei-sage integration tests.
use kei_sage::bfs::bfs;
use kei_sage::edges::{add_edge, list_outgoing};
use kei_sage::import::import_vault;
use kei_sage::pagerank::pagerank;
use kei_sage::search::fts_search;
use kei_sage::{Store, Unit};
use std::fs;
use tempfile::tempdir;
/// Builds a fresh in-memory store; panics if the schema cannot be created.
fn mkstore() -> Store {
    let store = Store::open_memory();
    store.unwrap()
}
fn mkunit(title: &str, body: &str, vault: &str) -> Unit {
Unit {
unit_type: "note".into(), title: title.into(), content: body.into(),
evidence_grade: "E2".into(), vault_path: vault.into(),
..Default::default()
}
}
/// Add, read back, delete, then confirm the unit is gone.
#[test]
fn crud_roundtrip() {
    let store = mkstore();
    let unit_id = store.add_unit(&mkunit("hello", "world", "a.md")).unwrap();
    assert!(unit_id > 0);

    let fetched = store.get_unit(unit_id).unwrap().unwrap();
    assert_eq!(fetched.title, "hello");

    store.delete_unit(unit_id).unwrap();
    let after_delete = store.get_unit(unit_id).unwrap();
    assert!(after_delete.is_none());
}
/// A term that occurs in only one unit's content returns exactly that unit.
#[test]
fn fts_search_matches() {
    let store = mkstore();
    let fixtures = [
        ("rust async", "tokio runtime details", "a.md"),
        ("python sync", "flask wsgi server", "b.md"),
    ];
    for (title, body, vault) in fixtures {
        store.add_unit(&mkunit(title, body, vault)).unwrap();
    }

    let found = fts_search(&store, "tokio", 10).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].title, "rust async");
}
/// On the chain a -> b -> c -> d, a depth-2 search from "a" reaches b and c but not d.
#[test]
fn bfs_depth_limit() {
    let store = mkstore();
    for (from, to) in [("a", "b"), ("b", "c"), ("c", "d")] {
        add_edge(&store, from, to, "rel", 1.0).unwrap();
    }

    let reached = bfs(&store, "a", 2).unwrap();
    let paths: Vec<&str> = reached.iter().map(|hit| hit.path.as_str()).collect();
    assert!(paths.contains(&"b"));
    assert!(paths.contains(&"c"));
    assert!(!paths.contains(&"d"));
}
/// A node with five inbound links and no competitors must come first in the ranking.
#[test]
fn pagerank_orders_by_popularity() {
    let store = mkstore();
    for spoke in ["a", "b", "c", "d", "e"] {
        add_edge(&store, spoke, "hub", "rel", 1.0).unwrap();
    }

    let ranking = pagerank(&store).unwrap();
    assert_eq!(ranking[0].0, "hub");
}
/// An inserted edge gets a positive id and is listed among its source's outgoing edges.
#[test]
fn edges_crud() {
    let store = mkstore();
    let edge_id = add_edge(&store, "x", "y", "cites", 0.8).unwrap();
    assert!(edge_id > 0);

    let outgoing = list_outgoing(&store, "x").unwrap();
    assert_eq!(outgoing.len(), 1);
    assert_eq!(outgoing[0].dst_path, "y");
}
/// Importing the same directory twice reports one import each time but keeps a single unit.
#[test]
fn import_idempotency() {
    let dir = tempdir().unwrap();
    let note = dir.path().join("one.md");
    fs::write(&note, "# title one\nhello").unwrap();

    let store = mkstore();
    let first_pass = import_vault(&store, dir.path()).unwrap();
    let second_pass = import_vault(&store, dir.path()).unwrap();

    assert_eq!(first_pass.imported, 1);
    assert_eq!(second_pass.imported, 1);
    assert_eq!(store.count_units().unwrap(), 1);
}
/// An edge between the vault paths of two stored units is listed as outgoing from its source.
#[test]
fn edges_cross_reference_validates() {
    let store = mkstore();
    for (title, vault) in [("note a", "a.md"), ("note b", "b.md")] {
        store.add_unit(&mkunit(title, "", vault)).unwrap();
    }
    add_edge(&store, "a.md", "b.md", "refs", 1.0).unwrap();

    let outgoing = list_outgoing(&store, "a.md").unwrap();
    assert_eq!(outgoing.len(), 1);
}
/// With 25 matching units stored, a limit of 5 yields exactly 5 hits.
#[test]
fn fts5_respects_limit() {
    let store = mkstore();
    (0..25).for_each(|i| {
        let title = format!("rust note {i}");
        let vault = format!("n{i}.md");
        store.add_unit(&mkunit(&title, "rust rust rust", &vault)).unwrap();
    });

    let found = fts_search(&store, "rust", 5).unwrap();
    assert_eq!(found.len(), 5);
}

View file

@ -0,0 +1,25 @@
[package]
name = "kei-search-core"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "3-wave deep research scaffolding with budget cap. Port of LBM internal/search (fetch stubbed)."
[[bin]]
name = "kei-search-core"
path = "src/main.rs"
[lib]
name = "kei_search_core"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,34 @@
//! Budget tracker — all costs in microcents (1 USD = 1_000_000 mc).
use anyhow::{anyhow, Result};
/// Spending tracker with a hard cap; all amounts use the module's `mc` unit.
#[derive(Debug, Clone)]
pub struct Budget {
// Maximum total that may be charged.
cap_mc: i64,
// Sum of all accepted charges so far.
spent_mc: i64,
// Once true, every further charge is rejected.
stopped: bool,
}
impl Budget {
    /// Creates a tracker with nothing spent and a cap of `cap_mc`.
    pub fn new(cap_mc: i64) -> Self {
        Self { cap_mc, spent_mc: 0, stopped: false }
    }

    /// Records a cost of `mc`.
    ///
    /// # Errors
    /// Rejects the charge, leaving the spent total untouched, when the budget
    /// has been stopped, when `mc` is negative, or when accepting it would
    /// push the total above the cap. Spending exactly up to the cap is allowed.
    pub fn charge(&mut self, mc: i64) -> Result<()> {
        if self.stopped {
            return Err(anyhow!("budget stopped"));
        }
        // A negative "cost" would lower the spent total and silently raise the
        // effective cap, so it is refused rather than recorded.
        if mc < 0 {
            return Err(anyhow!("negative charge: {}", mc));
        }
        // checked_add: a huge charge must be reported as exceeding the budget,
        // not overflow (panic in debug builds, wrap-around in release builds).
        let total = self.spent_mc.checked_add(mc).ok_or_else(|| {
            anyhow!("budget exceeded: charge={} overflows, cap={}", mc, self.cap_mc)
        })?;
        if total > self.cap_mc {
            return Err(anyhow!(
                "budget exceeded: spent={} cap={}", total, self.cap_mc));
        }
        self.spent_mc = total;
        Ok(())
    }

    /// Total of all accepted charges.
    pub fn spent(&self) -> i64 { self.spent_mc }

    /// Amount still available under the cap (saturating at the i64 bounds).
    pub fn remaining(&self) -> i64 { self.cap_mc.saturating_sub(self.spent_mc) }

    /// Permanently blocks further charges.
    pub fn stop(&mut self) { self.stopped = true; }

    /// Whether `stop` has been called.
    pub fn is_stopped(&self) -> bool { self.stopped }
}

View file

@ -0,0 +1,40 @@
//! Export research → markdown / JSON.
use crate::store::ResearchStore;
use anyhow::{anyhow, Result};
use serde_json::json;
/// Output format produced by [`export`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Human-readable Markdown report.
    Markdown,
    /// Pretty-printed JSON document.
    Json,
}
/// Renders research `id` and its claims as Markdown or pretty-printed JSON.
///
/// # Errors
/// Fails when no research row with that id exists, when a store query fails,
/// or when JSON serialization fails.
pub fn export(store: &ResearchStore, id: i64, fmt: Format) -> Result<String> {
    let research = match store.get_research(id)? {
        Some(found) => found,
        None => return Err(anyhow!("research {id} missing")),
    };
    let claims = store.claims_for(id)?;

    let rendered = match fmt {
        Format::Json => {
            let document = json!({
                "id": research.id,
                "query": research.query_original,
                "status": research.status,
                "cost_mc": research.total_cost_mc,
                "claims": claims,
            });
            serde_json::to_string_pretty(&document)?
        }
        Format::Markdown => {
            // Header pieces first, then one bullet per claim.
            let mut parts = vec![
                format!("# Research {}\n\n", research.id),
                format!("**Query:** {}\n\n", research.query_original),
                format!("**Status:** {}\n", research.status),
                format!("**Cost:** {} mc\n\n", research.total_cost_mc),
                "## Claims\n\n".to_string(),
            ];
            parts.extend(claims.iter().map(|claim| {
                format!("- [{}] {} (consensus={:.2})\n",
                    claim.grade, claim.claim_text, claim.consensus)
            }));
            parts.concat()
        }
    };
    Ok(rendered)
}

View file

@ -0,0 +1,23 @@
//! Source fetcher trait — frozen interface, default impl is a no-op stub.
//!
//! Actual WebFetch/WebSearch integration is out-of-scope for v0.14 part A.
//! Later milestones plug real providers (anthropic-websearch, SerpAPI, etc.).
use crate::types::Source;
/// Pluggable source provider used by wave 1 of the research pipeline.
///
/// Implement this trait to integrate a live search provider.
pub trait SourceFetcher {
/// Fetch sources for `claim`.
///
/// Returns `(sources, cost_microcents)`. The implementation only reports the
/// cost; the pipeline charges it against the budget when it is greater than zero.
fn fetch(&self, claim: &str) -> (Vec<Source>, i64);
}
/// Placeholder fetcher: finds nothing, costs nothing and has no side effects.
pub struct StubFetcher;

impl SourceFetcher for StubFetcher {
    /// Ignores the claim and reports no sources at zero cost.
    fn fetch(&self, _claim: &str) -> (Vec<Source>, i64) {
        // TODO(v0.15): wire to real websearch. Kept as stub per v0.14 spec.
        let no_sources: Vec<Source> = Vec::new();
        (no_sources, 0)
    }
}

View file

@ -0,0 +1,22 @@
//! kei-search-core — 3-wave deep research engine, budget-capped.
//!
//! Waves:
//! 0 — claim extraction from prompt
//! 1 — per-claim source hunt (WebFetch stubbed behind [`SourceFetcher`] trait)
//! 2 — cross-validation + consensus scoring
//!
//! Port of LBM internal/search. The actual fetch is a trait the caller
//! supplies. The default implementation ([`fetch::StubFetcher`]) returns no
//! sources at zero cost; the interface is frozen and a real provider is still to come.
pub mod budget;
pub mod export;
pub mod fetch;
pub mod pipeline;
pub mod schema;
pub mod store;
pub mod types;
pub use pipeline::run_research;
pub use store::ResearchStore;
pub use types::{Claim, Research, Source};

View file

@ -0,0 +1,61 @@
//! kei-search-core CLI.
use clap::{Parser, Subcommand, ValueEnum};
use kei_search_core::export::{export, Format};
use kei_search_core::fetch::StubFetcher;
use kei_search_core::pipeline::run_research;
use kei_search_core::ResearchStore;
use std::path::PathBuf;
use std::process::ExitCode;
// Top-level command line of `kei-search-core`.
// (Plain `//` comments on purpose: `///` would be picked up by clap as help text.)
#[derive(Parser)]
#[command(name = "kei-search-core", version)]
struct Cli {
// `--db <path>`: explicit database file; when absent, `db_path` picks the location.
#[arg(long)] db: Option<PathBuf>,
// The subcommand to execute.
#[command(subcommand)] cmd: Cmd,
}
// Subcommands of the CLI.
// (Plain `//` comments on purpose: `///` would be picked up by clap as help text.)
#[derive(Subcommand)]
enum Cmd {
// Run the research pipeline on `prompt` with the stub fetcher and print the new research id.
Run { prompt: String,
#[arg(long, default_value_t = 1_000_000)] budget: i64 }, // 1 USD
// Set the stored status of research `id` to "stopped".
Stop { id: i64 },
// Print research `id` in the chosen format (Markdown by default).
Export { id: i64, #[arg(long, value_enum, default_value_t = Fmt::Md)] format: Fmt },
}
// Values accepted by `export --format`; `run` maps them onto `export::Format`.
#[derive(Clone, Copy, ValueEnum)]
enum Fmt { Md, Json }
/// Chooses the database file location.
///
/// Priority: the explicit path `o`, then the `KEI_SEARCH_DB` environment
/// variable, then `$HOME/.claude/search/research.sqlite` (with `.` standing in
/// for an unreadable `HOME`).
fn db_path(o: Option<PathBuf>) -> PathBuf {
    o.or_else(|| std::env::var("KEI_SEARCH_DB").ok().map(PathBuf::from))
        .unwrap_or_else(|| {
            let home = std::env::var("HOME").unwrap_or_else(|_| ".".into());
            PathBuf::from(home).join(".claude/search/research.sqlite")
        })
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let s = ResearchStore::open(&db_path(cli.db))?;
match cli.cmd {
Cmd::Run { prompt, budget } => {
let id = run_research(&s, &StubFetcher, &prompt, budget)?;
println!("{}", id);
}
Cmd::Stop { id } => {
s.set_status(id, "stopped")?;
println!("stopped {}", id);
}
Cmd::Export { id, format } => {
let f = match format { Fmt::Md => Format::Markdown, Fmt::Json => Format::Json };
println!("{}", export(&s, id, f)?);
}
}
Ok(())
}
/// Entry point: reports any error chain on stderr and exits with status 1.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-search-core: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,90 @@
//! 3-wave research runner.
//!
//! Wave 0: split prompt into claims (naive split on `.`; real NLU later).
//! Wave 1: for each claim, fetch sources via [`SourceFetcher`].
//! Wave 2: score consensus per claim from sources (majority = higher grade).
use crate::budget::Budget;
use crate::fetch::SourceFetcher;
use crate::store::ResearchStore;
use crate::types::{Claim, Source};
use anyhow::Result;
// Flat fee charged for every claim before the fetcher is called. At the rate
// documented in budget.rs (1 USD = 1_000_000 mc) this is 0.0001 USD.
const WAVE1_COST_PER_CLAIM_MC: i64 = 100; // 0.0001 USD (0.01 cent) per claim
// Flat fee charged once for the wave-2 consensus pass.
const WAVE2_COST_MC: i64 = 50;
/// Runs the three research waves for `prompt` and returns the id of the research row.
///
/// The row is created first (status "running"), claims are extracted from the
/// prompt, sources are fetched per claim, and consensus is scored. The amount
/// actually charged against the budget is stored on the row whether the run
/// succeeds or fails; the status ends up as "completed" or "failed".
///
/// # Errors
/// Returns the error of the first wave that fails (for example a budget
/// overrun), or a store error raised while recording the outcome.
pub fn run_research(
    store: &ResearchStore,
    fetcher: &dyn SourceFetcher,
    prompt: &str,
    budget_mc: i64,
) -> Result<i64> {
    let research_id = store.create_research(prompt)?;
    let mut budget = Budget::new(budget_mc);
    let claims_text = wave_0_extract_claims(prompt);

    let outcome = wave_1_fetch(store, fetcher, research_id, &claims_text, &mut budget)
        .and_then(|()| wave_2_consensus(store, research_id, &mut budget));

    // Record the spend on both paths: a failed run has still consumed budget,
    // and leaving total_cost_mc at 0 would under-report it.
    store.set_cost(research_id, budget.spent())?;
    match outcome {
        Ok(()) => {
            store.set_status(research_id, "completed")?;
            Ok(research_id)
        }
        Err(e) => {
            store.set_status(research_id, "failed")?;
            Err(e)
        }
    }
}
/// Wave 0: splits `prompt` into candidate claims.
///
/// The text is cut at every `.`, `?` and newline; each piece is trimmed and
/// kept only when it is longer than 4 bytes.
fn wave_0_extract_claims(prompt: &str) -> Vec<String> {
    let mut claims = Vec::new();
    for piece in prompt.split(|c: char| matches!(c, '.' | '?' | '\n')) {
        let candidate = piece.trim();
        if candidate.len() > 4 {
            claims.push(candidate.to_string());
        }
    }
    claims
}
/// Wave 1: for every claim, charges the per-claim fee, fetches sources and
/// stores both the sources and the claim under research `rid`.
///
/// A positive cost reported by the fetcher is charged as well. The first
/// rejected charge or store error aborts the wave; a claim whose charge is
/// rejected is not stored.
fn wave_1_fetch(
    store: &ResearchStore,
    fetcher: &dyn SourceFetcher,
    rid: i64,
    claims: &[String],
    budget: &mut Budget,
) -> Result<()> {
    for claim_text in claims.iter() {
        budget.charge(WAVE1_COST_PER_CLAIM_MC)?;

        let (sources, fetch_cost) = fetcher.fetch(claim_text);
        if fetch_cost > 0 {
            budget.charge(fetch_cost)?;
        }

        for source in sources {
            // Attach the fetched source to this research before storing it.
            let owned = Source { research_id: rid, ..source };
            store.add_source(&owned)?;
        }

        let claim = Claim {
            research_id: rid,
            claim_text: claim_text.clone(),
            ..Default::default()
        };
        store.add_claim(&claim)?;
    }
    Ok(())
}
/// Wave 2: charges the flat wave fee and writes scores and a grade to every
/// claim of research `rid`.
///
/// The scores are currently fixed placeholders (support 0.5, contradict 0.0),
/// so every claim receives the same consensus and grade.
fn wave_2_consensus(store: &ResearchStore, rid: i64, budget: &mut Budget) -> Result<()> {
    budget.charge(WAVE2_COST_MC)?;

    let support = 0.5;
    let contradict = 0.0;
    let consensus = support - contradict;
    let grade = grade_from_consensus(consensus);

    for claim in store.claims_for(rid)? {
        store.conn().execute(
            "UPDATE claims SET support=?1, contradict=?2, consensus=?3, grade=?4
WHERE id=?5",
            rusqlite::params![support, contradict, consensus, grade, claim.id],
        )?;
    }
    Ok(())
}
/// Maps a consensus score to an evidence grade: `>= 0.8` is "E2", `>= 0.5` is
/// "E4", anything else (including NaN) is "E6".
fn grade_from_consensus(c: f64) -> &'static str {
    match c {
        score if score >= 0.8 => "E2",
        score if score >= 0.5 => "E4",
        _ => "E6",
    }
}

View file

@ -0,0 +1,44 @@
use rusqlite::{Connection, Result};
// Schema of the research database. Every statement uses `IF NOT EXISTS`, so
// the batch can be re-run on an existing database.
//
// - `researches`: one row per run (query, status, rendered result, total cost, timestamps).
// - `sources`: fetched documents, linked to a run through `research_id`.
// - `claims`: extracted claims with their scores and grade, linked through `research_id`.
const DDL: &str = r#"
CREATE TABLE IF NOT EXISTS researches (
id INTEGER PRIMARY KEY,
query_original TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
result_markdown TEXT DEFAULT '',
total_cost_mc INTEGER DEFAULT 0,
created_at INTEGER NOT NULL,
completed_at INTEGER DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_res_status ON researches(status);
CREATE TABLE IF NOT EXISTS sources (
id INTEGER PRIMARY KEY,
research_id INTEGER NOT NULL REFERENCES researches(id),
url TEXT NOT NULL,
title TEXT DEFAULT '',
content TEXT DEFAULT '',
provider TEXT DEFAULT '',
domain TEXT DEFAULT '',
relevance_score REAL DEFAULT 0.0,
created_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_src_research ON sources(research_id);
CREATE TABLE IF NOT EXISTS claims (
id INTEGER PRIMARY KEY,
research_id INTEGER NOT NULL REFERENCES researches(id),
claim_text TEXT NOT NULL,
support REAL DEFAULT 0.0,
contradict REAL DEFAULT 0.0,
consensus REAL DEFAULT 0.0,
grade TEXT DEFAULT 'E6',
created_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_claim_research ON claims(research_id);
"#;
/// Creates the research tables and indexes on `conn`; safe to call repeatedly
/// because every statement is `IF NOT EXISTS`.
pub fn create_schema(conn: &Connection) -> Result<()> {
    conn.execute_batch(DDL)
}

View file

@ -0,0 +1,122 @@
use crate::schema::create_schema;
use crate::types::{Claim, Research, Source};
use anyhow::{Context, Result};
use chrono::Utc;
use rusqlite::{params, Connection};
use std::path::Path;
/// Handle to the research database; owns a single SQLite connection.
pub struct ResearchStore {
// Connection with the schema already applied (see `open` / `open_memory`).
conn: Connection,
}
impl ResearchStore {
    /// Opens (creating if necessary) the database file at `path` and applies the schema.
    ///
    /// Creating the parent directory and enabling WAL journaling are
    /// best-effort; their failures are ignored.
    pub fn open(path: &Path) -> Result<Self> {
        if let Some(dir) = path.parent() {
            std::fs::create_dir_all(dir).ok();
        }
        let conn = Connection::open(path).context("open sqlite")?;
        let _ = conn.pragma_update(None, "journal_mode", "WAL");
        create_schema(&conn)?;
        Ok(Self { conn })
    }

    /// Opens a private in-memory database with the schema applied.
    pub fn open_memory() -> Result<Self> {
        let conn = Connection::open_in_memory()?;
        create_schema(&conn)?;
        Ok(Self { conn })
    }

    /// Borrows the underlying connection for callers that run their own SQL.
    pub fn conn(&self) -> &Connection {
        &self.conn
    }

    /// Inserts a research row with status "running" and returns its id.
    pub fn create_research(&self, query: &str) -> Result<i64> {
        let created_at = Utc::now().timestamp();
        self.conn.execute(
            "INSERT INTO researches (query_original, status, created_at)
VALUES (?1, 'running', ?2)",
            params![query, created_at],
        )?;
        let id = self.conn.last_insert_rowid();
        Ok(id)
    }

    /// Sets the status of research `id` and stamps `completed_at` with the
    /// current time. An unknown id is a silent no-op.
    pub fn set_status(&self, id: i64, status: &str) -> Result<()> {
        let stamped_at = Utc::now().timestamp();
        self.conn
            .execute(
                "UPDATE researches SET status=?1, completed_at=?2 WHERE id=?3",
                params![status, stamped_at, id],
            )
            .map(|_| ())?;
        Ok(())
    }

    /// Stores the total cost `mc` on research `id`.
    pub fn set_cost(&self, id: i64, mc: i64) -> Result<()> {
        self.conn
            .execute(
                "UPDATE researches SET total_cost_mc=?1 WHERE id=?2",
                params![mc, id],
            )
            .map(|_| ())?;
        Ok(())
    }

    /// Stores the rendered Markdown `md` on research `id`.
    pub fn set_markdown(&self, id: i64, md: &str) -> Result<()> {
        self.conn
            .execute(
                "UPDATE researches SET result_markdown=?1 WHERE id=?2",
                params![md, id],
            )
            .map(|_| ())?;
        Ok(())
    }

    /// Loads research `id`, or `None` when no such row exists.
    pub fn get_research(&self, id: i64) -> Result<Option<Research>> {
        let mut stmt = self.conn.prepare(
            "SELECT id, query_original, status, result_markdown, total_cost_mc,
created_at, completed_at FROM researches WHERE id=?1",
        )?;
        let mut rows = stmt.query(params![id])?;
        let found = match rows.next()? {
            None => None,
            Some(row) => Some(Research {
                id: row.get(0)?,
                query_original: row.get(1)?,
                status: row.get(2)?,
                result_markdown: row.get(3)?,
                total_cost_mc: row.get(4)?,
                created_at: row.get(5)?,
                completed_at: row.get(6)?,
            }),
        };
        Ok(found)
    }

    /// Inserts a source row (timestamped with the current time) and returns its id.
    pub fn add_source(&self, s: &Source) -> Result<i64> {
        let created_at = Utc::now().timestamp();
        self.conn.execute(
            "INSERT INTO sources (research_id, url, title, content, provider,
domain, relevance_score, created_at)
VALUES (?1,?2,?3,?4,?5,?6,?7,?8)",
            params![s.research_id, s.url, s.title, s.content, s.provider,
                s.domain, s.relevance_score, created_at],
        )?;
        let id = self.conn.last_insert_rowid();
        Ok(id)
    }

    /// Inserts a claim row (timestamped with the current time) and returns its id.
    pub fn add_claim(&self, c: &Claim) -> Result<i64> {
        let created_at = Utc::now().timestamp();
        self.conn.execute(
            "INSERT INTO claims (research_id, claim_text, support, contradict,
consensus, grade, created_at)
VALUES (?1,?2,?3,?4,?5,?6,?7)",
            params![c.research_id, c.claim_text, c.support, c.contradict,
                c.consensus, c.grade, created_at],
        )?;
        let id = self.conn.last_insert_rowid();
        Ok(id)
    }

    /// Returns every claim stored for `research_id`.
    pub fn claims_for(&self, research_id: i64) -> Result<Vec<Claim>> {
        let mut stmt = self.conn.prepare(
            "SELECT id, research_id, claim_text, support, contradict,
consensus, grade, created_at FROM claims WHERE research_id=?1"
        )?;
        let claims = stmt
            .query_map(params![research_id], |row| {
                Ok(Claim {
                    id: row.get(0)?,
                    research_id: row.get(1)?,
                    claim_text: row.get(2)?,
                    support: row.get(3)?,
                    contradict: row.get(4)?,
                    consensus: row.get(5)?,
                    grade: row.get(6)?,
                    created_at: row.get(7)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<Claim>>>()?;
        Ok(claims)
    }
}

View file

@ -0,0 +1,37 @@
use serde::{Deserialize, Serialize};
/// One research run; fields mirror the columns of the `researches` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Research {
/// Row id in `researches`.
pub id: i64,
/// The prompt the run was started with.
pub query_original: String,
/// Lifecycle status; values written by code in view: "running", "completed", "failed", "stopped".
pub status: String,
/// Rendered Markdown result; the column defaults to an empty string.
pub result_markdown: String,
/// Total amount charged against the budget, in `mc`.
pub total_cost_mc: i64,
/// Creation time as a Unix timestamp in seconds.
pub created_at: i64,
/// Time of the most recent status change (Unix seconds); the column defaults to 0.
pub completed_at: i64,
}
/// One fetched source; fields mirror the columns of the `sources` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Source {
/// Row id in `sources`.
pub id: i64,
/// Research run the source belongs to; the pipeline overwrites it before storing.
pub research_id: i64,
/// Location of the source.
pub url: String,
/// Title of the source.
pub title: String,
/// Text content of the source.
pub content: String,
/// Name of the provider that returned the source.
pub provider: String,
/// Domain the source was served from.
pub domain: String,
/// Relevance score reported by the fetcher.
pub relevance_score: f64,
/// Stored creation time (Unix seconds); the store writes the current time on insert.
pub created_at: i64,
}
/// One extracted claim; fields mirror the columns of the `claims` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Claim {
/// Row id in `claims`.
pub id: i64,
/// Research run the claim belongs to.
pub research_id: i64,
/// The claim text as extracted from the prompt.
pub claim_text: String,
/// Support score written by wave 2.
pub support: f64,
/// Contradiction score written by wave 2.
pub contradict: f64,
/// Consensus score; wave 2 computes it as `support - contradict`.
pub consensus: f64,
/// Evidence grade derived from the consensus ("E2", "E4" or "E6"); empty on a
/// `Default` value until wave 2 has run.
pub grade: String,
/// Stored creation time (Unix seconds); the store writes the current time on insert.
pub created_at: i64,
}

View file

@ -0,0 +1,80 @@
use kei_search_core::budget::Budget;
use kei_search_core::export::{export, Format};
use kei_search_core::fetch::{SourceFetcher, StubFetcher};
use kei_search_core::pipeline::run_research;
use kei_search_core::types::Source;
use kei_search_core::ResearchStore;
/// Builds a fresh in-memory research store; panics if the schema cannot be created.
fn mk() -> ResearchStore {
    let store = ResearchStore::open_memory();
    store.unwrap()
}
/// Test fetcher: returns one canned source per claim and reports a cost of 10.
struct FakeFetcher;

impl SourceFetcher for FakeFetcher {
    fn fetch(&self, claim: &str) -> (Vec<Source>, i64) {
        let canned = Source {
            url: "https://example.test".into(),
            title: format!("source for: {claim}"),
            content: "body".into(),
            provider: "fake".into(),
            domain: "example.test".into(),
            relevance_score: 0.8,
            ..Default::default()
        };
        (vec![canned], 10)
    }
}
/// Charges within the cap succeed; the one that would exceed it is rejected.
#[test]
fn budget_enforcement() {
    let mut budget = Budget::new(100);
    for amount in [50, 40] {
        budget.charge(amount).unwrap();
    }
    let overspend = budget.charge(20);
    assert!(overspend.is_err(), "must reject overspend");
}
/// A two-sentence prompt completes, records a positive cost and stores at least two claims.
#[test]
fn wave_progression_creates_research() {
    let store = mk();
    let prompt = "Rust is memory-safe. Python is dynamic.";
    let research_id = run_research(&store, &FakeFetcher, prompt, 10_000).unwrap();

    let research = store.get_research(research_id).unwrap().unwrap();
    assert_eq!(research.status, "completed");
    assert!(research.total_cost_mc > 0);

    let claims = store.claims_for(research_id).unwrap();
    assert!(claims.len() >= 2);
}
/// After a run, the first stored claim carries a non-empty grade.
#[test]
fn consensus_scoring_applies_grade() {
    let store = mk();
    let research_id = run_research(&store, &FakeFetcher, "One claim here.", 10_000).unwrap();

    let claims = store.claims_for(research_id).unwrap();
    assert!(!claims.is_empty());
    let first = &claims[0];
    assert!(!first.grade.is_empty());
}
/// The Markdown export has the report heading; the JSON export parses and has a "claims" key.
#[test]
fn export_markdown_and_json() {
    let store = mk();
    let research_id = run_research(&store, &FakeFetcher, "Claim A. Claim B.", 10_000).unwrap();

    let markdown = export(&store, research_id, Format::Markdown).unwrap();
    assert!(markdown.contains("# Research"));

    let json_text = export(&store, research_id, Format::Json).unwrap();
    let document: serde_json::Value = serde_json::from_str(&json_text).unwrap();
    assert!(document.get("claims").is_some());
}
/// Overwriting the status with "stopped" is persisted. The run has already
/// finished by the time the status is changed.
#[test]
fn stop_mid_run_marks_status() {
    let store = mk();
    let research_id = run_research(&store, &StubFetcher, "x. y.", 10_000).unwrap();
    store.set_status(research_id, "stopped").unwrap();

    let research = store.get_research(research_id).unwrap().unwrap();
    assert_eq!(research.status, "stopped");
}
/// A budget smaller than the cost of the run makes `run_research` fail.
#[test]
fn budget_exhausted_rejects_run() {
    let store = mk();
    // 3 claims × 100mc + 50mc wave2 = 350mc; budget 100 → must overspend.
    let prompt = "alpha claim one. beta claim two. gamma claim three.";
    let outcome = run_research(&store, &StubFetcher, prompt, 100);
    assert!(outcome.is_err(), "small budget vs 3 claims must overspend");
}

View file

@ -0,0 +1,25 @@
[package]
name = "kei-social-store"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "People + interaction CRM (lite). Port of LBM internal/social."
[[bin]]
name = "kei-social-store"
path = "src/main.rs"
[lib]
name = "kei_social_store"
path = "src/lib.rs"
[dependencies]
rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,31 @@
//! Relationship graph — who interacted with whom, grouped by channel.
use crate::store::Store;
use anyhow::Result;
use serde::Serialize;
/// One aggregated row of the relationship graph: how often `person_id`
/// interacted with `target_id` over one channel.
#[derive(Debug, Clone, Serialize)]
pub struct Pair {
/// `interactions.person_id` of the group.
pub person_id: i64,
/// `interactions.target_id` of the group (always greater than 0 here).
pub target_id: i64,
/// Channel shared by the grouped interactions.
pub channel: String,
/// Number of interactions in the group.
pub count: i64,
}
pub fn relationship_graph(store: &Store) -> Result<Vec<Pair>> {
let mut stmt = store.conn().prepare(
"SELECT person_id, target_id, channel, COUNT(*) FROM interactions
WHERE target_id > 0 GROUP BY person_id, target_id, channel",
)?;
let rows = stmt.query_map([], |r| {
Ok(Pair {
person_id: r.get(0)?,
target_id: r.get(1)?,
channel: r.get(2)?,
count: r.get(3)?,
})
})?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}

View file

@ -0,0 +1,47 @@
use crate::store::Store;
use anyhow::Result;
use chrono::Utc;
use rusqlite::params;
use serde::{Deserialize, Serialize};
/// One logged interaction; fields mirror columns of the `interactions` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Interaction {
/// Row id in `interactions`.
pub id: i64,
/// The person the interaction is recorded for.
pub person_id: i64,
/// The other party; 0 means "no target" (such rows are left out of the relationship graph).
pub target_id: i64,
/// Free-form kind label.
pub interaction_type: String,
/// Channel label; an empty value is stored as "manual".
pub channel: String,
/// Free-form note.
pub content: String,
/// When the interaction happened (Unix seconds); 0 is stored as the current time.
pub timestamp: i64,
}
/// Stores an interaction and returns its row id.
///
/// A `timestamp` of 0 is replaced by the current time and an empty `channel`
/// by "manual"; `created_at` is always the current time.
pub fn log_interaction(store: &Store, i: &Interaction) -> Result<i64> {
    let now = Utc::now().timestamp();
    let happened_at = match i.timestamp {
        0 => now,
        explicit => explicit,
    };
    let channel: &str = match i.channel.as_str() {
        "" => "manual",
        named => named,
    };
    let conn = store.conn();
    conn.execute(
        "INSERT INTO interactions (person_id, target_id, interaction_type,
channel, content, timestamp, created_at)
VALUES (?1,?2,?3,?4,?5,?6,?7)",
        params![i.person_id, i.target_id, i.interaction_type,
            channel, i.content, happened_at, now],
    )?;
    Ok(conn.last_insert_rowid())
}
pub fn interactions_for(store: &Store, person_id: i64) -> Result<Vec<Interaction>> {
let mut stmt = store.conn().prepare(
"SELECT id, person_id, target_id, interaction_type, channel, content, timestamp
FROM interactions WHERE person_id=?1 ORDER BY timestamp DESC",
)?;
let rows = stmt.query_map(params![person_id], |r| {
Ok(Interaction {
id: r.get(0)?, person_id: r.get(1)?, target_id: r.get(2)?,
interaction_type: r.get(3)?, channel: r.get(4)?,
content: r.get(5)?, timestamp: r.get(6)?,
})
})?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}

View file

@ -0,0 +1,11 @@
//! kei-social-store — people + organizations + interactions.
pub mod graph;
pub mod interactions;
pub mod people;
pub mod schema;
pub mod search;
pub mod store;
pub use people::{Organization, Person};
pub use store::Store;

View file

@ -0,0 +1,100 @@
use clap::{Parser, Subcommand};
use kei_social_store::graph::relationship_graph;
use kei_social_store::interactions::{log_interaction, Interaction};
use kei_social_store::people::{add_org, add_person, Organization, Person};
use kei_social_store::search::search_people;
use kei_social_store::Store;
use std::path::PathBuf;
use std::process::ExitCode;
// Top-level command line of `kei-social-store`.
// (Plain `//` comments on purpose: `///` would be picked up by clap as help text.)
#[derive(Parser)]
#[command(name = "kei-social-store", version)]
struct Cli {
// `--db <path>`: explicit database file; when absent, `db_path` picks the location.
#[arg(long)] db: Option<PathBuf>,
// The subcommand to execute.
#[command(subcommand)] cmd: Cmd,
}
// Subcommands of the CLI.
// (Plain `//` comments on purpose: `///` would be picked up by clap as help text.)
#[derive(Subcommand)]
enum Cmd {
// Search people and print `id<TAB>name<TAB>email` for each hit.
SearchPeople { query: String, #[arg(long, default_value_t = 20)] limit: i64 },
// Add a person and print the new id.
AddPerson { name: String,
#[arg(long, default_value = "")] email: String,
#[arg(long, default_value = "")] handle: String,
#[arg(long, default_value = "manual")] source: String },
// Add an organization (or find the existing one with that name) and print its id.
AddOrg { name: String, #[arg(long, default_value = "company")] org_type: String },
// Log an interaction for `person_id` and print the new id; `target_id` 0 means no target.
LogInteraction { person_id: i64, interaction_type: String,
#[arg(long, default_value = "")] content: String,
#[arg(long, default_value = "manual")] channel: String,
#[arg(long, default_value_t = 0)] target_id: i64 },
// Print the aggregated person -> target interaction counts per channel.
RelationshipGraph,
}
/// Chooses the database file location.
///
/// Priority: the explicit path `o`, then the `KEI_SOCIAL_DB` environment
/// variable, then `$HOME/.claude/social/social.sqlite` (with `.` standing in
/// for an unreadable `HOME`).
fn db_path(o: Option<PathBuf>) -> PathBuf {
    o.or_else(|| std::env::var("KEI_SOCIAL_DB").ok().map(PathBuf::from))
        .unwrap_or_else(|| {
            let home = std::env::var("HOME").unwrap_or_else(|_| ".".into());
            PathBuf::from(home).join(".claude/social/social.sqlite")
        })
}
fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let s = Store::open(&db_path(cli.db))?;
dispatch(&s, cli.cmd)
}
/// Routes a parsed subcommand to the handler that implements it.
fn dispatch(s: &Store, cmd: Cmd) -> anyhow::Result<()> {
    match cmd {
        Cmd::RelationshipGraph => cmd_graph(s),
        Cmd::SearchPeople { query, limit } => cmd_search(s, &query, limit),
        Cmd::AddOrg { name, org_type } => cmd_add_org(s, name, org_type),
        Cmd::AddPerson { name, email, handle, source } => {
            cmd_add_person(s, name, email, handle, source)
        }
        Cmd::LogInteraction { person_id, interaction_type, content, channel, target_id } => {
            cmd_log(s, person_id, target_id, interaction_type, channel, content)
        }
    }
}
/// Prints one tab-separated `id name email` line for every person matching `query`.
fn cmd_search(s: &Store, query: &str, limit: i64) -> anyhow::Result<()> {
    let matches = search_people(s, query, limit)?;
    for person in matches {
        println!("{}\t{}\t{}", person.id, person.name, person.email);
    }
    Ok(())
}
/// Stores a new person built from the CLI arguments and prints the new id.
fn cmd_add_person(s: &Store, name: String, email: String,
                  handle: String, source: String) -> anyhow::Result<()> {
    let person = Person { name, email, handle, source, ..Default::default() };
    let new_id = add_person(s, &person)?;
    println!("{}", new_id);
    Ok(())
}
/// Stores (or finds) the organization named `name` and prints its id.
fn cmd_add_org(s: &Store, name: String, org_type: String) -> anyhow::Result<()> {
    let organization = Organization { name, org_type, ..Default::default() };
    let org_id = add_org(s, &organization)?;
    println!("{}", org_id);
    Ok(())
}
/// Logs an interaction built from the CLI arguments and prints the new id.
fn cmd_log(s: &Store, person_id: i64, target_id: i64, interaction_type: String,
           channel: String, content: String) -> anyhow::Result<()> {
    let entry = Interaction {
        person_id,
        target_id,
        interaction_type,
        channel,
        content,
        ..Default::default()
    };
    let new_id = log_interaction(s, &entry)?;
    println!("{}", new_id);
    Ok(())
}
/// Prints one `person -[channel]-> target (Nx)` line per relationship pair.
fn cmd_graph(s: &Store) -> anyhow::Result<()> {
    let pairs = relationship_graph(s)?;
    for pair in pairs {
        println!("{}\t-[{}]->\t{}\t({}x)",
            pair.person_id, pair.channel, pair.target_id, pair.count);
    }
    Ok(())
}
/// Entry point: reports any error chain on stderr and exits with status 1.
fn main() -> ExitCode {
    if let Err(e) = run() {
        eprintln!("kei-social-store: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,76 @@
use crate::store::Store;
use anyhow::Result;
use chrono::Utc;
use rusqlite::params;
use serde::{Deserialize, Serialize};
/// One person; fields mirror the columns of the `people` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Person {
/// Row id in `people`.
pub id: i64,
/// Display name; indexed for full-text search.
pub name: String,
/// E-mail address; unique across people whenever it is non-empty.
pub email: String,
/// Handle; unique per `source` whenever it is non-empty.
pub handle: String,
/// Free-form role label.
pub role: String,
/// Free-form organization name.
pub organization: String,
/// Origin of the record; an empty value is stored as "manual".
pub source: String,
/// Free-form biography; indexed for full-text search.
pub bio: String,
/// Creation time as a Unix timestamp in seconds.
pub created_at: i64,
/// Last-modified time as a Unix timestamp in seconds.
pub updated_at: i64,
}
/// One organization; fields mirror the columns of the `organizations` table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Organization {
/// Row id in `organizations`.
pub id: i64,
/// Name; unique across organizations.
pub name: String,
/// Kind label; an empty value is stored as "company".
pub org_type: String,
/// Free-form description.
pub description: String,
/// Creation time as a Unix timestamp in seconds.
pub created_at: i64,
}
/// Stores a person, indexes name, e-mail and bio for full-text search, and
/// returns the new row id.
///
/// An empty `source` is stored as "manual"; `created_at` and `updated_at` are
/// both set to the current time.
pub fn add_person(store: &Store, p: &Person) -> Result<i64> {
    let now = Utc::now().timestamp();
    let source: &str = match p.source.as_str() {
        "" => "manual",
        given => given,
    };
    let conn = store.conn();
    conn.execute(
        "INSERT INTO people (name, email, handle, role, organization,
source, bio, created_at, updated_at)
VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?8)",
        params![p.name, p.email, p.handle, p.role, p.organization,
            source, p.bio, now],
    )?;
    let person_id = conn.last_insert_rowid();
    conn.execute(
        "INSERT INTO fts_social (person_id, name, email, bio) VALUES (?1,?2,?3,?4)",
        params![person_id, p.name, p.email, p.bio],
    )?;
    Ok(person_id)
}
/// Loads the person with the given `id`, or `None` when no such row exists.
pub fn get_person(store: &Store, id: i64) -> Result<Option<Person>> {
    let mut stmt = store.conn().prepare(
        "SELECT id, name, email, handle, role, organization, source, bio,
created_at, updated_at FROM people WHERE id=?1",
    )?;
    let mut rows = stmt.query(params![id])?;
    let found = match rows.next()? {
        None => None,
        Some(row) => Some(Person {
            id: row.get(0)?,
            name: row.get(1)?,
            email: row.get(2)?,
            handle: row.get(3)?,
            role: row.get(4)?,
            organization: row.get(5)?,
            source: row.get(6)?,
            bio: row.get(7)?,
            created_at: row.get(8)?,
            updated_at: row.get(9)?,
        }),
    };
    Ok(found)
}
/// Insert an organization if the insert is not ignored, then return the id of
/// the row whose name equals `o.name`.
///
/// The insert uses `INSERT OR IGNORE`, so when it conflicts with an existing
/// row nothing is written and the existing row's id is returned. An empty
/// `o.org_type` is stored as `"company"`.
pub fn add_org(store: &Store, o: &Organization) -> Result<i64> {
    let conn = store.conn();
    let created_at = Utc::now().timestamp();
    let org_type: &str = match o.org_type.as_str() {
        "" => "company",
        given => given,
    };
    conn.execute(
        "INSERT OR IGNORE INTO organizations (name, org_type, description, created_at)\n\
         VALUES (?1,?2,?3,?4)",
        params![o.name, org_type, o.description, created_at],
    )?;
    let org_id = conn.query_row(
        "SELECT id FROM organizations WHERE name=?1",
        params![o.name],
        |row| row.get::<_, i64>(0),
    )?;
    Ok(org_id)
}

View file

@ -0,0 +1,51 @@
use rusqlite::{Connection, Result};
// Relational schema: the `people`, `organizations` and `interactions` tables
// plus their indexes. Every statement uses `IF NOT EXISTS`, so the batch can
// be executed against a database that already has the schema.
//
// Uniqueness rules encoded here:
//   * `people.email` is unique among rows whose email is non-empty.
//   * `(people.handle, people.source)` is unique among rows whose handle is
//     non-empty.
//   * `organizations.name` is unique.
//
// NOTE(review): SQLite only enforces `REFERENCES ... ON DELETE CASCADE` when
// foreign-key enforcement is switched on for the connection
// (`PRAGMA foreign_keys = ON`). This DDL does not set it — confirm the
// connection setup does if cascading deletes are relied upon.
const DDL_MAIN: &str = r#"
CREATE TABLE IF NOT EXISTS people (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
email TEXT DEFAULT '',
handle TEXT DEFAULT '',
role TEXT DEFAULT '',
organization TEXT DEFAULT '',
source TEXT NOT NULL DEFAULT 'manual',
bio TEXT DEFAULT '',
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_people_email
ON people(email) WHERE email != '';
CREATE UNIQUE INDEX IF NOT EXISTS idx_people_handle_source
ON people(handle, source) WHERE handle != '';
CREATE TABLE IF NOT EXISTS organizations (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
org_type TEXT DEFAULT 'company',
description TEXT DEFAULT '',
created_at INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS interactions (
id INTEGER PRIMARY KEY,
person_id INTEGER NOT NULL REFERENCES people(id) ON DELETE CASCADE,
target_id INTEGER NOT NULL DEFAULT 0,
interaction_type TEXT NOT NULL,
channel TEXT NOT NULL DEFAULT 'manual',
content TEXT DEFAULT '',
timestamp INTEGER NOT NULL,
created_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_int_person ON interactions(person_id);
"#;
// Full-text index over people. `name`, `email` and `bio` are searchable;
// `person_id` is declared UNINDEXED, so it is stored but not searched.
// Text is tokenized with the `porter` stemmer wrapped around `unicode61`.
const DDL_FTS: &str = r#"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_social
USING fts5(person_id UNINDEXED, name, email, bio, tokenize='porter unicode61');
"#;
/// Create the schema on `conn`: the relational tables first (`DDL_MAIN`),
/// then the full-text index (`DDL_FTS`).
///
/// Stops at, and returns, the first SQLite error; the FTS batch is not
/// attempted if the main batch fails.
pub fn create_schema(conn: &Connection) -> Result<()> {
    conn.execute_batch(DDL_MAIN)
        .and_then(|()| conn.execute_batch(DDL_FTS))
}

View file

@ -0,0 +1,25 @@
use crate::people::Person;
use crate::store::Store;
use anyhow::Result;
use rusqlite::params;
pub fn search_people(store: &Store, q: &str, limit: i64) -> Result<Vec<Person>> {
let lim = if limit <= 0 { 20 } else { limit };
let mut stmt = store.conn().prepare(
"SELECT p.id, p.name, p.email, p.handle, p.role, p.organization,
p.source, p.bio, p.created_at, p.updated_at
FROM fts_social f
JOIN people p ON p.id = f.person_id
WHERE fts_social MATCH ?1 ORDER BY rank LIMIT ?2",
)?;
let rows = stmt.query_map(params![q, lim], |r| {
Ok(Person {
id: r.get(0)?, name: r.get(1)?, email: r.get(2)?, handle: r.get(3)?,
role: r.get(4)?, organization: r.get(5)?, source: r.get(6)?,
bio: r.get(7)?, created_at: r.get(8)?, updated_at: r.get(9)?,
})
})?;
let mut out = Vec::new();
for r in rows { out.push(r?); }
Ok(out)
}

View file

@ -0,0 +1,22 @@
use crate::schema::create_schema;
use anyhow::{Context, Result};
use rusqlite::Connection;
use std::path::Path;
pub struct Store { conn: Connection }
impl Store {
pub fn open(path: &Path) -> Result<Self> {
if let Some(parent) = path.parent() { let _ = std::fs::create_dir_all(parent); }
let conn = Connection::open(path).context("open sqlite")?;
conn.pragma_update(None, "journal_mode", "WAL").ok();
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn open_memory() -> Result<Self> {
let conn = Connection::open_in_memory()?;
create_schema(&conn)?;
Ok(Self { conn })
}
pub fn conn(&self) -> &Connection { &self.conn }
}

View file

@ -0,0 +1,68 @@
use kei_social_store::graph::relationship_graph;
use kei_social_store::interactions::{interactions_for, log_interaction, Interaction};
use kei_social_store::people::{add_org, add_person, get_person, Organization, Person};
use kei_social_store::search::search_people;
use kei_social_store::Store;
/// Fresh in-memory store for a single test.
fn mk() -> Store {
    let opened = Store::open_memory();
    opened.unwrap()
}
/// A person written with `add_person` can be read back with `get_person`.
#[test]
fn people_crud() {
    let store = mk();
    let alice = Person {
        name: "Alice".into(),
        email: "alice@example.com".into(),
        ..Default::default()
    };
    let alice_id = add_person(&store, &alice).unwrap();
    let fetched = get_person(&store, alice_id).unwrap().unwrap();
    assert_eq!(fetched.name, "Alice");
}
/// Adding the same organization name twice yields the same id both times.
#[test]
fn orgs_idempotent() {
    let store = mk();
    let acme = || Organization { name: "Acme".into(), ..Default::default() };
    let first = add_org(&store, &acme()).unwrap();
    let second = add_org(&store, &acme()).unwrap();
    assert_eq!(first, second);
}
/// A logged interaction shows up in the history for that person.
#[test]
fn interactions_tracked() {
    let store = mk();
    let bob = Person { name: "Bob".into(), ..Default::default() };
    let bob_id = add_person(&store, &bob).unwrap();

    let email = Interaction {
        person_id: bob_id,
        interaction_type: "email".into(),
        content: "hi".into(),
        channel: "gmail".into(),
        ..Default::default()
    };
    log_interaction(&store, &email).unwrap();

    let history = interactions_for(&store, bob_id).unwrap();
    assert_eq!(history.len(), 1);
    assert_eq!(history[0].interaction_type, "email");
}
/// A word from a person's bio is enough for `search_people` to find them.
#[test]
fn search_finds_person() {
    let store = mk();
    let carol = Person {
        name: "Carol Chang".into(),
        bio: "rust async".into(),
        ..Default::default()
    };
    add_person(&store, &carol).unwrap();

    let results = search_people(&store, "rust", 10).unwrap();
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].name, "Carol Chang");
}
/// Three identical interactions between the same pair collapse into a single
/// graph entry with a count of three.
#[test]
fn relationship_graph_groups() {
    let store = mk();
    let named = |name: &str| Person { name: name.into(), ..Default::default() };
    let from = add_person(&store, &named("A")).unwrap();
    let to = add_person(&store, &named("B")).unwrap();

    (0..3).for_each(|_| {
        let msg = Interaction {
            person_id: from,
            target_id: to,
            interaction_type: "msg".into(),
            channel: "slack".into(),
            ..Default::default()
        };
        log_interaction(&store, &msg).unwrap();
    });

    let edges = relationship_graph(&store).unwrap();
    assert_eq!(edges.len(), 1);
    assert_eq!(edges[0].count, 3);
}

View file

@ -1,10 +1,15 @@
//! Factory — construct a `Box<dyn MemoryStore>` from a Config.
//!
//! v0.14.1: the S3 backend is gated behind `KEI_STORE_ALLOW_S3_STUB=1`
//! because it does NOT push to S3 yet — it's a local-manifest stub.
//! Previous behaviour silently stored data locally, confusing users who
//! thought their traces were uploaded.
use crate::config::{expand_tilde, Config};
use crate::{filesystem::FilesystemStore, forgejo::ForgejoStore, gitea::GiteaStore,
github::GitHubStore, s3::S3Store};
use crate::store_trait::MemoryStore;
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, bail, Context, Result};
use std::path::PathBuf;
pub fn build_store(cfg: &Config) -> Result<Box<dyn MemoryStore>> {
@ -18,17 +23,30 @@ pub fn build_store(cfg: &Config) -> Result<Box<dyn MemoryStore>> {
"github" => Ok(Box::new(GitHubStore::new(local, cfg.github.clone())?)),
"forgejo" => Ok(Box::new(ForgejoStore::new(local, cfg.forgejo.clone())?)),
"gitea" => Ok(Box::new(GiteaStore::new(local, cfg.gitea.clone())?)),
"s3" => {
let cache = cfg
.s3
.cache_path
.as_deref()
.map(expand_tilde)
.map(PathBuf::from)
.ok_or_else(|| anyhow!("s3 backend requires s3.cache_path"))?;
Ok(Box::new(S3Store::new(cache, cfg.s3.clone())?))
}
"s3" => build_s3(cfg),
other => Err(anyhow!("unknown backend: {other}"))
.context("supported: filesystem | github | forgejo | gitea | s3"),
}
}
/// Build the S3 backend, which is currently a local-only stub.
///
/// The stub is refused unless the operator opts in through the
/// `KEI_STORE_ALLOW_S3_STUB` environment variable. The variable counts as an
/// opt-in only when it is set to a value other than the empty string, `0`,
/// `false`, `no` or `off` (case-insensitive, surrounding whitespace ignored);
/// otherwise `KEI_STORE_ALLOW_S3_STUB=0` would switch the stub *on*. A value
/// that is not valid Unicode is treated as not set.
///
/// On opt-in a warning is printed to stderr and the store is rooted at
/// `s3.cache_path` (with `~` expanded).
///
/// # Errors
/// Fails when the opt-in is missing, when `s3.cache_path` is not configured,
/// or when `S3Store::new` fails.
fn build_s3(cfg: &Config) -> Result<Box<dyn MemoryStore>> {
    let opted_in = std::env::var("KEI_STORE_ALLOW_S3_STUB")
        .map(|raw| {
            !matches!(
                raw.trim().to_ascii_lowercase().as_str(),
                "" | "0" | "false" | "no" | "off"
            )
        })
        .unwrap_or(false);
    if !opted_in {
        bail!(
            "S3 backend is a local-only MVP stub (no upload to S3/R2/MinIO yet). \
             Set KEI_STORE_ALLOW_S3_STUB=1 to proceed; data will be stored in the \
             configured cache_path only. Production S3 support is planned for v0.15."
        );
    }
    eprintln!(
        "[kei-store] WARNING: S3 backend is a local-only stub — data stored \
         at cache_path only, not pushed to any object store."
    );
    let cache = cfg
        .s3
        .cache_path
        .as_deref()
        .map(expand_tilde)
        .map(PathBuf::from)
        .ok_or_else(|| anyhow!("s3 backend requires s3.cache_path"))?;
    Ok(Box::new(S3Store::new(cache, cfg.s3.clone())?))
}

View file

@ -2,11 +2,14 @@
//!
//! Reuses git2 for branch/commit so behavior parity with remote stores is
//! maintained. `push`/`pull` are intentional no-ops.
//!
//! v0.14.1 hardening: `full()` now rejects absolute paths and `..` components
//! (CVE-class: path traversal via MCP `write`/`read` tool inputs).
use crate::store_trait::MemoryStore;
use anyhow::{Context, Result};
use anyhow::{bail, Context, Result};
use std::fs;
use std::path::{Path, PathBuf};
use std::path::{Component, Path, PathBuf};
pub struct FilesystemStore {
pub root: PathBuf,
@ -20,11 +23,34 @@ impl FilesystemStore {
Ok(Self { root })
}
fn full(&self, rel: &str) -> PathBuf {
self.root.join(rel)
fn full(&self, rel: &str) -> Result<PathBuf> {
safe_join(&self.root, rel)
}
}
/// Join `rel` onto `root`, refusing anything that could point outside `root`.
///
/// `PathBuf::join` replaces the base entirely when handed an absolute path,
/// so the input is validated BEFORE joining: absolute paths are rejected, and
/// so is any path containing a `..`, root or prefix component.
///
/// NOTE(review): the check is purely lexical — it inspects the components of
/// `rel` and does not touch the filesystem, so symlinks are not resolved.
pub(crate) fn safe_join(root: &Path, rel: &str) -> Result<PathBuf> {
    let candidate = Path::new(rel);
    if candidate.is_absolute() {
        bail!("path traversal rejected: absolute path {:?}", rel);
    }
    // Components are examined in order; the first offending one decides
    // which error is reported.
    for part in candidate.components() {
        if matches!(part, Component::ParentDir) {
            bail!("path traversal rejected: parent-dir component in {:?}", rel);
        }
        if matches!(part, Component::Prefix(_) | Component::RootDir) {
            bail!("path traversal rejected: root/prefix component in {:?}", rel);
        }
    }
    Ok(root.join(candidate))
}
fn ensure_repo(root: &Path) -> Result<()> {
if root.join(".git").exists() {
return Ok(());
@ -35,11 +61,11 @@ fn ensure_repo(root: &Path) -> Result<()> {
impl MemoryStore for FilesystemStore {
fn read(&self, path: &str) -> Result<Vec<u8>> {
fs::read(self.full(path)).with_context(|| format!("read {}", path))
fs::read(self.full(path)?).with_context(|| format!("read {}", path))
}
fn write(&self, path: &str, bytes: &[u8]) -> Result<()> {
let full = self.full(path);
let full = self.full(path)?;
if let Some(parent) = full.parent() {
fs::create_dir_all(parent)?;
}
@ -47,7 +73,7 @@ impl MemoryStore for FilesystemStore {
}
fn list(&self, dir: &str) -> Result<Vec<String>> {
let full = self.full(dir);
let full = self.full(dir)?;
if !full.exists() {
return Ok(Vec::new());
}
@ -103,3 +129,44 @@ impl MemoryStore for FilesystemStore {
"filesystem"
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a store rooted at `<tempdir>/root`. The `TempDir` is returned
    /// alongside it so the directory stays alive for the whole test.
    fn fresh_store() -> (tempfile::TempDir, FilesystemStore) {
        let tmp = tempfile::tempdir().unwrap();
        let store = FilesystemStore::new(tmp.path().join("root")).unwrap();
        (tmp, store)
    }

    /// Writing to an absolute path is refused.
    #[test]
    fn test_absolute_path_rejected() {
        let (_tmp, store) = fresh_store();
        let err = store.write("/etc/passwd", b"nope").unwrap_err();
        let s = format!("{err:#}");
        assert!(s.contains("absolute"), "unexpected err: {s}");
    }

    /// Writing through `..` components is refused.
    #[test]
    fn test_parent_dir_rejected() {
        let (_tmp, store) = fresh_store();
        let err = store.write("../../.ssh/authorized_keys", b"nope").unwrap_err();
        let s = format!("{err:#}");
        assert!(s.contains("parent-dir"), "unexpected err: {s}");
    }

    /// An ordinary relative path round-trips through write and read.
    #[test]
    fn test_normal_path_ok() {
        let (_tmp, store) = fresh_store();
        store.write("traces/session.jsonl", b"ok").unwrap();
        let bytes = store.read("traces/session.jsonl").unwrap();
        assert_eq!(&bytes, b"ok");
    }

    /// Reading from an absolute path is refused as well.
    #[test]
    fn test_read_absolute_path_rejected() {
        let (_tmp, store) = fresh_store();
        let err = store.read("/etc/passwd").unwrap_err();
        let s = format!("{err:#}");
        assert!(s.contains("absolute"), "unexpected err: {s}");
    }
}

View file

@ -4,11 +4,16 @@
//! remote. SSH auth via `KEI_MEMORY_SSH_KEY` (path to key); HTTPS via
//! `KEI_MEMORY_PAT` (token). Exactly the pattern used in v0.11
//! `kei-sleep-setup.sh`.
//!
//! v0.14.1: pushes to `github.com` are blocked by default under RULE 0.1
//! (patent-IP protection). Forks on Forgejo / Gitea / self-hosted are
//! unaffected since they do not resolve to `github.com`. Override for a
//! genuinely public repo: `KEI_STORE_ALLOW_GITHUB_PUSH=1`.
use crate::config::GitRemoteCfg;
use crate::filesystem::FilesystemStore;
use crate::store_trait::MemoryStore;
use anyhow::{Context, Result};
use anyhow::{bail, Context, Result};
use std::path::PathBuf;
pub struct GitHubStore {
@ -75,8 +80,9 @@ impl MemoryStore for GitHubStore {
}
fn push(&self, branch: &str) -> Result<()> {
let repo = git2::Repository::open(&self.inner.root)?;
let url = self.remote_url()?;
enforce_github_push_guard(url)?;
let repo = git2::Repository::open(&self.inner.root)?;
let mut remote = match repo.find_remote("origin") {
Ok(r) => r,
Err(_) => repo.remote("origin", url)?,
@ -105,3 +111,61 @@ impl MemoryStore for GitHubStore {
self.name
}
}
/// RULE 0.1 enforcement point for the kei-store push path.
///
/// Blocks pushes whose URL contains `github.com` unless the caller explicitly
/// opts in via `KEI_STORE_ALLOW_GITHUB_PUSH=1`. URLs that do not mention
/// `github.com` (Forgejo, Gitea, self-hosted) always pass.
///
/// Two details keep the guard from being bypassed or flipped by accident:
///
/// * The URL is compared case-insensitively, because host names are not
///   case-sensitive — `git@GitHub.com:owner/repo.git` must be blocked too.
/// * The environment variable counts as an opt-in only when it is set to a
///   value other than the empty string, `0`, `false`, `no` or `off`
///   (case-insensitive, surrounding whitespace ignored). A value that is not
///   valid Unicode is treated as not set.
///
/// # Errors
/// Returns an error describing the block when the URL targets `github.com`
/// and no opt-in is present.
pub(crate) fn enforce_github_push_guard(url: &str) -> Result<()> {
    if !url.to_ascii_lowercase().contains("github.com") {
        return Ok(());
    }
    let opted_in = std::env::var("KEI_STORE_ALLOW_GITHUB_PUSH")
        .map(|raw| {
            !matches!(
                raw.trim().to_ascii_lowercase().as_str(),
                "" | "0" | "false" | "no" | "off"
            )
        })
        .unwrap_or(false);
    if opted_in {
        return Ok(());
    }
    bail!(
        "push to github.com blocked by RULE 0.1 (patent-IP protection). \
         Set KEI_STORE_ALLOW_GITHUB_PUSH=1 if this is a public-safe release. \
         See ~/.claude/rules/security.md for banned-project criteria."
    )
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    // The process environment is shared by every test thread, so tests that
    // read or write KEI_STORE_ALLOW_GITHUB_PUSH take this lock first.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    const OPT_IN_VAR: &str = "KEI_STORE_ALLOW_GITHUB_PUSH";
    const GITHUB_SSH_URL: &str = "git@github.com:owner/repo.git";

    /// Take the environment lock. A lock poisoned by a panicking test is
    /// still usable, so the guard is recovered rather than propagating the
    /// poison.
    fn lock_env() -> std::sync::MutexGuard<'static, ()> {
        match ENV_LOCK.lock() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        }
    }

    /// Without the opt-in variable a github.com push is refused.
    #[test]
    fn test_github_push_blocked_without_env_var() {
        let _guard = lock_env();
        std::env::remove_var(OPT_IN_VAR);
        let err = enforce_github_push_guard(GITHUB_SSH_URL).unwrap_err();
        let msg = format!("{err:#}");
        assert!(msg.contains("github.com"), "unexpected err: {msg}");
        assert!(msg.contains("RULE 0.1"), "unexpected err: {msg}");
    }

    /// With the opt-in variable set to `1` the same push is allowed.
    #[test]
    fn test_github_push_allowed_with_env_var() {
        let _guard = lock_env();
        std::env::set_var(OPT_IN_VAR, "1");
        let outcome = enforce_github_push_guard(GITHUB_SSH_URL);
        // Clean up before asserting so a failure does not leak the variable
        // into other tests.
        std::env::remove_var(OPT_IN_VAR);
        assert!(outcome.is_ok(), "should allow with opt-in env var");
    }

    /// Non-github remotes pass whatever the environment says. The lock is
    /// still taken so the variable is never observed half-set mid-test.
    #[test]
    fn test_non_github_push_always_allowed() {
        let _guard = lock_env();
        enforce_github_push_guard("ssh://git@forgejo.local:2222/user/repo.git").unwrap();
        enforce_github_push_guard("https://gitea.example.com/user/repo.git").unwrap();
    }
}

View file

@ -1,16 +1,26 @@
//! S3Store — object-storage backend (MVP stub).
//! S3Store — object-storage backend (MVP stub; v0.14.1 local-only).
//!
//! This is a local-manifest-based implementation intended as an offline MVP.
//! Reads/writes go to `cache_path`; `commit` serialises a
//! `manifest-<hash>.json` listing the current file tree + content hash;
//! `push`/`pull` are NO-OPs in stub mode.
//!
//! v0.14.1: because the backend does NOT actually reach S3, the factory
//! now refuses to build an `S3Store` unless `KEI_STORE_ALLOW_S3_STUB=1`
//! is set. Previously users who configured S3 were silently writing to a
//! local cache with no remote push. See `factory.rs` for the guard.
//!
//! v0.14.1 hardening: `full()` rejects absolute paths and `..` components
//! (same CVE class as `filesystem.rs` — user-supplied `rel` could escape
//! the cache root).
//!
//! Production S3/R2/MinIO support is planned via `aws-sdk-s3` behind a
//! feature flag — see README §Store backends. This stub keeps the trait
//! surface honest so downstream code can exercise the full kei-store
//! API without pulling a ~20 MB AWS SDK at install time.
use crate::config::S3Cfg;
use crate::filesystem::safe_join;
use crate::store_trait::MemoryStore;
use anyhow::{Context, Result};
use std::fs;
@ -27,18 +37,18 @@ impl S3Store {
Ok(Self { cache, cfg })
}
fn full(&self, rel: &str) -> PathBuf {
self.cache.join(rel)
fn full(&self, rel: &str) -> Result<PathBuf> {
safe_join(&self.cache, rel)
}
}
impl MemoryStore for S3Store {
fn read(&self, path: &str) -> Result<Vec<u8>> {
fs::read(self.full(path)).with_context(|| format!("read {}", path))
fs::read(self.full(path)?).with_context(|| format!("read {}", path))
}
fn write(&self, path: &str, bytes: &[u8]) -> Result<()> {
let full = self.full(path);
let full = self.full(path)?;
if let Some(parent) = full.parent() {
fs::create_dir_all(parent)?;
}
@ -47,7 +57,7 @@ impl MemoryStore for S3Store {
}
fn list(&self, dir: &str) -> Result<Vec<String>> {
let full = self.full(dir);
let full = self.full(dir)?;
if !full.exists() {
return Ok(Vec::new());
}
@ -65,8 +75,11 @@ impl MemoryStore for S3Store {
}
fn branch(&self, name: &str) -> Result<()> {
// Logical snapshot namespace — stored under cache/<branch>/
fs::create_dir_all(self.cache.join(name))?;
// Logical snapshot namespace — stored under cache/<branch>/.
// Also guarded against traversal so a malicious branch name cannot
// escape the cache root.
let dir = self.full(name)?;
fs::create_dir_all(dir)?;
Ok(())
}
@ -88,7 +101,7 @@ impl MemoryStore for S3Store {
}
fn backend_name(&self) -> &'static str {
"s3-stub"
"s3-local-stub"
}
}
@ -120,3 +133,39 @@ fn short_hash(s: &str) -> String {
}
format!("{:x}", h)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Construct a stub store with a default configuration.
    fn store(root: PathBuf) -> S3Store {
        S3Store::new(root, S3Cfg::default()).unwrap()
    }

    /// Store rooted at `<tempdir>/cache`. The `TempDir` is returned too so
    /// the directory outlives the test body.
    fn cache_store() -> (tempfile::TempDir, S3Store) {
        let tmp = tempfile::tempdir().unwrap();
        let s = store(tmp.path().join("cache"));
        (tmp, s)
    }

    /// Absolute paths must not escape the cache root.
    #[test]
    fn test_absolute_path_rejected_s3() {
        let (_tmp, s) = cache_store();
        let err = s.write("/etc/passwd", b"nope").unwrap_err();
        let msg = format!("{err:#}");
        assert!(msg.contains("absolute"), "unexpected err: {msg}");
    }

    /// `..` components must not escape the cache root.
    #[test]
    fn test_parent_dir_rejected_s3() {
        let (_tmp, s) = cache_store();
        let err = s.write("../../secret", b"nope").unwrap_err();
        let msg = format!("{err:#}");
        assert!(msg.contains("parent-dir"), "unexpected err: {msg}");
    }

    /// A plain relative path round-trips through write and read.
    #[test]
    fn test_normal_path_ok_s3() {
        let (_tmp, s) = cache_store();
        s.write("a/b.txt", b"ok").unwrap();
        let bytes = s.read("a/b.txt").unwrap();
        assert_eq!(&bytes, b"ok");
    }
}

View file

@ -23,6 +23,15 @@ fn run(args: &[&str]) -> std::process::Output {
std::process::Command::new(bin()).args(args).output().unwrap()
}
/// Run the binary under test with `args`, adding each `(name, value)` pair in
/// `env` to the child's environment, and return its captured output.
///
/// Panics if the process cannot be spawned.
fn run_with_env(args: &[&str], env: &[(&str, &str)]) -> std::process::Output {
    std::process::Command::new(bin())
        .args(args)
        .envs(env.iter().copied())
        .output()
        .unwrap()
}
#[test]
fn init_writes_config() {
let tmp = TempDir::new().unwrap();
@ -111,8 +120,15 @@ fn s3_stub_commit_writes_manifest() {
let cfg = write_config(&tmp, "s3", &local);
let file = tmp.path().join("x");
fs::write(&file, b"x").unwrap();
run(&["--config", cfg.to_str().unwrap(), "write", "a.txt", file.to_str().unwrap()]);
let out = run(&["--config", cfg.to_str().unwrap(), "commit", "--message", "first"]);
// v0.14.1: S3 stub requires explicit opt-in env var.
run_with_env(
&["--config", cfg.to_str().unwrap(), "write", "a.txt", file.to_str().unwrap()],
&[("KEI_STORE_ALLOW_S3_STUB", "1")],
);
let out = run_with_env(
&["--config", cfg.to_str().unwrap(), "commit", "--message", "first"],
&[("KEI_STORE_ALLOW_S3_STUB", "1")],
);
assert!(out.status.success(), "{}", String::from_utf8_lossy(&out.stderr));
let entries: Vec<_> = fs::read_dir(&local)
.unwrap()
@ -122,6 +138,21 @@ fn s3_stub_commit_writes_manifest() {
assert_eq!(entries.len(), 1);
}
/// Without KEI_STORE_ALLOW_S3_STUB in the environment, `status` against an
/// s3-configured store must fail and name the variable in its error output.
#[test]
fn s3_backend_requires_env_optin() {
    let tmp = TempDir::new().unwrap();
    let cache_dir = tmp.path().join("cache");
    let cfg = write_config(&tmp, "s3", &cache_dir);

    // Strip the variable explicitly so an opt-in inherited from the parent
    // environment cannot make this test pass by accident.
    let out = std::process::Command::new(bin())
        .args(["--config", cfg.to_str().unwrap(), "status"])
        .env_remove("KEI_STORE_ALLOW_S3_STUB")
        .output()
        .unwrap();

    assert!(!out.status.success());
    let msg = String::from_utf8_lossy(&out.stderr);
    assert!(msg.contains("KEI_STORE_ALLOW_S3_STUB"), "expected stub-gate message, got: {msg}");
}
#[test]
fn status_reports_backend() {
let tmp = TempDir::new().unwrap();

Some files were not shown because too many files have changed in this diff Show more