From 32f2e8a288b5b261ea73326b58bac4a90c8b1824 Mon Sep 17 00:00:00 2001 From: Parfii-bot Date: Thu, 23 Apr 2026 18:15:44 +0800 Subject: [PATCH] feat(wave15): kei-dna-index + kei-fork Option-D path convention fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 46 crates, 744 tests green (up from 726 at v0.31.0). ## kei-dna-index (new) — read-only adjacency analysis over kei-ledger Answers "who else touched same files / solved same task / ran nearby in time". Does NOT mutate ledger — parses DNA strings in memory. Respects SSoT (DNA string is the single source; columns NOT duplicated). Public API: - adjacent(target_dna, kind) — 5 kinds: Scope / Body / Role / Temporal / All - cluster_by(scope|body|role) — group DNAs, ≥2 members per cluster - precedent(body_sha, status_filter) — find past successful runs of same task - stats — totals, unique scopes/bodies, avg cluster size CLI: - kei-dna-index adjacent --dna D [--by kind] [--limit N] [--db PATH] - kei-dna-index cluster --by scope|body|role - kei-dna-index precedent --body HEX [--status merged|failed|all] - kei-dna-index stats 18 tests pass (13 integration + 5 parsed unit). Zero sibling deps (no kei-ledger, no kei-agent-runtime path imports — standalone tool). Separation of concerns: kei-ledger stays PURE provenance primitive. Analytical layer lives in kei-dna-index. Can swap implementations (naive scan → cached → embeddings) without touching ledger schema. ## kei-fork v0.31.2 — Option D path convention Moved fork worktree root from `.claude/forks/<agent_id>/` to `_forks/<agent_id>/`. 
Reasons: - `.claude/` is Anthropic-reserved; kit artefacts shouldn't pollute it - Claude Code sandbox denies Write in `.claude/forks/` for agents - `_forks/` matches existing kit convention (_primitives/, _roles/, _archive/, _blocks/, _capabilities/, _agents/) - Independent namespace — no coupling to Claude Code internals 13 existing kei-fork tests still pass (they use tempfile kit_roots so path convention is transparent). ## Usage enabled by these two - kei-prune can now query "all DNAs in same scope-cluster" → retire dupes - kei-brain-view can cluster-render instead of tree-render - Three-role pipeline (writer/auditor/merger) can use precedent() to find successful past patterns for same body-hash - Agents with worktree isolation can write to _forks/ without sandbox permission issues Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 1 + _primitives/_rust/Cargo.lock | 12 + _primitives/_rust/Cargo.toml | 2 + _primitives/_rust/kei-dna-index/Cargo.toml | 24 ++ .../_rust/kei-dna-index/src/adjacency.rs | 155 +++++++ .../_rust/kei-dna-index/src/cluster.rs | 50 +++ _primitives/_rust/kei-dna-index/src/db.rs | 63 +++ _primitives/_rust/kei-dna-index/src/error.rs | 25 ++ _primitives/_rust/kei-dna-index/src/lib.rs | 20 + _primitives/_rust/kei-dna-index/src/main.rs | 126 ++++++ _primitives/_rust/kei-dna-index/src/parsed.rs | 120 ++++++ .../_rust/kei-dna-index/src/precedent.rs | 34 ++ _primitives/_rust/kei-dna-index/src/stats.rs | 64 +++ .../tests/dna_index_integration.rs | 405 ++++++++++++++++++ _primitives/_rust/kei-fork/src/collect.rs | 4 +- _primitives/_rust/kei-fork/src/create.rs | 6 +- _primitives/_rust/kei-fork/src/list.rs | 6 +- _primitives/_rust/kei-fork/src/rescue.rs | 4 +- 18 files changed, 1111 insertions(+), 10 deletions(-) create mode 100644 _primitives/_rust/kei-dna-index/Cargo.toml create mode 100644 _primitives/_rust/kei-dna-index/src/adjacency.rs create mode 100644 _primitives/_rust/kei-dna-index/src/cluster.rs create mode 100644 
_primitives/_rust/kei-dna-index/src/db.rs create mode 100644 _primitives/_rust/kei-dna-index/src/error.rs create mode 100644 _primitives/_rust/kei-dna-index/src/lib.rs create mode 100644 _primitives/_rust/kei-dna-index/src/main.rs create mode 100644 _primitives/_rust/kei-dna-index/src/parsed.rs create mode 100644 _primitives/_rust/kei-dna-index/src/precedent.rs create mode 100644 _primitives/_rust/kei-dna-index/src/stats.rs create mode 100644 _primitives/_rust/kei-dna-index/tests/dna_index_integration.rs diff --git a/.gitignore b/.gitignore index b4f3243..e91b3f1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _primitives/_rust/target/ .claude/worktrees/ **/.claude/worktrees/ .claude/forks/ +_forks/ # kei-fork internal markers (should never leak into main) .DONE diff --git a/_primitives/_rust/Cargo.lock b/_primitives/_rust/Cargo.lock index 4c80137..312cbdd 100644 --- a/_primitives/_rust/Cargo.lock +++ b/_primitives/_rust/Cargo.lock @@ -2532,6 +2532,18 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "kei-dna-index" +version = "0.1.0" +dependencies = [ + "clap", + "rusqlite", + "serde", + "serde_json", + "tempfile", + "thiserror 1.0.69", +] + [[package]] name = "kei-entity-store" version = "0.1.0" diff --git a/_primitives/_rust/Cargo.toml b/_primitives/_rust/Cargo.toml index 31cfd7f..e62fbd9 100644 --- a/_primitives/_rust/Cargo.toml +++ b/_primitives/_rust/Cargo.toml @@ -69,6 +69,8 @@ members = [ "kei-ledger-sign", # v0.31 Wave 15 — managed git worktree + ledger lifecycle (fork/collect/gc/rescue) "kei-fork", + # v0.32 Wave 15 — read-only DNA adjacency/cluster/precedent over kei-ledger + "kei-dna-index", ] [workspace.package] diff --git a/_primitives/_rust/kei-dna-index/Cargo.toml b/_primitives/_rust/kei-dna-index/Cargo.toml new file mode 100644 index 0000000..35e1244 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "kei-dna-index" +version = "0.1.0" +edition = "2021" +rust-version = "1.75" 
+description = "Read-only adjacency/cluster/precedent index over kei-ledger DNAs" + +[[bin]] +name = "kei-dna-index" +path = "src/main.rs" + +[lib] +name = "kei_dna_index" +path = "src/lib.rs" + +[dependencies] +rusqlite = { version = "0.31", features = ["bundled"] } +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +thiserror = "1" + +[dev-dependencies] +tempfile = "3" diff --git a/_primitives/_rust/kei-dna-index/src/adjacency.rs b/_primitives/_rust/kei-dna-index/src/adjacency.rs new file mode 100644 index 0000000..b248513 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/adjacency.rs @@ -0,0 +1,155 @@ +//! Adjacency queries over DNAs. +//! +//! Constructor Pattern: one file = one responsibility (adjacency kinds). + +use crate::db::{find_target, load_rows, Row}; +use crate::error::{Error, Result}; +use rusqlite::Connection; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum AdjacencyKind { + Scope, + Body, + Role, + Temporal, + All, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum Relationship { + SameScope, + SameBody, + SameRoleCaps, + TemporalNeighbor, + Cluster, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AdjacencyResult { + pub dna: String, + pub agent_id: String, + pub status: String, + pub distance: u32, + pub relationship: Relationship, +} + +pub fn adjacent( + conn: &Connection, + target_dna: &str, + kind: AdjacencyKind, + limit: usize, +) -> Result> { + let rows = load_rows(conn)?; + let target = find_target(&rows, target_dna) + .ok_or_else(|| Error::TargetNotFound(target_dna.to_string()))? 
+ .clone(); + let results = match kind { + AdjacencyKind::Scope => same_scope(&rows, &target), + AdjacencyKind::Body => same_body(&rows, &target), + AdjacencyKind::Role => same_role_caps(&rows, &target), + AdjacencyKind::Temporal => temporal(&rows, &target), + AdjacencyKind::All => all_union(&rows, &target), + }; + Ok(truncate(results, limit)) +} + +fn same_scope(rows: &[Row], target: &Row) -> Vec { + rows.iter() + .filter(|r| r.dna != target.dna) + .filter(|r| r.parsed.scope_sha == target.parsed.scope_sha) + .map(|r| make(r, 0, Relationship::SameScope)) + .collect() +} + +fn same_body(rows: &[Row], target: &Row) -> Vec { + rows.iter() + .filter(|r| r.dna != target.dna) + .filter(|r| r.parsed.body_sha == target.parsed.body_sha) + .map(|r| make(r, 0, Relationship::SameBody)) + .collect() +} + +fn same_role_caps(rows: &[Row], target: &Row) -> Vec { + let mut out: Vec = rows + .iter() + .filter(|r| r.dna != target.dna) + .filter(|r| r.parsed.role == target.parsed.role) + .map(|r| { + let d = hamming(&r.parsed.caps, &target.parsed.caps); + make(r, d, Relationship::SameRoleCaps) + }) + .collect(); + out.sort_by_key(|r| r.distance); + out +} + +fn temporal(rows: &[Row], target: &Row) -> Vec { + let mut out: Vec = rows + .iter() + .filter(|r| r.dna != target.dna) + .map(|r| { + let d = (r.started_ts - target.started_ts).unsigned_abs() as u32; + make(r, d, Relationship::TemporalNeighbor) + }) + .collect(); + out.sort_by_key(|r| r.distance); + out +} + +fn all_union(rows: &[Row], target: &Row) -> Vec { + let mut bag: Vec = Vec::new(); + bag.extend(same_scope(rows, target)); + bag.extend(same_body(rows, target)); + bag.extend(same_role_caps(rows, target)); + bag.extend(temporal(rows, target)); + dedup_min_distance(bag) +} + +fn dedup_min_distance(bag: Vec) -> Vec { + let mut seen: std::collections::HashMap = + std::collections::HashMap::new(); + for r in bag { + seen.entry(r.dna.clone()) + .and_modify(|cur| { + if r.distance < cur.distance { + *cur = r.clone(); + } + }) + 
.or_insert(r); + } + let mut out: Vec = seen.into_values().collect(); + out.sort_by_key(|r| r.distance); + out +} + +fn make(r: &Row, distance: u32, relationship: Relationship) -> AdjacencyResult { + AdjacencyResult { + dna: r.dna.clone(), + agent_id: r.agent_id.clone(), + status: r.status.clone(), + distance, + relationship, + } +} + +fn truncate(mut v: Vec, limit: usize) -> Vec { + if limit > 0 && v.len() > limit { + v.truncate(limit); + } + v +} + +/// Hamming distance over ASCII bytes; differing lengths count extra bytes. +pub(crate) fn hamming(a: &str, b: &str) -> u32 { + let ab = a.as_bytes(); + let bb = b.as_bytes(); + let n = ab.len().min(bb.len()); + let mut d: u32 = 0; + for i in 0..n { + if ab[i] != bb[i] { + d += 1; + } + } + d + (ab.len().abs_diff(bb.len()) as u32) +} diff --git a/_primitives/_rust/kei-dna-index/src/cluster.rs b/_primitives/_rust/kei-dna-index/src/cluster.rs new file mode 100644 index 0000000..71c854c --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/cluster.rs @@ -0,0 +1,50 @@ +//! Clustering over DNAs by scope / body / role+caps. +//! +//! Constructor Pattern: one file = one responsibility (cluster grouping). + +use crate::db::{load_rows, Row}; +use crate::error::Result; +use rusqlite::Connection; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum ClusterBy { + Scope, + Body, + RoleCaps, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Cluster { + pub key: String, + pub members: Vec, +} + +pub fn cluster_by(conn: &Connection, by: ClusterBy) -> Result> { + let rows = load_rows(conn)?; + Ok(group(&rows, by)) +} + +/// Group rows by the selected key, dropping singleton groups. +/// Output is sorted by key for determinism. 
+pub(crate) fn group(rows: &[Row], by: ClusterBy) -> Vec { + let mut buckets: BTreeMap> = BTreeMap::new(); + for r in rows { + let key = key_for(r, by); + buckets.entry(key).or_default().push(r.dna.clone()); + } + buckets + .into_iter() + .filter(|(_, v)| v.len() > 1) + .map(|(key, members)| Cluster { key, members }) + .collect() +} + +fn key_for(r: &Row, by: ClusterBy) -> String { + match by { + ClusterBy::Scope => r.parsed.scope_sha.clone(), + ClusterBy::Body => r.parsed.body_sha.clone(), + ClusterBy::RoleCaps => format!("{}::{}", r.parsed.role, r.parsed.caps), + } +} diff --git a/_primitives/_rust/kei-dna-index/src/db.rs b/_primitives/_rust/kei-dna-index/src/db.rs new file mode 100644 index 0000000..d0c39ee --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/db.rs @@ -0,0 +1,63 @@ +//! Read-only SQLite access to the kei-ledger agents table. +//! +//! Constructor Pattern: one file = one responsibility (DB row loading). + +use crate::error::Result; +use crate::parsed::{split_dna, ParsedDna}; +use rusqlite::{Connection, OpenFlags}; +use std::path::Path; + +/// One row of the `agents` table, with its DNA already parsed. +/// Rows where `dna IS NULL` or parse-failed are excluded at load time. +#[derive(Debug, Clone)] +pub struct Row { + pub agent_id: String, + pub dna: String, + pub parsed: ParsedDna, + pub started_ts: i64, + pub status: String, +} + +/// Open ledger in read-only mode. No schema mutation. +pub fn open_read_only>(path: P) -> Result { + let conn = Connection::open_with_flags( + path, + OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI, + )?; + Ok(conn) +} + +/// Load all rows with non-null DNA. Malformed DNAs are skipped silently. 
+pub fn load_rows(conn: &Connection) -> Result> { + let mut stmt = conn.prepare( + "SELECT id, dna, started_ts, COALESCE(status,'unknown') \ + FROM agents WHERE dna IS NOT NULL", + )?; + let iter = stmt.query_map([], |r| { + let id: String = r.get(0)?; + let dna: String = r.get(1)?; + let ts: i64 = r.get(2)?; + let status: String = r.get(3)?; + Ok((id, dna, ts, status)) + })?; + + let mut rows: Vec = Vec::new(); + for rec in iter { + let (agent_id, dna, started_ts, status) = rec?; + if let Ok(parsed) = split_dna(&dna) { + rows.push(Row { + agent_id, + dna, + parsed, + started_ts, + status, + }); + } + } + Ok(rows) +} + +/// Find the row matching a given DNA string exactly. +pub fn find_target<'a>(rows: &'a [Row], target_dna: &str) -> Option<&'a Row> { + rows.iter().find(|r| r.dna == target_dna) +} diff --git a/_primitives/_rust/kei-dna-index/src/error.rs b/_primitives/_rust/kei-dna-index/src/error.rs new file mode 100644 index 0000000..84a9d32 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/error.rs @@ -0,0 +1,25 @@ +//! Error type for kei-dna-index. +//! +//! Constructor Pattern: one file = one responsibility (error taxonomy). + +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("malformed DNA: {0}")] + MalformedDna(String), + + #[error("target DNA not found in ledger: {0}")] + TargetNotFound(String), + + #[error("sqlite error: {0}")] + Sqlite(#[from] rusqlite::Error), + + #[error("io error: {0}")] + Io(#[from] std::io::Error), + + #[error("serde error: {0}")] + Serde(#[from] serde_json::Error), +} + +pub type Result = std::result::Result; diff --git a/_primitives/_rust/kei-dna-index/src/lib.rs b/_primitives/_rust/kei-dna-index/src/lib.rs new file mode 100644 index 0000000..d197f7f --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/lib.rs @@ -0,0 +1,20 @@ +//! kei-dna-index — read-only adjacency / cluster / precedent primitive over +//! the kei-ledger `agents.dna` column. +//! +//! No schema mutation. 
No dependency on kei-ledger or kei-agent-runtime crates. + +pub mod adjacency; +pub mod cluster; +pub mod db; +pub mod error; +pub mod parsed; +pub mod precedent; +pub mod stats; + +pub use adjacency::{adjacent, AdjacencyKind, AdjacencyResult, Relationship}; +pub use cluster::{cluster_by, Cluster, ClusterBy}; +pub use db::open_read_only; +pub use error::{Error, Result}; +pub use parsed::{split_dna, ParsedDna}; +pub use precedent::precedent; +pub use stats::{stats, Stats}; diff --git a/_primitives/_rust/kei-dna-index/src/main.rs b/_primitives/_rust/kei-dna-index/src/main.rs new file mode 100644 index 0000000..b9bfbf3 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/main.rs @@ -0,0 +1,126 @@ +//! kei-dna-index CLI — JSON stdout for all subcommands. + +use clap::{Parser, Subcommand, ValueEnum}; +use kei_dna_index::{ + adjacent, cluster_by, open_read_only, precedent, stats, AdjacencyKind, ClusterBy, Result, +}; +use std::path::PathBuf; + +#[derive(Parser, Debug)] +#[command(name = "kei-dna-index", about = "Read-only adjacency/cluster/precedent over kei-ledger DNAs")] +struct Cli { + #[command(subcommand)] + cmd: Cmd, +} + +#[derive(Subcommand, Debug)] +enum Cmd { + Adjacent { + #[arg(long)] + dna: String, + #[arg(long, value_enum, default_value_t = ByKind::All)] + by: ByKind, + #[arg(long, default_value_t = 10)] + limit: usize, + #[arg(long)] + db: Option, + }, + Cluster { + #[arg(long, value_enum)] + by: ByCluster, + #[arg(long)] + db: Option, + }, + Precedent { + #[arg(long)] + body: String, + #[arg(long, default_value = "all")] + status: String, + #[arg(long)] + db: Option, + }, + Stats { + #[arg(long)] + db: Option, + }, +} + +#[derive(ValueEnum, Clone, Debug)] +enum ByKind { + Scope, + Body, + Role, + Temporal, + All, +} + +#[derive(ValueEnum, Clone, Debug)] +enum ByCluster { + Scope, + Body, + Role, +} + +fn main() -> Result<()> { + let cli = Cli::parse(); + match cli.cmd { + Cmd::Adjacent { + dna, + by, + limit, + db, + } => run_adjacent(dna, by, limit, 
db), + Cmd::Cluster { by, db } => run_cluster(by, db), + Cmd::Precedent { body, status, db } => run_precedent(body, status, db), + Cmd::Stats { db } => run_stats(db), + } +} + +fn run_adjacent(dna: String, by: ByKind, limit: usize, db: Option) -> Result<()> { + let conn = open_read_only(resolve_db(db))?; + let kind = match by { + ByKind::Scope => AdjacencyKind::Scope, + ByKind::Body => AdjacencyKind::Body, + ByKind::Role => AdjacencyKind::Role, + ByKind::Temporal => AdjacencyKind::Temporal, + ByKind::All => AdjacencyKind::All, + }; + let out = adjacent(&conn, &dna, kind, limit)?; + println!("{}", serde_json::to_string_pretty(&out)?); + Ok(()) +} + +fn run_cluster(by: ByCluster, db: Option) -> Result<()> { + let conn = open_read_only(resolve_db(db))?; + let by = match by { + ByCluster::Scope => ClusterBy::Scope, + ByCluster::Body => ClusterBy::Body, + ByCluster::Role => ClusterBy::RoleCaps, + }; + let out = cluster_by(&conn, by)?; + println!("{}", serde_json::to_string_pretty(&out)?); + Ok(()) +} + +fn run_precedent(body: String, status: String, db: Option) -> Result<()> { + let conn = open_read_only(resolve_db(db))?; + let filter = if status == "all" { None } else { Some(status.as_str()) }; + let out = precedent(&conn, &body, filter)?; + println!("{}", serde_json::to_string_pretty(&out)?); + Ok(()) +} + +fn run_stats(db: Option) -> Result<()> { + let conn = open_read_only(resolve_db(db))?; + let out = stats(&conn)?; + println!("{}", serde_json::to_string_pretty(&out)?); + Ok(()) +} + +fn resolve_db(explicit: Option) -> PathBuf { + if let Some(p) = explicit { + return p; + } + let home = std::env::var("HOME").unwrap_or_else(|_| ".".into()); + PathBuf::from(home).join(".claude").join("agents").join("ledger.sqlite") +} diff --git a/_primitives/_rust/kei-dna-index/src/parsed.rs b/_primitives/_rust/kei-dna-index/src/parsed.rs new file mode 100644 index 0000000..bb99749 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/parsed.rs @@ -0,0 +1,120 @@ +//! DNA parser. 
+//! +//! Format: `<role>::<caps>::<scope_sha>::<body_sha>-<nonce>` +//! Example: `edit-local::NG-FW-FD-CP-CG-TG-ND-RF::5435F821::AC73A6A3-e9bf468d` + +use crate::error::{Error, Result}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ParsedDna { + pub role: String, + pub caps: String, + pub scope_sha: String, + pub body_sha: String, + pub nonce: String, +} + +/// Parse a DNA string into its five fields. Hex widths are validated. +pub fn split_dna(dna: &str) -> Result { + let parts: Vec<&str> = dna.split("::").collect(); + if parts.len() != 4 { + return Err(Error::MalformedDna(format!( + "expected 4 '::'-segments, got {}: {}", + parts.len(), + dna + ))); + } + let role = parts[0].to_string(); + let caps = parts[1].to_string(); + let scope_sha = parts[2].to_string(); + let tail = parts[3]; + + if role.is_empty() { + return Err(Error::MalformedDna(format!("empty role: {}", dna))); + } + if caps.is_empty() { + return Err(Error::MalformedDna(format!("empty caps: {}", dna))); + } + if !is_hex8(&scope_sha) { + return Err(Error::MalformedDna(format!( + "scope_sha not 8 hex chars: {}", + scope_sha + ))); + } + + let tail_parts: Vec<&str> = tail.split('-').collect(); + if tail_parts.len() != 2 { + return Err(Error::MalformedDna(format!( + "expected '-' tail, got: {}", + tail + ))); + } + let body_sha = tail_parts[0].to_string(); + let nonce = tail_parts[1].to_string(); + + if !is_hex8(&body_sha) { + return Err(Error::MalformedDna(format!( + "body_sha not 8 hex chars: {}", + body_sha + ))); + } + if !is_hex8(&nonce) { + return Err(Error::MalformedDna(format!( + "nonce not 8 hex chars: {}", + nonce + ))); + } + + Ok(ParsedDna { + role, + caps, + scope_sha, + body_sha, + nonce, + }) +} + +fn is_hex8(s: &str) -> bool { + s.len() == 8 && s.chars().all(|c| c.is_ascii_hexdigit()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn accepts_canonical() { + let dna = "edit-local::NG-FW-FD-CP-CG-TG-ND-RF::5435F821::AC73A6A3-e9bf468d"; + let p = 
split_dna(dna).unwrap(); + assert_eq!(p.role, "edit-local"); + assert_eq!(p.caps, "NG-FW-FD-CP-CG-TG-ND-RF"); + assert_eq!(p.scope_sha, "5435F821"); + assert_eq!(p.body_sha, "AC73A6A3"); + assert_eq!(p.nonce, "e9bf468d"); + } + + #[test] + fn rejects_short_scope() { + let dna = "r::c::12::AC73A6A3-e9bf468d"; + assert!(split_dna(dna).is_err()); + } + + #[test] + fn rejects_non_hex_nonce() { + let dna = "r::c::12345678::AC73A6A3-ZZZZZZZZ"; + assert!(split_dna(dna).is_err()); + } + + #[test] + fn rejects_missing_body_separator() { + let dna = "r::c::12345678::AC73A6A3e9bf468d"; + assert!(split_dna(dna).is_err()); + } + + #[test] + fn rejects_empty_role() { + let dna = "::c::12345678::AC73A6A3-e9bf468d"; + assert!(split_dna(dna).is_err()); + } +} diff --git a/_primitives/_rust/kei-dna-index/src/precedent.rs b/_primitives/_rust/kei-dna-index/src/precedent.rs new file mode 100644 index 0000000..b0174e5 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/precedent.rs @@ -0,0 +1,34 @@ +//! Precedent lookup: find rows sharing a given body_sha, optionally filtered by status. +//! +//! Constructor Pattern: one file = one responsibility. 
+ +use crate::adjacency::{AdjacencyResult, Relationship}; +use crate::db::load_rows; +use crate::error::Result; +use rusqlite::Connection; + +pub fn precedent( + conn: &Connection, + body_sha: &str, + status_filter: Option<&str>, +) -> Result> { + let rows = load_rows(conn)?; + let mut out: Vec = rows + .into_iter() + .filter(|r| r.parsed.body_sha.eq_ignore_ascii_case(body_sha)) + .filter(|r| match status_filter { + None => true, + Some("all") => true, + Some(s) => r.status == s, + }) + .map(|r| AdjacencyResult { + dna: r.dna, + agent_id: r.agent_id, + status: r.status, + distance: 0, + relationship: Relationship::SameBody, + }) + .collect(); + out.sort_by(|a, b| a.agent_id.cmp(&b.agent_id)); + Ok(out) +} diff --git a/_primitives/_rust/kei-dna-index/src/stats.rs b/_primitives/_rust/kei-dna-index/src/stats.rs new file mode 100644 index 0000000..293e1e8 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/src/stats.rs @@ -0,0 +1,64 @@ +//! Aggregate stats over parsed ledger DNAs. +//! +//! Constructor Pattern: one file = one responsibility. 
+ +use crate::cluster::{group, ClusterBy}; +use crate::db::load_rows; +use crate::error::Result; +use rusqlite::Connection; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Stats { + pub total_dnas: usize, + pub unique_scopes: usize, + pub unique_bodies: usize, + pub clusters_scope: usize, + pub clusters_body: usize, + pub avg_cluster_size: f64, +} + +pub fn stats(conn: &Connection) -> Result { + let rows = load_rows(conn)?; + let total_dnas = rows.len(); + let unique_scopes = rows + .iter() + .map(|r| r.parsed.scope_sha.as_str()) + .collect::>() + .len(); + let unique_bodies = rows + .iter() + .map(|r| r.parsed.body_sha.as_str()) + .collect::>() + .len(); + + let scope_clusters = group(&rows, ClusterBy::Scope); + let body_clusters = group(&rows, ClusterBy::Body); + let clusters_scope = scope_clusters.len(); + let clusters_body = body_clusters.len(); + let avg_cluster_size = avg_size(&scope_clusters, &body_clusters); + + Ok(Stats { + total_dnas, + unique_scopes, + unique_bodies, + clusters_scope, + clusters_body, + avg_cluster_size, + }) +} + +fn avg_size( + scope_clusters: &[crate::cluster::Cluster], + body_clusters: &[crate::cluster::Cluster], +) -> f64 { + let total: usize = scope_clusters.iter().map(|c| c.members.len()).sum::() + + body_clusters.iter().map(|c| c.members.len()).sum::(); + let n = scope_clusters.len() + body_clusters.len(); + if n == 0 { + 0.0 + } else { + total as f64 / n as f64 + } +} diff --git a/_primitives/_rust/kei-dna-index/tests/dna_index_integration.rs b/_primitives/_rust/kei-dna-index/tests/dna_index_integration.rs new file mode 100644 index 0000000..8814dd5 --- /dev/null +++ b/_primitives/_rust/kei-dna-index/tests/dna_index_integration.rs @@ -0,0 +1,405 @@ +//! Integration tests for kei-dna-index. +//! +//! Each test builds a minimal `agents` table in a tempfile sqlite DB, +//! then opens it read-only via the library and asserts public-API behaviour. 
+ +use kei_dna_index::{ + adjacent, cluster_by, open_read_only, precedent, split_dna, stats, AdjacencyKind, ClusterBy, + Relationship, +}; +use rusqlite::{params, Connection}; +use tempfile::NamedTempFile; + +fn setup() -> NamedTempFile { + let f = NamedTempFile::new().unwrap(); + let c = Connection::open(f.path()).unwrap(); + c.execute_batch( + "CREATE TABLE agents (\ + id TEXT PRIMARY KEY, \ + dna TEXT, \ + started_ts INTEGER NOT NULL, \ + status TEXT NOT NULL)", + ) + .unwrap(); + drop(c); + f +} + +fn insert(path: &std::path::Path, id: &str, dna: Option<&str>, ts: i64, status: &str) { + let c = Connection::open(path).unwrap(); + c.execute( + "INSERT INTO agents (id, dna, started_ts, status) VALUES (?1, ?2, ?3, ?4)", + params![id, dna, ts, status], + ) + .unwrap(); +} + +#[test] +fn parse_dna_valid_format() { + let p = split_dna("edit-local::NG-FW-FD-CP-CG-TG-ND-RF::5435F821::AC73A6A3-e9bf468d").unwrap(); + assert_eq!(p.role, "edit-local"); + assert_eq!(p.scope_sha, "5435F821"); + assert_eq!(p.body_sha, "AC73A6A3"); + assert_eq!(p.nonce, "e9bf468d"); +} + +#[test] +fn parse_dna_rejects_malformed() { + assert!(split_dna("nope").is_err()); + assert!(split_dna("a::b::c::d-e").is_err()); // short hex + assert!(split_dna("a::b::12345678::ZZZZZZZZ-12345678").is_err()); // non-hex + assert!(split_dna("a::b::12345678::12345678_12345678").is_err()); // no dash +} + +#[test] +fn adjacent_same_scope() { + let f = setup(); + let p = f.path(); + insert( + p, + "a1", + Some("edit::CAPS1::AAAAAAAA::11111111-22222222"), + 100, + "merged", + ); + insert( + p, + "a2", + Some("edit::CAPS1::AAAAAAAA::33333333-44444444"), + 200, + "running", + ); + insert( + p, + "a3", + Some("edit::CAPS1::BBBBBBBB::55555555-66666666"), + 300, + "merged", + ); + let conn = open_read_only(p).unwrap(); + let out = adjacent( + &conn, + "edit::CAPS1::AAAAAAAA::11111111-22222222", + AdjacencyKind::Scope, + 10, + ) + .unwrap(); + assert_eq!(out.len(), 1); + assert_eq!(out[0].agent_id, "a2"); + 
assert_eq!(out[0].relationship, Relationship::SameScope); + assert_eq!(out[0].distance, 0); +} + +#[test] +fn adjacent_same_body() { + let f = setup(); + let p = f.path(); + insert( + p, + "a1", + Some("edit::CAPS1::11111111::ABCDEF01-aaaaaaaa"), + 100, + "merged", + ); + insert( + p, + "a2", + Some("edit::CAPS2::22222222::ABCDEF01-bbbbbbbb"), + 200, + "merged", + ); + insert( + p, + "a3", + Some("edit::CAPS1::11111111::DEADBEEF-cccccccc"), + 300, + "merged", + ); + let conn = open_read_only(p).unwrap(); + let out = adjacent( + &conn, + "edit::CAPS1::11111111::ABCDEF01-aaaaaaaa", + AdjacencyKind::Body, + 10, + ) + .unwrap(); + assert_eq!(out.len(), 1); + assert_eq!(out[0].agent_id, "a2"); + assert_eq!(out[0].relationship, Relationship::SameBody); +} + +#[test] +fn adjacent_role_caps() { + let f = setup(); + let p = f.path(); + // Target role=edit caps=ABCDEFGH + insert( + p, + "a1", + Some("edit::ABCDEFGH::11111111::AAAAAAAA-aaaaaaaa"), + 100, + "merged", + ); + // Same role, caps 1-char different → Hamming=1 + insert( + p, + "a2", + Some("edit::ABCDEFGX::22222222::BBBBBBBB-bbbbbbbb"), + 200, + "merged", + ); + // Same role, caps 3-char different → Hamming=3 + insert( + p, + "a3", + Some("edit::ZBCZEFGZ::33333333::CCCCCCCC-cccccccc"), + 300, + "merged", + ); + // Different role → excluded + insert( + p, + "a4", + Some("plan::ABCDEFGH::44444444::DDDDDDDD-dddddddd"), + 400, + "merged", + ); + let conn = open_read_only(p).unwrap(); + let out = adjacent( + &conn, + "edit::ABCDEFGH::11111111::AAAAAAAA-aaaaaaaa", + AdjacencyKind::Role, + 10, + ) + .unwrap(); + assert_eq!(out.len(), 2); + assert_eq!(out[0].agent_id, "a2"); + assert_eq!(out[0].distance, 1); + assert_eq!(out[1].agent_id, "a3"); + assert_eq!(out[1].distance, 3); +} + +#[test] +fn adjacent_temporal() { + let f = setup(); + let p = f.path(); + insert( + p, + "a1", + Some("edit::C1::11111111::AAAAAAAA-aaaaaaaa"), + 1000, + "merged", + ); + insert( + p, + "a2", + Some("edit::C2::22222222::BBBBBBBB-bbbbbbbb"), + 
1005, + "merged", + ); + insert( + p, + "a3", + Some("edit::C3::33333333::CCCCCCCC-cccccccc"), + 990, + "merged", + ); + insert( + p, + "a4", + Some("edit::C4::44444444::DDDDDDDD-dddddddd"), + 1100, + "merged", + ); + insert( + p, + "a5", + Some("edit::C5::55555555::EEEEEEEE-eeeeeeee"), + 500, + "merged", + ); + let conn = open_read_only(p).unwrap(); + let out = adjacent( + &conn, + "edit::C1::11111111::AAAAAAAA-aaaaaaaa", + AdjacencyKind::Temporal, + 3, + ) + .unwrap(); + assert_eq!(out.len(), 3); + assert_eq!(out[0].agent_id, "a2"); + assert_eq!(out[0].distance, 5); + assert_eq!(out[1].agent_id, "a3"); + assert_eq!(out[1].distance, 10); + assert_eq!(out[2].agent_id, "a4"); + assert_eq!(out[2].distance, 100); +} + +#[test] +fn adjacent_all_kind() { + let f = setup(); + let p = f.path(); + // Target + insert( + p, + "t0", + Some("edit::ABCDEFGH::11111111::AAAAAAAA-aaaaaaaa"), + 100, + "merged", + ); + // Same scope AND same role/caps (should appear once with dist 0) + insert( + p, + "dup", + Some("edit::ABCDEFGH::11111111::BBBBBBBB-bbbbbbbb"), + 200, + "merged", + ); + // Only temporal neighbor + insert( + p, + "far", + Some("plan::ZZZZZZZZ::99999999::CCCCCCCC-cccccccc"), + 150, + "merged", + ); + let conn = open_read_only(p).unwrap(); + let out = adjacent( + &conn, + "edit::ABCDEFGH::11111111::AAAAAAAA-aaaaaaaa", + AdjacencyKind::All, + 10, + ) + .unwrap(); + // Two distinct DNAs: "dup" and "far" + assert_eq!(out.len(), 2); + let dup = out.iter().find(|r| r.agent_id == "dup").unwrap(); + // Dup should be reported with min distance (0 from scope/body match) + assert_eq!(dup.distance, 0); + let far = out.iter().find(|r| r.agent_id == "far").unwrap(); + assert_eq!(far.distance, 50); +} + +#[test] +fn cluster_by_scope() { + let f = setup(); + let p = f.path(); + // scope AAAA×3 + insert(p, "a1", Some("r::c::AAAAAAAA::00000001-11111111"), 1, "m"); + insert(p, "a2", Some("r::c::AAAAAAAA::00000002-22222222"), 2, "m"); + insert(p, "a3", 
Some("r::c::AAAAAAAA::00000003-33333333"), 3, "m"); + // scope BBBB×2 + insert(p, "b1", Some("r::c::BBBBBBBB::00000004-44444444"), 4, "m"); + insert(p, "b2", Some("r::c::BBBBBBBB::00000005-55555555"), 5, "m"); + // scope CCCC×1 (singleton → filtered) + insert(p, "c1", Some("r::c::CCCCCCCC::00000006-66666666"), 6, "m"); + let conn = open_read_only(p).unwrap(); + let out = cluster_by(&conn, ClusterBy::Scope).unwrap(); + assert_eq!(out.len(), 2); + let a = out.iter().find(|c| c.key == "AAAAAAAA").unwrap(); + assert_eq!(a.members.len(), 3); + let b = out.iter().find(|c| c.key == "BBBBBBBB").unwrap(); + assert_eq!(b.members.len(), 2); +} + +#[test] +fn cluster_filters_single_member_groups() { + let f = setup(); + let p = f.path(); + // All scopes unique → no clusters + insert(p, "a", Some("r::c::AAAAAAAA::11111111-11111111"), 1, "m"); + insert(p, "b", Some("r::c::BBBBBBBB::22222222-22222222"), 2, "m"); + insert(p, "c", Some("r::c::CCCCCCCC::33333333-33333333"), 3, "m"); + let conn = open_read_only(p).unwrap(); + let out = cluster_by(&conn, ClusterBy::Scope).unwrap(); + assert!(out.is_empty()); +} + +#[test] +fn precedent_finds_merged_only() { + let f = setup(); + let p = f.path(); + insert( + p, + "a1", + Some("edit::C::11111111::DEADBEEF-11111111"), + 1, + "merged", + ); + insert( + p, + "a2", + Some("plan::C::22222222::DEADBEEF-22222222"), + 2, + "failed", + ); + insert( + p, + "a3", + Some("edit::C::33333333::DEADBEEF-33333333"), + 3, + "merged", + ); + insert( + p, + "a4", + Some("edit::C::44444444::CAFEBABE-44444444"), + 4, + "merged", + ); + let conn = open_read_only(p).unwrap(); + let merged = precedent(&conn, "DEADBEEF", Some("merged")).unwrap(); + assert_eq!(merged.len(), 2); + assert!(merged.iter().all(|r| r.status == "merged")); + let all = precedent(&conn, "DEADBEEF", None).unwrap(); + assert_eq!(all.len(), 3); + let all_explicit = precedent(&conn, "DEADBEEF", Some("all")).unwrap(); + assert_eq!(all_explicit.len(), 3); +} + +#[test] +fn stats_aggregates() { 
+ let f = setup(); + let p = f.path(); + // 4 DNAs, 2 unique scopes, 3 unique bodies, 2 scope-clusters, 1 body-cluster + insert(p, "a1", Some("r::c::AAAAAAAA::b0d10001-11111111"), 1, "m"); + insert(p, "a2", Some("r::c::AAAAAAAA::b0d10002-22222222"), 2, "m"); + insert(p, "a3", Some("r::c::BBBBBBBB::b0d10001-33333333"), 3, "m"); + insert(p, "a4", Some("r::c::BBBBBBBB::b0d10003-44444444"), 4, "m"); + let conn = open_read_only(p).unwrap(); + let s = stats(&conn).unwrap(); + assert_eq!(s.total_dnas, 4); + assert_eq!(s.unique_scopes, 2); + assert_eq!(s.unique_bodies, 3); + assert_eq!(s.clusters_scope, 2); // AAAA×2 + BBBB×2 + assert_eq!(s.clusters_body, 1); // b0d10001×2 + assert!(s.avg_cluster_size > 1.0); +} + +#[test] +fn empty_ledger_returns_empty() { + let f = setup(); + let p = f.path(); + let conn = open_read_only(p).unwrap(); + let s = stats(&conn).unwrap(); + assert_eq!(s.total_dnas, 0); + assert_eq!(s.unique_scopes, 0); + assert_eq!(s.clusters_scope, 0); + assert_eq!(s.avg_cluster_size, 0.0); + assert!(cluster_by(&conn, ClusterBy::Scope).unwrap().is_empty()); + assert!(precedent(&conn, "DEADBEEF", None).unwrap().is_empty()); +} + +#[test] +fn malformed_dna_skipped_silently() { + let f = setup(); + let p = f.path(); + insert(p, "good", Some("r::c::AAAAAAAA::BBBBBBBB-cccccccc"), 1, "m"); + insert(p, "bad1", Some("totally-wrong"), 2, "m"); + insert(p, "bad2", Some("r::c::short::xx-yy"), 3, "m"); + insert(p, "nullrow", None, 4, "m"); + let conn = open_read_only(p).unwrap(); + let s = stats(&conn).unwrap(); + // Only 1 well-formed DNA survives the loader + assert_eq!(s.total_dnas, 1); +} diff --git a/_primitives/_rust/kei-fork/src/collect.rs b/_primitives/_rust/kei-fork/src/collect.rs index ff3a99c..61ee210 100644 --- a/_primitives/_rust/kei-fork/src/collect.rs +++ b/_primitives/_rust/kei-fork/src/collect.rs @@ -9,7 +9,7 @@ //! 5. `git worktree prune && git branch -D fork/` to clean up refs //! 6. `kei-ledger done ` unless `KEI_FORK_SKIP_LEDGER=1` //! -//! 
On SUCCESS: `.claude/forks//` is gone, archive exists, merge +//! On SUCCESS: `_forks//` is gone, archive exists, merge //! commit is on HEAD of kit_root. Return value carries the SHA and //! count of files added by the agent. @@ -29,7 +29,7 @@ pub struct CollectReport { } pub fn collect(agent_id: &str, commit_msg: &str, kit_root: &Path) -> Result { - let worktree_abs = kit_root.join(".claude/forks").join(agent_id); + let worktree_abs = kit_root.join("_forks").join(agent_id); if !worktree_abs.join(".DONE").exists() { return Err(Error::NotDone(agent_id.to_string())); } diff --git a/_primitives/_rust/kei-fork/src/create.rs b/_primitives/_rust/kei-fork/src/create.rs index b1cc286..f9295a5 100644 --- a/_primitives/_rust/kei-fork/src/create.rs +++ b/_primitives/_rust/kei-fork/src/create.rs @@ -2,8 +2,8 @@ //! //! Steps: //! 1. `validate_agent_id` (path-traversal defence) -//! 2. Reject if `.claude/forks//` OR branch `fork/` already exist -//! 3. `git worktree add .claude/forks/ -b fork/ ` +//! 2. Reject if `_forks//` OR branch `fork/` already exist +//! 3. `git worktree add _forks/ -b fork/ ` //! 4. Write `.KEI_FORK_META.toml` with agent_id + started_ts + base_branch + ledger_id //! 5. `kei-ledger fork` unless env `KEI_FORK_SKIP_LEDGER=1` //! @@ -22,7 +22,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; pub fn create(agent_id: &str, base_branch: &str, kit_root: &Path) -> Result { validate_agent_id(agent_id).map_err(|e| Error::Validate(e.reason))?; - let worktree_rel = PathBuf::from(".claude/forks").join(agent_id); + let worktree_rel = PathBuf::from("_forks").join(agent_id); let worktree_abs = kit_root.join(&worktree_rel); let branch = format!("fork/{agent_id}"); if worktree_abs.exists() || git::branch_exists(kit_root, &branch) { diff --git a/_primitives/_rust/kei-fork/src/list.rs b/_primitives/_rust/kei-fork/src/list.rs index 442be9c..63855a3 100644 --- a/_primitives/_rust/kei-fork/src/list.rs +++ b/_primitives/_rust/kei-fork/src/list.rs @@ -1,7 +1,7 @@ //! 
`list(kit_root, status_filter)` — enumerate known forks. //! //! Walks two roots: -//! - `.claude/forks//` — live worktrees (Active, Done, Stale) +//! - `_forks//` — live worktrees (Active, Done, Stale) //! - `_archive/forks///` — post-collect (Merged) //! //! For each discovered directory, reads `.KEI_FORK_META.toml` to build @@ -19,7 +19,7 @@ const STALE_HOURS_DEFAULT: u32 = 24; pub fn list(kit_root: &Path, status: Option) -> Result, Error> { let mut out = Vec::new(); - collect_live(&kit_root.join(".claude/forks"), &mut out, status); + collect_live(&kit_root.join("_forks"), &mut out, status); collect_archive(&kit_root.join("_archive/forks"), &mut out, status); out.sort_by_key(|h| h.started_ts); Ok(out) @@ -98,7 +98,7 @@ fn matches_filter(filter: Option, s: ForkStatus) -> bool { /// classified status, without filter. pub(crate) fn live_with_status(kit_root: &Path) -> Vec<(PathBuf, ForkHandle, ForkStatus)> { let mut out = Vec::new(); - let root = kit_root.join(".claude/forks"); + let root = kit_root.join("_forks"); let Ok(rd) = fs::read_dir(&root) else { return out }; for e in rd.flatten() { let p = e.path(); diff --git a/_primitives/_rust/kei-fork/src/rescue.rs b/_primitives/_rust/kei-fork/src/rescue.rs index 4ed21d5..077c92f 100644 --- a/_primitives/_rust/kei-fork/src/rescue.rs +++ b/_primitives/_rust/kei-fork/src/rescue.rs @@ -2,7 +2,7 @@ //! band. //! //! Resolution order: -//! 1. `.claude/forks//` (live) → copy to `out_dir` +//! 1. `_forks//` (live) → copy to `out_dir` //! 2. `_archive/forks///` (archived) → copy to `out_dir` //! 3. Neither → `Error::Gone` //! @@ -20,7 +20,7 @@ pub fn rescue(agent_id: &str, kit_root: &Path, out_dir: &Path) -> Result Option { - let live = kit_root.join(".claude/forks").join(agent_id); + let live = kit_root.join("_forks").join(agent_id); if live.is_dir() { return Some(live); }