diff --git a/_primitives/_rust/Cargo.lock b/_primitives/_rust/Cargo.lock
index 455e72c..439b685 100644
--- a/_primitives/_rust/Cargo.lock
+++ b/_primitives/_rust/Cargo.lock
@@ -1968,6 +1968,7 @@ dependencies = [
  "rusqlite",
  "serde",
  "serde_json",
+ "serde_yaml",
  "tempfile",
 ]
 
diff --git a/_primitives/_rust/kei-sage/Cargo.toml b/_primitives/_rust/kei-sage/Cargo.toml
index a9643cd..9bf1080 100644
--- a/_primitives/_rust/kei-sage/Cargo.toml
+++ b/_primitives/_rust/kei-sage/Cargo.toml
@@ -18,6 +18,7 @@ rusqlite = { version = "0.31", features = ["bundled"] }
 clap = { version = "4", features = ["derive"] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
+serde_yaml = "0.9"
 anyhow = "1"
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
 
diff --git a/_primitives/_rust/kei-sage/src/atom_cli.rs b/_primitives/_rust/kei-sage/src/atom_cli.rs
new file mode 100644
index 0000000..0aee6be
--- /dev/null
+++ b/_primitives/_rust/kei-sage/src/atom_cli.rs
@@ -0,0 +1,83 @@
+//! CLI handlers for `atoms-*` subcommands — walks, indexes, queries atoms.
+//!
+//! Separate from `main.rs` to keep both files under Constructor Pattern
+//! 200-LOC limit. `main.rs` wires clap, this module implements the verbs.
+
+use crate::atom_index::index_atoms;
+use crate::atoms::{discover_atoms, AtomRecord};
+use crate::bfs::bfs;
+use crate::pagerank::pagerank;
+use crate::search::fts_search;
+use crate::store::Store;
+use anyhow::Result;
+use std::path::{Path, PathBuf};
+
+/// Default atoms root: `$HOME/.claude/agents/_primitives/_rust`, with `.` standing in
+/// for `$HOME` when it is unset. `var_os` keeps a non-UTF-8 home directory intact.
+pub fn default_atoms_root() -> PathBuf {
+    let home = std::env::var_os("HOME").unwrap_or_else(|| ".".into());
+    PathBuf::from(home).join(".claude/agents/_primitives/_rust")
+}
+
+/// Print every atom found under `root` as a TSV row on stdout; the count goes to stderr.
+pub fn cmd_atoms_discover(root: &Path) -> Result<()> {
+    let records = discover_atoms(root)?;
+    println!("full_id\tkind\tstability\tmd_path");
+    for r in &records {
+        println!(
+            "{}\t{}\t{}\t{}",
+            r.full_id,
+            r.kind.as_str(),
+            r.stability,
+            r.md_path.display()
+        );
+    }
+    eprintln!("discovered {} atom(s) under {}", records.len(), root.display());
+    Ok(())
+}
+
+/// Ingest atoms from `root`, then print the first `limit` `pagerank` entries as `score\tpath`.
+pub fn cmd_atoms_rank(store: &Store, root: &Path, limit: usize) -> Result<()> {
+    ingest(store, root)?;
+    for (path, score) in pagerank(store)?.into_iter().take(limit) {
+        println!("{:.6}\t{}", score, path);
+    }
+    Ok(())
+}
+
+/// Ingest atoms from `root`, then print each row `bfs` returns for `atom_id` and `depth`.
+pub fn cmd_atoms_related(store: &Store, root: &Path, atom_id: &str, depth: i64) -> Result<()> {
+    ingest(store, root)?;
+    for r in bfs(store, atom_id, depth)? {
+        println!("{}\t{}\t(depth {})", r.edge_type, r.path, r.depth);
+    }
+    Ok(())
+}
+
+/// Ingest atoms from `root`, then print the `fts_search` hits whose unit type is `atom`.
+///
+/// NOTE(review): `limit` is handed to `fts_search` before the atom filter runs, so presumably
+/// fewer than `limit` rows are printed when non-atom units also match — confirm intent.
+pub fn cmd_atoms_search(store: &Store, root: &Path, query: &str, limit: i64) -> Result<()> {
+    ingest(store, root)?;
+    for u in fts_search(store, query, limit)? {
+        if u.unit_type != "atom" {
+            continue;
+        }
+        println!("{}\t{}\t{}", u.id, u.category, u.vault_path);
+    }
+    Ok(())
+}
+
+/// Discover atoms under `root`, index them into `store`, and report the counts on stderr.
+fn ingest(store: &Store, root: &Path) -> Result<Vec<AtomRecord>> {
+    let records = discover_atoms(root)?;
+    let stats = index_atoms(store, &records)?;
+    eprintln!(
+        "indexed {} atom(s), {} wikilink edge(s) from {}",
+        stats.units_indexed,
+        stats.edges_indexed,
+        root.display()
+    );
+    Ok(records)
+}
diff --git a/_primitives/_rust/kei-sage/src/atom_index.rs b/_primitives/_rust/kei-sage/src/atom_index.rs
new file mode 100644
index 0000000..175c1da
--- /dev/null
+++ b/_primitives/_rust/kei-sage/src/atom_index.rs
@@ -0,0 +1,66 @@
+//! Persist discovered atoms into the kei-sage Store as Units + typed edges.
+//!
+//! Unit-type = `"atom"`; `vault_path` = atom full_id (e.g. `kei-task::create`).
+//! Edge-type = `"atom_related"` for wikilinks between atoms. Idempotent:
+//! re-ingesting the same corpus replaces existing rows by vault_path.
+
+use crate::atoms::{resolve_wikilinks, AtomRecord};
+use crate::edges::add_edge;
+use crate::store::Store;
+use crate::types::Unit;
+use anyhow::Result;
+
+/// Counts reported by [`index_atoms`]: units stored and wikilink edges added.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct IndexStats {
+    pub units_indexed: usize,
+    pub edges_indexed: usize,
+}
+
+/// Store one unit per record, then one `atom_related` edge per atom wikilink.
+pub fn index_atoms(store: &Store, records: &[AtomRecord]) -> Result<IndexStats> {
+    let units_indexed = index_units(store, records)?;
+    let edges_indexed = index_edges(store, records)?;
+    Ok(IndexStats { units_indexed, edges_indexed })
+}
+
+fn index_units(store: &Store, records: &[AtomRecord]) -> Result<usize> {
+    let mut n = 0;
+    for rec in records {
+        store.add_unit(&record_to_unit(rec))?;
+        n += 1;
+    }
+    Ok(n)
+}
+
+fn record_to_unit(rec: &AtomRecord) -> Unit {
+    Unit {
+        unit_type: "atom".into(),
+        title: rec.full_id.clone(),
+        content: build_content(rec),
+        evidence_grade: rec.stability.clone(),
+        source_path: rec.md_path.to_string_lossy().into(),
+        vault_path: rec.full_id.clone(),
+        category: rec.kind.as_str().into(),
+        ..Default::default()
+    }
+}
+
+fn build_content(rec: &AtomRecord) -> String {
+    let kw = rec.keywords.join(", ");
+    let mut s = String::with_capacity(rec.body.len() + kw.len() + 64);
+    s.push_str("[keywords] ");
+    s.push_str(&kw);
+    s.push_str("\n\n");
+    s.push_str(&rec.body);
+    s
+}
+
+fn index_edges(store: &Store, records: &[AtomRecord]) -> Result<usize> {
+    let mut n = 0;
+    for (src, dst) in resolve_wikilinks(records) {
+        add_edge(store, &src, &dst, "atom_related", 1.0)?;
+        n += 1;
+    }
+    Ok(n)
+}
diff --git a/_primitives/_rust/kei-sage/src/atom_parse.rs b/_primitives/_rust/kei-sage/src/atom_parse.rs
new file mode 100644
index 0000000..8e44e6c
--- /dev/null
+++ b/_primitives/_rust/kei-sage/src/atom_parse.rs
@@ -0,0 +1,125 @@
+//! Frontmatter splitting + wikilink extraction helpers for atom `.md` files.
+//!
+//! Pure functions, no I/O. See `atoms.rs` for the discovery walker.
+
+use anyhow::{anyhow, Result};
+
+/// Split a `.md` file into (frontmatter_yaml, body). Frontmatter must start
+/// with `---\n` and end with a line that is exactly `---`.
+pub fn split_frontmatter(text: &str) -> Result<(&str, &str)> {
+    let rest = text
+        .strip_prefix("---\n")
+        .or_else(|| text.strip_prefix("---\r\n"))
+        .ok_or_else(|| anyhow!("missing leading --- frontmatter delimiter"))?;
+    let end = find_closing_delim(rest)
+        .ok_or_else(|| anyhow!("missing closing --- frontmatter delimiter"))?;
+    let fm = &rest[..end.0];
+    let body_start = end.0 + end.1;
+    Ok((fm, rest.get(body_start..).unwrap_or("")))
+}
+
+/// Locate the first line that is exactly `---`, ignoring its `\n` / `\r\n` ending.
+/// Returns `(byte offset of that line, length of the line including its ending)`.
+fn find_closing_delim(s: &str) -> Option<(usize, usize)> {
+    let mut i = 0;
+    for line in s.split_inclusive('\n') {
+        let trimmed = line.trim_end_matches(&['\n', '\r'][..]);
+        if trimmed == "---" {
+            return Some((i, line.len()));
+        }
+        i += line.len();
+    }
+    None
+}
+
+/// Parse a single wikilink `[[target]]`. Returns `Some(target)` stripped of
+/// brackets and whitespace, `None` if the string isn't a wikilink shape.
+pub fn parse_wikilink(raw: &str) -> Option<String> {
+    let t = raw.trim();
+    let inner = t.strip_prefix("[[").and_then(|s| s.strip_suffix("]]"))?;
+    let inner = inner.trim();
+    if inner.is_empty() {
+        None
+    } else {
+        Some(inner.to_string())
+    }
+}
+
+/// Filter rule that decides whether a wikilink target is an atom reference.
+/// Atoms use `<crate>::<verb>`, so targets without `::` (plain notes) are rejected,
+/// as are `rules/*` and `rule *` targets.
+pub fn is_atom_target(target: &str) -> bool {
+    target.contains("::") && !target.starts_with("rules/") && !target.starts_with("rule ")
+}
+
+/// Split `<crate>::<verb>` atom id into components.
+pub fn split_atom_id(id: &str) -> Result<(String, String)> {
+    match id.split_once("::") {
+        Some((c, v)) if !c.is_empty() && !v.is_empty() => Ok((c.into(), v.into())),
+        _ => Err(anyhow!("atom id must be <crate>::<verb>, got {id}")),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn split_basic() {
+        let src = "---\nfoo: bar\n---\nbody text\n";
+        let (fm, body) = split_frontmatter(src).unwrap();
+        assert_eq!(fm, "foo: bar\n");
+        assert_eq!(body, "body text\n");
+    }
+
+    #[test]
+    fn split_crlf() {
+        let src = "---\r\nfoo: bar\r\n---\r\nbody\r\n";
+        let (fm, _body) = split_frontmatter(src).unwrap();
+        assert!(fm.contains("foo: bar"));
+    }
+
+    #[test]
+    fn split_missing_start() {
+        assert!(split_frontmatter("no frontmatter\n").is_err());
+    }
+
+    #[test]
+    fn split_missing_end() {
+        assert!(split_frontmatter("---\nfoo: bar\nbody\n").is_err());
+    }
+
+    #[test]
+    fn wikilink_simple() {
+        assert_eq!(
+            parse_wikilink("[[kei-task::create]]"),
+            Some("kei-task::create".into())
+        );
+    }
+
+    #[test]
+    fn wikilink_none() {
+        assert_eq!(parse_wikilink("just text"), None);
+        assert_eq!(parse_wikilink("[[ ]]"), None);
+    }
+
+    #[test]
+    fn atom_target_filter() {
+        assert!(is_atom_target("kei-task::create"));
+        assert!(!is_atom_target("rules/RULE 0.12"));
+        assert!(!is_atom_target("some-note"));
+    }
+
+    #[test]
+    fn split_id_ok() {
+        let (c, v) = split_atom_id("kei-task::create").unwrap();
+        assert_eq!(c, "kei-task");
+        assert_eq!(v, "create");
+    }
+
+    #[test]
+    fn split_id_bad() {
+        assert!(split_atom_id("no-separator").is_err());
+        assert!(split_atom_id("::empty").is_err());
+    }
+}
diff --git a/_primitives/_rust/kei-sage/src/atoms.rs b/_primitives/_rust/kei-sage/src/atoms.rs
new file mode 100644
index 0000000..8273382
--- /dev/null
+++ b/_primitives/_rust/kei-sage/src/atoms.rs
@@ -0,0 +1,189 @@
+//! Substrate-atom discovery + frontmatter parsing + wikilink extraction.
+//!
+//! Walks `<root>/<crate>/atoms/*.md`, parses YAML frontmatter, returns
+//! `AtomRecord`. Tolerant: skips files with invalid frontmatter (logs to
+//! stderr, continues scan). See `docs/SUBSTRATE-SCHEMA.md` §Graph contract.
+
+use crate::atom_parse::{is_atom_target, parse_wikilink, split_atom_id, split_frontmatter};
+use anyhow::{anyhow, Context, Result};
+use serde::Deserialize;
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+
+/// Atom kind, parsed from the `kind:` frontmatter field.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum AtomKind {
+    Command,
+    Query,
+    Stream,
+    Transform,
+}
+
+impl FromStr for AtomKind {
+    type Err = anyhow::Error;
+    fn from_str(s: &str) -> Result<Self> {
+        match s.trim().to_ascii_lowercase().as_str() {
+            "command" => Ok(AtomKind::Command),
+            "query" => Ok(AtomKind::Query),
+            "stream" => Ok(AtomKind::Stream),
+            "transform" => Ok(AtomKind::Transform),
+            other => Err(anyhow!("unknown atom kind: {other}")),
+        }
+    }
+}
+
+impl AtomKind {
+    /// Lowercase name of the kind; the same spelling `from_str` accepts.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            AtomKind::Command => "command",
+            AtomKind::Query => "query",
+            AtomKind::Stream => "stream",
+            AtomKind::Transform => "transform",
+        }
+    }
+}
+
+/// One atom parsed from an `atoms/*.md` file: frontmatter fields plus the markdown body.
+#[derive(Debug, Clone)]
+pub struct AtomRecord {
+    pub full_id: String,
+    pub kind: AtomKind,
+    pub crate_name: String,
+    pub verb: String,
+    pub version: String,
+    pub md_path: PathBuf,
+    pub input_schema: Option<PathBuf>,
+    pub output_schema: Option<PathBuf>,
+    pub related: Vec<String>,
+    pub keywords: Vec<String>,
+    pub stability: String,
+    pub body: String,
+}
+
+#[derive(Debug, Deserialize)]
+struct SchemaRef {
+    schema: Option<String>,
+}
+
+#[derive(Debug, Deserialize)]
+struct Frontmatter {
+    atom: String,
+    kind: String,
+    #[serde(default)]
+    version: Option<String>,
+    #[serde(default)]
+    input: Option<SchemaRef>,
+    #[serde(default)]
+    output: Option<SchemaRef>,
+    #[serde(default)]
+    related: Vec<String>,
+    #[serde(default)]
+    keywords: Vec<String>,
+    #[serde(default)]
+    stability: Option<String>,
+}
+
+/// Scan every `<root>/<dir>/atoms/*.md` file and parse it into an [`AtomRecord`].
+/// Unreadable or malformed files are reported on stderr and skipped; a `root`
+/// that is not a directory yields an empty list. Records are sorted by `full_id`.
+pub fn discover_atoms(root: &Path) -> Result<Vec<AtomRecord>> {
+    let mut out = Vec::new();
+    if !root.is_dir() {
+        return Ok(out);
+    }
+    for entry in fs::read_dir(root).with_context(|| format!("read_dir {}", root.display()))? {
+        let crate_dir = entry?.path();
+        if crate_dir.is_dir() {
+            collect_from_crate(&crate_dir, &mut out);
+        }
+    }
+    // `read_dir` order is platform-dependent; sort so CLI output is reproducible.
+    out.sort_by(|a, b| a.full_id.cmp(&b.full_id));
+    Ok(out)
+}
+
+fn collect_from_crate(crate_dir: &Path, out: &mut Vec<AtomRecord>) {
+    let atoms_dir = crate_dir.join("atoms");
+    if !atoms_dir.is_dir() {
+        return;
+    }
+    let crate_name = crate_dir
+        .file_name()
+        .and_then(|s| s.to_str())
+        .unwrap_or("")
+        .to_string();
+    let iter = match fs::read_dir(&atoms_dir) {
+        Ok(it) => it,
+        Err(e) => {
+            eprintln!("skip {}: {}", atoms_dir.display(), e);
+            return;
+        }
+    };
+    for entry in iter.flatten() {
+        let path = entry.path();
+        if !is_md_file(&path) {
+            continue;
+        }
+        match parse_atom_file(&path, &crate_name) {
+            Ok(rec) => out.push(rec),
+            Err(e) => eprintln!("skip {}: {}", path.display(), e),
+        }
+    }
+}
+
+fn is_md_file(path: &Path) -> bool {
+    path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("md")
+}
+
+fn parse_atom_file(path: &Path, crate_name: &str) -> Result<AtomRecord> {
+    let text = fs::read_to_string(path)
+        .with_context(|| format!("read {}", path.display()))?;
+    let (fm_text, body) = split_frontmatter(&text)?;
+    let fm: Frontmatter =
+        serde_yaml::from_str(fm_text).with_context(|| "parse frontmatter YAML")?;
+    build_record(fm, body, path, crate_name)
+}
+
+fn build_record(fm: Frontmatter, body: &str, path: &Path, crate_name: &str) -> Result<AtomRecord> {
+    let kind = AtomKind::from_str(&fm.kind)?;
+    let (crate_from_id, verb) = split_atom_id(&fm.atom)?;
+    let md_dir = path.parent().unwrap_or(path).to_path_buf();
+    Ok(AtomRecord {
+        full_id: fm.atom,
+        kind,
+        // NOTE(review): `split_atom_id` rejects empty components, so this fallback to the
+        // directory name looks unreachable — confirm, then simplify or drop `crate_name`.
+        crate_name: if crate_from_id.is_empty() {
+            crate_name.to_string()
+        } else {
+            crate_from_id
+        },
+        verb,
+        version: fm.version.unwrap_or_default(),
+        md_path: path.to_path_buf(),
+        input_schema: fm.input.and_then(|s| s.schema).map(|s| md_dir.join(&s)),
+        output_schema: fm.output.and_then(|s| s.schema).map(|s| md_dir.join(&s)),
+        related: fm.related,
+        keywords: fm.keywords,
+        stability: fm.stability.unwrap_or_else(|| "unknown".into()),
+        body: body.to_string(),
+    })
+}
+
+/// Extract `(source_atom_id, target)` edges from `related:` wikilinks.
+/// Non-atom targets (rules, notes) are filtered out here — scope: atoms only.
+pub fn resolve_wikilinks(records: &[AtomRecord]) -> Vec<(String, String)> {
+    let mut out = Vec::new();
+    for rec in records {
+        for w in &rec.related {
+            if let Some(target) = parse_wikilink(w) {
+                if is_atom_target(&target) {
+                    out.push((rec.full_id.clone(), target));
+                }
+            }
+        }
+    }
+    out
+}
diff --git a/_primitives/_rust/kei-sage/src/lib.rs b/_primitives/_rust/kei-sage/src/lib.rs
index 9980331..10f33d7 100644
--- a/_primitives/_rust/kei-sage/src/lib.rs
+++ b/_primitives/_rust/kei-sage/src/lib.rs
@@ -2,6 +2,10 @@
 //!
 //! Port of LBM internal/sage. Constructor Pattern: one concept per file.
 
+pub mod atom_cli;
+pub mod atom_index;
+pub mod atom_parse;
+pub mod atoms;
 pub mod bfs;
 pub mod edges;
 pub mod import;
diff --git a/_primitives/_rust/kei-sage/src/main.rs b/_primitives/_rust/kei-sage/src/main.rs
index 794c8d3..bee87b6 100644
--- a/_primitives/_rust/kei-sage/src/main.rs
+++ b/_primitives/_rust/kei-sage/src/main.rs
@@ -1,6 +1,9 @@
 //! kei-sage CLI — import / search / related / rank / add / edit.
 
 use clap::{Parser, Subcommand};
+use kei_sage::atom_cli::{
+    cmd_atoms_discover, cmd_atoms_related, cmd_atoms_search, cmd_atoms_rank, default_atoms_root,
+};
 use kei_sage::bfs::bfs;
 use kei_sage::edges::add_edge;
 use kei_sage::import::import_vault;
@@ -39,6 +42,23 @@ enum Cmd {
         #[arg(long)] grade: Option<String>,
     },
     Link { src: String, dst: String, #[arg(long, default_value = "related")] edge_type: String },
+    AtomsDiscover {
+        #[arg(long)] root: Option<PathBuf>,
+    },
+    AtomsRank {
+        #[arg(long)] root: Option<PathBuf>,
+        #[arg(long, default_value_t = 20)] limit: usize,
+    },
+    AtomsRelated {
+        atom_id: String,
+        #[arg(long)] root: Option<PathBuf>,
+        #[arg(long, default_value_t = 2)] depth: i64,
+    },
+    AtomsSearch {
+        query: String,
+        #[arg(long)] root: Option<PathBuf>,
+        #[arg(long, default_value_t = 20)] limit: i64,
+    },
 }
 
 fn db_path(cli_db: Option<PathBuf>) -> PathBuf {
@@ -65,6 +85,14 @@ fn dispatch(store: &Store, cmd: Cmd) -> anyhow::Result<()> {
         Cmd::Edit { id, title, content, grade } =>
             cmd_edit(store, id, title, content, grade),
         Cmd::Link { src, dst, edge_type } => cmd_link(store, &src, &dst, &edge_type),
+        Cmd::AtomsDiscover { root } =>
+            cmd_atoms_discover(&root.unwrap_or_else(default_atoms_root)),
+        Cmd::AtomsRank { root, limit } =>
+            cmd_atoms_rank(store, &root.unwrap_or_else(default_atoms_root), limit),
+        Cmd::AtomsRelated { atom_id, root, depth } =>
+            cmd_atoms_related(store, &root.unwrap_or_else(default_atoms_root), &atom_id, depth),
+        Cmd::AtomsSearch { query, root, limit } =>
+            cmd_atoms_search(store, &root.unwrap_or_else(default_atoms_root), &query, limit),
     }
 }
 
diff --git a/_primitives/_rust/kei-sage/tests/atoms_discover_smoke.rs b/_primitives/_rust/kei-sage/tests/atoms_discover_smoke.rs
new file mode 100644
index 0000000..a99d881
--- /dev/null
+++ b/_primitives/_rust/kei-sage/tests/atoms_discover_smoke.rs
@@ -0,0 +1,145 @@
+//! Integration smoke test for atom discovery + wikilink resolution.
+//!
+//! Creates a temp root with a fake `kei-task` crate holding `atoms/<verb>.md` files,
+//! asserts `discover_atoms` returns the expected records and frontmatter is parsed.
+
+use kei_sage::atom_index::index_atoms;
+use kei_sage::atoms::{discover_atoms, resolve_wikilinks, AtomKind};
+use kei_sage::Store;
+use std::fs;
+use tempfile::tempdir;
+
+const ATOM_A: &str = r#"---
+atom: kei-task::create
+kind: command
+version: "0.1.0"
+input:
+  schema: schemas/create-input.json
+output:
+  schema: schemas/create-output.json
+stability: stable
+keywords: [task, todo]
+related:
+  - "[[kei-task::add-dependency]]"
+  - "[[rules/RULE 0.12]]"
+---
+# kei-task::create
+
+Creates a task.
+"#;
+
+const ATOM_B: &str = r#"---
+atom: kei-task::add-dependency
+kind: command
+version: "0.1.0"
+stability: beta
+keywords: [task, dag]
+related: []
+---
+# kei-task::add-dependency
+
+Links two tasks.
+"#;
+
+const ATOM_BAD: &str = r#"not-yaml-frontmatter
+
+just a plain markdown file
+"#;
+
+fn write_atom(root: &std::path::Path, crate_name: &str, verb: &str, body: &str) {
+    let atoms_dir = root.join(crate_name).join("atoms");
+    fs::create_dir_all(&atoms_dir).unwrap();
+    fs::write(atoms_dir.join(format!("{verb}.md")), body).unwrap();
+}
+
+#[test]
+fn discover_returns_both_records() {
+    let tmp = tempdir().unwrap();
+    write_atom(tmp.path(), "kei-task", "create", ATOM_A);
+    write_atom(tmp.path(), "kei-task", "add-dependency", ATOM_B);
+
+    let recs = discover_atoms(tmp.path()).unwrap();
+    assert_eq!(recs.len(), 2, "expected 2 records, got {}", recs.len());
+
+    let ids: Vec<&str> = recs.iter().map(|r| r.full_id.as_str()).collect();
+    assert!(ids.contains(&"kei-task::create"));
+    assert!(ids.contains(&"kei-task::add-dependency"));
+}
+
+#[test]
+fn discover_output_is_sorted_by_full_id() {
+    let tmp = tempdir().unwrap();
+    write_atom(tmp.path(), "kei-task", "create", ATOM_A);
+    write_atom(tmp.path(), "kei-task", "add-dependency", ATOM_B);
+
+    let recs = discover_atoms(tmp.path()).unwrap();
+    let ids: Vec<&str> = recs.iter().map(|r| r.full_id.as_str()).collect();
+    assert_eq!(ids, ["kei-task::add-dependency", "kei-task::create"]);
+}
+
+#[test]
+fn frontmatter_fields_parsed() {
+    let tmp = tempdir().unwrap();
+    write_atom(tmp.path(), "kei-task", "create", ATOM_A);
+
+    let recs = discover_atoms(tmp.path()).unwrap();
+    let rec = recs.iter().find(|r| r.full_id == "kei-task::create").unwrap();
+
+    assert_eq!(rec.kind, AtomKind::Command);
+    assert_eq!(rec.crate_name, "kei-task");
+    assert_eq!(rec.verb, "create");
+    assert_eq!(rec.version, "0.1.0");
+    assert_eq!(rec.stability, "stable");
+    assert!(rec.keywords.contains(&"task".to_string()));
+    assert!(rec.input_schema.is_some());
+    assert!(rec.output_schema.is_some());
+    assert!(rec.body.contains("Creates a task"));
+}
+
+#[test]
+fn invalid_frontmatter_is_skipped_not_fatal() {
+    let tmp = tempdir().unwrap();
+    write_atom(tmp.path(), "kei-task", "create", ATOM_A);
+    write_atom(tmp.path(), "kei-task", "broken", ATOM_BAD);
+
+    let recs = discover_atoms(tmp.path()).unwrap();
+    assert_eq!(recs.len(), 1);
+    assert_eq!(recs[0].full_id, "kei-task::create");
+}
+
+#[test]
+fn wikilinks_filter_rule_targets() {
+    let tmp = tempdir().unwrap();
+    write_atom(tmp.path(), "kei-task", "create", ATOM_A);
+    write_atom(tmp.path(), "kei-task", "add-dependency", ATOM_B);
+
+    let recs = discover_atoms(tmp.path()).unwrap();
+    let edges = resolve_wikilinks(&recs);
+
+    // Only atom-to-atom edges remain; `[[rules/RULE 0.12]]` filtered.
+    assert_eq!(edges.len(), 1);
+    assert_eq!(edges[0].0, "kei-task::create");
+    assert_eq!(edges[0].1, "kei-task::add-dependency");
+}
+
+#[test]
+fn empty_root_returns_empty() {
+    let tmp = tempdir().unwrap();
+    let recs = discover_atoms(tmp.path()).unwrap();
+    assert!(recs.is_empty());
+}
+
+#[test]
+fn index_atoms_persists_units_and_edges() {
+    let tmp = tempdir().unwrap();
+    write_atom(tmp.path(), "kei-task", "create", ATOM_A);
+    write_atom(tmp.path(), "kei-task", "add-dependency", ATOM_B);
+
+    let recs = discover_atoms(tmp.path()).unwrap();
+    let store = Store::open_memory().unwrap();
+    let stats = index_atoms(&store, &recs).unwrap();
+
+    assert_eq!(stats.units_indexed, 2);
+    assert_eq!(stats.edges_indexed, 1);
+    assert_eq!(store.count_units().unwrap(), 2);
+}