From 15bf40196baaedb1eb7724cf86d97c88d83518da Mon Sep 17 00:00:00 2001 From: Parfii-bot Date: Thu, 23 Apr 2026 01:21:00 +0800 Subject: [PATCH] =?UTF-8?q?feat(stream-g):=20kei-sage=20rules=20integratio?= =?UTF-8?q?n=20=E2=80=94=20atoms=20+=20rules=20unified=20graph?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify atoms and rules in kei-sage's graph. Previously [[rules/...]] wikilinks were filtered (explicit Stream C scope-deferral). Now they resolve to rule-node units with rule_ref edges. kei-atom-discovery extension (non-breaking): - WikilinkTarget enum: Atom(String) | Rule(String) | Other(String) - classify_wikilink(inner: &str) -> WikilinkTarget — exposed via lib.rs - parse_wikilink unchanged for backwards-compat; new callers use classify_wikilink for richer semantics kei-sage additions: - rule_index.rs (129 LOC) — RuleRecord + discover_rules walking flat *.md + extract_h1 for display name + index_rules (unit_type="rule", vault_path="rule:<slug>") + index_rule_edges (walks atom.related, emits rule_ref edges atom → rule node) - atom_cli.rs: cmd_rules_discover + default_rules_root - main.rs: AtomsRulesDiscover subcommand with --rules-root flag - tests/rules_smoke.rs: 5 tests (discovery with heading extraction, rule-unit persistence, atom→rule edge persistence, empty-dir, slug fallback for headingless files) Tests: 12/12 kei-atom-discovery (+3 classify_wikilink), 28/28 kei-sage (+5 rules_smoke + unit tests now counted). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../_rust/kei-atom-discovery/src/lib.rs | 5 +- .../_rust/kei-atom-discovery/src/walk.rs | 38 +++++ .../_rust/kei-atom-discovery/tests/smoke.rs | 37 ++++- _primitives/_rust/kei-sage/src/atom_cli.rs | 16 +++ _primitives/_rust/kei-sage/src/lib.rs | 1 + _primitives/_rust/kei-sage/src/main.rs | 8 +- _primitives/_rust/kei-sage/src/rule_index.rs | 129 +++++++++++++++++ .../_rust/kei-sage/tests/rules_smoke.rs | 132 ++++++++++++++++++ 8 files changed, 362 insertions(+), 4 deletions(-) create mode 100644 _primitives/_rust/kei-sage/src/rule_index.rs create mode 100644 _primitives/_rust/kei-sage/tests/rules_smoke.rs diff --git a/_primitives/_rust/kei-atom-discovery/src/lib.rs b/_primitives/_rust/kei-atom-discovery/src/lib.rs index 6eb1cc9..4a9a6f0 100644 --- a/_primitives/_rust/kei-atom-discovery/src/lib.rs +++ b/_primitives/_rust/kei-atom-discovery/src/lib.rs @@ -18,4 +18,7 @@ pub use error::Error; pub use frontmatter::{ parse_frontmatter, AtomKind, AtomMeta, Frontmatter, SideEffect, MAX_FRONTMATTER_BYTES, }; -pub use walk::{discover_atoms, is_atom_target, parse_wikilink, safe_join, split_atom_id}; +pub use walk::{ + classify_wikilink, discover_atoms, is_atom_target, parse_wikilink, safe_join, split_atom_id, + WikilinkTarget, +}; diff --git a/_primitives/_rust/kei-atom-discovery/src/walk.rs b/_primitives/_rust/kei-atom-discovery/src/walk.rs index 9022c10..340b8d8 100644 --- a/_primitives/_rust/kei-atom-discovery/src/walk.rs +++ b/_primitives/_rust/kei-atom-discovery/src/walk.rs @@ -107,6 +107,44 @@ pub fn is_atom_target(target: &str) -> bool { !target.starts_with("rules/") && !target.starts_with("rule ") } +/// Classified wikilink target — atom, rule reference, or other (notes etc.). +/// +/// `Rule(slug)` strips the `rules/` prefix and drops any optional `RULE ` +/// token, leaving a caller-friendly slug (`"0.12"`, `"memory-protocol"`). 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum WikilinkTarget { + Atom(String), + Rule(String), + Other(String), +} + +/// Classify a wikilink inner body. `inner` is the already-unwrapped target +/// (no `[[ ]]`). Use this on the output of `parse_wikilink`. +pub fn classify_wikilink(inner: &str) -> WikilinkTarget { + let t = inner.trim(); + if let Some(rest) = t.strip_prefix("rules/") { + return WikilinkTarget::Rule(normalize_rule_slug(rest)); + } + if let Some(rest) = t.strip_prefix("rule ") { + return WikilinkTarget::Rule(normalize_rule_slug(rest)); + } + if is_atom_target(t) && t.contains("::") { + WikilinkTarget::Atom(t.to_string()) + } else { + WikilinkTarget::Other(t.to_string()) + } +} + +/// Normalise the tail after `rules/` or `rule ` into a short slug. +/// `"RULE 0.12"` → `"0.12"`, `"memory-protocol"` → `"memory-protocol"`. +fn normalize_rule_slug(rest: &str) -> String { + let r = rest.trim(); + if let Some(tail) = r.strip_prefix("RULE ") { + return tail.trim().to_string(); + } + r.to_string() +} + /// Safe base+rel path join. Rejects absolute paths, parent (`..`) components, /// and post-canonicalise escapes from `base`. pub fn safe_join(base: &Path, rel: &str) -> Result { diff --git a/_primitives/_rust/kei-atom-discovery/tests/smoke.rs b/_primitives/_rust/kei-atom-discovery/tests/smoke.rs index 8b6c526..046296d 100644 --- a/_primitives/_rust/kei-atom-discovery/tests/smoke.rs +++ b/_primitives/_rust/kei-atom-discovery/tests/smoke.rs @@ -1,8 +1,8 @@ //! Smoke tests covering the 4 critical fixes consolidated in this crate. 
use kei_atom_discovery::{ - discover_atoms, parse_frontmatter, parse_wikilink, safe_join, AtomKind, Error, - MAX_FRONTMATTER_BYTES, + classify_wikilink, discover_atoms, parse_frontmatter, parse_wikilink, safe_join, AtomKind, + Error, WikilinkTarget, MAX_FRONTMATTER_BYTES, }; use std::fs; use std::path::Path; @@ -130,3 +130,36 @@ fn wikilink_malformed_returns_none() { Some("kei-task::create".to_string()) ); } + +// classify_wikilink — 3 variants (Atom / Rule / Other) +#[test] +fn classify_atom_target() { + assert_eq!( + classify_wikilink("kei-task::create"), + WikilinkTarget::Atom("kei-task::create".into()) + ); +} + +#[test] +fn classify_rule_targets() { + assert_eq!( + classify_wikilink("rules/RULE 0.12"), + WikilinkTarget::Rule("0.12".into()) + ); + assert_eq!( + classify_wikilink("rules/memory-protocol"), + WikilinkTarget::Rule("memory-protocol".into()) + ); + assert_eq!( + classify_wikilink("rule 0.12"), + WikilinkTarget::Rule("0.12".into()) + ); +} + +#[test] +fn classify_other_target() { + assert_eq!( + classify_wikilink("random-note"), + WikilinkTarget::Other("random-note".into()) + ); +} diff --git a/_primitives/_rust/kei-sage/src/atom_cli.rs b/_primitives/_rust/kei-sage/src/atom_cli.rs index 0aee6be..94befaa 100644 --- a/_primitives/_rust/kei-sage/src/atom_cli.rs +++ b/_primitives/_rust/kei-sage/src/atom_cli.rs @@ -7,6 +7,7 @@ use crate::atom_index::index_atoms; use crate::atoms::{discover_atoms, AtomRecord}; use crate::bfs::bfs; use crate::pagerank::pagerank; +use crate::rule_index::discover_rules; use crate::search::fts_search; use crate::store::Store; use anyhow::Result; @@ -17,6 +18,11 @@ pub fn default_atoms_root() -> PathBuf { PathBuf::from(home).join(".claude/agents/_primitives/_rust") } +pub fn default_rules_root() -> PathBuf { + let home = std::env::var("HOME").unwrap_or_else(|_| ".".into()); + PathBuf::from(home).join(".claude/rules") +} + pub fn cmd_atoms_discover(root: &Path) -> Result<()> { let records = discover_atoms(root)?; 
println!("full_id\tkind\tstability\tmd_path"); @@ -33,6 +39,16 @@ pub fn cmd_atoms_discover(root: &Path) -> Result<()> { Ok(()) } +pub fn cmd_rules_discover(root: &Path) -> Result<()> { + let records = discover_rules(root)?; + println!("slug\tname\tpath"); + for r in &records { + println!("{}\t{}\t{}", r.slug, r.name, r.md_path.display()); + } + eprintln!("discovered {} rule(s) under {}", records.len(), root.display()); + Ok(()) +} + pub fn cmd_atoms_rank(store: &Store, root: &Path, limit: usize) -> Result<()> { ingest(store, root)?; for (path, score) in pagerank(store)?.into_iter().take(limit) { diff --git a/_primitives/_rust/kei-sage/src/lib.rs b/_primitives/_rust/kei-sage/src/lib.rs index 10f33d7..656925e 100644 --- a/_primitives/_rust/kei-sage/src/lib.rs +++ b/_primitives/_rust/kei-sage/src/lib.rs @@ -10,6 +10,7 @@ pub mod bfs; pub mod edges; pub mod import; pub mod pagerank; +pub mod rule_index; pub mod schema; pub mod search; pub mod store; diff --git a/_primitives/_rust/kei-sage/src/main.rs b/_primitives/_rust/kei-sage/src/main.rs index bee87b6..5e7c7a8 100644 --- a/_primitives/_rust/kei-sage/src/main.rs +++ b/_primitives/_rust/kei-sage/src/main.rs @@ -2,7 +2,8 @@ use clap::{Parser, Subcommand}; use kei_sage::atom_cli::{ - cmd_atoms_discover, cmd_atoms_related, cmd_atoms_search, cmd_atoms_rank, default_atoms_root, + cmd_atoms_discover, cmd_atoms_rank, cmd_atoms_related, cmd_atoms_search, cmd_rules_discover, + default_atoms_root, default_rules_root, }; use kei_sage::bfs::bfs; use kei_sage::edges::add_edge; @@ -59,6 +60,9 @@ enum Cmd { #[arg(long)] root: Option, #[arg(long, default_value_t = 20)] limit: i64, }, + AtomsRulesDiscover { + #[arg(long)] rules_root: Option, + }, } fn db_path(cli_db: Option) -> PathBuf { @@ -93,6 +97,8 @@ fn dispatch(store: &Store, cmd: Cmd) -> anyhow::Result<()> { cmd_atoms_related(store, &root.unwrap_or_else(default_atoms_root), &atom_id, depth), Cmd::AtomsSearch { query, root, limit } => cmd_atoms_search(store, 
&root.unwrap_or_else(default_atoms_root), &query, limit), + Cmd::AtomsRulesDiscover { rules_root } => + cmd_rules_discover(&rules_root.unwrap_or_else(default_rules_root)), } } diff --git a/_primitives/_rust/kei-sage/src/rule_index.rs b/_primitives/_rust/kei-sage/src/rule_index.rs new file mode 100644 index 0000000..3ec50b0 --- /dev/null +++ b/_primitives/_rust/kei-sage/src/rule_index.rs @@ -0,0 +1,129 @@ +//! Rule discovery + indexing for kei-sage. +//! +//! Walks a flat `/*.md` tree (e.g. `~/.claude/rules/`), extracts +//! the first `#` heading from each file as the rule name, and uses the file +//! stem as the rule slug. Rules are persisted as Units with: +//! - `unit_type = "rule"` +//! - `vault_path = "rule:"` +//! - `title = ` +//! +//! Edges from atoms that `related:` a `[[rules/...]]` wikilink are persisted +//! with `edge_type = "rule_ref"` via `index_rule_edges`. +//! +//! Scope: flat dir only (rules live flat in `~/.claude/rules/`). No recursion. + +use crate::atoms::AtomRecord; +use crate::edges::add_edge; +use crate::store::Store; +use crate::types::Unit; +use anyhow::Result; +use kei_atom_discovery::{classify_wikilink, parse_wikilink, WikilinkTarget}; +use std::fs; +use std::path::{Path, PathBuf}; + +/// One discovered rule: slug (file stem), display name (`# heading`), md path. +#[derive(Debug, Clone)] +pub struct RuleRecord { + pub slug: String, + pub name: String, + pub md_path: PathBuf, +} + +/// Walk `/*.md` (no recursion) and parse each file's first `#` heading. +/// Files without a heading fall back to the file stem as the display name. +pub fn discover_rules(root: &Path) -> Result> { + if !root.is_dir() { + return Ok(Vec::new()); + } + let mut out = Vec::new(); + for entry in fs::read_dir(root)? 
{ + let entry = entry?; + let path = entry.path(); + if !is_rule_md(&path) { + continue; + } + if let Some(rec) = parse_rule_file(&path) { + out.push(rec); + } + } + out.sort_by(|a, b| a.slug.cmp(&b.slug)); + Ok(out) +} + +fn is_rule_md(path: &Path) -> bool { + path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("md") +} + +fn parse_rule_file(path: &Path) -> Option { + let slug = path.file_stem().and_then(|s| s.to_str())?.to_string(); + let text = fs::read_to_string(path).ok()?; + let name = extract_h1(&text).unwrap_or_else(|| slug.clone()); + Some(RuleRecord { + slug, + name, + md_path: path.to_path_buf(), + }) +} + +/// Extract the first `# ` heading line, stripping the `#` prefix and trim. +/// Returns `None` if no `# ` line exists in the file. +fn extract_h1(text: &str) -> Option { + for line in text.lines() { + let t = line.trim_start(); + if let Some(rest) = t.strip_prefix("# ") { + return Some(rest.trim().to_string()); + } + } + None +} + +/// Persist rule units into the store. Returns the number of units indexed. +pub fn index_rules(store: &Store, records: &[RuleRecord]) -> Result { + for rec in records { + store.add_unit(&record_to_unit(rec))?; + } + Ok(records.len()) +} + +fn record_to_unit(rec: &RuleRecord) -> Unit { + Unit { + unit_type: "rule".into(), + title: rec.name.clone(), + content: String::new(), + evidence_grade: "rule".into(), + source_path: rec.md_path.to_string_lossy().into(), + vault_path: format!("rule:{}", rec.slug), + category: "rule".into(), + ..Default::default() + } +} + +/// Walk every atom's `related:` list. For every wikilink that classifies as +/// `Rule`, persist a `rule_ref` edge from the atom to `rule:`. +/// Returns the number of edges persisted. 
+pub fn index_rule_edges(store: &Store, records: &[AtomRecord]) -> Result { + let mut n = 0; + for rec in records { + for w in &rec.related { + if let Some(slug) = resolve_rule_ref(w) { + add_edge( + store, + &rec.full_id, + &format!("rule:{}", slug), + "rule_ref", + 1.0, + )?; + n += 1; + } + } + } + Ok(n) +} + +fn resolve_rule_ref(raw: &str) -> Option { + let inner = parse_wikilink(raw)?; + match classify_wikilink(&inner) { + WikilinkTarget::Rule(slug) => Some(slug), + _ => None, + } +} diff --git a/_primitives/_rust/kei-sage/tests/rules_smoke.rs b/_primitives/_rust/kei-sage/tests/rules_smoke.rs new file mode 100644 index 0000000..e999cff --- /dev/null +++ b/_primitives/_rust/kei-sage/tests/rules_smoke.rs @@ -0,0 +1,132 @@ +//! Integration smoke test for rule discovery + atom→rule edge persistence. +//! +//! Creates a temp rules tree with 2 rule files (flat dir), asserts +//! `discover_rules` extracts slugs + heading names correctly. Then stages +//! an atom whose `related:` lists one of those rules and asserts +//! `index_rule_edges` persists a `rule_ref` edge into the store. + +use kei_sage::atoms::discover_atoms; +use kei_sage::edges::list_outgoing; +use kei_sage::rule_index::{discover_rules, index_rule_edges, index_rules}; +use kei_sage::Store; +use std::fs; +use tempfile::tempdir; + +const RULE_012: &str = r#"# RULE 0.12 — AGENT GIT MODEL + +Body of the rule. +"#; + +const RULE_MEMORY: &str = r#"# Memory Protocol + +3-layer architecture. +"#; + +const ATOM_A: &str = r#"--- +atom: kei-task::create +kind: command +version: "0.1.0" +input: + schema: schemas/create-input.json +output: + schema: schemas/create-output.json +stability: stable +keywords: [task] +related: + - "[[rules/RULE 0.12]]" + - "[[rules/memory-protocol]]" +--- +# kei-task::create + +Body. 
+"#; + +fn write_rule(root: &std::path::Path, slug: &str, body: &str) { + fs::create_dir_all(root).unwrap(); + fs::write(root.join(format!("{slug}.md")), body).unwrap(); +} + +fn write_atom(root: &std::path::Path, crate_name: &str, verb: &str, body: &str) { + let atoms_dir = root.join(crate_name).join("atoms"); + fs::create_dir_all(&atoms_dir).unwrap(); + fs::write(atoms_dir.join(format!("{verb}.md")), body).unwrap(); +} + +#[test] +fn discover_rules_returns_two_records_with_correct_slugs_and_names() { + let tmp = tempdir().unwrap(); + write_rule(tmp.path(), "agent-git-model", RULE_012); + write_rule(tmp.path(), "memory-protocol", RULE_MEMORY); + + let recs = discover_rules(tmp.path()).unwrap(); + assert_eq!(recs.len(), 2, "expected 2 rules, got {}", recs.len()); + + let by_slug: std::collections::HashMap<_, _> = + recs.iter().map(|r| (r.slug.as_str(), r.name.as_str())).collect(); + assert_eq!( + by_slug.get("agent-git-model"), + Some(&"RULE 0.12 — AGENT GIT MODEL") + ); + assert_eq!(by_slug.get("memory-protocol"), Some(&"Memory Protocol")); +} + +#[test] +fn index_rules_persists_rule_units() { + let tmp = tempdir().unwrap(); + write_rule(tmp.path(), "memory-protocol", RULE_MEMORY); + + let recs = discover_rules(tmp.path()).unwrap(); + let store = Store::open_memory().unwrap(); + let n = index_rules(&store, &recs).unwrap(); + assert_eq!(n, 1); + assert_eq!(store.count_units().unwrap(), 1); +} + +#[test] +fn index_rule_edges_persists_atom_to_rule() { + let tmp_rules = tempdir().unwrap(); + let tmp_atoms = tempdir().unwrap(); + + // 1 rule file; the atom references `rules/RULE 0.12` → slug "0.12". 
+ write_rule(tmp_rules.path(), "agent-git-model", RULE_012); + write_atom(tmp_atoms.path(), "kei-task", "create", ATOM_A); + + let rule_recs = discover_rules(tmp_rules.path()).unwrap(); + let atom_recs = discover_atoms(tmp_atoms.path()).unwrap(); + + let store = Store::open_memory().unwrap(); + index_rules(&store, &rule_recs).unwrap(); + let edges_written = index_rule_edges(&store, &atom_recs).unwrap(); + + // 2 rule wikilinks in ATOM_A — "rules/RULE 0.12" → "0.12" and + // "rules/memory-protocol" → "memory-protocol". Both edges persisted + // regardless of whether the rule unit exists (edges are path-keyed). + assert_eq!(edges_written, 2); + + let outgoing = list_outgoing(&store, "kei-task::create").unwrap(); + let rule_edges: Vec<&str> = outgoing + .iter() + .filter(|e| e.edge_type == "rule_ref") + .map(|e| e.dst_path.as_str()) + .collect(); + assert!(rule_edges.contains(&"rule:0.12")); + assert!(rule_edges.contains(&"rule:memory-protocol")); +} + +#[test] +fn discover_rules_empty_dir_returns_empty() { + let tmp = tempdir().unwrap(); + let recs = discover_rules(tmp.path()).unwrap(); + assert!(recs.is_empty()); +} + +#[test] +fn discover_rules_without_heading_falls_back_to_slug() { + let tmp = tempdir().unwrap(); + fs::write(tmp.path().join("plain.md"), "no heading in this file\n").unwrap(); + + let recs = discover_rules(tmp.path()).unwrap(); + assert_eq!(recs.len(), 1); + assert_eq!(recs[0].slug, "plain"); + assert_eq!(recs[0].name, "plain"); +}