feat(stream-g): kei-sage rules integration — atoms + rules unified graph

Unify atoms and rules in kei-sage's graph. Previously [[rules/...]]
wikilinks were filtered (explicit Stream C scope-deferral). Now they
resolve to rule-node units with rule_ref edges.

kei-atom-discovery extension (non-breaking):
- WikilinkTarget enum: Atom(String) | Rule(String) | Other(String)
- classify_wikilink(inner: &str) -> WikilinkTarget — exposed via lib.rs
- parse_wikilink unchanged for backwards-compat; new callers use
  classify for richer semantics

kei-sage additions:
- rule_index.rs (129 LOC) — RuleRecord + discover_rules walking flat
  *.md + extract_h1 for display name + index_rules (unit_type="rule",
  vault_path="rule:<slug>") + index_rule_edges (walks atom.related,
  emits rule_ref edges atom → rule node)
- atom_cli.rs: cmd_rules_discover + default_rules_root
- main.rs: AtomsRulesDiscover subcommand with --rules-root flag
- tests/rules_smoke.rs: 5 tests (discovery with heading extraction,
  rule-unit persistence, atom→rule edge persistence, empty-dir,
  slug fallback for headingless files)

Tests: 12/12 kei-atom-discovery (+3 classify_wikilink),
28/28 kei-sage (+5 rules_smoke + unit tests now counted).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Parfii-bot 2026-04-23 01:21:00 +08:00
parent 9307f8d26e
commit 15bf40196b
8 changed files with 362 additions and 4 deletions

View file

@ -18,4 +18,7 @@ pub use error::Error;
pub use frontmatter::{
parse_frontmatter, AtomKind, AtomMeta, Frontmatter, SideEffect, MAX_FRONTMATTER_BYTES,
};
pub use walk::{discover_atoms, is_atom_target, parse_wikilink, safe_join, split_atom_id};
pub use walk::{
classify_wikilink, discover_atoms, is_atom_target, parse_wikilink, safe_join, split_atom_id,
WikilinkTarget,
};

View file

@ -107,6 +107,44 @@ pub fn is_atom_target(target: &str) -> bool {
!target.starts_with("rules/") && !target.starts_with("rule ")
}
/// Result of classifying the inner text of a `[[...]]` wikilink.
///
/// Values are produced by `classify_wikilink`; each variant carries the
/// (trimmed) text that identifies the link target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WikilinkTarget {
    /// A target accepted by `is_atom_target` that also contains `::`
    /// (e.g. `"kei-task::create"`), stored verbatim.
    Atom(String),
    /// A rule reference. The `rules/` (or `rule `) prefix and any optional
    /// leading `RULE ` token are removed, leaving a caller-friendly slug
    /// such as `"0.12"` or `"memory-protocol"`.
    Rule(String),
    /// Everything else (notes etc.), stored verbatim.
    Other(String),
}
/// Sort a wikilink body into atom / rule / other.
///
/// `inner` must already be unwrapped (no surrounding `[[ ]]`), i.e. the
/// output of `parse_wikilink`. Surrounding whitespace is ignored.
///
/// * A `rules/` or `rule ` prefix yields `WikilinkTarget::Rule` carrying the
///   normalised slug of whatever follows the prefix.
/// * Otherwise, a target accepted by `is_atom_target` that contains `::`
///   yields `WikilinkTarget::Atom`.
/// * Anything else yields `WikilinkTarget::Other`.
pub fn classify_wikilink(inner: &str) -> WikilinkTarget {
    let target = inner.trim();
    // `rules/` is tried first, then the `rule ` spelling.
    let rule_tail = target
        .strip_prefix("rules/")
        .or_else(|| target.strip_prefix("rule "));
    match rule_tail {
        Some(tail) => WikilinkTarget::Rule(normalize_rule_slug(tail)),
        None if is_atom_target(target) && target.contains("::") => {
            WikilinkTarget::Atom(target.to_string())
        }
        None => WikilinkTarget::Other(target.to_string()),
    }
}
/// Reduce the text following `rules/` or `rule ` to a short slug.
///
/// Surrounding whitespace is trimmed; if the remainder begins with the
/// literal token `RULE ` that token is dropped and the rest trimmed again.
/// `"RULE 0.12"` → `"0.12"`, `"memory-protocol"` → `"memory-protocol"`.
fn normalize_rule_slug(rest: &str) -> String {
    let trimmed = rest.trim();
    trimmed
        .strip_prefix("RULE ")
        .map_or(trimmed, |tail| tail.trim())
        .to_string()
}
/// Safe base+rel path join. Rejects absolute paths, parent (`..`) components,
/// and post-canonicalise escapes from `base`.
pub fn safe_join(base: &Path, rel: &str) -> Result<PathBuf, Error> {

View file

@ -1,8 +1,8 @@
//! Smoke tests covering the 4 critical fixes consolidated in this crate.
use kei_atom_discovery::{
discover_atoms, parse_frontmatter, parse_wikilink, safe_join, AtomKind, Error,
MAX_FRONTMATTER_BYTES,
classify_wikilink, discover_atoms, parse_frontmatter, parse_wikilink, safe_join, AtomKind,
Error, WikilinkTarget, MAX_FRONTMATTER_BYTES,
};
use std::fs;
use std::path::Path;
@ -130,3 +130,36 @@ fn wikilink_malformed_returns_none() {
Some("kei-task::create".to_string())
);
}
// classify_wikilink — 3 variants (Atom / Rule / Other)
#[test]
fn classify_atom_target() {
    // A `crate::verb` target must come back as an Atom, verbatim.
    let classified = classify_wikilink("kei-task::create");
    let expected = WikilinkTarget::Atom("kei-task::create".into());
    assert_eq!(classified, expected);
}
#[test]
fn classify_rule_targets() {
    // (wikilink body, expected rule slug)
    let cases = [
        ("rules/RULE 0.12", "0.12"),
        ("rules/memory-protocol", "memory-protocol"),
        ("rule 0.12", "0.12"),
    ];
    for &(inner, slug) in &cases {
        assert_eq!(classify_wikilink(inner), WikilinkTarget::Rule(slug.into()));
    }
}
#[test]
fn classify_other_target() {
    // No rule prefix and no `::` separator: falls through to Other.
    let classified = classify_wikilink("random-note");
    let expected = WikilinkTarget::Other("random-note".into());
    assert_eq!(classified, expected);
}

View file

@ -7,6 +7,7 @@ use crate::atom_index::index_atoms;
use crate::atoms::{discover_atoms, AtomRecord};
use crate::bfs::bfs;
use crate::pagerank::pagerank;
use crate::rule_index::discover_rules;
use crate::search::fts_search;
use crate::store::Store;
use anyhow::Result;
@ -17,6 +18,11 @@ pub fn default_atoms_root() -> PathBuf {
PathBuf::from(home).join(".claude/agents/_primitives/_rust")
}
/// Default location of the rules directory: `$HOME/.claude/rules`.
///
/// When the `HOME` environment variable cannot be read, the path is built
/// relative to the current directory (`./.claude/rules`) instead.
pub fn default_rules_root() -> PathBuf {
    let mut root = match std::env::var("HOME") {
        Ok(home) => PathBuf::from(home),
        Err(_) => PathBuf::from("."),
    };
    root.push(".claude/rules");
    root
}
pub fn cmd_atoms_discover(root: &Path) -> Result<()> {
let records = discover_atoms(root)?;
println!("full_id\tkind\tstability\tmd_path");
@ -33,6 +39,16 @@ pub fn cmd_atoms_discover(root: &Path) -> Result<()> {
Ok(())
}
/// List every rule discovered under `root` as tab-separated rows on stdout.
///
/// A `slug`/`name`/`path` header row is printed first, then one row per rule
/// in the order returned by `discover_rules`. A one-line summary with the
/// rule count is written to stderr. Errors from `discover_rules` are
/// propagated to the caller.
pub fn cmd_rules_discover(root: &Path) -> Result<()> {
    let rules = discover_rules(root)?;
    println!("slug\tname\tpath");
    rules.iter().for_each(|rule| {
        println!("{}\t{}\t{}", rule.slug, rule.name, rule.md_path.display())
    });
    eprintln!("discovered {} rule(s) under {}", rules.len(), root.display());
    Ok(())
}
pub fn cmd_atoms_rank(store: &Store, root: &Path, limit: usize) -> Result<()> {
ingest(store, root)?;
for (path, score) in pagerank(store)?.into_iter().take(limit) {

View file

@ -10,6 +10,7 @@ pub mod bfs;
pub mod edges;
pub mod import;
pub mod pagerank;
pub mod rule_index;
pub mod schema;
pub mod search;
pub mod store;

View file

@ -2,7 +2,8 @@
use clap::{Parser, Subcommand};
use kei_sage::atom_cli::{
cmd_atoms_discover, cmd_atoms_related, cmd_atoms_search, cmd_atoms_rank, default_atoms_root,
cmd_atoms_discover, cmd_atoms_rank, cmd_atoms_related, cmd_atoms_search, cmd_rules_discover,
default_atoms_root, default_rules_root,
};
use kei_sage::bfs::bfs;
use kei_sage::edges::add_edge;
@ -59,6 +60,9 @@ enum Cmd {
#[arg(long)] root: Option<PathBuf>,
#[arg(long, default_value_t = 20)] limit: i64,
},
AtomsRulesDiscover {
#[arg(long)] rules_root: Option<PathBuf>,
},
}
fn db_path(cli_db: Option<PathBuf>) -> PathBuf {
@ -93,6 +97,8 @@ fn dispatch(store: &Store, cmd: Cmd) -> anyhow::Result<()> {
cmd_atoms_related(store, &root.unwrap_or_else(default_atoms_root), &atom_id, depth),
Cmd::AtomsSearch { query, root, limit } =>
cmd_atoms_search(store, &root.unwrap_or_else(default_atoms_root), &query, limit),
Cmd::AtomsRulesDiscover { rules_root } =>
cmd_rules_discover(&rules_root.unwrap_or_else(default_rules_root)),
}
}

View file

@ -0,0 +1,129 @@
//! Rule discovery + indexing for kei-sage.
//!
//! Walks a flat `<rules-root>/*.md` tree (e.g. `~/.claude/rules/`), extracts
//! the first `#` heading from each file as the rule name, and uses the file
//! stem as the rule slug. Rules are persisted as Units with:
//! - `unit_type = "rule"`
//! - `vault_path = "rule:<slug>"`
//! - `title = <heading>`
//!
//! Edges from atoms whose `related:` list contains a `[[rules/...]]` wikilink
//! are persisted with `edge_type = "rule_ref"` via `index_rule_edges`.
//!
//! Scope: flat dir only (rules live flat in `~/.claude/rules/`). No recursion.
use crate::atoms::AtomRecord;
use crate::edges::add_edge;
use crate::store::Store;
use crate::types::Unit;
use anyhow::Result;
use kei_atom_discovery::{classify_wikilink, parse_wikilink, WikilinkTarget};
use std::fs;
use std::path::{Path, PathBuf};
/// A single rule file found by `discover_rules`.
#[derive(Debug, Clone)]
pub struct RuleRecord {
    /// File stem of the rule's markdown file; used as the rule's identifier.
    pub slug: String,
    /// Display name: the file's first `# ` heading, or the slug when the
    /// file has no such heading.
    pub name: String,
    /// Path of the markdown file the record was read from.
    pub md_path: PathBuf,
}
/// Collect one `RuleRecord` per `*.md` file directly inside `root`.
///
/// The directory is not walked recursively. A `root` that is not a directory
/// produces an empty list rather than an error. Files that `parse_rule_file`
/// cannot turn into a record are skipped. The result is sorted by slug.
///
/// # Errors
/// Fails if the directory listing itself, or one of its entries, cannot be
/// read.
pub fn discover_rules(root: &Path) -> Result<Vec<RuleRecord>> {
    let mut rules = Vec::new();
    if root.is_dir() {
        for dir_entry in fs::read_dir(root)? {
            let candidate = dir_entry?.path();
            if is_rule_md(&candidate) {
                // `extend` with an `Option` pushes the record only when parsing succeeded.
                rules.extend(parse_rule_file(&candidate));
            }
        }
        rules.sort_by(|left, right| left.slug.cmp(&right.slug));
    }
    Ok(rules)
}
/// True when `path` is an existing regular file whose extension is exactly `md`.
fn is_rule_md(path: &Path) -> bool {
    let has_md_extension = matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some("md")
    );
    has_md_extension && path.is_file()
}
/// Build a `RuleRecord` for the markdown file at `path`.
///
/// The slug is the file stem; the display name is the first `# ` heading
/// found by `extract_h1`, or the slug when there is none. Returns `None`
/// when the file stem is missing or not valid UTF-8, or when the file cannot
/// be read as a UTF-8 string.
fn parse_rule_file(path: &Path) -> Option<RuleRecord> {
    let slug = path.file_stem()?.to_str()?.to_owned();
    let body = fs::read_to_string(path).ok()?;
    let name = match extract_h1(&body) {
        Some(heading) => heading,
        None => slug.clone(),
    };
    Some(RuleRecord {
        md_path: path.to_path_buf(),
        slug,
        name,
    })
}
/// Extract the text of the first non-empty `# ` heading line.
///
/// Leading whitespace before the `#` is ignored and the heading text is
/// trimmed. A heading whose text is blank (e.g. a bare `# `) is skipped, so
/// it can never yield an empty display name; the search continues with the
/// following lines.
///
/// Returns `None` when no line carries a non-empty `# ` heading, which lets
/// callers fall back to another name.
fn extract_h1(text: &str) -> Option<String> {
    text.lines()
        // Only `# ` (H1) matches: `## ` fails the prefix test on its second char.
        .filter_map(|line| line.trim_start().strip_prefix("# "))
        .map(str::trim)
        .find(|heading| !heading.is_empty())
        .map(str::to_string)
}
/// Persist rule units into the store. Returns the number of units indexed.
pub fn index_rules(store: &Store, records: &[RuleRecord]) -> Result<usize> {
for rec in records {
store.add_unit(&record_to_unit(rec))?;
}
Ok(records.len())
}
/// Map a discovered rule onto a store `Unit`.
///
/// `unit_type`, `evidence_grade` and `category` are all `"rule"`; the title
/// is the rule's display name; `vault_path` is `rule:<slug>`; `source_path`
/// is the markdown path (lossily converted to a string); `content` is left
/// empty. Every other field keeps its `Default` value.
fn record_to_unit(rec: &RuleRecord) -> Unit {
    let vault_path = format!("rule:{}", rec.slug);
    let title = rec.name.clone();
    Unit {
        unit_type: "rule".into(),
        title,
        content: String::new(),
        evidence_grade: "rule".into(),
        source_path: rec.md_path.to_string_lossy().into(),
        vault_path,
        category: "rule".into(),
        ..Default::default()
    }
}
/// Persist `rule_ref` edges for every rule wikilink in the atoms' `related` lists.
///
/// For each entry of `related` that `resolve_rule_ref` recognises as a rule
/// link, an edge `atom.full_id → rule:<slug>` with type `rule_ref` and weight
/// `1.0` is added through `add_edge`. Non-rule entries are ignored.
///
/// Returns the number of edges added; stops at the first `add_edge` error.
pub fn index_rule_edges(store: &Store, records: &[AtomRecord]) -> Result<usize> {
    let mut persisted = 0;
    for atom in records {
        let slugs = atom
            .related
            .iter()
            .filter_map(|link| resolve_rule_ref(link));
        for slug in slugs {
            let target = format!("rule:{}", slug);
            add_edge(store, &atom.full_id, &target, "rule_ref", 1.0)?;
            persisted += 1;
        }
    }
    Ok(persisted)
}
/// Return the rule slug referenced by a raw `related:` entry, if any.
///
/// `None` when `parse_wikilink` rejects `raw`, or when the unwrapped target
/// classifies as anything other than a rule.
fn resolve_rule_ref(raw: &str) -> Option<String> {
    parse_wikilink(raw).and_then(|inner| {
        if let WikilinkTarget::Rule(slug) = classify_wikilink(&inner) {
            Some(slug)
        } else {
            None
        }
    })
}

View file

@ -0,0 +1,132 @@
//! Integration smoke test for rule discovery + atom→rule edge persistence.
//!
//! Creates a temp rules tree with 2 rule files (flat dir), asserts
//! `discover_rules` extracts slugs + heading names correctly. Then stages
//! an atom whose `related:` lists one of those rules and asserts
//! `index_rule_edges` persists a `rule_ref` edge into the store.
use kei_sage::atoms::discover_atoms;
use kei_sage::edges::list_outgoing;
use kei_sage::rule_index::{discover_rules, index_rule_edges, index_rules};
use kei_sage::Store;
use std::fs;
use tempfile::tempdir;
/// Rule-file fixture whose first line is the H1 `# RULE 0.12 — AGENT GIT MODEL`.
/// The tests below write it out as `agent-git-model.md`.
const RULE_012: &str = r#"# RULE 0.12 — AGENT GIT MODEL
Body of the rule.
"#;
/// Rule-file fixture whose first line is the H1 `# Memory Protocol`.
/// The tests below write it out as `memory-protocol.md`.
const RULE_MEMORY: &str = r#"# Memory Protocol
3-layer architecture.
"#;
/// Atom markdown fixture for `kei-task::create`. Its frontmatter `related:`
/// list holds two rule wikilinks (`[[rules/RULE 0.12]]` and
/// `[[rules/memory-protocol]]`), which the edge test expects to become
/// `rule_ref` edges.
const ATOM_A: &str = r#"---
atom: kei-task::create
kind: command
version: "0.1.0"
input:
schema: schemas/create-input.json
output:
schema: schemas/create-output.json
stability: stable
keywords: [task]
related:
- "[[rules/RULE 0.12]]"
- "[[rules/memory-protocol]]"
---
# kei-task::create
Body.
"#;
/// Test helper: create `root` if needed and write `body` to `<root>/<slug>.md`.
/// Panics on any I/O failure.
fn write_rule(root: &std::path::Path, slug: &str, body: &str) {
    fs::create_dir_all(root).unwrap();
    let mut target = root.to_path_buf();
    target.push(format!("{slug}.md"));
    fs::write(&target, body).unwrap();
}
/// Test helper: write `body` to `<root>/<crate_name>/atoms/<verb>.md`,
/// creating the directories on the way. Panics on any I/O failure.
fn write_atom(root: &std::path::Path, crate_name: &str, verb: &str, body: &str) {
    let mut target = root.join(crate_name);
    target.push("atoms");
    fs::create_dir_all(&target).unwrap();
    target.push(format!("{verb}.md"));
    fs::write(&target, body).unwrap();
}
#[test]
fn discover_rules_returns_two_records_with_correct_slugs_and_names() {
    let rules_dir = tempdir().unwrap();
    write_rule(rules_dir.path(), "agent-git-model", RULE_012);
    write_rule(rules_dir.path(), "memory-protocol", RULE_MEMORY);

    let found = discover_rules(rules_dir.path()).unwrap();
    assert_eq!(found.len(), 2, "expected 2 rules, got {}", found.len());

    // Look names up by slug so the assertions do not depend on result order.
    let mut names = std::collections::HashMap::new();
    for rec in &found {
        names.insert(rec.slug.as_str(), rec.name.as_str());
    }
    assert_eq!(
        names.get("agent-git-model").copied(),
        Some("RULE 0.12 — AGENT GIT MODEL")
    );
    assert_eq!(names.get("memory-protocol").copied(), Some("Memory Protocol"));
}
#[test]
fn index_rules_persists_rule_units() {
    let rules_dir = tempdir().unwrap();
    write_rule(rules_dir.path(), "memory-protocol", RULE_MEMORY);

    let store = Store::open_memory().unwrap();
    let discovered = discover_rules(rules_dir.path()).unwrap();
    let indexed = index_rules(&store, &discovered).unwrap();

    // One rule file in, one unit reported and one unit stored.
    assert_eq!(indexed, 1);
    assert_eq!(store.count_units().unwrap(), 1);
}
#[test]
fn index_rule_edges_persists_atom_to_rule() {
    let rules_dir = tempdir().unwrap();
    let atoms_dir = tempdir().unwrap();
    // Only one rule file exists on disk; the atom links to `rules/RULE 0.12`
    // (slug "0.12") and `rules/memory-protocol`.
    write_rule(rules_dir.path(), "agent-git-model", RULE_012);
    write_atom(atoms_dir.path(), "kei-task", "create", ATOM_A);

    let rules = discover_rules(rules_dir.path()).unwrap();
    let atoms = discover_atoms(atoms_dir.path()).unwrap();
    let store = Store::open_memory().unwrap();
    index_rules(&store, &rules).unwrap();

    // Both wikilinks in ATOM_A become edges whether or not a matching rule
    // unit was indexed (edges are path-keyed).
    assert_eq!(index_rule_edges(&store, &atoms).unwrap(), 2);

    let outgoing = list_outgoing(&store, "kei-task::create").unwrap();
    for expected in &["rule:0.12", "rule:memory-protocol"] {
        assert!(outgoing
            .iter()
            .any(|e| e.edge_type == "rule_ref" && e.dst_path.as_str() == *expected));
    }
}
#[test]
fn discover_rules_empty_dir_returns_empty() {
    // An existing directory with no files yields no records (and no error).
    let empty_dir = tempdir().unwrap();
    assert!(discover_rules(empty_dir.path()).unwrap().is_empty());
}
#[test]
fn discover_rules_without_heading_falls_back_to_slug() {
    let rules_dir = tempdir().unwrap();
    let plain = rules_dir.path().join("plain.md");
    fs::write(&plain, "no heading in this file\n").unwrap();

    let found = discover_rules(rules_dir.path()).unwrap();
    assert_eq!(found.len(), 1);

    // With no `# ` line, the display name mirrors the file stem.
    let only = &found[0];
    assert_eq!(only.slug, "plain");
    assert_eq!(only.name, "plain");
}