feat(tx2): kei-sage facet query + lineage traversal

3 new subcommands:
- facet-query <key=value> [<k2=v2>...] — AND-filter walks primitives
- lineage <primitive-id> [--depth N] — BFS ancestors/descendants/forks
- author <creator-id> [--limit N] — all primitives by creator

facet_query.rs walks _capabilities/*/*/capability.toml + _manifests/*.toml
and parses each file with the toml crate. A primitive with no [taxonomy]
section, or without the requested key, never matches a filter
(None != specific value).

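lineage.rs runs BFS over parents[] wikilinks + fork-from edges. created-by is
recorded per node (printed by `lineage`, filtered on by `author`) but is not a
traversal edge.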

Tests: 34/34 (was 28, +6: 3 facet_smoke + 3 lineage_smoke).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Parfii-bot 2026-04-23 10:21:45 +08:00
parent 010def05ad
commit 5a34c35311
9 changed files with 574 additions and 2 deletions

View file

@ -2168,6 +2168,8 @@ dependencies = [
"serde",
"serde_json",
"tempfile",
"toml",
"walkdir",
]
[[package]]

View file

@ -20,6 +20,8 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
toml = "0.8"
walkdir = "2"
kei-atom-discovery = { path = "../kei-atom-discovery" }
kei-entity-store = { path = "../kei-entity-store" }

View file

@ -6,6 +6,8 @@
use crate::atom_index::index_atoms;
use crate::atoms::{discover_atoms, AtomRecord};
use crate::bfs::bfs;
use crate::facet_query::{discover_primitives, matches_all, parse_filters};
use crate::lineage::{discover_lineage, nodes_by_author, trace_lineage};
use crate::pagerank::pagerank;
use crate::rule_index::discover_rules;
use crate::search::fts_search;
@ -23,6 +25,57 @@ pub fn default_rules_root() -> PathBuf {
PathBuf::from(home).join(".claude/rules")
}
/// Default capabilities root: `$HOME/.claude/_capabilities`.
///
/// When `HOME` cannot be read from the environment, the current directory
/// (`.`) is used as the base instead.
pub fn default_capabilities_root() -> PathBuf {
    let base = match std::env::var("HOME") {
        Ok(home) => PathBuf::from(home),
        Err(_) => PathBuf::from("."),
    };
    base.join(".claude/_capabilities")
}
/// Default manifests root: `$HOME/.claude/_manifests`.
///
/// When `HOME` cannot be read from the environment, the current directory
/// (`.`) is used as the base instead.
pub fn default_manifests_root() -> PathBuf {
    let base = match std::env::var("HOME") {
        Ok(home) => PathBuf::from(home),
        Err(_) => PathBuf::from("."),
    };
    base.join(".claude/_manifests")
}
/// Print the `full_id` of every discovered primitive that satisfies all filters.
///
/// * `cap_root` / `man_root` — directories handed to `discover_primitives`.
/// * `filters` — raw `key=value` strings; a primitive is printed only when
///   every parsed pair matches (AND semantics). With no usable filters every
///   discovered primitive is printed.
///
/// Entries without `=` are dropped by `parse_filters`. Each dropped entry is
/// reported on stderr so that a typo does not silently broaden the query;
/// stdout output and the return value are unaffected.
pub fn cmd_facet_query(cap_root: &Path, man_root: &Path, filters: &[String]) -> Result<()> {
    for bad in filters.iter().filter(|raw| !raw.contains('=')) {
        eprintln!("warning: ignoring malformed filter {:?} (expected key=value)", bad);
    }
    let pairs = parse_filters(filters);
    let all = discover_primitives(cap_root, man_root);
    for p in all.iter().filter(|p| matches_all(p, &pairs)) {
        println!("{}", p.full_id);
    }
    Ok(())
}
/// Print creation metadata (when known) and the ancestor / descendant lists
/// for primitive `id`, traversing at most `depth` levels in each direction.
///
/// The metadata line is emitted only when the focus node was discovered and
/// carries `created_at` and/or `created_by`; the two list lines are always
/// printed.
pub fn cmd_lineage(cap_root: &Path, man_root: &Path, id: &str, depth: usize) -> Result<()> {
    let graph = discover_lineage(cap_root, man_root);
    let trace = trace_lineage(&graph, id, depth);

    // Collapse the nested optionals into one value so each output shape is a
    // single match arm.
    let stamp = trace
        .focus
        .as_ref()
        .map(|node| (node.created_at.as_ref(), node.created_by.as_ref()));
    match stamp {
        Some((Some(at), Some(by))) => println!("created: {} by {}", at, by),
        Some((Some(at), None)) => println!("created: {}", at),
        Some((None, Some(by))) => println!("created-by: {}", by),
        Some((None, None)) | None => {}
    }

    println!("ancestors: {}", format_list(&trace.ancestors));
    println!("descendants: {}", format_list(&trace.descendants));
    Ok(())
}
/// Print one `created_at<TAB>id` line per primitive whose `created_by` equals
/// `creator`, in the order returned by `nodes_by_author`, at most `limit`
/// lines. A missing timestamp is rendered as `-`.
pub fn cmd_author(cap_root: &Path, man_root: &Path, creator: &str, limit: usize) -> Result<()> {
    let graph = discover_lineage(cap_root, man_root);
    let authored = nodes_by_author(&graph, creator, limit);
    for node in &authored {
        let stamp = node.created_at.as_deref().unwrap_or("-");
        println!("{}\t{}", stamp, node.id);
    }
    Ok(())
}
/// Render a list of ids for display: `(none)` when empty, otherwise the
/// items joined with `", "`.
fn format_list(items: &[String]) -> String {
    match items {
        [] => String::from("(none)"),
        _ => items.join(", "),
    }
}
pub fn cmd_atoms_discover(root: &Path) -> Result<()> {
let records = discover_atoms(root)?;
println!("full_id\tkind\tstability\tmd_path");

View file

@ -0,0 +1,134 @@
//! Facet-query over capability.toml + manifest .toml primitives.
//!
//! TX1 adds `[taxonomy]` + `[lineage]` sections to primitive TOMLs.
//! This module walks a capabilities root (`<root>/*/*/capability.toml`)
//! and a manifests root (`<root>/*.toml`), parses the taxonomy section,
//! and filters by `key=value` AND predicates.
use anyhow::{Context, Result};
use serde::Deserialize;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// A primitive's identity + its taxonomy facets.
///
/// Produced by `parse_capability` / `parse_manifest` from a single TOML file.
#[derive(Debug, Clone)]
pub struct PrimitiveFacets {
/// Identifier: `[capability].name` for capability files, top-level `name`
/// for manifests.
pub full_id: String,
/// Path of the TOML file this primitive was parsed from.
pub source: PathBuf,
/// `[taxonomy]` entries rendered as strings. Only string, integer and
/// boolean values are kept; other value types are omitted.
pub facets: BTreeMap<String, String>,
}
/// Deserialization shape for a capability file: a `[capability]` table plus
/// an optional `[taxonomy]` table. Tried first by `parse_primitive`.
#[derive(Debug, Deserialize)]
struct CapabilityDoc {
// `[capability]` table; `None` when the file has no such table.
capability: Option<CapabilityHead>,
// Raw `[taxonomy]` entries, kept as `toml::Value` so any value type
// deserializes; conversion to strings happens in `flatten_facets`.
#[serde(default)]
taxonomy: Option<BTreeMap<String, toml::Value>>,
}
/// The subset of the `[capability]` table this module reads.
#[derive(Debug, Deserialize)]
struct CapabilityHead {
// Primitive identifier; a capability file without it yields no primitive.
name: Option<String>,
}
/// Deserialization shape for a manifest file: a top-level `name` plus an
/// optional `[taxonomy]` table. Used when the capability shape yields no id.
#[derive(Debug, Deserialize)]
struct ManifestDoc {
// Top-level `name` key; a manifest without it yields no primitive.
name: Option<String>,
// Raw `[taxonomy]` entries, kept as `toml::Value` so any value type
// deserializes; conversion to strings happens in `flatten_facets`.
#[serde(default)]
taxonomy: Option<BTreeMap<String, toml::Value>>,
}
/// Read `path` and interpret it as a primitive definition.
///
/// The capability layout (`[capability].name`) is tried first; if that yields
/// no id the manifest layout (top-level `name`) is tried.
///
/// Returns `Ok(None)` when the text is not valid TOML for either layout or
/// carries no id.
///
/// # Errors
/// Fails only when the file cannot be read.
pub fn parse_primitive(path: &Path) -> Result<Option<PrimitiveFacets>> {
    let text = std::fs::read_to_string(path)
        .with_context(|| format!("read {}", path.display()))?;
    let parsed = parse_capability(&text, path).or_else(|| parse_manifest(&text, path));
    Ok(parsed)
}
/// Interpret `text` as a capability file. Returns `None` when it does not
/// deserialize or has no `[capability].name`.
fn parse_capability(text: &str, path: &Path) -> Option<PrimitiveFacets> {
    let doc: CapabilityDoc = toml::from_str(text).ok()?;
    let full_id = doc.capability.and_then(|head| head.name)?;
    Some(PrimitiveFacets {
        full_id,
        source: path.to_path_buf(),
        facets: flatten_facets(doc.taxonomy.as_ref()),
    })
}
fn parse_manifest(text: &str, path: &Path) -> Option<PrimitiveFacets> {
let doc: ManifestDoc = toml::from_str(text).ok()?;
let id = doc.name?;
let facets = flatten_facets(doc.taxonomy.as_ref());
Some(PrimitiveFacets { full_id: id, source: path.to_path_buf(), facets })
}
/// Convert an optional `[taxonomy]` table into a string-to-string map.
///
/// A missing table gives an empty map; entries whose value
/// `value_to_string` cannot render are left out.
fn flatten_facets(tax: Option<&BTreeMap<String, toml::Value>>) -> BTreeMap<String, String> {
    tax.into_iter()
        .flatten()
        .filter_map(|(key, value)| value_to_string(value).map(|text| (key.clone(), text)))
        .collect()
}
/// Render a scalar TOML value as a facet string.
///
/// Strings are copied, integers and booleans use their `to_string` form;
/// every other value type yields `None`.
fn value_to_string(v: &toml::Value) -> Option<String> {
    let rendered = match v {
        toml::Value::Boolean(flag) => flag.to_string(),
        toml::Value::Integer(number) => number.to_string(),
        toml::Value::String(text) => text.clone(),
        _ => return None,
    };
    Some(rendered)
}
/// Collect every primitive found under the two roots: capability files
/// first, then manifests.
///
/// Files that cannot be read or parsed are skipped without error
/// (linting is a separate concern).
pub fn discover_primitives(cap_root: &Path, man_root: &Path) -> Vec<PrimitiveFacets> {
    let mut found: Vec<PrimitiveFacets> = Vec::new();
    walk_capabilities(cap_root, &mut found);
    walk_manifests(man_root, &mut found);
    found
}
/// Append to `out` every primitive parsed from a file named
/// `capability.toml` found within four directory levels of `root`
/// (symlinked directories are not followed).
///
/// Does nothing when `root` is not a directory; unreadable entries and
/// unparseable files are skipped.
fn walk_capabilities(root: &Path, out: &mut Vec<PrimitiveFacets>) {
    if !root.is_dir() {
        return;
    }
    let walker = WalkDir::new(root).max_depth(4).follow_links(false);
    let parsed = walker
        .into_iter()
        .flatten()
        .filter(|entry| entry.file_name() == "capability.toml" && entry.path().is_file())
        .filter_map(|entry| parse_primitive(entry.path()).ok().flatten());
    out.extend(parsed);
}
/// Append to `out` every primitive parsed from a `.toml` file found within
/// two directory levels of `root` (symlinked directories are not followed).
///
/// Does nothing when `root` is not a directory; unreadable entries and
/// unparseable files are skipped.
fn walk_manifests(root: &Path, out: &mut Vec<PrimitiveFacets>) {
    if !root.is_dir() {
        return;
    }
    let walker = WalkDir::new(root).max_depth(2).follow_links(false);
    let parsed = walker
        .into_iter()
        .flatten()
        .filter(|entry| {
            let file = entry.path();
            file.is_file() && file.extension().and_then(|ext| ext.to_str()) == Some("toml")
        })
        .filter_map(|entry| parse_primitive(entry.path()).ok().flatten());
    out.extend(parsed);
}
/// Split each `key=value` string at its first `=` into a `(key, value)` pair.
///
/// Entries that contain no `=` are dropped; everything after the first `=`
/// (including further `=` characters) belongs to the value.
pub fn parse_filters(raw: &[String]) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    for entry in raw {
        if let Some((key, value)) = entry.split_once('=') {
            pairs.push((key.to_owned(), value.to_owned()));
        }
    }
    pairs
}
/// AND-filter: `true` iff every `(key, value)` pair in `filters` is present
/// in the primitive's facets with exactly that value.
///
/// A facet key the primitive lacks is a mismatch (None != specific value);
/// an empty filter list matches every primitive.
pub fn matches_all(p: &PrimitiveFacets, filters: &[(String, String)]) -> bool {
    for (key, wanted) in filters {
        match p.facets.get(key) {
            Some(actual) if actual == wanted => {}
            _ => return false,
        }
    }
    true
}

View file

@ -8,7 +8,9 @@ pub mod atom_parse;
pub mod atoms;
pub mod bfs;
pub mod edges;
pub mod facet_query;
pub mod import;
pub mod lineage;
pub mod pagerank;
pub mod rule_index;
pub mod schema;

View file

@ -0,0 +1,195 @@
//! Lineage traversal for primitive TOMLs.
//!
//! Parses `[lineage]` section of capability.toml + manifest TOMLs,
//! extracting `parents` wikilinks, `created-by`, `fork-from`. Builds
//! an in-memory directed graph and walks ancestors + descendants.
use anyhow::{Context, Result};
use kei_atom_discovery::parse_wikilink;
use serde::Deserialize;
use std::collections::{BTreeMap, HashSet, VecDeque};
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Lineage metadata for a single primitive.
///
/// Built by `build_node` from the `[lineage]` section of one TOML file; all
/// lineage fields are empty / `None` when that section is absent.
#[derive(Debug, Clone)]
pub struct LineageNode {
/// Primitive identifier: `[capability].name` or the manifest's top-level `name`.
pub id: String,
/// Path of the TOML file this node was parsed from.
pub source: PathBuf,
/// Parent ids extracted from the `parents` entries via `parse_wikilink`;
/// entries for which it returns `None` are dropped.
pub parents: Vec<String>,
/// Value of `created-by`, if present.
pub created_by: Option<String>,
/// Value of `fork-from`, stored as written (not passed through `parse_wikilink`).
pub fork_from: Option<String>,
/// Value of `created-at`, if present; kept as an uninterpreted string.
pub created_at: Option<String>,
}
/// Deserialization shape for a capability file: `[capability]` for the id
/// plus an optional `[lineage]` table. Tried first by `parse_lineage`.
#[derive(Debug, Deserialize)]
struct CapDoc {
// `[capability]` table; `None` when the file has no such table.
capability: Option<IdHead>,
// `[lineage]` table; `None` when absent.
#[serde(default)]
lineage: Option<LineageSection>,
}
/// Deserialization shape for a manifest file: top-level `name` for the id
/// plus an optional `[lineage]` table. Used when the capability shape yields no id.
#[derive(Debug, Deserialize)]
struct ManDoc {
// Top-level `name` key; a manifest without it yields no node.
name: Option<String>,
// `[lineage]` table; `None` when absent.
#[serde(default)]
lineage: Option<LineageSection>,
}
/// The subset of the `[capability]` table this module reads.
#[derive(Debug, Deserialize)]
struct IdHead {
// Primitive identifier; a capability file without it yields no node.
name: Option<String>,
}
/// Raw `[lineage]` table as written in the TOML file. Every key is optional;
/// `Default` gives the "no lineage recorded" value used when the table is absent.
#[derive(Debug, Deserialize, Default)]
struct LineageSection {
// `parents` array of raw strings; resolved to ids in `build_node`.
#[serde(default)]
parents: Vec<String>,
// TOML key `created-by`.
#[serde(rename = "created-by", default)]
created_by: Option<String>,
// TOML key `fork-from`.
#[serde(rename = "fork-from", default)]
fork_from: Option<String>,
// TOML key `created-at`.
#[serde(rename = "created-at", default)]
created_at: Option<String>,
}
/// Read `path` and build its `LineageNode`.
///
/// The capability layout (`[capability].name`) is tried first; if that yields
/// no id the manifest layout (top-level `name`) is tried.
///
/// Returns `Ok(None)` when neither layout provides an id.
///
/// # Errors
/// Fails only when the file cannot be read.
pub fn parse_lineage(path: &Path) -> Result<Option<LineageNode>> {
    let text = std::fs::read_to_string(path)
        .with_context(|| format!("read {}", path.display()))?;
    let node = parse_cap_lineage(&text, path).or_else(|| parse_man_lineage(&text, path));
    Ok(node)
}
fn parse_cap_lineage(text: &str, path: &Path) -> Option<LineageNode> {
let doc: CapDoc = toml::from_str(text).ok()?;
let id = doc.capability.as_ref().and_then(|c| c.name.clone())?;
Some(build_node(id, path, doc.lineage))
}
fn parse_man_lineage(text: &str, path: &Path) -> Option<LineageNode> {
let doc: ManDoc = toml::from_str(text).ok()?;
let id = doc.name?;
Some(build_node(id, path, doc.lineage))
}
fn build_node(id: String, path: &Path, lin: Option<LineageSection>) -> LineageNode {
let lin = lin.unwrap_or_default();
let parents = lin.parents.iter().filter_map(|w| parse_wikilink(w)).collect();
LineageNode {
id,
source: path.to_path_buf(),
parents,
created_by: lin.created_by,
fork_from: lin.fork_from,
created_at: lin.created_at,
}
}
/// Collect a lineage node for every primitive found under the two roots:
/// `capability.toml` files (up to four levels below `cap_root`) first, then
/// manifest `.toml` files under `man_root`.
pub fn discover_lineage(cap_root: &Path, man_root: &Path) -> Vec<LineageNode> {
    let mut graph: Vec<LineageNode> = Vec::new();
    walk_root(cap_root, "capability.toml", 4, &mut graph);
    walk_manifest_root(man_root, &mut graph);
    graph
}
/// Append to `out` the lineage node of every file named `fname` found within
/// `depth` directory levels of `root` (symlinked directories are not followed).
///
/// Does nothing when `root` is not a directory; unreadable entries and files
/// that yield no node are skipped.
fn walk_root(root: &Path, fname: &str, depth: usize, out: &mut Vec<LineageNode>) {
    if !root.is_dir() {
        return;
    }
    let entries = WalkDir::new(root)
        .max_depth(depth)
        .follow_links(false)
        .into_iter()
        .flatten();
    out.extend(
        entries
            .filter(|entry| entry.file_name() == fname && entry.path().is_file())
            .filter_map(|entry| parse_lineage(entry.path()).ok().flatten()),
    );
}
/// Append to `out` the lineage node of every `.toml` file found within two
/// directory levels of `root` (symlinked directories are not followed).
///
/// Does nothing when `root` is not a directory; unreadable entries and files
/// that yield no node are skipped.
fn walk_manifest_root(root: &Path, out: &mut Vec<LineageNode>) {
    if !root.is_dir() {
        return;
    }
    let entries = WalkDir::new(root)
        .max_depth(2)
        .follow_links(false)
        .into_iter()
        .flatten();
    out.extend(
        entries
            .filter(|entry| {
                let file = entry.path();
                file.is_file() && file.extension().and_then(|ext| ext.to_str()) == Some("toml")
            })
            .filter_map(|entry| parse_lineage(entry.path()).ok().flatten()),
    );
}
/// Traversal result: ancestors (via parents + fork-from) and descendants.
#[derive(Debug, Clone, Default)]
pub struct LineageTrace {
/// The node whose id was queried; `None` when no discovered node has that id.
pub focus: Option<LineageNode>,
/// Ids reached by following `parents` and `fork_from` upward, in
/// breadth-first discovery order.
pub ancestors: Vec<String>,
/// Ids of nodes that name the focus (or one of its descendants) as a
/// parent or fork source, level by level.
pub descendants: Vec<String>,
}
/// Trace the lineage of `id` through `nodes`, at most `depth` levels each way.
///
/// Ancestors follow `parents` + `fork_from` edges upward; descendants follow
/// the same edges in reverse. `focus` is `None` when `id` is not among
/// `nodes`. If several nodes share an id, the last one in `nodes` wins the
/// lookup.
pub fn trace_lineage(nodes: &[LineageNode], id: &str, depth: usize) -> LineageTrace {
    let mut by_id: BTreeMap<&str, &LineageNode> = BTreeMap::new();
    for node in nodes {
        by_id.insert(node.id.as_str(), node);
    }
    let focus = by_id.get(id).map(|node| (*node).clone());
    let ancestors = bfs_up(&by_id, id, depth);
    let descendants = bfs_down(nodes, id, depth);
    LineageTrace { focus, ancestors, descendants }
}
/// Breadth-first walk from `start` towards its ancestors.
///
/// Edges followed from each node are its `parents` (in declared order)
/// followed by its `fork_from`, if any. Returns the ids reached, in discovery
/// order and without duplicates, going at most `depth` edges away from
/// `start`. Ids that are referenced but absent from `by_id` are still
/// reported; they simply cannot be expanded further.
///
/// `start` itself is never reported: it is marked as seen up front, so a
/// cyclic or self-referential lineage cannot list the focus as its own
/// ancestor.
fn bfs_up(by_id: &BTreeMap<&str, &LineageNode>, start: &str, depth: usize) -> Vec<String> {
    let mut seen: HashSet<String> = HashSet::new();
    seen.insert(start.to_string());
    let mut queue: VecDeque<(String, usize)> = VecDeque::new();
    queue.push_back((start.to_string(), 0));
    let mut out = Vec::new();
    while let Some((cur, d)) = queue.pop_front() {
        // Nodes sitting at the depth limit are reported but not expanded.
        if d >= depth {
            continue;
        }
        let Some(node) = by_id.get(cur.as_str()) else { continue };
        for parent in node.parents.iter().chain(node.fork_from.iter()) {
            if !seen.insert(parent.clone()) {
                continue;
            }
            out.push(parent.clone());
            queue.push_back((parent.clone(), d + 1));
        }
    }
    out
}
/// Level-by-level walk from `start` towards its descendants.
///
/// A node is a child of the current frontier when any of its `parents`, or
/// its `fork_from`, names a frontier id. Returns descendant ids without
/// duplicates, one level after another (within a level, in `nodes` order),
/// for at most `depth` levels.
///
/// `start` itself is never reported: it is marked as seen up front, so a
/// cyclic lineage cannot list the focus as its own descendant. The frontier
/// is a hash set so membership checks do not rescan a list for every edge.
fn bfs_down(nodes: &[LineageNode], start: &str, depth: usize) -> Vec<String> {
    let mut seen: HashSet<&str> = HashSet::new();
    seen.insert(start);
    let mut frontier: HashSet<&str> = HashSet::new();
    frontier.insert(start);
    let mut out = Vec::new();
    for _ in 0..depth {
        let mut next: HashSet<&str> = HashSet::new();
        for n in nodes {
            let is_child = n.parents.iter().any(|p| frontier.contains(p.as_str()))
                || n.fork_from.as_deref().is_some_and(|f| frontier.contains(f));
            if is_child && seen.insert(n.id.as_str()) {
                out.push(n.id.clone());
                next.insert(n.id.as_str());
            }
        }
        // No new level means the walk is exhausted before the depth limit.
        if next.is_empty() {
            break;
        }
        frontier = next;
    }
    out
}
/// Return up to `limit` nodes whose `created_by` equals `creator`, ordered by
/// `created_at` descending.
///
/// `created_at` is compared as a plain optional string: nodes without a
/// timestamp sort last, and ties keep their order from `nodes`.
pub fn nodes_by_author(nodes: &[LineageNode], creator: &str, limit: usize) -> Vec<LineageNode> {
    let mut authored: Vec<&LineageNode> = nodes
        .iter()
        .filter(|node| matches!(node.created_by.as_deref(), Some(by) if by == creator))
        .collect();
    // Stable sort on borrowed nodes; only the survivors of `limit` are cloned.
    authored.sort_by(|left, right| right.created_at.cmp(&left.created_at));
    authored.into_iter().take(limit).cloned().collect()
}

View file

@ -2,8 +2,9 @@
use clap::{Parser, Subcommand};
use kei_sage::atom_cli::{
cmd_atoms_discover, cmd_atoms_rank, cmd_atoms_related, cmd_atoms_search, cmd_rules_discover,
default_atoms_root, default_rules_root,
cmd_atoms_discover, cmd_atoms_rank, cmd_atoms_related, cmd_atoms_search, cmd_author,
cmd_facet_query, cmd_lineage, cmd_rules_discover, default_atoms_root,
default_capabilities_root, default_manifests_root, default_rules_root,
};
use kei_sage::bfs::bfs;
use kei_sage::edges::add_edge;
@ -63,6 +64,23 @@ enum Cmd {
AtomsRulesDiscover {
#[arg(long)] rules_root: Option<PathBuf>,
},
FacetQuery {
filters: Vec<String>,
#[arg(long)] capabilities_root: Option<PathBuf>,
#[arg(long)] manifests_root: Option<PathBuf>,
},
Lineage {
id: String,
#[arg(long, default_value_t = 3)] depth: usize,
#[arg(long)] capabilities_root: Option<PathBuf>,
#[arg(long)] manifests_root: Option<PathBuf>,
},
Author {
creator: String,
#[arg(long, default_value_t = 50)] limit: usize,
#[arg(long)] capabilities_root: Option<PathBuf>,
#[arg(long)] manifests_root: Option<PathBuf>,
},
}
fn db_path(cli_db: Option<PathBuf>) -> PathBuf {
@ -99,9 +117,26 @@ fn dispatch(store: &Store, cmd: Cmd) -> anyhow::Result<()> {
cmd_atoms_search(store, &root.unwrap_or_else(default_atoms_root), &query, limit),
Cmd::AtomsRulesDiscover { rules_root } =>
cmd_rules_discover(&rules_root.unwrap_or_else(default_rules_root)),
Cmd::FacetQuery { filters, capabilities_root, manifests_root } => {
let (c, m) = prim_roots(capabilities_root, manifests_root);
cmd_facet_query(&c, &m, &filters)
}
Cmd::Lineage { id, depth, capabilities_root, manifests_root } => {
let (c, m) = prim_roots(capabilities_root, manifests_root);
cmd_lineage(&c, &m, &id, depth)
}
Cmd::Author { creator, limit, capabilities_root, manifests_root } => {
let (c, m) = prim_roots(capabilities_root, manifests_root);
cmd_author(&c, &m, &creator, limit)
}
}
}
fn prim_roots(c: Option<PathBuf>, m: Option<PathBuf>) -> (PathBuf, PathBuf) {
(c.unwrap_or_else(default_capabilities_root),
m.unwrap_or_else(default_manifests_root))
}
fn cmd_import(store: &Store, vault: &std::path::Path) -> anyhow::Result<()> {
let s = import_vault(store, vault)?;
println!("imported={} skipped={}", s.imported, s.skipped);

View file

@ -0,0 +1,80 @@
//! Smoke tests for facet-query over capability.toml primitives.
use kei_sage::facet_query::{discover_primitives, matches_all, parse_filters};
use std::fs;
use tempfile::tempdir;
// Fixture: capability with `kingdom` and `mechanism` taxonomy facets.
const CAP_GATE: &str = r#"
[capability]
name = "policy::no-git-ops"
[taxonomy]
kingdom = "capability"
mechanism = "gate"
"#;
// Fixture: same two facets as `CAP_GATE` plus an extra `severity` facet.
const CAP_SCOPE: &str = r#"
[capability]
name = "scope::files-whitelist"
[taxonomy]
kingdom = "capability"
mechanism = "gate"
severity = "warn"
"#;
// Fixture: capability with no `[taxonomy]` section at all.
const CAP_PLAIN: &str = r#"
[capability]
name = "tools::read-only"
"#;
/// Write `body` to `<root>/<sub>/<name>/capability.toml`, creating the
/// directories as needed. Panics on any I/O failure.
fn write_cap(root: &std::path::Path, sub: &str, name: &str, body: &str) {
    let mut target = root.to_path_buf();
    target.push(sub);
    target.push(name);
    fs::create_dir_all(&target).unwrap();
    target.push("capability.toml");
    fs::write(&target, body).unwrap();
}
/// Two filters are AND-ed: only the primitives carrying both facets match;
/// the one without `[taxonomy]` is discovered but filtered out.
#[test]
fn facet_and_filter_matches_two_primitives() {
    let cap_dir = tempdir().unwrap();
    let man_dir = tempdir().unwrap();
    for (sub, name, body) in [
        ("policy", "no-git-ops", CAP_GATE),
        ("scope", "files-whitelist", CAP_SCOPE),
        ("tools", "read-only", CAP_PLAIN),
    ] {
        write_cap(cap_dir.path(), sub, name, body);
    }

    let primitives = discover_primitives(cap_dir.path(), man_dir.path());
    assert_eq!(primitives.len(), 3);

    let wanted = parse_filters(&["kingdom=capability".into(), "mechanism=gate".into()]);
    let matched_ids: Vec<&str> = primitives
        .iter()
        .filter(|p| matches_all(p, &wanted))
        .map(|p| p.full_id.as_str())
        .collect();
    assert_eq!(matched_ids.len(), 2);
    assert!(matched_ids.contains(&"policy::no-git-ops"));
    assert!(matched_ids.contains(&"scope::files-whitelist"));
}
/// A primitive with no `[taxonomy]` section never satisfies a facet filter.
#[test]
fn missing_facet_excluded_from_match() {
    let cap_dir = tempdir().unwrap();
    let man_dir = tempdir().unwrap();
    write_cap(cap_dir.path(), "tools", "read-only", CAP_PLAIN);

    let primitives = discover_primitives(cap_dir.path(), man_dir.path());
    let wanted = parse_filters(&["kingdom=capability".into()]);
    let hit_count = primitives.iter().filter(|p| matches_all(p, &wanted)).count();
    assert_eq!(hit_count, 0, "primitive without [taxonomy] must not match");
}
/// A single filter selects only the primitive that carries that facet value.
#[test]
fn single_filter_matches_subset() {
    let cap_dir = tempdir().unwrap();
    let man_dir = tempdir().unwrap();
    write_cap(cap_dir.path(), "policy", "no-git-ops", CAP_GATE);
    write_cap(cap_dir.path(), "scope", "files-whitelist", CAP_SCOPE);

    let primitives = discover_primitives(cap_dir.path(), man_dir.path());
    let wanted = parse_filters(&["severity=warn".into()]);
    let matched_ids: Vec<&str> = primitives
        .iter()
        .filter(|p| matches_all(p, &wanted))
        .map(|p| p.full_id.as_str())
        .collect();
    assert_eq!(matched_ids.len(), 1);
    assert_eq!(matched_ids[0], "scope::files-whitelist");
}

View file

@ -0,0 +1,69 @@
//! Smoke tests for lineage traversal over capability.toml primitives.
use kei_sage::lineage::{discover_lineage, trace_lineage};
use std::fs;
use tempfile::tempdir;
// Fixture: lineage root — empty `parents`, no `fork-from`.
const CAP_ROOT: &str = r#"
[capability]
name = "policy::no-git-ops"
[lineage]
parents = []
created-by = "ag-human"
created-at = "2026-04-23T10:00:00Z"
"#;
// Fixture: child of `CAP_ROOT`, linked both by a `parents` wikilink and by
// a plain-id `fork-from`.
const CAP_CHILD: &str = r#"
[capability]
name = "policy::no-git-ops-lax"
[lineage]
parents = ["[[policy::no-git-ops]]"]
fork-from = "policy::no-git-ops"
created-by = "ag-user-xyz"
created-at = "2026-04-23T12:00:00Z"
"#;
/// Write `body` to `<root>/<sub>/<name>/capability.toml`, creating the
/// directories as needed. Panics on any I/O failure.
fn write_cap(root: &std::path::Path, sub: &str, name: &str, body: &str) {
    let mut target = root.to_path_buf();
    target.push(sub);
    target.push(name);
    fs::create_dir_all(&target).unwrap();
    target.push("capability.toml");
    fs::write(&target, body).unwrap();
}
/// A lone root node is found as the focus and has neither ancestors nor
/// descendants.
#[test]
fn lineage_no_parents_returns_only_self() {
    let (cap_dir, man_dir) = (tempdir().unwrap(), tempdir().unwrap());
    write_cap(cap_dir.path(), "policy", "no-git-ops", CAP_ROOT);

    let graph = discover_lineage(cap_dir.path(), man_dir.path());
    let result = trace_lineage(&graph, "policy::no-git-ops", 3);

    assert!(result.focus.is_some());
    assert!(result.ancestors.is_empty(), "no parents expected");
    assert!(result.descendants.is_empty(), "no descendants expected");
}
/// Tracing from the child reports the root as an ancestor.
#[test]
fn lineage_parent_wikilink_is_traversed_upward() {
    let (cap_dir, man_dir) = (tempdir().unwrap(), tempdir().unwrap());
    write_cap(cap_dir.path(), "policy", "no-git-ops", CAP_ROOT);
    write_cap(cap_dir.path(), "policy", "no-git-ops-lax", CAP_CHILD);

    let graph = discover_lineage(cap_dir.path(), man_dir.path());
    let result = trace_lineage(&graph, "policy::no-git-ops-lax", 3);

    assert!(result.ancestors.iter().any(|id| id == "policy::no-git-ops"));
}
/// Tracing from the root reports the child that names it as a descendant.
#[test]
fn lineage_fork_from_yields_descendant() {
    let (cap_dir, man_dir) = (tempdir().unwrap(), tempdir().unwrap());
    write_cap(cap_dir.path(), "policy", "no-git-ops", CAP_ROOT);
    write_cap(cap_dir.path(), "policy", "no-git-ops-lax", CAP_CHILD);

    let graph = discover_lineage(cap_dir.path(), man_dir.path());
    let result = trace_lineage(&graph, "policy::no-git-ops", 3);

    assert!(result.descendants.iter().any(|id| id == "policy::no-git-ops-lax"));
}