Merge TX1 — taxonomy schema extension

This commit is contained in:
Parfii-bot 2026-04-23 10:22:38 +08:00
commit 36b7941ad2
8 changed files with 447 additions and 1 deletions

View file

@ -24,3 +24,16 @@ rust-module = "gates::policy_no_git_ops"
event = "PreToolUse:Bash"
severity = "block"
bypass-env = "ORCHESTRATOR_META"
[taxonomy]
kingdom = "capability"
mechanism = "gate"
domain = "policy"
layer = "agent-substrate"
stability = "stable"
language = "rust"
[lineage]
parents = []
creator = "ag-orchestrator-human"
created = "2026-04-23"

View file

@ -19,3 +19,16 @@ path = "text.md"
rust-module = "verifies::quality_cargo_check_green"
run-mode = "both"
when = "on-return"
[taxonomy]
kingdom = "capability"
mechanism = "verify"
domain = "quality"
layer = "agent-substrate"
stability = "stable"
language = "rust"
[lineage]
parents = []
creator = "ag-orchestrator-human"
created = "2026-04-23"

View file

@ -27,3 +27,16 @@ path = "text.md"
rust-module = "gates::tools_bash_allowlist"
event = "PreToolUse:Bash"
severity = "block"
[taxonomy]
kingdom = "capability"
mechanism = "gate"
domain = "tools"
layer = "agent-substrate"
stability = "stable"
language = "rust"
[lineage]
parents = []
creator = "ag-orchestrator-human"
created = "2026-04-23"

View file

@ -54,6 +54,41 @@ pub struct SideEffect {
pub domain: String,
}
/// Facet labels classifying a primitive, using the vocabulary in
/// `docs/TAXONOMY.md`.
///
/// Every facet is independently optional: the container-level
/// `#[serde(default)]` fills any key missing from the input with `None`.
/// Values are held as plain strings, so vocabulary this crate has never seen
/// still deserialises. Nothing here is format-specific, which lets the same
/// struct be read from YAML atom frontmatter or from TOML capability /
/// manifest / role files.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
#[serde(default)]
pub struct TaxonomyFacets {
    /// What kind of thing this is.
    pub kingdom: Option<String>,
    /// How it acts.
    pub mechanism: Option<String>,
    /// Subject-matter area.
    pub domain: Option<String>,
    /// Which substrate it lives in.
    pub layer: Option<String>,
    /// When it is active.
    pub stage: Option<String>,
    /// Maturity level.
    pub stability: Option<String>,
    /// Primary implementation medium.
    pub language: Option<String>,
}
/// Lineage metadata: graph edges and provenance for a primitive.
///
/// Every key is optional. The container-level `#[serde(default)]` turns a
/// missing `parents` key into an empty `Vec` and every other missing key
/// into `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
#[serde(default)]
pub struct Lineage {
    /// Wikilinks (`[[slug]]`) to the primitives this one extends or composes.
    pub parents: Vec<String>,
    /// DNA id or human slug of whoever authored the primitive.
    pub creator: Option<String>,
    /// Authoring date, stored as the raw string from the manifest
    /// (`docs/TAXONOMY.md` specifies ISO-8601 `YYYY-MM-DD`).
    pub created: Option<String>,
    /// DNA id of the primitive this one was forked from, if any.
    pub fork_from: Option<String>,
}
/// Fully-parsed atom metadata — one canonical struct shared across crates.
#[derive(Debug, Clone)]
pub struct AtomMeta {
@ -71,6 +106,8 @@ pub struct AtomMeta {
pub keywords: Vec<String>,
pub related: Vec<String>,
pub body: String,
pub taxonomy: Option<TaxonomyFacets>,
pub lineage: Option<Lineage>,
}
/// Raw deserialisation target — kept private, `AtomMeta` is the public shape.
@ -94,6 +131,10 @@ pub struct Frontmatter {
pub keywords: Vec<String>,
#[serde(default)]
pub related: Vec<String>,
#[serde(default)]
pub taxonomy: Option<TaxonomyFacets>,
#[serde(default)]
pub lineage: Option<Lineage>,
}
#[derive(Debug, Deserialize)]

View file

@ -16,7 +16,8 @@ pub mod walk;
pub use error::Error;
pub use frontmatter::{
parse_frontmatter, AtomKind, AtomMeta, Frontmatter, SideEffect, MAX_FRONTMATTER_BYTES,
parse_frontmatter, AtomKind, AtomMeta, Frontmatter, Lineage, SideEffect, TaxonomyFacets,
MAX_FRONTMATTER_BYTES,
};
pub use walk::{
classify_wikilink, discover_atoms, is_atom_target, parse_wikilink, safe_join, split_atom_id,

View file

@ -71,6 +71,8 @@ fn build_meta(fm: Frontmatter, body: &str, md_path: &Path) -> Result<AtomMeta, E
keywords: fm.keywords,
related: fm.related,
body: body.to_string(),
taxonomy: fm.taxonomy,
lineage: fm.lineage,
})
}

View file

@ -0,0 +1,154 @@
//! Taxonomy + Lineage facet parsing smoke tests.
//!
//! Covers (a) full 7-facet taxonomy + lineage with multiple parents,
//! (b) partial taxonomy (only kingdom + mechanism) — remaining fields None,
//! (c) backward-compat: atom without any [taxonomy]/[lineage] still parses,
//! (d) lineage.parents array parses correctly (multi-parent diamond lineage).
use kei_atom_discovery::{discover_atoms, Lineage, TaxonomyFacets};
use std::fs;
use std::path::Path;
use tempfile::tempdir;
// Atom fixture with every taxonomy facet populated (all seven keys) and every
// lineage key populated: two wikilink parents, creator, created and fork_from.
const ATOM_FULL: &str = r#"---
atom: kei-task::create
kind: command
version: "0.1.0"
input:
schema: schemas/create-input.json
output:
schema: schemas/create-output.json
side_effects:
- { op: write, domain: kei-task-db }
idempotent: false
stability: stable
keywords: [task]
related: []
taxonomy:
kingdom: atom
mechanism: transform
domain: task
layer: atom-substrate
stage: runtime
stability: stable
language: rust
lineage:
parents:
- "[[kei-task::add-dependency]]"
- "[[rules/RULE 0.12]]"
creator: ag-orchestrator-human
created: "2026-04-23"
fork_from: dna-abc123
---
# body
"#;
// Atom fixture whose taxonomy sets only `kingdom` and `mechanism`, and whose
// lineage carries nothing but an explicitly empty `parents` list.
const ATOM_PARTIAL: &str = r#"---
atom: kei-task::update
kind: command
version: "0.1.0"
input:
schema: schemas/create-input.json
output:
schema: schemas/create-output.json
side_effects: []
taxonomy:
kingdom: atom
mechanism: transform
lineage:
parents: []
---
# body
"#;
// Atom fixture with neither a `taxonomy` nor a `lineage` key, used for the
// backward-compatibility check.
const ATOM_NO_FACETS: &str = r#"---
atom: kei-task::delete
kind: command
version: "0.1.0"
input:
schema: schemas/create-input.json
output:
schema: schemas/create-output.json
side_effects: []
---
# body
"#;
/// Lays out a minimal on-disk atom under `root/<crate_name>/atoms/`.
///
/// Writes `<verb>.md` containing `body`, then `{}` stubs for
/// `schemas/create-input.json` and `schemas/create-output.json`.
/// Panics on any filesystem error.
fn write_atom(root: &Path, crate_name: &str, verb: &str, body: &str) {
    let atoms_dir = root.join(crate_name).join("atoms");
    let schemas_dir = atoms_dir.join("schemas");
    // Creating the nested schemas directory also creates `atoms/` itself.
    fs::create_dir_all(&schemas_dir).unwrap();

    let atom_file = atoms_dir.join(format!("{verb}.md"));
    fs::write(atom_file, body).unwrap();

    for schema_name in ["create-input.json", "create-output.json"] {
        fs::write(schemas_dir.join(schema_name), "{}").unwrap();
    }
}
/// Returns the first atom in `atoms` whose `full_id` equals `full_id`.
///
/// Panics with "atom present" when no atom matches.
fn find<'a>(
    atoms: &'a [kei_atom_discovery::AtomMeta],
    full_id: &str,
) -> &'a kei_atom_discovery::AtomMeta {
    let mut candidates = atoms.iter();
    let hit = candidates.find(|meta| meta.full_id == full_id);
    hit.expect("atom present")
}
/// A frontmatter block carrying every taxonomy facet and every scalar
/// lineage field must surface each value unchanged on the discovered atom.
#[test]
fn full_taxonomy_and_lineage_parse() {
    let workspace = tempdir().unwrap();
    write_atom(workspace.path(), "kei-task", "create", ATOM_FULL);
    let discovered = discover_atoms(workspace.path());
    let meta = find(&discovered, "kei-task::create");

    // All seven facets are compared at once; a mismatch prints both structs.
    let expected_facets = TaxonomyFacets {
        kingdom: Some("atom".to_string()),
        mechanism: Some("transform".to_string()),
        domain: Some("task".to_string()),
        layer: Some("atom-substrate".to_string()),
        stage: Some("runtime".to_string()),
        stability: Some("stable".to_string()),
        language: Some("rust".to_string()),
    };
    let facets: &TaxonomyFacets = meta.taxonomy.as_ref().expect("taxonomy present");
    assert_eq!(facets, &expected_facets);

    // `parents` is checked separately in `lineage_parents_array_preserved`.
    let lineage: &Lineage = meta.lineage.as_ref().expect("lineage present");
    assert_eq!(lineage.creator.as_deref(), Some("ag-orchestrator-human"));
    assert_eq!(lineage.created.as_deref(), Some("2026-04-23"));
    assert_eq!(lineage.fork_from.as_deref(), Some("dna-abc123"));
}
/// Facets omitted from a partial `taxonomy` block must come back as `None`,
/// and an explicitly empty `parents` list must stay empty.
#[test]
fn partial_taxonomy_leaves_rest_none() {
    let workspace = tempdir().unwrap();
    write_atom(workspace.path(), "kei-task", "update", ATOM_PARTIAL);
    let discovered = discover_atoms(workspace.path());
    let meta = find(&discovered, "kei-task::update");

    // Only the two facets present in the fixture are set; the rest default to `None`.
    let expected_facets = TaxonomyFacets {
        kingdom: Some("atom".to_string()),
        mechanism: Some("transform".to_string()),
        ..TaxonomyFacets::default()
    };
    let facets = meta.taxonomy.as_ref().expect("taxonomy present");
    assert_eq!(facets, &expected_facets);

    let lineage = meta.lineage.as_ref().expect("lineage present");
    assert!(lineage.parents.is_empty());
    assert!(lineage.creator.is_none());
}
/// An atom written before the schema extension (no `taxonomy` or `lineage`
/// key at all) must still be discovered, with both fields left as `None`.
#[test]
fn no_facets_section_still_parses_backward_compat() {
    let workspace = tempdir().unwrap();
    write_atom(workspace.path(), "kei-task", "delete", ATOM_NO_FACETS);
    let discovered = discover_atoms(workspace.path());
    let meta = find(&discovered, "kei-task::delete");

    assert_eq!(meta.taxonomy, None, "no [taxonomy] → None");
    assert_eq!(meta.lineage, None, "no [lineage] → None");
}
/// Multi-parent lineage must keep both wikilinks, verbatim and in order.
#[test]
fn lineage_parents_array_preserved() {
    let workspace = tempdir().unwrap();
    write_atom(workspace.path(), "kei-task", "create", ATOM_FULL);
    let discovered = discover_atoms(workspace.path());
    let meta = find(&discovered, "kei-task::create");

    let lineage = meta.lineage.as_ref().expect("lineage present");
    // One equality covers the length and both elements.
    assert_eq!(
        lineage.parents,
        ["[[kei-task::add-dependency]]", "[[rules/RULE 0.12]]"]
    );
}

209
docs/TAXONOMY.md Normal file
View file

@ -0,0 +1,209 @@
# TAXONOMY — Canonical Facet Vocabulary
> Graph, not tree. Every primitive is a node; facets are orthogonal labels.
> Multi-faceted nodes are allowed (and expected). No facet is mandatory —
> the entire `[taxonomy]` and `[lineage]` sections are OPTIONAL on every
> manifest shape (`capability.toml`, `_manifests/**/*.toml`, `_roles/*.toml`,
> atom markdown frontmatter).
---
## Why facets, not a tree
A classical rooted tree (e.g. "capability → gate → policy → no-git-ops")
forces an arbitrary primary axis. Real primitives live in several axes at
once: `no-git-ops` is a *capability* (kingdom), a *gate* (mechanism), a
*policy* (domain), targets the *agent-substrate* (layer), is *stable*, and
ships as a *rust* module. A tree makes five of those six second-class.
Facets let a catalog query along any axis independently:
- "all `gate` mechanisms" — security review surface
- "all `verify` mechanisms" — quality/CI surface
- "all `policy`-domain primitives" — rule-coverage surface
- "all `experimental` stability" — risk review
- "all `rust` language" — build-graph
No primitive needs to choose a primary axis. Multiple facets coexist.
---
## Facets
### `kingdom` — What kind of thing is this?
```
kingdom = capability | atom | skill | block | runtime | schema | role | manifest
```
- `capability` — agent-substrate capability (gate / verify / transform)
- `atom` — substrate atom (command / query / stream / transform)
- `skill` — user-invocable skill (`/skill-name`)
- `block` — composable prompt-block
- `runtime` — runtime module consuming atoms/capabilities
- `schema` — JSON schema referenced by atom I/O
- `role` — agent-role manifest (`_roles/*.toml`)
- `manifest` — assembled agent manifest (`_manifests/**/*.toml`)
### `mechanism` — How does it act?
```
mechanism = gate | verify | transform | store | compose | fetch | analyze | router | cache
```
- `gate` — PreToolUse-style deny decision (e.g. `no-git-ops`, `bash-allowlist`)
- `verify` — post-condition check (e.g. `cargo-check-green`)
- `transform` — pure value-in/value-out (no side-effects)
- `store` — persisted state (SQLite, filesystem, ledger)
- `compose` — assembles other primitives (manifests, pipes)
- `fetch` — retrieves external data (provider, api)
- `analyze` — inspects input, emits report
- `router` — dispatches based on classification
- `cache` — memoizes pure invocations
### `domain` — What subject-matter area?
```
domain = policy | quality | scope | safety | output | tools | research | content | social | task | sage
```
- `policy` — RULE 0.x enforcement / compliance gates
- `quality` — cargo-check, tests-green, constructor-pattern
- `scope` — write-whitelist, file-denylist, path-guards
- `safety` — secret scanning, citation verification
- `output` — response shape, formatter, report-gen
- `tools` — tool allowlists, bash patterns, deny-tools
- `research` — research agents, search-core, fetch primitives
- `content` — content-store, content-normalizer
- `social` — social-store, social-normalizer
- `task` — task primitives (kei-task)
- `sage` — higher-level reasoning / kei-sage primitives
### `layer` — Which substrate does it live in?
```
layer = atom-substrate | agent-substrate | cross | tooling
```
- `atom-substrate` — substrate for callable atoms (kei-runtime, kei-pipe)
- `agent-substrate` — substrate for agent manifests (capabilities, roles)
- `cross` — spans both (shared discovery, schemas)
- `tooling` — pure developer tooling (kei-forge, validators)
### `stage` — When is it active?
```
stage = runtime | design-time | ephemeral
```
- `runtime` — executes during agent turns
- `design-time` — consumed at assembly / scaffold time
- `ephemeral` — one-shot (migration, provision, smoke)
### `stability` — Maturity
```
stability = experimental | beta | stable | deprecated
```
Standard semver-style ladder. `deprecated` primitives must name a successor
in `[lineage]` or their `text.md`.
### `language` — Implementation medium
```
language = rust | shell | md | toml | json | jsonschema
```
- `rust` — primary implementation in a Rust crate
- `shell` — bash / posix script
- `md` — markdown (atoms, capability text, documentation)
- `toml` — config-only (capability manifest, role manifest)
- `json` / `jsonschema` — data / schema definitions
Multiple languages can apply (e.g. atom markdown with a JSON schema attached
and a Rust runtime) — but the `language` facet names the PRIMARY medium of
the node being described.
---
## `[lineage]` — Graph edges, not tree edges
```
parents = ["[[ancestor-one]]", "[[ancestor-two]]"] # wikilinks to predecessors
creator = "ag-orchestrator-human" # DNA id or human slug
created = "2026-04-23" # ISO-8601 date
fork_from = "dna-abc123..." # parent DNA if forked
```
- `parents` — wikilinks (`[[slug]]`) to primitives this one extends or
composes. Multiple parents allowed (diamond lineage). A primitive with
no `parents` is a root of its sub-graph.
- `creator` — identity responsible for the primitive's existence. For
human-authored nodes: `ag-orchestrator-human` or a slug. For
agent-authored nodes: the agent's DNA id.
- `created` — ISO-8601 date (YYYY-MM-DD). When the manifest was first
authored, not when it was last edited.
- `fork_from` — if this primitive was forked from another (DNA id), record
the source here so the graph shows the edge.
---
## Example — fully-faceted capability manifest
```toml
[capability]
name = "policy::no-git-ops"
category = "policy"
version = "1.0"
description = "..."
rationale = "..."
[restricts]
tool-patterns = ['^git( |$)', '^gh repo']
[parameterized]
accepts = []
[text]
path = "text.md"
[gate]
rust-module = "gates::policy_no_git_ops"
event = "PreToolUse:Bash"
severity = "block"
# Optional — all fields optional individually too.
[taxonomy]
kingdom = "capability"
mechanism = "gate"
domain = "policy"
layer = "agent-substrate"
stability = "stable"
language = "rust"
[lineage]
parents = []
creator = "ag-orchestrator-human"
created = "2026-04-23"
```
---
## Non-breaking contract
- Every field in `[taxonomy]` and `[lineage]` is OPTIONAL.
- The entire `[taxonomy]` and `[lineage]` sections are OPTIONAL.
- Manifests without either section parse exactly as before (for atom
frontmatter this is covered by the `taxonomy_smoke.rs` tests in
`kei-atom-discovery`).
- New primitives SHOULD include at least `kingdom` + `mechanism` + `domain`.
- The facet vocabularies are additive — new values can be appended without
breaking existing consumers. Unknown values pass through as strings.
---
## Rule lock
2026-04-23. Vocabularies live in this file; any new allowed value lands here
first. Runtime consumers (kei-atom-discovery, kei-sage, kei-runtime) MUST
treat unknown values as strings (never crash on new vocabulary).