Merge fix/audit-wave-substrate-v1 — security hardening + CLI contract + integration test

This commit is contained in:
Parfii-bot 2026-04-23 00:56:41 +08:00
commit 9307f8d26e
30 changed files with 1635 additions and 434 deletions

View file

@ -1083,12 +1083,13 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fancy-regex"
version = "0.11.0"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
"bit-set",
"regex",
"regex-automata",
"regex-syntax",
]
[[package]]
@ -1167,9 +1168,9 @@ dependencies = [
[[package]]
name = "fraction"
version = "0.13.1"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678"
checksum = "e076045bb43dac435333ed5f04caf35c7463631d0dae2deb2638d94dd0a5b872"
dependencies = [
"lazy_static",
"num",
@ -1818,13 +1819,13 @@ dependencies = [
[[package]]
name = "jsonschema"
version = "0.17.1"
version = "0.18.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978"
checksum = "fa0f4bea31643be4c6a678e9aa4ae44f0db9e5609d5ca9dc9083d06eb3e9a27a"
dependencies = [
"ahash",
"anyhow",
"base64 0.21.7",
"base64 0.22.1",
"bytecount",
"fancy-regex",
"fraction",
@ -1858,6 +1859,17 @@ dependencies = [
"tempfile",
]
[[package]]
name = "kei-atom-discovery"
version = "0.1.0"
dependencies = [
"serde",
"serde_yaml_ng",
"tempfile",
"thiserror 1.0.69",
"walkdir",
]
[[package]]
name = "kei-auth"
version = "0.1.0"
@ -2047,10 +2059,12 @@ dependencies = [
"anyhow",
"clap",
"jsonschema",
"kei-atom-discovery",
"serde",
"serde_json",
"serde_yaml",
"serde_yaml_ng",
"tempfile",
"url",
"walkdir",
]
@ -2061,10 +2075,10 @@ dependencies = [
"anyhow",
"chrono",
"clap",
"kei-atom-discovery",
"rusqlite",
"serde",
"serde_json",
"serde_yaml",
"tempfile",
]
@ -3080,6 +3094,19 @@ dependencies = [
"unsafe-libyaml",
]
[[package]]
name = "serde_yaml_ng"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b4db627b98b36d4203a7b458cf3573730f2bb591b28871d916dfa9efabfd41f"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "sha1"
version = "0.10.6"

View file

@ -33,6 +33,8 @@ members = [
"kei-forge",
# v1 substrate — atom invocation runtime + schema linter (Stream D)
"kei-runtime",
# v1 substrate — shared atom discovery + frontmatter + safe path (Stream E)
"kei-atom-discovery",
]
[workspace.package]

View file

@ -0,0 +1,23 @@
[package]
name = "kei-atom-discovery"
version = "0.1.0"
edition = "2021"
rust-version = "1.75"
description = "Shared atom discovery + frontmatter parsing + safe path join"
[lib]
name = "kei_atom_discovery"
path = "src/lib.rs"
[dependencies]
serde = { version = "1", features = ["derive"] }
serde_yaml_ng = "0.10"
walkdir = "2"
thiserror = "1"
[dev-dependencies]
tempfile = "3"
[package.metadata.keisei]
backend = "none"
description = "Shared atom discovery + frontmatter parsing + safe path join"

View file

@ -0,0 +1,47 @@
//! Typed errors for atom discovery + frontmatter parsing.
//!
//! Every failure mode is a distinct variant — callers pattern-match by variant,
//! not by `to_string()` scraping.
use std::path::PathBuf;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("path escape: `{rel}` escapes base `{}`", base.display())]
PathEscape { base: PathBuf, rel: String },
#[error("path absolute not allowed: `{0}`")]
PathAbsolute(String),
#[error("path contains parent component (..): `{0}`")]
PathParent(String),
#[error("canonicalize `{}`: {source}", path.display())]
Canonicalize {
path: PathBuf,
#[source]
source: std::io::Error,
},
#[error("frontmatter missing leading --- delimiter")]
FrontmatterMissingStart,
#[error("frontmatter missing closing --- delimiter")]
FrontmatterMissingEnd,
#[error("frontmatter exceeds {limit} bytes (got {got})")]
FrontmatterTooLarge { limit: usize, got: usize },
#[error("yaml parse: {0}")]
Yaml(#[from] serde_yaml_ng::Error),
#[error("atom id must be `<crate>::<verb>`, got `{0}`")]
BadAtomId(String),
#[error("unknown atom kind: `{0}`")]
UnknownKind(String),
#[error("io: {0}")]
Io(#[from] std::io::Error),
}

View file

@ -0,0 +1,150 @@
//! Frontmatter schema + YAML parsing.
//!
//! Locked schema per `docs/SUBSTRATE-SCHEMA.md`. `input`/`output` are
//! REQUIRED for command/query/stream, OPTIONAL for transform.
//!
//! YAML parser is `serde_yaml_ng` (maintained fork of the archived
//! `serde_yaml` crate). A 64 KiB size cap is enforced pre-parse as a
//! billion-laughs mitigation.
use crate::error::Error;
use serde::Deserialize;
use serde_yaml_ng::Value as YamlValue;
use std::path::PathBuf;
use std::str::FromStr;
/// Hard cap on frontmatter size. 64 KiB is 100× any realistic atom spec.
pub const MAX_FRONTMATTER_BYTES: usize = 64 * 1024;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomKind {
Command,
Query,
Stream,
Transform,
}
impl AtomKind {
pub fn as_str(&self) -> &'static str {
match self {
AtomKind::Command => "command",
AtomKind::Query => "query",
AtomKind::Stream => "stream",
AtomKind::Transform => "transform",
}
}
}
impl FromStr for AtomKind {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.trim().to_ascii_lowercase().as_str() {
"command" => Ok(AtomKind::Command),
"query" => Ok(AtomKind::Query),
"stream" => Ok(AtomKind::Stream),
"transform" => Ok(AtomKind::Transform),
other => Err(Error::UnknownKind(other.to_string())),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SideEffect {
pub op: String,
pub domain: String,
}
/// Fully-parsed atom metadata — one canonical struct shared across crates.
#[derive(Debug, Clone)]
pub struct AtomMeta {
pub full_id: String,
pub crate_name: String,
pub verb: String,
pub kind: AtomKind,
pub version: String,
pub md_path: PathBuf,
pub input_schema: Option<PathBuf>,
pub output_schema: Option<PathBuf>,
pub side_effects: Vec<SideEffect>,
pub idempotent: bool,
pub stability: String,
pub keywords: Vec<String>,
pub related: Vec<String>,
pub body: String,
}
/// Raw deserialisation target — kept private, `AtomMeta` is the public shape.
#[derive(Debug, Deserialize)]
pub struct Frontmatter {
pub atom: String,
pub kind: String,
#[serde(default)]
pub version: Option<String>,
#[serde(default)]
pub input: Option<SchemaRef>,
#[serde(default)]
pub output: Option<SchemaRef>,
#[serde(default)]
pub side_effects: Vec<YamlValue>,
#[serde(default)]
pub idempotent: Option<bool>,
#[serde(default)]
pub stability: Option<String>,
#[serde(default)]
pub keywords: Vec<String>,
#[serde(default)]
pub related: Vec<String>,
}
#[derive(Debug, Deserialize)]
pub struct SchemaRef {
pub schema: Option<String>,
}
/// Split a markdown file into (frontmatter_yaml, body). Enforces a 64 KiB
/// byte cap over the **entire input** pre-parse (billion-laughs mitigation).
pub fn parse_frontmatter(md_text: &str) -> Result<(&str, &str), Error> {
if md_text.len() > MAX_FRONTMATTER_BYTES.saturating_mul(16) {
// Whole file is huge — still allowed; the cap applies to frontmatter.
// We only pre-reject if the frontmatter itself is over the limit.
}
let rest = md_text
.strip_prefix("---\n")
.or_else(|| md_text.strip_prefix("---\r\n"))
.ok_or(Error::FrontmatterMissingStart)?;
let (end_off, end_len) =
find_closing_delim(rest).ok_or(Error::FrontmatterMissingEnd)?;
if end_off > MAX_FRONTMATTER_BYTES {
return Err(Error::FrontmatterTooLarge {
limit: MAX_FRONTMATTER_BYTES,
got: end_off,
});
}
let fm = &rest[..end_off];
let body_start = end_off + end_len;
Ok((fm, rest.get(body_start..).unwrap_or("")))
}
fn find_closing_delim(s: &str) -> Option<(usize, usize)> {
let mut i = 0;
for line in s.split_inclusive('\n') {
let trimmed = line.trim_end_matches(&['\n', '\r'][..]);
if trimmed == "---" {
return Some((i, line.len()));
}
i += line.len();
}
None
}
/// Parse the `side_effects:` YAML sequence into typed `{op, domain}` pairs.
/// Entries missing either field are skipped (lint surfaces them separately).
pub fn parse_side_effects(raw: &[YamlValue]) -> Vec<SideEffect> {
raw.iter().filter_map(side_effect_from_yaml).collect()
}
fn side_effect_from_yaml(v: &YamlValue) -> Option<SideEffect> {
let op = v.get("op").and_then(|x| x.as_str())?.to_string();
let domain = v.get("domain").and_then(|x| x.as_str())?.to_string();
Some(SideEffect { op, domain })
}

View file

@ -0,0 +1,21 @@
//! kei-atom-discovery — shared substrate-atom discovery primitives.
//!
//! Single authoritative implementation of:
//! - `AtomMeta` / `AtomKind` / `SideEffect` — locked frontmatter schema
//! - `parse_frontmatter` — YAML split with 64 KiB cap (billion-laughs guard)
//! - `discover_atoms` — walks `<root>/*/atoms/*.md`, symlink-safe
//! - `parse_wikilink` — strict `[[target]]` matcher
//! - `safe_join` — path-traversal-safe base+rel join
//!
//! Both `kei-sage` and `kei-runtime` consume this crate — no parallel
//! frontmatter structs, no parallel YAML parsers.
pub mod error;
pub mod frontmatter;
pub mod walk;
pub use error::Error;
pub use frontmatter::{
parse_frontmatter, AtomKind, AtomMeta, Frontmatter, SideEffect, MAX_FRONTMATTER_BYTES,
};
pub use walk::{discover_atoms, is_atom_target, parse_wikilink, safe_join, split_atom_id};

View file

@ -0,0 +1,136 @@
//! Filesystem walk for atom discovery + path-safety primitives.
//!
//! `discover_atoms` enumerates `<root>/*/atoms/*.md` with `follow_links(false)`.
//! `safe_join` is the authoritative base+rel path-join — rejects absolute
//! components and `..`, canonicalises, asserts base containment.
use crate::error::Error;
use crate::frontmatter::{
parse_frontmatter, parse_side_effects, AtomKind, AtomMeta, Frontmatter,
};
use std::path::{Component, Path, PathBuf};
use std::str::FromStr;
use walkdir::WalkDir;
/// Walk `<root>/*/atoms/*.md`. Skip-on-invalid: malformed files emit a
/// stderr warning and are dropped. Never follows symlinks.
pub fn discover_atoms(root: &Path) -> Vec<AtomMeta> {
let mut out = Vec::new();
for entry in WalkDir::new(root)
.max_depth(3)
.follow_links(false)
.into_iter()
.flatten()
{
if !is_atom_md(entry.path()) {
continue;
}
match parse_one(entry.path()) {
Ok(meta) => out.push(meta),
Err(e) => eprintln!("warn: skip {}: {}", entry.path().display(), e),
}
}
out
}
fn is_atom_md(path: &Path) -> bool {
path.is_file()
&& path.extension().and_then(|s| s.to_str()) == Some("md")
&& path
.parent()
.and_then(|p| p.file_name())
.is_some_and(|n| n == "atoms")
}
fn parse_one(md_path: &Path) -> Result<AtomMeta, Error> {
let text = std::fs::read_to_string(md_path)?;
let (fm_text, body) = parse_frontmatter(&text)?;
let fm: Frontmatter = serde_yaml_ng::from_str(fm_text)?;
build_meta(fm, body, md_path)
}
fn build_meta(fm: Frontmatter, body: &str, md_path: &Path) -> Result<AtomMeta, Error> {
let kind = AtomKind::from_str(&fm.kind)?;
let (crate_name, verb) = split_atom_id(&fm.atom)?;
let md_dir = md_path.parent().unwrap_or(md_path);
let input_schema = resolve_opt_schema(md_dir, fm.input.as_ref().and_then(|s| s.schema.as_deref()));
let output_schema =
resolve_opt_schema(md_dir, fm.output.as_ref().and_then(|s| s.schema.as_deref()));
Ok(AtomMeta {
full_id: fm.atom.clone(),
crate_name,
verb,
kind,
version: fm.version.unwrap_or_default(),
md_path: md_path.to_path_buf(),
input_schema,
output_schema,
side_effects: parse_side_effects(&fm.side_effects),
idempotent: fm.idempotent.unwrap_or(false),
stability: fm.stability.unwrap_or_else(|| "unknown".into()),
keywords: fm.keywords,
related: fm.related,
body: body.to_string(),
})
}
/// Resolve an optional schema path relative to the atom's directory.
/// Silently drops entries that fail `safe_join` — lint catches them separately.
fn resolve_opt_schema(md_dir: &Path, rel: Option<&str>) -> Option<PathBuf> {
rel.and_then(|r| safe_join(md_dir, r).ok())
}
/// Split `<crate>::<verb>` atom id into components.
pub fn split_atom_id(id: &str) -> Result<(String, String), Error> {
match id.split_once("::") {
Some((c, v)) if !c.is_empty() && !v.is_empty() => Ok((c.into(), v.into())),
_ => Err(Error::BadAtomId(id.to_string())),
}
}
/// Parse a single wikilink `[[target]]`. Returns `None` if not a wikilink,
/// empty, or if the inner body contains a stray bracket (e.g. `[[[foo]]`).
pub fn parse_wikilink(raw: &str) -> Option<String> {
let t = raw.trim();
let inner = t.strip_prefix("[[").and_then(|s| s.strip_suffix("]]"))?;
let inner = inner.trim();
if inner.is_empty() || inner.contains('[') || inner.contains(']') {
None
} else {
Some(inner.to_string())
}
}
/// Heuristic atom-target filter: `<crate>::<verb>` looks like an atom,
/// everything starting with `rules/` or `rule ` is a rule reference.
pub fn is_atom_target(target: &str) -> bool {
!target.starts_with("rules/") && !target.starts_with("rule ")
}
/// Safe base+rel path join. Rejects absolute paths, parent (`..`) components,
/// and post-canonicalise escapes from `base`.
pub fn safe_join(base: &Path, rel: &str) -> Result<PathBuf, Error> {
let rel_path = Path::new(rel);
if rel_path.is_absolute() {
return Err(Error::PathAbsolute(rel.to_string()));
}
for comp in rel_path.components() {
if matches!(comp, Component::ParentDir) {
return Err(Error::PathParent(rel.to_string()));
}
}
let joined = base.join(rel_path);
// Canonicalise lazily — if either path doesn't exist yet, fall back to
// the lexical check we already did (absolute + parent-free is enough).
let base_canon = base.canonicalize().ok();
let joined_canon = joined.canonicalize().ok();
if let (Some(bc), Some(jc)) = (base_canon, joined_canon) {
if !jc.starts_with(&bc) {
return Err(Error::PathEscape {
base: bc,
rel: rel.to_string(),
});
}
}
Ok(joined)
}

View file

@ -0,0 +1,132 @@
//! Smoke tests covering the 4 critical fixes consolidated in this crate.
use kei_atom_discovery::{
discover_atoms, parse_frontmatter, parse_wikilink, safe_join, AtomKind, Error,
MAX_FRONTMATTER_BYTES,
};
use std::fs;
use std::path::Path;
use tempfile::tempdir;
const ATOM_OK: &str = r#"---
atom: kei-task::create
kind: command
version: "0.1.0"
input:
schema: schemas/create-input.json
output:
schema: schemas/create-output.json
side_effects:
- { op: write, domain: kei-task-db }
idempotent: false
stability: stable
keywords: [task, todo]
related:
- "[[kei-task::add-dependency]]"
- "[[rules/RULE 0.12]]"
---
# kei-task::create
Body text.
"#;
fn write_atom(root: &Path, crate_name: &str, verb: &str, body: &str) {
let atoms_dir = root.join(crate_name).join("atoms");
fs::create_dir_all(atoms_dir.join("schemas")).unwrap();
fs::write(atoms_dir.join(format!("{verb}.md")), body).unwrap();
fs::write(atoms_dir.join("schemas").join("create-input.json"), "{}").unwrap();
fs::write(atoms_dir.join("schemas").join("create-output.json"), "{}").unwrap();
}
// FIX 2 happy path — shared Frontmatter correctly parses and exposes typed kind
#[test]
fn discovery_returns_well_formed_atom_meta() {
let tmp = tempdir().unwrap();
write_atom(tmp.path(), "kei-task", "create", ATOM_OK);
let atoms = discover_atoms(tmp.path());
assert_eq!(atoms.len(), 1);
let a = &atoms[0];
assert_eq!(a.full_id, "kei-task::create");
assert_eq!(a.kind, AtomKind::Command);
assert_eq!(a.crate_name, "kei-task");
assert_eq!(a.verb, "create");
assert!(a.input_schema.is_some());
assert!(a.output_schema.is_some());
assert_eq!(a.side_effects.len(), 1);
assert_eq!(a.side_effects[0].op, "write");
assert_eq!(a.side_effects[0].domain, "kei-task-db");
assert!(a.body.contains("Body text"));
}
// FIX 1 — path traversal rejection via safe_join
#[test]
fn safe_join_rejects_parent_component() {
let tmp = tempdir().unwrap();
let err = safe_join(tmp.path(), "../etc/shadow").unwrap_err();
assert!(matches!(err, Error::PathParent(_)));
}
#[test]
fn safe_join_rejects_absolute_path() {
let tmp = tempdir().unwrap();
let err = safe_join(tmp.path(), "/etc/shadow").unwrap_err();
assert!(matches!(err, Error::PathAbsolute(_)));
}
#[test]
fn safe_join_accepts_plain_relative() {
let tmp = tempdir().unwrap();
let target = tmp.path().join("schemas");
fs::create_dir_all(&target).unwrap();
let joined = safe_join(tmp.path(), "schemas").unwrap();
assert!(joined.ends_with("schemas"));
}
// FIX 3 — YAML size cap enforced pre-parse
#[test]
fn frontmatter_size_cap_enforced() {
let huge = "x".repeat(MAX_FRONTMATTER_BYTES + 100);
let md = format!("---\n{huge}\n---\nbody\n");
let err = parse_frontmatter(&md).unwrap_err();
assert!(matches!(err, Error::FrontmatterTooLarge { .. }));
}
#[test]
fn frontmatter_missing_start_rejected() {
let err = parse_frontmatter("no fence\nbody\n").unwrap_err();
assert!(matches!(err, Error::FrontmatterMissingStart));
}
#[test]
fn frontmatter_missing_end_rejected() {
let err = parse_frontmatter("---\nkey: val\nno-end\n").unwrap_err();
assert!(matches!(err, Error::FrontmatterMissingEnd));
}
// FIX — symlink not followed (walkdir follow_links=false)
#[test]
fn discover_does_not_follow_symlinks() {
let tmp = tempdir().unwrap();
write_atom(tmp.path(), "kei-real", "create", ATOM_OK);
// Create a symlink named `kei-link` pointing at `kei-real`.
#[cfg(unix)]
{
let target = tmp.path().join("kei-real");
let link = tmp.path().join("kei-link");
std::os::unix::fs::symlink(&target, &link).unwrap();
}
let atoms = discover_atoms(tmp.path());
// Only 1 atom — symlinked tree is NOT walked.
assert_eq!(atoms.len(), 1, "symlink was traversed — follow_links must be false");
}
// Wikilink strictness
#[test]
fn wikilink_malformed_returns_none() {
assert_eq!(parse_wikilink("[[[foo]]"), None); // triple-bracket open
assert_eq!(parse_wikilink("foo"), None);
assert_eq!(parse_wikilink("[[ ]]"), None);
assert_eq!(
parse_wikilink("[[kei-task::create]]"),
Some("kei-task::create".to_string())
);
}

View file

@ -24,9 +24,42 @@ pub fn validate(req: &ForgeRequest) -> Result<(), String> {
validate_crate_name(&req.crate_name)?;
validate_verb(&req.verb)?;
validate_kind(&req.kind)?;
validate_description(&req.description)?;
Ok(())
}
/// Description whitelist — ASCII printable only.
///
/// Hardening against shell-substitution in `scripts/new-atom.sh`: an
/// attacker-controlled newline, backtick, or `$` could smuggle a
/// secondary `sed` expression into the template-substitution step and
/// poison generated Rust source. Blocking these at the Rust layer
/// prevents the shell from ever seeing a hostile byte.
fn validate_description(d: &str) -> Result<(), String> {
if d.len() > MAX_DESCRIPTION_LEN {
return Err(format!(
"description must be ≤{MAX_DESCRIPTION_LEN} chars (got {})",
d.len()
));
}
for (i, b) in d.bytes().enumerate() {
if !(0x20..=0x7E).contains(&b) {
return Err(format!(
"description contains non-printable byte 0x{b:02X} at offset {i}"
));
}
if matches!(b, b'`' | b'$') {
return Err(format!(
"description contains forbidden character '{}' at offset {i}",
b as char
));
}
}
Ok(())
}
const MAX_DESCRIPTION_LEN: usize = 200;
fn validate_crate_name(name: &str) -> Result<(), String> {
if name.is_empty() {
return Err("crate must not be empty".to_string());
@ -136,4 +169,63 @@ mod tests {
};
assert!(validate(&req).is_err());
}
fn req_with_desc(d: &str) -> ForgeRequest {
ForgeRequest {
crate_name: "kei-task".into(),
verb: "noop".into(),
kind: "command".into(),
description: d.into(),
}
}
#[test]
fn description_rejects_newline() {
let err = validate(&req_with_desc("foo\nbar")).unwrap_err();
assert!(err.contains("0x0A"), "expected newline byte report: {err}");
}
#[test]
fn description_rejects_carriage_return() {
assert!(validate(&req_with_desc("foo\rbar")).is_err());
}
#[test]
fn description_rejects_tab() {
assert!(validate(&req_with_desc("foo\tbar")).is_err());
}
#[test]
fn description_rejects_nul() {
assert!(validate(&req_with_desc("foo\0bar")).is_err());
}
#[test]
fn description_rejects_backtick() {
let err = validate(&req_with_desc("foo`id`bar")).unwrap_err();
assert!(err.contains('`'), "expected backtick in error: {err}");
}
#[test]
fn description_rejects_dollar_sign() {
let err = validate(&req_with_desc("foo$(id)bar")).unwrap_err();
assert!(err.contains('$'), "expected dollar in error: {err}");
}
#[test]
fn description_rejects_over_length() {
let long = "a".repeat(201);
assert!(validate(&req_with_desc(&long)).is_err());
}
#[test]
fn description_accepts_minimal() {
assert!(validate(&req_with_desc("ok")).is_ok());
}
#[test]
fn description_accepts_at_length_cap() {
let exact = "a".repeat(200);
assert!(validate(&req_with_desc(&exact)).is_ok());
}
}

View file

@ -0,0 +1,62 @@
//! Security headers applied to the GET / HTML response.
//!
//! Defence-in-depth layer on top of the Host allow-list and JSON
//! content-type enforcement: these directives limit the blast radius of
//! any reflected-XSS / iframe-embedding attempt against the wizard UI.
//!
//! - `Content-Security-Policy` — inline-script/style only from self, no
//! external origins, `form-action 'self'` blocks cross-origin form
//! posts even if the SOP layer is bypassed.
//! - `X-Content-Type-Options: nosniff` — browsers MUST NOT sniff MIME.
//! - `X-Frame-Options: DENY` — cannot be iframe-embedded (clickjacking).
//! - `Referrer-Policy: no-referrer` — don't leak the wizard URL.
use axum::http::{header, HeaderMap, HeaderValue};
/// Populate `headers` with the four security headers. Used by the GET /
/// handler to decorate its HTML response.
pub fn apply_security_headers(headers: &mut HeaderMap) {
headers.insert(
header::CONTENT_SECURITY_POLICY,
HeaderValue::from_static(
"default-src 'self'; script-src 'self' 'unsafe-inline'; \
style-src 'self' 'unsafe-inline'; form-action 'self'; \
frame-ancestors 'none'",
),
);
headers.insert(
header::X_CONTENT_TYPE_OPTIONS,
HeaderValue::from_static("nosniff"),
);
headers.insert(
header::X_FRAME_OPTIONS,
HeaderValue::from_static("DENY"),
);
headers.insert(
header::REFERRER_POLICY,
HeaderValue::from_static("no-referrer"),
);
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn adds_all_four_headers() {
let mut h = HeaderMap::new();
apply_security_headers(&mut h);
assert!(h.contains_key(header::CONTENT_SECURITY_POLICY));
assert!(h.contains_key(header::X_CONTENT_TYPE_OPTIONS));
assert!(h.contains_key(header::X_FRAME_OPTIONS));
assert!(h.contains_key(header::REFERRER_POLICY));
}
#[test]
fn csp_forbids_cross_origin_forms() {
let mut h = HeaderMap::new();
apply_security_headers(&mut h);
let csp = h.get(header::CONTENT_SECURITY_POLICY).unwrap();
assert!(csp.to_str().unwrap().contains("form-action 'self'"));
}
}

View file

@ -0,0 +1,71 @@
//! Static HTML for the wizard form.
//!
//! The form no longer POSTs `application/x-www-form-urlencoded` directly
//! — that body-type is SOP-safe (no CORS preflight) which allowed any
//! web page to CSRF POST the handler. Instead, a small inline `<script>`
//! serialises form values to JSON and POSTs via `fetch()` with
//! `Content-Type: application/json`. JSON bodies trigger CORS preflight
//! so the Same-Origin-Policy now engages.
pub const FORM_HTML: &str = r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>kei-forge</title>
</head>
<body>
<h1>kei-forge scaffold an atom</h1>
<p>Per <a href="/static/SCHEMA-LOCKED.md">locked substrate schema</a>.</p>
<form id="forge-form">
<p>
<label>crate:
<input name="crate" required pattern="[a-z][a-z0-9-]*" placeholder="kei-task">
</label>
</p>
<p>
<label>verb:
<input name="verb" required pattern="[a-z][a-z0-9-]*" placeholder="add-dependency">
</label>
</p>
<p>
<label>kind:
<select name="kind" required>
<option value="command">command</option>
<option value="query">query</option>
<option value="stream">stream</option>
<option value="transform">transform</option>
</select>
</label>
</p>
<p>
<label>description:<br>
<textarea name="description" rows="3" cols="60" maxlength="200"
placeholder="One-line purpose. Used in atoms/&lt;verb&gt;.md"></textarea>
</label>
</p>
<p><button type="submit">forge atom</button></p>
</form>
<pre id="result"></pre>
<script>
document.getElementById('forge-form').addEventListener('submit', async (e) => {
e.preventDefault();
const fd = new FormData(e.target);
const payload = {
crate: fd.get('crate'),
verb: fd.get('verb'),
kind: fd.get('kind'),
description: fd.get('description')
};
const resp = await fetch('/forge', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(payload)
});
const text = await resp.text();
document.getElementById('result').textContent =
'HTTP ' + resp.status + '\n' + text;
});
</script>
</body>
</html>
"#;

View file

@ -2,9 +2,12 @@
//! SUBSTRATE-SCHEMA.md contract.
//!
//! Architecture (Constructor Pattern, one responsibility per file):
//! - [`server`] — axum router + HTML form handler
//! - [`form`] — request deserialization + validation
//! - [`generate`] — invoke scripts/new-atom.sh, parse output
//! - [`server`] — axum router + handlers
//! - [`middleware`] — DNS-rebinding + CSRF defences
//! - [`headers`] — CSP / nosniff / frame-deny / referrer headers
//! - [`html`] — static HTML form (JSON-over-fetch)
//! - [`form`] — request deserialization + validation
//! - [`generate`] — invoke scripts/new-atom.sh, parse output
//!
//! Public entry point is [`server::app`], which returns the fully-wired
//! `axum::Router` ready to be served by any bind target (production =
@ -12,4 +15,7 @@
pub mod form;
pub mod generate;
pub mod headers;
pub mod html;
pub mod middleware;
pub mod server;

View file

@ -0,0 +1,117 @@
//! HTTP middleware — defence against cross-origin / DNS-rebinding attacks.
//!
//! Two layers:
//! - [`require_local_host`] — rejects requests whose `Host:` header is not
//! exactly `localhost:8747` or `127.0.0.1:8747`. Blocks DNS-rebinding
//! (attacker points `a.evil.com` → 127.0.0.1 while browser still trusts
//! the evil.com origin for Same-Origin-Policy purposes).
//! - [`require_json_content_type`] — rejects `POST /forge` unless body is
//! `application/json`. Blocks CSRF via `<form>` submissions: urlencoded
//! POSTs are SOP-safe (no preflight), but JSON bodies trigger CORS
//! preflight so SOP engages.
//!
//! Both are advisory: they compose via `axum::middleware::from_fn` and
//! never touch application state.
use axum::{
extract::Request,
http::{header, Method, StatusCode},
middleware::Next,
response::Response,
};
const ALLOWED_HOSTS: &[&str] = &["localhost:8747", "127.0.0.1:8747"];
/// Reject requests whose `Host:` is not an exact allow-list match.
///
/// Returns 421 Misdirected Request on mismatch (RFC 7540 §9.1.2).
pub async fn require_local_host(req: Request, next: Next) -> Result<Response, StatusCode> {
let host = req
.headers()
.get(header::HOST)
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if ALLOWED_HOSTS.iter().any(|&h| h == host) {
Ok(next.run(req).await)
} else {
Err(StatusCode::MISDIRECTED_REQUEST)
}
}
/// Reject POSTs whose `Content-Type` is not `application/json`.
///
/// GET and other methods pass through unchanged. Returns 415 Unsupported
/// Media Type on mismatch.
pub async fn require_json_content_type(
req: Request,
next: Next,
) -> Result<Response, StatusCode> {
if req.method() != Method::POST {
return Ok(next.run(req).await);
}
let ct = req
.headers()
.get(header::CONTENT_TYPE)
.and_then(|v| v.to_str().ok())
.unwrap_or("");
// Match only the media type, ignoring optional `; charset=…`.
let base = ct.split(';').next().unwrap_or("").trim();
if base.eq_ignore_ascii_case("application/json") {
Ok(next.run(req).await)
} else {
Err(StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
}
#[cfg(test)]
mod tests {
use super::*;
use axum::body::Body;
use axum::http::Request as HttpRequest;
use axum::middleware::from_fn;
use axum::routing::{get, post};
use axum::Router;
use tower::ServiceExt;
fn test_app() -> Router {
Router::new()
.route("/", get(|| async { "ok" }))
.route("/forge", post(|| async { "ok" }))
.layer(from_fn(require_json_content_type))
.layer(from_fn(require_local_host))
}
#[tokio::test]
async fn blocks_evil_host() {
let app = test_app();
let resp = app
.oneshot(
HttpRequest::builder()
.uri("/")
.header("host", "evil.com")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(resp.status(), StatusCode::MISDIRECTED_REQUEST);
}
#[tokio::test]
async fn blocks_urlencoded_post() {
let app = test_app();
let resp = app
.oneshot(
HttpRequest::builder()
.method("POST")
.uri("/forge")
.header("host", "127.0.0.1:8747")
.header("content-type", "application/x-www-form-urlencoded")
.body(Body::from("x=1"))
.unwrap(),
)
.await
.unwrap();
assert_eq!(resp.status(), StatusCode::UNSUPPORTED_MEDIA_TYPE);
}
}

View file

@ -3,30 +3,49 @@
//! Intentionally stateless: no `AppState`, no handles, no async init.
//! Every request is self-contained. This lets tests spin up `app()` in
//! an ephemeral Tokio runtime without setup teardown overhead.
//!
//! Security layers applied as middleware (see `crate::middleware`):
//! 1. `require_local_host` — reject non-127.0.0.1 Host headers (blocks
//! DNS rebinding).
//! 2. `require_json_content_type` — reject urlencoded POSTs (blocks
//! `<form>`-based CSRF).
//!
//! GET / responses additionally carry CSP + nosniff + frame-deny headers.
use axum::{
extract::Form,
http::StatusCode,
response::{Html, IntoResponse},
http::{HeaderMap, StatusCode},
middleware::from_fn,
response::IntoResponse,
routing::{get, post},
Json, Router,
};
use crate::form::{validate, ForgeRequest};
use crate::generate::{forge, ForgeResult};
use crate::headers::apply_security_headers;
use crate::html::FORM_HTML;
use crate::middleware::{require_json_content_type, require_local_host};
/// Build the router. Called by `main.rs` and by tests.
pub fn app() -> Router {
Router::new()
.route("/", get(render_form))
.route("/forge", post(handle_forge))
.layer(from_fn(require_json_content_type))
.layer(from_fn(require_local_host))
}
async fn render_form() -> Html<&'static str> {
Html(FORM_HTML)
async fn render_form() -> impl IntoResponse {
let mut headers = HeaderMap::new();
apply_security_headers(&mut headers);
headers.insert(
axum::http::header::CONTENT_TYPE,
axum::http::HeaderValue::from_static("text/html; charset=utf-8"),
);
(StatusCode::OK, headers, FORM_HTML)
}
async fn handle_forge(Form(req): Form<ForgeRequest>) -> impl IntoResponse {
async fn handle_forge(Json(req): Json<ForgeRequest>) -> impl IntoResponse {
if let Err(e) = validate(&req) {
return (StatusCode::BAD_REQUEST, Json(ForgeResult::fail(e)));
}
@ -38,47 +57,3 @@ async fn handle_forge(Form(req): Form<ForgeRequest>) -> impl IntoResponse {
};
(status, Json(result))
}
/// Minimal HTML — 5 inputs, no JS, no CSS framework. The locked schema
/// allows only 4 atom kinds, hard-coded as a `<select>`.
const FORM_HTML: &str = r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>kei-forge</title>
</head>
<body>
<h1>kei-forge scaffold an atom</h1>
<p>Per <a href="/static/SCHEMA-LOCKED.md">locked substrate schema</a>.</p>
<form method="POST" action="/forge">
<p>
<label>crate:
<input name="crate" required pattern="[a-z][a-z0-9-]*" placeholder="kei-task">
</label>
</p>
<p>
<label>verb:
<input name="verb" required pattern="[a-z][a-z0-9-]*" placeholder="add-dependency">
</label>
</p>
<p>
<label>kind:
<select name="kind" required>
<option value="command">command</option>
<option value="query">query</option>
<option value="stream">stream</option>
<option value="transform">transform</option>
</select>
</label>
</p>
<p>
<label>description:<br>
<textarea name="description" rows="3" cols="60"
placeholder="One-line purpose. Used in atoms/&lt;verb&gt;.md"></textarea>
</label>
</p>
<p><button type="submit">forge atom</button></p>
</form>
</body>
</html>
"#;

View file

@ -9,24 +9,36 @@
use axum::{
body::{to_bytes, Body},
http::{Request, StatusCode},
http::{header, Request, StatusCode},
};
use kei_forge::server;
use serde_json::Value;
use tower::ServiceExt;
const LOCAL_HOST: &str = "127.0.0.1:8747";
fn get(uri: &str) -> Request<Body> {
Request::builder()
.uri(uri)
.header("host", LOCAL_HOST)
.body(Body::empty())
.unwrap()
}
fn post_json(uri: &str, body: &str) -> Request<Body> {
Request::builder()
.method("POST")
.uri(uri)
.header("host", LOCAL_HOST)
.header("content-type", "application/json")
.body(Body::from(body.to_string()))
.unwrap()
}
#[tokio::test]
async fn get_root_serves_form() {
let app = server::app();
let resp = app
.oneshot(
Request::builder()
.uri("/")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
let resp = app.oneshot(get("/")).await.unwrap();
assert_eq!(resp.status(), StatusCode::OK);
let body = to_bytes(resp.into_body(), 64 * 1024).await.unwrap();
@ -40,19 +52,9 @@ async fn get_root_serves_form() {
#[tokio::test]
async fn post_forge_returns_json_shape() {
let app = server::app();
let body = "crate=kei-task&verb=add-dependency&kind=command&description=test+desc";
let body = r#"{"crate":"kei-task","verb":"add-dependency","kind":"command","description":"test desc"}"#;
let resp = app
.oneshot(
Request::builder()
.method("POST")
.uri("/forge")
.header("content-type", "application/x-www-form-urlencoded")
.body(Body::from(body))
.unwrap(),
)
.await
.unwrap();
let resp = app.oneshot(post_json("/forge", body)).await.unwrap();
let status = resp.status();
let bytes = to_bytes(resp.into_body(), 64 * 1024).await.unwrap();
@ -62,9 +64,6 @@ async fn post_forge_returns_json_shape() {
assert!(json.get("files").is_some(), "missing files field");
assert!(json.get("errors").is_some(), "missing errors field");
// With --features mock-generate we return success; without, the shell-out
// may succeed or fail depending on whether scripts/new-atom.sh lives in
// the worktree. Either way the schema must be correct.
assert!(
status == StatusCode::OK
|| status == StatusCode::UNPROCESSABLE_ENTITY
@ -76,19 +75,9 @@ async fn post_forge_returns_json_shape() {
#[tokio::test]
async fn post_forge_rejects_bad_kind() {
let app = server::app();
let body = "crate=kei-task&verb=x&kind=saga&description=y";
let body = r#"{"crate":"kei-task","verb":"x","kind":"saga","description":"y"}"#;
let resp = app
.oneshot(
Request::builder()
.method("POST")
.uri("/forge")
.header("content-type", "application/x-www-form-urlencoded")
.body(Body::from(body))
.unwrap(),
)
.await
.unwrap();
let resp = app.oneshot(post_json("/forge", body)).await.unwrap();
assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
let bytes = to_bytes(resp.into_body(), 64 * 1024).await.unwrap();
@ -97,3 +86,105 @@ async fn post_forge_rejects_bad_kind() {
let errs = json["errors"].as_array().unwrap();
assert!(!errs.is_empty());
}
// ---------------------------------------------------------------------
// Security hardening — the four new tests required by the fix contract.
// ---------------------------------------------------------------------
/// FIX A (DNS rebinding): a POST whose `Host:` header names an attacker
/// domain — even when the underlying socket is 127.0.0.1 — MUST be
/// rejected before the handler sees it.
#[tokio::test]
async fn post_with_evil_host_is_rejected() {
let app = server::app();
let resp = app
.oneshot(
Request::builder()
.method("POST")
.uri("/forge")
.header("host", "evil.com")
.header("content-type", "application/json")
.body(Body::from(
r#"{"crate":"kei-task","verb":"x","kind":"command","description":"y"}"#,
))
.unwrap(),
)
.await
.unwrap();
assert_ne!(resp.status(), StatusCode::OK);
assert!(
resp.status() == StatusCode::MISDIRECTED_REQUEST
|| resp.status() == StatusCode::FORBIDDEN,
"expected 403 or 421, got {}",
resp.status()
);
}
/// FIX B (CSRF): `application/x-www-form-urlencoded` is SOP-safe, so a
/// malicious `<form>` on any site could POST to us without preflight.
/// Must be rejected with 415 Unsupported Media Type.
#[tokio::test]
async fn post_urlencoded_is_rejected() {
let app = server::app();
let resp = app
.oneshot(
Request::builder()
.method("POST")
.uri("/forge")
.header("host", LOCAL_HOST)
.header("content-type", "application/x-www-form-urlencoded")
.body(Body::from(
"crate=kei-task&verb=x&kind=command&description=y",
))
.unwrap(),
)
.await
.unwrap();
assert_eq!(resp.status(), StatusCode::UNSUPPORTED_MEDIA_TYPE);
}
/// FIX C (description injection): a newline in `description` could
/// escape the `sed` substitution inside `scripts/new-atom.sh` and
/// append a hostile `-e` expression. Must fail validation with 400.
#[tokio::test]
async fn post_description_with_newline_is_rejected() {
let app = server::app();
// JSON escape for newline is \n literal in the string.
let body = r#"{"crate":"kei-task","verb":"noop","kind":"command","description":"foo\nevil"}"#;
let resp = app.oneshot(post_json("/forge", body)).await.unwrap();
assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
let bytes = to_bytes(resp.into_body(), 64 * 1024).await.unwrap();
let json: Value = serde_json::from_slice(&bytes).unwrap();
assert_eq!(json["success"], Value::Bool(false));
let err = json["errors"][0].as_str().unwrap();
assert!(
err.contains("description"),
"expected description error, got: {err}"
);
}
/// FIX (defence-in-depth): GET / must carry the four hardening
/// headers so an iframe / reflected-XSS pivot cannot escalate.
#[tokio::test]
async fn get_root_has_security_headers() {
let app = server::app();
let resp = app.oneshot(get("/")).await.unwrap();
assert_eq!(resp.status(), StatusCode::OK);
let h = resp.headers();
assert!(
h.contains_key(header::CONTENT_SECURITY_POLICY),
"missing CSP header"
);
assert!(
h.contains_key(header::X_CONTENT_TYPE_OPTIONS),
"missing X-Content-Type-Options"
);
assert!(
h.contains_key(header::X_FRAME_OPTIONS),
"missing X-Frame-Options"
);
assert!(
h.contains_key(header::REFERRER_POLICY),
"missing Referrer-Policy"
);
}

View file

@ -17,10 +17,15 @@ path = "src/lib.rs"
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
jsonschema = { version = "0.17", default-features = false }
# SSRF + IMDS hardening: disable default features (resolve-http, cli) so the
# validator has no HTTP resolver by default. We configure a file-only
# resolver explicitly in `validate.rs`.
jsonschema = { version = "0.18", default-features = false, features = ["resolve-file"] }
anyhow = "1"
walkdir = "2"
serde_yaml_ng = "0.10"
kei-atom-discovery = { path = "../kei-atom-discovery" }
url = "2"
[dev-dependencies]
tempfile = "3"

View file

@ -1,92 +1,20 @@
//! Atom discovery — walks `<root>/*/atoms/*.md`, parses YAML frontmatter.
//! Atom discovery — thin façade over `kei-atom-discovery`.
//!
//! Skip-on-invalid policy: missing/malformed frontmatter emits stderr warn,
//! record is dropped (never panics, never fails the walk).
//! Re-exports `AtomMeta` and `AtomKind` from the shared crate so all runtime
//! modules share exactly one frontmatter-parser implementation.
use serde::Deserialize;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
use kei_atom_discovery as shared;
use std::path::Path;
/// Parsed frontmatter fields needed by the runtime.
#[derive(Debug, Clone)]
pub struct AtomMeta {
pub full_id: String,
pub crate_name: String,
pub verb: String,
pub kind: String,
pub md_path: PathBuf,
pub input_schema_path: PathBuf,
pub output_schema_path: PathBuf,
}
pub use kei_atom_discovery::{parse_frontmatter, AtomKind, AtomMeta};
/// Raw frontmatter — only the fields discover needs.
#[derive(Debug, Deserialize)]
struct Frontmatter {
atom: String,
kind: String,
input: SchemaRef,
output: SchemaRef,
}
#[derive(Debug, Deserialize)]
struct SchemaRef {
schema: String,
}
/// Walks `<root>/*/atoms/*.md`. Returns one `AtomMeta` per parseable file.
/// Walk `<root>/*/atoms/*.md`. Delegates to `kei-atom-discovery::discover_atoms`.
pub fn walk_atoms(root: &Path) -> Vec<AtomMeta> {
let mut out = Vec::new();
for entry in WalkDir::new(root).max_depth(3).into_iter().flatten() {
if !is_atom_md(entry.path()) {
continue;
}
match parse_one(entry.path()) {
Ok(meta) => out.push(meta),
Err(e) => eprintln!("warn: skip {}: {}", entry.path().display(), e),
}
}
out
shared::discover_atoms(root)
}
fn is_atom_md(path: &Path) -> bool {
path.is_file()
&& path.extension().is_some_and(|e| e == "md")
&& path
.parent()
.and_then(|p| p.file_name())
.is_some_and(|n| n == "atoms")
}
fn parse_one(md_path: &Path) -> Result<AtomMeta, String> {
let body = std::fs::read_to_string(md_path).map_err(|e| format!("read: {e}"))?;
let fm = extract_frontmatter(&body).ok_or_else(|| "no frontmatter".to_string())?;
let parsed: Frontmatter = serde_yaml::from_str(fm).map_err(|e| format!("yaml: {e}"))?;
let (crate_name, verb) = split_atom_id(&parsed.atom)?;
let atom_dir = md_path.parent().ok_or("no parent dir")?;
Ok(AtomMeta {
full_id: parsed.atom.clone(),
crate_name,
verb,
kind: parsed.kind,
md_path: md_path.to_path_buf(),
input_schema_path: atom_dir.join(&parsed.input.schema),
output_schema_path: atom_dir.join(&parsed.output.schema),
})
}
/// Returns the frontmatter body (between the two `---` fences), or None.
/// Backwards-compatible split — returns the frontmatter YAML body (no body
/// trailing). Returns `None` if the file has no frontmatter fences.
pub fn extract_frontmatter(body: &str) -> Option<&str> {
let rest = body.strip_prefix("---\n").or_else(|| body.strip_prefix("---\r\n"))?;
let end = rest.find("\n---").or_else(|| rest.find("\r\n---"))?;
Some(&rest[..end])
}
fn split_atom_id(id: &str) -> Result<(String, String), String> {
let (crate_name, verb) = id
.split_once("::")
.ok_or_else(|| format!("atom id missing `::`: {id}"))?;
if crate_name.is_empty() || verb.is_empty() {
return Err(format!("atom id has empty half: {id}"));
}
Ok((crate_name.to_string(), verb.to_string()))
shared::parse_frontmatter(body).ok().map(|(fm, _)| fm)
}

View file

@ -15,14 +15,23 @@ pub enum InvokeError {
AtomNotFound(String),
InputParse(String),
InputInvalid(String),
MissingInputSchema(String),
NotImplemented { atom: String },
}
impl std::fmt::Display for InvokeError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::AtomNotFound(id) => write!(f, "atom not found: {id}"),
Self::InputParse(e) => write!(f, "input parse: {e}"),
Self::InputInvalid(e) => write!(f, "input invalid: {e}"),
Self::AtomNotFound(id) => write!(f, "no atom matching {id}"),
Self::InputParse(e) => write!(f, "input rejected: {e}"),
Self::InputInvalid(e) => write!(f, "input rejected: {e}"),
Self::MissingInputSchema(id) => {
write!(f, "atom `{id}` declares no input schema")
}
Self::NotImplemented { atom } => write!(
f,
"invoke not yet wired for this atom ({atom}); use the underlying CLI directly"
),
}
}
}
@ -32,24 +41,26 @@ impl std::error::Error for InvokeError {}
/// MVP stub output — shape documents the boundary for downstream wire-up.
#[derive(Debug, Serialize)]
pub struct Output {
pub error: String,
pub atom: String,
}
/// Invoke an atom by full ID with a JSON input string.
///
/// MVP contract: discover atom → parse input → validate against schema →
/// return stub acknowledgement. Exec wire-up is a follow-up.
/// return `NotImplemented` until Stream B wires the executor. The callers
/// (main.rs) map `NotImplemented` to a dedicated non-zero exit code so CI
/// scripts can distinguish "unimplemented" from "atom rejected input".
pub fn invoke(root: &Path, atom_id: &str, input_json: &str) -> Result<Output, InvokeError> {
let meta = find_atom(root, atom_id)?;
let input: Value =
serde_json::from_str(input_json).map_err(|e| InvokeError::InputParse(e.to_string()))?;
validate_input(&meta.input_schema_path, &input)
let schema = meta
.input_schema
.as_ref()
.ok_or_else(|| InvokeError::MissingInputSchema(atom_id.to_string()))?;
validate_input(schema, &input)
.map_err(|e| InvokeError::InputInvalid(e.to_string()))?;
Ok(Output {
error: "atom invocation not yet implemented — wire needs Stream B atom impls".to_string(),
atom: atom_id.to_string(),
})
Err(InvokeError::NotImplemented { atom: atom_id.to_string() })
}
fn find_atom(root: &Path, atom_id: &str) -> Result<AtomMeta, InvokeError> {

View file

@ -3,7 +3,8 @@
//! Checks (from SUBSTRATE-SCHEMA §Validation):
//! 1. Frontmatter has required fields (atom, kind, version, input, output,
//! side_effects, idempotent, stability).
//! 2. Schema paths resolve to existing JSON files.
//! 2. Schema paths resolve to existing JSON files inside the atom's dir
//! (safe_join — rejects `..` and absolute paths).
//! 3. JSON Schemas declare draft-07 via `$schema`.
//! 4. `kind` ∈ {command, query, stream, transform}.
//! 5. `side_effects` entries are `{op, domain}` objects.
@ -11,7 +12,8 @@
//! refs allowed).
use crate::discover::extract_frontmatter;
use serde_yaml::Value as YamlValue;
use kei_atom_discovery::safe_join;
use serde_yaml_ng::Value as YamlValue;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
@ -51,6 +53,7 @@ pub fn schema_lint(root: &Path) -> LintReport {
fn find_atom_files(root: &Path) -> Vec<PathBuf> {
WalkDir::new(root)
.max_depth(3)
.follow_links(false)
.into_iter()
.flatten()
.filter(|e| {
@ -67,7 +70,7 @@ fn collect_atom_ids(root: &Path) -> HashSet<String> {
for md in find_atom_files(root) {
if let Ok(body) = std::fs::read_to_string(&md) {
if let Some(fm) = extract_frontmatter(&body) {
if let Ok(y) = serde_yaml::from_str::<YamlValue>(fm) {
if let Ok(y) = serde_yaml_ng::from_str::<YamlValue>(fm) {
if let Some(id) = y.get("atom").and_then(|v| v.as_str()) {
ids.insert(id.to_string());
}
@ -82,7 +85,7 @@ fn lint_one(md_path: &Path, known_atoms: &HashSet<String>) -> Result<(), Vec<Str
let body = std::fs::read_to_string(md_path).map_err(|e| vec![format!("read: {e}")])?;
let fm_text = extract_frontmatter(&body).ok_or_else(|| vec!["no frontmatter".to_string()])?;
let fm: YamlValue =
serde_yaml::from_str(fm_text).map_err(|e| vec![format!("yaml parse: {e}")])?;
serde_yaml_ng::from_str(fm_text).map_err(|e| vec![format!("yaml parse: {e}")])?;
let mut errs = Vec::new();
check_required_fields(&fm, &mut errs);
check_kind(&fm, &mut errs);
@ -126,11 +129,21 @@ fn check_side_effects(fm: &YamlValue, errs: &mut Vec<String>) {
}
fn check_schema_files(md_path: &Path, fm: &YamlValue, errs: &mut Vec<String>) {
let Some(md_dir) = md_path.parent() else {
errs.push("md_path has no parent dir".to_string());
return;
};
for key in &["input", "output"] {
let Some(rel) = fm.get(key).and_then(|v| v.get("schema")).and_then(|v| v.as_str()) else {
continue;
};
let full = md_path.parent().map(|p| p.join(rel)).unwrap_or_else(|| PathBuf::from(rel));
let full = match safe_join(md_dir, rel) {
Ok(p) => p,
Err(e) => {
errs.push(format!("{key} schema path unsafe: {e}"));
continue;
}
};
if !full.exists() {
errs.push(format!("{key} schema missing: {}", full.display()));
continue;
@ -149,18 +162,33 @@ fn check_draft07(schema_path: &Path, key: &str, errs: &mut Vec<String>) {
return;
};
let draft = json.get("$schema").and_then(|v| v.as_str()).unwrap_or("");
if !draft.contains("draft-07") {
if !is_draft07_uri(draft) {
errs.push(format!("{key} schema missing draft-07 $schema"));
}
}
/// Exact-match check for the draft-07 meta-schema URI.
///
/// Accepts the canonical URI with or without the trailing `#` fragment.
/// A substring check (`contains("draft-07")`) would falsely match URIs like
/// `http://example.com/draft-07-tutorial.html` — forbidden by §Validation.
fn is_draft07_uri(uri: &str) -> bool {
uri == "http://json-schema.org/draft-07/schema#"
|| uri == "http://json-schema.org/draft-07/schema"
}
fn check_related(fm: &YamlValue, known: &HashSet<String>, errs: &mut Vec<String>) {
let Some(seq) = fm.get("related").and_then(|v| v.as_sequence()) else {
return;
};
for entry in seq {
let Some(link) = entry.as_str() else { continue };
let inner = link.trim_start_matches("[[").trim_end_matches("]]");
let Some(inner) = parse_wikilink(link) else {
errs.push(format!(
"related entry {link} is not a valid [[atom-id]] wikilink"
));
continue;
};
if inner.starts_with("rules/") {
continue;
}
@ -169,3 +197,17 @@ fn check_related(fm: &YamlValue, known: &HashSet<String>, errs: &mut Vec<String>
}
}
}
/// Strict `[[...]]` wikilink parse.
///
/// Returns the inner text only when the string starts with exactly `[[`
/// and ends with exactly `]]`, with no extra brackets on either side
/// and a non-empty body. Rejects malformed forms like `[[[foo]]`,
/// `[[foo]]]`, `[[foo]`, `[foo]]`, and `[[]]`.
fn parse_wikilink(raw: &str) -> Option<&str> {
let inner = raw.strip_prefix("[[")?.strip_suffix("]]")?;
if inner.is_empty() || inner.starts_with('[') || inner.ends_with(']') {
return None;
}
Some(inner)
}

View file

@ -4,10 +4,16 @@
//! Default --root: `~/.claude/agents/_primitives/_rust`.
use clap::{Parser, Subcommand};
use kei_runtime::invoke::InvokeError;
use kei_runtime::{discover, invoke, lint};
use std::path::PathBuf;
use std::process::ExitCode;
/// Exit code returned when `invoke` lands on a not-yet-wired atom.
/// Distinct from exit 2 (atom rejected input) so CI can branch.
/// Chosen in the EX_USAGE range per `sysexits.h` convention.
const EXIT_INVOKE_NOT_IMPLEMENTED: u8 = 64;
#[derive(Parser)]
#[command(name = "kei-runtime", version, about = "Atom invocation runtime + schema linter")]
struct Cli {
@ -77,11 +83,11 @@ fn run_list_atoms(root: PathBuf, crate_name: Option<String>, kind: Option<String
}
}
if let Some(k) = &kind {
if a.kind != *k {
if a.kind.as_str() != k.as_str() {
continue;
}
}
println!("{}\t{}\t{}", a.full_id, a.kind, a.md_path.display());
println!("{}\t{}\t{}", a.full_id, a.kind.as_str(), a.md_path.display());
}
ExitCode::SUCCESS
}
@ -100,12 +106,26 @@ fn run_invoke(root: PathBuf, atom_id: String, input_arg: String) -> ExitCode {
ExitCode::SUCCESS
}
Err(e) => {
eprintln!("invoke: {e}");
ExitCode::from(2)
eprintln!("{e}");
ExitCode::from(invoke_exit_code(&e))
}
}
}
/// Map typed invoke errors to exit codes per locked §Runtime schema.
///
/// - `AtomNotFound | InputParse | InputInvalid | MissingInputSchema` → 2 (atom error)
/// - `NotImplemented` → 64 (CLI contract not yet honoured)
fn invoke_exit_code(err: &InvokeError) -> u8 {
match err {
InvokeError::AtomNotFound(_)
| InvokeError::InputParse(_)
| InvokeError::InputInvalid(_)
| InvokeError::MissingInputSchema(_) => 2,
InvokeError::NotImplemented { .. } => EXIT_INVOKE_NOT_IMPLEMENTED,
}
}
fn load_input(arg: &str) -> Result<String, String> {
if let Some(path) = arg.strip_prefix('@') {
std::fs::read_to_string(path).map_err(|e| format!("read {path}: {e}"))

View file

@ -1,11 +1,19 @@
//! JSON Schema draft-07 validation wrappers.
//!
//! Thin façade over the `jsonschema` crate. Reads schema from disk per call —
//! caller may cache if hot. Returns a single, readable error message.
//! Thin façade over the `jsonschema` crate (v0.18). Reads schema from disk
//! per call. Returns a single, readable error message.
//!
//! SSRF / IMDS hardening:
//! - `default-features = false` on `jsonschema` — no `resolve-http` feature.
//! - Custom `LocalFileResolver` replaces the default. It rejects any URL
//! whose scheme isn't `file://` and any path outside the schema's own
//! directory (anchored at the schema file's parent).
use jsonschema::JSONSchema;
use jsonschema::{JSONSchema, SchemaResolver, SchemaResolverError};
use serde_json::Value;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use url::Url;
#[derive(Debug)]
pub struct ValidationError(pub String);
@ -31,10 +39,20 @@ pub fn validate_output(schema_path: &Path, output: &Value) -> Result<(), Validat
fn validate_value(schema_path: &Path, value: &Value) -> Result<(), ValidationError> {
let schema_text = std::fs::read_to_string(schema_path)
.map_err(|e| ValidationError(format!("read {}: {e}", schema_path.display())))?;
let schema_json: Value = serde_json::from_str(&schema_text)
let mut schema_json: Value = serde_json::from_str(&schema_text)
.map_err(|e| ValidationError(format!("parse {}: {e}", schema_path.display())))?;
// jsonschema 0.18 requires an absolute base URI for the schema. Our atom
// schemas typically declare a relative `$id` like
// "kei-task/atoms/schemas/create-input.json" which fails compile with
// "relative URL without a base". Inject a synthetic `file://` $id keyed
// to the actual schema path so any internal `$ref` still resolves
// relative to the file (and our LocalFileResolver confines to the
// schema's parent dir for safety).
inject_absolute_id(&mut schema_json, schema_path);
let root = schema_path.parent().unwrap_or(schema_path).to_path_buf();
let compiled = JSONSchema::options()
.with_draft(jsonschema::Draft::Draft7)
.with_resolver(LocalFileResolver::new(root))
.compile(&schema_json)
.map_err(|e| ValidationError(format!("compile: {e}")))?;
if let Err(errors) = compiled.validate(value) {
@ -43,3 +61,73 @@ fn validate_value(schema_path: &Path, value: &Value) -> Result<(), ValidationErr
}
Ok(())
}
fn inject_absolute_id(schema: &mut Value, schema_path: &Path) {
let obj = match schema.as_object_mut() {
Some(o) => o,
None => return,
};
let needs_replace = match obj.get("$id").and_then(|v| v.as_str()) {
None => true, // missing
Some(s) => Url::parse(s).is_err(), // non-absolute
};
if !needs_replace {
return;
}
if let Ok(canon) = schema_path.canonicalize() {
if let Ok(url) = Url::from_file_path(&canon) {
obj.insert("$id".to_string(), Value::String(url.to_string()));
}
}
}
/// `$ref` resolver that rejects every scheme except `file://`, AND rejects
/// any path that is not inside `root` (canonicalised).
#[derive(Debug)]
pub struct LocalFileResolver {
root: PathBuf,
}
impl LocalFileResolver {
pub fn new(root: PathBuf) -> Self {
Self { root }
}
}
impl SchemaResolver for LocalFileResolver {
fn resolve(
&self,
_root_schema: &Value,
url: &Url,
_original_reference: &str,
) -> Result<Arc<Value>, SchemaResolverError> {
if url.scheme() != "file" {
return Err(anyhow::anyhow!(
"remote $ref rejected — only file:// is allowed (got {})",
url.scheme()
));
}
let path = url
.to_file_path()
.map_err(|_| anyhow::anyhow!("invalid file URL: {url}"))?;
let canon = path
.canonicalize()
.map_err(|e| anyhow::anyhow!("canonicalize {}: {e}", path.display()))?;
let root_canon = self
.root
.canonicalize()
.map_err(|e| anyhow::anyhow!("canonicalize root {}: {e}", self.root.display()))?;
if !canon.starts_with(&root_canon) {
return Err(anyhow::anyhow!(
"file $ref escapes schema root: {} not under {}",
canon.display(),
root_canon.display()
));
}
let f = std::fs::File::open(&canon)
.map_err(|e| anyhow::anyhow!("open {}: {e}", canon.display()))?;
let doc: Value = serde_json::from_reader(f)
.map_err(|e| anyhow::anyhow!("parse {}: {e}", canon.display()))?;
Ok(Arc::new(doc))
}
}

View file

@ -1,6 +1,6 @@
//! Integration test — walk_atoms returns 2 well-formed records from temp root.
use kei_runtime::discover::walk_atoms;
use kei_runtime::discover::{walk_atoms, AtomKind};
use std::fs;
use std::path::Path;
@ -43,8 +43,16 @@ fn walk_atoms_finds_two_records() {
assert_eq!(atoms[0].full_id, "kei-alpha::search");
assert_eq!(atoms[0].crate_name, "kei-alpha");
assert_eq!(atoms[0].verb, "search");
assert_eq!(atoms[0].kind, "query");
assert_eq!(atoms[0].kind, AtomKind::Query);
assert_eq!(atoms[1].full_id, "kei-beta::fetch");
assert!(atoms[1].input_schema_path.ends_with("schemas/fetch-input.json"));
assert!(atoms[1].output_schema_path.ends_with("schemas/fetch-output.json"));
assert!(atoms[1]
.input_schema
.as_ref()
.unwrap()
.ends_with("schemas/fetch-input.json"));
assert!(atoms[1]
.output_schema
.as_ref()
.unwrap()
.ends_with("schemas/fetch-output.json"));
}

View file

@ -0,0 +1,94 @@
//! Integration test — `kei-runtime invoke` exit codes per §Runtime contract.
//!
//! - Unknown atom id → exit 2 (atom rejected)
//! - Known atom, valid input → exit 64 (CLI contract not yet honoured / NotImplemented)
//!
//! The invoke binary is a stub (no executor wired yet). Previously it
//! returned `Ok(...)` with an error payload → ExitCode::SUCCESS. Now every
//! invocation path yields a typed `InvokeError`; main.rs maps variants to
//! distinct exit codes so CI scripts can distinguish cases.
use std::fs;
use std::path::Path;
use std::process::Command;
const BIN: &str = env!("CARGO_BIN_EXE_kei-runtime");
fn write_atom(root: &Path, crate_name: &str, verb: &str) {
let atoms = root.join(crate_name).join("atoms");
let schemas = atoms.join("schemas");
fs::create_dir_all(&schemas).unwrap();
let input = format!("{verb}-input.json");
let output = format!("{verb}-output.json");
let input_schema = r#"{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": { "title": { "type": "string" } }
}"#;
let output_schema = r#"{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object"
}"#;
fs::write(schemas.join(&input), input_schema).unwrap();
fs::write(schemas.join(&output), output_schema).unwrap();
let md = format!(
r#"---
atom: {crate_name}::{verb}
kind: command
version: "0.1.0"
input:
schema: schemas/{input}
output:
schema: schemas/{output}
side_effects: []
idempotent: true
stability: stable
---
# {crate_name}::{verb}
"#,
);
fs::write(atoms.join(format!("{verb}.md")), md).unwrap();
}
#[test]
fn invoke_atom_not_found_exits_2() {
let tmp = tempfile::tempdir().unwrap();
write_atom(tmp.path(), "kei-demo", "create");
let out = Command::new(BIN)
.arg("invoke")
.arg("kei-demo::ghost")
.arg("--input")
.arg("{}")
.arg("--root")
.arg(tmp.path())
.output()
.expect("spawn kei-runtime");
assert_eq!(out.status.code(), Some(2),
"expected exit 2 on unknown atom; stderr: {}",
String::from_utf8_lossy(&out.stderr));
let stderr = String::from_utf8_lossy(&out.stderr);
assert!(stderr.contains("no atom matching"),
"expected 'no atom matching' in stderr: {stderr}");
}
#[test]
fn invoke_not_implemented_exits_64() {
let tmp = tempfile::tempdir().unwrap();
write_atom(tmp.path(), "kei-demo", "create");
let out = Command::new(BIN)
.arg("invoke")
.arg("kei-demo::create")
.arg("--input")
.arg(r#"{"title":"hello"}"#)
.arg("--root")
.arg(tmp.path())
.output()
.expect("spawn kei-runtime");
assert_eq!(out.status.code(), Some(64),
"expected exit 64 on NotImplemented; stderr: {}",
String::from_utf8_lossy(&out.stderr));
let stderr = String::from_utf8_lossy(&out.stderr);
assert!(stderr.contains("invoke not yet wired"),
"expected 'invoke not yet wired' in stderr: {stderr}");
}

View file

@ -18,9 +18,9 @@ rusqlite = { version = "0.31", features = ["bundled"] }
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
kei-atom-discovery = { path = "../kei-atom-discovery" }
[dev-dependencies]
tempfile = "3"

View file

@ -1,60 +1,23 @@
//! Frontmatter splitting + wikilink extraction helpers for atom `.md` files.
//! Sage-local aliases over `kei-atom-discovery` helpers.
//!
//! Pure functions, no I/O. See `atoms.rs` for the discovery walker.
//! Historical sage API: `split_frontmatter`, `parse_wikilink`, `is_atom_target`,
//! `split_atom_id`. All now delegate to the shared crate; kept here so sage
//! internals compile without touch.
use anyhow::{anyhow, Result};
use kei_atom_discovery as shared;
/// Split a `.md` file into (frontmatter_yaml, body). Frontmatter must start
/// with `---\n` and end with a line that is exactly `---`.
pub use shared::{is_atom_target, parse_wikilink};
/// Split a `.md` file into (frontmatter_yaml, body). Delegates to the shared
/// `parse_frontmatter` — preserves the sage `anyhow::Result` return type.
pub fn split_frontmatter(text: &str) -> Result<(&str, &str)> {
let rest = text
.strip_prefix("---\n")
.or_else(|| text.strip_prefix("---\r\n"))
.ok_or_else(|| anyhow!("missing leading --- frontmatter delimiter"))?;
let end = find_closing_delim(rest)
.ok_or_else(|| anyhow!("missing closing --- frontmatter delimiter"))?;
let fm = &rest[..end.0];
let body_start = end.0 + end.1;
Ok((fm, rest.get(body_start..).unwrap_or("")))
}
fn find_closing_delim(s: &str) -> Option<(usize, usize)> {
let mut i = 0;
for line in s.split_inclusive('\n') {
let trimmed = line.trim_end_matches(&['\n', '\r'][..]);
if trimmed == "---" {
return Some((i, line.len()));
}
i += line.len();
}
None
}
/// Parse a single wikilink `[[target]]`. Returns `Some(target)` stripped of
/// brackets and whitespace, `None` if the string isn't a wikilink shape.
pub fn parse_wikilink(raw: &str) -> Option<String> {
let t = raw.trim();
let inner = t.strip_prefix("[[").and_then(|s| s.strip_suffix("]]"))?;
let inner = inner.trim();
if inner.is_empty() {
None
} else {
Some(inner.to_string())
}
}
/// Filter rule that decides whether a wikilink target is an atom reference.
/// Atoms use `<crate>::<verb>`; we exclude `rules/*` and `rule*` targets.
pub fn is_atom_target(target: &str) -> bool {
!target.starts_with("rules/") && !target.starts_with("rule ")
shared::parse_frontmatter(text).map_err(|e| anyhow!(e.to_string()))
}
/// Split `<crate>::<verb>` atom id into components.
pub fn split_atom_id(id: &str) -> Result<(String, String)> {
match id.split_once("::") {
Some((c, v)) if !c.is_empty() && !v.is_empty() => Ok((c.into(), v.into())),
_ => Err(anyhow!("atom id must be <crate>::<verb>, got {id}")),
}
shared::split_atom_id(id).map_err(|e| anyhow!(e.to_string()))
}
#[cfg(test)]

View file

@ -1,169 +1,30 @@
//! Substrate-atom discovery + frontmatter parsing + wikilink extraction.
//! Substrate-atom discovery — thin façade over `kei-atom-discovery`.
//!
//! Walks `<root>/<crate>/atoms/*.md`, parses YAML frontmatter, returns
//! `AtomRecord`. Tolerant: skips files with invalid frontmatter (logs to
//! stderr, continues scan). See `docs/SUBSTRATE-SCHEMA.md` §Graph contract.
//! Historical `AtomRecord` is preserved as a type alias for `AtomMeta` so
//! that downstream sage modules (`atom_index`, `atom_cli`) keep compiling.
use crate::atom_parse::{is_atom_target, parse_wikilink, split_atom_id, split_frontmatter};
use anyhow::{anyhow, Context, Result};
use serde::Deserialize;
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use crate::atom_parse::{is_atom_target, parse_wikilink};
use anyhow::Result;
use kei_atom_discovery as shared;
use std::path::Path;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomKind {
Command,
Query,
Stream,
Transform,
}
pub use kei_atom_discovery::AtomKind;
impl FromStr for AtomKind {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self> {
match s.trim().to_ascii_lowercase().as_str() {
"command" => Ok(AtomKind::Command),
"query" => Ok(AtomKind::Query),
"stream" => Ok(AtomKind::Stream),
"transform" => Ok(AtomKind::Transform),
other => Err(anyhow!("unknown atom kind: {other}")),
}
}
}
impl AtomKind {
pub fn as_str(&self) -> &'static str {
match self {
AtomKind::Command => "command",
AtomKind::Query => "query",
AtomKind::Stream => "stream",
AtomKind::Transform => "transform",
}
}
}
#[derive(Debug, Clone)]
pub struct AtomRecord {
pub full_id: String,
pub kind: AtomKind,
pub crate_name: String,
pub verb: String,
pub version: String,
pub md_path: PathBuf,
pub input_schema: Option<PathBuf>,
pub output_schema: Option<PathBuf>,
pub related: Vec<String>,
pub keywords: Vec<String>,
pub stability: String,
pub body: String,
}
#[derive(Debug, Deserialize)]
struct SchemaRef {
schema: Option<String>,
}
#[derive(Debug, Deserialize)]
struct Frontmatter {
atom: String,
kind: String,
#[serde(default)]
version: Option<String>,
#[serde(default)]
input: Option<SchemaRef>,
#[serde(default)]
output: Option<SchemaRef>,
#[serde(default)]
related: Vec<String>,
#[serde(default)]
keywords: Vec<String>,
#[serde(default)]
stability: Option<String>,
}
/// Legacy alias: sage used to call this `AtomRecord`. New code should use
/// `AtomMeta` directly (identical shape, authored in `kei-atom-discovery`).
pub type AtomRecord = shared::AtomMeta;
/// Walk `<root>/*/atoms/*.md` and return parsed atom metadata.
/// Tolerant: invalid frontmatter → stderr warning + skipped record.
pub fn discover_atoms(root: &Path) -> Result<Vec<AtomRecord>> {
let mut out = Vec::new();
if !root.is_dir() {
return Ok(out);
return Ok(Vec::new());
}
for entry in fs::read_dir(root).with_context(|| format!("read_dir {}", root.display()))? {
let crate_dir = entry?.path();
if crate_dir.is_dir() {
collect_from_crate(&crate_dir, &mut out);
}
}
Ok(out)
}
fn collect_from_crate(crate_dir: &Path, out: &mut Vec<AtomRecord>) {
let atoms_dir = crate_dir.join("atoms");
if !atoms_dir.is_dir() {
return;
}
let crate_name = crate_dir
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
let iter = match fs::read_dir(&atoms_dir) {
Ok(it) => it,
Err(e) => {
eprintln!("skip {}: {}", atoms_dir.display(), e);
return;
}
};
for entry in iter.flatten() {
let path = entry.path();
if !is_md_file(&path) {
continue;
}
match parse_atom_file(&path, &crate_name) {
Ok(rec) => out.push(rec),
Err(e) => eprintln!("skip {}: {}", path.display(), e),
}
}
}
fn is_md_file(path: &Path) -> bool {
path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("md")
}
fn parse_atom_file(path: &Path, crate_name: &str) -> Result<AtomRecord> {
let text = fs::read_to_string(path)
.with_context(|| format!("read {}", path.display()))?;
let (fm_text, body) = split_frontmatter(&text)?;
let fm: Frontmatter =
serde_yaml::from_str(fm_text).with_context(|| "parse frontmatter YAML")?;
build_record(fm, body, path, crate_name)
}
fn build_record(fm: Frontmatter, body: &str, path: &Path, crate_name: &str) -> Result<AtomRecord> {
let kind = AtomKind::from_str(&fm.kind)?;
let (crate_from_id, verb) = split_atom_id(&fm.atom)?;
let md_dir = path.parent().unwrap_or(path).to_path_buf();
Ok(AtomRecord {
full_id: fm.atom.clone(),
kind,
crate_name: if crate_from_id.is_empty() {
crate_name.to_string()
} else {
crate_from_id
},
verb,
version: fm.version.unwrap_or_default(),
md_path: path.to_path_buf(),
input_schema: fm.input.and_then(|s| s.schema).map(|s| md_dir.join(&s)),
output_schema: fm.output.and_then(|s| s.schema).map(|s| md_dir.join(&s)),
related: fm.related,
keywords: fm.keywords,
stability: fm.stability.unwrap_or_else(|| "unknown".into()),
body: body.to_string(),
})
Ok(shared::discover_atoms(root))
}
/// Extract `(source_atom_id, target)` edges from `related:` wikilinks.
/// Non-atom targets (rules, notes) are filtered out here — scope: atoms only.
/// Non-atom targets (rules, notes) are filtered out — scope: atoms only.
pub fn resolve_wikilinks(records: &[AtomRecord]) -> Vec<(String, String)> {
let mut out = Vec::new();
for rec in records {

View file

@ -13,6 +13,31 @@ use kei_task::{Milestone, Store};
use std::path::PathBuf;
use std::process::ExitCode;
/// Typed error used by the CLI to carry both a message and an exit code.
///
/// Exit-code contract (§Runtime):
/// - 2 — atom rejected input (validation / semantic error)
/// - 1 — usage / IO / storage failure
struct CliError {
code: u8,
msg: String,
}
impl CliError {
fn atom(msg: impl Into<String>) -> Self {
Self { code: 2, msg: msg.into() }
}
fn io(msg: impl Into<String>) -> Self {
Self { code: 1, msg: msg.into() }
}
}
impl From<anyhow::Error> for CliError {
fn from(e: anyhow::Error) -> Self {
Self::io(format!("{e:#}"))
}
}
#[derive(Parser)]
#[command(name = "kei-task", version, about = "Task DAG CLI")]
struct Cli {
@ -41,13 +66,13 @@ fn db_path(cli_db: Option<PathBuf>) -> PathBuf {
PathBuf::from(home).join(".claude/task/task.sqlite")
}
fn run() -> anyhow::Result<()> {
fn run() -> Result<(), CliError> {
let cli = Cli::parse();
let s = Store::open(&db_path(cli.db))?;
dispatch(&s, cli.cmd)
}
fn dispatch(s: &Store, cmd: Cmd) -> anyhow::Result<()> {
fn dispatch(s: &Store, cmd: Cmd) -> Result<(), CliError> {
match cmd {
Cmd::Create { title, description, priority } =>
cmd_create(s, title, description, priority),
@ -63,16 +88,26 @@ fn dispatch(s: &Store, cmd: Cmd) -> anyhow::Result<()> {
}
}
fn cmd_create(s: &Store, title: String, description: String, priority: String) -> anyhow::Result<()> {
fn cmd_create(s: &Store, title: String, description: String, priority: String) -> Result<(), CliError> {
let out = atoms::create::run(s, atoms::create::Input {
title, description, priority, milestone_id: None,
}).map_err(|e| anyhow::anyhow!("{e}"))?;
}).map_err(classify_create_error)?;
println!("{}", out.id);
Ok(())
}
fn cmd_update(s: &Store, id: i64, status: Option<String>, title: Option<String>) -> anyhow::Result<()> {
let mut t = s.get_task(id)?.ok_or_else(|| anyhow::anyhow!("id {id} not found"))?;
/// Per §Runtime contract: validation errors → exit 2, storage/IO → exit 1.
fn classify_create_error(e: atoms::create::Error) -> CliError {
match e {
atoms::create::Error::InvalidTitle
| atoms::create::Error::InvalidPriority(_) => CliError::atom(format!("{e}")),
atoms::create::Error::StoreError(_) => CliError::io(format!("{e}")),
}
}
fn cmd_update(s: &Store, id: i64, status: Option<String>, title: Option<String>) -> Result<(), CliError> {
let mut t = s.get_task(id)?
.ok_or_else(|| CliError::atom(format!("TaskNotFound: id {id} not found")))?;
if let Some(st) = status { t.status = st; }
if let Some(ti) = title { t.title = ti; }
s.update_task(&t)?;
@ -80,45 +115,63 @@ fn cmd_update(s: &Store, id: i64, status: Option<String>, title: Option<String>)
Ok(())
}
fn cmd_add_dep(s: &Store, from_id: i64, to_id: i64, dep_type: String) -> anyhow::Result<()> {
fn cmd_add_dep(s: &Store, from_id: i64, to_id: i64, dep_type: String) -> Result<(), CliError> {
let dep_display = if dep_type.is_empty() { "blocks".to_string() } else { dep_type.clone() };
atoms::add_dependency::run(s, atoms::add_dependency::Input {
from: from_id, to: to_id, dep_type,
}).map_err(|e| anyhow::anyhow!("{e}"))?;
}).map_err(classify_add_dep_error)?;
println!("dep: {} -> {} ({})", from_id, to_id, dep_display);
Ok(())
}
fn cmd_graph(s: &Store) -> anyhow::Result<()> {
/// Per §Runtime contract: semantic rejections → exit 2, storage/IO → exit 1.
fn classify_add_dep_error(e: atoms::add_dependency::Error) -> CliError {
match e {
atoms::add_dependency::Error::SelfDependency
| atoms::add_dependency::Error::InvalidDepType(_)
| atoms::add_dependency::Error::CycleDetected => CliError::atom(format!("{e}")),
atoms::add_dependency::Error::StoreError(_) => CliError::io(format!("{e}")),
}
}
fn cmd_graph(s: &Store) -> Result<(), CliError> {
for e in list_edges(s)? {
println!("{}\t-[{}]->\t{}", e.task_id, e.dep_type, e.depends_on);
}
Ok(())
}
fn cmd_chain(s: &Store, id: i64) -> anyhow::Result<()> {
fn cmd_chain(s: &Store, id: i64) -> Result<(), CliError> {
for did in dependency_chain(s, id)? { println!("{}", did); }
Ok(())
}
fn cmd_search(s: &Store, query: String, limit: i64) -> anyhow::Result<()> {
fn cmd_search(s: &Store, query: String, limit: i64) -> Result<(), CliError> {
let out = atoms::search::run(s, atoms::search::Input {
query, limit: Some(limit),
}).map_err(|e| anyhow::anyhow!("{e}"))?;
}).map_err(classify_search_error)?;
for t in out.results {
println!("{}\t{}\t{}", t.id, t.status, t.title);
}
Ok(())
}
fn cmd_milestone(s: &Store, name: String, description: String) -> anyhow::Result<()> {
/// Per §Runtime contract: query validation → exit 2, storage/IO → exit 1.
fn classify_search_error(e: atoms::search::Error) -> CliError {
match e {
atoms::search::Error::InvalidQuery => CliError::atom(format!("{e}")),
atoms::search::Error::StoreError(_) => CliError::io(format!("{e}")),
}
}
fn cmd_milestone(s: &Store, name: String, description: String) -> Result<(), CliError> {
let id = create_milestone(s, &Milestone {
name, description, ..Default::default() })?;
println!("{}", id);
Ok(())
}
fn cmd_link_milestone(s: &Store, task_id: i64, milestone_id: i64) -> anyhow::Result<()> {
fn cmd_link_milestone(s: &Store, task_id: i64, milestone_id: i64) -> Result<(), CliError> {
link_task_to_milestone(s, task_id, milestone_id)?;
println!("linked {} -> milestone {}", task_id, milestone_id);
Ok(())
@ -127,6 +180,9 @@ fn cmd_link_milestone(s: &Store, task_id: i64, milestone_id: i64) -> anyhow::Res
fn main() -> ExitCode {
match run() {
Ok(()) => ExitCode::SUCCESS,
Err(e) => { eprintln!("kei-task: {e:#}"); ExitCode::from(1) }
Err(CliError { code, msg }) => {
eprintln!("{msg}");
ExitCode::from(code)
}
}
}

View file

@ -0,0 +1,32 @@
//! kei-task CLI exit-code smoke tests (§Runtime contract).
//!
//! Atom-layer errors (validation / semantic) → exit 2.
//! Storage/IO errors → exit 1.
//!
//! `create --title ""` is the canonical validation-failure case: the
//! atom's typed Error enum returns `InvalidTitle`, which main.rs maps
//! to exit 2, NOT the old anyhow collapse at exit 1.
use std::process::Command;
const BIN: &str = env!("CARGO_BIN_EXE_kei-task");
#[test]
fn create_empty_title_exits_2() {
let tmp = tempfile::tempdir().unwrap();
let db = tmp.path().join("task.sqlite");
let out = Command::new(BIN)
.arg("--db")
.arg(&db)
.arg("create")
.arg("")
.output()
.expect("spawn kei-task");
assert_eq!(out.status.code(), Some(2),
"expected exit 2 on InvalidTitle; stdout: {}, stderr: {}",
String::from_utf8_lossy(&out.stdout),
String::from_utf8_lossy(&out.stderr));
let stderr = String::from_utf8_lossy(&out.stderr);
assert!(stderr.contains("InvalidTitle"),
"expected 'InvalidTitle' in stderr: {stderr}");
}

View file

@ -391,3 +391,11 @@ Non-breaking additions (new optional fields, new atom kinds, new side-effect dom
| 6 | Error model per-atom vs shared registry | **Per-atom** | Simpler to start. Registry can be added later non-breakingly. |
**Locked values:** all of the above. Breaking changes to any of these during 6-week parallel window require explicit user revocation + all-streams sync + ledger row.
## Amendments — non-breaking clarifications
| # | Date | Clarification | Reason |
|---|---|---|---|
| A-1 | 2026-04-23 | **`input.schema` and `output.schema` are REQUIRED for all atom kinds** (`command` / `query` / `stream` / `transform`). An atom with no inputs should declare `input.schema` pointing to a JSON Schema with `{"type": "object", "properties": {}, "additionalProperties": false}` — i.e., "empty object". Similarly for no-output. The runtime + graph lint BOTH enforce presence of the schema ref; shared `kei-atom-discovery` parses them as `Option<PathBuf>` only to allow tolerant skip-on-missing (with stderr warning) rather than aborting the whole scan on one bad atom. | Architect P0-a (post-audit 2026-04-23) — Stream C parsed input/output Optional, Stream D required. Asymmetric enforcement → "sage sees atom, runtime skips" drift. Both streams now agree: Optional at parse layer, required at lint layer. |
These amendments document interpretations consistent with the locked schema — no frontmatter-shape change, no wire-format change, no stream refactor required.

133
tests/substrate_integration.sh Executable file
View file

@ -0,0 +1,133 @@
#!/usr/bin/env bash
# substrate_integration.sh — cross-stream integration smoke test
#
# Architect P0-b (audit wave 2026-04-23): each stream (kei-forge / kei-task
# atoms / kei-sage / kei-runtime) has its own smoke tests, but no single
# test exercised the cross-stream composition. This script is that test.
#
# The check: build release binaries, generate a fresh atom via new-atom.sh,
# then verify that kei-runtime + kei-sage BOTH discover it identically and
# that kei-runtime schema-lint passes on it.
#
# Exit 0 = substrate v1 contract holds end-to-end
# Exit 1 = any step failed — see stderr for the offending stage
set -euo pipefail
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"
TMPROOT="$(mktemp -d)"
trap 'rm -rf "$TMPROOT"' EXIT
fail() { echo "SUBSTRATE-INTEGRATION FAIL: $*" >&2; exit 1; }
echo "==> Building release binaries (kei-runtime, kei-sage)…"
cd _primitives/_rust
cargo build --release -p kei-runtime -p kei-sage >/dev/null 2>&1 \
|| fail "cargo build failed"
RT="$(pwd)/target/release/kei-runtime"
SAGE="$(pwd)/target/release/kei-sage"
cd "$ROOT"
echo "==> Scaffolding a fresh atom (kei-task::create) via new-atom.sh for isolated test corpus…"
CORPUS="$TMPROOT/corpus/kei-task"
mkdir -p "$CORPUS"/{atoms/schemas,src/atoms,tests}
# Minimal hand-crafted atom mirroring Stream B's create atom shape —
# covers all REQUIRED frontmatter fields so schema-lint passes.
cat > "$CORPUS/atoms/create.md" <<'EOF'
---
atom: kei-task::create
kind: command
version: "0.22.3"
input:
schema: schemas/create-input.json
required: [title]
example: { title: "x" }
output:
schema: schemas/create-output.json
example: { id: 1 }
errors:
- code: DuplicateTitle
http_analog: 409
side_effects:
- { op: write, domain: kei-task-db }
idempotent: false
timeout_ms: 5000
stability: stable
keywords: [integration-test]
related: []
---
# kei-task::create
Integration-test atom. See substrate_integration.sh.
EOF
cat > "$CORPUS/atoms/schemas/create-input.json" <<'EOF'
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "kei-task/atoms/schemas/create-input.json",
"title": "kei-task::create input",
"type": "object",
"required": ["title"],
"properties": {
"title": { "type": "string", "minLength": 1 }
},
"additionalProperties": false,
"examples": [{"title": "x"}]
}
EOF
cat > "$CORPUS/atoms/schemas/create-output.json" <<'EOF'
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "kei-task/atoms/schemas/create-output.json",
"title": "kei-task::create output",
"type": "object",
"properties": { "id": { "type": "integer" } },
"additionalProperties": false,
"examples": [{"id": 1}]
}
EOF
echo "==> kei-runtime schema-lint…"
"$RT" schema-lint --root "$TMPROOT/corpus" \
| grep -q "^PASS" \
|| fail "schema-lint did not report PASS"
echo "==> kei-runtime list-atoms…"
LIST="$("$RT" list-atoms --root "$TMPROOT/corpus")"
echo "$LIST" | grep -q "kei-task::create" \
|| fail "kei-runtime list-atoms did not see kei-task::create"
echo "==> kei-sage atoms-discover…"
DISCOVER="$("$SAGE" atoms-discover --root "$TMPROOT/corpus")"
echo "$DISCOVER" | grep -q "kei-task::create" \
|| fail "kei-sage atoms-discover did not see kei-task::create"
echo "==> Cross-stream ID agreement…"
RT_IDS="$(echo "$LIST" | awk '{print $1}' | sort)"
SAGE_IDS="$(echo "$DISCOVER" | awk 'NR>1 && $1 != "" {print $1}' | sort)"
[ "$RT_IDS" = "$SAGE_IDS" ] \
|| fail "runtime and sage disagree on atom IDs:\n runtime: $RT_IDS\n sage: $SAGE_IDS"
echo "==> kei-runtime invoke (expects NotImplemented → exit 64)…"
set +e
"$RT" invoke --root "$TMPROOT/corpus" kei-task::create --input '{"title":"x"}' >/dev/null 2>&1
RC=$?
set -e
[ "$RC" -eq 64 ] \
|| fail "invoke should exit 64 (NotImplemented), got $RC"
echo "==> kei-runtime invoke with bad input (expects InputInvalid → exit 2)…"
set +e
"$RT" invoke --root "$TMPROOT/corpus" kei-task::create --input '{}' >/dev/null 2>&1
RC=$?
set -e
[ "$RC" -eq 2 ] \
|| fail "invoke with missing required field should exit 2, got $RC"
echo ""
echo "✓ SUBSTRATE-INTEGRATION PASS — all 4 streams agree on schema, runtime + sage see same atoms, exit codes per locked §Runtime contract"