New crate _primitives/_rust/kei-runtime/ implementing §Runtime invocation contract from locked substrate schema. CLI (clap-derive): - list-atoms [--root] [--crate] [--kind] → walk + print - invoke <atom-id> --input <json|@file> → discover + validate input (stub exec) - schema-lint [--root] [--crate] → 6-check validator - pipe <dag.toml> → "not yet implemented" stub Modules (≤ 200 LOC each, largest lint.rs @ 171): - src/discover.rs — walk_atoms walks <root>/*/atoms/*.md, parses frontmatter - src/validate.rs — JSONSchema draft-07 via jsonschema 0.17.1 - src/invoke.rs — MVP stub: discover → parse → validate_input → boundary ack - src/lint.rs — 6 checks: required fields, kind enum, side_effects shape, schema path existence + draft-07 declaration, wikilink resolution - src/main.rs — clap CLI, exit 0|1|2 per §Runtime contract Intentional stub boundary: invoke returns structured JSON ack (exit 0), wire-up to concrete atom impls deferred to integration pass (needs Stream B atoms landed first). Registered kei-runtime in workspace members. Tests: 2/2 integration smoke (lint_smoke, discover_smoke) green. Stream D of substrate v1 parallel build. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
171 lines
5.7 KiB
Rust
171 lines
5.7 KiB
Rust
//! `schema-lint` — correctness pass over every `atoms/*.md` under `<root>`.
//!
//! Checks (from SUBSTRATE-SCHEMA §Validation):
//! 1. Frontmatter has required fields (atom, kind, version, input, output,
//!    side_effects, idempotent, stability).
//! 2. Schema paths resolve to existing JSON files.
//! 3. JSON Schemas declare draft-07 via `$schema`.
//! 4. `kind` ∈ {command, query, stream, transform}.
//! 5. `side_effects` entries are `{op, domain}` objects.
//! 6. `related` wikilinks point to another atom OR `rules/...` (dangling rule
//!    refs allowed).
|
|
|
|
use crate::discover::extract_frontmatter;
|
|
use serde_yaml::Value as YamlValue;
|
|
use std::collections::HashSet;
|
|
use std::path::{Path, PathBuf};
|
|
use walkdir::WalkDir;
|
|
|
|
/// Frontmatter keys that every atom file must declare.
///
/// Only the presence of each key is tested against this list; the shape of the
/// individual values is the business of the more specific checks.
const REQUIRED_FIELDS: &[&str] = &[
    "atom",
    "kind",
    "version",
    "input",
    "output",
    "side_effects",
    "idempotent",
    "stability",
];

/// The closed set of values accepted for the `kind` frontmatter key.
const ALLOWED_KINDS: &[&str] = &["command", "query", "stream", "transform"];
|
|
|
|
/// Result of a lint run, with every visited atom file sorted into exactly one
/// of the two buckets.
#[derive(Debug, Default)]
pub struct LintReport {
    /// Display paths of the atom files that produced no lint errors.
    pub passed: Vec<String>,
    /// Display path of each failing atom file, paired with the messages of
    /// every problem found in it.
    pub failed: Vec<(String, Vec<String>)>,
}
|
|
|
|
/// Run the full lint over `<root>/*/atoms/*.md`.
|
|
pub fn schema_lint(root: &Path) -> LintReport {
|
|
let mut report = LintReport::default();
|
|
let all_atoms = collect_atom_ids(root);
|
|
for md in find_atom_files(root) {
|
|
let label = md.display().to_string();
|
|
match lint_one(&md, &all_atoms) {
|
|
Ok(()) => report.passed.push(label),
|
|
Err(errs) => report.failed.push((label, errs)),
|
|
}
|
|
}
|
|
report
|
|
}
|
|
|
|
fn find_atom_files(root: &Path) -> Vec<PathBuf> {
|
|
WalkDir::new(root)
|
|
.max_depth(3)
|
|
.into_iter()
|
|
.flatten()
|
|
.filter(|e| {
|
|
e.path().is_file()
|
|
&& e.path().extension().is_some_and(|ext| ext == "md")
|
|
&& e.path().parent().and_then(|p| p.file_name()).is_some_and(|n| n == "atoms")
|
|
})
|
|
.map(|e| e.path().to_path_buf())
|
|
.collect()
|
|
}
|
|
|
|
fn collect_atom_ids(root: &Path) -> HashSet<String> {
|
|
let mut ids = HashSet::new();
|
|
for md in find_atom_files(root) {
|
|
if let Ok(body) = std::fs::read_to_string(&md) {
|
|
if let Some(fm) = extract_frontmatter(&body) {
|
|
if let Ok(y) = serde_yaml::from_str::<YamlValue>(fm) {
|
|
if let Some(id) = y.get("atom").and_then(|v| v.as_str()) {
|
|
ids.insert(id.to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ids
|
|
}
|
|
|
|
fn lint_one(md_path: &Path, known_atoms: &HashSet<String>) -> Result<(), Vec<String>> {
|
|
let body = std::fs::read_to_string(md_path).map_err(|e| vec![format!("read: {e}")])?;
|
|
let fm_text = extract_frontmatter(&body).ok_or_else(|| vec!["no frontmatter".to_string()])?;
|
|
let fm: YamlValue =
|
|
serde_yaml::from_str(fm_text).map_err(|e| vec![format!("yaml parse: {e}")])?;
|
|
let mut errs = Vec::new();
|
|
check_required_fields(&fm, &mut errs);
|
|
check_kind(&fm, &mut errs);
|
|
check_side_effects(&fm, &mut errs);
|
|
check_schema_files(md_path, &fm, &mut errs);
|
|
check_related(&fm, known_atoms, &mut errs);
|
|
if errs.is_empty() {
|
|
Ok(())
|
|
} else {
|
|
Err(errs)
|
|
}
|
|
}
|
|
|
|
fn check_required_fields(fm: &YamlValue, errs: &mut Vec<String>) {
|
|
for field in REQUIRED_FIELDS {
|
|
if fm.get(field).is_none() {
|
|
errs.push(format!("missing {field}"));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn check_kind(fm: &YamlValue, errs: &mut Vec<String>) {
|
|
if let Some(k) = fm.get("kind").and_then(|v| v.as_str()) {
|
|
if !ALLOWED_KINDS.contains(&k) {
|
|
errs.push(format!("kind `{k}` not in {ALLOWED_KINDS:?}"));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn check_side_effects(fm: &YamlValue, errs: &mut Vec<String>) {
|
|
let Some(seq) = fm.get("side_effects").and_then(|v| v.as_sequence()) else {
|
|
return;
|
|
};
|
|
for (i, entry) in seq.iter().enumerate() {
|
|
let has_op = entry.get("op").and_then(|v| v.as_str()).is_some();
|
|
let has_domain = entry.get("domain").and_then(|v| v.as_str()).is_some();
|
|
if !has_op || !has_domain {
|
|
errs.push(format!("side_effects[{i}] missing op or domain"));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn check_schema_files(md_path: &Path, fm: &YamlValue, errs: &mut Vec<String>) {
|
|
for key in &["input", "output"] {
|
|
let Some(rel) = fm.get(key).and_then(|v| v.get("schema")).and_then(|v| v.as_str()) else {
|
|
continue;
|
|
};
|
|
let full = md_path.parent().map(|p| p.join(rel)).unwrap_or_else(|| PathBuf::from(rel));
|
|
if !full.exists() {
|
|
errs.push(format!("{key} schema missing: {}", full.display()));
|
|
continue;
|
|
}
|
|
check_draft07(&full, key, errs);
|
|
}
|
|
}
|
|
|
|
fn check_draft07(schema_path: &Path, key: &str, errs: &mut Vec<String>) {
|
|
let Ok(text) = std::fs::read_to_string(schema_path) else {
|
|
errs.push(format!("{key} schema unreadable"));
|
|
return;
|
|
};
|
|
let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) else {
|
|
errs.push(format!("{key} schema not JSON"));
|
|
return;
|
|
};
|
|
let draft = json.get("$schema").and_then(|v| v.as_str()).unwrap_or("");
|
|
if !draft.contains("draft-07") {
|
|
errs.push(format!("{key} schema missing draft-07 $schema"));
|
|
}
|
|
}
|
|
|
|
/// Verify that every `related` wikilink names a known atom.
///
/// Links whose target starts with `rules/` are accepted without resolution
/// (dangling rule references are allowed). Any other target must be in `known`.
///
/// Both spellings of a link are understood: the quoted string `"[[foo]]"`, and
/// the unquoted `[[foo]]`, which YAML parses as a list containing a list
/// containing `foo`. The unquoted form used to be skipped without any check, so
/// dangling links written that way passed the lint. Entries of any other shape
/// are still ignored.
fn check_related(fm: &YamlValue, known: &HashSet<String>, errs: &mut Vec<String>) {
    let Some(seq) = fm.get("related").and_then(|v| v.as_sequence()) else {
        return;
    };
    for entry in seq {
        let Some(target) = related_target(entry) else {
            continue;
        };
        if target.starts_with("rules/") {
            continue;
        }
        if !known.contains(target) {
            errs.push(format!("related `{target}` unresolved"));
        }
    }
}

/// Extract the link target from one `related` entry, or `None` if the entry is
/// neither a string nor the nested-list form of an unquoted `[[target]]`.
fn related_target(entry: &YamlValue) -> Option<&str> {
    if let Some(quoted) = entry.as_str() {
        // Quoted form: strip the wikilink brackets if they are there.
        return Some(quoted.trim_start_matches("[[").trim_end_matches("]]"));
    }
    // Unquoted form: exactly one list wrapping exactly one list wrapping a string.
    let [inner] = entry.as_sequence()?.as_slice() else {
        return None;
    };
    let [leaf] = inner.as_sequence()?.as_slice() else {
        return None;
    };
    leaf.as_str()
}
|