KeiSeiKit-1.0/_primitives/_rust/kei-skill-importer/src/classifier.rs
Parfii-bot a4e667de10 KeiSeiKit-public — clean state
Single-commit clean baseline after security scrub of niche-tells,
project codenames, internal jargon, and contributor-email leaks.

Contents:
- 100 Rust crates (_primitives/_rust/)
- 37 agent manifests (_manifests/) + generated specs (_generated/)
- 67 user-invocable skills (skills/)
- 33 hooks (hooks/)
- Composition blocks (_blocks/)
- Documentation (docs/, README.md)
- TS adapter packages (_ts_packages/)
- Assembler (_assembler/)
- Roles (_roles/)
- Templates (_templates/)
- Forgejo CI (.forgejo/)

Author: Denis Parfionovich <info@greendragon.info>

License: see LICENSE.
2026-05-01 12:09:03 +08:00

159 lines
5.5 KiB
Rust

//! Atom-call classifier — detects bash fences, `kei-<crate> <verb>`
//! invocations, and slash commands inside each phase body. Resolves
//! `atom_id` against a static registry (Wave 26.5 scope; Wave 27 will
//! swap this for a live `kei-atom-discovery::discover_atoms` lookup).
use crate::canonical::{AtomCall, AtomCallKind, ImportedSkill};
use regex::Regex;
use std::sync::OnceLock;
/// Static allow-list of command names the classifier recognises as
/// KeiSeiKit primitives (Wave 26.5).
///
/// Entries are matched by exact string comparison against the first token of
/// a command (`classify_kind`, `try_resolve_atom_id`) or against the name
/// captured by the primitive regex (`collect_kei_primitives`). Wave 27 will
/// swap this table for dynamic discovery.
const KNOWN_PRIMITIVES: &[&str] = &[
    // Names carrying the `kei-` prefix.
    "kei-cortex",
    "kei-task",
    "kei-sage",
    "kei-router",
    "kei-cache",
    "kei-pipe",
    "kei-spawn",
    "kei-replay",
    "kei-fork",
    "kei-ledger",
    "kei-memory",
    "kei-migrate",
    "kei-changelog",
    "kei-pet",
    "kei-store",
    "kei-artifact",
    "kei-search-core",
    "kei-content-store",
    "kei-social-store",
    "kei-chat-store",
    "kei-crossdomain",
    "kei-curator",
    "kei-auth",
    "kei-entity-store",
    "kei-agent-runtime",
    "kei-capability",
    "kei-provision",
    "kei-discover",
    "kei-prune",
    "kei-brain-view",
    "kei-hibernate",
    "kei-conflict-scan",
    "kei-refactor-engine",
    "kei-graph-check",
    "kei-watch",
    "kei-scheduler",
    "kei-diff",
    "kei-dna-index",
    "kei-shared",
    "kei-forge",
    "kei-runtime",
    "kei-atom-discovery",
    "kei-skill-importer",
    // Names without the `kei-` prefix.
    "tomd",
    "ssh-check",
    "firewall-diff",
    "mock-render",
    "visual-diff",
    "tokens-sync",
];
/// Lazily compiled matcher for fenced shell blocks.
///
/// Matches a line-initial opening fence tagged `bash`, `sh`, or `shell`, then
/// captures (group 1, non-greedy, spanning newlines) everything up to the
/// next line that starts with a closing fence. The regex is built on first
/// call and reused afterwards.
fn bash_fence_re() -> &'static Regex {
    const PATTERN: &str = r"(?ms)^```(?:bash|sh|shell)\s*\n(.*?)^```";
    static CELL: OnceLock<Regex> = OnceLock::new();
    // PATTERN is a fixed literal, so a compile failure here would be a
    // programming error rather than a runtime condition.
    CELL.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled matcher for slash commands such as `/deploy`.
///
/// A candidate must sit at the start of a line or directly after whitespace.
/// Group 1 holds the command itself: a `/`, one lowercase ASCII letter, then
/// 1 to 40 characters from `[a-z0-9_-]`, ending on a word boundary. The regex
/// is built on first call and reused afterwards.
fn slash_cmd_re() -> &'static Regex {
    const PATTERN: &str = r"(?m)(?:^|\s)(/[a-z][a-z0-9_-]{1,40})\b";
    static CELL: OnceLock<Regex> = OnceLock::new();
    // PATTERN is a fixed literal, so a compile failure here would be a
    // programming error rather than a runtime condition.
    CELL.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled matcher for `kei-<name> <verb>` mentions.
///
/// Group 1 is the `kei-`-prefixed name (lowercase letters, digits, hyphens);
/// group 2 is the whitespace-separated token that follows it: one lowercase
/// ASCII letter plus 1 to 30 characters from `[a-z0-9-]`, ending on a word
/// boundary. The regex is built on first call and reused afterwards.
fn kei_primitive_re() -> &'static Regex {
    const PATTERN: &str = r"\b(kei-[a-z][a-z0-9-]+)\s+([a-z][a-z0-9-]{1,30})\b";
    static CELL: OnceLock<Regex> = OnceLock::new();
    // PATTERN is a fixed literal, so a compile failure here would be a
    // programming error rather than a runtime condition.
    CELL.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Re-scan the body of every phase in `skill` and store the detected calls
/// in that phase's `atom_calls`.
///
/// Whatever `atom_calls` held before is overwritten with the fresh scan
/// result.
pub fn classify_atom_calls(skill: &mut ImportedSkill) {
    skill
        .phases
        .iter_mut()
        .for_each(|phase| phase.atom_calls = scan_phase(&phase.body));
}
fn scan_phase(body: &str) -> Vec<AtomCall> {
let mut out: Vec<AtomCall> = Vec::new();
collect_bash_fences(body, &mut out);
collect_kei_primitives(body, &mut out);
collect_slash_commands(body, &mut out);
dedup(&mut out);
out
}
/// Append one `AtomCall` to `out` for each command line inside every shell
/// fence found in `body`.
///
/// Lines are trimmed; blank lines and lines beginning with `#` (shell
/// comments) are ignored. Each remaining line is stored verbatim as
/// `raw_command`, with its kind taken from `classify_kind` and its atom id
/// from `try_resolve_atom_id`.
fn collect_bash_fences(body: &str, out: &mut Vec<AtomCall>) {
    let command_lines = bash_fence_re()
        .captures_iter(body)
        .filter_map(|cap| cap.get(1))
        .flat_map(|block| block.as_str().trim().lines())
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'));

    out.extend(command_lines.map(|line| AtomCall {
        raw_command: line.to_string(),
        atom_id: try_resolve_atom_id(line),
        kind: classify_kind(line),
    }));
}
/// Append a `KeiPrimitive` call to `out` for each `kei-<name> <verb>` mention
/// in `body` whose name appears in `KNOWN_PRIMITIVES`.
///
/// The recorded `raw_command` is the normalised `"<name> <verb>"` and the
/// atom id is `"<name>::<verb>"`. A mention is skipped when `out` already
/// holds an entry with the same atom id (for example a shell-fence line that
/// resolved to it) or with the same `raw_command`.
fn collect_kei_primitives(body: &str, out: &mut Vec<AtomCall>) {
    for cap in kei_primitive_re().captures_iter(body) {
        let name = cap.get(1).map_or("", |m| m.as_str());
        let verb = cap.get(2).map_or("", |m| m.as_str());
        if !KNOWN_PRIMITIVES.contains(&name) {
            continue;
        }

        let atom_id = format!("{name}::{verb}");
        let raw_command = format!("{name} {verb}");
        let already_recorded = out.iter().any(|call| {
            call.atom_id.as_deref() == Some(atom_id.as_str()) || call.raw_command == raw_command
        });
        if !already_recorded {
            out.push(AtomCall {
                raw_command,
                atom_id: Some(atom_id),
                kind: AtomCallKind::KeiPrimitive,
            });
        }
    }
}
/// Append a `UserPrompt` call to `out` for each slash command (`/name`)
/// mentioned in `body`.
///
/// Candidates are whatever `slash_cmd_re` captures in group 1. A candidate
/// that is immediately followed by another `/` is discarded: it is the first
/// segment of a filesystem path (`/dev/null`, `/usr/bin/env`) or the body of
/// a `/regex/` literal, not a command. Commands whose text is already present
/// in `out` are not added again. Slash commands are never resolved to an
/// atom, so `atom_id` is always `None`.
fn collect_slash_commands(body: &str, out: &mut Vec<AtomCall>) {
    for cap in slash_cmd_re().captures_iter(body) {
        let Some(found) = cap.get(1) else {
            continue;
        };
        // The capture can only contain a single leading `/` and is at least
        // three bytes long, so inspecting the capture text alone cannot tell
        // a command from a path. Look at the character right after it
        // instead (the regex engine has no look-ahead to do this in-pattern).
        if body[found.end()..].starts_with('/') {
            continue;
        }
        let cmd = found.as_str();
        if out.iter().any(|call| call.raw_command == cmd) {
            continue;
        }
        out.push(AtomCall {
            raw_command: cmd.to_string(),
            atom_id: None,
            kind: AtomCallKind::UserPrompt,
        });
    }
}
/// Decide the call kind of a command line from its first whitespace-separated
/// token.
///
/// Returns `KeiPrimitive` when that token is listed in `KNOWN_PRIMITIVES`,
/// `Bash` for any other token, and `Unknown` when the line has no token at
/// all (empty or whitespace only).
fn classify_kind(cmd: &str) -> AtomCallKind {
    match cmd.split_whitespace().next() {
        Some(first) if KNOWN_PRIMITIVES.contains(&first) => AtomCallKind::KeiPrimitive,
        Some(_) => AtomCallKind::Bash,
        None => AtomCallKind::Unknown,
    }
}
/// Build the atom id `"<name>::<verb>"` from the first two
/// whitespace-separated tokens of `cmd`.
///
/// Returns `None` when there are fewer than two tokens, when the first token
/// is not listed in `KNOWN_PRIMITIVES`, or when the second token does not
/// start with a lowercase ASCII letter (which rules out flags such as
/// `--help`).
fn try_resolve_atom_id(cmd: &str) -> Option<String> {
    let mut tokens = cmd.split_whitespace();
    let (name, verb) = (tokens.next()?, tokens.next()?);

    let is_known = KNOWN_PRIMITIVES.contains(&name);
    // NOTE(review): only the first character of the verb is checked; the rest
    // of the token is copied into the id unchanged.
    let verb_looks_valid = verb.starts_with(|c: char| c.is_ascii_lowercase());

    (is_known && verb_looks_valid).then(|| format!("{name}::{verb}"))
}
/// Remove entries whose `raw_command` repeats an earlier entry, keeping the
/// first occurrence and preserving the relative order of the survivors.
fn dedup(calls: &mut Vec<AtomCall>) {
    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
    calls.retain(|call| {
        let first_time = !seen.contains(&call.raw_command);
        if first_time {
            seen.insert(call.raw_command.clone());
        }
        first_time
    });
}
/// Public predicate used by the emit-path decision.
///
/// Returns `true` when any phase of `skill` holds a call that has no
/// `atom_id` and whose kind is something other than `Bash`. Plain shell
/// commands without an atom id therefore do not count as unresolved.
pub fn has_unresolved_atom_calls(skill: &ImportedSkill) -> bool {
    skill.phases.iter().any(|phase| {
        phase
            .atom_calls
            .iter()
            .any(|call| call.kind != AtomCallKind::Bash && call.atom_id.is_none())
    })
}