Merge feat/wave13-cleanup — HttpDriver + agent_id + safe_join + entity-store MEDIUM

This commit is contained in:
Parfii-bot 2026-04-23 16:16:28 +08:00
commit 11a14680c5
24 changed files with 2361 additions and 198 deletions

File diff suppressed because it is too large Load diff

View file

@ -22,5 +22,6 @@ pub mod registry;
pub mod role;
pub mod simulated_merge;
pub mod spawn;
pub mod validate;
pub mod verifies;
pub mod verify;

View file

@ -15,10 +15,10 @@ use crate::capability::TaskSpec;
use crate::compose::compose_prompt;
use crate::dna::Dna;
use crate::role::resolve_role;
use crate::validate::{autogen_agent_id, validate_agent_id};
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
/// Everything the orchestrator needs to hand the Claude `Agent` tool.
#[derive(Debug, Clone, Serialize)]
@ -50,6 +50,8 @@ pub fn prepare(task: &TaskSpec, kit_root: &Path) -> Result<AgentInvocation> {
} else {
task.task.agent_id.clone()
};
validate_agent_id(&agent_id)
.map_err(|e| anyhow!("agent-id rejected: {e}"))?;
let role_file = load_role_meta(kit_root, &task.task.role)?;
if !role_file.role.spawnable {
return Err(anyhow!(
@ -87,15 +89,6 @@ pub fn prepare(task: &TaskSpec, kit_root: &Path) -> Result<AgentInvocation> {
})
}
fn autogen_agent_id(role: &str) -> String {
let ts_ms = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_millis())
.unwrap_or(0);
let rand_hex = format!("{:04x}", rand::random::<u16>());
format!("ag-{}-{:x}-{}", role, ts_ms, rand_hex)
}
/// Human-readable block — copy into Claude Code's Agent-tool dialog.
pub fn render_human(inv: &AgentInvocation) -> String {
let iso = inv.isolation.as_deref().unwrap_or("<none>");

View file

@ -5,17 +5,23 @@
//! verifies from that vantage to catch integration regressions invisible
//! in agent's isolated worktree.
use anyhow::{Context, Result};
use crate::validate::validate_agent_id;
use anyhow::{anyhow, Context, Result};
use std::path::{Path, PathBuf};
use std::process::Command;
/// Create a temp worktree off `main_repo` at HEAD of `main`, apply the agent's
/// diff, return the temp worktree path. Caller cleans up.
///
/// Validates `agent_id` before constructing any tmp path — path-traversal
/// defence per the HIGH-risk agent_id sink audit.
pub fn run_simulated_merge(
agent_id: &str,
agent_worktree: &Path,
main_repo: &Path,
) -> Result<PathBuf> {
validate_agent_id(agent_id)
.map_err(|e| anyhow!("agent_id rejected in run_simulated_merge: {e}"))?;
let tmp = std::env::temp_dir().join(format!("kei-test-merge-{agent_id}"));
let _ = std::fs::remove_dir_all(&tmp);
run_git(main_repo, &["worktree", "add", "-d", tmp.to_str().unwrap(), "main"])

View file

@ -4,16 +4,26 @@
use crate::capability::TaskSpec;
use crate::compose::compose_prompt;
use crate::validate::validate_agent_id;
use anyhow::{anyhow, Context, Result};
use std::fs;
use std::path::{Path, PathBuf};
/// Parse a task.toml file into `TaskSpec`.
///
/// Validates the embedded `task.agent-id` (if non-empty) before returning —
/// a hostile task.toml with `agent-id = "../../../etc/foo"` is rejected at
/// the parse boundary so it never reaches a downstream path sink.
pub fn load_task(path: &Path) -> Result<TaskSpec> {
let text = fs::read_to_string(path)
.with_context(|| format!("read task file {}", path.display()))?;
toml::from_str::<TaskSpec>(&text)
.with_context(|| format!("parse task TOML {}", path.display()))
let spec: TaskSpec = toml::from_str(&text)
.with_context(|| format!("parse task TOML {}", path.display()))?;
if !spec.task.agent_id.is_empty() {
validate_agent_id(&spec.task.agent_id)
.map_err(|e| anyhow!("task.agent-id rejected: {e}"))?;
}
Ok(spec)
}
/// Prepare a spawnable agent directory.
@ -45,9 +55,15 @@ pub struct PreparedAgent {
pub task_path: PathBuf,
}
fn resolve_agent_id(task: &TaskSpec) -> Result<String> {
if !task.task.agent_id.is_empty() {
return Ok(task.task.agent_id.clone());
/// Resolve the effective `agent_id` — validator-checked, never creates
/// files as a side effect.
pub fn resolve_agent_id(task: &TaskSpec) -> Result<String> {
if task.task.agent_id.is_empty() {
return Err(anyhow!(
"task.agent-id is empty — orchestrator must allocate via kei-ledger"
));
}
Err(anyhow!("task.agent-id is empty — orchestrator must allocate via kei-ledger"))
validate_agent_id(&task.task.agent_id)
.map_err(|e| anyhow!("task.agent-id rejected: {e}"))?;
Ok(task.task.agent_id.clone())
}

View file

@ -0,0 +1,175 @@
//! Agent-id validator — HIGH-security path-traversal defence.
//!
//! Every `agent_id` flowing from task.toml (or auto-gen) into a filesystem
//! path sink MUST pass `validate_agent_id` first. Without this gate, a
//! hostile task.toml with `agent-id = "../../../etc/foo"` reaches
//! `tasks/<agent-id>/` and writes arbitrary paths.
//!
//! Rules (enforced in order, first failure wins):
//! - non-empty, length ≤ 64
//! - ASCII-only, matches `^[A-Za-z0-9][A-Za-z0-9_.-]*$`
//! - rejects `/`, `\`, `..`, leading `.`, leading `-`, NUL, `:`,
//! whitespace, non-ASCII
//! - rejects Windows-reserved names (case-insensitive):
//! CON, PRN, AUX, NUL, COM1-9, LPT1-9
//!
//! Also hosts `autogen_agent_id` (moved from prepare.rs) so the auto-gen
//! output passes the validator by construction.
use std::time::{SystemTime, UNIX_EPOCH};
use thiserror::Error;
/// Maximum permitted `agent_id` length (bytes = chars, since ASCII-only).
pub const MAX_AGENT_ID_LEN: usize = 64;
/// Typed error — the sole failure variant of `validate_agent_id`.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
#[error("invalid agent-id: {reason}")]
pub struct InvalidAgentId {
pub reason: String,
}
impl InvalidAgentId {
fn new(reason: impl Into<String>) -> Self {
Self { reason: reason.into() }
}
}
/// Validate an `agent_id` before it reaches any filesystem path.
pub fn validate_agent_id(raw: &str) -> Result<&str, InvalidAgentId> {
check_basic_shape(raw)?;
check_no_traversal_bytes(raw)?;
check_character_class(raw)?;
check_not_windows_reserved(raw)?;
Ok(raw)
}
fn check_basic_shape(raw: &str) -> Result<(), InvalidAgentId> {
if raw.is_empty() {
return Err(InvalidAgentId::new("empty"));
}
if raw.len() > MAX_AGENT_ID_LEN {
return Err(InvalidAgentId::new(format!(
"length {} exceeds max {}",
raw.len(),
MAX_AGENT_ID_LEN
)));
}
if !raw.is_ascii() {
return Err(InvalidAgentId::new("contains non-ASCII"));
}
Ok(())
}
fn check_no_traversal_bytes(raw: &str) -> Result<(), InvalidAgentId> {
if raw.contains("..") {
return Err(InvalidAgentId::new("contains parent sequence '..'"));
}
if raw.contains('/') {
return Err(InvalidAgentId::new("contains '/'"));
}
if raw.contains('\\') {
return Err(InvalidAgentId::new("contains '\\'"));
}
if raw.contains('\0') {
return Err(InvalidAgentId::new("contains NUL"));
}
if raw.contains(':') {
return Err(InvalidAgentId::new("contains ':'"));
}
if raw.chars().any(char::is_whitespace) {
return Err(InvalidAgentId::new("contains whitespace"));
}
Ok(())
}
fn check_character_class(raw: &str) -> Result<(), InvalidAgentId> {
let first = raw.chars().next().expect("non-empty checked earlier");
if !first.is_ascii_alphanumeric() {
return Err(InvalidAgentId::new(format!(
"must start with [A-Za-z0-9], got '{first}'"
)));
}
for c in raw.chars() {
if !(c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '-') {
return Err(InvalidAgentId::new(format!(
"disallowed character '{c}' (allowed: [A-Za-z0-9_.-])"
)));
}
}
Ok(())
}
fn check_not_windows_reserved(raw: &str) -> Result<(), InvalidAgentId> {
let stem = raw.split('.').next().unwrap_or(raw);
let up = stem.to_ascii_uppercase();
if is_windows_reserved(&up) {
return Err(InvalidAgentId::new(format!(
"Windows-reserved name: '{stem}'"
)));
}
Ok(())
}
fn is_windows_reserved(up: &str) -> bool {
matches!(up, "CON" | "PRN" | "AUX" | "NUL") || is_com_or_lpt(up)
}
fn is_com_or_lpt(up: &str) -> bool {
let (prefix, n) = match up.len() {
4 if up.starts_with("COM") => ("COM", &up[3..]),
4 if up.starts_with("LPT") => ("LPT", &up[3..]),
_ => return false,
};
let _ = prefix; // already matched
matches!(n, "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9")
}
/// Auto-generate a fresh `agent_id` whose output is validator-clean.
///
/// Format: `ag-<slugified-role>-<unix-ms-hex>-<4-hex-rand>`
pub fn autogen_agent_id(role: &str) -> String {
let slug = slugify_role(role);
let ts_ms = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_millis())
.unwrap_or(0);
let rand_hex = format!("{:04x}", rand::random::<u16>());
let candidate = format!("ag-{slug}-{ts_ms:x}-{rand_hex}");
// Truncate to cap while preserving the rand-hex suffix.
truncate_agent_id(&candidate, &rand_hex)
}
/// Slugify a role name into the validator's allowed class.
///
/// Non-allowed characters collapse to `_`; empty result becomes `x` so the
/// auto-gen output is never `ag--<ts>-<rand>` (leading-dash after `ag-`).
pub fn slugify_role(role: &str) -> String {
let mut out = String::with_capacity(role.len());
for c in role.chars() {
if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' {
out.push(c);
} else {
out.push('_');
}
}
if out.is_empty() {
return "x".to_string();
}
let trimmed = out.trim_matches(|c: char| c == '-' || c == '.' || c == '_');
if trimmed.is_empty() {
"x".to_string()
} else {
trimmed.to_string()
}
}
fn truncate_agent_id(candidate: &str, rand_hex: &str) -> String {
if candidate.len() <= MAX_AGENT_ID_LEN {
return candidate.to_string();
}
let keep = MAX_AGENT_ID_LEN.saturating_sub(rand_hex.len() + 1);
let head = &candidate[..keep.min(candidate.len())];
let head_trimmed = head.trim_end_matches(|c: char| c == '-' || c == '.' || c == '_');
format!("{head_trimmed}-{rand_hex}")
}

View file

@ -8,7 +8,8 @@
use crate::capability::{RunMode, TaskSpec, VerifyContext, VerifyResult};
use crate::registry;
use anyhow::Result;
use crate::validate::validate_agent_id;
use anyhow::{anyhow, Result};
use serde::Serialize;
use std::path::{Path, PathBuf};
@ -42,6 +43,8 @@ pub fn verify_task(
capability_names: &[String],
simulated_merge_path: Option<PathBuf>,
) -> Result<VerifyReport> {
validate_agent_id(agent_id)
.map_err(|e| anyhow!("agent_id rejected in verify_task: {e}"))?;
let mut report = VerifyReport::default();
for name in capability_names {
let cap = match registry::get_verify(name) {

View file

@ -0,0 +1,270 @@
//! HIGH-security agent-id validator tests.
//!
//! Covers every documented rejection class + the happy path for the shapes
//! actually produced in `autogen_agent_id` and used in fixtures.
use kei_agent_runtime::spawn::{load_task, resolve_agent_id};
use kei_agent_runtime::validate::{
autogen_agent_id, slugify_role, validate_agent_id, InvalidAgentId, MAX_AGENT_ID_LEN,
};
use kei_agent_runtime::capability::TaskSpec;
use std::fs;
use tempfile::tempdir;
// ---- basic shape ---------------------------------------------------------
#[test]
fn empty_rejected() {
let err = validate_agent_id("").unwrap_err();
assert!(err.reason.contains("empty"), "got: {}", err.reason);
}
#[test]
fn too_long_rejected() {
let raw = "a".repeat(MAX_AGENT_ID_LEN + 1);
let err = validate_agent_id(&raw).unwrap_err();
assert!(err.reason.contains("length"), "got: {}", err.reason);
}
#[test]
fn exactly_max_length_ok() {
let raw = "a".repeat(MAX_AGENT_ID_LEN);
assert!(validate_agent_id(&raw).is_ok());
}
#[test]
fn non_ascii_rejected() {
let err = validate_agent_id("agent-кириллица").unwrap_err();
assert!(err.reason.to_lowercase().contains("ascii") || err.reason.contains("character"));
}
// ---- traversal-class bytes -----------------------------------------------
#[test]
fn parent_dir_rejected() {
let err = validate_agent_id("foo..bar").unwrap_err();
assert!(err.reason.contains(".."), "got: {}", err.reason);
}
#[test]
fn literal_double_dot_rejected() {
let err = validate_agent_id("..").unwrap_err();
// "..": starts with '.', so leading-dot rule OR traversal rule fires first
// — implementation currently flags `..` first; either class is fine.
assert!(err.reason.contains("..") || err.reason.contains("start"));
}
#[test]
fn slash_rejected() {
let err = validate_agent_id("foo/bar").unwrap_err();
assert!(err.reason.contains('/'), "got: {}", err.reason);
}
#[test]
fn backslash_rejected() {
let err = validate_agent_id("foo\\bar").unwrap_err();
assert!(err.reason.contains('\\'), "got: {}", err.reason);
}
#[test]
fn leading_dot_rejected() {
let err = validate_agent_id(".secret").unwrap_err();
assert!(err.reason.contains("start"), "got: {}", err.reason);
}
#[test]
fn leading_dash_rejected() {
let err = validate_agent_id("-xyz").unwrap_err();
assert!(err.reason.contains("start"), "got: {}", err.reason);
}
#[test]
fn nul_rejected() {
let err = validate_agent_id("foo\0bar").unwrap_err();
assert!(err.reason.contains("NUL"), "got: {}", err.reason);
}
#[test]
fn colon_rejected() {
let err = validate_agent_id("foo:bar").unwrap_err();
assert!(err.reason.contains(':'), "got: {}", err.reason);
}
#[test]
fn whitespace_rejected() {
let err = validate_agent_id("foo bar").unwrap_err();
assert!(err.reason.contains("whitespace"), "got: {}", err.reason);
}
#[test]
fn tab_rejected() {
let err = validate_agent_id("foo\tbar").unwrap_err();
assert!(err.reason.contains("whitespace"), "got: {}", err.reason);
}
// ---- valid shapes --------------------------------------------------------
#[test]
fn valid_simple_passes() {
assert!(validate_agent_id("abc123").is_ok());
}
#[test]
fn valid_with_dashes_and_underscores_passes() {
assert!(validate_agent_id("ag-edit-local-xyz_1").is_ok());
assert!(validate_agent_id("ag-code.impl-abc").is_ok());
}
#[test]
fn fixture_edit_local_forge_abc123_passes() {
// Exact shape used in prepare_smoke.rs `happy_path_yields_full_invocation`.
assert!(validate_agent_id("edit-local-forge-abc123").is_ok());
}
// ---- Windows-reserved (case-insensitive) ---------------------------------
#[test]
fn windows_reserved_con_rejected() {
assert!(validate_agent_id("CON").is_err());
assert!(validate_agent_id("con").is_err());
assert!(validate_agent_id("Con").is_err());
}
#[test]
fn windows_reserved_nul_prn_aux_rejected() {
for n in ["NUL", "nul", "PRN", "prn", "AUX", "aux"] {
assert!(validate_agent_id(n).is_err(), "expected {n} to be rejected");
}
}
#[test]
fn windows_reserved_com_lpt_rejected() {
for n in ["COM1", "com2", "COM9", "LPT1", "lpt5", "LPT9"] {
assert!(validate_agent_id(n).is_err(), "expected {n} to be rejected");
}
}
#[test]
fn windows_reserved_with_extension_rejected() {
assert!(validate_agent_id("CON.txt").is_err());
assert!(validate_agent_id("com1.log").is_err());
}
#[test]
fn windows_com0_or_com10_not_reserved() {
// Only COM1..COM9 and LPT1..LPT9 are reserved.
assert!(validate_agent_id("com0").is_ok());
assert!(validate_agent_id("com10").is_ok());
assert!(validate_agent_id("lpt0").is_ok());
}
#[test]
fn not_reserved_similar_prefixes_ok() {
assert!(validate_agent_id("console").is_ok());
assert!(validate_agent_id("comedy").is_ok());
assert!(validate_agent_id("auxiliary").is_ok());
}
// ---- autogen agrees with validator ---------------------------------------
#[test]
fn autogen_output_passes_validator_100_draws() {
for role in ["edit-local", "edit-shared", "explorer", "read-only", "weird role!!"] {
for _ in 0..100 {
let id = autogen_agent_id(role);
validate_agent_id(&id).unwrap_or_else(|e| {
panic!("autogen produced invalid id '{id}' for role '{role}': {e}")
});
}
}
}
#[test]
fn autogen_prefix_is_ag_and_within_cap() {
let id = autogen_agent_id("edit-local");
assert!(id.starts_with("ag-"));
assert!(id.len() <= MAX_AGENT_ID_LEN, "len={}", id.len());
}
#[test]
fn slugify_empty_becomes_x() {
assert_eq!(slugify_role(""), "x");
assert_eq!(slugify_role("!!!"), "x");
assert_eq!(slugify_role("---"), "x");
}
#[test]
fn slugify_collapses_disallowed_but_keeps_identity() {
assert_eq!(slugify_role("edit-local"), "edit-local");
assert_eq!(slugify_role("Edit/Local"), "Edit_Local");
}
// ---- integration: resolve_agent_id + load_task propagate typed error ----
#[test]
fn resolve_agent_id_rejects_traversal_without_file_side_effect() {
let mut task = TaskSpec::default();
task.task.agent_id = "../../../etc/passwd".into();
let err = resolve_agent_id(&task).expect_err("must reject");
let msg = format!("{err:#}");
assert!(msg.contains("rejected"), "error should mention rejection: {msg}");
}
#[test]
fn resolve_agent_id_rejects_slash() {
let mut task = TaskSpec::default();
task.task.agent_id = "foo/bar".into();
assert!(resolve_agent_id(&task).is_err());
}
#[test]
fn resolve_agent_id_passes_valid() {
let mut task = TaskSpec::default();
task.task.agent_id = "edit-local-forge-abc123".into();
let resolved = resolve_agent_id(&task).unwrap();
assert_eq!(resolved, "edit-local-forge-abc123");
}
#[test]
fn load_task_rejects_hostile_agent_id() {
let tmp = tempdir().unwrap();
let path = tmp.path().join("task.toml");
fs::write(
&path,
r#"
[task]
role = "edit-local"
agent-id = "../../../etc/shadow"
"#,
)
.unwrap();
let err = load_task(&path).expect_err("hostile agent-id must be rejected at load");
let msg = format!("{err:#}");
assert!(msg.contains("rejected"), "got: {msg}");
}
#[test]
fn load_task_accepts_empty_agent_id() {
// Empty agent-id is allowed at load (auto-gen happens in prepare()).
let tmp = tempdir().unwrap();
let path = tmp.path().join("task.toml");
fs::write(
&path,
r#"
[task]
role = "edit-local"
"#,
)
.unwrap();
let spec = load_task(&path).expect("empty agent-id should parse");
assert_eq!(spec.task.agent_id, "");
}
// ---- InvalidAgentId is a typed, structured error ------------------------
#[test]
fn invalid_agent_id_is_thiserror_displayable() {
let err: InvalidAgentId = validate_agent_id("foo/bar").unwrap_err();
let display = format!("{err}");
assert!(display.starts_with("invalid agent-id"));
}

View file

@ -147,9 +147,31 @@ fn normalize_rule_slug(rest: &str) -> String {
r.to_string()
}
/// Safe base+rel path join. Rejects absolute paths, parent (`..`) components,
/// and post-canonicalise escapes from `base`.
/// Safe base+rel path join.
///
/// Rejects absolute paths, parent (`..`) components, non-existent bases,
/// and post-canonicalise escapes from `base` (including symlink escapes).
///
/// Contract:
/// - `base` MUST canonicalize (i.e. must exist as a real directory). A
/// non-existent base means the caller is not in a well-defined sandbox
/// and we refuse to construct a join.
/// - If `joined` canonicalizes, its real path MUST start with `base_canon`.
/// - If `joined` does not exist, we canonicalize `joined.parent()` and
/// require that to start with `base_canon`. This catches symlinked
/// parent directories that redirect outside the sandbox.
/// - If neither `joined` nor `joined.parent()` exist, no symlink can
/// possibly live there — the lexical (absolute + parent-free) check
/// already completed is sufficient.
pub fn safe_join(base: &Path, rel: &str) -> Result<PathBuf, Error> {
let rel_path = reject_bad_rel(rel)?;
let joined = base.join(rel_path);
let base_canon = canonicalize_base(base)?;
assert_joined_inside_base(&joined, &base_canon, rel)?;
Ok(joined)
}
fn reject_bad_rel(rel: &str) -> Result<&Path, Error> {
let rel_path = Path::new(rel);
if rel_path.is_absolute() {
return Err(Error::PathAbsolute(rel.to_string()));
@ -159,18 +181,41 @@ pub fn safe_join(base: &Path, rel: &str) -> Result<PathBuf, Error> {
return Err(Error::PathParent(rel.to_string()));
}
}
let joined = base.join(rel_path);
// Canonicalise lazily — if either path doesn't exist yet, fall back to
// the lexical check we already did (absolute + parent-free is enough).
let base_canon = base.canonicalize().ok();
let joined_canon = joined.canonicalize().ok();
if let (Some(bc), Some(jc)) = (base_canon, joined_canon) {
if !jc.starts_with(&bc) {
return Err(Error::PathEscape {
base: bc,
rel: rel.to_string(),
});
}
}
Ok(joined)
Ok(rel_path)
}
fn canonicalize_base(base: &Path) -> Result<PathBuf, Error> {
base.canonicalize().map_err(|source| Error::Canonicalize {
path: base.to_path_buf(),
source,
})
}
fn assert_joined_inside_base(
joined: &Path,
base_canon: &Path,
rel: &str,
) -> Result<(), Error> {
if let Ok(jc) = joined.canonicalize() {
return check_contained(&jc, base_canon, rel);
}
let Some(parent) = joined.parent() else {
return Ok(());
};
let Ok(pc) = parent.canonicalize() else {
// Grand-parent also doesn't exist — no symlink can live here.
return Ok(());
};
check_contained(&pc, base_canon, rel)
}
fn check_contained(candidate: &Path, base_canon: &Path, rel: &str) -> Result<(), Error> {
if candidate.starts_with(base_canon) {
Ok(())
} else {
Err(Error::PathEscape {
base: base_canon.to_path_buf(),
rel: rel.to_string(),
})
}
}

View file

@ -0,0 +1,115 @@
//! MEDIUM-severity hardening of `safe_join`.
//!
//! Covers two regressions that the original lexical-fallback implementation
//! missed:
//! 1. Accepting a non-existent `base` (no well-defined sandbox).
//! 2. Accepting a symlinked target that escapes `base`.
use kei_atom_discovery::{safe_join, Error};
use std::fs;
use tempfile::tempdir;
#[test]
fn safe_join_rejects_nonexistent_base() {
let tmp = tempdir().unwrap();
let ghost = tmp.path().join("does-not-exist");
// `ghost` was never created → canonicalize fails → safe_join rejects.
let err = safe_join(&ghost, "schemas/foo.json").expect_err("must reject ghost base");
assert!(
matches!(err, Error::Canonicalize { .. }),
"expected Canonicalize, got {err:?}"
);
}
#[test]
fn safe_join_accepts_valid_existing_base_and_rel() {
let tmp = tempdir().unwrap();
let target = tmp.path().join("schemas");
fs::create_dir_all(&target).unwrap();
let joined = safe_join(tmp.path(), "schemas").expect("valid join");
assert!(joined.ends_with("schemas"));
}
#[test]
fn safe_join_accepts_nonexistent_rel_when_parent_exists() {
// Parent-dir canonicalize succeeds → no symlink can redirect → accept.
let tmp = tempdir().unwrap();
let joined =
safe_join(tmp.path(), "not-yet-created.json").expect("nonexistent rel should join");
assert!(joined.ends_with("not-yet-created.json"));
}
#[test]
fn safe_join_accepts_deeply_nonexistent_rel() {
// Neither the file nor its parent dir exists → no symlink can live here.
let tmp = tempdir().unwrap();
let joined = safe_join(tmp.path(), "brand/new/tree/file.json")
.expect("deeply nonexistent rel should join");
assert!(joined.ends_with("brand/new/tree/file.json"));
}
#[cfg(unix)]
#[test]
fn safe_join_rejects_symlink_escape() {
use std::os::unix::fs::symlink as unix_symlink;
// Layout:
// outside_root/secret.json ← the attacker target
// sandbox/ ← our safe base
// sandbox/escape -> ../outside_root ← symlinked dir
//
// `safe_join(sandbox, "escape/secret.json")` must REJECT: after
// canonicalisation, the resolved path leaves `sandbox`.
let tmp = tempdir().unwrap();
let outside_root = tmp.path().join("outside_root");
let sandbox = tmp.path().join("sandbox");
fs::create_dir_all(&outside_root).unwrap();
fs::create_dir_all(&sandbox).unwrap();
fs::write(outside_root.join("secret.json"), "pwned").unwrap();
unix_symlink(&outside_root, sandbox.join("escape")).unwrap();
let err = safe_join(&sandbox, "escape/secret.json")
.expect_err("symlink-escape must be rejected");
assert!(
matches!(err, Error::PathEscape { .. }),
"expected PathEscape, got {err:?}"
);
}
#[cfg(unix)]
#[test]
fn safe_join_rejects_symlink_escape_to_nonexistent_target() {
// Same shape as above, but the dangling target inside outside_root doesn't
// exist. The parent (`escape`) still canonicalizes into `outside_root`, so
// the escape must still be detected.
use std::os::unix::fs::symlink as unix_symlink;
let tmp = tempdir().unwrap();
let outside_root = tmp.path().join("outside_root2");
let sandbox = tmp.path().join("sandbox2");
fs::create_dir_all(&outside_root).unwrap();
fs::create_dir_all(&sandbox).unwrap();
unix_symlink(&outside_root, sandbox.join("escape")).unwrap();
let err = safe_join(&sandbox, "escape/not-yet.json")
.expect_err("symlink-escape with nonexistent tail must be rejected");
assert!(
matches!(err, Error::PathEscape { .. }),
"expected PathEscape, got {err:?}"
);
}
#[cfg(unix)]
#[test]
fn safe_join_accepts_symlink_that_stays_inside_base() {
// A symlink that resolves BACK INTO the sandbox must still be accepted.
use std::os::unix::fs::symlink as unix_symlink;
let tmp = tempdir().unwrap();
let sandbox = tmp.path().join("sandbox3");
fs::create_dir_all(sandbox.join("schemas")).unwrap();
unix_symlink(sandbox.join("schemas"), sandbox.join("alias")).unwrap();
let ok = safe_join(&sandbox, "alias").expect("inside-base symlink is fine");
assert!(ok.ends_with("alias"));
}

View file

@ -2,8 +2,14 @@
//! under the Constructor-Pattern 200-LOC cap. One function per emitted
//! `CREATE` statement; the engine's `run_migrations` orchestrates the
//! calls and stamps `user_version`.
//!
//! Edge-table DDL lives in `ddl_edge.rs` and is re-exported below;
//! `DdlError` lives in `ddl_error.rs`. Split preserves the 200-LOC cap
//! per Constructor Pattern.
use crate::schema::{EdgeKeyKind, EntitySchema, FieldDef, FieldKind};
pub use crate::ddl_edge::{edge_table_for, try_edge_table_for};
pub use crate::ddl_error::DdlError;
use crate::schema::{EntitySchema, FieldDef, FieldKind};
pub fn primary_table(schema: &EntitySchema) -> String {
let cols: Vec<String> = schema.fields.iter().map(column).collect();
@ -83,108 +89,3 @@ pub fn fts_table(table: &str, cols: &[&str]) -> String {
)
}
/// Dispatcher — picks edge-table DDL for a given `EdgeKeyKind`. Added
/// for kei-sage migration; `IntegerPair` branch preserves legacy body.
pub fn edge_table_for(edge: &str, kind: EdgeKeyKind) -> String {
match kind {
EdgeKeyKind::IntegerPair => edge_integer(edge),
EdgeKeyKind::TextPair => edge_text(edge),
EdgeKeyKind::TextPairWithMetadata {
from_col,
to_col,
has_id,
has_weight,
has_created_at,
extra_columns,
} => edge_text_meta(
edge,
from_col,
to_col,
has_id,
has_weight,
has_created_at,
extra_columns,
),
}
}
fn edge_integer(edge: &str) -> String {
format!(
"CREATE TABLE IF NOT EXISTS {edge} (\n \
from_id INTEGER NOT NULL,\n \
to_id INTEGER NOT NULL,\n \
edge_type TEXT NOT NULL DEFAULT 'links',\n \
PRIMARY KEY(from_id, to_id, edge_type)\n\
);\n\
CREATE INDEX IF NOT EXISTS idx_{edge}_to ON {edge}(to_id);"
)
}
/// Text-keyed edge DDL: `(src_path TEXT, dst_path TEXT, edge_type TEXT)`.
fn edge_text(edge: &str) -> String {
format!(
"CREATE TABLE IF NOT EXISTS {edge} (\n \
src_path TEXT NOT NULL,\n \
dst_path TEXT NOT NULL,\n \
edge_type TEXT NOT NULL DEFAULT 'links',\n \
PRIMARY KEY(src_path, dst_path, edge_type)\n\
);\n\
CREATE INDEX IF NOT EXISTS idx_{edge}_dst ON {edge}(dst_path);"
)
}
/// Text-keyed edge DDL with optional metadata columns + caller-chosen
/// key column names + arbitrary extra columns.
fn edge_text_meta(
edge: &str,
from_col: &str,
to_col: &str,
has_id: bool,
has_weight: bool,
has_created_at: bool,
extras: &[(&str, FieldKind)],
) -> String {
let mut cols: Vec<String> = Vec::new();
if has_id {
cols.push("edge_id INTEGER PRIMARY KEY AUTOINCREMENT".to_string());
}
cols.push(format!("{from_col} TEXT NOT NULL"));
cols.push(format!("{to_col} TEXT NOT NULL"));
cols.push("edge_type TEXT NOT NULL DEFAULT 'links'".to_string());
if has_weight {
cols.push("weight REAL NOT NULL DEFAULT 1.0".to_string());
}
for (name, kind) in extras {
cols.push(extra_column(name, *kind));
}
if has_created_at {
cols.push("created_at INTEGER NOT NULL".to_string());
}
// Without an autoincrement PK we still want `INSERT OR IGNORE`
// idempotent over the triple; with one we emit a UNIQUE instead.
if has_id {
cols.push(format!("UNIQUE({from_col}, {to_col}, edge_type)"));
} else {
cols.push(format!("PRIMARY KEY({from_col}, {to_col}, edge_type)"));
}
let body = cols.join(",\n ");
format!(
"CREATE TABLE IF NOT EXISTS {edge} (\n {body}\n);\n\
CREATE INDEX IF NOT EXISTS idx_{edge}_dst ON {edge}({to_col});"
)
}
/// DDL for one extra edge column. Limited subset of `FieldKind` — edge
/// extras can't be PKs, archive enums, or auto-stamped timestamps.
fn extra_column(name: &str, kind: FieldKind) -> String {
match kind {
FieldKind::Text => format!("{name} TEXT DEFAULT ''"),
FieldKind::TextNotNull => format!("{name} TEXT NOT NULL"),
FieldKind::Integer => format!("{name} INTEGER DEFAULT 0"),
FieldKind::IntegerNotNull => format!("{name} INTEGER NOT NULL"),
FieldKind::Real => format!("{name} REAL NOT NULL DEFAULT 0.0"),
other => panic!(
"edge extra_columns: unsupported FieldKind {other:?} for column '{name}'"
),
}
}

View file

@ -0,0 +1,132 @@
//! Edge-table DDL generators. Split out of `ddl.rs` to keep each file
//! inside the Constructor Pattern 200-LOC cap. `ddl.rs` retains the
//! entity-table, index, and FTS DDL; this module owns edge-table DDL
//! in all three variants (`IntegerPair`, `TextPair`,
//! `TextPairWithMetadata`).
use crate::ddl_error::DdlError;
use crate::schema::{EdgeKeyKind, FieldKind};
/// Dispatcher — picks edge-table DDL for a given `EdgeKeyKind`. Added
/// for kei-sage migration; `IntegerPair` branch preserves legacy body.
///
/// Backward-compat shim — prefer `try_edge_table_for` from new code.
/// This variant panics on unsupported `extra_columns` FieldKinds; the
/// engine's migration path uses the fallible variant to surface typed
/// errors without panicking.
pub fn edge_table_for(edge: &str, kind: EdgeKeyKind) -> String {
try_edge_table_for(edge, kind).expect("edge_table_for: unsupported extra_column FieldKind")
}
/// Fallible dispatcher — same as `edge_table_for` but returns
/// `DdlError::UnsupportedExtraColumn` instead of panicking when an
/// `extra_columns` entry carries a FieldKind outside the supported
/// subset. This is the path `Store::open` takes.
pub fn try_edge_table_for(edge: &str, kind: EdgeKeyKind) -> Result<String, DdlError> {
match kind {
EdgeKeyKind::IntegerPair => Ok(edge_integer(edge)),
EdgeKeyKind::TextPair => Ok(edge_text(edge)),
EdgeKeyKind::TextPairWithMetadata {
from_col,
to_col,
has_id,
has_weight,
has_created_at,
extra_columns,
} => edge_text_meta(
edge,
from_col,
to_col,
has_id,
has_weight,
has_created_at,
extra_columns,
),
}
}
fn edge_integer(edge: &str) -> String {
format!(
"CREATE TABLE IF NOT EXISTS {edge} (\n \
from_id INTEGER NOT NULL,\n \
to_id INTEGER NOT NULL,\n \
edge_type TEXT NOT NULL DEFAULT 'links',\n \
PRIMARY KEY(from_id, to_id, edge_type)\n\
);\n\
CREATE INDEX IF NOT EXISTS idx_{edge}_to ON {edge}(to_id);"
)
}
/// Text-keyed edge DDL: `(src_path TEXT, dst_path TEXT, edge_type TEXT)`.
fn edge_text(edge: &str) -> String {
format!(
"CREATE TABLE IF NOT EXISTS {edge} (\n \
src_path TEXT NOT NULL,\n \
dst_path TEXT NOT NULL,\n \
edge_type TEXT NOT NULL DEFAULT 'links',\n \
PRIMARY KEY(src_path, dst_path, edge_type)\n\
);\n\
CREATE INDEX IF NOT EXISTS idx_{edge}_dst ON {edge}(dst_path);"
)
}
/// Text-keyed edge DDL with optional metadata columns + caller-chosen
/// key column names + arbitrary extra columns. Fallible — returns
/// `DdlError::UnsupportedExtraColumn` if any `extras` entry uses a
/// disallowed `FieldKind`.
fn edge_text_meta(
edge: &str,
from_col: &str,
to_col: &str,
has_id: bool,
has_weight: bool,
has_created_at: bool,
extras: &[(&str, FieldKind)],
) -> Result<String, DdlError> {
let mut cols: Vec<String> = Vec::new();
if has_id {
cols.push("edge_id INTEGER PRIMARY KEY AUTOINCREMENT".to_string());
}
cols.push(format!("{from_col} TEXT NOT NULL"));
cols.push(format!("{to_col} TEXT NOT NULL"));
cols.push("edge_type TEXT NOT NULL DEFAULT 'links'".to_string());
if has_weight {
cols.push("weight REAL NOT NULL DEFAULT 1.0".to_string());
}
for (name, kind) in extras {
cols.push(try_extra_column(name, *kind)?);
}
if has_created_at {
cols.push("created_at INTEGER NOT NULL".to_string());
}
// Without an autoincrement PK we still want `INSERT OR IGNORE`
// idempotent over the triple; with one we emit a UNIQUE instead.
if has_id {
cols.push(format!("UNIQUE({from_col}, {to_col}, edge_type)"));
} else {
cols.push(format!("PRIMARY KEY({from_col}, {to_col}, edge_type)"));
}
let body = cols.join(",\n ");
Ok(format!(
"CREATE TABLE IF NOT EXISTS {edge} (\n {body}\n);\n\
CREATE INDEX IF NOT EXISTS idx_{edge}_dst ON {edge}({to_col});"
))
}
/// DDL for one extra edge column. Limited subset of `FieldKind` — edge
/// extras can't be PKs, archive enums, or auto-stamped timestamps.
/// Fallible — returns `DdlError::UnsupportedExtraColumn` outside the
/// supported set instead of panicking.
fn try_extra_column(name: &str, kind: FieldKind) -> Result<String, DdlError> {
match kind {
FieldKind::Text => Ok(format!("{name} TEXT DEFAULT ''")),
FieldKind::TextNotNull => Ok(format!("{name} TEXT NOT NULL")),
FieldKind::Integer => Ok(format!("{name} INTEGER DEFAULT 0")),
FieldKind::IntegerNotNull => Ok(format!("{name} INTEGER NOT NULL")),
FieldKind::Real => Ok(format!("{name} REAL NOT NULL DEFAULT 0.0")),
other => Err(DdlError::UnsupportedExtraColumn {
kind_debug: format!("{other:?}"),
column_name: name.to_string(),
}),
}
}

View file

@ -0,0 +1,25 @@
//! `DdlError` — typed DDL-generation failures surfaced by the fallible
//! edge-table dispatcher in `ddl::try_edge_table_for`.
//!
//! Split out of `ddl.rs` to keep each file inside the Constructor
//! Pattern 200-LOC cap (1 file = 1 responsibility). `ddl.rs` owns DDL
//! string emission; this module owns the error type only.
use thiserror::Error;
/// Typed DDL-generation failure. Surfaces caller-input problems (e.g.
/// an unsupported `FieldKind` passed as an `edge.extra_columns` entry)
/// as `Result` errors instead of panicking from library code.
#[derive(Debug, Error)]
pub enum DdlError {
/// Caller passed a `FieldKind` that edge-column DDL cannot emit
/// (PKs, archive enums, auto-stamped timestamps are disallowed —
/// see `ddl::try_extra_column` for the supported subset).
#[error(
"edge extra_columns: unsupported FieldKind {kind_debug} for column '{column_name}'"
)]
UnsupportedExtraColumn {
kind_debug: String,
column_name: String,
},
}

View file

@ -31,12 +31,26 @@ pub struct Store {
impl Store {
/// Open (creates parent dirs, enables WAL, runs migrations for all
/// schemas in a single transaction).
///
/// WAL mode is a best-effort optimisation — some filesystems (NFS,
/// read-only mounts, certain FUSE backends) refuse the pragma. On
/// failure we emit a single-line stderr notice and fall back to the
/// default rollback journal instead of swallowing the error; the
/// store still opens correctly and the exit-code contract is
/// preserved (WAL unavailability is not fatal by design).
pub fn open(path: &Path, schemas: &[&EntitySchema]) -> Result<Self> {
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let conn = Connection::open(path).context("open sqlite")?;
conn.pragma_update(None, "journal_mode", "WAL").ok();
if let Err(e) = conn.pragma_update(None, "journal_mode", "WAL") {
eprintln!(
"kei-entity-store: WAL mode unavailable at {} ({}); \
falling back to rollback journal",
path.display(),
e
);
}
run_migrations(&conn, schemas)?;
Ok(Self { conn })
}
@ -85,7 +99,9 @@ fn apply_schema(
tx.execute_batch(&ddl::fts_table(schema.table, cols))?;
}
if let Some(edge) = schema.edge_table {
tx.execute_batch(&ddl::edge_table_for(edge, schema.edge_key_kind))?;
// Fallible path: unsupported `extra_columns` FieldKinds surface
// as `VerbError::InvalidInput` (exit 2), never a panic.
tx.execute_batch(&ddl::try_edge_table_for(edge, schema.edge_key_kind)?)?;
}
for stmt in schema.custom_migrations {
tx.execute_batch(stmt)?;

View file

@ -1,6 +1,7 @@
//! Verb error type. Distinguishes user-input / validation failures
//! (map to CLI exit 2 in callers) from storage / IO failures (exit 1).
use crate::ddl_error::DdlError;
use thiserror::Error;
#[derive(Debug, Error)]
@ -60,3 +61,12 @@ impl VerbError {
Self::NotFound { entity: entity.into(), id: id.into() }
}
}
/// Map DDL-generation failures into verb errors. An unsupported
/// `extra_columns` FieldKind is caller-configuration input, so it maps
/// to `InvalidInput` (exit code 2) rather than the storage path.
impl From<DdlError> for VerbError {
fn from(e: DdlError) -> Self {
VerbError::InvalidInput(e.to_string())
}
}

View file

@ -15,6 +15,8 @@
//! `bin`. Each sibling crate remains the user-facing binary.
pub mod ddl;
pub mod ddl_edge;
pub mod ddl_error;
pub mod engine;
pub mod error;
pub mod field;

View file

@ -9,6 +9,14 @@
//! search — attackers cannot address unindexed columns or craft
//! pathological scan expressions. Embedded `"` chars in the user query
//! are escaped per FTS5 grammar by doubling (`"" → "`).
//!
//! Tokenization guard: a query with ZERO searchable tokens (e.g. all
//! punctuation, only whitespace once trimmed) is rejected with
//! `InvalidInput` (exit 2) BEFORE reaching SQLite. This preserves the
//! documented exit-code contract — otherwise the porter/unicode61
//! tokenizer produces an empty token stream and FTS5 emits an opaque
//! `fts5: syntax error` that would propagate as `VerbError::Sqlite`
//! (exit 1).
use crate::error::VerbError;
use crate::schema::EntitySchema;
@ -43,6 +51,11 @@ pub fn run(
if query.trim().is_empty() {
return Err(VerbError::InvalidInput("search: query must be non-empty".into()));
}
if !has_searchable_token(query) {
return Err(VerbError::InvalidInput(
"search: query has no searchable tokens".into(),
));
}
let limit = clamp(input.get("limit").and_then(|v| v.as_i64()));
let safe_query = fts5_quote(query);
@ -72,6 +85,14 @@ fn fts5_quote(raw: &str) -> String {
format!("\"{escaped}\"")
}
/// True if `raw` contains at least one character the FTS5 porter /
/// unicode61 tokenizer will emit as a token (alphabetic or numeric).
/// Punctuation- and whitespace-only queries produce zero tokens and
/// would trip an opaque `fts5: syntax error` at MATCH time.
fn has_searchable_token(raw: &str) -> bool {
raw.chars().any(|c| c.is_alphanumeric())
}
fn clamp(raw: Option<i64>) -> i64 {
match raw {
Some(n) if n > 0 && n <= MAX_LIMIT => n,
@ -81,7 +102,7 @@ fn clamp(raw: Option<i64>) -> i64 {
#[cfg(test)]
mod tests {
use super::fts5_quote;
use super::{fts5_quote, has_searchable_token};
#[test]
fn quote_basic() {
@ -100,4 +121,28 @@ mod tests {
// literal tokens `title:evil` across the configured columns.
assert_eq!(fts5_quote("title:evil"), "\"title:evil\"");
}
#[test]
fn has_token_accepts_alpha() {
assert!(has_searchable_token("hello"));
assert!(has_searchable_token(" hi! "));
}
#[test]
fn has_token_accepts_digits() {
assert!(has_searchable_token("2026"));
}
#[test]
fn has_token_rejects_punct_only() {
assert!(!has_searchable_token("!@#$"));
assert!(!has_searchable_token("..."));
assert!(!has_searchable_token("---"));
}
#[test]
fn has_token_accepts_unicode_alpha() {
// Porter/unicode61 tokenises Cyrillic; our gate must too.
assert!(has_searchable_token("привет"));
}
}

View file

@ -133,6 +133,125 @@ fn fts5_injection_neutralized_by_phrase_quoting() {
assert_eq!(count_hits(&s, "secr*"), 0, "wildcard leaked");
}
#[test]
fn fts5_phrase_quoting_preserves_legitimate_queries() {
// Inverse failure mode of the sanitizer: over-broad escape would
// also destroy real tokens, so the injection test alone (hits==0)
// would pass even for a broken `fts5_quote` that returns "". This
// pins: a real token MUST still match the seeded row.
let s = mk();
create::run(s.conn(), &SCHEMA, json!({
"title": "ordinary record", "description": "nothing special"
})).unwrap();
create::run(s.conn(), &SCHEMA, json!({
"title": "secret handshake", "description": "hidden"
})).unwrap();
assert_eq!(count_hits(&s, "secret"), 1, "plain token must match");
assert_eq!(count_hits(&s, "handshake"), 1, "second plain token must match");
assert_eq!(count_hits(&s, "nothing"), 1, "description-side token must match");
}
#[test]
fn search_rejects_query_with_no_searchable_tokens() {
// Punctuation-only query passes the trim().is_empty() check but
// produces zero FTS5 tokens. Without the guard this would surface
// as an opaque rusqlite syntax error (exit code 1). The typed
// `InvalidInput` response keeps the exit-code-2 contract.
let s = mk();
create::run(s.conn(), &SCHEMA, json!({ "title": "anything" })).unwrap();
let err = search::run(s.conn(), &SCHEMA, json!({ "query": "!@#$" })).unwrap_err();
assert_eq!(err.exit_code(), 2, "must map to validation exit code");
match err {
VerbError::InvalidInput(ref msg) => assert!(
msg.contains("no searchable tokens"),
"message should identify the tokenization failure, got: {msg}"
),
other => panic!("expected InvalidInput, got {other:?}"),
}
// Also cover whitespace + punctuation combo and long punctuation.
let err = search::run(s.conn(), &SCHEMA, json!({ "query": " ... " })).unwrap_err();
assert_eq!(err.exit_code(), 2);
let err = search::run(s.conn(), &SCHEMA, json!({ "query": "-+=*/" })).unwrap_err();
assert_eq!(err.exit_code(), 2);
}
// ---------- DdlError — unsupported extra_column FieldKind ----------
#[test]
fn ddl_try_edge_table_for_rejects_unsupported_kind() {
// Reachable from public API: `EdgeKeyKind::TextPairWithMetadata
// { extra_columns: [("x", FieldKind::TextDefault)] }`. Must return
// a typed DdlError, not panic. Integration-level proof that
// Store::open's migration path maps this to InvalidInput (exit 2).
use kei_entity_store::ddl::try_edge_table_for;
use kei_entity_store::ddl_error::DdlError;
use kei_entity_store::schema::FieldKind;
static BAD_EXTRAS: &[(&str, FieldKind)] = &[("bogus", FieldKind::TextDefault)];
let kind = EdgeKeyKind::TextPairWithMetadata {
from_col: "from_uri",
to_col: "to_uri",
has_id: true,
has_weight: true,
has_created_at: true,
extra_columns: BAD_EXTRAS,
};
let err = try_edge_table_for("edges_bad", kind).unwrap_err();
match err {
DdlError::UnsupportedExtraColumn { ref column_name, ref kind_debug } => {
assert_eq!(column_name, "bogus");
assert!(
kind_debug.contains("TextDefault"),
"kind_debug should name the offending FieldKind, got {kind_debug}"
);
}
}
}
#[test]
fn store_open_maps_ddl_error_to_verb_error() {
// End-to-end: Store::open_memory on a schema with a bad
// extra_columns kind must surface the error through the
// `anyhow::Error` chain rather than panicking the thread.
use kei_entity_store::schema::FieldKind;
static BAD_EXTRAS: &[(&str, FieldKind)] = &[("bogus", FieldKind::TextDefault)];
static BAD_FIELDS: &[FieldDef] = &[FieldDef::pk("id")];
static BAD_SCHEMA: EntitySchema = EntitySchema {
name: "bad",
table: "bad_nodes",
fields: BAD_FIELDS,
enabled_verbs: &[],
fts_columns: None,
edge_table: Some("bad_edges"),
edge_key_kind: EdgeKeyKind::TextPairWithMetadata {
from_col: "from_uri",
to_col: "to_uri",
has_id: true,
has_weight: true,
has_created_at: true,
extra_columns: BAD_EXTRAS,
},
archived_field: None,
custom_migrations: &[],
};
let res = Store::open_memory(&[&BAD_SCHEMA]);
let err = match res {
Ok(_) => panic!("Store::open_memory must reject bad schema, not panic / succeed"),
Err(e) => e,
};
let msg = format!("{err:#}");
assert!(
msg.contains("bogus") && msg.contains("TextDefault"),
"error chain should mention column + kind, got: {msg}"
);
}
// ---------- TEXT size cap ----------
#[test]

View file

@ -13,6 +13,12 @@ path = "src/main.rs"
name = "kei_spawn"
path = "src/lib.rs"
[features]
default = []
# Enables the real reqwest-backed HttpDriver for `kei-spawn drive`.
# Off by default: v0.1 ships with ManualDriver only (no network deps).
http-driver = ["dep:reqwest"]
[dependencies]
kei-agent-runtime = { path = "../kei-agent-runtime" }
clap = { version = "4", features = ["derive"] }
@ -20,9 +26,11 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
anyhow = "1"
sha2 = { workspace = true }
reqwest = { version = "0.12", default-features = false, features = ["json", "blocking", "rustls-tls"], optional = true }
[dev-dependencies]
tempfile = "3"
httpmock = "0.7"
[package.metadata.keisei]
backend = "none"

View file

@ -1,22 +1,24 @@
//! drive — design-as-stubbed Anthropic-API driver for `kei-spawn drive`.
//! drive — driver trait + shared types + ManualDriver for `kei-spawn drive`.
//!
//! The `drive` subcommand is the future one-call replacement for the current
//! The `drive` subcommand is the one-call replacement for the current
//! two-step dance (`kei-spawn spawn` → orchestrator pastes Agent invocation).
//! Wiring it to a live Anthropic HTTP endpoint is a breaking change (adds
//! `reqwest` + tokio + a secrets contract), so v0.1 ships a stub: the
//! pipeline, types, and trait are defined; the HTTP impl returns
//! `NotImplemented` via `ManualDriver`.
//!
//! Two drivers live here:
//! - `ManualDriver` — always returns `NotImplemented` (v0.1 default path).
//! - `HttpDriver` — real impl lives in `drive_http` behind feature
//! `http-driver`; without the feature a stub returning
//! `NotImplemented` preserves the v0.1 API surface.
//!
//! Exit-code contract (mirrors `kei-runtime::InvokeError::NotImplemented`):
//! - 64 (EX_USAGE range) when the driver yields `NotImplemented`
//! - 1 on spawn failure (same as `kei-spawn spawn`)
//! - 0 only when a real driver returns Ok (HttpDriver future path)
//! - 0 only when a real driver returns Ok
//!
//! Constructor Pattern: one trait + two zero-state impls + one helper fn.
use serde::Serialize;
/// Success envelope for a future `HttpDriver` (and the contract
/// Success envelope for the `HttpDriver` (and the contract
/// `ManualDriver` deliberately never fulfils).
#[derive(Debug, Clone, Serialize)]
pub struct AgentResult {
@ -25,8 +27,7 @@ pub struct AgentResult {
pub finish_reason: String,
}
/// Errors surfaced from driver invocation. `NotImplemented` is retained as
/// the v0.1 escape hatch; `Transport` is reserved for the HTTP impl.
/// Errors surfaced from driver invocation.
#[derive(Debug)]
pub enum DriveError {
NotImplemented { reason: String },
@ -49,10 +50,6 @@ impl std::fmt::Display for DriveError {
impl std::error::Error for DriveError {}
/// Abstraction over "how does an agent invocation actually happen."
///
/// v0.1 has one impl: `ManualDriver` (prints instructions, returns
/// `NotImplemented`). Future: `HttpDriver` backed by `reqwest` +
/// `KEI_ANTHROPIC_KEY` + POST `https://api.anthropic.com/v1/messages`.
pub trait AnthropicDriver {
fn invoke(
&self,
@ -63,9 +60,6 @@ pub trait AnthropicDriver {
}
/// v0.1 driver — returns `NotImplemented` unconditionally.
///
/// Intentional: lets `kei-spawn drive` ship a complete CLI surface
/// (help, argument parsing, JSON emission) before the HTTP dep is taken.
pub struct ManualDriver;
impl AnthropicDriver for ManualDriver {
@ -81,14 +75,15 @@ impl AnthropicDriver for ManualDriver {
}
}
/// Placeholder for the future HTTP-backed driver.
/// Stub `HttpDriver` used when the `http-driver` feature is OFF.
///
/// Deliberately kept dep-free: adding `reqwest` + tokio here would force a
/// breaking change on every consumer of `kei-spawn` today. When the HTTP
/// impl lands, this struct gains fields (`api_key`, `endpoint`, `client`)
/// and the `invoke` body is replaced.
/// Keeps the public API stable so downstream crates can name the type
/// unconditionally. Returns `NotImplemented` with a clear message pointing
/// to the feature flag.
#[cfg(not(feature = "http-driver"))]
pub struct HttpDriver;
#[cfg(not(feature = "http-driver"))]
impl AnthropicDriver for HttpDriver {
fn invoke(
&self,
@ -97,15 +92,18 @@ impl AnthropicDriver for HttpDriver {
_isolation: Option<&str>,
) -> Result<AgentResult, DriveError> {
Err(DriveError::NotImplemented {
reason: "HttpDriver not wired in v0.1 — add reqwest + tokio in a dedicated PR"
reason: "HttpDriver requires `--features http-driver`; \
rebuild with it to enable Anthropic-API calls"
.to_string(),
})
}
}
/// Canonical stderr message for the v0.1 stub. Kept as a fn so both the
/// driver impl and the CLI layer emit the exact same string (and so tests
/// can assert on one fixture).
/// Re-export real `HttpDriver` when feature is ON.
#[cfg(feature = "http-driver")]
pub use crate::drive_http::HttpDriver;
/// Canonical stderr message for the v0.1 stub.
pub fn not_implemented_message() -> String {
"HTTP Anthropic-API integration not yet wired; use spawn then manual \
Agent-tool invocation (see printed instructions)"
@ -113,9 +111,6 @@ pub fn not_implemented_message() -> String {
}
/// Drive helper — orchestrator-facing entry that dispatches to a driver.
///
/// Kept thin on purpose: the real work (prepare + ledger fork) happens in
/// `spawn_from_task`. `drive` only layers the driver call on top.
pub fn drive_with<D: AnthropicDriver>(
driver: &D,
prompt: &str,
@ -141,8 +136,9 @@ mod tests {
}
}
#[cfg(not(feature = "http-driver"))]
#[test]
fn http_driver_also_not_implemented_in_v01() {
fn http_driver_stub_returns_not_implemented_without_feature() {
let d = HttpDriver;
assert!(matches!(
d.invoke("p", "x", None),

View file

@ -0,0 +1,162 @@
//! drive_http — reqwest::blocking-backed Anthropic driver.
//!
//! Gated behind the `http-driver` Cargo feature. Reads `KEI_ANTHROPIC_KEY`
//! at every `invoke` call (so key rotation takes effect without rebuilds).
//!
//! Endpoint defaults to <https://api.anthropic.com/v1/messages> and can be
//! overridden via `KEI_ANTHROPIC_ENDPOINT` (test hook for httpmock).
//!
//! Constructor Pattern: one struct + one impl + small helpers, every fn
//! ≤30 LOC, file ≤200 LOC.
#![cfg(feature = "http-driver")]
use std::time::Duration;
use crate::drive::{AgentResult, AnthropicDriver, DriveError};
use crate::drive_http_parse::{
compose_user_content, excerpt, parse_response, Message, MessagesRequest, ANTHROPIC_VERSION,
DEFAULT_ENDPOINT, MAX_TOKENS, MODEL_ID,
};
const ENV_API_KEY: &str = "KEI_ANTHROPIC_KEY";
const ENV_ENDPOINT: &str = "KEI_ANTHROPIC_ENDPOINT";
const TIMEOUT_TOTAL: Duration = Duration::from_secs(300);
// reqwest 0.12 blocking ClientBuilder exposes `connect_timeout` but not
// a per-read timeout; we cap the TCP+TLS handshake at 60s (matches the
// "60s read" intent — request-body read is bounded by the 300s total).
const TIMEOUT_CONNECT: Duration = Duration::from_secs(60);
const ERR_BODY_EXCERPT: usize = 512;
/// Real Anthropic-backed driver. Zero-state: key + endpoint read per call.
pub struct HttpDriver;
impl AnthropicDriver for HttpDriver {
fn invoke(
&self,
prompt: &str,
subagent_type: &str,
isolation: Option<&str>,
) -> Result<AgentResult, DriveError> {
let key = read_key()?;
let endpoint = read_endpoint();
let client = build_client()?;
let user_content = compose_user_content(prompt, subagent_type, isolation);
let body = build_request_body(&user_content);
send_and_parse(&client, &endpoint, &key, &body)
}
}
fn read_key() -> Result<String, DriveError> {
std::env::var(ENV_API_KEY).map_err(|_| DriveError::Transport {
message: format!("{ENV_API_KEY} is not set in the environment"),
})
}
fn read_endpoint() -> String {
std::env::var(ENV_ENDPOINT).unwrap_or_else(|_| DEFAULT_ENDPOINT.to_string())
}
fn build_client() -> Result<reqwest::blocking::Client, DriveError> {
reqwest::blocking::Client::builder()
.timeout(TIMEOUT_TOTAL)
.connect_timeout(TIMEOUT_CONNECT)
.build()
.map_err(|e| DriveError::Transport {
message: format!("build reqwest client: {e}"),
})
}
fn build_request_body(user_content: &str) -> String {
let req = MessagesRequest {
model: MODEL_ID,
max_tokens: MAX_TOKENS,
messages: vec![Message {
role: "user",
content: user_content,
}],
};
// Safe: types are `Serialize` with only `&str`/`u32`/`Vec`.
serde_json::to_string(&req).unwrap_or_else(|_| "{}".to_string())
}
fn send_and_parse(
client: &reqwest::blocking::Client,
endpoint: &str,
key: &str,
body: &str,
) -> Result<AgentResult, DriveError> {
let resp = client
.post(endpoint)
.header("x-api-key", key)
.header("anthropic-version", ANTHROPIC_VERSION)
.header("content-type", "application/json")
.body(body.to_string())
.send()
.map_err(map_network_error)?;
let status = resp.status();
let text = resp.text().map_err(|e| DriveError::Transport {
message: format!("read response body: {e}"),
})?;
if status.is_success() {
parse_response(&text)
} else {
Err(http_error(status.as_u16(), &text))
}
}
fn map_network_error(e: reqwest::Error) -> DriveError {
DriveError::Transport {
message: format!("network error: {e}"),
}
}
fn http_error(status: u16, body: &str) -> DriveError {
DriveError::Transport {
message: format!(
"HTTP {status}: body[:{ERR_BODY_EXCERPT}]={}",
excerpt(body, ERR_BODY_EXCERPT)
),
}
}
#[cfg(test)]
mod tests {
//! Unit-level tests for helpers. End-to-end tests (with httpmock)
//! live in `tests/http_driver.rs`.
use super::*;
#[test]
fn build_request_body_contains_model_and_prompt() {
let body = build_request_body("hello");
assert!(body.contains("\"model\":\"claude-opus-4-7\""));
assert!(body.contains("\"max_tokens\":4096"));
assert!(body.contains("\"role\":\"user\""));
assert!(body.contains("\"content\":\"hello\""));
}
#[test]
fn http_error_truncates_long_body() {
let long = "x".repeat(5_000);
let err = http_error(429, &long);
match err {
DriveError::Transport { message } => {
assert!(message.contains("HTTP 429"));
assert!(message.len() < 5_000);
}
other => panic!("expected Transport, got {other}"),
}
}
#[test]
fn read_endpoint_returns_default_when_unset() {
// Save + clear so the assertion is deterministic.
let prev = std::env::var(ENV_ENDPOINT).ok();
std::env::remove_var(ENV_ENDPOINT);
let got = read_endpoint();
if let Some(p) = prev {
std::env::set_var(ENV_ENDPOINT, p);
}
assert_eq!(got, DEFAULT_ENDPOINT);
}
}

View file

@ -0,0 +1,185 @@
//! drive_http_parse — request / response DTOs for Anthropic `/v1/messages`.
//!
//! Kept in its own module so the `drive_http` HTTP glue stays under the
//! Constructor Pattern ≤200 LOC budget and the DTO surface is unit-testable
//! without a live reqwest client.
#![cfg(feature = "http-driver")]
use serde::{Deserialize, Serialize};
use crate::drive::{AgentResult, DriveError};
/// Model id used for every `kei-spawn drive` request.
pub const MODEL_ID: &str = "claude-opus-4-7";
/// max_tokens limit per Anthropic spec (plenty for report envelopes).
pub const MAX_TOKENS: u32 = 4096;
/// Anthropic API version header value.
pub const ANTHROPIC_VERSION: &str = "2023-06-01";
/// Default endpoint; overridable via `KEI_ANTHROPIC_ENDPOINT` for tests.
pub const DEFAULT_ENDPOINT: &str = "https://api.anthropic.com/v1/messages";
/// Outbound POST body.
#[derive(Debug, Serialize)]
pub struct MessagesRequest<'a> {
pub model: &'a str,
pub max_tokens: u32,
pub messages: Vec<Message<'a>>,
}
#[derive(Debug, Serialize)]
pub struct Message<'a> {
pub role: &'a str,
pub content: &'a str,
}
/// Inbound response shape.
#[derive(Debug, Deserialize)]
pub struct MessagesResponse {
pub id: String,
#[serde(default)]
pub content: Vec<ContentBlock>,
#[serde(default)]
pub stop_reason: Option<String>,
}
#[derive(Debug, Deserialize)]
pub struct ContentBlock {
#[serde(rename = "type")]
pub kind: String,
#[serde(default)]
pub text: Option<String>,
}
/// Fold the parsed response into the public `AgentResult` envelope.
///
/// Concatenates every `text`-typed content block; non-text blocks
/// (tool_use, image, etc.) are silently skipped — kei-spawn drive only
/// surfaces transcript text.
pub fn to_agent_result(r: MessagesResponse) -> AgentResult {
let transcript = r
.content
.into_iter()
.filter(|b| b.kind == "text")
.filter_map(|b| b.text)
.collect::<Vec<_>>()
.join("");
AgentResult {
agent_id: r.id,
transcript,
finish_reason: r.stop_reason.unwrap_or_else(|| "unknown".to_string()),
}
}
/// Build the `[kei-spawn routing] …` preamble required by the task spec.
pub fn build_preamble(subagent_type: &str, isolation: Option<&str>) -> String {
format!(
"[kei-spawn routing] subagent_type={}, isolation={}\n\n",
subagent_type,
isolation.unwrap_or("<none>")
)
}
/// Build the full user message (preamble + prompt).
pub fn compose_user_content(prompt: &str, subagent_type: &str, isolation: Option<&str>) -> String {
let mut s = build_preamble(subagent_type, isolation);
s.push_str(prompt);
s
}
/// Parse a JSON response body. Errors map to `Transport` with the
/// parse error message and the first 512 bytes of the body as context.
pub fn parse_response(body: &str) -> Result<AgentResult, DriveError> {
match serde_json::from_str::<MessagesResponse>(body) {
Ok(r) => Ok(to_agent_result(r)),
Err(e) => Err(DriveError::Transport {
message: format!("parse response: {e}; body[:512]={}", excerpt(body, 512)),
}),
}
}
/// Truncate `s` to at most `n` bytes at a char boundary.
pub fn excerpt(s: &str, n: usize) -> String {
if s.len() <= n {
return s.to_string();
}
let mut end = n;
while end > 0 && !s.is_char_boundary(end) {
end -= 1;
}
s[..end].to_string()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn preamble_format_matches_spec() {
let p = build_preamble("code-implementer", Some("worktree"));
assert_eq!(
p,
"[kei-spawn routing] subagent_type=code-implementer, isolation=worktree\n\n"
);
}
#[test]
fn preamble_without_isolation_falls_back() {
let p = build_preamble("critic", None);
assert!(p.contains("isolation=<none>"));
}
#[test]
fn compose_appends_prompt() {
let c = compose_user_content("hi", "x", Some("w"));
assert!(c.starts_with("[kei-spawn routing]"));
assert!(c.ends_with("hi"));
}
#[test]
fn parse_ok_multi_text_blocks() {
let body = r#"{
"id": "msg_01",
"content": [
{"type":"text","text":"hello "},
{"type":"tool_use","id":"t1"},
{"type":"text","text":"world"}
],
"stop_reason": "end_turn"
}"#;
let r = parse_response(body).unwrap();
assert_eq!(r.agent_id, "msg_01");
assert_eq!(r.transcript, "hello world");
assert_eq!(r.finish_reason, "end_turn");
}
#[test]
fn parse_missing_stop_reason_defaults() {
let body = r#"{"id":"x","content":[{"type":"text","text":"y"}]}"#;
let r = parse_response(body).unwrap();
assert_eq!(r.finish_reason, "unknown");
}
#[test]
fn parse_malformed_maps_to_transport() {
let err = parse_response("{not json").unwrap_err();
match err {
DriveError::Transport { message } => {
assert!(message.contains("parse response"));
assert!(message.contains("body[:512]="));
}
other => panic!("expected Transport, got {other}"),
}
}
#[test]
fn excerpt_respects_char_boundary() {
let s = "αβγδ"; // 2 bytes each
let out = excerpt(s, 3);
// should truncate to a valid boundary (2 bytes = "α")
assert!(s.starts_with(&out));
}
}

View file

@ -16,7 +16,8 @@
//! Design constraints:
//! - Constructor Pattern: one module = one responsibility, ≤200 LOC file,
//! ≤30 LOC fn.
//! - No HTTP / no Anthropic API — that's a later `kei-spawn drive` iteration.
//! - Optional HTTP via the `http-driver` Cargo feature (reqwest::blocking +
//! rustls). Off by default — v0.1 ships `ManualDriver` only.
//! - No git / no shell — ledger interactions go through `kei-ledger` as a
//! subprocess to avoid adding kei-ledger as a direct dep while it still
//! lacks a lib.rs (can't link to a bin-only crate).
@ -26,6 +27,10 @@
//! `kei-ledger` (which itself only writes to SQLite).
pub mod drive;
#[cfg(feature = "http-driver")]
pub mod drive_http;
#[cfg(feature = "http-driver")]
pub mod drive_http_parse;
pub mod ledger_sh;
pub mod spawn;
pub mod verify;

View file

@ -0,0 +1,181 @@
//! http_driver — end-to-end tests for the `http-driver` feature.
//!
//! Uses `httpmock` to stand up a local HTTP server and `KEI_ANTHROPIC_ENDPOINT`
//! to redirect the driver at it. `KEI_ANTHROPIC_KEY` is set per-test so the
//! tests never require real credentials.
//!
//! Every test is self-contained: fresh MockServer + per-test env vars. The
//! env_lock mutex below ensures concurrent tests don't trample each other's
//! process-global env.
#![cfg(feature = "http-driver")]
use std::sync::Mutex;
use httpmock::prelude::*;
use kei_spawn::{AnthropicDriver, DriveError, HttpDriver};
/// Cargo test harness runs tests in parallel by default — env vars are
/// process-global, so serialize access.
static ENV_LOCK: Mutex<()> = Mutex::new(());
struct EnvGuard {
key_prev: Option<String>,
endpoint_prev: Option<String>,
_guard: std::sync::MutexGuard<'static, ()>,
}
impl EnvGuard {
fn new(key: Option<&str>, endpoint: Option<&str>) -> Self {
let guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let key_prev = std::env::var("KEI_ANTHROPIC_KEY").ok();
let endpoint_prev = std::env::var("KEI_ANTHROPIC_ENDPOINT").ok();
match key {
Some(v) => std::env::set_var("KEI_ANTHROPIC_KEY", v),
None => std::env::remove_var("KEI_ANTHROPIC_KEY"),
}
match endpoint {
Some(v) => std::env::set_var("KEI_ANTHROPIC_ENDPOINT", v),
None => std::env::remove_var("KEI_ANTHROPIC_ENDPOINT"),
}
Self {
key_prev,
endpoint_prev,
_guard: guard,
}
}
}
impl Drop for EnvGuard {
fn drop(&mut self) {
match &self.key_prev {
Some(v) => std::env::set_var("KEI_ANTHROPIC_KEY", v),
None => std::env::remove_var("KEI_ANTHROPIC_KEY"),
}
match &self.endpoint_prev {
Some(v) => std::env::set_var("KEI_ANTHROPIC_ENDPOINT", v),
None => std::env::remove_var("KEI_ANTHROPIC_ENDPOINT"),
}
}
}
#[test]
fn missing_key_returns_transport_error() {
let _env = EnvGuard::new(None, Some("http://127.0.0.1:1/never"));
let d = HttpDriver;
let err = d.invoke("hi", "code-implementer", Some("worktree")).unwrap_err();
match err {
DriveError::Transport { message } => {
assert!(message.contains("KEI_ANTHROPIC_KEY"), "msg: {message}");
}
other => panic!("expected Transport, got {other}"),
}
}
#[test]
fn ok_200_roundtrip_populates_agent_result() {
let server = MockServer::start();
let _env = EnvGuard::new(Some("test-key-xxx"), Some(&server.url("/v1/messages")));
let m = server.mock(|when, then| {
when.method(POST)
.path("/v1/messages")
.header("x-api-key", "test-key-xxx")
.header("anthropic-version", "2023-06-01")
.header("content-type", "application/json")
.body_contains("[kei-spawn routing] subagent_type=code-implementer")
.body_contains("claude-opus-4-7");
then.status(200)
.header("content-type", "application/json")
.body(
r#"{
"id": "msg_test_01",
"content": [
{"type":"text","text":"hello "},
{"type":"text","text":"world"}
],
"stop_reason": "end_turn"
}"#,
);
});
let d = HttpDriver;
let out = d
.invoke("please do X", "code-implementer", Some("worktree"))
.expect("ok roundtrip");
m.assert();
assert_eq!(out.agent_id, "msg_test_01");
assert_eq!(out.transcript, "hello world");
assert_eq!(out.finish_reason, "end_turn");
}
#[test]
fn http_4xx_maps_to_transport_with_body_excerpt() {
let server = MockServer::start();
let _env = EnvGuard::new(Some("bad-key"), Some(&server.url("/v1/messages")));
let body_msg = "{\"type\":\"error\",\"error\":{\"type\":\"invalid_api_key\",\"message\":\"bad key\"}}";
server.mock(|when, then| {
when.method(POST).path("/v1/messages");
then.status(401)
.header("content-type", "application/json")
.body(body_msg);
});
let d = HttpDriver;
let err = d.invoke("x", "code-implementer", None).unwrap_err();
match err {
DriveError::Transport { message } => {
assert!(message.contains("HTTP 401"), "msg: {message}");
assert!(message.contains("invalid_api_key"), "msg: {message}");
}
other => panic!("expected Transport, got {other}"),
}
}
#[test]
fn http_5xx_maps_to_transport() {
let server = MockServer::start();
let _env = EnvGuard::new(Some("k"), Some(&server.url("/v1/messages")));
server.mock(|when, then| {
when.method(POST).path("/v1/messages");
then.status(503)
.header("content-type", "text/plain")
.body("upstream overloaded");
});
let d = HttpDriver;
let err = d.invoke("x", "y", None).unwrap_err();
match err {
DriveError::Transport { message } => {
assert!(message.contains("HTTP 503"), "msg: {message}");
assert!(message.contains("upstream overloaded"), "msg: {message}");
}
other => panic!("expected Transport, got {other}"),
}
}
#[test]
fn malformed_json_on_200_maps_to_transport() {
let server = MockServer::start();
let _env = EnvGuard::new(Some("k"), Some(&server.url("/v1/messages")));
server.mock(|when, then| {
when.method(POST).path("/v1/messages");
then.status(200)
.header("content-type", "application/json")
.body("{not-json");
});
let d = HttpDriver;
let err = d.invoke("x", "y", None).unwrap_err();
match err {
DriveError::Transport { message } => {
assert!(message.contains("parse response"), "msg: {message}");
assert!(message.contains("body[:512]="), "msg: {message}");
}
other => panic!("expected Transport, got {other}"),
}
}