From 4983d38636b919badcfdd56681d59982de076c79 Mon Sep 17 00:00:00 2001 From: Parfii-bot Date: Thu, 23 Apr 2026 05:27:59 +0800 Subject: [PATCH] fix(agent-runtime/b2): DNA entropy 32-bit + role path traversal + recursion cap + warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit H4/M4/S3 DNA entropy: - short_sha256() widened to 8 hex (32-bit) for scope + body hash - nonce expanded to 8 hex from full u32 via rand::random - Birthday collision at ~77K agents sharing role+caps (was ~256) - Dna::parse accepts legacy 4-hex values with stderr warning for rolling upgrade of pre-fix DNAs H5 role recursion: resolve_inner depth parameter + MAX_DEPTH=16. Returns RoleError::MaxDepthExceeded{depth, trace:Vec} for clear diagnostics. S1 path traversal — two sites closed: - role.rs::resolve_inner validates role name + parent in extends against regex ^[a-z][a-z0-9-]{0,63}$ before Path::join - compose.rs::split_cap_name same validation on capability category/slug before Path::join - RoleError::InvalidName{kind, value} on violation M1 relaxes warnings: eprintln replaced with ResolvedRole.warnings Vec field. Caller decides: log, fail, ignore. New typed RoleError (thiserror) — PartialEq/Eq for test ergonomics. Tests: 73/73 (was 66, +7: 3 DNA + 4 role). Full extends graph + malicious name + depth+1 all covered. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../_rust/kei-agent-runtime/src/compose.rs | 15 ++- .../_rust/kei-agent-runtime/src/dna.rs | 91 ++++++++----- .../_rust/kei-agent-runtime/src/role.rs | 99 +++++++++++--- .../kei-agent-runtime/tests/dna_smoke.rs | 69 +++++++++- .../tests/role_expression_smoke.rs | 127 +++++++++++++++++- 5 files changed, 343 insertions(+), 58 deletions(-) diff --git a/_primitives/_rust/kei-agent-runtime/src/compose.rs b/_primitives/_rust/kei-agent-runtime/src/compose.rs index a416183..49d21a9 100644 --- a/_primitives/_rust/kei-agent-runtime/src/compose.rs +++ b/_primitives/_rust/kei-agent-runtime/src/compose.rs @@ -10,7 +10,7 @@ //! 5. Append `task.body.text`. use crate::capability::TaskSpec; -use crate::role::resolve_role; +use crate::role::{resolve_role, validate_name}; use anyhow::{anyhow, Context, Result}; use std::path::Path; @@ -47,8 +47,13 @@ fn load_capability_text(kit_root: &Path, cap_name: &str) -> Result { } fn split_cap_name(cap: &str) -> Result<(&str, &str)> { - match cap.split_once("::") { - Some((cat, slug)) if !cat.is_empty() && !slug.is_empty() => Ok((cat, slug)), - _ => Err(anyhow!("malformed capability name '{cap}' — expected ::")), - } + let (cat, slug) = cap + .split_once("::") + .filter(|(c, s)| !c.is_empty() && !s.is_empty()) + .ok_or_else(|| anyhow!("malformed capability name '{cap}' — expected ::"))?; + // Block path traversal: both halves are joined into a filesystem path, + // so any `..`, `/`, `\`, upper-case, etc. is refused at the gate. + validate_name("capability-category", cat)?; + validate_name("capability-slug", slug)?; + Ok((cat, slug)) } diff --git a/_primitives/_rust/kei-agent-runtime/src/dna.rs b/_primitives/_rust/kei-agent-runtime/src/dna.rs index 0a77e6f..52c3856 100644 --- a/_primitives/_rust/kei-agent-runtime/src/dna.rs +++ b/_primitives/_rust/kei-agent-runtime/src/dna.rs @@ -5,18 +5,21 @@ //! - `role` — role slug, e.g. `edit-local` //! - `caps-bitmap` — hyphen-separated 2-char atom codes (ordered, from //! the resolved capability list) -//! - `scope-hash` — 4-char truncated SHA-256 of canonicalised scope fields -//! - `body-hash` — 4-char truncated SHA-256 of `task.body.text` -//! - `nonce` — 4-char hex from `rand::random::()` + a byte of -//! nanosecond time (deterministic enough for uniqueness -//! in a single batch; ledger dedups by DNA prefix) +//! - `scope-hash` — 8-char truncated SHA-256 of canonicalised scope fields +//! (32-bit; widened from 16-bit to push birthday collision +//! threshold from ~256 to ~65k agents per role+caps group) +//! - `body-hash` — 8-char truncated SHA-256 of `task.body.text` (32-bit) +//! - `nonce` — 8-char hex from `rand::random::()` (full 32-bit +//! entropy; was 16-bit pre-2026-04 H4/M4/S3 widening) //! //! Constructor Pattern: one cube = DNA identity primitive only. No I/O. //! //! Round-trip: `compose` → `render` → `parse` → equal. //! Parse accepts both shipped DNA strings and hand-written ones; it enforces //! the 5-segment shape but tolerates arbitrary (non-empty) segment content -//! so future schema extensions don't break old ledger rows. +//! so future schema extensions don't break old ledger rows. For rolling +//! upgrade, 4-hex legacy hash/nonce values still parse but emit a stderr +//! warning naming the narrow-entropy segment. use crate::capability::TaskSpec; use crate::role::ResolvedRole; @@ -86,30 +89,20 @@ impl Dna { } /// Parse a DNA string. Lenient on segment content, strict on shape. + /// Accepts both 8-hex (current) and 4-hex (legacy pre-widening) values + /// for `scope_hash`, `body_hash`, `nonce`; legacy widths emit a stderr + /// warning naming the narrow-entropy segment. pub fn parse(s: &str) -> Result { let parts: Vec<&str> = s.splitn(4, "::").collect(); if parts.len() != 4 { return Err(DnaError::Shape); } - let role = parts[0]; - let caps_bitmap = parts[1]; - let scope_hash = parts[2]; + let (role, caps_bitmap, scope_hash) = (parts[0], parts[1], parts[2]); let (body_hash, nonce) = parts[3].rsplit_once('-').ok_or(DnaError::Shape)?; - if role.is_empty() { - return Err(DnaError::EmptySegment("role")); - } - if caps_bitmap.is_empty() { - return Err(DnaError::EmptySegment("caps_bitmap")); - } - if scope_hash.is_empty() { - return Err(DnaError::EmptySegment("scope_hash")); - } - if body_hash.is_empty() { - return Err(DnaError::EmptySegment("body_hash")); - } - if nonce.is_empty() { - return Err(DnaError::EmptySegment("nonce")); - } + ensure_non_empty(role, caps_bitmap, scope_hash, body_hash, nonce)?; + warn_if_legacy_width("scope_hash", scope_hash); + warn_if_legacy_width("body_hash", body_hash); + warn_if_legacy_width("nonce", nonce); Ok(Self { role: role.into(), caps_bitmap: caps_bitmap.into(), @@ -120,6 +113,39 @@ impl Dna { } } +fn ensure_non_empty( + role: &str, + caps_bitmap: &str, + scope_hash: &str, + body_hash: &str, + nonce: &str, +) -> Result<(), DnaError> { + for (name, value) in [ + ("role", role), + ("caps_bitmap", caps_bitmap), + ("scope_hash", scope_hash), + ("body_hash", body_hash), + ("nonce", nonce), + ] { + if value.is_empty() { + return Err(DnaError::EmptySegment(name)); + } + } + Ok(()) +} + +/// Emit a stderr warning when a 4-hex legacy segment is parsed. New DNA +/// uses 8 hex; rolling upgrade is supported by accepting both widths. +fn warn_if_legacy_width(field: &str, value: &str) { + if value.len() == 4 && value.chars().all(|c| c.is_ascii_hexdigit()) { + eprintln!( + "[kei-agent-runtime] DNA segment `{field}` is 4-hex (legacy \ + 16-bit); current format is 8-hex (32-bit). Accepted for \ + backward compatibility — re-issue new DNA on next compose." + ); + } +} + fn build_caps_bitmap(caps: &[String]) -> String { caps.iter() .map(|c| code_for(c).to_string()) @@ -145,15 +171,16 @@ fn canonical_scope(task: &TaskSpec) -> String { fn short_sha256(input: &str) -> String { let digest = Sha256::digest(input.as_bytes()); - format!("{:02X}{:02X}", digest[0], digest[1]) + // 4 bytes = 8 hex chars = 32-bit truncation (widened from 16-bit). + format!( + "{:02X}{:02X}{:02X}{:02X}", + digest[0], digest[1], digest[2], digest[3] + ) } fn nonce_hex() -> String { - let a: u16 = rand::random(); - let b: u16 = rand::random(); - format!("{a:04x}") - .chars() - .take(2) - .chain(format!("{b:04x}").chars().take(2)) - .collect() + // 32-bit nonce (widened from 16-bit). Birthday collision threshold + // ~65k DNAs sharing the same role+caps+scope+body triple. + let r: u32 = rand::random(); + format!("{r:08x}") } diff --git a/_primitives/_rust/kei-agent-runtime/src/role.rs b/_primitives/_rust/kei-agent-runtime/src/role.rs index f8bb95b..5f20401 100644 --- a/_primitives/_rust/kei-agent-runtime/src/role.rs +++ b/_primitives/_rust/kei-agent-runtime/src/role.rs @@ -7,25 +7,61 @@ //! Semantics: //! - `extends` — optional parent role slug; loaded recursively. //! - `required` (local) — merged on top of parent's resolved required. -//! - `relaxes` — slugs in parent's resolved required to DROP. Warn on stderr -//! if a relaxed cap wasn't present in the inherited set. -//! - Cycle detection — visited set passed down the recursion; an error with -//! a clear path is returned when a cycle is found. +//! - `relaxes` — slugs in parent's resolved required to DROP. A warning is +//! collected in `ResolvedRole::warnings` if a relaxed cap wasn't present +//! in the inherited set (caller decides how to surface). +//! - Cycle detection — visited set passed down the recursion; an error +//! with a clear path is returned when a cycle is found. +//! - Depth cap — `extends` chains deeper than `MAX_DEPTH = 16` are +//! refused (`RoleError::MaxDepthExceeded`) to prevent stack overflow +//! on malformed/hostile role trees. +//! - Name validation — role slug must match `^[a-z][a-z0-9-]{0,63}$`, +//! blocks `../../etc/passwd` path traversal before the `join`. //! //! Constructor Pattern: one cube = one responsibility (role expression only). //! No I/O beyond `std::fs::read_to_string`. Dispatched from `compose::load_role` //! and `verify::load_role_capabilities` so both share the same semantics. -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result}; +use once_cell::sync::Lazy; +use regex::Regex; use serde::Deserialize; use std::collections::HashSet; use std::path::Path; +use thiserror::Error; + +/// Max depth for `extends` chain traversal. Guards against stack overflow +/// on malformed/hostile role files. +pub const MAX_DEPTH: usize = 16; + +/// Role / capability slug pattern. Lowercase start, `[a-z0-9-]` body, +/// ≤64 chars total. Blocks `..`, `/`, `\`, upper-case, unicode, +/// whitespace — any of which enables path traversal via `Path::join`. +static NAME_RE: Lazy = + Lazy::new(|| Regex::new(r"^[a-z][a-z0-9-]{0,63}$").expect("compile NAME_RE")); + +/// Structured errors from role resolution. +#[derive(Debug, Error, PartialEq, Eq)] +pub enum RoleError { + #[error("role `extends` chain exceeded MAX_DEPTH={depth}; trace: {trace:?}")] + MaxDepthExceeded { depth: usize, trace: Vec }, + #[error("invalid {kind} name `{value}` — must match ^[a-z][a-z0-9-]{{0,63}}$")] + InvalidName { kind: &'static str, value: String }, + #[error("cycle detected in role `extends` chain at `{role}` (visited: {visited:?})")] + Cycle { + role: String, + visited: Vec, + }, +} /// Flattened role ready for downstream composition. #[derive(Debug, Clone, Default)] pub struct ResolvedRole { /// Ordered capability names after `extends` merge + `relaxes` subtraction. pub required: Vec, + /// Non-fatal advisories surfaced during resolution (e.g. relaxed cap + /// was not in the inherited set). Caller decides how to surface. + pub warnings: Vec, } /// Deserialized role file (raw shape, pre-resolution). @@ -45,26 +81,54 @@ pub struct RoleCapsRaw { pub relaxes: Vec, } +/// Validate a role-or-capability slug; returns typed error if malformed. +pub fn validate_name(kind: &'static str, value: &str) -> Result<(), RoleError> { + if NAME_RE.is_match(value) { + Ok(()) + } else { + Err(RoleError::InvalidName { + kind, + value: value.to_string(), + }) + } +} + /// Resolve a role by slug; read role file, walk `extends`, apply `relaxes`. pub fn resolve_role(kit_root: &Path, role: &str) -> Result { + validate_name("role", role)?; let mut visited: HashSet = HashSet::new(); - resolve_inner(kit_root, role, &mut visited) + let mut warnings: Vec = Vec::new(); + let required = resolve_inner(kit_root, role, &mut visited, &mut warnings, 0)?; + Ok(ResolvedRole { required, warnings }) } fn resolve_inner( kit_root: &Path, role: &str, visited: &mut HashSet, -) -> Result { + warnings: &mut Vec, + depth: usize, +) -> Result> { + if depth > MAX_DEPTH { + return Err(RoleError::MaxDepthExceeded { + depth: MAX_DEPTH, + trace: visited.iter().cloned().collect(), + } + .into()); + } if !visited.insert(role.to_string()) { - return Err(anyhow!( - "cycle detected in role `extends` chain at `{role}` (path: {:?})", - visited - )); + return Err(RoleError::Cycle { + role: role.to_string(), + visited: visited.iter().cloned().collect(), + } + .into()); } let raw = read_role_file(kit_root, role)?; let mut merged = match raw.capabilities.extends.as_deref() { - Some(parent) => resolve_inner(kit_root, parent, visited)?.required, + Some(parent) => { + validate_name("role", parent)?; + resolve_inner(kit_root, parent, visited, warnings, depth + 1)? + } None => Vec::new(), }; for cap in &raw.capabilities.required { @@ -76,17 +140,18 @@ fn resolve_inner( let before = merged.len(); merged.retain(|c| c != dropped); if merged.len() == before { - eprintln!( - "[kei-agent-runtime] role `{role}` relaxes `{dropped}` \ - but it was not in the inherited capability set — no-op" - ); + warnings.push(format!( + "role `{role}` relaxes `{dropped}` but it was not in the \ + inherited capability set — no-op" + )); } } visited.remove(role); - Ok(ResolvedRole { required: merged }) + Ok(merged) } fn read_role_file(kit_root: &Path, role: &str) -> Result { + validate_name("role", role)?; let path = kit_root.join("_roles").join(format!("{role}.toml")); let text = std::fs::read_to_string(&path) .with_context(|| format!("read role file {}", path.display()))?; diff --git a/_primitives/_rust/kei-agent-runtime/tests/dna_smoke.rs b/_primitives/_rust/kei-agent-runtime/tests/dna_smoke.rs index f303730..6f7ed8f 100644 --- a/_primitives/_rust/kei-agent-runtime/tests/dna_smoke.rs +++ b/_primitives/_rust/kei-agent-runtime/tests/dna_smoke.rs @@ -30,6 +30,7 @@ fn edit_local_resolved() -> ResolvedRole { "safety::no-dep-bump".into(), "output::report-format".into(), ], + warnings: Vec::new(), } } @@ -68,10 +69,11 @@ fn rendered_dna_length_within_budget() { let task = fixture_task("body", &["a"], &["b"]); let r = edit_local_resolved(); let s = Dna::compose(&task, &r).render(); - // role(10) + caps bitmap (8 caps * 3 = 23) + scope(4) + body(4) + nonce(4) - // plus separators (3×2 + 1) = bounded; give ample 80 char headroom + // role(10) + sep(2) + caps bitmap (8 caps * 3 - 1 = 23) + sep(2) + + // scope(8) + sep(2) + body(8) + `-` + nonce(8) = 64. Budget ≤88 per + // H4/M4/S3 widening spec; hard ceiling stays comfortably short. assert!( - s.len() <= 80, + s.len() <= 88, "DNA string should stay short; got {} chars: {}", s.len(), s @@ -87,3 +89,64 @@ fn parse_rejects_malformed_shape() { "missing `-nonce` separator must fail" ); } + +#[test] +fn widened_dna_uses_8_hex_for_all_entropy_segments() { + let task = fixture_task("entropy budget check", &["x"], &["y"]); + let r = edit_local_resolved(); + let dna = Dna::compose(&task, &r); + assert_eq!( + dna.scope_hash.len(), + 8, + "scope_hash must be 8 hex (32-bit), got {}: {}", + dna.scope_hash.len(), + dna.scope_hash + ); + assert_eq!( + dna.body_hash.len(), + 8, + "body_hash must be 8 hex (32-bit), got {}: {}", + dna.body_hash.len(), + dna.body_hash + ); + assert_eq!( + dna.nonce.len(), + 8, + "nonce must be 8 hex (32-bit), got {}: {}", + dna.nonce.len(), + dna.nonce + ); + let round = Dna::parse(&dna.render()).expect("parse widened"); + assert_eq!(round, dna); +} + +#[test] +fn parse_accepts_legacy_4_hex_segments_for_rolling_upgrade() { + // Hand-built legacy DNA: pre-widening format with 4-hex segments. + // Parser MUST accept it (with stderr warning) so rolling upgrade works. + let legacy = "edit-local::NG-FW::ABCD::1234-9f0a"; + let parsed = Dna::parse(legacy).expect("legacy 4-hex DNA must parse"); + assert_eq!(parsed.role, "edit-local"); + assert_eq!(parsed.scope_hash, "ABCD"); + assert_eq!(parsed.body_hash, "1234"); + assert_eq!(parsed.nonce, "9f0a"); + assert_eq!(parsed.render(), legacy, "render preserves legacy widths"); +} + +#[test] +fn nonce_is_unique_across_10000_generated_dnas() { + // 32-bit nonce → birthday collision at ~65k; at 10k the expected number + // of duplicates is ~0.6. A single collision fails the test loudly rather + // than silently on the regression path. + let task = fixture_task("same body", &["same"], &["same"]); + let r = edit_local_resolved(); + let mut seen: std::collections::HashSet = + std::collections::HashSet::with_capacity(10_000); + for _ in 0..10_000 { + let n = Dna::compose(&task, &r).nonce; + assert!( + seen.insert(n.clone()), + "nonce collision at 32-bit entropy: {n}" + ); + } +} diff --git a/_primitives/_rust/kei-agent-runtime/tests/role_expression_smoke.rs b/_primitives/_rust/kei-agent-runtime/tests/role_expression_smoke.rs index 0ad50ef..606aea7 100644 --- a/_primitives/_rust/kei-agent-runtime/tests/role_expression_smoke.rs +++ b/_primitives/_rust/kei-agent-runtime/tests/role_expression_smoke.rs @@ -3,7 +3,7 @@ //! Fixtures built in tempdir; each test writes the role files it needs, //! runs `resolve_role`, asserts the flattened required list. -use kei_agent_runtime::role::resolve_role; +use kei_agent_runtime::role::{resolve_role, RoleError, MAX_DEPTH}; use std::fs; use std::path::Path; use tempfile::TempDir; @@ -147,3 +147,128 @@ required = ["policy::no-git-ops", "output::report-format"] assert_eq!(r.required[0], "policy::no-git-ops"); assert_eq!(r.required[1], "output::report-format"); } + +#[test] +fn extends_chain_deeper_than_max_depth_errors() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + // Build a linear chain r0 -> r1 -> ... -> r20, which exceeds + // MAX_DEPTH = 16 and must refuse rather than recurse. + let total = MAX_DEPTH + 5; + for i in 0..total { + let body = if i + 1 < total { + format!( + "[role]\nname = \"r{i}\"\n\n[capabilities]\nextends = \"r{next}\"\n", + i = i, + next = i + 1 + ) + } else { + format!("[role]\nname = \"r{i}\"\n\n[capabilities]\nrequired = []\n", i = i) + }; + write_role(root, &format!("r{i}"), &body); + } + let err = resolve_role(root, "r0").unwrap_err(); + let role_err = err + .downcast_ref::() + .expect("expected typed RoleError"); + match role_err { + RoleError::MaxDepthExceeded { depth, .. } => { + assert_eq!(*depth, MAX_DEPTH, "error must report configured cap"); + } + other => panic!("expected MaxDepthExceeded, got {other:?}"), + } +} + +#[test] +fn role_name_with_path_traversal_is_refused_before_fs_join() { + // No file created — attacker-controlled name must fail at validation, + // not at read-time with an ambiguous NotFound. + let tmp = TempDir::new().unwrap(); + let err = resolve_role(tmp.path(), "../../etc/passwd").unwrap_err(); + let role_err = err + .downcast_ref::() + .expect("expected typed RoleError"); + match role_err { + RoleError::InvalidName { kind, value } => { + assert_eq!(*kind, "role"); + assert_eq!(value, "../../etc/passwd"); + } + other => panic!("expected InvalidName, got {other:?}"), + } +} + +#[test] +fn capability_name_with_path_traversal_is_refused_in_compose() { + use kei_agent_runtime::capability::TaskSpec; + use kei_agent_runtime::compose::compose_prompt; + + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + write_role( + root, + "traversal", + r#" +[role] +name = "traversal" + +[capabilities] +required = ["../../etc::passwd"] +"#, + ); + let mut task = TaskSpec::default(); + task.task.role = "traversal".into(); + task.task.agent_id = "traversal-attempt".into(); + task.body.text = "whatever".into(); + let err = compose_prompt(&task, root).unwrap_err(); + let role_err = err + .chain() + .find_map(|e| e.downcast_ref::()) + .expect("expected typed RoleError in error chain"); + match role_err { + RoleError::InvalidName { kind, .. } => { + assert!( + kind.starts_with("capability-"), + "kind should be capability-category or capability-slug, got {kind}" + ); + } + other => panic!("expected InvalidName, got {other:?}"), + } +} + +#[test] +fn relaxes_missing_cap_collects_warning_without_failing() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + write_role( + root, + "base-warn", + r#" +[role] +name = "base-warn" + +[capabilities] +required = ["policy::no-git-ops"] +"#, + ); + write_role( + root, + "child-warn", + r#" +[role] +name = "child-warn" + +[capabilities] +extends = "base-warn" +relaxes = ["scope::files-whitelist"] +"#, + ); + + let r = resolve_role(root, "child-warn").expect("must not fail"); + assert_eq!(r.required, vec!["policy::no-git-ops".to_string()]); + assert_eq!(r.warnings.len(), 1, "expected exactly one warning"); + let w = &r.warnings[0]; + assert!( + w.contains("scope::files-whitelist") && w.contains("no-op"), + "warning should name dropped cap + no-op, got: {w}" + ); +}