diff --git a/_primitives/_rust/kei-agent-runtime/src/gates/pattern_gate.rs b/_primitives/_rust/kei-agent-runtime/src/gates/pattern_gate.rs index 2c1062e..5558fe4 100644 --- a/_primitives/_rust/kei-agent-runtime/src/gates/pattern_gate.rs +++ b/_primitives/_rust/kei-agent-runtime/src/gates/pattern_gate.rs @@ -1,56 +1,40 @@ -//! Generic pattern-based gate (Layer C convergence, 2026-04-23). -//! -//! Absorbs 5 of the 6 gate impls (`policy::no-git-ops`, -//! `scope::files-whitelist`, `scope::files-denylist`, `safety::no-dep-bump`, -//! `tools::bash-allowlist`). Each concrete gate is now a `PatternGate` -//! const declaration in its own file; dispatch logic lives here. -//! -//! `tools::deny-tools` stays in its own module — mechanism is tool-name -//! match, not pattern match. +//! Generic pattern-based gate (Layer C convergence, 2026-04-23). Absorbs +//! 5 of 6 gate impls as `PatternGate` consts. `tools::deny-tools` stays +//! separate (tool-name match). Hardening (audit 2026-04-23): H1 regex +//! cache `Mutex`→`RwLock` (per-pattern `Lazy` needs sibling-gate edits, +//! out of scope). H2 `compile_checked()` → `Result`, no panics. +//! H3 `AllowIfMatch`+task-scope fails closed. S4 `char`-based truncation. +//! L2 single-pass template render; no replace-chain bleed. use crate::capability::*; -use once_cell::sync::Lazy; use regex::Regex; use std::collections::HashMap; -use std::sync::Mutex; +use std::sync::RwLock; /// How the gate decides on a match. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum GateMode { - /// Deny if any pattern matches (policy::no-git-ops, scope::files-denylist, - /// safety::no-dep-bump). DenyIfMatch, - /// Allow only if at least one pattern matches (tools::bash-allowlist). AllowIfMatch, - /// Deny if the value is NOT matched by any pattern AND the patterns - /// list is non-empty (scope::files-whitelist — empty list = skip). DenyIfUnmatched, } -/// Source of match patterns — compile-time static (regex) or dynamic -/// from TaskSpec (glob, used by scope gates). +/// Static regex list or dynamic glob list pulled from TaskSpec scope. #[derive(Clone, Copy)] pub enum PatternSource { - /// Static regex patterns, compiled lazily + cached. StaticRegex(&'static [&'static str]), - /// `task.scope.files_whitelist`, matched via `simulated_merge::glob_match`. TaskWhitelist, - /// `task.scope.files_denylist`, matched via `simulated_merge::glob_match`. TaskDenylist, } /// Generic pattern-driven PreToolUse gate. pub struct PatternGate { pub name: &'static str, - /// Tool filter. Empty slice = applies to any tool. pub tools: &'static [&'static str], - /// JSON field in `tool_input` to read (e.g. "command", "file_path"). pub field: &'static str, pub mode: GateMode, pub patterns: PatternSource, - /// If set and `env[bypass_env] == "1"`, gate allows unconditionally. pub bypass_env: Option<&'static str>, - /// Human-readable deny reason template (`{name}` / `{cmd}` / `{path}` / `{pat}`). pub deny_template: &'static str, } @@ -98,95 +82,119 @@ impl PatternGate { } fn decide_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision { - match self.mode { - GateMode::DenyIfMatch => self.deny_if_match_regex(value, pats), - GateMode::AllowIfMatch => self.allow_if_match_regex(value, pats), - GateMode::DenyIfUnmatched => self.deny_if_unmatched_regex(value, pats), - } - } - - fn deny_if_match_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision { - for raw in pats { - if compile(raw).is_match(value) { - return GateDecision::Deny { reason: self.render_reason(value, raw) }; - } - } - GateDecision::Allow - } - - fn allow_if_match_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision { - if pats.iter().any(|raw| compile(raw).is_match(value)) { - GateDecision::Allow - } else { - GateDecision::Deny { reason: self.render_reason(value, "") } - } - } - - fn deny_if_unmatched_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision { - if pats.is_empty() { + if matches!(self.mode, GateMode::DenyIfUnmatched) && pats.is_empty() { return GateDecision::Allow; } - if pats.iter().any(|raw| compile(raw).is_match(value)) { - GateDecision::Allow - } else { - GateDecision::Deny { reason: self.render_reason(value, "") } + let hit = match self.scan_regex(value, pats) { + Ok(h) => h, + Err(d) => return d, + }; + match (self.mode, hit) { + (GateMode::DenyIfMatch, Some(raw)) => self.deny(value, raw), + (GateMode::DenyIfMatch, None) => GateDecision::Allow, + (GateMode::AllowIfMatch, Some(_)) => GateDecision::Allow, + (GateMode::AllowIfMatch, None) => self.deny(value, ""), + (GateMode::DenyIfUnmatched, Some(_)) => GateDecision::Allow, + (GateMode::DenyIfUnmatched, None) => self.deny(value, ""), } } + /// First matching raw pattern, or `Err(Deny)` on compile failure (H2). + fn scan_regex<'p>( + &self, + value: &str, + pats: &'p [&'static str], + ) -> Result, GateDecision> { + for raw in pats { + match compile_checked(raw) { + Ok(rx) if rx.is_match(value) => return Ok(Some(raw)), + Ok(_) => {} + Err(e) => return Err(GateDecision::Deny { + reason: format!( + "{} — capability misconfigured: regex `{}` invalid ({})", + self.name, raw, e + ), + }), + } + } + Ok(None) + } + fn decide_scope(&self, value: &str, pats: &[String]) -> GateDecision { + use crate::simulated_merge::glob_match as gm; match self.mode { GateMode::DenyIfUnmatched if pats.is_empty() => GateDecision::Allow, - GateMode::DenyIfUnmatched => self.scope_whitelist_decide(value, pats), - GateMode::DenyIfMatch => self.scope_denylist_decide(value, pats), - GateMode::AllowIfMatch => GateDecision::NotApplicable, + GateMode::DenyIfUnmatched if pats.iter().any(|p| gm(p, value)) => GateDecision::Allow, + GateMode::DenyIfUnmatched => self.deny(value, ""), + GateMode::DenyIfMatch => pats + .iter() + .find(|p| gm(p, value)) + .map(|p| self.deny(value, p)) + .unwrap_or(GateDecision::Allow), + // H3: AllowIfMatch + task-scope = misconfigured; fail closed. + GateMode::AllowIfMatch => GateDecision::Deny { + reason: format!( + "{} — capability misconfigured: AllowIfMatch + task-scope source is invalid; \ + scope gates must use DenyIfMatch or DenyIfUnmatched", + self.name + ), + }, } } - fn scope_whitelist_decide(&self, path: &str, pats: &[String]) -> GateDecision { - if pats.iter().any(|p| crate::simulated_merge::glob_match(p, path)) { - GateDecision::Allow - } else { - GateDecision::Deny { reason: self.render_reason(path, "") } + fn deny(&self, value: &str, pat: &str) -> GateDecision { + GateDecision::Deny { reason: render_template(self.deny_template, self.name, value, pat) } + } +} + +/// Single-pass template render (L2). Tokens: `{name}` `{path}` `{cmd}` +/// `{pat}`; unknown `{...}` emitted verbatim. Substituted text cannot +/// bleed into later tokens (unlike the old replace-chain). +fn render_template(tpl: &str, name: &str, value: &str, pat: &str) -> String { + let mut out = String::with_capacity(tpl.len() + value.len()); + let cmd = truncate_chars(value, 60); + let mut rest = tpl; + while let Some(open) = rest.find('{') { + out.push_str(&rest[..open]); + let after = &rest[open..]; + let Some(close) = after.find('}') else { + out.push_str(after); + return out; + }; + let token = &after[1..close]; + match token { + "name" => out.push_str(name), + "path" => out.push_str(value), + "cmd" => out.push_str(&cmd), + "pat" => out.push_str(pat), + _ => out.push_str(&after[..=close]), } + rest = &after[close + 1..]; } - - fn scope_denylist_decide(&self, path: &str, pats: &[String]) -> GateDecision { - for pat in pats { - if crate::simulated_merge::glob_match(pat, path) { - return GateDecision::Deny { reason: self.render_reason(path, pat) }; - } - } - GateDecision::Allow - } - - fn render_reason(&self, value: &str, pat: &str) -> String { - self.deny_template - .replace("{name}", self.name) - .replace("{path}", value) - .replace("{cmd}", &truncate(value, 60)) - .replace("{pat}", pat) - } + out.push_str(rest); + out } -/// Compile + cache regex across calls. Per-process HashMap guarded by Mutex. -fn compile(raw: &str) -> Regex { - static CACHE: Lazy>> = Lazy::new(|| Mutex::new(HashMap::new())); - let mut guard = match CACHE.lock() { - Ok(g) => g, - Err(p) => p.into_inner(), - }; - if let Some(rx) = guard.get(raw) { - return rx.clone(); +/// Compile + cache a regex (H1 + H2). `RwLock` cache: read-lock fast +/// path, write-lock only on first compile of each pattern. +fn compile_checked(raw: &str) -> Result { + use once_cell::sync::Lazy; + static CACHE: Lazy>> = + Lazy::new(|| RwLock::new(HashMap::new())); + if let Some(rx) = CACHE.read().unwrap_or_else(|p| p.into_inner()).get(raw).cloned() { + return Ok(rx); } - let rx = Regex::new(raw).expect("pattern_gate: regex compile failed"); - guard.insert(raw.to_string(), rx.clone()); - rx + let rx = Regex::new(raw)?; + let mut g = CACHE.write().unwrap_or_else(|p| p.into_inner()); + Ok(g.entry(raw.to_string()).or_insert(rx).clone()) } -fn truncate(s: &str, max: usize) -> String { - if s.len() > max { - format!("{}…", &s[..max]) - } else { - s.to_string() +/// Truncate to `max` chars (S4). Safe for multi-byte code points. +fn truncate_chars(s: &str, max: usize) -> String { + let mut it = s.chars(); + let mut out: String = it.by_ref().take(max).collect(); + if it.next().is_some() { + out.push('…'); } + out } diff --git a/_primitives/_rust/kei-agent-runtime/tests/pattern_gate_smoke.rs b/_primitives/_rust/kei-agent-runtime/tests/pattern_gate_smoke.rs index a0ca317..e9cf609 100644 --- a/_primitives/_rust/kei-agent-runtime/tests/pattern_gate_smoke.rs +++ b/_primitives/_rust/kei-agent-runtime/tests/pattern_gate_smoke.rs @@ -138,3 +138,115 @@ fn safety_no_dep_bump_allows_bypass_env() { let input = json!({"file_path": "Cargo.toml"}); assert_eq!(g.check(&ctx("Edit", &input, &task, &env)), GateDecision::Allow); } + +// --- Hardening audit 2026-04-23 — H1/H2/H3/S4/L2 --------------------------- + +/// S4 — UTF-8 boundary safety. +/// +/// Construct a command where the 30th character is a 2-byte Cyrillic code +/// point and the total char count exceeds the 60-char truncation budget. +/// The old `&s[..60]` byte-slice panicked when byte-60 landed mid-char. +/// New `truncate_chars` takes 60 chars, not 60 bytes, so this must not panic. +#[test] +fn render_reason_safe_on_multibyte_command() { + let task = TaskSpec::default(); + let env = HashMap::new(); + // 30 ASCII 'a' + 40 Cyrillic 'я' (2 bytes each) = 70 chars / 110 bytes. + // Byte 60 lands mid-'я' — would panic under old byte-slice truncation. + let mut cmd = "a".repeat(30); + for _ in 0..40 { + cmd.push('я'); + } + cmd.push_str(" forbidden"); + let input = json!({"command": cmd}); + match TEST_GATE.check(&ctx("Bash", &input, &task, &env)) { + GateDecision::Deny { reason } => { + // Truncated segment must be valid UTF-8; no panic reached here. + assert!(reason.contains("matched")); + } + other => panic!("expected Deny on multibyte cmd, got {other:?}"), + } +} + +/// H2 — invalid static regex fails closed instead of panicking. +const BAD_REGEX_GATE: PatternGate = PatternGate { + name: "test::bad-regex", + tools: &["Bash"], + field: "command", + // Unbalanced `[` — regex::Regex::new will return Err. + mode: GateMode::DenyIfMatch, + patterns: PatternSource::StaticRegex(&["[unclosed"]), + bypass_env: None, + deny_template: "{name} — {cmd} matched {pat}", +}; + +#[test] +fn malformed_static_regex_denies_without_panic() { + let task = TaskSpec::default(); + let env = HashMap::new(); + let input = json!({"command": "anything"}); + match BAD_REGEX_GATE.check(&ctx("Bash", &input, &task, &env)) { + GateDecision::Deny { reason } => { + assert!(reason.contains("misconfigured"), "reason: {reason}"); + assert!(reason.contains("[unclosed"), "reason: {reason}"); + } + other => panic!("expected Deny on malformed regex, got {other:?}"), + } +} + +/// H3 — AllowIfMatch + task-scope source is structurally invalid; the +/// previous impl silently returned `NotApplicable` (effectively allowing +/// every tool call through). Hardened impl fails closed with a clear +/// `capability misconfigured` `Deny`. +const BAD_COMBO_GATE: PatternGate = PatternGate { + name: "test::bad-combo", + tools: &["Edit"], + field: "file_path", + mode: GateMode::AllowIfMatch, + patterns: PatternSource::TaskWhitelist, + bypass_env: None, + deny_template: "{name} — {path}", +}; + +#[test] +fn allow_if_match_with_task_scope_fails_closed() { + let task = TaskSpec::default(); + let env = HashMap::new(); + let input = json!({"file_path": "anywhere/foo.rs"}); + match BAD_COMBO_GATE.check(&ctx("Edit", &input, &task, &env)) { + GateDecision::Deny { reason } => { + assert!(reason.contains("misconfigured"), "reason: {reason}"); + assert!(reason.contains("AllowIfMatch"), "reason: {reason}"); + } + other => panic!("expected Deny on bad combo, got {other:?}"), + } +} + +/// H1 — regex cache behaviour. Same pattern across many calls must stay +/// deterministic and cheap; this test doesn't measure timing but pins the +/// behavioural contract: 1k calls against the cached pattern all agree +/// with the first call's outcome. A full per-pattern `Lazy` +/// refactor would let us assert pointer-identity; with the `RwLock`-only +/// fix in this file-bounded audit, we assert observational stability. +#[test] +fn regex_cache_is_stable_across_many_calls() { + let task = TaskSpec::default(); + let env = HashMap::new(); + let input_hit = json!({"command": "run forbidden action"}); + let input_miss = json!({"command": "echo hello"}); + let first_hit = TEST_GATE.check(&ctx("Bash", &input_hit, &task, &env)); + let first_miss = TEST_GATE.check(&ctx("Bash", &input_miss, &task, &env)); + for _ in 0..1000 { + assert!(matches!( + TEST_GATE.check(&ctx("Bash", &input_hit, &task, &env)), + GateDecision::Deny { .. } + )); + assert_eq!( + TEST_GATE.check(&ctx("Bash", &input_miss, &task, &env)), + GateDecision::Allow + ); + } + // Sanity: first outcomes still match after the warm-up loop. + assert!(matches!(first_hit, GateDecision::Deny { .. })); + assert_eq!(first_miss, GateDecision::Allow); +}