fix(agent-runtime/b1): PatternGate hardening — RwLock, Result, UTF-8, explicit deny

H1 lock contention: Mutex<HashMap> → RwLock<HashMap>. Hot path cheap
read-lock, write-lock only on first compile per pattern.

H2 panic removed: compile_checked() returns Result<Regex, regex::Error>;
all call sites propagate as GateDecision::Deny{reason:"misconfigured
regex..."}. No .expect()/.unwrap() on compile anywhere.

H3 dead branch: AllowIfMatch + TaskWhitelist/TaskDenylist combo was
silently NotApplicable (allow-everything). Now explicit Deny with
"misconfigured" + "AllowIfMatch" in reason. Fail-closed.

S4 UTF-8 panic: &s[..60] byte-slice → s.chars().take(60). Cyrillic/
emoji safe. 60-char budget by code point.

L2 render chain: String::replace chain → single-pass render_template()
that scans {token} markers once. Substituted text cannot bleed into
later tokens.

Tests: 70/70 (was 66, +4). pattern_gate_smoke 10 → 14.
Follow-up (out of file-boundary scope): per-pattern Lazy<Regex> variant
would eliminate the RwLock entirely but requires editing sibling gate
consts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Parfii-bot 2026-04-23 05:27:59 +08:00
parent 324ad5d53e
commit f9bfcc23b7
2 changed files with 217 additions and 97 deletions

View file

@ -1,56 +1,40 @@
//! Generic pattern-based gate (Layer C convergence, 2026-04-23).
//!
//! Absorbs 5 of the 6 gate impls (`policy::no-git-ops`,
//! `scope::files-whitelist`, `scope::files-denylist`, `safety::no-dep-bump`,
//! `tools::bash-allowlist`). Each concrete gate is now a `PatternGate`
//! const declaration in its own file; dispatch logic lives here.
//!
//! `tools::deny-tools` stays in its own module — mechanism is tool-name
//! match, not pattern match.
//! Generic pattern-based gate (Layer C convergence, 2026-04-23). Absorbs
//! 5 of 6 gate impls as `PatternGate` consts. `tools::deny-tools` stays
//! separate (tool-name match). Hardening (audit 2026-04-23): H1 regex
//! cache `Mutex`→`RwLock` (per-pattern `Lazy` needs sibling-gate edits,
//! out of scope). H2 `compile_checked()` → `Result`, no panics.
//! H3 `AllowIfMatch`+task-scope fails closed. S4 `char`-based truncation.
//! L2 single-pass template render; no replace-chain bleed.
use crate::capability::*;
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use std::sync::Mutex;
use std::sync::RwLock;
/// How the gate decides on a match.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GateMode {
/// Deny if any pattern matches (policy::no-git-ops, scope::files-denylist,
/// safety::no-dep-bump).
DenyIfMatch,
/// Allow only if at least one pattern matches (tools::bash-allowlist).
AllowIfMatch,
/// Deny if the value is NOT matched by any pattern AND the patterns
/// list is non-empty (scope::files-whitelist — empty list = skip).
DenyIfUnmatched,
}
/// Source of match patterns — compile-time static (regex) or dynamic
/// from TaskSpec (glob, used by scope gates).
/// Static regex list or dynamic glob list pulled from TaskSpec scope.
#[derive(Clone, Copy)]
pub enum PatternSource {
/// Static regex patterns, compiled lazily + cached.
StaticRegex(&'static [&'static str]),
/// `task.scope.files_whitelist`, matched via `simulated_merge::glob_match`.
TaskWhitelist,
/// `task.scope.files_denylist`, matched via `simulated_merge::glob_match`.
TaskDenylist,
}
/// Generic pattern-driven PreToolUse gate.
pub struct PatternGate {
pub name: &'static str,
/// Tool filter. Empty slice = applies to any tool.
pub tools: &'static [&'static str],
/// JSON field in `tool_input` to read (e.g. "command", "file_path").
pub field: &'static str,
pub mode: GateMode,
pub patterns: PatternSource,
/// If set and `env[bypass_env] == "1"`, gate allows unconditionally.
pub bypass_env: Option<&'static str>,
/// Human-readable deny reason template (`{name}` / `{cmd}` / `{path}` / `{pat}`).
pub deny_template: &'static str,
}
@ -98,95 +82,119 @@ impl PatternGate {
}
fn decide_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision {
match self.mode {
GateMode::DenyIfMatch => self.deny_if_match_regex(value, pats),
GateMode::AllowIfMatch => self.allow_if_match_regex(value, pats),
GateMode::DenyIfUnmatched => self.deny_if_unmatched_regex(value, pats),
}
}
fn deny_if_match_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision {
for raw in pats {
if compile(raw).is_match(value) {
return GateDecision::Deny { reason: self.render_reason(value, raw) };
}
}
GateDecision::Allow
}
fn allow_if_match_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision {
if pats.iter().any(|raw| compile(raw).is_match(value)) {
GateDecision::Allow
} else {
GateDecision::Deny { reason: self.render_reason(value, "<none>") }
}
}
fn deny_if_unmatched_regex(&self, value: &str, pats: &[&'static str]) -> GateDecision {
if pats.is_empty() {
if matches!(self.mode, GateMode::DenyIfUnmatched) && pats.is_empty() {
return GateDecision::Allow;
}
if pats.iter().any(|raw| compile(raw).is_match(value)) {
GateDecision::Allow
} else {
GateDecision::Deny { reason: self.render_reason(value, "<unmatched>") }
let hit = match self.scan_regex(value, pats) {
Ok(h) => h,
Err(d) => return d,
};
match (self.mode, hit) {
(GateMode::DenyIfMatch, Some(raw)) => self.deny(value, raw),
(GateMode::DenyIfMatch, None) => GateDecision::Allow,
(GateMode::AllowIfMatch, Some(_)) => GateDecision::Allow,
(GateMode::AllowIfMatch, None) => self.deny(value, "<none>"),
(GateMode::DenyIfUnmatched, Some(_)) => GateDecision::Allow,
(GateMode::DenyIfUnmatched, None) => self.deny(value, "<unmatched>"),
}
}
/// First matching raw pattern, or `Err(Deny)` on compile failure (H2).
fn scan_regex<'p>(
&self,
value: &str,
pats: &'p [&'static str],
) -> Result<Option<&'p &'static str>, GateDecision> {
for raw in pats {
match compile_checked(raw) {
Ok(rx) if rx.is_match(value) => return Ok(Some(raw)),
Ok(_) => {}
Err(e) => return Err(GateDecision::Deny {
reason: format!(
"{} — capability misconfigured: regex `{}` invalid ({})",
self.name, raw, e
),
}),
}
}
Ok(None)
}
fn decide_scope(&self, value: &str, pats: &[String]) -> GateDecision {
use crate::simulated_merge::glob_match as gm;
match self.mode {
GateMode::DenyIfUnmatched if pats.is_empty() => GateDecision::Allow,
GateMode::DenyIfUnmatched => self.scope_whitelist_decide(value, pats),
GateMode::DenyIfMatch => self.scope_denylist_decide(value, pats),
GateMode::AllowIfMatch => GateDecision::NotApplicable,
GateMode::DenyIfUnmatched if pats.iter().any(|p| gm(p, value)) => GateDecision::Allow,
GateMode::DenyIfUnmatched => self.deny(value, "<whitelist>"),
GateMode::DenyIfMatch => pats
.iter()
.find(|p| gm(p, value))
.map(|p| self.deny(value, p))
.unwrap_or(GateDecision::Allow),
// H3: AllowIfMatch + task-scope = misconfigured; fail closed.
GateMode::AllowIfMatch => GateDecision::Deny {
reason: format!(
"{} — capability misconfigured: AllowIfMatch + task-scope source is invalid; \
scope gates must use DenyIfMatch or DenyIfUnmatched",
self.name
),
},
}
}
fn scope_whitelist_decide(&self, path: &str, pats: &[String]) -> GateDecision {
if pats.iter().any(|p| crate::simulated_merge::glob_match(p, path)) {
GateDecision::Allow
} else {
GateDecision::Deny { reason: self.render_reason(path, "<whitelist>") }
fn deny(&self, value: &str, pat: &str) -> GateDecision {
GateDecision::Deny { reason: render_template(self.deny_template, self.name, value, pat) }
}
}
/// Single-pass template render (L2). Tokens: `{name}` `{path}` `{cmd}`
/// `{pat}`; unknown `{...}` emitted verbatim. Substituted text cannot
/// bleed into later tokens (unlike the old replace-chain).
fn render_template(tpl: &str, name: &str, value: &str, pat: &str) -> String {
let mut out = String::with_capacity(tpl.len() + value.len());
let cmd = truncate_chars(value, 60);
let mut rest = tpl;
while let Some(open) = rest.find('{') {
out.push_str(&rest[..open]);
let after = &rest[open..];
let Some(close) = after.find('}') else {
out.push_str(after);
return out;
};
let token = &after[1..close];
match token {
"name" => out.push_str(name),
"path" => out.push_str(value),
"cmd" => out.push_str(&cmd),
"pat" => out.push_str(pat),
_ => out.push_str(&after[..=close]),
}
rest = &after[close + 1..];
}
fn scope_denylist_decide(&self, path: &str, pats: &[String]) -> GateDecision {
for pat in pats {
if crate::simulated_merge::glob_match(pat, path) {
return GateDecision::Deny { reason: self.render_reason(path, pat) };
}
}
GateDecision::Allow
}
fn render_reason(&self, value: &str, pat: &str) -> String {
self.deny_template
.replace("{name}", self.name)
.replace("{path}", value)
.replace("{cmd}", &truncate(value, 60))
.replace("{pat}", pat)
}
out.push_str(rest);
out
}
/// Compile + cache regex across calls. Per-process HashMap guarded by Mutex.
fn compile(raw: &str) -> Regex {
static CACHE: Lazy<Mutex<HashMap<String, Regex>>> = Lazy::new(|| Mutex::new(HashMap::new()));
let mut guard = match CACHE.lock() {
Ok(g) => g,
Err(p) => p.into_inner(),
};
if let Some(rx) = guard.get(raw) {
return rx.clone();
/// Compile + cache a regex (H1 + H2). `RwLock` cache: read-lock fast
/// path, write-lock only on first compile of each pattern.
fn compile_checked(raw: &str) -> Result<Regex, regex::Error> {
use once_cell::sync::Lazy;
static CACHE: Lazy<RwLock<HashMap<String, Regex>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
if let Some(rx) = CACHE.read().unwrap_or_else(|p| p.into_inner()).get(raw).cloned() {
return Ok(rx);
}
let rx = Regex::new(raw).expect("pattern_gate: regex compile failed");
guard.insert(raw.to_string(), rx.clone());
rx
let rx = Regex::new(raw)?;
let mut g = CACHE.write().unwrap_or_else(|p| p.into_inner());
Ok(g.entry(raw.to_string()).or_insert(rx).clone())
}
fn truncate(s: &str, max: usize) -> String {
if s.len() > max {
format!("{}", &s[..max])
} else {
s.to_string()
/// Truncate to `max` chars (S4). Safe for multi-byte code points.
fn truncate_chars(s: &str, max: usize) -> String {
let mut it = s.chars();
let mut out: String = it.by_ref().take(max).collect();
if it.next().is_some() {
out.push('…');
}
out
}

View file

@ -138,3 +138,115 @@ fn safety_no_dep_bump_allows_bypass_env() {
let input = json!({"file_path": "Cargo.toml"});
assert_eq!(g.check(&ctx("Edit", &input, &task, &env)), GateDecision::Allow);
}
// --- Hardening audit 2026-04-23 — H1/H2/H3/S4/L2 ---------------------------
/// S4 — UTF-8 boundary safety.
///
/// Construct a command where the 30th character is a 2-byte Cyrillic code
/// point and the total char count exceeds the 60-char truncation budget.
/// The old `&s[..60]` byte-slice panicked when byte-60 landed mid-char.
/// New `truncate_chars` takes 60 chars, not 60 bytes, so this must not panic.
#[test]
fn render_reason_safe_on_multibyte_command() {
let task = TaskSpec::default();
let env = HashMap::new();
// 30 ASCII 'a' + 40 Cyrillic 'я' (2 bytes each) = 70 chars / 110 bytes.
// Byte 60 lands mid-'я' — would panic under old byte-slice truncation.
let mut cmd = "a".repeat(30);
for _ in 0..40 {
cmd.push('я');
}
cmd.push_str(" forbidden");
let input = json!({"command": cmd});
match TEST_GATE.check(&ctx("Bash", &input, &task, &env)) {
GateDecision::Deny { reason } => {
// Truncated segment must be valid UTF-8; no panic reached here.
assert!(reason.contains("matched"));
}
other => panic!("expected Deny on multibyte cmd, got {other:?}"),
}
}
/// H2 — invalid static regex fails closed instead of panicking.
const BAD_REGEX_GATE: PatternGate = PatternGate {
name: "test::bad-regex",
tools: &["Bash"],
field: "command",
// Unbalanced `[` — regex::Regex::new will return Err.
mode: GateMode::DenyIfMatch,
patterns: PatternSource::StaticRegex(&["[unclosed"]),
bypass_env: None,
deny_template: "{name} — {cmd} matched {pat}",
};
#[test]
fn malformed_static_regex_denies_without_panic() {
let task = TaskSpec::default();
let env = HashMap::new();
let input = json!({"command": "anything"});
match BAD_REGEX_GATE.check(&ctx("Bash", &input, &task, &env)) {
GateDecision::Deny { reason } => {
assert!(reason.contains("misconfigured"), "reason: {reason}");
assert!(reason.contains("[unclosed"), "reason: {reason}");
}
other => panic!("expected Deny on malformed regex, got {other:?}"),
}
}
/// H3 — AllowIfMatch + task-scope source is structurally invalid; the
/// previous impl silently returned `NotApplicable` (effectively allowing
/// every tool call through). Hardened impl fails closed with a clear
/// `capability misconfigured` `Deny`.
const BAD_COMBO_GATE: PatternGate = PatternGate {
name: "test::bad-combo",
tools: &["Edit"],
field: "file_path",
mode: GateMode::AllowIfMatch,
patterns: PatternSource::TaskWhitelist,
bypass_env: None,
deny_template: "{name} — {path}",
};
#[test]
fn allow_if_match_with_task_scope_fails_closed() {
let task = TaskSpec::default();
let env = HashMap::new();
let input = json!({"file_path": "anywhere/foo.rs"});
match BAD_COMBO_GATE.check(&ctx("Edit", &input, &task, &env)) {
GateDecision::Deny { reason } => {
assert!(reason.contains("misconfigured"), "reason: {reason}");
assert!(reason.contains("AllowIfMatch"), "reason: {reason}");
}
other => panic!("expected Deny on bad combo, got {other:?}"),
}
}
/// H1 — regex cache behaviour. Same pattern across many calls must stay
/// deterministic and cheap; this test doesn't measure timing but pins the
/// behavioural contract: 1k calls against the cached pattern all agree
/// with the first call's outcome. A full per-pattern `Lazy<Regex>`
/// refactor would let us assert pointer-identity; with the `RwLock`-only
/// fix in this file-bounded audit, we assert observational stability.
#[test]
fn regex_cache_is_stable_across_many_calls() {
let task = TaskSpec::default();
let env = HashMap::new();
let input_hit = json!({"command": "run forbidden action"});
let input_miss = json!({"command": "echo hello"});
let first_hit = TEST_GATE.check(&ctx("Bash", &input_hit, &task, &env));
let first_miss = TEST_GATE.check(&ctx("Bash", &input_miss, &task, &env));
for _ in 0..1000 {
assert!(matches!(
TEST_GATE.check(&ctx("Bash", &input_hit, &task, &env)),
GateDecision::Deny { .. }
));
assert_eq!(
TEST_GATE.check(&ctx("Bash", &input_miss, &task, &env)),
GateDecision::Allow
);
}
// Sanity: first outcomes still match after the warm-up loop.
assert!(matches!(first_hit, GateDecision::Deny { .. }));
assert_eq!(first_miss, GateDecision::Allow);
}