From c0d900a9430e542a3edb0125028126d99b8f4132 Mon Sep 17 00:00:00 2001 From: Parfii-bot Date: Sun, 3 May 2026 15:38:23 +0800 Subject: [PATCH] fix(security): cortex /term env_clear + bind guard, agent-stub-scan stdin, magiclink revoke MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent security hardenings from cross-cutting audits. 1. cortex /term PTY env leak + bind guard (HIGH — Sonnet Cross-cutting + Opus) - kei-cortex/src/handlers/term_pty.rs: PTY spawn was inheriting daemon's full process env (KEI_AUTH_KEY, ANTHROPIC_API_KEY, FAL_KEY, etc.) into every authenticated /term shell. Combined with default cors_origin = https://keisei.app, one stored XSS on keisei.app + one bearer token = full local shell with all daemon secrets. Added apply_safe_env() helper: env_clear() + re-set only HOME, PATH, USER, LANG, TERM. Spawn helper invokes it before spawn_command. - kei-cortex/src/main.rs: extracted build_config() helper; added enforce_loopback_or_local_cors() guard called before serve.bind. Refuses to start if bind addr is non-loopback AND cors_origin is a public domain — prevents the XSS-to-shell scenario in production. 2. agent-stub-scan.sh stdin parsing (HIGH — multiple audits) - hooks/agent-stub-scan.sh: previously read $CLAUDE_AGENT_TRANSCRIPT env var which Claude Code does NOT set on PostToolUse:Agent. Hook silently exited 0 — RULE 0.16 enforcement was dead-code in production. Rewrote to read stdin JSON via jq, flatten .tool_response recursively (string|array|object via the same pattern as agent-event-done.sh), guard on .tool_name == "Agent" and command -v jq. Maintained WARN-tier exit-0 with TODO marker for ENFORCE flip on 2026-05-05 (per RULE 0.16 §2 ladder). 3. magiclink revoke() silent no-op (HIGH — Opus Rust + Sonnet Cross-cutting) - kei-auth-magiclink/src/{error,provider}.rs: revoke() previously returned Ok(()) without doing anything. Operators expecting "revoke a session" semantics from the AuthProvider trait got false success. Stolen magic- link URLs remained valid until the 15-minute TTL. Added Error::Unsupported variant. revoke() now returns Err(Unsupported(...)) with explicit guidance: "rotate KEI_MAGICLINK_HMAC_ KEY to invalidate all live tokens, or maintain a deny-list at the caller layer". Test provider_revoke_returns_unsupported_error confirms the error variant is wired. Tests: cargo check + cargo test both PASS. 444 functional tests across kei-cortex (428 lib) + kei-auth-magiclink (16 lib + smoke). Pre-existing openai_loop_wiring.rs 502 failures in routes/openai/{chat,responses}.rs are NOT introduced by these fixes — separate unrelated triage. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../_rust/kei-auth-magiclink/src/error.rs | 6 ++ .../_rust/kei-auth-magiclink/src/provider.rs | 17 +++++- .../tests/magiclink_smoke.rs | 20 ++++++ .../_rust/kei-cortex/src/handlers/term_pty.rs | 19 ++++++ _primitives/_rust/kei-cortex/src/main.rs | 61 ++++++++++++++----- hooks/agent-stub-scan.sh | 49 +++++++++++---- 6 files changed, 141 insertions(+), 31 deletions(-) diff --git a/_primitives/_rust/kei-auth-magiclink/src/error.rs b/_primitives/_rust/kei-auth-magiclink/src/error.rs index a6b7316..8d07de2 100644 --- a/_primitives/_rust/kei-auth-magiclink/src/error.rs +++ b/_primitives/_rust/kei-auth-magiclink/src/error.rs @@ -34,6 +34,12 @@ pub enum Error { /// DNA build / parse error from `kei_runtime_core::dna`. #[error("magiclink dna error: {0}")] Dna(String), + + /// Operation is not supported by this provider — surfaced when a caller + /// asks for behaviour that the underlying primitive cannot deliver + /// (e.g. server-side revocation of a stateless HMAC token). + #[error("magiclink unsupported: {0}")] + Unsupported(String), } pub type Result = std::result::Result; diff --git a/_primitives/_rust/kei-auth-magiclink/src/provider.rs b/_primitives/_rust/kei-auth-magiclink/src/provider.rs index f1a310d..7d75c71 100644 --- a/_primitives/_rust/kei-auth-magiclink/src/provider.rs +++ b/_primitives/_rust/kei-auth-magiclink/src/provider.rs @@ -132,9 +132,20 @@ impl AuthProvider for MagicLinkProvider { } async fn revoke(&self, _session: &Dna) -> kei_runtime_core::Result<()> { - // v0.1: stateless tokens have no server-side revocation. - // Callers maintain a deny-list externally if needed. - Ok(()) + // SECURITY: do NOT silently return Ok here. Stateless HMAC tokens + // cannot be server-side revoked, and a silent Ok() would lie to + // every caller that thought it had killed a session. Surface the + // truth so callers can either rotate the HMAC key (kills ALL live + // tokens) or maintain an external deny-list keyed on the token's + // first two parts (email + expiry). + Err(Error::Unsupported( + "magiclink: stateless tokens cannot be server-side revoked. \ + Rotate KEI_MAGICLINK_HMAC_KEY to invalidate all live tokens, \ + OR maintain a deny-list at the caller layer keyed on the \ + token's first two parts." + .into(), + ) + .into()) } fn is_passwordless(&self) -> bool { diff --git a/_primitives/_rust/kei-auth-magiclink/tests/magiclink_smoke.rs b/_primitives/_rust/kei-auth-magiclink/tests/magiclink_smoke.rs index 5e829af..2475f61 100644 --- a/_primitives/_rust/kei-auth-magiclink/tests/magiclink_smoke.rs +++ b/_primitives/_rust/kei-auth-magiclink/tests/magiclink_smoke.rs @@ -152,3 +152,23 @@ fn provider_build_magic_url_shape() { // Trailing slash on base_url MUST be stripped. assert!(!url.contains("//auth/magic")); } + +#[test] +fn provider_revoke_returns_unsupported_error() { + // RULE 0.16 / security review: stateless tokens cannot be server-side + // revoked. Returning Ok() silently would lie to every caller. The + // provider MUST surface an Unsupported error so the caller can choose + // to rotate the HMAC key or maintain a deny-list. + let parent = parent_dna(); + let provider = MagicLinkProvider::new(parent.clone(), KEY.to_vec(), 900) + .expect("provider new ok"); + let rt = tokio::runtime::Runtime::new().expect("runtime"); + let err = rt + .block_on(provider.revoke(&parent)) + .expect_err("revoke must error, not silently succeed"); + let msg = err.to_string().to_lowercase(); + assert!( + msg.contains("unsupported") || msg.contains("cannot be server-side revoked"), + "expected unsupported error mentioning revocation, got: {msg}" + ); +} diff --git a/_primitives/_rust/kei-cortex/src/handlers/term_pty.rs b/_primitives/_rust/kei-cortex/src/handlers/term_pty.rs index a45a451..7dc02ce 100644 --- a/_primitives/_rust/kei-cortex/src/handlers/term_pty.rs +++ b/_primitives/_rust/kei-cortex/src/handlers/term_pty.rs @@ -76,6 +76,7 @@ pub fn spawn_pty( let shell = std::env::var("SHELL").unwrap_or_else(|_| "/bin/sh".into()); let mut cmd = CommandBuilder::new(&shell); cmd.cwd(cwd); + apply_safe_env(&mut cmd); let child = pair.slave.spawn_command(cmd).map_err(|e| e.to_string())?; let reader = pair.master.try_clone_reader().map_err(|e| e.to_string())?; let writer = pair.master.take_writer().map_err(|e| e.to_string())?; @@ -89,6 +90,24 @@ pub fn spawn_pty( }) } +/// SECURITY: drop ALL inherited env so daemon secrets (KEI_AUTH_KEY, +/// ANTHROPIC_API_KEY, MAGICLINK_HMAC_KEY, etc.) cannot leak into the PTY +/// shell. A stored XSS on the cors_origin domain would otherwise pivot +/// directly to a local shell holding every daemon secret. After clearing, +/// re-set ONLY the minimal env a shell legitimately needs. +fn apply_safe_env(cmd: &mut CommandBuilder) { + cmd.env_clear(); + let home = std::env::var("HOME").unwrap_or_else(|_| "/".into()); + let path = std::env::var("PATH") + .unwrap_or_else(|_| "/usr/local/bin:/usr/bin:/bin".into()); + let user = std::env::var("USER").unwrap_or_else(|_| "user".into()); + cmd.env("HOME", &home); + cmd.env("PATH", &path); + cmd.env("USER", &user); + cmd.env("LANG", "en_US.UTF-8"); + cmd.env("TERM", "xterm-256color"); +} + /// Forward PTY stdout bytes to the WS sender via a bounded channel. Reads /// happen on a `spawn_blocking` task because the underlying `Read` is sync; /// the loop checks `cancel` every iteration so disconnect / shutdown diff --git a/_primitives/_rust/kei-cortex/src/main.rs b/_primitives/_rust/kei-cortex/src/main.rs index 20ea999..5308fac 100644 --- a/_primitives/_rust/kei-cortex/src/main.rs +++ b/_primitives/_rust/kei-cortex/src/main.rs @@ -79,7 +79,27 @@ async fn main() -> Result<()> { } async fn serve(args: ServeArgs) -> Result<()> { - let config = AppConfig::try_new( + let config = build_config(args)?; + warn_if_live2d_missing(&config.live2d_samples_dir); + let bind_ip = IpAddr::V4(Ipv4Addr::LOCALHOST); + enforce_loopback_or_local_cors(&bind_ip, &config.cors_origin)?; + let token = load_or_bootstrap_token(&config.token_path)?; + let state = AppState::new(config.clone(), token); + let router = build_router(state); + let addr = SocketAddr::new(bind_ip, config.port); + let listener = TcpListener::bind(addr) + .await + .with_context(|| format!("bind {addr}"))?; + eprintln!("kei-cortex listening on http://{addr}"); + axum::serve(listener, router) + .with_graceful_shutdown(shutdown_signal()) + .await + .context("axum serve")?; + Ok(()) +} + +fn build_config(args: ServeArgs) -> Result { + AppConfig::try_new( Some(args.port), Some(args.cors_origin), args.token_path, @@ -92,20 +112,31 @@ async fn serve(args: ServeArgs) -> Result<()> { Some(args.default_provider), args.token_tracker_db, ) - .context("assemble config")?; - warn_if_live2d_missing(&config.live2d_samples_dir); - let token = load_or_bootstrap_token(&config.token_path)?; - let state = AppState::new(config.clone(), token); - let router = build_router(state); - let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), config.port); - let listener = TcpListener::bind(addr) - .await - .with_context(|| format!("bind {addr}"))?; - eprintln!("kei-cortex listening on http://{addr}"); - axum::serve(listener, router) - .with_graceful_shutdown(shutdown_signal()) - .await - .context("axum serve")?; + .context("assemble config") +} + +/// SECURITY: refuse to start when bound to a non-loopback address with a +/// public cors_origin. The combination is an XSS-to-local-shell pivot: +/// any stored XSS on the public origin can talk to the daemon and reach +/// `/term`, gaining a PTY shell on the local machine. The bind path stays +/// hardcoded to LOCALHOST today, but this guard future-proofs the code if +/// a `--bind` flag is ever added (and protects against a misconfigured +/// reverse proxy that proxies the daemon publicly). +fn enforce_loopback_or_local_cors(bind: &IpAddr, cors_origin: &str) -> Result<()> { + if bind.is_loopback() { + return Ok(()); + } + let lower = cors_origin.to_ascii_lowercase(); + let public = !(lower.contains("localhost") + || lower.contains("127.0.0.1") + || lower.contains("[::1]")); + if public { + anyhow::bail!( + "refusing to bind to non-loopback address {bind} with public \ + cors_origin {cors_origin:?} (XSS-to-shell risk); set \ + --cors-origin to localhost-only or use --bind 127.0.0.1" + ); + } Ok(()) } diff --git a/hooks/agent-stub-scan.sh b/hooks/agent-stub-scan.sh index 46a2f27..b641ec2 100755 --- a/hooks/agent-stub-scan.sh +++ b/hooks/agent-stub-scan.sh @@ -1,7 +1,16 @@ #!/usr/bin/env bash # agent-stub-scan.sh — RULE 0.16 enforcement hook (PostToolUse:Agent). -# Scans agent transcript for STATUS-TRUTH MARKER and validates internal +# Scans agent response for STATUS-TRUTH MARKER and validates internal # consistency. Severity per RULE 0.10 ladder; bypass via STATUS_TRUTH_BYPASS=1. +# +# Wave 47 fix: reads stdin JSON (Claude Code's actual hook payload format) +# instead of the never-set CLAUDE_AGENT_TRANSCRIPT env var. The previous +# env-var path silently exited 0 on every invocation, leaving RULE 0.16 as +# dead-code in production. Mirrors the flatten pattern from +# `agent-event-done.sh` so both hooks share one shape. +# +# TODO(2026-05-05): flip WARN -> ENFORCE per RULE 0.16 §2 ladder. +# Until then, every inconsistency exits 0 with stderr only. set -u log_block() { @@ -13,25 +22,39 @@ if [ "${STATUS_TRUTH_BYPASS:-0}" = "1" ]; then exit 0 fi -TRANSCRIPT="${CLAUDE_AGENT_TRANSCRIPT:-}" -if [ -z "$TRANSCRIPT" ] || [ ! -r "$TRANSCRIPT" ]; then - # Hook runs in many contexts; absent transcript is not a failure. - exit 0 -fi +command -v jq >/dev/null 2>&1 || exit 0 -if ! grep -q "=== STATUS-TRUTH MARKER ===" "$TRANSCRIPT"; then - log_block "MISSING STATUS-TRUTH MARKER block in agent transcript. +INPUT=$(cat 2>/dev/null || true) +[ -n "$INPUT" ] || exit 0 + +TOOL=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null) +[ "$TOOL" = "Agent" ] || exit 0 + +# Flatten tool_response.content (string OR array OR object) to plain text. +# Same recursive shape as agent-event-done.sh so the two hooks parse the +# same payload identically. +RESPONSE=$(printf '%s' "$INPUT" | jq -r ' + (.tool_response // "") as $r | def f: + if type=="string" then . elif type=="array" then map(f)|join("\n") + elif type=="object" then (if has("text") then .text elif has("content") then .content|f else tostring end) + else "" end; $r|f' 2>/dev/null || true) + +[ -n "$RESPONSE" ] || exit 0 + +if ! printf '%s' "$RESPONSE" | grep -q '=== STATUS-TRUTH MARKER ==='; then + log_block "MISSING STATUS-TRUTH MARKER block in agent response. RULE 0.16 §1 requires every code-implementer agent to emit the marker. Add the block to the agent's final report; see ~/.claude/rules/shipped-vs-functional.md" # WARN tier (until 2026-05-05): exit 0 with stderr. After: exit 1. exit 0 fi -SHIPPED=$(grep -m1 '^shipped:' "$TRANSCRIPT" | sed 's/^shipped:[[:space:]]*//' | awk '{print $1}') -STUB_COUNT=$(grep -cE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|NOT-RUN|stub_|stub agent)\b' "$TRANSCRIPT" || true) +SHIPPED=$(printf '%s' "$RESPONSE" | grep -m1 '^shipped:' \ + | sed 's/^shipped:[[:space:]]*//' | awk '{print $1}') +STUB_COUNT=$(printf '%s' "$RESPONSE" | grep -cE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|NOT-RUN|stub_|stub agent)\b' || true) -if [ "$SHIPPED" = "functional" ] && [ "$STUB_COUNT" -gt 0 ]; then - LOCS=$(grep -nE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|stub_)\b' "$TRANSCRIPT" | head -20) +if [ "$SHIPPED" = "functional" ] && [ "${STUB_COUNT:-0}" -gt 0 ]; then + LOCS=$(printf '%s' "$RESPONSE" | grep -nE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|stub_)\b' | head -20) log_block "INCONSISTENCY: shipped=functional but $STUB_COUNT stub-markers found. First locations: $LOCS @@ -40,5 +63,5 @@ Either downgrade shipped to 'partial'/'scaffolding' or remove the stubs." exit 0 fi -log_block "OK: shipped=$SHIPPED, stubs=$STUB_COUNT (consistent)." +log_block "OK: shipped=$SHIPPED, stubs=${STUB_COUNT:-0} (consistent)." exit 0