fix(security): cortex /term env_clear + bind guard, agent-stub-scan stdin, magiclink revoke

Three independent security hardenings from cross-cutting audits.

1. cortex /term PTY env leak + bind guard (HIGH — Sonnet Cross-cutting + Opus)
   - kei-cortex/src/handlers/term_pty.rs: PTY spawn was inheriting daemon's
     full process env (KEI_AUTH_KEY, ANTHROPIC_API_KEY, FAL_KEY, etc.) into
     every authenticated /term shell. Combined with default cors_origin =
     https://keisei.app, one stored XSS on keisei.app + one bearer token =
     full local shell with all daemon secrets.
     Added apply_safe_env() helper: env_clear() + re-set only HOME, PATH,
     USER, LANG, TERM. Spawn helper invokes it before spawn_command.
   - kei-cortex/src/main.rs: extracted build_config() helper; added
     enforce_loopback_or_local_cors() guard called before serve.bind. Refuses
     to start if bind addr is non-loopback AND cors_origin is a public
     domain — prevents the XSS-to-shell scenario in production.

2. agent-stub-scan.sh stdin parsing (HIGH — multiple audits)
   - hooks/agent-stub-scan.sh: previously read $CLAUDE_AGENT_TRANSCRIPT env
     var which Claude Code does NOT set on PostToolUse:Agent. Hook silently
     exited 0 — RULE 0.16 enforcement was dead-code in production.
     Rewrote to read stdin JSON via jq, flatten .tool_response recursively
     (string|array|object via the same pattern as agent-event-done.sh),
     guard on .tool_name == "Agent" and command -v jq. Maintained WARN-tier
     exit-0 with TODO marker for ENFORCE flip on 2026-05-05 (per RULE 0.16
     §2 ladder).

3. magiclink revoke() silent no-op (HIGH — Opus Rust + Sonnet Cross-cutting)
   - kei-auth-magiclink/src/{error,provider}.rs: revoke() previously returned
     Ok(()) without doing anything. Operators expecting "revoke a session"
     semantics from the AuthProvider trait got false success. Stolen magic-
     link URLs remained valid until the 15-minute TTL.
     Added Error::Unsupported variant. revoke() now returns
     Err(Unsupported(...)) with explicit guidance: "rotate KEI_MAGICLINK_HMAC_
     KEY to invalidate all live tokens, or maintain a deny-list at the caller
     layer". Test provider_revoke_returns_unsupported_error confirms the
     error variant is wired.

Tests: cargo check + cargo test both PASS. 444 functional tests across
kei-cortex (428 lib) + kei-auth-magiclink (16 lib + smoke). Pre-existing
openai_loop_wiring.rs 502 failures in routes/openai/{chat,responses}.rs are
NOT introduced by these fixes — separate unrelated triage.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Parfii-bot 2026-05-03 15:38:23 +08:00
parent cf91956001
commit c0d900a943
6 changed files with 141 additions and 31 deletions

View file

@ -34,6 +34,12 @@ pub enum Error {
/// DNA build / parse error from `kei_runtime_core::dna`. /// DNA build / parse error from `kei_runtime_core::dna`.
#[error("magiclink dna error: {0}")] #[error("magiclink dna error: {0}")]
Dna(String), Dna(String),
/// Operation is not supported by this provider — surfaced when a caller
/// asks for behaviour that the underlying primitive cannot deliver
/// (e.g. server-side revocation of a stateless HMAC token).
#[error("magiclink unsupported: {0}")]
Unsupported(String),
} }
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;

View file

@ -132,9 +132,20 @@ impl AuthProvider for MagicLinkProvider {
} }
async fn revoke(&self, _session: &Dna) -> kei_runtime_core::Result<()> { async fn revoke(&self, _session: &Dna) -> kei_runtime_core::Result<()> {
// v0.1: stateless tokens have no server-side revocation. // SECURITY: do NOT silently return Ok here. Stateless HMAC tokens
// Callers maintain a deny-list externally if needed. // cannot be server-side revoked, and a silent Ok() would lie to
Ok(()) // every caller that thought it had killed a session. Surface the
// truth so callers can either rotate the HMAC key (kills ALL live
// tokens) or maintain an external deny-list keyed on the token's
// first two parts (email + expiry).
Err(Error::Unsupported(
"magiclink: stateless tokens cannot be server-side revoked. \
Rotate KEI_MAGICLINK_HMAC_KEY to invalidate all live tokens, \
OR maintain a deny-list at the caller layer keyed on the \
token's first two parts."
.into(),
)
.into())
} }
fn is_passwordless(&self) -> bool { fn is_passwordless(&self) -> bool {

View file

@ -152,3 +152,23 @@ fn provider_build_magic_url_shape() {
// Trailing slash on base_url MUST be stripped. // Trailing slash on base_url MUST be stripped.
assert!(!url.contains("//auth/magic")); assert!(!url.contains("//auth/magic"));
} }
#[test]
fn provider_revoke_returns_unsupported_error() {
// RULE 0.16 / security review: stateless tokens cannot be server-side
// revoked. Returning Ok() silently would lie to every caller. The
// provider MUST surface an Unsupported error so the caller can choose
// to rotate the HMAC key or maintain a deny-list.
let parent = parent_dna();
let provider = MagicLinkProvider::new(parent.clone(), KEY.to_vec(), 900)
.expect("provider new ok");
let rt = tokio::runtime::Runtime::new().expect("runtime");
let err = rt
.block_on(provider.revoke(&parent))
.expect_err("revoke must error, not silently succeed");
let msg = err.to_string().to_lowercase();
assert!(
msg.contains("unsupported") || msg.contains("cannot be server-side revoked"),
"expected unsupported error mentioning revocation, got: {msg}"
);
}

View file

@ -76,6 +76,7 @@ pub fn spawn_pty(
let shell = std::env::var("SHELL").unwrap_or_else(|_| "/bin/sh".into()); let shell = std::env::var("SHELL").unwrap_or_else(|_| "/bin/sh".into());
let mut cmd = CommandBuilder::new(&shell); let mut cmd = CommandBuilder::new(&shell);
cmd.cwd(cwd); cmd.cwd(cwd);
apply_safe_env(&mut cmd);
let child = pair.slave.spawn_command(cmd).map_err(|e| e.to_string())?; let child = pair.slave.spawn_command(cmd).map_err(|e| e.to_string())?;
let reader = pair.master.try_clone_reader().map_err(|e| e.to_string())?; let reader = pair.master.try_clone_reader().map_err(|e| e.to_string())?;
let writer = pair.master.take_writer().map_err(|e| e.to_string())?; let writer = pair.master.take_writer().map_err(|e| e.to_string())?;
@ -89,6 +90,24 @@ pub fn spawn_pty(
}) })
} }
/// SECURITY: drop ALL inherited env so daemon secrets (KEI_AUTH_KEY,
/// ANTHROPIC_API_KEY, MAGICLINK_HMAC_KEY, etc.) cannot leak into the PTY
/// shell. A stored XSS on the cors_origin domain would otherwise pivot
/// directly to a local shell holding every daemon secret. After clearing,
/// re-set ONLY the minimal env a shell legitimately needs.
fn apply_safe_env(cmd: &mut CommandBuilder) {
cmd.env_clear();
let home = std::env::var("HOME").unwrap_or_else(|_| "/".into());
let path = std::env::var("PATH")
.unwrap_or_else(|_| "/usr/local/bin:/usr/bin:/bin".into());
let user = std::env::var("USER").unwrap_or_else(|_| "user".into());
cmd.env("HOME", &home);
cmd.env("PATH", &path);
cmd.env("USER", &user);
cmd.env("LANG", "en_US.UTF-8");
cmd.env("TERM", "xterm-256color");
}
/// Forward PTY stdout bytes to the WS sender via a bounded channel. Reads /// Forward PTY stdout bytes to the WS sender via a bounded channel. Reads
/// happen on a `spawn_blocking` task because the underlying `Read` is sync; /// happen on a `spawn_blocking` task because the underlying `Read` is sync;
/// the loop checks `cancel` every iteration so disconnect / shutdown /// the loop checks `cancel` every iteration so disconnect / shutdown

View file

@ -79,7 +79,27 @@ async fn main() -> Result<()> {
} }
async fn serve(args: ServeArgs) -> Result<()> { async fn serve(args: ServeArgs) -> Result<()> {
let config = AppConfig::try_new( let config = build_config(args)?;
warn_if_live2d_missing(&config.live2d_samples_dir);
let bind_ip = IpAddr::V4(Ipv4Addr::LOCALHOST);
enforce_loopback_or_local_cors(&bind_ip, &config.cors_origin)?;
let token = load_or_bootstrap_token(&config.token_path)?;
let state = AppState::new(config.clone(), token);
let router = build_router(state);
let addr = SocketAddr::new(bind_ip, config.port);
let listener = TcpListener::bind(addr)
.await
.with_context(|| format!("bind {addr}"))?;
eprintln!("kei-cortex listening on http://{addr}");
axum::serve(listener, router)
.with_graceful_shutdown(shutdown_signal())
.await
.context("axum serve")?;
Ok(())
}
fn build_config(args: ServeArgs) -> Result<AppConfig> {
AppConfig::try_new(
Some(args.port), Some(args.port),
Some(args.cors_origin), Some(args.cors_origin),
args.token_path, args.token_path,
@ -92,20 +112,31 @@ async fn serve(args: ServeArgs) -> Result<()> {
Some(args.default_provider), Some(args.default_provider),
args.token_tracker_db, args.token_tracker_db,
) )
.context("assemble config")?; .context("assemble config")
warn_if_live2d_missing(&config.live2d_samples_dir); }
let token = load_or_bootstrap_token(&config.token_path)?;
let state = AppState::new(config.clone(), token); /// SECURITY: refuse to start when bound to a non-loopback address with a
let router = build_router(state); /// public cors_origin. The combination is an XSS-to-local-shell pivot:
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), config.port); /// any stored XSS on the public origin can talk to the daemon and reach
let listener = TcpListener::bind(addr) /// `/term`, gaining a PTY shell on the local machine. The bind path stays
.await /// hardcoded to LOCALHOST today, but this guard future-proofs the code if
.with_context(|| format!("bind {addr}"))?; /// a `--bind` flag is ever added (and protects against a misconfigured
eprintln!("kei-cortex listening on http://{addr}"); /// reverse proxy that proxies the daemon publicly).
axum::serve(listener, router) fn enforce_loopback_or_local_cors(bind: &IpAddr, cors_origin: &str) -> Result<()> {
.with_graceful_shutdown(shutdown_signal()) if bind.is_loopback() {
.await return Ok(());
.context("axum serve")?; }
let lower = cors_origin.to_ascii_lowercase();
let public = !(lower.contains("localhost")
|| lower.contains("127.0.0.1")
|| lower.contains("[::1]"));
if public {
anyhow::bail!(
"refusing to bind to non-loopback address {bind} with public \
cors_origin {cors_origin:?} (XSS-to-shell risk); set \
--cors-origin to localhost-only or use --bind 127.0.0.1"
);
}
Ok(()) Ok(())
} }

View file

@ -1,7 +1,16 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# agent-stub-scan.sh — RULE 0.16 enforcement hook (PostToolUse:Agent). # agent-stub-scan.sh — RULE 0.16 enforcement hook (PostToolUse:Agent).
# Scans agent transcript for STATUS-TRUTH MARKER and validates internal # Scans agent response for STATUS-TRUTH MARKER and validates internal
# consistency. Severity per RULE 0.10 ladder; bypass via STATUS_TRUTH_BYPASS=1. # consistency. Severity per RULE 0.10 ladder; bypass via STATUS_TRUTH_BYPASS=1.
#
# Wave 47 fix: reads stdin JSON (Claude Code's actual hook payload format)
# instead of the never-set CLAUDE_AGENT_TRANSCRIPT env var. The previous
# env-var path silently exited 0 on every invocation, leaving RULE 0.16 as
# dead-code in production. Mirrors the flatten pattern from
# `agent-event-done.sh` so both hooks share one shape.
#
# TODO(2026-05-05): flip WARN -> ENFORCE per RULE 0.16 §2 ladder.
# Until then, every inconsistency exits 0 with stderr only.
set -u set -u
log_block() { log_block() {
@ -13,25 +22,39 @@ if [ "${STATUS_TRUTH_BYPASS:-0}" = "1" ]; then
exit 0 exit 0
fi fi
TRANSCRIPT="${CLAUDE_AGENT_TRANSCRIPT:-}" command -v jq >/dev/null 2>&1 || exit 0
if [ -z "$TRANSCRIPT" ] || [ ! -r "$TRANSCRIPT" ]; then
# Hook runs in many contexts; absent transcript is not a failure.
exit 0
fi
if ! grep -q "=== STATUS-TRUTH MARKER ===" "$TRANSCRIPT"; then INPUT=$(cat 2>/dev/null || true)
log_block "MISSING STATUS-TRUTH MARKER block in agent transcript. [ -n "$INPUT" ] || exit 0
TOOL=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null)
[ "$TOOL" = "Agent" ] || exit 0
# Flatten tool_response.content (string OR array OR object) to plain text.
# Same recursive shape as agent-event-done.sh so the two hooks parse the
# same payload identically.
RESPONSE=$(printf '%s' "$INPUT" | jq -r '
(.tool_response // "") as $r | def f:
if type=="string" then . elif type=="array" then map(f)|join("\n")
elif type=="object" then (if has("text") then .text elif has("content") then .content|f else tostring end)
else "" end; $r|f' 2>/dev/null || true)
[ -n "$RESPONSE" ] || exit 0
if ! printf '%s' "$RESPONSE" | grep -q '=== STATUS-TRUTH MARKER ==='; then
log_block "MISSING STATUS-TRUTH MARKER block in agent response.
RULE 0.16 §1 requires every code-implementer agent to emit the marker. RULE 0.16 §1 requires every code-implementer agent to emit the marker.
Add the block to the agent's final report; see ~/.claude/rules/shipped-vs-functional.md" Add the block to the agent's final report; see ~/.claude/rules/shipped-vs-functional.md"
# WARN tier (until 2026-05-05): exit 0 with stderr. After: exit 1. # WARN tier (until 2026-05-05): exit 0 with stderr. After: exit 1.
exit 0 exit 0
fi fi
SHIPPED=$(grep -m1 '^shipped:' "$TRANSCRIPT" | sed 's/^shipped:[[:space:]]*//' | awk '{print $1}') SHIPPED=$(printf '%s' "$RESPONSE" | grep -m1 '^shipped:' \
STUB_COUNT=$(grep -cE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|NOT-RUN|stub_|stub agent)\b' "$TRANSCRIPT" || true) | sed 's/^shipped:[[:space:]]*//' | awk '{print $1}')
STUB_COUNT=$(printf '%s' "$RESPONSE" | grep -cE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|NOT-RUN|stub_|stub agent)\b' || true)
if [ "$SHIPPED" = "functional" ] && [ "$STUB_COUNT" -gt 0 ]; then if [ "$SHIPPED" = "functional" ] && [ "${STUB_COUNT:-0}" -gt 0 ]; then
LOCS=$(grep -nE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|stub_)\b' "$TRANSCRIPT" | head -20) LOCS=$(printf '%s' "$RESPONSE" | grep -nE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|stub_)\b' | head -20)
log_block "INCONSISTENCY: shipped=functional but $STUB_COUNT stub-markers found. log_block "INCONSISTENCY: shipped=functional but $STUB_COUNT stub-markers found.
First locations: First locations:
$LOCS $LOCS
@ -40,5 +63,5 @@ Either downgrade shipped to 'partial'/'scaffolding' or remove the stubs."
exit 0 exit 0
fi fi
log_block "OK: shipped=$SHIPPED, stubs=$STUB_COUNT (consistent)." log_block "OK: shipped=$SHIPPED, stubs=${STUB_COUNT:-0} (consistent)."
exit 0 exit 0