fix(security): cortex /term env_clear + bind guard, agent-stub-scan stdin, magiclink revoke
Three independent security hardenings from cross-cutting audits.
1. cortex /term PTY env leak + bind guard (HIGH — Sonnet Cross-cutting + Opus)
- kei-cortex/src/handlers/term_pty.rs: PTY spawn was inheriting daemon's
full process env (KEI_AUTH_KEY, ANTHROPIC_API_KEY, FAL_KEY, etc.) into
every authenticated /term shell. Combined with default cors_origin =
https://keisei.app, one stored XSS on keisei.app + one bearer token =
full local shell with all daemon secrets.
Added apply_safe_env() helper: env_clear() + re-set only HOME, PATH,
USER, LANG, TERM. Spawn helper invokes it before spawn_command.
- kei-cortex/src/main.rs: extracted build_config() helper; added
enforce_loopback_or_local_cors() guard called before serve.bind. Refuses
to start if bind addr is non-loopback AND cors_origin is a public
domain — prevents the XSS-to-shell scenario in production.
2. agent-stub-scan.sh stdin parsing (HIGH — multiple audits)
- hooks/agent-stub-scan.sh: previously read $CLAUDE_AGENT_TRANSCRIPT env
var which Claude Code does NOT set on PostToolUse:Agent. Hook silently
exited 0 — RULE 0.16 enforcement was dead-code in production.
Rewrote to read stdin JSON via jq, flatten .tool_response recursively
(string|array|object via the same pattern as agent-event-done.sh),
guard on .tool_name == "Agent" and command -v jq. Maintained WARN-tier
exit-0 with TODO marker for ENFORCE flip on 2026-05-05 (per RULE 0.16
§2 ladder).
3. magiclink revoke() silent no-op (HIGH — Opus Rust + Sonnet Cross-cutting)
- kei-auth-magiclink/src/{error,provider}.rs: revoke() previously returned
Ok(()) without doing anything. Operators expecting "revoke a session"
semantics from the AuthProvider trait got false success. Stolen magic-
link URLs remained valid until the 15-minute TTL.
Added Error::Unsupported variant. revoke() now returns
Err(Unsupported(...)) with explicit guidance: "rotate KEI_MAGICLINK_HMAC_
KEY to invalidate all live tokens, or maintain a deny-list at the caller
layer". Test provider_revoke_returns_unsupported_error confirms the
error variant is wired.
Tests: cargo check + cargo test both PASS. 444 functional tests across
kei-cortex (428 lib) + kei-auth-magiclink (16 lib + smoke). Pre-existing
openai_loop_wiring.rs 502 failures in routes/openai/{chat,responses}.rs are
NOT introduced by these fixes — separate unrelated triage.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cf91956001
commit
c0d900a943
6 changed files with 141 additions and 31 deletions
|
|
@ -34,6 +34,12 @@ pub enum Error {
|
|||
/// DNA build / parse error from `kei_runtime_core::dna`.
|
||||
#[error("magiclink dna error: {0}")]
|
||||
Dna(String),
|
||||
|
||||
/// Operation is not supported by this provider — surfaced when a caller
|
||||
/// asks for behaviour that the underlying primitive cannot deliver
|
||||
/// (e.g. server-side revocation of a stateless HMAC token).
|
||||
#[error("magiclink unsupported: {0}")]
|
||||
Unsupported(String),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
|
|
|||
|
|
@ -132,9 +132,20 @@ impl AuthProvider for MagicLinkProvider {
|
|||
}
|
||||
|
||||
async fn revoke(&self, _session: &Dna) -> kei_runtime_core::Result<()> {
|
||||
// v0.1: stateless tokens have no server-side revocation.
|
||||
// Callers maintain a deny-list externally if needed.
|
||||
Ok(())
|
||||
// SECURITY: do NOT silently return Ok here. Stateless HMAC tokens
|
||||
// cannot be server-side revoked, and a silent Ok() would lie to
|
||||
// every caller that thought it had killed a session. Surface the
|
||||
// truth so callers can either rotate the HMAC key (kills ALL live
|
||||
// tokens) or maintain an external deny-list keyed on the token's
|
||||
// first two parts (email + expiry).
|
||||
Err(Error::Unsupported(
|
||||
"magiclink: stateless tokens cannot be server-side revoked. \
|
||||
Rotate KEI_MAGICLINK_HMAC_KEY to invalidate all live tokens, \
|
||||
OR maintain a deny-list at the caller layer keyed on the \
|
||||
token's first two parts."
|
||||
.into(),
|
||||
)
|
||||
.into())
|
||||
}
|
||||
|
||||
fn is_passwordless(&self) -> bool {
|
||||
|
|
|
|||
|
|
@ -152,3 +152,23 @@ fn provider_build_magic_url_shape() {
|
|||
// Trailing slash on base_url MUST be stripped.
|
||||
assert!(!url.contains("//auth/magic"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn provider_revoke_returns_unsupported_error() {
|
||||
// RULE 0.16 / security review: stateless tokens cannot be server-side
|
||||
// revoked. Returning Ok() silently would lie to every caller. The
|
||||
// provider MUST surface an Unsupported error so the caller can choose
|
||||
// to rotate the HMAC key or maintain a deny-list.
|
||||
let parent = parent_dna();
|
||||
let provider = MagicLinkProvider::new(parent.clone(), KEY.to_vec(), 900)
|
||||
.expect("provider new ok");
|
||||
let rt = tokio::runtime::Runtime::new().expect("runtime");
|
||||
let err = rt
|
||||
.block_on(provider.revoke(&parent))
|
||||
.expect_err("revoke must error, not silently succeed");
|
||||
let msg = err.to_string().to_lowercase();
|
||||
assert!(
|
||||
msg.contains("unsupported") || msg.contains("cannot be server-side revoked"),
|
||||
"expected unsupported error mentioning revocation, got: {msg}"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ pub fn spawn_pty(
|
|||
let shell = std::env::var("SHELL").unwrap_or_else(|_| "/bin/sh".into());
|
||||
let mut cmd = CommandBuilder::new(&shell);
|
||||
cmd.cwd(cwd);
|
||||
apply_safe_env(&mut cmd);
|
||||
let child = pair.slave.spawn_command(cmd).map_err(|e| e.to_string())?;
|
||||
let reader = pair.master.try_clone_reader().map_err(|e| e.to_string())?;
|
||||
let writer = pair.master.take_writer().map_err(|e| e.to_string())?;
|
||||
|
|
@ -89,6 +90,24 @@ pub fn spawn_pty(
|
|||
})
|
||||
}
|
||||
|
||||
/// SECURITY: drop ALL inherited env so daemon secrets (KEI_AUTH_KEY,
|
||||
/// ANTHROPIC_API_KEY, MAGICLINK_HMAC_KEY, etc.) cannot leak into the PTY
|
||||
/// shell. A stored XSS on the cors_origin domain would otherwise pivot
|
||||
/// directly to a local shell holding every daemon secret. After clearing,
|
||||
/// re-set ONLY the minimal env a shell legitimately needs.
|
||||
fn apply_safe_env(cmd: &mut CommandBuilder) {
|
||||
cmd.env_clear();
|
||||
let home = std::env::var("HOME").unwrap_or_else(|_| "/".into());
|
||||
let path = std::env::var("PATH")
|
||||
.unwrap_or_else(|_| "/usr/local/bin:/usr/bin:/bin".into());
|
||||
let user = std::env::var("USER").unwrap_or_else(|_| "user".into());
|
||||
cmd.env("HOME", &home);
|
||||
cmd.env("PATH", &path);
|
||||
cmd.env("USER", &user);
|
||||
cmd.env("LANG", "en_US.UTF-8");
|
||||
cmd.env("TERM", "xterm-256color");
|
||||
}
|
||||
|
||||
/// Forward PTY stdout bytes to the WS sender via a bounded channel. Reads
|
||||
/// happen on a `spawn_blocking` task because the underlying `Read` is sync;
|
||||
/// the loop checks `cancel` every iteration so disconnect / shutdown
|
||||
|
|
|
|||
|
|
@ -79,7 +79,27 @@ async fn main() -> Result<()> {
|
|||
}
|
||||
|
||||
async fn serve(args: ServeArgs) -> Result<()> {
|
||||
let config = AppConfig::try_new(
|
||||
let config = build_config(args)?;
|
||||
warn_if_live2d_missing(&config.live2d_samples_dir);
|
||||
let bind_ip = IpAddr::V4(Ipv4Addr::LOCALHOST);
|
||||
enforce_loopback_or_local_cors(&bind_ip, &config.cors_origin)?;
|
||||
let token = load_or_bootstrap_token(&config.token_path)?;
|
||||
let state = AppState::new(config.clone(), token);
|
||||
let router = build_router(state);
|
||||
let addr = SocketAddr::new(bind_ip, config.port);
|
||||
let listener = TcpListener::bind(addr)
|
||||
.await
|
||||
.with_context(|| format!("bind {addr}"))?;
|
||||
eprintln!("kei-cortex listening on http://{addr}");
|
||||
axum::serve(listener, router)
|
||||
.with_graceful_shutdown(shutdown_signal())
|
||||
.await
|
||||
.context("axum serve")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_config(args: ServeArgs) -> Result<AppConfig> {
|
||||
AppConfig::try_new(
|
||||
Some(args.port),
|
||||
Some(args.cors_origin),
|
||||
args.token_path,
|
||||
|
|
@ -92,20 +112,31 @@ async fn serve(args: ServeArgs) -> Result<()> {
|
|||
Some(args.default_provider),
|
||||
args.token_tracker_db,
|
||||
)
|
||||
.context("assemble config")?;
|
||||
warn_if_live2d_missing(&config.live2d_samples_dir);
|
||||
let token = load_or_bootstrap_token(&config.token_path)?;
|
||||
let state = AppState::new(config.clone(), token);
|
||||
let router = build_router(state);
|
||||
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), config.port);
|
||||
let listener = TcpListener::bind(addr)
|
||||
.await
|
||||
.with_context(|| format!("bind {addr}"))?;
|
||||
eprintln!("kei-cortex listening on http://{addr}");
|
||||
axum::serve(listener, router)
|
||||
.with_graceful_shutdown(shutdown_signal())
|
||||
.await
|
||||
.context("axum serve")?;
|
||||
.context("assemble config")
|
||||
}
|
||||
|
||||
/// SECURITY: refuse to start when bound to a non-loopback address with a
|
||||
/// public cors_origin. The combination is an XSS-to-local-shell pivot:
|
||||
/// any stored XSS on the public origin can talk to the daemon and reach
|
||||
/// `/term`, gaining a PTY shell on the local machine. The bind path stays
|
||||
/// hardcoded to LOCALHOST today, but this guard future-proofs the code if
|
||||
/// a `--bind` flag is ever added (and protects against a misconfigured
|
||||
/// reverse proxy that proxies the daemon publicly).
|
||||
fn enforce_loopback_or_local_cors(bind: &IpAddr, cors_origin: &str) -> Result<()> {
|
||||
if bind.is_loopback() {
|
||||
return Ok(());
|
||||
}
|
||||
let lower = cors_origin.to_ascii_lowercase();
|
||||
let public = !(lower.contains("localhost")
|
||||
|| lower.contains("127.0.0.1")
|
||||
|| lower.contains("[::1]"));
|
||||
if public {
|
||||
anyhow::bail!(
|
||||
"refusing to bind to non-loopback address {bind} with public \
|
||||
cors_origin {cors_origin:?} (XSS-to-shell risk); set \
|
||||
--cors-origin to localhost-only or use --bind 127.0.0.1"
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,16 @@
|
|||
#!/usr/bin/env bash
|
||||
# agent-stub-scan.sh — RULE 0.16 enforcement hook (PostToolUse:Agent).
|
||||
# Scans agent transcript for STATUS-TRUTH MARKER and validates internal
|
||||
# Scans agent response for STATUS-TRUTH MARKER and validates internal
|
||||
# consistency. Severity per RULE 0.10 ladder; bypass via STATUS_TRUTH_BYPASS=1.
|
||||
#
|
||||
# Wave 47 fix: reads stdin JSON (Claude Code's actual hook payload format)
|
||||
# instead of the never-set CLAUDE_AGENT_TRANSCRIPT env var. The previous
|
||||
# env-var path silently exited 0 on every invocation, leaving RULE 0.16 as
|
||||
# dead-code in production. Mirrors the flatten pattern from
|
||||
# `agent-event-done.sh` so both hooks share one shape.
|
||||
#
|
||||
# TODO(2026-05-05): flip WARN -> ENFORCE per RULE 0.16 §2 ladder.
|
||||
# Until then, every inconsistency exits 0 with stderr only.
|
||||
set -u
|
||||
|
||||
log_block() {
|
||||
|
|
@ -13,25 +22,39 @@ if [ "${STATUS_TRUTH_BYPASS:-0}" = "1" ]; then
|
|||
exit 0
|
||||
fi
|
||||
|
||||
TRANSCRIPT="${CLAUDE_AGENT_TRANSCRIPT:-}"
|
||||
if [ -z "$TRANSCRIPT" ] || [ ! -r "$TRANSCRIPT" ]; then
|
||||
# Hook runs in many contexts; absent transcript is not a failure.
|
||||
exit 0
|
||||
fi
|
||||
command -v jq >/dev/null 2>&1 || exit 0
|
||||
|
||||
if ! grep -q "=== STATUS-TRUTH MARKER ===" "$TRANSCRIPT"; then
|
||||
log_block "MISSING STATUS-TRUTH MARKER block in agent transcript.
|
||||
INPUT=$(cat 2>/dev/null || true)
|
||||
[ -n "$INPUT" ] || exit 0
|
||||
|
||||
TOOL=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null)
|
||||
[ "$TOOL" = "Agent" ] || exit 0
|
||||
|
||||
# Flatten tool_response.content (string OR array OR object) to plain text.
|
||||
# Same recursive shape as agent-event-done.sh so the two hooks parse the
|
||||
# same payload identically.
|
||||
RESPONSE=$(printf '%s' "$INPUT" | jq -r '
|
||||
(.tool_response // "") as $r | def f:
|
||||
if type=="string" then . elif type=="array" then map(f)|join("\n")
|
||||
elif type=="object" then (if has("text") then .text elif has("content") then .content|f else tostring end)
|
||||
else "" end; $r|f' 2>/dev/null || true)
|
||||
|
||||
[ -n "$RESPONSE" ] || exit 0
|
||||
|
||||
if ! printf '%s' "$RESPONSE" | grep -q '=== STATUS-TRUTH MARKER ==='; then
|
||||
log_block "MISSING STATUS-TRUTH MARKER block in agent response.
|
||||
RULE 0.16 §1 requires every code-implementer agent to emit the marker.
|
||||
Add the block to the agent's final report; see ~/.claude/rules/shipped-vs-functional.md"
|
||||
# WARN tier (until 2026-05-05): exit 0 with stderr. After: exit 1.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
SHIPPED=$(grep -m1 '^shipped:' "$TRANSCRIPT" | sed 's/^shipped:[[:space:]]*//' | awk '{print $1}')
|
||||
STUB_COUNT=$(grep -cE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|NOT-RUN|stub_|stub agent)\b' "$TRANSCRIPT" || true)
|
||||
SHIPPED=$(printf '%s' "$RESPONSE" | grep -m1 '^shipped:' \
|
||||
| sed 's/^shipped:[[:space:]]*//' | awk '{print $1}')
|
||||
STUB_COUNT=$(printf '%s' "$RESPONSE" | grep -cE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|NOT-RUN|stub_|stub agent)\b' || true)
|
||||
|
||||
if [ "$SHIPPED" = "functional" ] && [ "$STUB_COUNT" -gt 0 ]; then
|
||||
LOCS=$(grep -nE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|stub_)\b' "$TRANSCRIPT" | head -20)
|
||||
if [ "$SHIPPED" = "functional" ] && [ "${STUB_COUNT:-0}" -gt 0 ]; then
|
||||
LOCS=$(printf '%s' "$RESPONSE" | grep -nE '\b(todo!\(\)|unimplemented!\(\)|placeholder|echo-stub|stub_)\b' | head -20)
|
||||
log_block "INCONSISTENCY: shipped=functional but $STUB_COUNT stub-markers found.
|
||||
First locations:
|
||||
$LOCS
|
||||
|
|
@ -40,5 +63,5 @@ Either downgrade shipped to 'partial'/'scaffolding' or remove the stubs."
|
|||
exit 0
|
||||
fi
|
||||
|
||||
log_block "OK: shipped=$SHIPPED, stubs=$STUB_COUNT (consistent)."
|
||||
log_block "OK: shipped=$SHIPPED, stubs=${STUB_COUNT:-0} (consistent)."
|
||||
exit 0
|
||||
|
|
|
|||
Loading…
Reference in a new issue