From 033b9efbad999ac04044ecb440d98f9b9d04e3a7 Mon Sep 17 00:00:00 2001 From: Parfii-bot Date: Sat, 2 May 2026 01:09:15 +0800 Subject: [PATCH] fix(outcome-hook): production payload uses object.content[*].text shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hook never fired in production despite passing unit tests. Diagnosed via debug-log + payload dump: real Claude Code PostToolUse:Agent sends `tool_response` as an OBJECT (not string, not array), with the agent's reply at `tool_response.content[0].text` — keys: agentId / agentType / content / prompt / status / toolStats / totalDurationMs / totalTokens / totalToolUseCount / usage. Original jq filter handled string + object (`$r.content // $r.text`) but `$r.content` returns the array verbatim; `jq -r` then dumps the JSON literal which has `\n` as escape sequences, defeating the `grep -m1 '^shipped:'` line-anchor. Fix: recursive `flatten` jq function: string → as-is array of any → recurse, join "\n" object with .text → return .text object with .content → recurse into content anything else → "" Verified end-to-end: latest 4 code-implementer spawns now write outcome=functional to ledger correctly. Beta posterior in kei-model-router begins receiving signal. Production cleanup: - Removed verbose debug-log + payload-dump diagnostic. Toggle via `AGENT_OUTCOME_DEBUG=1` env if hook stops firing in some future Claude Code version. - Hook source committed to `hooks/agent-outcome-backfill.sh` so `install.sh` deploys it on fresh installs (was only in user-home previously — gap from `feat/substrate-path-atoms` agent run). 
=== STATUS-TRUTH MARKER === shipped: functional stubs: 0 cargo-check: NOT-RUN behaviour-verified: yes follow-up-required: - none Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/DNA-INDEX.md | 17 ++--- hooks/agent-outcome-backfill.sh | 116 ++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 8 deletions(-) create mode 100755 hooks/agent-outcome-backfill.sh diff --git a/docs/DNA-INDEX.md b/docs/DNA-INDEX.md index 05bedd4..52af073 100644 --- a/docs/DNA-INDEX.md +++ b/docs/DNA-INDEX.md @@ -1,12 +1,12 @@ # KeiSeiKit DNA Encyclopedia -> Auto-generated from kei-registry. Last regenerated: 2026-05-01T16:06:16Z. -> Total blocks: 512. Per-type breakdown: +> Auto-generated from kei-registry. Last regenerated: 2026-05-01T17:09:15Z. +> Total blocks: 513. Per-type breakdown: | Type | Count | |---|---:| | atom | 121 | -| hook | 40 | +| hook | 41 | | primitive | 109 | | rule | 174 | | skill | 68 | @@ -81,8 +81,8 @@ Sorted alphabetically by name. | kei-memory-sled | primitive::md,networ… | _primitives/_rust/kei-memory-sled/Cargo.toml | 6fdae904 | | kei-memory-sqlite | primitive::md,networ… | _primitives/_rust/kei-memory-sqlite/Cargo.toml | 93761682 | | kei-migrate | primitive::cli,hash,… | _primitives/_rust/kei-migrate/Cargo.toml | fd996e76 | +| kei-model | primitive::cli,md,re… | _primitives/_rust/kei-model/Cargo.toml | 1a4038fd | | kei-model-router | primitive::md,sqlite… | _primitives/_rust/kei-model-router/Cargo.toml | b67e44b9 | -| kei-model::kei-model | primitive::_::6a479a… | _primitives/_rust/kei-model/Cargo.toml | 3f74b167 | | kei-net-ipsec | primitive::md,networ… | _primitives/_rust/kei-net-ipsec/Cargo.toml | edb79478 | | kei-net-openvpn | primitive::md,networ… | _primitives/_rust/kei-net-openvpn/Cargo.toml | a209e645 | | kei-net-wireguard | primitive::md,networ… | _primitives/_rust/kei-net-wireguard/Cargo.toml | 05a75c60 | @@ -98,10 +98,10 @@ Sorted alphabetically by name. 
| kei-provision | primitive::cli,md::1… | _primitives/_rust/kei-provision/Cargo.toml | cfa53bb3 | | kei-prune | primitive::cli,md,sq… | _primitives/_rust/kei-prune/Cargo.toml | 4454513b | | kei-refactor-engine | primitive::cli,md::c… | _primitives/_rust/kei-refactor-engine/Cargo.toml | 92e83ce0 | +| kei-registry | primitive::cli,fs,ha… | _primitives/_rust/kei-registry/Cargo.toml | 5a2e79d8 | | kei-registry::foo | primitive::_::12366c… | _primitives/_rust/kei-registry/tests/fixtures/fake-kit/_primitives/_rust/foo/Cargo.toml | 403bc4b0 | | kei-registry::foo | primitive::_::3937fa… | _primitives/_rust/kei-registry/tests/fixtures/fake-kit/_primitives/_rust/foo/Cargo.toml | 403bc4b0 | | kei-registry::kei-registry | primitive::_::30e60a… | _primitives/_rust/kei-registry/Cargo.toml | d5146bbd | -| kei-registry::kei-registry | primitive::_::4744f0… | _primitives/_rust/kei-registry/Cargo.toml | 4e595599 | | kei-registry::mini-prim | primitive::_::57f8eb… | _primitives/_rust/kei-registry/tests/fixtures/mini-kit/_primitives/_rust/mini-prim/Cargo.toml | 9fa2b304 | | kei-registry::mini-prim | primitive::_::bb2052… | _primitives/_rust/kei-registry/tests/fixtures/mini-kit/_primitives/_rust/mini-prim/Cargo.toml | 9fa2b304 | | kei-replay | primitive::cli,hash,… | _primitives/_rust/kei-replay/Cargo.toml | 74f2fcc4 | @@ -838,7 +838,7 @@ Sorted alphabetically by name. | sleep-layer::the-rule | rule::_::576bbb7f::d… | d0e03a0d | -## Hook (40) +## Hook (41) Sorted alphabetically by name. @@ -850,6 +850,7 @@ Sorted alphabetically by name. 
| agent-fork-done | shell | hook::shell::eeaa011… | hooks/agent-fork-done.sh | | agent-fork-logger | shell | hook::shell::1b43957… | hooks/agent-fork-logger.sh | | agent-heartbeat-tick | shell | hook::shell::29d6dbe… | hooks/agent-heartbeat-tick.sh | +| agent-outcome-backfill | shell | hook::shell::c22f3e8… | hooks/agent-outcome-backfill.sh | | agent-stub-scan | shell | hook::shell::92df903… | hooks/agent-stub-scan.sh | | alignment-check | shell | hook::shell::01f8f21… | hooks/alignment-check.sh | | assemble-agents | shell | hook::shell::9cd98a7… | hooks/assemble-agents.sh | @@ -1094,7 +1095,7 @@ Sorted alphabetically by name. - `kei-memory-sqlite` — 2 versions: f64bbb1d → 93761682 - `kei-memory::kei-memory` — 33 versions: adcd4146 → 4645a074 → a8883527 → 898880d6 → 63248191 → 13461cd3 → 43470a70 → a2665f92 → fc8f7afb → 347c6675 → 2405f427 → a64eaf5c → 6fd5449b → d8509f53 → bba89ea5 → 4c12d77d → 5940f848 → e3b6aa5d → 7de01ed1 → fd2b0d2d → 2054601f → 04b9f270 → 0e6a981d → 802f8487 → 0da8e0c7 → c136273f → 1035f140 → a02e197e → 739a6c0f → 5a1ebf4f → 0bf3b6f7 → 2f7698b2 → 0dd1dfc8 - `kei-migrate` — 2 versions: db2e7bd0 → fd996e76 -- `kei-model` — 2 versions: 0a6ce8bc → 1a4038fd +- `kei-model` — 3 versions: 0a6ce8bc → 1a4038fd → 1a4038fd - `kei-model-router` — 2 versions: 1280a1dd → b67e44b9 - `kei-model::kei-model` — 2 versions: 0948fb4f → 3f74b167 - `kei-net-ipsec` — 2 versions: 600684a8 → edb79478 @@ -1112,7 +1113,7 @@ Sorted alphabetically by name. 
- `kei-provision` — 2 versions: 1d613e5d → cfa53bb3 - `kei-prune` — 2 versions: 7c0a0c11 → 4454513b - `kei-refactor-engine` — 2 versions: 90048888 → 92e83ce0 -- `kei-registry` — 3 versions: 7d9570ad → 5a2e79d8 → 5a2e79d8 +- `kei-registry` — 4 versions: 7d9570ad → 5a2e79d8 → 5a2e79d8 → 5a2e79d8 - `kei-registry::foo` — 2 versions: 403bc4b0 → 403bc4b0 - `kei-registry::kei-registry` — 36 versions: a9d4104f → 4110ba86 → 6e2dc3fd → 1f486539 → f10a08ba → 48886c98 → 6aeaf85c → ca0c09e0 → 130372c0 → f69680b3 → 50364568 → 30e6dee3 → 3bb6d4f8 → 26a25696 → 0951d355 → 3261f321 → 5a190e74 → 80762a78 → d2bd49f3 → 99859be7 → b134cecf → 713f693b → 5faa1d45 → 84b3d3aa → f0fd45d4 → a50c01c9 → a4b4526d → b6f981f1 → 93eeffff → d3feb512 → f21fe020 → cbe1a45d → d5146bbd → a33bb21f → a3f03a74 → 4e595599 - `kei-registry::mini-prim` — 2 versions: 9fa2b304 → 9fa2b304 diff --git a/hooks/agent-outcome-backfill.sh b/hooks/agent-outcome-backfill.sh new file mode 100755 index 0000000..4c7a663 --- /dev/null +++ b/hooks/agent-outcome-backfill.sh @@ -0,0 +1,116 @@ +#!/bin/sh +# agent-outcome-backfill.sh — PostToolUse:Agent hook. +# +# Backfills `outcome` + `stubs_count` columns in kei-ledger after an Agent +# tool call completes, by parsing the STATUS-TRUTH MARKER block (RULE 0.16) +# emitted in the agent's final message. +# +# Closes the learning loop for kei-model-router: without an outcome signal +# the Beta posterior never converges and the router falls back to the top +# tier on every spawn. After ~10-20 invocations the prior becomes useful; +# after ~50 the router stops defaulting to Opus on unfamiliar tasks. +# +# Defensive: never blocks the tool call, never propagates errors, exits 0 +# on every path. Bypass via `OUTCOME_BACKFILL_BYPASS=1`. 
+#
+# Production payload shape (verified 2026-05-01 against real Claude Code
+# PostToolUse:Agent stdin):
+#   .tool_use_id   — string, matches agents.id in kei-ledger
+#   .tool_response — object with `.content` (array of {type,text} blocks)
+#                    plus prompt / status / agentId / agentType / usage etc
+# The `.tool_response.content[*].text` strings carry the agent's final
+# message — that's where the STATUS-TRUTH MARKER lives.
+set -u
+
+# Drain stdin once up front; the debug log and the parser both use it.
+PAYLOAD=$(cat 2>/dev/null || true)
+
+# Optional debug log. Toggle via `AGENT_OUTCOME_DEBUG=1` when the hook
+# stops firing; off by default to keep the production path cheap and
+# silent. `${HOME:-}` stops `set -u` aborting non-zero if HOME is unset.
+if [ "${AGENT_OUTCOME_DEBUG:-0}" = "1" ]; then
+  LOG="${HOME:-}/.claude/agent-outcome-backfill.log"
+  printf '[%s] invoked, payload-len=%d\n' \
+    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
+    "${#PAYLOAD}" \
+    >> "$LOG" 2>&1 || true
+fi
+
+# Bypass.
+if [ "${OUTCOME_BACKFILL_BYPASS:-0}" = "1" ]; then
+  exit 0
+fi
+
+# Tool dependencies — silent no-op if missing.
+command -v jq >/dev/null 2>&1 || exit 0
+command -v sqlite3 >/dev/null 2>&1 || exit 0
+
+DB="${KEI_LEDGER_DB:-${HOME:-}/.claude/agents/ledger.sqlite}"
+[ -f "$DB" ] || exit 0
+[ -n "$PAYLOAD" ] || exit 0
+
+# Extract tool_use_id (top-level key, snake_case or camelCase).
+TOOL_USE_ID=$(printf '%s' "$PAYLOAD" | jq -r '.tool_use_id // .toolUseId // empty' 2>/dev/null || true)
+[ -n "$TOOL_USE_ID" ] || exit 0
+
+# Extract the agent's final message text. Recursively flattens whatever
+# tool_response shape Claude Code happens to use:
+#   string                     → return as-is
+#   array of strings/objects   → flatten each, join with newlines
+#   object with string `.text` → return .text
+#   object with `.content`     → recurse into content
+#   anything else              → empty (hook exits below)
+RESPONSE=$(printf '%s' "$PAYLOAD" | jq -r '
+  def flatten:
+    if type == "string" then .
+    elif type == "array" then map(flatten) | join("\n")
+    elif type == "object" then
+      if (.text | type) == "string" then .text
+      elif has("content") then .content | flatten
+      else "" end
+    else "" end;
+  (.tool_response // .toolResponse // "") | flatten
+' 2>/dev/null || true)
+[ -n "$RESPONSE" ] || exit 0
+
+# Keep only the text from the first STATUS-TRUTH MARKER line onward, so a
+# stray `shipped:` / `stubs:` line earlier in the message is never taken
+# for the verdict; CRs are dropped so CRLF text still parses. An absent
+# marker is normal (read-only / research agents) — silent no-op.
+BLOCK=$(printf '%s\n' "$RESPONSE" \
+  | tr -d '\r' \
+  | sed -n '/=== STATUS-TRUTH MARKER ===/,$p' 2>/dev/null || true)
+[ -n "$BLOCK" ] || exit 0
+
+# Parse `shipped:` — first match wins, lowercased + trimmed first word.
+SHIPPED=$(printf '%s\n' "$BLOCK" \
+  | grep -m1 '^shipped:' \
+  | sed 's/^shipped:[[:space:]]*//' \
+  | awk '{print tolower($1)}' 2>/dev/null || true)
+
+# Validate against ledger CHECK constraint domain.
+case "$SHIPPED" in
+  functional|partial|scaffolding|fail) ;;
+  *) exit 0 ;;
+esac
+
+# Parse `stubs:` count — first integer on the line, default 0.
+STUBS=$(printf '%s\n' "$BLOCK" \
+  | grep -m1 '^stubs:' \
+  | sed 's/^stubs:[[:space:]]*//' \
+  | grep -oE '[0-9]+' \
+  | head -1 2>/dev/null || true)
+[ -n "$STUBS" ] || STUBS=0
+
+# Idempotent UPDATE. The id comes from the hook payload, so single quotes
+# are doubled before it is spliced into the SQL string literal. Failure
+# (locked DB, etc.) → advisory only, never blocks the originating call.
+SQL_ID=$(printf '%s' "$TOOL_USE_ID" | sed "s/'/''/g")
+sqlite3 "$DB" \
+  "UPDATE agents SET outcome='$SHIPPED', stubs_count=$STUBS WHERE id='$SQL_ID';" \
+  2>/dev/null || {
+    printf '[agent-outcome-backfill] UPDATE failed for id=%s\n' "$TOOL_USE_ID" >&2
+    exit 0
+  }
+
+exit 0