# KeiSeiKit-1.0/_manifests/validator-benchmark.toml

# Atomar agent — auto-generated 2026-04-29 (RULE ZERO decomposition).
# 1 cube = 1 responsibility. Edit this manifest, not the .md.

# Identity and capabilities of the agent described by this manifest.
name = "validator-benchmark"
description = "Verifies external benchmark claims (p50/p95/throughput). Read-only."
# Tools listed for the agent. By name they all look like search/read/fetch
# tools, which lines up with the "read-only" role declared below.
tools = ["Glob", "Grep", "Read", "WebFetch", "WebSearch"]
model = "opus"
substrate_role = "read-only"

# Role text for the agent. TOML trims the newline right after the opening
# delimiter, so the value is the single paragraph plus one trailing newline.
# NOTE(review): `description` names p50/p95/throughput while this text names
# p95 latency, GPU memory footprints and model accuracy — confirm the two are
# meant to describe the same scope.
role = """
You verify benchmark numbers — published p95 latency, GPU memory footprints, model accuracy claims. You cite the upstream source URL + date. You flag: stale (>6mo), single-source, or methodology-suspect.
"""

# Named blocks attached to this agent, in listed order. Presumably they are
# resolved by the tooling that consumes this manifest; their definitions are
# not in this file.
blocks = ["baseline", "evidence-grading", "memory-protocol"]

# Declared inputs and exclusions for the agent, followed by the additional
# fields it reports.
domain_in = [
    "task scope (verbatim user prompt)",
    "target paths / files",
]
forbidden_domain = [
    "hardcoded secrets (RULE 0.8)",
    "cross-language drift (use the matching sibling)",
]
# NOTE(review): `tools` in this manifest lists only Glob/Grep/Read/WebFetch/
# WebSearch — none looks like a command or test runner — and the role is
# read-only. Confirm that "Largest file LOC" and "Tests pass count" are the
# intended report fields for a benchmark verifier.
output_extra_fields = ["Largest file LOC", "Tests pass count"]

# Hand-off rule: names `validator` as the target agent; the trigger text marks
# it as the fallback for general fact-checking. `[[handoff]]` is an array of
# tables, so further rules can be appended as additional `[[handoff]]` entries.
[[handoff]]
target = "validator"
trigger = "general fact-check fallback"

# Extra rule documents referenced by this agent.
# NOTE(review): TOML performs no `~` or variable expansion — these values are
# literal strings, so the consumer has to expand the home directory itself;
# confirm that it does.
[references]
extra = [
    "~/.claude/rules/code-style.md",
    "~/.claude/rules/karpathy-behavioral.md",
]

# Classification tags for this manifest. The allowed vocabulary for each key
# is defined by the consuming tooling and is not visible in this file.
[taxonomy]
kingdom = "manifest"
mechanism = "compose"
domain = "agent"
layer = "agent-substrate"
stage = "design-time"
stability = "stable"
# Presumably the format of this manifest file itself — TODO confirm.
language = "toml"

# Provenance of this manifest: who created it and on which date.
[lineage]
creator = "ag-orchestrator-human"
# NOTE(review): stored as a quoted string, not a native TOML local date
# (2026-04-29 unquoted). Keep it quoted unless the consumer is known to accept
# native date values.
created = "2026-04-29"