Merge branch 'feat/v0.10-genesis-scan' — kei-memory + genesis-scan v0.10.0 reconciled

This commit is contained in:
Parfii-bot 2026-04-22 01:04:32 +08:00
commit 48b1a8cdcf
12 changed files with 718 additions and 5 deletions

View file

@ -1,6 +1,6 @@
# KeiSeiKit — Constructor-Pattern Agent Kit for Claude Code
KeiSeiKit is a comprehensive drop-in toolkit for [Claude Code](https://claude.com/claude-code). It ships a curated set of composable behavioral blocks, a Rust assembler that builds agent `.md` files from TOML manifests deterministically, nine pre-wired hooks (three of them dedicated to RULE 0.14 session self-audit), 35 portable skills (including an interactive `/new-agent` wizard, 10 hub-and-spoke pipelines, and the `/self-audit` retrospective skill), 9 Rust primitive crates, 13 shell primitives, and 11 cross-tool bridge templates. Everything follows a Constructor Pattern: one file per concern, manifests as single source of truth, and the generated agent files are regenerated on every relevant edit.
KeiSeiKit is a comprehensive drop-in toolkit for [Claude Code](https://claude.com/claude-code). It ships a curated set of composable behavioral blocks, a Rust assembler that builds agent `.md` files from TOML manifests deterministically, nine pre-wired hooks (three of them dedicated to RULE 0.14 session self-audit), 35 portable skills (including an interactive `/new-agent` wizard, 10 hub-and-spoke pipelines, and the `/self-audit` retrospective skill), **10 Rust primitive crates** (including `genesis-scan` patent-IP leak detector), 13 shell primitives, and 11 cross-tool bridge templates. Everything follows a Constructor Pattern: one file per concern, manifests as single source of truth, and the generated agent files are regenerated on every relevant edit.
The kit is MIT-licensed and fully generic — install it on a fresh machine and you get a sane 12-agent fleet (implementers, critics, researchers, cost-guardians, and more — all namespaced under `kei-*` so they won't collide with your own same-named agents), a wizard for spinning up new project specialists, 10 pipeline skills that combine primitives end-to-end (`/compose-solution`, `/site-create`, `/schema-design`, `/observability-setup`, `/auth-setup`, `/api-design`, `/ci-scaffold`, `/test-matrix`, `/docs-scaffold`, `/new-project`, `/vm-provision`), and a build pipeline that keeps every agent derivable from its manifest.
@ -59,7 +59,7 @@ By default `./install.sh` is **minimal** — agents + hooks + skills + bridges,
| Profile | Primitives added | Install time | Disk (approx) |
|---|---|---|---|
| `minimal` (default) | none | ~5s | ~2 MB |
| `core` | `tomd` | ~5s | ~2 MB |
| `core` | `tomd`, `genesis-scan` | ~10s | ~5 MB |
| `frontend` | 8 site tools: `mock-render`, `visual-diff`, `tokens-sync`, `design-scrape`, `live-preview`, `figma-tokens`, `frontend-inspect`, `screenshot-decode` | ~60s | ~80 MB |
| `ops` | 8 infra tools: `kei-ledger`, `ssh-check`, `firewall-diff`, `provision-hetzner`, `provision-vultr`, `harden-base`, `metrics-scrape`, `log-ship` | ~90s | ~50 MB |
| `dev` | 4 dev tools: `kei-migrate`, `kei-changelog`, `kei-ci-lint`, `kei-docs-scaffold` | ~60s | ~40 MB |
@ -154,7 +154,7 @@ Requires the `kei-memory` primitive. Included in the `dev` and `full` profiles;
## Primitives (Rust)
`_primitives/_rust/` is a Cargo workspace with 8 single-binary crates. `install.sh` builds `--release` and drops binaries at `~/.claude/agents/_primitives/_rust/target/release/<name>`.
`_primitives/_rust/` is a Cargo workspace with 9 single-binary crates. `install.sh` builds `--release` and drops binaries at `~/.claude/agents/_primitives/_rust/target/release/<name>`.
| Crate | Purpose |
|---|---|
@ -167,6 +167,7 @@ Requires the `kei-memory` primitive. Included in the `dev` and `full` profiles;
| `visual-diff` | Pixel diff with tolerance — used in `/site-create` screenshot-regression loop |
| `tokens-sync` | Design tokens JSON → Tailwind config extend + CSS variables under `:root` |
| `kei-memory` | Session retrospective + recurring pattern detector; offline-first analyzer powering RULE 0.14 self-audit |
| `genesis-scan` | Patent-IP leak scanner — runs as a git pre-commit or CI gate (complements the `genesis-leak-guard` runtime hook) |
## Primitives (shell)

View file

@ -17,11 +17,11 @@
[profile]
minimal = []
core = ["tomd"]
core = ["tomd", "genesis-scan"]
frontend = ["mock-render", "visual-diff", "tokens-sync", "design-scrape", "live-preview", "figma-tokens", "frontend-inspect", "screenshot-decode"]
ops = ["kei-ledger", "ssh-check", "firewall-diff", "provision-hetzner", "provision-vultr", "harden-base", "metrics-scrape", "log-ship"]
dev = ["kei-migrate", "kei-changelog", "kei-ci-lint", "kei-docs-scaffold", "kei-memory"]
full = ["tomd", "kei-ledger", "kei-migrate", "kei-changelog", "ssh-check", "firewall-diff", "mock-render", "visual-diff", "tokens-sync", "design-scrape", "live-preview", "figma-tokens", "frontend-inspect", "screenshot-decode", "provision-hetzner", "provision-vultr", "harden-base", "metrics-scrape", "log-ship", "kei-ci-lint", "kei-docs-scaffold", "kei-memory"]
full = ["tomd", "genesis-scan", "kei-ledger", "kei-migrate", "kei-changelog", "ssh-check", "firewall-diff", "mock-render", "visual-diff", "tokens-sync", "design-scrape", "live-preview", "figma-tokens", "frontend-inspect", "screenshot-decode", "provision-hetzner", "provision-vultr", "harden-base", "metrics-scrape", "log-ship", "kei-ci-lint", "kei-docs-scaffold", "kei-memory"]
# --- shell primitives (13) -------------------------------------------------
@ -158,3 +158,9 @@ kind = "rust"
crate = "kei-memory"
deps = ["rusqlite bundled (no system sqlite required)"]
desc = "Session retrospective + recurring pattern detector (offline-first, RULE 0.14)"
# genesis-scan: Rust primitive built from the `genesis-scan` workspace crate.
# It is referenced by name from the `core` and `full` profile lists.
[primitive.genesis-scan]
kind = "rust"
crate = "genesis-scan"
# NOTE(review): `deps` appears to list external/system requirements (compare
# the kei-memory entry's rusqlite note); empty here — confirm.
deps = []
desc = "Patent-IP leak scanner (Genesis-term blacklist, CI pre-commit gate)"

View file

@ -584,6 +584,19 @@ dependencies = [
"version_check",
]
[[package]]
name = "genesis-scan"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"regex",
"serde",
"serde_json",
"tempfile",
"walkdir",
]
[[package]]
name = "getrandom"
version = "0.2.17"
@ -1523,6 +1536,15 @@ version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
@ -2206,6 +2228,16 @@ dependencies = [
"tempfile",
]
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
@ -2331,6 +2363,15 @@ dependencies = [
"wasite",
]
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "windows-core"
version = "0.62.2"

View file

@ -10,6 +10,7 @@ members = [
"visual-diff",
"tokens-sync",
"kei-memory",
"genesis-scan",
]
[workspace.package]

View file

@ -0,0 +1,20 @@
# Manifest for the genesis-scan binary crate (member of the primitives workspace).
[package]
name = "genesis-scan"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true

[[bin]]
name = "genesis-scan"
path = "src/main.rs"

# Sorted alphabetically; `*.workspace = true` entries inherit their version
# from the workspace manifest.
[dependencies]
anyhow = "1"
clap.workspace = true
regex = "1"
serde.workspace = true
serde_json.workspace = true
walkdir = "2"

[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,100 @@
//! genesis-scan — patent-IP leak scanner.
//!
//! Complements the runtime Claude-Code hook (`~/.claude/hooks/genesis-leak-guard.sh`)
//! by providing a binary suitable for `git` pre-commit hooks and CI pipelines.
//!
//! USAGE
//! genesis-scan --path <dir> walk a tree
//! genesis-scan --path <file> single file
//! genesis-scan --staged scan `git diff --cached`
//! genesis-scan --stdin --file <label> scan piped content
//! genesis-scan --format=human|json|github-actions
//! genesis-scan --exit-on-hit CI mode: exit 2 on any hit
//! genesis-scan --list-patterns dump rules and exit
//!
//! EXIT
//! 0 no hits (or --exit-on-hit not set)
//! 1 usage / runtime error
//! 2 hits found AND --exit-on-hit is set
mod patterns;
mod report;
mod scanner;
use clap::Parser;
use std::path::PathBuf;
use std::process::ExitCode;
// Command-line interface for genesis-scan, parsed with clap's derive API.
//
// Exactly one input mode is expected: --path, --staged or --stdin (they are
// declared mutually exclusive below). --list-patterns is handled first in
// `run()` and needs no input mode.
//
// NOTE: clap's derive turns the `///` lines on the fields into `--help` text,
// so they are user-facing strings; maintainer notes here use `//` instead.
#[derive(Parser, Debug)]
#[command(name = "genesis-scan", about = "Genesis / patent-IP leak scanner.")]
struct Cli {
/// Path to a file or directory to scan.
// Rejected together with --staged, --stdin or --list-patterns.
#[arg(long, conflicts_with_all = ["staged", "stdin", "list_patterns"])]
path: Option<PathBuf>,
/// Scan staged git blobs (pre-commit hook mode).
// Rejected together with --path or --stdin (but not --list-patterns).
#[arg(long, conflicts_with_all = ["path", "stdin"])]
staged: bool,
/// Read content from stdin.
// Rejected together with --path or --staged (but not --list-patterns).
#[arg(long, conflicts_with_all = ["path", "staged"])]
stdin: bool,
/// Virtual file label for --stdin (used in report).
// Only read by the --stdin branch of `collect_hits`.
#[arg(long, default_value = "<stdin>")]
file: String,
/// Output format.
#[arg(long, value_enum, default_value_t = report::Format::Human)]
format: report::Format,
/// Exit with code 2 if any hit is found (CI-friendly).
// Without this flag `run()` returns success even when hits were reported.
#[arg(long)]
exit_on_hit: bool,
/// Dump forbidden patterns + exempt paths and exit.
#[arg(long)]
list_patterns: bool,
}
/// Run the scan mode selected on the command line and return its hits.
///
/// Precedence is `--staged`, then `--stdin`, then `--path`; if none was
/// given the call fails with a usage error.
fn collect_hits(
    cli: &Cli,
    combined: &regex::Regex,
    individual: &[(String, regex::Regex)],
) -> anyhow::Result<Vec<scanner::Hit>> {
    if cli.staged {
        return scanner::scan_staged(combined, individual);
    }
    if cli.stdin {
        return scanner::scan_stdin(&cli.file, combined, individual);
    }
    match cli.path.as_deref() {
        Some(target) => scanner::scan_path(target, combined, individual),
        None => anyhow::bail!("need one of --path / --staged / --stdin / --list-patterns"),
    }
}
/// Parse arguments, run the requested action and compute the exit code.
///
/// Returns `SUCCESS` after `--list-patterns`, and after a scan unless
/// `--exit-on-hit` was given and at least one hit was found (then code 2).
/// Errors are propagated for `main` to report.
fn run() -> anyhow::Result<ExitCode> {
    let cli = Cli::parse();

    // Listing the rules needs neither compiled patterns nor an input mode.
    if cli.list_patterns {
        report::dump_patterns();
        return Ok(ExitCode::SUCCESS);
    }

    let (combined, individual) = patterns::compile()?;
    let hits = collect_hits(&cli, &combined, &individual)?;
    report::emit(&hits, cli.format);

    let must_fail = cli.exit_on_hit && !hits.is_empty();
    let code = if must_fail {
        ExitCode::from(2)
    } else {
        ExitCode::SUCCESS
    };
    Ok(code)
}
/// Entry point: any error from `run` is printed to stderr and mapped to
/// exit code 1.
fn main() -> ExitCode {
    run().unwrap_or_else(|e| {
        eprintln!("genesis-scan: {e}");
        ExitCode::from(1)
    })
}

View file

@ -0,0 +1,91 @@
//! Forbidden Genesis-IP term patterns + exempt-scope path globs.
//!
//! Source of truth: `~/.claude/hooks/genesis-leak-guard.sh`.
//! Keep the two in sync — if a term lands in one, add it to the other.
use regex::Regex;
/// Forbidden-term regex strings. Matches the master list in
/// `~/.claude/hooks/genesis-leak-guard.sh` (keep in sync).
///
/// Each entry is handed unchanged to `Regex::new` by `compile()`, so the
/// syntax is that of the `regex` crate and matching is case-sensitive (no
/// `(?i)` flag is added). The strings are also echoed verbatim as the
/// `pattern` of every hit and by `--list-patterns`.
pub const FORBIDDEN: &[&str] = &[
r"Genesis",
r"Born[- ]?rule",
r"Born[- ]?GAP",
r"Weingarten",
// Literal "normalize(S", optional whitespace, then "+".
r"normalize\(S[[:space:]]*\+",
// U+1D40 is MODIFIER LETTER CAPITAL T; a plain ASCII `T` is accepted too.
r"x.?S[\u{1D40}T].?x",
r"rank-1 attractor",
r"Frobenius sphere",
r"submartingale",
// Both capitalisations are listed because matching is case-sensitive.
r"paradigm-native",
r"Paradigm-native",
r"Haar.*measure",
r"Haar.*blindness",
// U+03C3 (Greek small sigma) followed by U+2081 (subscript one).
r"\u{03C3}\u{2081}",
// Whole-word identifiers T10..T59 and R20..R29.
r"\bT[1-5][0-9]\b",
r"\bR2[0-9]\b",
r"EST-[0-9]",
r"Path [A-Z].*Genesis",
r"Genesis.*Path [A-Z]",
r"G2.*fixed[- ]?point",
r"G2.*attractor",
r"Bargmann",
// U+03C0 is Greek small pi.
r"pentagon.*2\u{03C0}/5",
// U+03B8 is Greek small theta.
r"\u{03B8}_2[0-9]",
r"Fibonacci.*events",
r"Kuramoto.*Genesis",
];
/// Path substrings that mark a file as exempt. If any substring matches
/// anywhere in the absolute or relative path, the file is skipped.
///
/// Every entry is delimited by `/` on both sides, so it only matches whole
/// directory components, never a partial directory or file name.
///
/// Notes:
/// - `genesis-scan` itself is exempted — its own source stores these terms.
/// - `.claude/memory/concepts/` is exempted — private user concepts.
pub const EXEMPT_SUBSTRINGS: &[&str] = &[
"/KeiLab/",
"/KeiSeiBundle/",
"/theory/",
"/keinet/keinet-cfc-eigen/",
"/keinet/keinet-gpu-wgpu/",
"/memory/chatlogs/ml-keilab/",
"/.claude/memory/concepts/",
"/genesis-scan/",
];
/// Directory names to skip entirely while walking.
///
/// Compared for exact equality against a single directory name (see
/// `should_skip_dir`), not against the full path.
pub const SKIP_DIRS: &[&str] = &[
"target",
"node_modules",
".git",
"dist",
"build",
".venv",
"__pycache__",
];
/// Compile every entry of `FORBIDDEN`.
///
/// Returns `(combined, individual)`: `combined` is one alternation of all
/// patterns, usable as a cheap "does anything match?" test, and
/// `individual` pairs each pattern source with its own compiled regex so a
/// caller can tell which rule fired.
///
/// Fails if any pattern, or the combined alternation, is not a valid regex.
pub fn compile() -> anyhow::Result<(Regex, Vec<(String, Regex)>)> {
    let mut individual: Vec<(String, Regex)> = Vec::with_capacity(FORBIDDEN.len());
    for p in FORBIDDEN {
        let rx = Regex::new(p).map_err(|e| anyhow::anyhow!("bad pattern {p}: {e}"))?;
        individual.push((p.to_string(), rx));
    }

    // Each pattern sits in its own non-capturing group: (?:a)|(?:b)|...
    let alternation = FORBIDDEN
        .iter()
        .map(|p| format!("(?:{p})"))
        .collect::<Vec<_>>()
        .join("|");
    let combined = Regex::new(&alternation)?;

    Ok((combined, individual))
}
/// True iff the given path lies in an exempt scope.
///
/// A path is exempt when it contains any entry of `EXEMPT_SUBSTRINGS`.
/// Those entries all start with `/`, so a relative path that *begins* with
/// an exempt directory (e.g. `theory/notes.md`, the form produced by
/// `git diff --name-only`) would never match on its own. The path is
/// therefore anchored with a leading `/` before matching, which makes
/// `theory/notes.md` and `./theory/notes.md` behave the same.
pub fn is_exempt(path: &str) -> bool {
    let anchored: std::borrow::Cow<'_, str> = if path.starts_with('/') {
        std::borrow::Cow::Borrowed(path)
    } else {
        std::borrow::Cow::Owned(format!("/{path}"))
    };
    EXEMPT_SUBSTRINGS.iter().any(|s| anchored.contains(s))
}
/// Whether `name` (a single directory name, not a path) is listed in
/// `SKIP_DIRS` and should therefore not be descended into.
pub fn should_skip_dir(name: &str) -> bool {
    SKIP_DIRS.contains(&name)
}

View file

@ -0,0 +1,67 @@
//! Output formatters for scanner hits.
use crate::scanner::Hit;
// Output format chosen with `--format`; `emit` dispatches on it.
// (`//` is used on purpose: `///` on `ValueEnum` variants would become
// clap help text.)
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum Format {
// Plain-text report, rendered by `emit_human`.
Human,
// Pretty-printed JSON object, rendered by `emit_json`.
Json,
// `::error` workflow-command lines, rendered by `emit_github`.
GithubActions,
}
pub fn emit(hits: &[Hit], format: Format) {
match format {
Format::Human => emit_human(hits),
Format::Json => emit_json(hits),
Format::GithubActions => emit_github(hits),
}
}
/// Human-readable report: a summary line and one line per hit on stdout,
/// followed by remediation advice on stderr when there is at least one hit.
fn emit_human(hits: &[Hit]) {
    let total = hits.len();
    if total == 0 {
        println!("genesis-scan: clean (0 hits)");
        return;
    }

    println!("genesis-scan: {} hit(s)", total);
    hits.iter().for_each(|hit| {
        println!(" {}:{}: {} [pattern: {}]", hit.file, hit.line, hit.r#match, hit.pattern);
    });

    // Advice goes to stderr so stdout carries only the report itself.
    let advice = [
        "",
        "Genesis / patent-IP terms detected. Review and remove before commit.",
        "If false positive, move the file into an exempt scope or use --list-patterns",
        "to inspect rules.",
    ];
    for line in advice.iter() {
        eprintln!("{}", line);
    }
}
/// JSON report on stdout: `{"hit_count": <n>, "hits": [...]}`,
/// pretty-printed. If serialization fails, an empty object is printed.
fn emit_json(hits: &[Hit]) {
    let payload = serde_json::json!({
        "hit_count": hits.len(),
        "hits": hits,
    });
    let rendered = match serde_json::to_string_pretty(&payload) {
        Ok(text) => text,
        Err(_) => String::from("{}"),
    };
    println!("{}", rendered);
}
/// Escape the message part of a GitHub Actions workflow command.
/// `%` must be encoded first so the other escapes are not double-encoded.
fn gha_escape_data(s: &str) -> String {
    s.replace('%', "%25")
        .replace('\r', "%0D")
        .replace('\n', "%0A")
}

/// Escape a property value (`file=...`) of a workflow command: same as the
/// message escaping, plus `:` and `,`, which delimit properties.
fn gha_escape_property(s: &str) -> String {
    gha_escape_data(s).replace(':', "%3A").replace(',', "%2C")
}

/// GitHub Actions report: one `::error` workflow command per hit on stdout,
/// or a plain "0 hits" line when the scan is clean.
///
/// File names and matched text come from scanned content, so they are
/// escaped; otherwise a `,` or `:` in a path, or a `%` / line break in the
/// matched text, would corrupt the annotation.
fn emit_github(hits: &[Hit]) {
    for h in hits {
        println!(
            "::error file={},line={}::genesis-scan: '{}' matches forbidden pattern '{}'",
            gha_escape_property(&h.file),
            h.line,
            gha_escape_data(&h.r#match),
            gha_escape_data(&h.pattern)
        );
    }
    if hits.is_empty() {
        println!("genesis-scan: 0 hits");
    }
}
/// Print one `--list-patterns` section to stdout: a `# title` header, each
/// item on its own line, then a blank separator line.
fn dump_section(title: &str, items: &[&str]) {
    println!("# {}", title);
    items.iter().for_each(|entry| println!("{}", entry));
    println!();
}
pub fn dump_patterns() {
dump_section("genesis-scan patterns (forbidden)", crate::patterns::FORBIDDEN);
dump_section("genesis-scan exempt path substrings", crate::patterns::EXEMPT_SUBSTRINGS);
dump_section("genesis-scan skipped directory names", crate::patterns::SKIP_DIRS);
}

View file

@ -0,0 +1,179 @@
//! File-system walker + per-file pattern matcher.
use crate::patterns;
use anyhow::Result;
use regex::Regex;
use serde::Serialize;
use std::fs;
use std::path::Path;
use walkdir::WalkDir;
/// One forbidden-pattern match found in scanned content.
///
/// Serialized as-is into the `hits` array of the JSON report.
#[derive(Debug, Clone, Serialize)]
pub struct Hit {
/// File path or virtual label the content was scanned under.
pub file: String,
/// 1-based line number of the match.
pub line: usize,
/// The matched text. (Raw identifier because `match` is a keyword;
/// presumably serialized under the key `match` — confirm.)
pub r#match: String,
/// Source string of the pattern that matched, as listed in `FORBIDDEN`.
pub pattern: String,
}
/// Scan already-decoded text and return every hit.
///
/// `content` is a `&str`; callers holding raw bytes are expected to decode
/// them first (the callers in this module use a lossy UTF-8 conversion).
/// Does NOT consult exempt rules — the caller decides whether to call this.
///
/// * `file_label` — path or virtual label copied into each `Hit`.
/// * `combined`   — alternation of all patterns, used only as a prefilter.
/// * `individual` — `(source, regex)` pairs; identifies which rule matched.
///
/// Matching is per line. For each line, each pattern contributes at most one
/// hit (its first match on that line); line numbers are 1-based.
pub fn scan_content(
    file_label: &str,
    content: &str,
    combined: &Regex,
    individual: &[(String, Regex)],
) -> Vec<Hit> {
    // Fast path: most content contains no forbidden term at all.
    if !combined.is_match(content) {
        return Vec::new();
    }
    let mut out = Vec::new();
    for (lineno, line) in content.lines().enumerate() {
        // Same prefilter per line: if the alternation does not match this
        // line, none of its branches can, so skip the per-pattern loop.
        if !combined.is_match(line) {
            continue;
        }
        for (pat_src, rx) in individual {
            if let Some(m) = rx.find(line) {
                out.push(Hit {
                    file: file_label.to_string(),
                    line: lineno + 1,
                    r#match: m.as_str().to_string(),
                    pattern: pat_src.clone(),
                });
            }
        }
    }
    out
}
/// Scan a single on-disk file.
///
/// Yields no hits when the path is exempt, the file cannot be read, or the
/// file looks binary; otherwise the bytes are lossily decoded as UTF-8 and
/// scanned under the path's display form.
pub fn scan_file(path: &Path, combined: &Regex, individual: &[(String, Regex)]) -> Vec<Hit> {
    let label = path.to_string_lossy();
    if patterns::is_exempt(&label) {
        return Vec::new();
    }

    let bytes = match fs::read(path) {
        Ok(data) => data,
        Err(_) => return Vec::new(),
    };

    // Binary heuristic: a NUL byte within the first 8 KiB.
    let probe = &bytes[..bytes.len().min(8192)];
    if probe.contains(&0) {
        return Vec::new();
    }

    scan_content(&label, &String::from_utf8_lossy(&bytes), combined, individual)
}
/// Recursively scan a directory tree.
///
/// Sub-directories whose name is in `SKIP_DIRS` are not descended into;
/// exempt paths are filtered per file by `scan_file`. Entries that cannot
/// be read during the walk are skipped (best effort).
///
/// The skip-by-name rule is applied only *below* `root`: `filter_entry`
/// is also invoked for the root entry itself, so without the depth check
/// an explicit `--path build` (or `target`, `dist`, ...) would prune the
/// whole walk and report a false "clean".
pub fn scan_dir(root: &Path, combined: &Regex, individual: &[(String, Regex)]) -> Result<Vec<Hit>> {
    let walker = WalkDir::new(root).into_iter().filter_entry(|e| {
        // Depth 0 is the root the caller asked for — always keep it.
        if e.depth() == 0 || !e.file_type().is_dir() {
            return true;
        }
        !patterns::should_skip_dir(&e.file_name().to_string_lossy())
    });

    let mut hits = Vec::new();
    for entry in walker {
        // Unreadable entries (permissions, races) are skipped, not fatal.
        let Ok(entry) = entry else {
            continue;
        };
        if entry.file_type().is_file() {
            hits.extend(scan_file(entry.path(), combined, individual));
        }
    }
    Ok(hits)
}
/// Scan `root`, which may be a regular file or a directory.
///
/// Fails with "path not found" when it is neither.
pub fn scan_path(root: &Path, combined: &Regex, individual: &[(String, Regex)]) -> Result<Vec<Hit>> {
    if root.is_file() {
        return Ok(scan_file(root, combined, individual));
    }
    if root.is_dir() {
        return scan_dir(root, combined, individual);
    }
    anyhow::bail!("path not found: {}", root.display())
}
/// Scan content read from stdin under a virtual file label.
///
/// Stdin is read as raw bytes and lossily decoded as UTF-8, matching how
/// on-disk files and staged blobs are handled, so input containing invalid
/// UTF-8 is still scanned instead of aborting with an I/O error.
///
/// Stdin is always drained, even when `label` turns out to be exempt (in
/// which case no hits are returned).
///
/// Errors only if reading stdin itself fails.
pub fn scan_stdin(label: &str, combined: &Regex, individual: &[(String, Regex)]) -> Result<Vec<Hit>> {
    use std::io::Read;
    let mut raw = Vec::new();
    std::io::stdin().read_to_end(&mut raw)?;
    if patterns::is_exempt(label) {
        return Ok(Vec::new());
    }
    let text = String::from_utf8_lossy(&raw);
    Ok(scan_content(label, &text, combined, individual))
}
/// Fetch the staged (index) version of `path` by running `git show :<path>`.
///
/// Returns `None` when git cannot be spawned or exits unsuccessfully, for
/// example when the entry has no blob (submodule, gitlink, etc.).
fn read_staged_blob(path: &str) -> Option<Vec<u8>> {
    let spec = format!(":{}", path);
    match std::process::Command::new("git").arg("show").arg(&spec).output() {
        Ok(out) if out.status.success() => Some(out.stdout),
        _ => None,
    }
}
/// Return the staged path names reported by
/// `git diff --cached --name-only -z`.
///
/// The output is NUL-separated; empty chunks and names that are not valid
/// UTF-8 are dropped. Fails if git cannot be run or exits unsuccessfully.
fn list_staged_paths() -> Result<Vec<String>> {
    let out = std::process::Command::new("git")
        .args(["diff", "--cached", "--name-only", "-z"])
        .output()?;
    if !out.status.success() {
        anyhow::bail!("git diff --cached failed (not in a git repo?)");
    }

    let mut paths = Vec::new();
    for chunk in out.stdout.split(|b| *b == 0) {
        if chunk.is_empty() {
            continue;
        }
        if let Ok(name) = std::str::from_utf8(chunk) {
            paths.push(name.to_string());
        }
    }
    Ok(paths)
}
/// Scan staged git blobs (pre-commit mode).
///
/// If the environment variable `GENESIS_SCAN_TEST_STAGED` is set, its value
/// is parsed as a test fixture and git is not consulted at all.
///
/// Otherwise every staged path is processed like an on-disk file in
/// `scan_file`: exempt paths are skipped, blobs that cannot be read from the
/// index are skipped, blobs that look binary (NUL byte within the first
/// 8 KiB) are skipped, and the rest is lossily decoded as UTF-8 and scanned.
/// The binary check keeps staged images and archives from producing
/// spurious hits that the `--path` mode would not report.
///
/// Fails only if the list of staged paths cannot be obtained.
pub fn scan_staged(combined: &Regex, individual: &[(String, Regex)]) -> Result<Vec<Hit>> {
    if let Ok(fixture) = std::env::var("GENESIS_SCAN_TEST_STAGED") {
        return Ok(scan_staged_fixture(&fixture, combined, individual));
    }
    let mut hits = Vec::new();
    for path in list_staged_paths()? {
        if patterns::is_exempt(&path) {
            continue;
        }
        let Some(bytes) = read_staged_blob(&path) else {
            continue;
        };
        // Same binary heuristic as `scan_file`.
        if bytes.iter().take(8192).any(|b| *b == 0) {
            continue;
        }
        let text = String::from_utf8_lossy(&bytes);
        hits.extend(scan_content(&path, &text, combined, individual));
    }
    Ok(hits)
}
/// Test-only: scan a fixture string standing in for the staged set.
///
/// Layout: `path\x1econtent\x1fpath\x1econtent...` — records are split on
/// `\x1f` and each record is split into path and content at its first
/// `\x1e`. Empty records, records without `\x1e`, and exempt paths are
/// ignored. NUL cannot be used as a separator because OS environment values
/// forbid it.
fn scan_staged_fixture(fixture: &str, combined: &Regex, individual: &[(String, Regex)]) -> Vec<Hit> {
    fixture
        .split('\x1f')
        .filter(|record| !record.is_empty())
        .filter_map(|record| record.split_once('\x1e'))
        .filter(|(path, _)| !patterns::is_exempt(path))
        .flat_map(|(path, body)| scan_content(path, body, combined, individual))
        .collect()
}

View file

@ -0,0 +1,155 @@
//! Integration tests for genesis-scan.
//!
//! We use tempfile-based fixtures rather than on-disk test assets because
//! the entire `genesis-scan/` tree is self-exempt (so any on-disk fixture
//! under `tests/` would be skipped by the scanner — correct behaviour, but
//! makes testing inconvenient).
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Path to the `genesis-scan` binary built for this test run.
fn bin() -> PathBuf {
    // Cargo exports CARGO_BIN_EXE_<bin_name> at compile time for each bin
    // target of the crate under test; its value is the built binary's path.
    let exe: &str = env!("CARGO_BIN_EXE_genesis-scan");
    exe.into()
}
/// Markdown fixture that contains no forbidden term.
fn clean_md() -> &'static str {
    concat!(
        "# Doc\n",
        "\n",
        "This is a plain markdown file about cats and dogs.\n",
    )
}
/// Markdown fixture with exactly one forbidden term, on line 3.
fn leaky_md() -> String {
    // The term is assembled at runtime so this source file does not contain
    // it literally, in case it is ever scanned outside the exempt scope.
    let term = ["Gene", "sis"].concat();
    format!("# Leaky\n\nThis doc mentions {} theory on line 3.\n", term)
}
/// Five-line fixture with forbidden terms on lines 2, 4 and 5. The first
/// two terms are assembled at runtime rather than written literally.
fn leaky_multi_md() -> String {
    let first = ["Gene", "sis"].concat();
    let second = ["Wein", "garten"].concat();
    let mut doc = String::from("line one\n");
    doc.push_str(&format!("{} theory\n", first));
    doc.push_str("nothing\n");
    doc.push_str(&format!("{} blindness\n", second));
    doc.push_str("rank-1 attractor here\n");
    doc
}
/// A file without forbidden terms yields a successful run and an empty
/// JSON report.
#[test]
fn clean_file_produces_zero_hits_in_json() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("clean.md");
    fs::write(&target, clean_md()).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--format").arg("json").arg("--path").arg(&target);
    let result = cmd.output().unwrap();

    assert!(result.status.success());
    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 0);
    assert!(report["hits"].as_array().unwrap().is_empty());
}
/// A file with one forbidden term reports one hit, located on line 3.
#[test]
fn single_leaky_file_reports_one_hit() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("leaky.md");
    fs::write(&target, leaky_md()).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--format").arg("json").arg("--path").arg(&target);
    let result = cmd.output().unwrap();

    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 1);
    assert_eq!(report["hits"][0]["line"], 3);
}
/// The multi-term fixture produces three hits: three distinct matches on
/// three distinct lines.
#[test]
fn leaky_multi_file_reports_three_hits() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("leaky_multi.md");
    fs::write(&target, leaky_multi_md()).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--format").arg("json").arg("--path").arg(&target);
    let result = cmd.output().unwrap();

    let v: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(v["hit_count"], 3, "payload was: {v}");
}
/// A leaky file below a directory named `theory` (matching the "/theory/"
/// entry of EXEMPT_SUBSTRINGS) is skipped and reports zero hits.
#[test]
fn exempt_path_returns_zero_hits() {
    let dir = TempDir::new().unwrap();
    let exempt_dir = ["some", "theory", "inner"]
        .iter()
        .fold(dir.path().to_path_buf(), |acc, segment| acc.join(segment));
    fs::create_dir_all(&exempt_dir).unwrap();
    let target = exempt_dir.join("leaky.md");
    fs::write(&target, leaky_md()).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--format").arg("json").arg("--path").arg(&target);
    let result = cmd.output().unwrap();

    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 0);
}
/// With `--exit-on-hit`, a leaky file makes the process exit with code 2.
#[test]
fn exit_on_hit_returns_code_two() {
    let dir = TempDir::new().unwrap();
    let target = dir.path().join("leaky.md");
    fs::write(&target, leaky_md()).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--exit-on-hit")
        .arg("--format")
        .arg("json")
        .arg("--path")
        .arg(&target);
    let result = cmd.output().unwrap();

    assert_eq!(result.status.code(), Some(2));
}
/// `--staged` reads the GENESIS_SCAN_TEST_STAGED fixture: of one leaky and
/// one clean record, only the leaky one is reported.
#[test]
fn staged_mode_reads_fixture_env() {
    // Inside a record, \x1e separates path from content; records are joined
    // with \x1f. NUL is rejected by the OS in env-var values, so it cannot
    // serve as a separator here.
    let records = [
        format!("staged/leaky.md\x1e{}", leaky_md()),
        format!("staged/clean.md\x1e{}", clean_md()),
    ];
    let fixture = records.join("\x1f");

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--staged").arg("--format").arg("json");
    cmd.env("GENESIS_SCAN_TEST_STAGED", &fixture);
    let result = cmd.output().unwrap();

    assert!(result.status.success());
    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 1);
    assert_eq!(report["hits"][0]["file"], "staged/leaky.md");
}
/// `--list-patterns` succeeds and prints the forbidden and exempt sections.
#[test]
fn list_patterns_prints_rules() {
    let result = std::process::Command::new(bin())
        .arg("--list-patterns")
        .output()
        .unwrap();
    assert!(result.status.success());

    let text = String::from_utf8_lossy(&result.stdout);
    for heading in ["forbidden", "exempt"].iter() {
        assert!(text.contains(*heading));
    }
    // Spot-check that at least one known forbidden string is dumped.
    let known = ["Weingarten", "Bargmann"];
    assert!(known.iter().any(|term| text.contains(*term)));
}

50
hooks/git-pre-commit-genesis.sh Executable file
View file

@ -0,0 +1,50 @@
#!/bin/sh
# Git pre-commit — block Genesis-IP term leaks in staged content before they
# are committed.
#
# INSTALL (public-facing repos only):
# ln -sf $HOME/.claude/agents/hooks/git-pre-commit-genesis.sh .git/hooks/pre-commit
#
# BEHAVIOUR:
# - Resolves the `genesis-scan` binary under the KeiSeiKit install tree
#   (override with GENESIS_SCAN_BIN).
# - Runs `--staged --exit-on-hit` so the hook fails the commit on any hit.
# - A scanner failure (exit code other than 0 or 2) also blocks the commit,
#   but is reported as a failure rather than as a leak.
# - Bypass with GENESIS_GUARD_BYPASS=1 (visible, per-call — logged in transcript).
#
# EXIT:
# 0 clean, bypassed, or scanner binary not installed
# 1 scanner failed to run (usage / runtime error)
# 2 leak detected (commit blocked)
set -eu

SCANNER="${GENESIS_SCAN_BIN:-$HOME/.claude/agents/_primitives/_rust/target/release/genesis-scan}"

if [ ! -x "$SCANNER" ]; then
    # Absent scanner = silent no-op. Installs without the `core` or `full`
    # profile won't have the binary; we prefer letting the commit through
    # over noisy spam, since the runtime hook still catches new writes.
    exit 0
fi

# Capture the scanner's status; `|| rc=$?` keeps `set -e` from aborting here.
rc=0
"$SCANNER" --staged --format=human --exit-on-hit || rc=$?

if [ "$rc" -eq 0 ]; then
    exit 0
fi

# Non-zero: 2 = hits found, anything else = the scanner itself failed.
if [ "${GENESIS_GUARD_BYPASS:-0}" = "1" ]; then
    echo "" >&2
    if [ "$rc" -eq 2 ]; then
        echo "[genesis-scan] BYPASSED (GENESIS_GUARD_BYPASS=1). Hit logged above." >&2
    else
        echo "[genesis-scan] BYPASSED (GENESIS_GUARD_BYPASS=1). Scanner error (exit $rc) logged above." >&2
    fi
    exit 0
fi

if [ "$rc" -ne 2 ]; then
    cat >&2 <<EOF
Commit blocked: genesis-scan failed to run (exit $rc); see the error above.
Fix the scanner problem, or bypass explicitly (use sparingly):
GENESIS_GUARD_BYPASS=1 git commit ...
EOF
    exit 1
fi

cat >&2 <<EOF
Commit blocked by genesis-scan.
Review the hits above and either:
- remove the Genesis / patent-IP terms from the staged content, or
- move the file into an exempt scope (see: $SCANNER --list-patterns), or
- bypass explicitly (use sparingly):
GENESIS_GUARD_BYPASS=1 git commit ...
EOF
exit 2

View file

@ -289,6 +289,7 @@ primitive_time_secs() {
case "$name" in
mock-render|kei-migrate|kei-ledger) echo 20 ;;
kei-changelog|firewall-diff) echo 15 ;;
genesis-scan) echo 10 ;;
visual-diff|tokens-sync|ssh-check) echo 5 ;;
*) echo 10 ;;
esac
@ -306,6 +307,7 @@ primitive_disk_kb() {
case "$name" in
mock-render|kei-migrate|kei-ledger) echo 30000 ;;
kei-changelog|firewall-diff) echo 10000 ;;
genesis-scan) echo 6000 ;;
visual-diff|tokens-sync|ssh-check) echo 5000 ;;
*) echo 8000 ;;
esac