26 forbidden patterns synced with ~/.claude/hooks/genesis-leak-guard.sh. 7 exempt scopes (KeiLab/theory/ml-keilab chatlogs/keinet-cfc-eigen/keinet-gpu-wgpu). CLI: --path, --staged, --stdin, --format=human|json|github-actions, --exit-on-hit. Self-reference resolved via EXEMPT_SUBSTRINGS + tempfile fixtures with runtime-assembled strings. 7/7 tests pass.
155 lines
5 KiB
Rust
155 lines
5 KiB
Rust
//! Integration tests for genesis-scan.
//!
//! We use tempfile-based fixtures rather than on-disk test assets because
//! the entire `genesis-scan/` tree is self-exempt (so any on-disk fixture
//! under `tests/` would be skipped by the scanner — correct behaviour, but
//! it makes testing inconvenient).
|
|
|
|
use std::fs;
|
|
use std::path::PathBuf;
|
|
use tempfile::TempDir;
|
|
|
|
fn bin() -> PathBuf {
|
|
// Cargo sets CARGO_BIN_EXE_<bin_name> for every bin target in the crate
|
|
// under test. The value is the path to the built binary.
|
|
PathBuf::from(env!("CARGO_BIN_EXE_genesis-scan"))
|
|
}
|
|
|
|
/// Markdown fixture that the tests expect the scanner to report zero hits for.
fn clean_md() -> &'static str {
    // Spelled out one source line per fixture line; `concat!` joins them at
    // compile time into a single static string.
    concat!(
        "# Doc\n",
        "\n",
        "This is a plain markdown file about cats and dogs.\n",
    )
}
|
|
|
|
/// Markdown fixture with a single flagged term on its third line.
fn leaky_md() -> String {
    // The term is stitched together at runtime so the literal never appears
    // in this source file; that keeps the file less likely to be flagged if
    // a caller ever scans it from outside the self-exempt tree.
    let term = ["Gene", "sis"].concat();
    format!("# Leaky\n\nThis doc mentions {term} theory on line 3.\n")
}
|
|
|
|
/// Five-line fixture in which lines 2, 4 and 5 each carry one flagged term.
fn leaky_multi_md() -> String {
    // Terms are assembled at runtime rather than written out as literals.
    let first = ["Gene", "sis"].concat();
    let second = ["Wein", "garten"].concat();

    let lines = [
        "line one".to_owned(),
        format!("{first} theory"),
        "nothing".to_owned(),
        format!("{second} blindness"),
        "rank-1 attractor here".to_owned(),
    ];

    // Every line, including the last, is newline-terminated.
    let mut doc = lines.join("\n");
    doc.push('\n');
    doc
}
|
|
|
|
/// Scanning a clean file exits successfully and yields an empty JSON report.
#[test]
fn clean_file_produces_zero_hits_in_json() {
    let workdir = TempDir::new().unwrap();
    let target = workdir.path().join("clean.md");
    fs::write(&target, clean_md()).unwrap();

    let mut scan = std::process::Command::new(bin());
    scan.arg("--format").arg("json").arg("--path").arg(&target);
    let output = scan.output().unwrap();

    assert!(output.status.success());

    let report: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap();
    assert_eq!(report["hit_count"], 0);
    // `hits` must be present as an array, and that array must be empty.
    let hits = report["hits"].as_array().unwrap();
    assert_eq!(hits.len(), 0);
}
|
|
|
|
/// A file with one flagged term yields exactly one hit, located on line 3.
#[test]
fn single_leaky_file_reports_one_hit() {
    let tmp = TempDir::new().unwrap();
    let path = tmp.path().join("leaky.md");
    fs::write(&path, leaky_md()).unwrap();

    let out = std::process::Command::new(bin())
        .args(["--format", "json", "--path"])
        .arg(&path)
        .output()
        .unwrap();

    // If the binary crashed or printed something other than JSON, report its
    // exit status and stderr instead of a bare serde parse error.
    let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap_or_else(|err| {
        panic!(
            "stdout was not valid JSON ({err}); status: {:?}; stderr: {}",
            out.status,
            String::from_utf8_lossy(&out.stderr)
        )
    });

    // Include the payload in failures so a mismatch is diagnosable from the
    // test log alone.
    assert_eq!(v["hit_count"], 1, "payload was: {v}");
    assert_eq!(v["hits"][0]["line"], 3, "payload was: {v}");
}
|
|
|
|
/// A file with three flagged terms on three different lines yields three hits.
#[test]
fn leaky_multi_file_reports_three_hits() {
    let tmp = TempDir::new().unwrap();
    let path = tmp.path().join("leaky_multi.md");
    fs::write(&path, leaky_multi_md()).unwrap();

    let out = std::process::Command::new(bin())
        .args(["--format", "json", "--path"])
        .arg(&path)
        .output()
        .unwrap();

    // If the binary crashed or printed something other than JSON, report its
    // exit status and stderr instead of a bare serde parse error.
    let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap_or_else(|err| {
        panic!(
            "stdout was not valid JSON ({err}); status: {:?}; stderr: {}",
            out.status,
            String::from_utf8_lossy(&out.stderr)
        )
    });

    // Three distinct matches on three distinct lines.
    assert_eq!(v["hit_count"], 3, "payload was: {v}");
}
|
|
|
|
/// A leaky file located under an exempt path segment is skipped by the scanner.
#[test]
fn exempt_path_returns_zero_hits() {
    // Place the leaky file under a path segment that matches EXEMPT_SUBSTRINGS
    // — specifically "/theory/" — and verify it is skipped.
    let tmp = TempDir::new().unwrap();
    let exempt_dir = tmp.path().join("some").join("theory").join("inner");
    fs::create_dir_all(&exempt_dir).unwrap();
    let path = exempt_dir.join("leaky.md");
    fs::write(&path, leaky_md()).unwrap();

    let out = std::process::Command::new(bin())
        .args(["--format", "json", "--path"])
        .arg(&path)
        .output()
        .unwrap();

    // If the binary crashed or printed something other than JSON, report its
    // exit status and stderr instead of a bare serde parse error.
    let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap_or_else(|err| {
        panic!(
            "stdout was not valid JSON ({err}); status: {:?}; stderr: {}",
            out.status,
            String::from_utf8_lossy(&out.stderr)
        )
    });

    // The same content produces a hit elsewhere; here the path alone must
    // suppress it.
    assert_eq!(v["hit_count"], 0, "payload was: {v}");
}
|
|
|
|
/// With `--exit-on-hit`, scanning a leaky file terminates with exit code 2.
#[test]
fn exit_on_hit_returns_code_two() {
    let workdir = TempDir::new().unwrap();
    let leaky_file = workdir.path().join("leaky.md");
    fs::write(&leaky_file, leaky_md()).unwrap();

    let mut scan = std::process::Command::new(bin());
    scan.args(["--exit-on-hit", "--format", "json", "--path"])
        .arg(&leaky_file);
    // `output()` (not `status()`) so the child's stdout/stderr are captured
    // rather than inherited by the test process.
    let exit = scan.output().unwrap().status;

    assert_eq!(exit.code(), Some(2));
}
|
|
|
|
/// `--staged` scans the payload supplied through `GENESIS_SCAN_TEST_STAGED`.
#[test]
fn staged_mode_reads_fixture_env() {
    // Simulated staged payload: within a record the path and the content are
    // separated by \x1e (ASCII Record Separator); records are separated from
    // each other by \x1f (ASCII Unit Separator). NUL is rejected by the OS
    // for env-var values, so it cannot be used here.
    let records = [
        format!("staged/leaky.md\x1e{}", leaky_md()),
        format!("staged/clean.md\x1e{}", clean_md()),
    ];
    let payload = records.join("\x1f");

    let mut scan = std::process::Command::new(bin());
    scan.args(["--staged", "--format", "json"])
        .env("GENESIS_SCAN_TEST_STAGED", &payload);
    let output = scan.output().unwrap();

    assert!(output.status.success());

    let report: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap();
    // Only the leaky record should be reported, under its staged path.
    assert_eq!(report["hit_count"], 1);
    assert_eq!(report["hits"][0]["file"], "staged/leaky.md");
}
|
|
|
|
/// `--list-patterns` exits successfully and dumps both rule categories.
#[test]
fn list_patterns_prints_rules() {
    let out = std::process::Command::new(bin())
        .args(["--list-patterns"])
        .output()
        .unwrap();
    assert!(out.status.success());

    let text = String::from_utf8_lossy(&out.stdout);
    assert!(text.contains("forbidden"), "output was: {text}");
    assert!(text.contains("exempt"), "output was: {text}");

    // Spot-check one forbidden string is dumped. The terms are assembled at
    // runtime, like the other fixtures in this file, so the literals never
    // appear in this source and it stays less likely to be flagged if it is
    // ever scanned from outside the self-exempt tree.
    let first = "Wein".to_string() + "garten";
    let second = "Barg".to_string() + "mann";
    assert!(
        text.contains(first.as_str()) || text.contains(second.as_str()),
        "output was: {text}"
    );
}
|