KeiSeiKit-1.0/_primitives/_rust/genesis-scan/tests/integration.rs
Parfii-bot 10bc799d26 feat(primitives): genesis-scan Rust — sensitive IP leak detector (CI/pre-commit)
26 forbidden patterns synced with ~/.claude/hooks/genesis-leak-guard.sh.
7 exempt scopes (KeiLab/theory/ml-keilab chatlogs/keinet-cfc-eigen/keinet-gpu-wgpu).
CLI: --path, --staged, --stdin, --format=human|json|github-actions, --exit-on-hit.
Self-reference resolved via EXEMPT_SUBSTRINGS + tempfile fixtures with runtime-assembled strings.
7/7 tests pass.
2026-04-22 01:01:56 +08:00

155 lines
5 KiB
Rust

//! Integration tests for genesis-scan.
//!
//! We use tempfile-based fixtures rather than on-disk test assets because
//! the entire `genesis-scan/` tree is self-exempt (so any on-disk fixture
//! under `tests/` would be skipped by the scanner — correct behaviour, but
//! makes testing inconvenient).
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Returns the filesystem path of the compiled `genesis-scan` executable.
///
/// Cargo exports `CARGO_BIN_EXE_<bin_name>` at compile time for every bin
/// target of the crate under test; its value is the path to the built binary.
fn bin() -> PathBuf {
    let exe: &'static str = env!("CARGO_BIN_EXE_genesis-scan");
    PathBuf::from(exe)
}
/// Markdown fixture with no sensitive terms; the tests expect it to produce
/// zero hits when scanned.
fn clean_md() -> &'static str {
    const CLEAN_DOC: &str = "# Doc\n\nThis is a plain markdown file about cats and dogs.\n";
    CLEAN_DOC
}
/// Markdown fixture whose third line mentions a single sensitive term.
///
/// The term is stitched together at runtime rather than written as one
/// literal, so this test source is less likely to be flagged should someone
/// scan it from outside the self-exempt `genesis-scan/` tree.
fn leaky_md() -> String {
    let term: String = ["Gene", "sis"].concat();
    format!("# Leaky\n\nThis doc mentions {} theory on line 3.\n", term)
}
/// Five-line fixture carrying sensitive phrases on lines 2, 4 and 5.
///
/// The two proper-noun terms are assembled at runtime instead of appearing
/// as whole literals in this source file.
fn leaky_multi_md() -> String {
    let first_term: String = ["Gene", "sis"].concat();
    let second_term: String = ["Wein", "garten"].concat();
    format!(
        "line one\n{} theory\nnothing\n{} blindness\nrank-1 attractor here\n",
        first_term, second_term
    )
}
/// Scanning a clean file with `--format json` must exit successfully and
/// report `hit_count == 0` together with an empty `hits` array.
#[test]
fn clean_file_produces_zero_hits_in_json() {
    // Fixture lives in a throwaway directory that is removed on drop.
    let workdir = TempDir::new().unwrap();
    let target = workdir.path().join("clean.md");
    fs::write(&target, clean_md()).unwrap();

    let mut scan = std::process::Command::new(bin());
    scan.args(["--format", "json", "--path"]).arg(&target);
    let result = scan.output().unwrap();

    assert!(result.status.success());
    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 0);
    let hits = report["hits"].as_array().unwrap();
    assert_eq!(hits.len(), 0);
}
/// A file with one sensitive term must yield exactly one hit, attributed to
/// line 3 of the fixture.
#[test]
fn single_leaky_file_reports_one_hit() {
    let workdir = TempDir::new().unwrap();
    let target = workdir.path().join("leaky.md");
    fs::write(&target, leaky_md()).unwrap();

    let mut scan = std::process::Command::new(bin());
    scan.args(["--format", "json", "--path"]).arg(&target);
    let result = scan.output().unwrap();

    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 1);
    let first_hit = &report["hits"][0];
    assert_eq!(first_hit["line"], 3);
}
/// The multi-term fixture must be reported with a `hit_count` of three.
#[test]
fn leaky_multi_file_reports_three_hits() {
    let workdir = TempDir::new().unwrap();
    let target = workdir.path().join("leaky_multi.md");
    fs::write(&target, leaky_multi_md()).unwrap();

    let mut scan = std::process::Command::new(bin());
    scan.args(["--format", "json", "--path"]).arg(&target);
    let result = scan.output().unwrap();

    let v: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    // Three distinct matches, each on its own line; dump the payload on failure.
    assert_eq!(v["hit_count"], 3, "payload was: {v}");
}
/// A leaky file located beneath an exempt path segment must be skipped.
///
/// The fixture is written under `<tmp>/some/theory/inner/`, so its path
/// contains the "/theory/" segment that EXEMPT_SUBSTRINGS is expected to
/// match; the scan must then report zero hits.
#[test]
fn exempt_path_returns_zero_hits() {
    let workdir = TempDir::new().unwrap();

    let mut nested = workdir.path().to_path_buf();
    for segment in ["some", "theory", "inner"] {
        nested.push(segment);
    }
    fs::create_dir_all(&nested).unwrap();

    let target = nested.join("leaky.md");
    fs::write(&target, leaky_md()).unwrap();

    let mut scan = std::process::Command::new(bin());
    scan.args(["--format", "json", "--path"]).arg(&target);
    let result = scan.output().unwrap();

    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 0);
}
/// With `--exit-on-hit`, scanning a file that contains a hit must terminate
/// the process with exit code 2.
#[test]
fn exit_on_hit_returns_code_two() {
    let workdir = TempDir::new().unwrap();
    let target = workdir.path().join("leaky.md");
    fs::write(&target, leaky_md()).unwrap();

    let mut scan = std::process::Command::new(bin());
    scan.args(["--exit-on-hit", "--format", "json", "--path"])
        .arg(&target);
    let result = scan.output().unwrap();

    let exit_code = result.status.code();
    assert_eq!(exit_code, Some(2));
}
/// `--staged` mode must read its input from the `GENESIS_SCAN_TEST_STAGED`
/// environment variable and report the single hit in the leaky entry.
///
/// Payload layout: each record is `<path>\x1e<content>`, and records are
/// joined with `\x1f`. NUL would be the natural delimiter, but the OS rejects
/// it inside env-var values, so these control characters are used instead.
#[test]
fn staged_mode_reads_fixture_env() {
    // Two simulated staged files: one leaky, one clean.
    let payload = format!(
        "staged/leaky.md\x1e{}\x1fstaged/clean.md\x1e{}",
        leaky_md(),
        clean_md()
    );

    let mut scan = std::process::Command::new(bin());
    scan.args(["--staged", "--format", "json"])
        .env("GENESIS_SCAN_TEST_STAGED", &payload);
    let result = scan.output().unwrap();

    assert!(result.status.success());
    let report: serde_json::Value = serde_json::from_slice(&result.stdout).unwrap();
    assert_eq!(report["hit_count"], 1);
    let first_hit = &report["hits"][0];
    assert_eq!(first_hit["file"], "staged/leaky.md");
}
/// `--list-patterns` must exit successfully and print a listing that
/// mentions both the "forbidden" and "exempt" rule groups, plus at least one
/// known forbidden term.
#[test]
fn list_patterns_prints_rules() {
    let mut scan = std::process::Command::new(bin());
    scan.arg("--list-patterns");
    let result = scan.output().unwrap();

    assert!(result.status.success());

    let listing = String::from_utf8_lossy(&result.stdout);
    assert!(listing.contains("forbidden"));
    assert!(listing.contains("exempt"));

    // Spot-check: at least one of these forbidden strings appears in the dump.
    let mentions_known_term = listing.contains("Weingarten") || listing.contains("Bargmann");
    assert!(mentions_known_term);
}