feat(primitives): 4 Rust crates for deep-sleep — conflict-scan, refactor-engine, graph-check, store

- kei-conflict-scan: rules/hooks/blocks/orphans/CP detection (6 tests)
- kei-refactor-engine: plan-mode + advisory patch format, zero-conflict guarantee (5 tests)
- kei-graph-check: wikilinks/handoffs/block-refs validator (4 tests)
- kei-store: trait + 5 backends (filesystem/github/forgejo/gitea prod, s3 stub) (8 tests)

1916 LOC Rust total; all files <200 LOC; 23/23 tests pass.
This commit is contained in:
Parfii-bot 2026-04-22 08:28:22 +08:00
parent 9ca7c4e148
commit 19ee220e0a
40 changed files with 2569 additions and 0 deletions

View file

@ -920,6 +920,32 @@ dependencies = [
"regex",
]
[[package]]
name = "kei-conflict-scan"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"regex",
"serde",
"serde_json",
"tempfile",
"walkdir",
]
[[package]]
name = "kei-graph-check"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"regex",
"serde",
"serde_json",
"tempfile",
"walkdir",
]
[[package]]
name = "kei-ledger"
version = "0.1.0"
@ -958,6 +984,31 @@ dependencies = [
"tokio",
]
[[package]]
name = "kei-refactor-engine"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"serde",
"serde_json",
"similar",
"tempfile",
]
[[package]]
name = "kei-store"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"git2",
"serde",
"serde_json",
"tempfile",
"toml",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@ -1610,6 +1661,15 @@ dependencies = [
"zmij",
]
[[package]]
name = "serde_spanned"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
@ -1679,6 +1739,12 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
[[package]]
name = "similar"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
[[package]]
name = "slab"
version = "0.4.12"
@ -2095,6 +2161,47 @@ dependencies = [
"tokio",
]
[[package]]
name = "toml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"toml_write",
"winnow",
]
[[package]]
name = "toml_write"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]]
name = "tracing"
version = "0.1.44"
@ -2579,6 +2686,15 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945"
dependencies = [
"memchr",
]
[[package]]
name = "wit-bindgen"
version = "0.51.0"

View file

@ -11,6 +11,10 @@ members = [
"tokens-sync",
"kei-memory",
"genesis-scan",
"kei-conflict-scan",
"kei-refactor-engine",
"kei-graph-check",
"kei-store",
]
[workspace.package]

View file

@ -0,0 +1,24 @@
[package]
name = "kei-conflict-scan"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
description = "Deep-sleep conflict scanner — rules/hooks/blocks/orphans/CP violations (v0.13.0)"
[[bin]]
name = "kei-conflict-scan"
path = "src/main.rs"
[lib]
path = "src/lib.rs"
[dependencies]
clap = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
regex = "1"
walkdir = "2"
anyhow = "1"
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,64 @@
//! Conflict record — the single unit of output.
use serde::{Deserialize, Serialize};
/// Which scanner produced a conflict.
///
/// Serialized in snake_case (`rules`, `hooks`, `blocks`, `orphans`, `cp`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Category {
/// Contradictory directives between rule files.
Rules,
/// Hook scripts that share a matcher value.
Hooks,
/// Near-duplicate block files.
Blocks,
/// References whose target file does not exist.
Orphans,
/// Constructor-Pattern size-limit violations.
Cp,
}
impl Category {
pub fn as_str(&self) -> &'static str {
match self {
Category::Rules => "rules",
Category::Hooks => "hooks",
Category::Blocks => "blocks",
Category::Orphans => "orphans",
Category::Cp => "cp",
}
}
}
/// How serious a conflict is. Serialized in lowercase (`low`, `medium`, `high`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
Low,
Medium,
High,
}
/// One finding emitted by a scanner; serialized as a JSON object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Conflict {
/// Scanner family that produced the finding.
pub category: Category,
/// Seriousness assigned by the scanner.
pub severity: Severity,
/// Files involved in the finding (the scanners pass root-relative paths).
pub files: Vec<String>,
/// Short description of what was detected.
pub evidence: String,
/// Human-readable remediation hint.
pub suggested_fix: String,
/// `true` → refactor-engine may auto-apply. `false` → plan-only.
pub auto_resolvable: bool,
}
impl Conflict {
pub fn new(
category: Category,
severity: Severity,
files: Vec<String>,
evidence: impl Into<String>,
suggested_fix: impl Into<String>,
auto_resolvable: bool,
) -> Self {
Self {
category,
severity,
files,
evidence: evidence.into(),
suggested_fix: suggested_fix.into(),
auto_resolvable,
}
}
}

View file

@ -0,0 +1,16 @@
//! kei-conflict-scan — library surface.
//!
//! Detects inconsistencies inside a `~/.claude`-style root:
//! - rule conflicts (contradictory directives in `rules/*.md`)
//! - hook overlap (two hooks on same matcher)
//! - block duplication (>70% text overlap in `_blocks/*.md`)
//! - orphan refs (wikilinks / handoffs to non-existent files)
//! - Constructor-Pattern violations (file >200 LOC / fn >30 LOC)
//!
//! Produces a JSON array consumable by `kei-refactor-engine`.
pub mod conflict;
pub mod scanners;
pub mod tree;
pub use conflict::{Category, Conflict, Severity};

View file

@ -0,0 +1,114 @@
//! kei-conflict-scan — binary entry point.
//!
//! See lib.rs for overview. CLI spec:
//! kei-conflict-scan --path <root> [--format json|human] [--only rules|hooks|blocks|orphans|cp] [--exit-on-hit]
//!
//! Exit codes:
//! 0 — scan completed (hits or no hits)
//! 1 — usage / I/O error
//! 2 — hits found AND --exit-on-hit set
use clap::{Parser, ValueEnum};
use kei_conflict_scan::scanners::{blocks, cp, hooks, orphans, rules};
use kei_conflict_scan::Conflict;
use std::path::PathBuf;
use std::process::ExitCode;
// Command-line arguments of the scanner binary.
// Maintainer notes use plain `//` here: `///` doc comments on clap-derive
// items are surfaced as `--help` text.
#[derive(Parser, Debug)]
#[command(name = "kei-conflict-scan", about = "Deep-sleep conflict scanner.")]
struct Cli {
/// Root directory to scan (e.g. ~/.claude or a cloned memory repo).
#[arg(long)]
path: PathBuf,
/// Output format.
#[arg(long, value_enum, default_value_t = Format::Json)]
format: Format,
/// Only run one category; default = run all.
#[arg(long, value_enum)]
only: Option<Only>,
/// Exit 2 if any conflict is reported.
// Without this flag `main` returns success even when conflicts were found.
#[arg(long)]
exit_on_hit: bool,
}
// Output renderer selected by `--format`; `Json` is the default (see `Cli`).
#[derive(Clone, Copy, Debug, ValueEnum)]
enum Format {
Json,
Human,
}
// Value of `--only`: restricts the run to a single scanner (see `run_all`).
#[derive(Clone, Copy, Debug, ValueEnum)]
enum Only {
Rules,
Hooks,
Blocks,
Orphans,
Cp,
}
/// Runs the scanner selected by `only`, or every scanner when `only` is
/// `None`, and concatenates the findings in a fixed order
/// (rules, hooks, blocks, orphans, cp).
fn run_all(root: &std::path::Path, only: Option<Only>) -> Vec<Conflict> {
    let everything = only.is_none();
    let mut hits: Vec<Conflict> = Vec::new();
    if everything || matches!(only, Some(Only::Rules)) {
        hits.extend(rules::scan(root));
    }
    if everything || matches!(only, Some(Only::Hooks)) {
        hits.extend(hooks::scan(root));
    }
    if everything || matches!(only, Some(Only::Blocks)) {
        hits.extend(blocks::scan(root));
    }
    if everything || matches!(only, Some(Only::Orphans)) {
        hits.extend(orphans::scan(root));
    }
    if everything || matches!(only, Some(Only::Cp)) {
        hits.extend(cp::scan(root));
    }
    hits
}
/// Prints the findings to stdout as a pretty-printed JSON object with the
/// keys `hit_count` and `conflicts`.
fn emit_json(hits: &[Conflict]) {
    let report = serde_json::json!({
        "hit_count": hits.len(),
        "conflicts": hits,
    });
    let rendered = serde_json::to_string_pretty(&report).unwrap();
    println!("{}", rendered);
}
/// Prints a plain-text report to stdout: a count line, then two lines per
/// conflict (summary and suggested fix).
fn emit_human(hits: &[Conflict]) {
    if hits.is_empty() {
        println!("no conflicts found.");
        return;
    }
    println!("{} conflict(s):", hits.len());
    for hit in hits {
        let files = hit.files.join(", ");
        println!(
            " [{}][{:?}] {} — files: {}",
            hit.category.as_str(),
            hit.severity,
            hit.evidence,
            files
        );
        println!(" fix: {}", hit.suggested_fix);
    }
}
/// Entry point: validates the root, runs the selected scanners, prints the
/// report, and maps the outcome to the documented exit codes (0 / 1 / 2).
fn main() -> ExitCode {
    let cli = Cli::parse();
    let root = cli.path.as_path();
    if !root.exists() {
        eprintln!("kei-conflict-scan: path not found: {}", root.display());
        return ExitCode::from(1);
    }

    let hits = run_all(root, cli.only);
    match cli.format {
        Format::Human => emit_human(&hits),
        Format::Json => emit_json(&hits),
    }

    // Exit 2 only when the caller opted in via --exit-on-hit.
    let fail_on_hits = cli.exit_on_hit && !hits.is_empty();
    if fail_on_hits {
        ExitCode::from(2)
    } else {
        ExitCode::SUCCESS
    }
}

View file

@ -0,0 +1,76 @@
//! Block-duplication detector (>70% text overlap).
//!
//! Uses shingled-word Jaccard similarity — cheap and deterministic,
//! no ML / embeddings. Flags pairs above threshold.
use crate::conflict::{Category, Conflict, Severity};
use crate::tree::{collect_markdown, read_lossy, rel};
use std::collections::HashSet;
use std::path::Path;
/// Minimum Jaccard similarity at which two blocks are reported as duplicates.
const THRESHOLD: f64 = 0.70;
/// Number of consecutive words per shingle.
const SHINGLE: usize = 5;

/// Splits `text` into lowercase alphanumeric words and returns the set of
/// all `SHINGLE`-word windows, each joined by single spaces.
///
/// Text with fewer than `SHINGLE` usable words yields an empty set.
fn shingles(text: &str) -> HashSet<String> {
    let mut words: Vec<String> = Vec::new();
    for raw in text.split_whitespace() {
        let cleaned: String = raw
            .to_lowercase()
            .chars()
            .filter(|ch| ch.is_alphanumeric())
            .collect();
        if !cleaned.is_empty() {
            words.push(cleaned);
        }
    }
    // `windows` yields nothing when there are fewer than SHINGLE words.
    words.windows(SHINGLE).map(|w| w.join(" ")).collect()
}
/// Jaccard similarity |a ∩ b| / |a ∪ b| of two shingle sets.
///
/// Returns 0.0 when either set is empty.
fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f64 {
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let shared = a.iter().filter(|item| b.contains(*item)).count();
    // |a ∪ b| = |a| + |b| - |a ∩ b|
    let combined = a.len() + b.len() - shared;
    shared as f64 / combined as f64
}
/// Scans every markdown file under `<root>/_blocks` and reports pairs whose
/// shingle sets are at least `THRESHOLD` similar.
pub fn scan(root: &Path) -> Vec<Conflict> {
    let mut indexed: Vec<(String, HashSet<String>)> = Vec::new();
    for file in collect_markdown(root, "_blocks") {
        let text = read_lossy(&file);
        indexed.push((rel(root, &file), shingles(&text)));
    }
    find_duplicates(&indexed)
}
/// Compares every unordered pair of indexed files once and emits a conflict
/// for each pair whose similarity reaches `THRESHOLD`.
fn find_duplicates(indexed: &[(String, HashSet<String>)]) -> Vec<Conflict> {
    let mut found = Vec::new();
    for (pos, (left_name, left_set)) in indexed.iter().enumerate() {
        for (right_name, right_set) in &indexed[pos + 1..] {
            let score = jaccard(left_set, right_set);
            if score >= THRESHOLD {
                found.push(dup_conflict(left_name, right_name, score));
            }
        }
    }
    found
}
/// Builds the medium-severity, auto-resolvable conflict for a duplicate
/// pair; `score` is reported as a rounded percentage.
fn dup_conflict(a: &str, b: &str, score: f64) -> Conflict {
    let percent = (score * 100.0).round() as u32;
    let evidence = format!("shingle-Jaccard {}% overlap", percent);
    let files = vec![a.to_owned(), b.to_owned()];
    Conflict::new(
        Category::Blocks,
        Severity::Medium,
        files,
        evidence,
        "keep the better-cited block; mark the other as deprecated with a pointer".to_string(),
        true,
    )
}

View file

@ -0,0 +1,96 @@
//! Constructor-Pattern violation reporter (read-only).
//!
//! Flags files >200 LOC (physical lines) and Rust/Python functions >30 LOC.
//! Read-only: we do NOT propose a refactor here; refactor-engine decides.
use crate::conflict::{Category, Conflict, Severity};
use crate::tree::{read_lossy, rel};
use regex::Regex;
use std::path::Path;
use walkdir::WalkDir;
/// Maximum physical lines per source file.
const FILE_LIMIT: usize = 200;
/// Maximum lines per function, as measured by `long_fns`.
const FN_LIMIT: usize = 30;

/// Walks `root` and reports source files over `FILE_LIMIT` lines and
/// functions over `FN_LIMIT` lines.
///
/// Only files with a recognised source extension are inspected; anything
/// for which `skip_dir` returns `true` is ignored.
pub fn scan(root: &Path) -> Vec<Conflict> {
    const SOURCE_EXTS: [&str; 7] = ["rs", "py", "sh", "ts", "js", "go", "swift"];

    let mut found = Vec::new();
    let walker = WalkDir::new(root).into_iter().filter_map(Result::ok);
    for entry in walker.filter(|e| e.file_type().is_file()) {
        let path = entry.path();
        if skip_dir(path) {
            continue;
        }
        let ext = match path.extension().and_then(|s| s.to_str()) {
            Some(e) => e,
            None => "",
        };
        if !SOURCE_EXTS.contains(&ext) {
            continue;
        }

        let content = read_lossy(path);
        let file_rel = rel(root, path);
        let line_count = content.lines().count();
        if line_count > FILE_LIMIT {
            found.push(file_violation(&file_rel, line_count));
        }
        found.extend(
            long_fns(&content, ext)
                .into_iter()
                .filter(|(_, len)| *len > FN_LIMIT)
                .map(|(name, len)| fn_violation(&file_rel, &name, len)),
        );
    }
    found
}
/// Returns `true` when any directory component of `path` is a build, VCS,
/// or dependency directory (`target`, `.git`, `node_modules`).
///
/// Matching is done on path components rather than on a `"/name/"`
/// substring, so it also works when the directory is the first component
/// (`target/debug/x.rs`) and on platforms whose separator is not `/`.
fn skip_dir(path: &Path) -> bool {
    const SKIPPED: [&str; 3] = ["target", ".git", "node_modules"];
    // Only ancestors count: the final component is the file itself.
    path.parent().is_some_and(|dir| {
        dir.components().any(|part| {
            part.as_os_str()
                .to_str()
                .is_some_and(|name| SKIPPED.contains(&name))
        })
    })
}
fn long_fns(content: &str, ext: &str) -> Vec<(String, usize)> {
let pattern = match ext {
"rs" => r"(?m)^\s*(?:pub\s+)?(?:async\s+)?fn\s+([a-zA-Z0-9_]+)",
"py" => r"(?m)^\s*def\s+([a-zA-Z0-9_]+)",
_ => return Vec::new(),
};
let rx = Regex::new(pattern).expect("static regex");
let starts: Vec<(usize, String)> = rx
.captures_iter(content)
.filter_map(|c| {
let name = c.get(1)?.as_str().to_string();
let pos = c.get(0)?.start();
let line = content[..pos].lines().count();
Some((line, name))
})
.collect();
let total = content.lines().count();
starts
.iter()
.enumerate()
.map(|(i, (line, name))| {
let next = starts.get(i + 1).map(|(l, _)| *l).unwrap_or(total);
(name.clone(), next.saturating_sub(*line))
})
.collect()
}
fn file_violation(file: &str, loc: usize) -> Conflict {
Conflict::new(
Category::Cp,
Severity::Medium,
vec![file.to_string()],
format!("file is {} LOC (limit 200)", loc),
"split into smaller cubes; one file = one class = one responsibility".to_string(),
false,
)
}
fn fn_violation(file: &str, name: &str, loc: usize) -> Conflict {
Conflict::new(
Category::Cp,
Severity::Low,
vec![file.to_string()],
format!("function '{}' is {} LOC (limit 30)", name, loc),
"split into helper subfunctions".to_string(),
false,
)
}

View file

@ -0,0 +1,67 @@
//! Hook-overlap detector.
//!
//! Heuristic: two hook scripts (`.sh`, `.py`, `.rs`) under `hooks/` that
//! share at least one value assigned to a `tool_name`, `matcher`, or `event`
//! key (matched case-insensitively). Flags the pair as possibly-redundant.
use crate::conflict::{Category, Conflict, Severity};
use crate::tree::{collect_with_ext, read_lossy, rel};
use regex::Regex;
use std::path::Path;
/// Collects every value assigned to a `tool_name` / `matcher` / `event` key
/// in `content`, lowercased, sorted, and de-duplicated.
fn extract_matcher(content: &str) -> Vec<String> {
    let rx = Regex::new(
        r#"(?i)(?:tool[_ ]?name|matcher|event)\s*[:=]\s*["']?([A-Za-z0-9_|/-]+)["']?"#,
    )
    .expect("static regex");
    let mut found: Vec<String> = rx
        .captures_iter(content)
        .map(|caps| caps[1].to_lowercase())
        .collect();
    found.sort();
    found.dedup();
    found
}
/// Indexes the matcher values of every `.sh`, `.py`, and `.rs` file under
/// `<root>/hooks` and reports pairs that share at least one value.
pub fn scan(root: &Path) -> Vec<Conflict> {
    let mut indexed: Vec<(String, Vec<String>)> = Vec::new();
    // Same order as before: all shell hooks, then Python, then Rust.
    for ext in ["sh", "py", "rs"] {
        for file in collect_with_ext(root, "hooks", ext) {
            let matchers = extract_matcher(&read_lossy(&file));
            indexed.push((rel(root, &file), matchers));
        }
    }
    pairs(&indexed)
}
/// Compares every unordered pair of hooks once and emits a conflict for
/// each pair with a non-empty set of shared matcher values.
fn pairs(indexed: &[(String, Vec<String>)]) -> Vec<Conflict> {
    let mut found = Vec::new();
    for (pos, (name_a, matchers_a)) in indexed.iter().enumerate() {
        for (name_b, matchers_b) in indexed.iter().skip(pos + 1) {
            let shared: Vec<&String> = matchers_a
                .iter()
                .filter(|m| matchers_b.contains(m))
                .collect();
            if shared.is_empty() {
                continue;
            }
            found.push(overlap_conflict(name_a, name_b, &shared));
        }
    }
    found
}
/// Builds the medium-severity, plan-only conflict for two hooks, listing
/// the shared matcher values comma-separated in the evidence.
fn overlap_conflict(a: &str, b: &str, shared: &[&String]) -> Conflict {
    let mut shared_str = String::new();
    for (idx, matcher) in shared.iter().enumerate() {
        if idx > 0 {
            shared_str.push(',');
        }
        shared_str.push_str(matcher);
    }
    let files = vec![a.to_string(), b.to_string()];
    let evidence = format!("hooks share matcher(s): {}", shared_str);
    Conflict::new(
        Category::Hooks,
        Severity::Medium,
        files,
        evidence,
        "consider merging into a single hook with union of patterns; keep separate if responsibilities are genuinely distinct".to_string(),
        false,
    )
}

View file

@ -0,0 +1,10 @@
//! Per-category conflict scanners.
//!
//! Each sub-module exposes `fn scan(root: &Path) -> Vec<Conflict>`.
//! The CLI in `main.rs` calls them based on `--only` or runs all.
pub mod blocks;
pub mod cp;
pub mod hooks;
pub mod orphans;
pub mod rules;

View file

@ -0,0 +1,74 @@
//! Orphan-reference detector.
//!
//! Finds `[[wikilink]]` targets and bold bullet names (`- **name**`, the
//! handoff-list form) that match no file stem anywhere under the root.
//! Case-insensitive basename match; handoff names are only reported when
//! they contain a hyphen.
use crate::conflict::{Category, Conflict, Severity};
use crate::tree::{read_lossy, rel};
use regex::Regex;
use std::collections::HashSet;
use std::path::Path;
use walkdir::WalkDir;
/// Returns the lowercased file stem of every regular file under `root`.
/// Files whose stem is not valid UTF-8 are skipped.
fn all_basenames(root: &Path) -> HashSet<String> {
    WalkDir::new(root)
        .into_iter()
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.file_type().is_file())
        .filter_map(|entry| {
            entry
                .path()
                .file_stem()
                .and_then(|stem| stem.to_str())
                .map(str::to_lowercase)
        })
        .collect()
}
/// Returns the target of every `[[target]]`, `[[target#anchor]]`, or
/// `[[target|label]]` link in `content`, trimmed and lowercased.
fn extract_wikilinks(content: &str) -> Vec<String> {
    let rx = Regex::new(r"\[\[([^\]\|#]+?)(?:#[^\]]*)?(?:\|[^\]]*)?\]\]").expect("static regex");
    let mut targets = Vec::new();
    for caps in rx.captures_iter(content) {
        targets.push(caps[1].trim().to_lowercase());
    }
    targets
}
/// Returns the name of every bullet of the form `- **name**` in `content`,
/// trimmed and lowercased. Names must be at least three characters long.
fn extract_handoffs(content: &str) -> Vec<String> {
    let rx = Regex::new(r"(?im)^\s*-\s*\*\*([a-z0-9][a-z0-9_-]{2,})\*\*").expect("static regex");
    let mut names = Vec::new();
    for caps in rx.captures_iter(content) {
        names.push(caps[1].trim().to_lowercase());
    }
    names
}
/// Checks every `.md` file under `root` and reports wikilink and handoff
/// targets that match no file stem in the tree.
///
/// Handoff names are reported only when they contain a hyphen.
pub fn scan(root: &Path) -> Vec<Conflict> {
    let known = all_basenames(root);
    let mut found = Vec::new();
    for entry in WalkDir::new(root).into_iter().filter_map(|e| e.ok()) {
        let path = entry.path();
        let is_markdown =
            entry.file_type().is_file() && path.extension().is_some_and(|x| x == "md");
        if !is_markdown {
            continue;
        }

        let content = read_lossy(path);
        let file_rel = rel(root, path);

        let missing_links = extract_wikilinks(&content)
            .into_iter()
            .filter(|target| !known.contains(target));
        found.extend(missing_links.map(|target| orphan(&file_rel, &target, "wikilink")));

        let missing_handoffs = extract_handoffs(&content)
            .into_iter()
            .filter(|target| target.contains('-') && !known.contains(target));
        found.extend(missing_handoffs.map(|target| orphan(&file_rel, &target, "handoff")));
    }
    found
}
/// Builds the low-severity, auto-resolvable conflict for a reference of
/// the given `kind` in `file` whose `target` was not found.
fn orphan(file: &str, target: &str, kind: &str) -> Conflict {
    let evidence = format!("{} target '{}' not found under root", kind, target);
    let files = vec![file.to_owned()];
    Conflict::new(
        Category::Orphans,
        Severity::Low,
        files,
        evidence,
        "either create the target file or remove the stale reference".to_string(),
        true,
    )
}

View file

@ -0,0 +1,76 @@
//! Rule-file conflict detector.
//!
//! Heuristic: look for contradictory directive pairs like
//! "never X" vs "prefer X" or "forbidden: X" vs "required: X" across
//! `rules/*.md`. Tokens are compared after lowercasing, dropping
//! punctuation, and keeping only the first six words.
use crate::conflict::{Category, Conflict, Severity};
use crate::tree::{collect_markdown, read_lossy, rel};
use regex::Regex;
use std::path::Path;
fn extract_directives(content: &str) -> Vec<(String, String)> {
// Returns (polarity, token) pairs. polarity ∈ {"pos","neg"}.
let neg = Regex::new(r"(?im)^\s*(?:never|forbidden|prohibited|do not|don't|no):?\s+(.{3,80})$")
.expect("static regex");
let pos = Regex::new(r"(?im)^\s*(?:always|required|prefer|must|do):?\s+(.{3,80})$")
.expect("static regex");
let mut out = Vec::new();
for c in neg.captures_iter(content) {
out.push(("neg".to_string(), normalize(&c[1])));
}
for c in pos.captures_iter(content) {
out.push(("pos".to_string(), normalize(&c[1])));
}
out
}
/// Lowercases `s`, drops every character that is neither alphanumeric nor
/// whitespace, and keeps the first six words joined by single spaces.
fn normalize(s: &str) -> String {
    let lowered = s.to_lowercase();
    let cleaned: String = lowered
        .chars()
        .filter(|ch| ch.is_alphanumeric() || ch.is_whitespace())
        .collect();
    let head: Vec<&str> = cleaned.split_whitespace().take(6).collect();
    head.join(" ")
}
/// Extracts directives from every markdown file under `<root>/rules` and
/// reports contradictions between different files.
pub fn scan(root: &Path) -> Vec<Conflict> {
    let indexed: Vec<(String, Vec<(String, String)>)> = collect_markdown(root, "rules")
        .iter()
        .map(|file| {
            let text = read_lossy(file);
            (rel(root, file), extract_directives(&text))
        })
        .collect();
    find_pairs(&indexed)
}
/// Emits a conflict for every non-empty token that appears with opposite
/// polarity in two different files. Each file pair is visited once.
fn find_pairs(indexed: &[(String, Vec<(String, String)>)]) -> Vec<Conflict> {
    let mut found = Vec::new();
    for (pos, (file_a, directives_a)) in indexed.iter().enumerate() {
        for (file_b, directives_b) in &indexed[pos + 1..] {
            for (polarity_a, token_a) in directives_a {
                if token_a.is_empty() {
                    continue;
                }
                for (polarity_b, token_b) in directives_b {
                    if polarity_a != polarity_b && token_a == token_b {
                        found.push(mk_conflict(file_a, file_b, token_a));
                    }
                }
            }
        }
    }
    found
}
/// Builds the high-severity, plan-only conflict for two rule files that
/// carry opposite directives on the same normalized `token`.
fn mk_conflict(a: &str, b: &str, token: &str) -> Conflict {
    Conflict::new(
        Category::Rules,
        Severity::High,
        vec![a.to_string(), b.to_string()],
        format!("contradictory directive on '{}'", token),
        // Plain literal: `format!` without arguments adds nothing here.
        "review both files; keep directive in the more-specific rule, drop or narrow in the other"
            .to_string(),
        false,
    )
}

View file

@ -0,0 +1,46 @@
//! Filesystem walker helpers — shared across scanners.
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
pub fn collect_markdown(root: &Path, sub: &str) -> Vec<PathBuf> {
let base = root.join(sub);
if !base.exists() {
return Vec::new();
}
WalkDir::new(&base)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
.map(|e| e.into_path())
.collect()
}
pub fn collect_with_ext(root: &Path, sub: &str, ext: &str) -> Vec<PathBuf> {
let base = root.join(sub);
if !base.exists() {
return Vec::new();
}
WalkDir::new(&base)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| e.path().extension().is_some_and(|e2| e2 == ext))
.map(|e| e.into_path())
.collect()
}
/// Reads `path` as text, replacing invalid UTF-8 sequences with U+FFFD.
///
/// Best-effort: any read error yields an empty string.
pub fn read_lossy(path: &Path) -> String {
    match fs::read(path) {
        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
        Err(_) => String::new(),
    }
}
/// Renders `path` relative to `root` as a string; when `path` is not under
/// `root` it is rendered unchanged.
pub fn rel(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(inner) => inner,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}

View file

@ -0,0 +1,88 @@
//! Integration tests for kei-conflict-scan.
use std::fs;
use std::path::{Path, PathBuf};
use tempfile::TempDir;
/// Path of the compiled `kei-conflict-scan` binary, provided by Cargo.
fn bin() -> PathBuf {
    let exe: &str = env!("CARGO_BIN_EXE_kei-conflict-scan");
    PathBuf::from(exe)
}
/// Writes `body` to `<root>/<rel>`, creating parent directories as needed.
/// Panics on any I/O error (test helper).
fn write(root: &Path, rel: &str, body: &str) {
    let target = root.join(rel);
    match target.parent() {
        Some(dir) => fs::create_dir_all(dir).unwrap(),
        None => {}
    }
    fs::write(&target, body).unwrap();
}
/// Runs the binary with `--path <root>` plus `extra` arguments, asserts a
/// successful exit, and parses stdout as JSON.
fn run(root: &Path, extra: &[&str]) -> serde_json::Value {
    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--path").arg(root.to_string_lossy().into_owned());
    cmd.args(extra);
    let out = cmd.output().unwrap();
    assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr));
    serde_json::from_slice(&out.stdout).unwrap()
}
/// An empty root must yield a report with zero hits.
#[test]
fn empty_tree_is_clean() {
    let dir = TempDir::new().unwrap();
    let report = run(dir.path(), &[]);
    assert_eq!(report["hit_count"], 0);
}
/// "Never: X" in one rule file and "Always: X" in another is a rules hit.
#[test]
fn contradictory_rules_flagged() {
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    write(root, "rules/a.md", "Never: push to github\n");
    write(root, "rules/b.md", "Always: push to github\n");
    let report = run(root, &["--only", "rules"]);
    let hits = report["hit_count"].as_u64().unwrap();
    assert!(hits >= 1, "{}", report);
    assert_eq!(report["conflicts"][0]["category"], "rules");
}
/// Two block files with identical text must be reported as duplicates.
#[test]
fn duplicate_blocks_flagged() {
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    let text =
        "this is a long shared paragraph with many identical words over and over again repeated";
    for name in ["_blocks/a.md", "_blocks/b.md"] {
        write(root, name, text);
    }
    let report = run(root, &["--only", "blocks"]);
    let hits = report["hit_count"].as_u64().unwrap();
    assert!(hits >= 1, "{}", report);
    assert_eq!(report["conflicts"][0]["category"], "blocks");
}
/// A wikilink to a file that does not exist must be reported as an orphan.
#[test]
fn orphan_wikilinks_flagged() {
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    write(root, "docs/a.md", "see [[nonexistent-target]] for details");
    let report = run(root, &["--only", "orphans"]);
    let hits = report["hit_count"].as_u64().unwrap();
    assert!(hits >= 1, "{}", report);
    assert_eq!(report["conflicts"][0]["category"], "orphans");
}
/// A 250-line source file exceeds the file-size limit and must be reported.
#[test]
fn oversize_file_flagged() {
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    let body = "line\n".repeat(250);
    write(root, "src/big.rs", &body);
    let report = run(root, &["--only", "cp"]);
    let hits = report["hit_count"].as_u64().unwrap();
    assert!(hits >= 1, "{}", report);
    assert_eq!(report["conflicts"][0]["category"], "cp");
}
/// Every conflict object must carry the fields the refactor engine reads.
#[test]
fn json_schema_has_required_fields() {
    const REQUIRED: [&str; 6] = [
        "category",
        "severity",
        "files",
        "evidence",
        "suggested_fix",
        "auto_resolvable",
    ];
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    write(root, "rules/a.md", "Never: do X\n");
    write(root, "rules/b.md", "Always: do X\n");
    let report = run(root, &["--only", "rules"]);
    let first = &report["conflicts"][0];
    for key in REQUIRED {
        assert!(first.get(key).is_some(), "missing field {}: {}", key, first);
    }
}

View file

@ -0,0 +1,24 @@
[package]
name = "kei-graph-check"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
description = "Post-refactor graph-integrity gate — wikilinks, block refs, handoffs (v0.13.0)"
[[bin]]
name = "kei-graph-check"
path = "src/main.rs"
[lib]
path = "src/lib.rs"
[dependencies]
clap = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
regex = "1"
walkdir = "2"
anyhow = "1"
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,88 @@
//! Graph resolver — indexes files then walks refs.
use regex::Regex;
use serde::Serialize;
use std::collections::HashSet;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// One reference that does not resolve; serialized as a JSON object.
#[derive(Debug, Clone, Serialize)]
pub struct BrokenRef {
/// File containing the reference, relative to the checked root when possible.
pub source: String,
/// 1-based line number of the reference within `source`.
pub line: usize,
/// Link target, trimmed and lowercased.
pub target: String,
/// Reference kind; the checker in this file only emits `"wikilink"`.
pub kind: String,
}
/// Index of a directory tree, built by `Graph::index`.
pub struct Graph {
/// Lowercased file stems of every regular file found.
pub basenames: HashSet<String>,
/// Paths of every regular file found.
pub files: Vec<PathBuf>,
}
impl Graph {
pub fn index(root: &Path) -> Self {
let mut basenames = HashSet::new();
let mut files = Vec::new();
for e in WalkDir::new(root).into_iter().filter_map(|e| e.ok()) {
if e.file_type().is_file() {
if let Some(stem) = e.path().file_stem().and_then(|s| s.to_str()) {
basenames.insert(stem.to_lowercase());
}
files.push(e.into_path());
}
}
Self { basenames, files }
}
pub fn check(&self, root: &Path, removed: &HashSet<String>) -> Vec<BrokenRef> {
let mut out = Vec::new();
for file in &self.files {
if file.extension().is_none_or(|e| e != "md") {
continue;
}
out.extend(self.check_file(root, file, removed));
}
out
}
fn check_file(&self, root: &Path, file: &Path, removed: &HashSet<String>) -> Vec<BrokenRef> {
let content = fs::read(file)
.map(|b| String::from_utf8_lossy(&b).into_owned())
.unwrap_or_default();
let src = file
.strip_prefix(root)
.unwrap_or(file)
.to_string_lossy()
.into_owned();
let mut out = Vec::new();
for (ln, line) in content.lines().enumerate() {
out.extend(scan_wikilinks(&src, ln + 1, line, &self.basenames, removed));
}
out
}
}
fn scan_wikilinks(
src: &str,
line_no: usize,
line: &str,
index: &HashSet<String>,
removed: &HashSet<String>,
) -> Vec<BrokenRef> {
let rx = Regex::new(r"\[\[([^\]\|#]+?)(?:#[^\]]*)?(?:\|[^\]]*)?\]\]").expect("static regex");
let mut out = Vec::new();
for c in rx.captures_iter(line) {
let target = c[1].trim().to_lowercase();
let broken = !index.contains(&target) || removed.contains(&target);
if broken {
out.push(BrokenRef {
source: src.to_string(),
line: line_no,
target,
kind: "wikilink".to_string(),
});
}
}
out
}

View file

@ -0,0 +1,10 @@
//! kei-graph-check — post-refactor reference-integrity gate.
//!
//! Inputs: a directory root + an optional patch file (advisory only — we
//! detect file deletions/renames declared in the patch header and warn).
//! Output: list of broken references with file:line.
pub mod graph;
pub mod patch_advisory;
pub use graph::{BrokenRef, Graph};

View file

@ -0,0 +1,68 @@
//! kei-graph-check — binary entry.
//!
//! Exit 0 if all refs resolve; exit 2 if any broken. Useful as a gate
//! BEFORE the orchestrator commits the deep-sleep fork branch.
use clap::Parser;
use kei_graph_check::{graph::Graph, patch_advisory};
use std::collections::HashSet;
use std::path::PathBuf;
use std::process::ExitCode;
// Command-line arguments of the graph-check binary.
// Maintainer notes use plain `//` here: `///` doc comments on clap-derive
// items are surfaced as `--help` text.
#[derive(Parser, Debug)]
#[command(name = "kei-graph-check", about = "Post-refactor graph-integrity gate.")]
struct Cli {
/// Root directory (e.g. memory-repo clone).
#[arg(long)]
path: PathBuf,
/// Optional patch file — any `+++ /dev/null` removal or `# removed: <p>`
/// header is treated as a phantom-removed file for the check.
// Parsed by `patch_advisory::parse_removals`; the patch is never applied.
#[arg(long)]
after_diff: Option<PathBuf>,
/// JSON output (default is human).
#[arg(long)]
json: bool,
}
/// Prints a plain-text report to stdout: either an "ok" line or a count
/// line followed by one line per broken reference.
fn emit_human(broken: &[kei_graph_check::graph::BrokenRef]) {
    let count = broken.len();
    if count == 0 {
        println!("kei-graph-check: graph ok (no broken references).");
        return;
    }
    println!("kei-graph-check: {} broken reference(s):", count);
    for item in broken.iter() {
        println!(" {}:{} [{}] -> '{}'", item.source, item.line, item.kind, item.target);
    }
}
/// Prints the broken references to stdout as a pretty-printed JSON object
/// with the keys `broken_count` and `broken`.
fn emit_json(broken: &[kei_graph_check::graph::BrokenRef]) {
    let report = serde_json::json!({ "broken_count": broken.len(), "broken": broken });
    let rendered = serde_json::to_string_pretty(&report).unwrap();
    println!("{}", rendered);
}
/// Entry point: indexes the tree, checks every markdown reference, prints
/// the report, and exits 0 (all resolve), 1 (root not found), or 2 (broken
/// references).
///
/// A `--after-diff` file that does not exist is ignored, but a warning is
/// printed to stderr so the gate does not silently run without the
/// removals the caller intended to simulate.
fn main() -> ExitCode {
    let cli = Cli::parse();
    if !cli.path.exists() {
        eprintln!("kei-graph-check: path not found: {}", cli.path.display());
        return ExitCode::from(1);
    }

    let removed: HashSet<String> = match cli.after_diff.as_deref() {
        Some(patch) if patch.exists() => patch_advisory::parse_removals(patch),
        Some(patch) => {
            // The patch is advisory, so keep going — but say so.
            eprintln!(
                "kei-graph-check: warning: --after-diff file not found, ignoring: {}",
                patch.display()
            );
            HashSet::new()
        }
        None => HashSet::new(),
    };

    let graph = Graph::index(&cli.path);
    let broken = graph.check(&cli.path, &removed);
    if cli.json {
        emit_json(&broken);
    } else {
        emit_human(&broken);
    }

    if broken.is_empty() {
        ExitCode::SUCCESS
    } else {
        ExitCode::from(2)
    }
}

View file

@ -0,0 +1,33 @@
//! Patch-advisory: parses a unified-diff-like patch for file removals
//! or renames. Returns basenames the patch claims to remove.
use std::collections::HashSet;
use std::fs;
use std::path::Path;
/// Returns the lowercased file stems that `patch_file` declares as removed.
///
/// Two forms are recognised:
/// - a unified-diff header pair `--- a/<path>` immediately followed by
///   `+++ /dev/null` (a full-file deletion);
/// - a lightweight header line `# removed: <path>`.
///
/// Files that are merely modified (`--- a/x` followed by `+++ b/x`) are not
/// reported, even when the same patch deletes some other file.
///
/// Best-effort: an unreadable patch yields an empty set, and invalid UTF-8
/// is replaced rather than discarding the whole patch.
pub fn parse_removals(patch_file: &Path) -> HashSet<String> {
    let text = fs::read(patch_file)
        .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
        .unwrap_or_default();
    let mut out = HashSet::new();
    // Path from a `--- a/<path>` header seen on the previous line, if any.
    let mut pending: Option<&str> = None;
    for line in text.lines() {
        if let Some(old_path) = pending.take() {
            if line.trim_end() == "+++ /dev/null" {
                add_basename(old_path, &mut out);
            }
        }
        if let Some(stripped) = line.strip_prefix("--- a/") {
            // Some diff tools append "\t<timestamp>" after the path.
            let path = stripped.split('\t').next().unwrap_or(stripped).trim_end();
            pending = Some(path);
        }
        // Also accept a lightweight header `# removed: path`
        if let Some(s) = line.strip_prefix("# removed: ") {
            add_basename(s.trim(), &mut out);
        }
    }
    out
}

/// Inserts the lowercased file stem of `rel` into `out`; paths without a
/// UTF-8 stem are ignored.
fn add_basename(rel: &str, out: &mut HashSet<String>) {
    if let Some(name) = Path::new(rel).file_stem().and_then(|s| s.to_str()) {
        out.insert(name.to_lowercase());
    }
}

View file

@ -0,0 +1,75 @@
//! Integration tests for kei-graph-check.
use std::fs;
use std::path::{Path, PathBuf};
use tempfile::TempDir;
/// Path of the compiled `kei-graph-check` binary, provided by Cargo.
fn bin() -> PathBuf {
    let exe: &str = env!("CARGO_BIN_EXE_kei-graph-check");
    PathBuf::from(exe)
}
/// Writes `body` to `<root>/<rel>`, creating parent directories as needed.
/// Panics on any I/O error (test helper).
fn write(root: &Path, rel: &str, body: &str) {
    let target = root.join(rel);
    match target.parent() {
        Some(dir) => fs::create_dir_all(dir).unwrap(),
        None => {}
    }
    fs::write(target, body).unwrap();
}
/// A tree in which every wikilink resolves must exit successfully.
#[test]
fn clean_graph_exits_zero() {
    let dir = TempDir::new().unwrap();
    write(dir.path(), "a.md", "see [[b]]");
    write(dir.path(), "b.md", "hello");
    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--path").arg(dir.path());
    let out = cmd.output().unwrap();
    assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr));
}
/// A wikilink to a missing file must make the gate exit with code 2.
#[test]
fn broken_wikilink_exits_two() {
    let dir = TempDir::new().unwrap();
    write(dir.path(), "a.md", "see [[ghost]]");
    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--path").arg(dir.path());
    let out = cmd.output().unwrap();
    assert_eq!(out.status.code(), Some(2));
}
/// A `# removed:` header in the advisory patch must break links to that
/// file even though the file is still on disk.
#[test]
fn patch_removal_breaks_graph() {
    let dir = TempDir::new().unwrap();
    write(dir.path(), "a.md", "see [[b]]");
    write(dir.path(), "b.md", "hello");
    let patch = dir.path().join("p.patch");
    fs::write(&patch, "# removed: b.md\n").unwrap();
    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--path").arg(dir.path());
    cmd.arg("--after-diff").arg(&patch);
    let out = cmd.output().unwrap();
    assert_eq!(out.status.code(), Some(2));
}
/// `--json` must emit `broken_count` plus a `broken` array whose entries
/// carry `kind` and `target`.
#[test]
fn json_output_schema() {
    let dir = TempDir::new().unwrap();
    write(dir.path(), "a.md", "see [[ghost]]");
    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--path").arg(dir.path()).arg("--json");
    let out = cmd.output().unwrap();
    let report: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap();
    assert_eq!(report["broken_count"], 1);
    let first = &report["broken"][0];
    assert_eq!(first["kind"], "wikilink");
    assert_eq!(first["target"], "ghost");
}

View file

@ -0,0 +1,23 @@
[package]
name = "kei-refactor-engine"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
description = "Deep-sleep refactor-plan generator (consumes kei-conflict-scan JSON) (v0.13.0)"
[[bin]]
name = "kei-refactor-engine"
path = "src/main.rs"
[lib]
path = "src/lib.rs"
[dependencies]
clap = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
anyhow = "1"
similar = "2"
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,41 @@
//! Conflict input schema (mirror of kei-conflict-scan output).
//!
//! Deserialized locally so this crate does not depend on kei-conflict-scan
//! as a library — the pipe is JSON, both sides speak the same contract.
use anyhow::{Context, Result};
use serde::Deserialize;
use std::fs;
use std::path::Path;
/// One conflict as read from the scanner's JSON output.
///
/// `category` and `severity` are kept as plain strings here rather than
/// enums; this crate does not link against the scanner's types.
#[derive(Debug, Clone, Deserialize)]
pub struct Conflict {
pub category: String,
pub severity: String,
pub files: Vec<String>,
pub evidence: String,
pub suggested_fix: String,
pub auto_resolvable: bool,
}
/// Top-level JSON object; only the `conflicts` array is read.
#[derive(Debug, Deserialize)]
struct Wrapper {
// A missing `conflicts` key deserializes to an empty list.
#[serde(default)]
pub conflicts: Vec<Conflict>,
}
/// Read the conflict list from the JSON file at `path`.
///
/// # Errors
/// Fails with context `read <path>` when the file cannot be read and with
/// context `parse JSON` when its contents do not match the expected schema.
pub fn read_conflicts(path: &Path) -> Result<Vec<Conflict>> {
    let raw = fs::read(path).with_context(|| format!("read {}", path.display()))?;
    serde_json::from_slice::<Wrapper>(&raw)
        .context("parse JSON")
        .map(|envelope| envelope.conflicts)
}
pub fn read_from_stdin() -> Result<Vec<Conflict>> {
use std::io::Read;
let mut buf = String::new();
std::io::stdin()
.read_to_string(&mut buf)
.context("read stdin")?;
let w: Wrapper = serde_json::from_str(&buf).context("parse JSON")?;
Ok(w.conflicts)
}

View file

@ -0,0 +1,16 @@
//! kei-refactor-engine — library surface.
//!
//! Consumes `kei-conflict-scan` JSON; produces a structured refactor plan
//! (markdown) and, optionally, a patch file for user `git apply` review.
//!
//! Zero-conflict guarantee: any conflict whose `auto_resolvable = false`
//! is included in the plan under "Requires human decision" and EXCLUDED
//! from the generated patch.
pub mod input;
pub mod plan;
pub mod patch;
pub mod render;
pub use input::{read_conflicts, Conflict};
pub use plan::{Plan, PlanItem, Resolution};

View file

@ -0,0 +1,94 @@
//! kei-refactor-engine — binary entry.
//!
//! Usage:
//! kei-refactor-engine --input conflicts.json --plan-only > plan.md
//! kei-refactor-engine --input conflicts.json --apply-to-branch deep-sleep/2026-04-22 \
//! --plan-out plan.md --patch-out changes.patch
use anyhow::Result;
use clap::Parser;
use kei_refactor_engine::input::{read_conflicts, read_from_stdin};
use kei_refactor_engine::plan::Plan;
use kei_refactor_engine::{patch, render};
use std::path::PathBuf;
use std::process::ExitCode;
// Command-line arguments of the `kei-refactor-engine` binary (clap derive).
// The `///` comments on the fields are used by clap as `--help` text, so
// explanatory notes here are written as plain `//` comments instead.
#[derive(Parser, Debug)]
#[command(name = "kei-refactor-engine", about = "Deep-sleep refactor-plan generator.")]
struct Cli {
// `None` and `-` both mean "read from stdin" (see `load`).
/// Input JSON file (output of kei-conflict-scan). Use `-` for stdin.
#[arg(long)]
input: Option<PathBuf>,
// NOTE(review): this flag is parsed but `run` never reads it; whether a
// patch is written is decided solely by `--apply-to-branch`.
/// Plan-only mode (default). Prints markdown to stdout if no --plan-out.
#[arg(long, default_value_t = true)]
plan_only: bool,
// Presence of this option switches on patch generation; the value is only
// embedded in the rendered plan and patch header.
/// Apply mode — also write a patch file; takes the branch name.
#[arg(long)]
apply_to_branch: Option<String>,
// When absent, the plan is printed to stdout.
/// Optional explicit path for the markdown plan.
#[arg(long)]
plan_out: Option<PathBuf>,
// When absent in apply mode, the patch goes to `deep-sleep.patch` in the
// current directory (see `maybe_write_patch`).
/// Optional explicit path for the patch file.
#[arg(long)]
patch_out: Option<PathBuf>,
}
/// Load the conflicts selected by `--input`.
///
/// A real path is read from disk; a missing `--input` or the literal `-`
/// reads from stdin.
fn load(cli: &Cli) -> Result<Vec<kei_refactor_engine::input::Conflict>> {
    match cli.input.as_deref() {
        Some(path) if path.to_string_lossy() != "-" => read_conflicts(path),
        _ => read_from_stdin(),
    }
}
/// Render the plan as markdown and emit it.
///
/// With `out` set the markdown is written to that file; otherwise it is
/// printed to stdout without a trailing newline being added.
fn write_plan(plan: &Plan, branch: Option<&str>, out: Option<&PathBuf>) -> Result<()> {
    let markdown = render::render(plan, branch);
    if let Some(dest) = out {
        std::fs::write(dest, markdown)?;
    } else {
        print!("{}", markdown);
    }
    Ok(())
}
/// Write the preview patch for `plan` and return the number of auto-apply
/// items it contains.
///
/// The patch goes to `out` when given, otherwise to `deep-sleep.patch`
/// relative to the current working directory.
fn maybe_write_patch(plan: &Plan, branch: &str, out: Option<&PathBuf>) -> Result<usize> {
    let fallback = PathBuf::from("deep-sleep.patch");
    let destination = match out {
        Some(explicit) => explicit,
        None => &fallback,
    };
    patch::write_patch(plan, branch, destination)
}
/// Execute one invocation: load conflicts, build the plan, emit the markdown
/// and, when `--apply-to-branch` was given, also write the patch file and
/// report the item counts on stderr.
fn run(cli: &Cli) -> Result<ExitCode> {
    let plan = Plan::from_conflicts(&load(cli)?);
    let branch = cli.apply_to_branch.as_deref();

    write_plan(&plan, branch, cli.plan_out.as_ref())?;

    match branch {
        None => {}
        Some(name) => {
            let written = maybe_write_patch(&plan, name, cli.patch_out.as_ref())?;
            let excluded = plan.manual_items().len();
            eprintln!(
                "kei-refactor-engine: wrote patch with {} auto-apply item(s); {} human-decision item(s) excluded.",
                written, excluded,
            );
        }
    }
    Ok(ExitCode::SUCCESS)
}
/// Binary entry point: parse the CLI, run, and map any error to exit code 1.
///
/// Errors are printed with anyhow's alternate format (`{:#}`) so the whole
/// context chain is shown (e.g. `parse JSON: <serde error>`), not just the
/// outermost context string.
fn main() -> ExitCode {
    let cli = Cli::parse();
    match run(&cli) {
        Ok(code) => code,
        Err(e) => {
            eprintln!("kei-refactor-engine: {e:#}");
            ExitCode::from(1)
        }
    }
}

View file

@ -0,0 +1,56 @@
//! Patch synthesizer — writes a unified-diff file for `git apply` preview.
//!
//! This crate NEVER runs git. Per RULE 0.13 the orchestrator is the only
//! party that commits. We emit `.patch` text the user reads + applies.
//!
//! Only items whose resolution == AutoApply are materialised here; the
//! zero-conflict guarantee keeps `requires_human_decision` items out.
use crate::plan::{Plan, PlanItem, Resolution};
use anyhow::Result;
use std::fs;
use std::path::Path;
/// Write the preview patch for `plan` to `out_file`.
///
/// The file consists of the comment header followed by one annotated block
/// per auto-apply item; manual items are only counted in the header.
/// Returns the number of auto-apply items written.
///
/// # Errors
/// Propagates the I/O error if `out_file` cannot be written.
pub fn write_patch(plan: &Plan, branch: &str, out_file: &Path) -> Result<usize> {
    let auto_items = plan.auto_items();
    let applied = auto_items.len();

    let mut text = header(branch, applied, plan.manual_items().len());
    text.extend(auto_items.iter().map(|item| hunk_for(item)));

    fs::write(out_file, text)?;
    Ok(applied)
}
/// Build the `#`-prefixed comment header placed at the top of the patch file:
/// branch intent, item counts, and a review hint, followed by a blank line.
fn header(branch: &str, auto: usize, manual: usize) -> String {
    let lines = [
        "# kei-refactor-engine preview patch".to_string(),
        format!("# Branch intent: {}", branch),
        format!("# Auto-apply items: {}", auto),
        format!(
            "# Human-decision items (NOT in this patch, see plan): {}",
            manual
        ),
        "# Review with `git apply --check <file>` before merging.".to_string(),
    ];
    let mut text = lines.join("\n");
    text.push_str("\n\n");
    text
}
/// Render one auto-apply item as an annotated patch stanza.
///
/// Deliberately conservative: no file content is invented. The stanza is a
/// `---`/`+++` file header for the item's first file (`<unknown>` when the
/// item lists none) followed by `#` comment lines describing the intent.
// NOTE(review): the stanza contains no `@@` hunks, so it documents intent
// rather than carrying applicable changes — confirm this is what consumers expect.
fn hunk_for(item: &PlanItem) -> String {
    let primary = match item.files.first() {
        Some(path) => path.clone(),
        None => "<unknown>".into(),
    };
    let all_files = item.files.join(", ");
    format!(
        "--- a/{file}\n+++ b/{file}\n# INTENT ({cat}/{sev}): {why}\n# FILES: {files}\n# EXAMPLE: {ex}\n# TRADEOFF: {tr}\n\n",
        file = primary,
        cat = item.category,
        sev = item.severity,
        why = item.why,
        files = all_files,
        ex = item.example,
        tr = item.tradeoff,
    )
}
/// Return the plan items that require a human decision, in plan order —
/// i.e. the items that are never written to the patch.
pub fn excluded_manual(plan: &Plan) -> Vec<&PlanItem> {
    let mut manual = Vec::new();
    for item in &plan.items {
        if matches!(item.resolution, Resolution::RequiresHumanDecision) {
            manual.push(item);
        }
    }
    manual
}

View file

@ -0,0 +1,92 @@
//! Plan builder — turns Conflict list into PlanItems grouped by resolution.
use crate::input::Conflict;
use serde::Serialize;
/// How a plan item is to be handled. Serialized in snake_case
/// (`auto_apply` / `requires_human_decision`).
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Resolution {
/// Engine can deterministically propose a patch.
AutoApply,
/// Engine flags, human decides — NEVER in patch.
RequiresHumanDecision,
}
/// One entry of the refactor plan, derived from a single input `Conflict`.
#[derive(Debug, Clone, Serialize)]
pub struct PlanItem {
/// Whether the item may appear in the patch or needs a human decision.
pub resolution: Resolution,
/// Conflict category, copied from the input.
pub category: String,
/// Severity label, copied from the input.
pub severity: String,
/// Files involved, copied from the input.
pub files: Vec<String>,
/// Evidence text from the input conflict.
pub why: String,
/// Suggested way to resolve the conflict (see `build_example`).
pub example: String,
/// Known downside of the suggested resolution (see `build_tradeoff`).
pub tradeoff: String,
}
/// The full refactor plan: one `PlanItem` per input conflict, in input order.
#[derive(Debug, Serialize)]
pub struct Plan {
/// All items, auto-apply and manual alike; use `auto_items` /
/// `manual_items` for the filtered views.
pub items: Vec<PlanItem>,
}
impl Plan {
pub fn from_conflicts(conflicts: &[Conflict]) -> Self {
let items = conflicts.iter().map(to_plan_item).collect();
Plan { items }
}
pub fn auto_items(&self) -> Vec<&PlanItem> {
self.items
.iter()
.filter(|i| i.resolution == Resolution::AutoApply)
.collect()
}
pub fn manual_items(&self) -> Vec<&PlanItem> {
self.items
.iter()
.filter(|i| i.resolution == Resolution::RequiresHumanDecision)
.collect()
}
}
/// Convert one input conflict into a plan item.
///
/// `auto_resolvable` alone decides the resolution; category, severity and
/// files are copied, the evidence becomes `why`, and example/tradeoff text
/// is derived from the category.
fn to_plan_item(c: &Conflict) -> PlanItem {
    let example = build_example(c);
    let tradeoff = build_tradeoff(c);
    let resolution = match c.auto_resolvable {
        true => Resolution::AutoApply,
        false => Resolution::RequiresHumanDecision,
    };
    PlanItem {
        resolution,
        category: c.category.clone(),
        severity: c.severity.clone(),
        files: c.files.clone(),
        why: c.evidence.clone(),
        example,
        tradeoff,
    }
}
/// Produce the "Example" text for a conflict.
///
/// The five known categories get fixed guidance (two of them mention the
/// conflict's first file, or an empty string when there is none); any other
/// category falls back to the scanner's own `suggested_fix`.
fn build_example(c: &Conflict) -> String {
    let first_file = || c.files.first().cloned().unwrap_or_default();
    let category = c.category.as_str();

    if category == "blocks" {
        return format!(
            "keep `{}` as canonical; add a `> Deprecated: see <canonical>` header to the other",
            first_file()
        );
    }
    if category == "orphans" {
        return format!(
            "edit {} to remove the stale link, OR create the target",
            first_file()
        );
    }
    let fixed = match category {
        "hooks" => "union the matchers in one file; delete the other",
        "rules" => "narrow one directive with a scope qualifier, keep the other strict",
        "cp" => "extract the oversize part into a new sibling file",
        _ => return c.suggested_fix.clone(),
    };
    fixed.to_string()
}
/// Produce the "Tradeoff" text for a conflict, keyed purely on its category.
/// Unrecognised categories get a generic "human review required" message.
fn build_tradeoff(c: &Conflict) -> String {
    const KNOWN: [(&str, &str); 5] = [
        ("blocks", "merge loses cross-link context; kept in deprecation header"),
        ("orphans", "deleting a stale ref may hide an intended-but-missing file"),
        ("hooks", "merged hook runs all logic on all matches; fine if logic is idempotent"),
        ("rules", "narrowing a rule reduces coverage; document the carve-out in the rule file"),
        ("cp", "split increases file count; offset by smaller cognitive units"),
    ];
    let category = c.category.as_str();
    KNOWN
        .iter()
        .find(|(name, _)| *name == category)
        .map(|(_, text)| *text)
        .unwrap_or("engine cannot evaluate tradeoff; human review required")
        .to_string()
}

View file

@ -0,0 +1,76 @@
//! Markdown renderer for the refactor plan.
use crate::plan::{Plan, PlanItem};
/// Render the whole plan as a markdown document.
///
/// Sections appear in a fixed order: title, optional "Proposed fork branch"
/// line (only when `branch` is given), summary, auto-apply items, manual
/// items, footer.
pub fn render(plan: &Plan, branch: Option<&str>) -> String {
    let title = "# Deep-sleep refactor plan\n\n".to_string();
    let branch_line = branch
        .map(|name| format!("Proposed fork branch: `{}`\n\n", name))
        .unwrap_or_default();
    [
        title,
        branch_line,
        summary(plan),
        auto_section(plan),
        manual_section(plan),
        footer(),
    ]
    .concat()
}
/// Render the "## Summary" section: total, auto-apply and manual counts.
fn summary(plan: &Plan) -> String {
    let auto = plan.auto_items().len();
    let manual = plan.manual_items().len();

    let mut section = String::from("## Summary\n\n");
    section.push_str(&format!("- Total conflicts: **{}**\n", plan.items.len()));
    section.push_str(&format!("- Auto-apply candidates: **{}**\n", auto));
    section.push_str(&format!(
        "- Requires human decision (zero-conflict guarantee excludes these from patch): **{}**\n\n",
        manual
    ));
    section
}
/// Render the auto-apply section: a numbered block per item (starting at 1),
/// or a short placeholder paragraph when there are no auto-apply items.
fn auto_section(plan: &Plan) -> String {
    let candidates = plan.auto_items();
    if candidates.is_empty() {
        return String::from("## Auto-apply\n\n_No safe auto-apply changes this cycle._\n\n");
    }
    let blocks: String = candidates
        .iter()
        .enumerate()
        .map(|(idx, item)| item_block(idx + 1, item))
        .collect();
    format!(
        "## Auto-apply (engine-proposed; review before merge)\n\n{}",
        blocks
    )
}
/// Render the "Requires human decision" section: a numbered block per item
/// (numbering restarts at 1), or a placeholder when there are none.
fn manual_section(plan: &Plan) -> String {
    let pending = plan.manual_items();
    if pending.is_empty() {
        return String::from("## Requires human decision\n\n_None this cycle._\n\n");
    }
    let blocks: String = pending
        .iter()
        .enumerate()
        .map(|(idx, item)| item_block(idx + 1, item))
        .collect();
    format!("## Requires human decision (NOT in patch)\n\n{}", blocks)
}
/// Render one plan item as a level-3 heading (`n. [category/severity] files`,
/// files joined with ` + `) followed by Why / Example / Tradeoff bullets.
fn item_block(n: usize, item: &PlanItem) -> String {
    let heading = format!(
        "### {}. [{}/{}] {}\n\n",
        n,
        item.category,
        item.severity,
        item.files.join(" + ")
    );
    let bullets = format!(
        "- **Why:** {}\n- **Example:** {}\n- **Tradeoff:** {}\n\n",
        item.why, item.example, item.tradeoff
    );
    heading + &bullets
}
/// Fixed closing section of the plan: a horizontal rule plus the generator
/// attribution and the zero-conflict guarantee statement.
fn footer() -> String {
    let sentences = [
        "Generated by `kei-refactor-engine` (v0.13.0).",
        "Zero-conflict guarantee: no item above marked `requires human decision` appears in the companion patch file.",
    ];
    let mut text = String::from("---\n\n");
    text.push_str(&sentences.join(" "));
    text.push('\n');
    text
}

View file

@ -0,0 +1,125 @@
//! Integration tests for kei-refactor-engine.
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Path of the `kei-refactor-engine` binary Cargo built for these tests.
fn bin() -> PathBuf {
    let exe: &str = env!("CARGO_BIN_EXE_kei-refactor-engine");
    exe.into()
}
/// Build a scanner-style JSON document for the tests.
///
/// It always contains one auto-resolvable `blocks` conflict; with
/// `extra_manual` it also contains one non-auto-resolvable `rules` conflict.
fn sample_json(extra_manual: bool) -> String {
    let auto_item = serde_json::json!({
        "category": "blocks",
        "severity": "medium",
        "files": ["_blocks/a.md", "_blocks/b.md"],
        "evidence": "shingle-Jaccard 72% overlap",
        "suggested_fix": "keep better-cited",
        "auto_resolvable": true
    });
    let manual_item = serde_json::json!({
        "category": "rules",
        "severity": "high",
        "files": ["rules/x.md", "rules/y.md"],
        "evidence": "contradictory directive on 'push'",
        "suggested_fix": "review both",
        "auto_resolvable": false
    });
    let items: Vec<serde_json::Value> = if extra_manual {
        vec![auto_item, manual_item]
    } else {
        vec![auto_item]
    };
    serde_json::json!({ "hit_count": items.len(), "conflicts": items }).to_string()
}
/// With only `--input`, the markdown plan is printed to stdout.
#[test]
fn plan_only_prints_markdown() {
    let workdir = TempDir::new().unwrap();
    let conflicts_file = workdir.path().join("c.json");
    fs::write(&conflicts_file, sample_json(false)).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--input").arg(&conflicts_file);
    let result = cmd.output().unwrap();

    assert!(result.status.success());
    let markdown = String::from_utf8(result.stdout).unwrap();
    assert!(markdown.contains("# Deep-sleep refactor plan"));
    assert!(markdown.contains("Auto-apply"));
}
/// Zero-conflict guarantee: a manual item shows up in the plan but its files
/// must not appear anywhere in the generated patch.
#[test]
fn manual_items_listed_but_not_in_patch() {
    let workdir = TempDir::new().unwrap();
    let dir = workdir.path();
    let conflicts_file = dir.join("c.json");
    let plan_file = dir.join("plan.md");
    let patch_file = dir.join("p.patch");
    fs::write(&conflicts_file, sample_json(true)).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--input")
        .arg(&conflicts_file)
        .arg("--apply-to-branch")
        .arg("deep-sleep/test")
        .arg("--plan-out")
        .arg(&plan_file)
        .arg("--patch-out")
        .arg(&patch_file);
    let result = cmd.output().unwrap();
    assert!(result.status.success(), "stderr: {}", String::from_utf8_lossy(&result.stderr));

    let plan_text = fs::read_to_string(&plan_file).unwrap();
    assert!(plan_text.contains("Requires human decision"));

    let patch = fs::read_to_string(&patch_file).unwrap();
    // patch must NOT reference rules/x.md from the manual item
    assert!(!patch.contains("rules/x.md"), "patch leaked manual item: {}", patch);
    assert!(patch.contains("_blocks/a.md"));
}
/// An empty conflict list still yields a successful run and a plan whose
/// summary reports zero conflicts.
#[test]
fn empty_conflicts_produce_valid_plan() {
    let workdir = TempDir::new().unwrap();
    let conflicts_file = workdir.path().join("c.json");
    fs::write(&conflicts_file, r#"{"hit_count": 0, "conflicts": []}"#).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--input").arg(&conflicts_file);
    let result = cmd.output().unwrap();

    assert!(result.status.success());
    let markdown = String::from_utf8(result.stdout).unwrap();
    assert!(markdown.contains("Total conflicts: **0**"));
}
/// `--input -` makes the binary read the conflict JSON from stdin.
#[test]
fn stdin_input_works() {
    use std::io::Write;
    use std::process::{Command, Stdio};

    // kept for parity with the other tests; not otherwise used
    let _unused_dir = TempDir::new().unwrap();

    let mut child = Command::new(bin())
        .arg("--input")
        .arg("-")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .unwrap();

    let payload = sample_json(false);
    child
        .stdin
        .as_mut()
        .unwrap()
        .write_all(payload.as_bytes())
        .unwrap();

    // `wait_with_output` closes the child's stdin before waiting.
    let result = child.wait_with_output().unwrap();
    assert!(result.status.success());
    let stdout_text = String::from_utf8(result.stdout).unwrap();
    assert!(stdout_text.contains("refactor plan"));
}
/// The patch header must state the auto-apply count and mention the
/// human-decision items.
#[test]
fn patch_header_shows_counts() {
    let workdir = TempDir::new().unwrap();
    let conflicts_file = workdir.path().join("c.json");
    let patch_file = workdir.path().join("p.patch");
    fs::write(&conflicts_file, sample_json(true)).unwrap();

    let mut cmd = std::process::Command::new(bin());
    cmd.arg("--input")
        .arg(&conflicts_file)
        .arg("--apply-to-branch")
        .arg("deep-sleep/a")
        .arg("--patch-out")
        .arg(&patch_file);
    cmd.output().unwrap();

    let patch = fs::read_to_string(&patch_file).unwrap();
    assert!(patch.contains("Auto-apply items: 1"));
    assert!(patch.contains("Human-decision items"));
}

View file

@ -0,0 +1,24 @@
[package]
name = "kei-store"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
description = "Memory-repo backend abstraction — GitHub/Forgejo/Gitea/Filesystem/S3 (v0.13.0)"
[[bin]]
name = "kei-store"
path = "src/main.rs"
[lib]
path = "src/lib.rs"
[dependencies]
clap = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
anyhow = "1"
toml = "0.8"
git2 = { version = "0.19", default-features = false }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,105 @@
//! TOML config loader.
//!
//! Example `store-config.toml`:
//!
//! ```toml
//! [active]
//! backend = "github"
//!
//! [github]
//! url = "git@github.com:user/memory-repo.git"
//! ssh_key_env = "KEI_MEMORY_SSH_KEY"
//!
//! [filesystem]
//! path = "~/.claude/memory/sync-repo"
//! ```
//!
//! Secrets (PATs, SSH keys) live in `~/.claude/secrets/.env` per RULE 0.8;
//! this file only stores env-var NAMES.
use anyhow::{Context, Result};
use serde::Deserialize;
use std::fs;
use std::path::Path;
/// Deserialized form of `store-config.toml`.
///
/// `[active]` is required when parsing; every backend table is optional and
/// falls back to its `Default` (all fields `None`) when absent.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct Config {
/// Which backend is in use and where the local working copy lives.
pub active: Active,
/// Settings for the `filesystem` backend.
#[serde(default)]
pub filesystem: FilesystemCfg,
/// Settings for the `github` backend.
#[serde(default)]
pub github: GitRemoteCfg,
/// Settings for the `forgejo` backend.
#[serde(default)]
pub forgejo: GitRemoteCfg,
/// Settings for the `gitea` backend.
#[serde(default)]
pub gitea: GitRemoteCfg,
/// Settings for the `s3` backend.
#[serde(default)]
pub s3: S3Cfg,
}
/// The `[active]` table: backend selection plus local checkout path.
// NOTE(review): the derived `Default` yields an empty `local_path`, whereas
// deserialization falls back to `default_local_path()` — confirm that
// `Active::default()` is never relied on for a usable path.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct Active {
/// Backend name; the factory accepts `filesystem`, `github`, `forgejo`,
/// `gitea` and `s3`.
pub backend: String,
/// Local repository path (a leading `~/` is expanded on use). Defaults to
/// `~/.claude/memory/sync-repo` when omitted from the TOML.
#[serde(default = "default_local_path")]
pub local_path: String,
}
/// Serde default for `Active::local_path` (tilde left unexpanded).
fn default_local_path() -> String {
    String::from("~/.claude/memory/sync-repo")
}
/// The `[filesystem]` table.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct FilesystemCfg {
/// Repository root for the filesystem backend; when `None` the factory
/// uses `active.local_path` instead.
#[serde(default)]
pub path: Option<String>,
}
/// Settings shared by the git-remote backends (`[github]`, `[forgejo]`,
/// `[gitea]`). Only env-var NAMES are stored here, never secret values.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct GitRemoteCfg {
/// Remote URL used when the repository has no `origin` remote yet.
#[serde(default)]
pub url: Option<String>,
/// Name of the env var holding the path to an SSH private key.
#[serde(default)]
pub ssh_key_env: Option<String>,
/// Name of the env var holding a personal access token for HTTPS auth.
#[serde(default)]
pub pat_env: Option<String>,
}
/// The `[s3]` table. In the code visible here only `cache_path` is consumed
/// (by the factory); the remaining fields are carried on the config for the
/// store implementation.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct S3Cfg {
/// Service endpoint URL.
#[serde(default)]
pub endpoint: Option<String>,
/// Bucket name.
#[serde(default)]
pub bucket: Option<String>,
/// Region identifier.
#[serde(default)]
pub region: Option<String>,
/// Name of the env var holding the access key (presumably; verify against
/// the production S3 implementation once it exists).
#[serde(default)]
pub access_key_env: Option<String>,
/// Name of the env var holding the secret key (presumably; see above).
#[serde(default)]
pub secret_key_env: Option<String>,
/// Local cache / manifest root. REQUIRED — S3 impl stores a manifest
/// there and (in stub mode) serves read/write from the cache.
#[serde(default)]
pub cache_path: Option<String>,
}
impl Config {
    /// Read and parse the TOML config at `path`.
    ///
    /// # Errors
    /// Fails with context `read <path>` when the file cannot be read and
    /// with context `parse store-config.toml` when it is not valid config.
    pub fn load(path: &Path) -> Result<Self> {
        let raw = fs::read_to_string(path)
            .with_context(|| format!("read {}", path.display()))?;
        toml::from_str::<Config>(&raw).context("parse store-config.toml")
    }

    /// `active.local_path` with a leading `~/` expanded via `expand_tilde`.
    pub fn expanded_local_path(&self) -> String {
        let configured = self.active.local_path.as_str();
        expand_tilde(configured)
    }
}
/// Expand a leading home-directory shorthand using the `HOME` env var.
///
/// * `"~"` becomes `$HOME`.
/// * `"~/rest"` becomes `$HOME/rest`.
/// * Anything else (including `~user/...`) is returned unchanged.
///
/// When `HOME` is unset or not valid Unicode the input is returned as-is.
pub fn expand_tilde(p: &str) -> String {
    // A bare `~` has no `~/` prefix, so handle it separately.
    if p == "~" {
        return std::env::var("HOME").unwrap_or_else(|_| p.to_string());
    }
    if let Some(rest) = p.strip_prefix("~/") {
        if let Ok(home) = std::env::var("HOME") {
            return format!("{home}/{rest}");
        }
    }
    p.to_string()
}

View file

@ -0,0 +1,34 @@
//! Factory — construct a `Box<dyn MemoryStore>` from a Config.
use crate::config::{expand_tilde, Config};
use crate::{filesystem::FilesystemStore, forgejo::ForgejoStore, gitea::GiteaStore,
github::GitHubStore, s3::S3Store};
use crate::store_trait::MemoryStore;
use anyhow::{anyhow, Context, Result};
use std::path::PathBuf;
pub fn build_store(cfg: &Config) -> Result<Box<dyn MemoryStore>> {
let local = PathBuf::from(cfg.expanded_local_path());
match cfg.active.backend.as_str() {
"filesystem" => {
let p = cfg.filesystem.path.as_deref().map(expand_tilde);
let path = p.map(PathBuf::from).unwrap_or(local);
Ok(Box::new(FilesystemStore::new(path)?))
}
"github" => Ok(Box::new(GitHubStore::new(local, cfg.github.clone())?)),
"forgejo" => Ok(Box::new(ForgejoStore::new(local, cfg.forgejo.clone())?)),
"gitea" => Ok(Box::new(GiteaStore::new(local, cfg.gitea.clone())?)),
"s3" => {
let cache = cfg
.s3
.cache_path
.as_deref()
.map(expand_tilde)
.map(PathBuf::from)
.ok_or_else(|| anyhow!("s3 backend requires s3.cache_path"))?;
Ok(Box::new(S3Store::new(cache, cfg.s3.clone())?))
}
other => Err(anyhow!("unknown backend: {other}"))
.context("supported: filesystem | github | forgejo | gitea | s3"),
}
}

View file

@ -0,0 +1,105 @@
//! FilesystemStore — local `.git` repo, no remotes.
//!
//! Reuses git2 for branch/commit so behavior parity with remote stores is
//! maintained. `push`/`pull` are intentional no-ops.
use crate::store_trait::MemoryStore;
use anyhow::{Context, Result};
use std::fs;
use std::path::{Path, PathBuf};
/// Store backed by a local git repository with no remote.
pub struct FilesystemStore {
/// Repository root; every relative path handed to the store is joined
/// onto this directory.
pub root: PathBuf,
}
impl FilesystemStore {
pub fn new(root: impl Into<PathBuf>) -> Result<Self> {
let root = root.into();
fs::create_dir_all(&root).with_context(|| format!("mkdir {}", root.display()))?;
ensure_repo(&root)?;
Ok(Self { root })
}
fn full(&self, rel: &str) -> PathBuf {
self.root.join(rel)
}
}
/// Initialise a git repository at `root` unless `root/.git` already exists.
fn ensure_repo(root: &Path) -> Result<()> {
    let already_initialised = root.join(".git").exists();
    if !already_initialised {
        git2::Repository::init(root).context("git init")?;
    }
    Ok(())
}
impl MemoryStore for FilesystemStore {
    /// Read the file at the store-relative `path`.
    fn read(&self, path: &str) -> Result<Vec<u8>> {
        let target = self.full(path);
        fs::read(target).with_context(|| format!("read {}", path))
    }

    /// Write `bytes` to the store-relative `path`, creating parent
    /// directories first.
    fn write(&self, path: &str, bytes: &[u8]) -> Result<()> {
        let target = self.full(path);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir)?;
        }
        fs::write(&target, bytes).with_context(|| format!("write {}", path))
    }

    /// List the names of regular files directly inside `dir`, sorted.
    /// A missing directory yields an empty list; names that are not valid
    /// UTF-8 are skipped.
    fn list(&self, dir: &str) -> Result<Vec<String>> {
        let target = self.full(dir);
        if !target.exists() {
            return Ok(Vec::new());
        }
        let mut names = Vec::new();
        for entry in fs::read_dir(&target)? {
            let entry = entry?;
            if !entry.file_type()?.is_file() {
                continue;
            }
            if let Some(name) = entry.file_name().to_str() {
                names.push(name.to_owned());
            }
        }
        names.sort();
        Ok(names)
    }

    /// Create local branch `name` at the current HEAD commit.
    ///
    /// Does nothing when the branch already exists. When HEAD cannot be
    /// resolved to a commit (e.g. an empty repository) this is a silent
    /// no-op; the first commit will be on the default branch.
    fn branch(&self, name: &str) -> Result<()> {
        let repo = git2::Repository::open(&self.root)?;
        let exists = repo.find_branch(name, git2::BranchType::Local).is_ok();
        if exists {
            return Ok(());
        }
        match repo.head().and_then(|h| h.peel_to_commit()) {
            Ok(tip) => {
                repo.branch(name, &tip, false)?;
            }
            Err(_) => {}
        }
        Ok(())
    }

    /// Stage everything matching `*`, commit it on HEAD with a fixed
    /// `kei-store` signature, and return the new commit id as hex.
    /// The commit is a root commit when HEAD has no commit yet.
    fn commit(&self, message: &str) -> Result<String> {
        let repo = git2::Repository::open(&self.root)?;

        let mut index = repo.index()?;
        index.add_all(["*"].iter(), git2::IndexAddOption::DEFAULT, None)?;
        index.write()?;
        let tree = repo.find_tree(index.write_tree()?)?;

        let author = git2::Signature::now("kei-store", "kei-store@local")?;
        let head_commit = repo.head().ok().and_then(|h| h.peel_to_commit().ok());
        let parents: Vec<&git2::Commit> = head_commit.iter().collect();

        let commit_id = repo.commit(Some("HEAD"), &author, &author, message, &tree, &parents)?;
        Ok(commit_id.to_string())
    }

    /// Intentional no-op: this backend has no remote.
    fn push(&self, _branch: &str) -> Result<()> {
        Ok(())
    }

    /// Intentional no-op: this backend has no remote.
    fn pull(&self, _branch: &str) -> Result<()> {
        Ok(())
    }

    fn backend_name(&self) -> &'static str {
        "filesystem"
    }
}

View file

@ -0,0 +1,31 @@
//! ForgejoStore — thin alias of GitHubStore with a different display name.
//!
//! Forgejo is a hard fork of Gitea — git wire protocol identical. Only the
//! base URL and token env var differ; those are resolved from config.
use crate::config::GitRemoteCfg;
use crate::github::GitHubStore;
use anyhow::Result;
use std::path::PathBuf;
/// Forgejo backend; all operations are delegated to the wrapped
/// `GitHubStore`.
pub struct ForgejoStore {
// Underlying git-remote store, constructed with the name "forgejo".
inner: GitHubStore,
}
impl ForgejoStore {
    /// Create a Forgejo-labelled store over the local repo at `local`,
    /// using `remote` for URL and credentials.
    pub fn new(local: PathBuf, remote: GitRemoteCfg) -> Result<Self> {
        GitHubStore::with_name(local, remote, "forgejo").map(|inner| Self { inner })
    }
}
/// Pure delegation to the wrapped `GitHubStore`; only the reported backend
/// name is specific to this type.
impl crate::store_trait::MemoryStore for ForgejoStore {
    fn read(&self, path: &str) -> Result<Vec<u8>> {
        crate::store_trait::MemoryStore::read(&self.inner, path)
    }

    fn write(&self, path: &str, bytes: &[u8]) -> Result<()> {
        crate::store_trait::MemoryStore::write(&self.inner, path, bytes)
    }

    fn list(&self, dir: &str) -> Result<Vec<String>> {
        crate::store_trait::MemoryStore::list(&self.inner, dir)
    }

    fn branch(&self, name: &str) -> Result<()> {
        crate::store_trait::MemoryStore::branch(&self.inner, name)
    }

    fn commit(&self, message: &str) -> Result<String> {
        crate::store_trait::MemoryStore::commit(&self.inner, message)
    }

    fn push(&self, branch: &str) -> Result<()> {
        crate::store_trait::MemoryStore::push(&self.inner, branch)
    }

    fn pull(&self, branch: &str) -> Result<()> {
        crate::store_trait::MemoryStore::pull(&self.inner, branch)
    }

    fn backend_name(&self) -> &'static str {
        "forgejo"
    }
}

View file

@ -0,0 +1,28 @@
//! GiteaStore — same wire protocol as Forgejo; separate type for clarity.
use crate::config::GitRemoteCfg;
use crate::github::GitHubStore;
use anyhow::Result;
use std::path::PathBuf;
/// Gitea backend; all operations are delegated to the wrapped `GitHubStore`.
pub struct GiteaStore {
// Underlying git-remote store, constructed with the name "gitea".
inner: GitHubStore,
}
impl GiteaStore {
    /// Create a Gitea-labelled store over the local repo at `local`, using
    /// `remote` for URL and credentials.
    pub fn new(local: PathBuf, remote: GitRemoteCfg) -> Result<Self> {
        GitHubStore::with_name(local, remote, "gitea").map(|inner| Self { inner })
    }
}
/// Pure delegation to the wrapped `GitHubStore`; only the reported backend
/// name is specific to this type.
impl crate::store_trait::MemoryStore for GiteaStore {
    fn read(&self, path: &str) -> Result<Vec<u8>> {
        crate::store_trait::MemoryStore::read(&self.inner, path)
    }

    fn write(&self, path: &str, bytes: &[u8]) -> Result<()> {
        crate::store_trait::MemoryStore::write(&self.inner, path, bytes)
    }

    fn list(&self, dir: &str) -> Result<Vec<String>> {
        crate::store_trait::MemoryStore::list(&self.inner, dir)
    }

    fn branch(&self, name: &str) -> Result<()> {
        crate::store_trait::MemoryStore::branch(&self.inner, name)
    }

    fn commit(&self, message: &str) -> Result<String> {
        crate::store_trait::MemoryStore::commit(&self.inner, message)
    }

    fn push(&self, branch: &str) -> Result<()> {
        crate::store_trait::MemoryStore::push(&self.inner, branch)
    }

    fn pull(&self, branch: &str) -> Result<()> {
        crate::store_trait::MemoryStore::pull(&self.inner, branch)
    }

    fn backend_name(&self) -> &'static str {
        "gitea"
    }
}

View file

@ -0,0 +1,107 @@
//! GitHubStore — git-over-SSH/HTTPS.
//!
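//! Wraps FilesystemStore for local ops, adds push/pull to a configured
//! remote. SSH auth reads a key path from the env var named by the config's
//! `ssh_key_env` (conventionally `KEI_MEMORY_SSH_KEY`); HTTPS auth reads a
//! token from the env var named by `pat_env` (conventionally
//! `KEI_MEMORY_PAT`). Exactly the pattern used in v0.11
//! `kei-sleep-setup.sh`.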
use crate::config::GitRemoteCfg;
use crate::filesystem::FilesystemStore;
use crate::store_trait::MemoryStore;
use anyhow::{Context, Result};
use std::path::PathBuf;
/// Git-remote store: local operations go through a `FilesystemStore`,
/// push/pull talk to the configured remote.
pub struct GitHubStore {
// Local repository used for read/write/list/branch/commit.
inner: FilesystemStore,
// Remote URL and credential env-var names.
remote: GitRemoteCfg,
// Value reported by `backend_name` ("github" unless set via `with_name`).
name: &'static str,
}
impl GitHubStore {
    /// Create a store named `github` over the local repo at `local`.
    pub fn new(local: PathBuf, remote: GitRemoteCfg) -> Result<Self> {
        Self::with_name(local, remote, "github")
    }

    /// Same as `new` but with a caller-chosen backend name; used by the
    /// Forgejo and Gitea wrappers.
    pub fn with_name(local: PathBuf, remote: GitRemoteCfg, name: &'static str) -> Result<Self> {
        FilesystemStore::new(local).map(|inner| Self { inner, remote, name })
    }

    /// Build remote callbacks whose credential hook resolves credentials
    /// from a clone of this store's remote config.
    fn callbacks(&self) -> git2::RemoteCallbacks<'_> {
        let remote_cfg = self.remote.clone();
        let mut callbacks = git2::RemoteCallbacks::new();
        callbacks.credentials(move |_url, username, _allowed| credential(&remote_cfg, username));
        callbacks
    }

    /// The configured remote URL, or an error when the config has none.
    fn remote_url(&self) -> Result<&str> {
        let configured = self.remote.url.as_deref();
        configured.context("remote url missing from config")
    }
}
/// Pick a credential for the remote, in priority order:
///
/// 1. SSH key — when `ssh_key_env` names an env var that is set; its value
///    is used as the private-key path (user defaults to `git`).
/// 2. Username/token — when `pat_env` names an env var that is set (user
///    defaults to `x-access-token`).
/// 3. `git2::Cred::default()` otherwise.
fn credential(cfg: &GitRemoteCfg, user: Option<&str>) -> std::result::Result<git2::Cred, git2::Error> {
    let ssh_key_path = cfg
        .ssh_key_env
        .as_ref()
        .and_then(|var| std::env::var(var).ok());
    if let Some(key_path) = ssh_key_path {
        let login = user.unwrap_or("git");
        return git2::Cred::ssh_key(login, None, std::path::Path::new(&key_path), None);
    }

    let token = cfg
        .pat_env
        .as_ref()
        .and_then(|var| std::env::var(var).ok());
    match token {
        Some(secret) => {
            git2::Cred::userpass_plaintext(user.unwrap_or("x-access-token"), &secret)
        }
        None => git2::Cred::default(),
    }
}
impl MemoryStore for GitHubStore {
fn read(&self, path: &str) -> Result<Vec<u8>> {
self.inner.read(path)
}
fn write(&self, path: &str, bytes: &[u8]) -> Result<()> {
self.inner.write(path, bytes)
}
fn list(&self, dir: &str) -> Result<Vec<String>> {
self.inner.list(dir)
}
fn branch(&self, name: &str) -> Result<()> {
self.inner.branch(name)
}
fn commit(&self, message: &str) -> Result<String> {
self.inner.commit(message)
}
fn push(&self, branch: &str) -> Result<()> {
let repo = git2::Repository::open(&self.inner.root)?;
let url = self.remote_url()?;
let mut remote = match repo.find_remote("origin") {
Ok(r) => r,
Err(_) => repo.remote("origin", url)?,
};
let mut opts = git2::PushOptions::new();
opts.remote_callbacks(self.callbacks());
let refspec = format!("refs/heads/{b}:refs/heads/{b}", b = branch);
remote.push(&[&refspec], Some(&mut opts))?;
Ok(())
}
fn pull(&self, branch: &str) -> Result<()> {
let repo = git2::Repository::open(&self.inner.root)?;
let url = self.remote_url()?;
let mut remote = match repo.find_remote("origin") {
Ok(r) => r,
Err(_) => repo.remote("origin", url)?,
};
let mut opts = git2::FetchOptions::new();
opts.remote_callbacks(self.callbacks());
remote.fetch(&[branch], Some(&mut opts), None)?;
Ok(())
}
fn backend_name(&self) -> &'static str {
self.name
}
}

View file

@ -0,0 +1,25 @@
//! kei-store — memory-repo backend abstraction.
//!
//! Trait `MemoryStore` + 5 implementations:
//! - `GitHubStore`, `ForgejoStore`, `GiteaStore` — git-over-SSH/HTTPS
//! - `FilesystemStore` — local `.git` only; never pushes
//! - `S3Store` — object-storage with manifest.json (MVP stub)
//!
//! Config loaded from `~/.claude/agents/_primitives/store-config.toml`
//! by default; overridable via `--config`.
//!
//! RULE 0.8 — this crate reads secret references from env vars only
//! (`KEI_MEMORY_SSH_KEY`, `KEI_MEMORY_PAT`, `AWS_SECRET_ACCESS_KEY`, ...).
pub mod config;
pub mod factory;
pub mod filesystem;
pub mod forgejo;
pub mod gitea;
pub mod github;
pub mod s3;
pub mod store_trait;
pub use config::Config;
pub use factory::build_store;
pub use store_trait::MemoryStore;

View file

@ -0,0 +1,117 @@
//! kei-store — binary entry.
//!
//! Subcommands: init / read / write / list / branch / commit / push / pull / status.
use anyhow::{anyhow, Context, Result};
use clap::{Parser, Subcommand};
use kei_store::config::{expand_tilde, Config};
use kei_store::{build_store, MemoryStore};
use std::fs;
use std::path::PathBuf;
use std::process::ExitCode;
// Command-line arguments of the `kei-store` binary (clap derive). The `///`
// comments on fields are used by clap as help text, so notes here are plain
// `//` comments.
#[derive(Parser, Debug)]
#[command(name = "kei-store", about = "Memory-repo backend abstraction.")]
struct Cli {
// For `init` this is the file to create; for every other subcommand it is
// the file to load.
/// Config file path (default: ~/.claude/agents/_primitives/store-config.toml).
#[arg(long)]
config: Option<PathBuf>,
#[command(subcommand)]
cmd: Cmd,
}
// Subcommands. `Init` is handled directly in `run` (it must work before any
// config exists); all others are executed against a built store in `dispatch`.
#[derive(Subcommand, Debug)]
enum Cmd {
// Write a starter config for `backend`, optionally pre-filling the URL.
Init { backend: String, #[arg(long)] url: Option<String> },
// Print the bytes stored at `path` to stdout.
Read { path: String },
// Store the contents of local `file` at store path `path`.
Write { path: String, file: PathBuf },
// Print the file names directly under `dir`, one per line.
List { dir: String },
// Create branch / snapshot namespace `name`.
Branch { name: String },
// Commit with the given message and print the resulting id.
Commit { #[arg(long, short)] message: String },
// Push `branch` to the remote.
Push { branch: String },
// Pull `branch` from the remote.
Pull { branch: String },
// Print the active backend name.
Status,
}
/// Default config location, with the leading `~/` expanded.
fn default_config_path() -> PathBuf {
    let expanded = expand_tilde("~/.claude/agents/_primitives/store-config.toml");
    PathBuf::from(expanded)
}
fn load_config(cli: &Cli) -> Result<Config> {
let path = cli.config.clone().unwrap_or_else(default_config_path);
if !path.exists() {
return Err(anyhow!("config not found: {}", path.display()));
}
Config::load(&path)
}
/// Create a starter config file at `target`.
///
/// Refuses to overwrite an existing file, creates missing parent
/// directories, and reports the written path on stderr.
fn cmd_init(backend: &str, url: Option<&str>, target: &PathBuf) -> Result<()> {
    if target.exists() {
        return Err(anyhow!("config already exists: {}", target.display()));
    }
    match target.parent() {
        Some(dir) => fs::create_dir_all(dir)?,
        None => {}
    }
    let body = render_init(backend, url);
    fs::write(target, body)?;
    eprintln!("kei-store: wrote {}", target.display());
    Ok(())
}
/// Render the starter config TOML: an `[active]` table selecting `backend`
/// with the default local path, plus a `[<backend>]` table holding the URL
/// (`<set-me>` when none was given) and the conventional credential env-var
/// names. Values are interpolated verbatim, without TOML escaping.
fn render_init(backend: &str, url: Option<&str>) -> String {
    let remote_url = match url {
        Some(given) => given,
        None => "<set-me>",
    };
    let active_table = format!(
        "[active]\nbackend = \"{}\"\nlocal_path = \"~/.claude/memory/sync-repo\"\n\n",
        backend
    );
    let backend_table = format!(
        "[{}]\nurl = \"{}\"\nssh_key_env = \"KEI_MEMORY_SSH_KEY\"\npat_env = \"KEI_MEMORY_PAT\"\n",
        backend, remote_url
    );
    active_table + &backend_table
}
/// Execute the parsed command.
///
/// `init` is handled first because it must work without an existing config;
/// every other subcommand loads the config, builds the store and dispatches.
fn run(cli: &Cli) -> Result<()> {
    match &cli.cmd {
        Cmd::Init { backend, url } => {
            let target = cli.config.clone().unwrap_or_else(default_config_path);
            cmd_init(backend, url.as_deref(), &target)
        }
        other => {
            let cfg = load_config(cli)?;
            let store = build_store(&cfg)?;
            dispatch(&*store, other)
        }
    }
}
/// Run a non-`init` subcommand against `store`.
///
/// # Panics
/// Hits `unreachable!()` if called with `Cmd::Init`; `run` handles that
/// variant before dispatching.
fn dispatch(store: &dyn MemoryStore, cmd: &Cmd) -> Result<()> {
    match cmd {
        Cmd::Init { .. } => unreachable!(),
        Cmd::Status => println!("backend: {}", store.backend_name()),
        Cmd::Read { path } => {
            let contents = store.read(path)?;
            let mut stdout = std::io::stdout();
            std::io::Write::write_all(&mut stdout, &contents).context("write stdout")?;
        }
        Cmd::Write { path, file } => {
            let contents = fs::read(file)?;
            store.write(path, &contents)?;
        }
        Cmd::List { dir } => {
            let names = store.list(dir)?;
            for name in names {
                println!("{}", name);
            }
        }
        Cmd::Branch { name } => store.branch(name)?,
        Cmd::Commit { message } => {
            let id = store.commit(message)?;
            println!("{}", id);
        }
        Cmd::Push { branch } => store.push(branch)?,
        Cmd::Pull { branch } => store.pull(branch)?,
    }
    Ok(())
}
/// Binary entry point: parse the CLI, run, and exit with 1 on error after
/// printing the full error chain (`{:#}`) to stderr.
fn main() -> ExitCode {
    let cli = Cli::parse();
    if let Err(e) = run(&cli) {
        eprintln!("kei-store: {e:#}");
        return ExitCode::from(1);
    }
    ExitCode::SUCCESS
}

View file

@ -0,0 +1,122 @@
//! S3Store — object-storage backend (MVP stub).
//!
//! This is a local-manifest-based implementation intended as an offline MVP.
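//! Reads/writes go to `cache_path`; `commit` serialises a
//! `manifest-<hash>.json` recording the commit message and the names of the
//! regular files directly under `cache_path` (no per-file content hashes
//! yet; `<hash>` is a DJB2 hash of the manifest text);
//! `push`/`pull` are NO-OPs in stub mode.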
//!
//! Production S3/R2/MinIO support is planned via `aws-sdk-s3` behind a
//! feature flag — see README §Store backends. This stub keeps the trait
//! surface honest so downstream code can exercise the full kei-store
//! API without pulling a ~20 MB AWS SDK at install time.
use crate::config::S3Cfg;
use crate::store_trait::MemoryStore;
use anyhow::{Context, Result};
use std::fs;
use std::path::PathBuf;
/// Stub object-storage backend that serves everything from a local cache
/// directory.
pub struct S3Store {
/// Local cache root; all reads, writes and manifests live under it.
pub cache: PathBuf,
/// S3 settings from the config; stored but not consulted by the stub
/// methods in this file.
pub cfg: S3Cfg,
}
impl S3Store {
pub fn new(cache: PathBuf, cfg: S3Cfg) -> Result<Self> {
fs::create_dir_all(&cache).with_context(|| format!("mkdir {}", cache.display()))?;
Ok(Self { cache, cfg })
}
fn full(&self, rel: &str) -> PathBuf {
self.cache.join(rel)
}
}
impl MemoryStore for S3Store {
    /// Read the cached file at the store-relative `path`.
    fn read(&self, path: &str) -> Result<Vec<u8>> {
        let target = self.full(path);
        fs::read(target).with_context(|| format!("read {}", path))
    }

    /// Write `bytes` into the cache at `path`, creating parent directories.
    fn write(&self, path: &str, bytes: &[u8]) -> Result<()> {
        let target = self.full(path);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir)?;
        }
        fs::write(target, bytes)?;
        Ok(())
    }

    /// List the names of regular files directly inside `dir`, sorted.
    /// A missing directory yields an empty list; names that are not valid
    /// UTF-8 are skipped.
    fn list(&self, dir: &str) -> Result<Vec<String>> {
        let target = self.full(dir);
        if !target.exists() {
            return Ok(Vec::new());
        }
        let mut names = Vec::new();
        for entry in fs::read_dir(&target)? {
            let entry = entry?;
            if !entry.file_type()?.is_file() {
                continue;
            }
            if let Some(name) = entry.file_name().to_str() {
                names.push(name.to_owned());
            }
        }
        names.sort();
        Ok(names)
    }

    /// Create the logical snapshot namespace `cache/<name>/`.
    fn branch(&self, name: &str) -> Result<()> {
        let namespace = self.cache.join(name);
        fs::create_dir_all(namespace)?;
        Ok(())
    }

    /// Write `manifest-<hash>.json` into the cache root and return `<hash>`.
    // NOTE(review): the manifest lands in the directory that
    // `build_manifest` lists, so manifests from earlier commits will show up
    // as entries in later ones — confirm this is acceptable for the stub.
    fn commit(&self, message: &str) -> Result<String> {
        let manifest = build_manifest(&self.cache, message)?;
        let digest = short_hash(&manifest);
        let file_name = format!("manifest-{}.json", digest);
        fs::write(self.cache.join(file_name), manifest)?;
        Ok(digest)
    }

    fn push(&self, _branch: &str) -> Result<()> {
        // Production path: aws-sdk-s3 put_object loop. Stub: no-op.
        Ok(())
    }

    /// Stub: no-op.
    fn pull(&self, _branch: &str) -> Result<()> {
        Ok(())
    }

    fn backend_name(&self) -> &'static str {
        "s3-stub"
    }
}
/// Build the manifest JSON text: `{"message": ..., "entries": [...]}` where
/// `entries` are the sorted names of regular files directly under `root`
/// (non-recursive; empty when `root` does not exist; non-UTF-8 names are
/// skipped).
fn build_manifest(root: &PathBuf, message: &str) -> Result<String> {
    let mut names = Vec::<String>::new();
    if root.exists() {
        for entry in fs::read_dir(root)? {
            let entry = entry?;
            if !entry.file_type()?.is_file() {
                continue;
            }
            if let Some(name) = entry.file_name().to_str() {
                names.push(name.to_owned());
            }
        }
    }
    names.sort();

    let document = serde_json::json!({
        "message": message,
        "entries": names,
    });
    Ok(document.to_string())
}
/// Returns the DJB2 hash of `s` as lowercase hexadecimal (no padding).
///
/// Starts from 5381 and, for every byte, multiplies by 33 and adds the byte,
/// wrapping on `u64` overflow. Cheap and deterministic; used so the stub does
/// not need to pull in `sha2`.
fn short_hash(s: &str) -> String {
    let digest = s.bytes().fold(5381u64, |acc, byte| {
        acc.wrapping_mul(33).wrapping_add(u64::from(byte))
    });
    format!("{digest:x}")
}

View file

@ -0,0 +1,29 @@
//! MemoryStore trait — single point of truth for every backend.
use anyhow::Result;
/// Common interface implemented by every storage backend.
///
/// Implementors must be `Send + Sync`. Every method takes `&self`, and all
/// methods except `backend_name` report failure through `anyhow::Result`.
pub trait MemoryStore: Send + Sync {
/// Read a byte blob at a relative path.
///
/// Returns the blob's full contents.
fn read(&self, path: &str) -> Result<Vec<u8>>;
/// Write a byte blob at a relative path. Creates parents.
fn write(&self, path: &str, bytes: &[u8]) -> Result<()>;
/// List regular files under a relative directory (non-recursive).
///
/// Returns the file names as strings.
fn list(&self, dir: &str) -> Result<Vec<String>>;
/// Create a branch (git) or a logical "snapshot namespace" (S3).
fn branch(&self, name: &str) -> Result<()>;
/// Commit staged changes; returns the object id / manifest hash.
///
/// `message` is the commit message to record.
fn commit(&self, message: &str) -> Result<String>;
/// Push a branch to the remote (no-op for FilesystemStore).
fn push(&self, branch: &str) -> Result<()>;
/// Pull a branch from the remote (no-op for FilesystemStore).
fn pull(&self, branch: &str) -> Result<()>;
/// Human-readable backend name for `status` reporting.
fn backend_name(&self) -> &'static str;
}

View file

@ -0,0 +1,150 @@
//! Integration tests for kei-store.
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Path of the `kei-store` binary Cargo built for this integration test.
fn bin() -> PathBuf {
    let exe: &str = env!("CARGO_BIN_EXE_kei-store");
    PathBuf::from(exe)
}
/// Writes a store config named `c.toml` into `tmp` and returns its path.
///
/// The config selects `backend` as the active backend and points the active
/// `local_path`, the filesystem `path` and the S3 `cache_path` at `local`.
///
/// `local` is embedded in TOML basic (double-quoted) strings, so backslashes
/// and double quotes are escaped first; otherwise a Windows-style path would
/// produce invalid TOML.
///
/// # Panics
/// Panics when the config file cannot be written.
fn write_config(tmp: &TempDir, backend: &str, local: &std::path::Path) -> PathBuf {
    let cfg_path = tmp.path().join("c.toml");
    let escaped = local
        .to_string_lossy()
        .replace('\\', "\\\\")
        .replace('"', "\\\"");
    let body = format!(
        "[active]\nbackend = \"{b}\"\nlocal_path = \"{p}\"\n\n[filesystem]\npath = \"{p}\"\n\n[s3]\ncache_path = \"{p}\"\n",
        b = backend,
        p = escaped
    );
    fs::write(&cfg_path, body).unwrap();
    cfg_path
}
/// Runs the `kei-store` binary with `args` and returns its captured output.
///
/// Panics when the process cannot be spawned.
fn run(args: &[&str]) -> std::process::Output {
    let mut cmd = std::process::Command::new(bin());
    cmd.args(args);
    cmd.output().unwrap()
}
/// `init filesystem` must create the config file and record the backend in it.
#[test]
fn init_writes_config() {
    let tmp = TempDir::new().unwrap();
    let cfg = tmp.path().join("store.toml");

    let args = ["--config", cfg.to_str().unwrap(), "init", "filesystem"];
    let out = run(&args);
    assert!(out.status.success(), "{}", String::from_utf8_lossy(&out.stderr));

    // The command must have created the file at the requested location.
    assert!(cfg.exists());
    let text = fs::read_to_string(&cfg).unwrap();
    assert!(text.contains("backend = \"filesystem\""));
}
/// Bytes written through `write` must come back unchanged from `read`.
#[test]
fn filesystem_read_write_roundtrip() {
    let tmp = TempDir::new().unwrap();
    let local = tmp.path().join("repo");
    let cfg = write_config(&tmp, "filesystem", &local);

    // Source file whose contents get copied into the store.
    let file = tmp.path().join("input.bin");
    fs::write(&file, b"hello world").unwrap();

    let cfg_arg = cfg.to_str().unwrap();
    let write_args = ["--config", cfg_arg, "write", "a/b.txt", file.to_str().unwrap()];
    let w = run(&write_args);
    assert!(w.status.success(), "{}", String::from_utf8_lossy(&w.stderr));

    let read_args = ["--config", cfg_arg, "read", "a/b.txt"];
    let r = run(&read_args);
    assert!(r.status.success());
    assert_eq!(r.stdout, b"hello world");
}
/// `list` must report every file previously written into the directory.
///
/// The setup writes are checked so a broken `write` fails here with its own
/// stderr instead of surfacing as a confusing `list` failure, and the file
/// names are distinctive so the `contains` checks cannot be satisfied by
/// unrelated output.
#[test]
fn filesystem_list_shows_files() {
    let tmp = TempDir::new().unwrap();
    let local = tmp.path().join("repo");
    let cfg = write_config(&tmp, "filesystem", &local);
    let file = tmp.path().join("x");
    fs::write(&file, b"x").unwrap();

    let cfg_arg = cfg.to_str().unwrap();
    let src = file.to_str().unwrap();
    for target in ["dir/alpha.txt", "dir/beta.txt"] {
        let w = run(&["--config", cfg_arg, "write", target, src]);
        assert!(
            w.status.success(),
            "write {target}: {}",
            String::from_utf8_lossy(&w.stderr)
        );
    }

    let out = run(&["--config", cfg_arg, "list", "dir"]);
    assert!(out.status.success(), "{}", String::from_utf8_lossy(&out.stderr));
    let s = String::from_utf8(out.stdout).unwrap();
    assert!(s.contains("alpha.txt"), "missing alpha.txt in: {s}");
    assert!(s.contains("beta.txt"), "missing beta.txt in: {s}");
}
/// `commit` must succeed and print an id of at least seven characters.
///
/// The setup `write` is checked so the commit is known to run against a
/// store that actually contains the file.
#[test]
fn filesystem_commit_returns_hash() {
    let tmp = TempDir::new().unwrap();
    let local = tmp.path().join("repo");
    let cfg = write_config(&tmp, "filesystem", &local);
    let file = tmp.path().join("x");
    fs::write(&file, b"x").unwrap();

    let cfg_arg = cfg.to_str().unwrap();
    let w = run(&["--config", cfg_arg, "write", "a.txt", file.to_str().unwrap()]);
    assert!(w.status.success(), "write: {}", String::from_utf8_lossy(&w.stderr));

    let out = run(&["--config", cfg_arg, "commit", "--message", "init"]);
    assert!(out.status.success(), "{}", String::from_utf8_lossy(&out.stderr));
    let hash = String::from_utf8(out.stdout).unwrap();
    assert!(hash.trim().len() >= 7, "commit id too short: {hash:?}");
}
/// `push` and `pull` must both succeed on the filesystem backend.
///
/// The setup `write` and `commit` are checked too; without that the test
/// could pass even though the store was never populated.
#[test]
fn filesystem_push_pull_are_noop() {
    let tmp = TempDir::new().unwrap();
    let local = tmp.path().join("repo");
    let cfg = write_config(&tmp, "filesystem", &local);
    let file = tmp.path().join("x");
    fs::write(&file, b"x").unwrap();

    let cfg_arg = cfg.to_str().unwrap();
    let w = run(&["--config", cfg_arg, "write", "a.txt", file.to_str().unwrap()]);
    assert!(w.status.success(), "write: {}", String::from_utf8_lossy(&w.stderr));
    let c = run(&["--config", cfg_arg, "commit", "--message", "init"]);
    assert!(c.status.success(), "commit: {}", String::from_utf8_lossy(&c.stderr));

    let p1 = run(&["--config", cfg_arg, "push", "main"]);
    let p2 = run(&["--config", cfg_arg, "pull", "main"]);
    assert!(p1.status.success(), "push: {}", String::from_utf8_lossy(&p1.stderr));
    assert!(p2.status.success(), "pull: {}", String::from_utf8_lossy(&p2.stderr));
}
/// A commit on the S3 stub must leave exactly one `manifest-*` file in the
/// cache directory.
///
/// The setup `write` is checked so a broken `write` is reported directly.
#[test]
fn s3_stub_commit_writes_manifest() {
    let tmp = TempDir::new().unwrap();
    let local = tmp.path().join("cache");
    let cfg = write_config(&tmp, "s3", &local);
    let file = tmp.path().join("x");
    fs::write(&file, b"x").unwrap();

    let cfg_arg = cfg.to_str().unwrap();
    let w = run(&["--config", cfg_arg, "write", "a.txt", file.to_str().unwrap()]);
    assert!(w.status.success(), "write: {}", String::from_utf8_lossy(&w.stderr));

    let out = run(&["--config", cfg_arg, "commit", "--message", "first"]);
    assert!(out.status.success(), "{}", String::from_utf8_lossy(&out.stderr));

    // Count the manifest files the commit left in the cache root.
    let manifests = fs::read_dir(&local)
        .unwrap()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_name().to_string_lossy().starts_with("manifest-"))
        .count();
    assert_eq!(manifests, 1);
}
/// `status` must succeed and mention the active backend's name.
#[test]
fn status_reports_backend() {
    let tmp = TempDir::new().unwrap();
    let local = tmp.path().join("repo");
    let cfg = write_config(&tmp, "filesystem", &local);

    let args = ["--config", cfg.to_str().unwrap(), "status"];
    let out = run(&args);
    assert!(out.status.success());

    let s = String::from_utf8(out.stdout).unwrap();
    assert!(s.contains("filesystem"));
}
/// A config naming an unsupported backend must make the CLI fail with an
/// "unknown backend" message on stderr.
///
/// The path is escaped before being embedded in the TOML basic string so the
/// config stays parseable for paths containing backslashes or quotes;
/// otherwise the run would fail on a parse error before reaching the backend
/// check.
#[test]
fn unknown_backend_errors() {
    let tmp = TempDir::new().unwrap();
    let local = tmp.path().join("repo");
    let cfg_path = tmp.path().join("c.toml");
    let escaped = local
        .to_string_lossy()
        .replace('\\', "\\\\")
        .replace('"', "\\\"");
    let body = format!(
        "[active]\nbackend = \"xyz\"\nlocal_path = \"{p}\"\n",
        p = escaped
    );
    fs::write(&cfg_path, body).unwrap();
    let out = run(&["--config", cfg_path.to_str().unwrap(), "status"]);
    assert!(!out.status.success());
    let e = String::from_utf8_lossy(&out.stderr);
    assert!(e.contains("unknown backend"), "{}", e);
}