tests/determinism.rs (3 cases):
- same input across 2 isolated tempdirs → byte-identical output
- same input across 10 isolated tempdirs → all byte-identical
(catches HashMap iteration nondeterminism a 2-run check can miss)
- reordering blocks in the manifest changes output, but only in the
block region — frontmatter + role stay byte-identical, and the trailing
`# DOMAIN SCOPE` section is still present
tests/roundtrip.rs (2 cases):
- every manifest string (name, model, tools list, all domain_in /
forbidden_domain / handoff.target / handoff.trigger entries)
appears verbatim in the generated output; no field silently dropped
- two consecutive runs in the SAME tempdir produce identical bytes
(defence against caching / mutable-global drift)
tests/validator_negative.rs (6 cases):
- unknown block ref → error mentions the bad name
- missing obligatory block (memory-protocol removed) → error names it
- empty handoff array → error mentions "handoff"
- whitespace-only role → error mentions "role"
- empty domain_in → error mentions "domain_in"
- --validate flag on a valid manifest: exit 0, no file written
Not covered: unsubstituted `{{placeholder}}` check — that validator
rule is being added in a parallel PR (fix/remaining-findings) and is
not yet on this base branch. Add a case for it when the check lands.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
96 lines
3.4 KiB
Rust
96 lines
3.4 KiB
Rust
//! Determinism + ordering tests for the assembler.
|
|
//!
|
|
//! The assembler module docstring promises:
|
|
//! > Output is deterministic: same manifest + blocks → byte-identical .md
|
|
//!
|
|
//! These tests actually verify that promise. Catches any accidental
|
|
//! `HashMap`-iteration leak, embedded timestamp, or non-stable sort.
|
|
|
|
mod common;
|
|
|
|
use common::{assemble_one, seed_tempdir};
|
|
use std::fs;
|
|
|
|
/// Assembling the same agent in two separately seeded tempdirs must yield
/// exactly the same bytes.
#[test]
fn determinism_same_input_byte_identical() {
    const AGENT: &str = "code-implementer";

    // Each run gets its own freshly seeded root. The first tuple element is
    // bound to a named variable (not discarded) so it lives until the end of
    // the test — presumably it is the tempdir guard; confirm in `common`.
    let (_keep_a, root_a) = seed_tempdir();
    let output_a = assemble_one(&root_a, AGENT);

    let (_keep_b, root_b) = seed_tempdir();
    let output_b = assemble_one(&root_b, AGENT);

    // Compare raw bytes rather than `String`s to make the byte-identity
    // requirement explicit.
    let (bytes_a, bytes_b) = (output_a.as_bytes(), output_b.as_bytes());
    assert_eq!(
        bytes_a, bytes_b,
        "two independent runs produced different bytes"
    );
}
|
|
|
|
/// Ten assemblies of the same agent, each in its own freshly seeded root,
/// must all match the first one byte for byte. More repetitions give
/// hash-map iteration nondeterminism more chances to show up than a
/// two-run comparison does.
#[test]
fn determinism_ten_runs_all_identical() {
    // One isolated run: seed a fresh root, assemble, return the output.
    let run_once = || -> String {
        let (_tmp, root) = seed_tempdir();
        assemble_one(&root, "researcher")
    };

    // Run 0 is the reference every later run is compared against.
    let reference = run_once();

    // Runs 1..=9 — together with the reference that makes ten runs total.
    for i in 1..10 {
        let out = run_once();
        assert_eq!(
            reference.as_bytes(),
            out.as_bytes(),
            "run {i} diverged from run 0"
        );
    }
}
|
|
|
|
/// Block ordering: the order in `manifest.blocks` defines the order
/// in the output. Reorder the blocks list → output changes, while
/// everything before the first block (frontmatter + role) stays
/// byte-identical and the trailing `# DOMAIN SCOPE` section is still
/// emitted.
#[test]
fn block_order_controls_output_order() {
    let (_tmp, root) = seed_tempdir();
    let manifest_path = root.join("_manifests/researcher.toml");

    // Baseline: default researcher (baseline, evidence-grading, memory-protocol).
    let default_out = assemble_one(&root, "researcher");

    // Swap two blocks — write a modified manifest into the same tempdir.
    let manifest_src =
        fs::read_to_string(&manifest_path).expect("failed to read seeded researcher manifest");
    let swapped = manifest_src.replace(
        "blocks = [\n \"baseline\", # OBLIGATORY\n \"evidence-grading\", # OBLIGATORY\n \"memory-protocol\", # OBLIGATORY\n]",
        "blocks = [\n \"baseline\",\n \"memory-protocol\",\n \"evidence-grading\",\n]",
    );
    // `replace` is a silent no-op when the pattern is absent; fail loudly so
    // a reformatted fixture cannot turn this test into a tautology.
    assert_ne!(
        manifest_src, swapped,
        "blocks-list replacement did not match — test fixture drifted"
    );
    fs::write(&manifest_path, &swapped).expect("failed to write swapped researcher manifest");

    let swapped_out = assemble_one(&root, "researcher");

    // 1. Output is different.
    assert_ne!(
        default_out, swapped_out,
        "swapping block order did not change output"
    );

    // 2. Everything before the first block is unchanged. `baseline` stays
    //    first in both manifests, so the prefix is whatever precedes the
    //    `# BASELINE` marker in the default output.
    let prefix_len = default_out
        .find("# BASELINE")
        .expect("BASELINE marker missing in default output");
    // `get` instead of indexing: if the swapped output is shorter than the
    // prefix (or the offset is not a char boundary there) this fails the
    // assertion with the message below rather than panicking on the slice.
    assert_eq!(
        Some(&default_out[..prefix_len]),
        swapped_out.get(..prefix_len),
        "frontmatter + role drifted when only blocks were reordered"
    );

    // 3. The "# DOMAIN SCOPE" marker appears in both outputs.
    //    NOTE(review): this checks presence only, not that the trailing
    //    section is byte-identical — tighten once the tail layout is pinned.
    assert!(
        default_out.contains("# DOMAIN SCOPE"),
        "DOMAIN SCOPE marker missing in default output"
    );
    assert!(
        swapped_out.contains("# DOMAIN SCOPE"),
        "DOMAIN SCOPE marker missing in swapped output"
    );
}
|