KeiSeiKit-1.0/_primitives/_rust/kei-model-router/src/kernel.rs
Parfii-bot 0be354a920 KeiSeiKit-public — clean state
Single-commit clean baseline after security scrub of niche-tells,
project codenames, internal jargon, and contributor-email leaks.

Contents:
- 100 Rust crates (_primitives/_rust/)
- 37 agent manifests (_manifests/) + generated specs (_generated/)
- 67 user-invocable skills (skills/)
- 33 hooks (hooks/)
- Composition blocks (_blocks/)
- Documentation (docs/, README.md)
- TS adapter packages (_ts_packages/)
- Assembler (_assembler/)
- Roles (_roles/)
- Templates (_templates/)
- Forgejo CI (.forgejo/)

Author: Denis Parfionovich <info@greendragon.info>

License: see LICENSE.
2026-05-01 12:09:03 +08:00

156 lines
5 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! DNA similarity kernel for unseen task-classes.
//!
//! When a new task arrives whose `task_class_dna` is not in the ledger,
//! we transfer learning from similar past task-classes via this kernel.
//!
//! K(d, d') = α_role · 1[role=role'] +
//! α_caps · |caps ∩ caps'| / |caps ∪ caps'| +
//! α_scope · 1[scope=scope'] +
//! α_body · jaccard(body8, body8') ← coarse n-gram on hex
//!
//! Calibrated weights are reset by the `calibrate` CLI subcommand from
//! observed outcomes; defaults below are seed values.
//!
//! Constructor Pattern: pure-fn cube. No SQL, no I/O. Caller composes
//! with `posterior::from_ledger` to weight transferred posteriors.
use crate::dna_class;
/// Per-component weights for the DNA similarity kernel.
///
/// Each field scales one term of [`similarity`]. The `Default` values are the
/// seed weights and add up to 1.0, so a match on every component scores 1.0.
#[derive(Debug, Clone, Copy)]
pub struct KernelWeights {
    /// Added when both DNAs expose a role and the two roles are equal.
    pub alpha_role: f64,
    /// Scales the Jaccard index of the `-`-separated capability tokens.
    pub alpha_caps: f64,
    /// Added when both DNAs expose a scope hash and the two hashes are equal.
    pub alpha_scope: f64,
    /// Scales the character-bigram Jaccard index of the two body hashes.
    pub alpha_body: f64,
}

impl Default for KernelWeights {
    /// Seed weights: role 0.40, caps 0.25, scope 0.25, body 0.10.
    fn default() -> Self {
        let (alpha_role, alpha_caps, alpha_scope, alpha_body) = (0.40, 0.25, 0.25, 0.10);
        KernelWeights {
            alpha_role,
            alpha_caps,
            alpha_scope,
            alpha_body,
        }
    }
}
/// Similarity score in [0, 1] between two DNA strings. Higher = more similar.
///
/// The score is the sum of four independent terms, each computed only when
/// `dna_class` yields the component for *both* inputs (otherwise the term is 0):
///
/// * role  — `w.alpha_role` if the roles are equal;
/// * caps  — `w.alpha_caps` times the Jaccard index of the `-` tokens;
/// * scope — `w.alpha_scope` if the scope hashes are equal;
/// * body  — `w.alpha_body` times the bigram Jaccard index of the body hashes.
///
/// The sum is clamped to `[0, 1]` before it is returned.
pub fn similarity(a: &str, b: &str, w: KernelWeights) -> f64 {
    // Role: all-or-nothing on exact equality.
    let role_term = match (dna_class::role(a), dna_class::role(b)) {
        (Some(lhs), Some(rhs)) if lhs == rhs => w.alpha_role,
        _ => 0.0,
    };

    // Caps: graded by token-set overlap.
    let caps_term = match (dna_class::caps(a), dna_class::caps(b)) {
        (Some(lhs), Some(rhs)) => w.alpha_caps * jaccard_caps(lhs, rhs),
        _ => 0.0,
    };

    // Scope hash: all-or-nothing on exact equality.
    let scope_term = match (dna_class::scope_sha(a), dna_class::scope_sha(b)) {
        (Some(lhs), Some(rhs)) if lhs == rhs => w.alpha_scope,
        _ => 0.0,
    };

    // Body hash: graded by hex bigram overlap (only meaningful for
    // task_class_dna inputs).
    let body_term = match (dna_class::body_sha(a), dna_class::body_sha(b)) {
        (Some(lhs), Some(rhs)) => w.alpha_body * jaccard_hex(lhs, rhs),
        _ => 0.0,
    };

    // Accumulate left-to-right from 0.0, then pin the result into [0, 1] so
    // weights that sum above 1 (or go negative) cannot leak out of range.
    [role_term, caps_term, scope_term, body_term]
        .iter()
        .fold(0.0_f64, |total, term| total + term)
        .clamp(0.0, 1.0)
}
/// Jaccard index of two `-`-separated capability lists.
///
/// Empty tokens (produced by leading, trailing, or doubled `-`) are ignored
/// and duplicates collapse. Two lists with no tokens at all are treated as
/// identical and score 1.0.
fn jaccard_caps(a: &str, b: &str) -> f64 {
    /// Splits a caps string into its set of non-empty tokens.
    fn tokens(caps: &str) -> std::collections::BTreeSet<&str> {
        caps.split('-').filter(|tok| !tok.is_empty()).collect()
    }

    let left = tokens(a);
    let right = tokens(b);
    if left.is_empty() && right.is_empty() {
        return 1.0;
    }

    let shared = left.intersection(&right).count();
    // |A ∪ B| = |A| + |B| − |A ∩ B|; at least one side is non-empty here, so
    // the denominator is never zero.
    let combined = left.len() + right.len() - shared;
    shared as f64 / combined as f64
}
/// Coarse similarity of two short hex strings: the Jaccard index of their
/// sets of adjacent byte pairs (bigrams).
///
/// If either input is shorter than two bytes no bigram exists, so the result
/// falls back to exact string equality (1.0 when equal, 0.0 otherwise).
fn jaccard_hex(a: &str, b: &str) -> f64 {
    /// Collects every pair of neighbouring bytes in `s`.
    fn bigrams(s: &str) -> std::collections::BTreeSet<(u8, u8)> {
        let bytes = s.as_bytes();
        bytes
            .iter()
            .zip(bytes.iter().skip(1))
            .map(|(&first, &second)| (first, second))
            .collect()
    }

    if a.len().min(b.len()) < 2 {
        return match a == b {
            true => 1.0,
            false => 0.0,
        };
    }

    let left = bigrams(a);
    let right = bigrams(b);
    let shared = left.intersection(&right).count();
    // Both inputs have at least one bigram here, so the union is non-empty.
    let combined = left.len() + right.len() - shared;
    shared as f64 / combined as f64
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scores a pair of DNA strings with the seed (default) weights.
    fn score(a: &str, b: &str) -> f64 {
        similarity(a, b, KernelWeights::default())
    }

    #[test]
    fn identical_dna_full_match() {
        let dna = "code-implementer-rust::?::e3929e37::041b7526";
        let s = score(dna, dna);
        // Every component matches itself:
        // 0.40 (role) + 0.25 (caps) + 0.25 (scope) + 0.10 (body) = 1.00
        assert!((s - 1.0).abs() < 1e-9);
    }

    #[test]
    fn different_role_only_keeps_other_signals() {
        let a = "role-a::cap1-cap2::s1::b1";
        let b = "role-b::cap1-cap2::s1::b1";
        let s = score(a, b);
        // Only the role differs, so the role term drops out.
        // Body "b1" is exactly one bigram on each side → Jaccard 1.0.
        // Expected: 0 + 0.25 + 0.25 + 0.10 = 0.60
        assert!((s - 0.60).abs() < 1e-9, "got {}", s);
    }

    #[test]
    fn caps_partial_overlap() {
        let a = "r::a-b-c::s::body1234";
        let b = "r::a-b-d::s::body5678";
        let s = score(a, b);
        // role: 0.40
        // caps: {a,b,c} vs {a,b,d} → 2 shared / 4 total = 0.5 → 0.125
        // scope: 0.25
        // body: {bo,od,dy,y1,12,23,34} vs {bo,od,dy,y5,56,67,78}
        //       → 3 shared / 11 total ≈ 0.273 → ≈ 0.0273
        // Expected ≈ 0.40 + 0.125 + 0.25 + 0.027 = 0.802
        assert!(s > 0.78 && s < 0.82, "got {}", s);
    }

    #[test]
    fn unrelated_dna_low_score() {
        let a = "role-x::FOO-BAR::deadbeef::aaaa1111";
        let b = "role-y::QUX-ZAP::cafebabe::ffff8888";
        let s = score(a, b);
        // Role, caps and scope all miss; only the body bigram term could
        // contribute, and it is capped at the body weight.
        assert!(s < 0.10, "got {}", s);
    }

    #[test]
    fn empty_caps_treated_as_full_match() {
        // Some real DNAs carry a "?" placeholder in the caps slot; two such
        // DNAs must still score as a full match.
        let a = "r::?::s::b1";
        let b = "r::?::s::b1";
        let s = score(a, b);
        assert!((s - 1.0).abs() < 1e-9);
    }
}