KeiSeiKit-1.0/_primitives/_rust/kei-memory/src/injection_patterns.rs
Parfii-bot a4e667de10 KeiSeiKit-public — clean state
Single-commit clean baseline after security scrub of niche-tells,
project codenames, internal jargon, and contributor-email leaks.

Contents:
- 100 Rust crates (_primitives/_rust/)
- 37 agent manifests (_manifests/) + generated specs (_generated/)
- 67 user-invocable skills (skills/)
- 33 hooks (hooks/)
- Composition blocks (_blocks/)
- Documentation (docs/, README.md)
- TS adapter packages (_ts_packages/)
- Assembler (_assembler/)
- Roles (_roles/)
- Templates (_templates/)
- Forgejo CI (.forgejo/)

Author: Denis Parfionovich <info@greendragon.info>

License: see LICENSE.
2026-05-01 12:09:03 +08:00

134 lines
4.6 KiB
Rust

//! Injection-pattern table for `injection_guard`.
//!
//! Constructor Pattern: this cube only declares regex/string patterns.
//! Detection logic lives in `injection_guard.rs`. Test corpus in
//! `tests/guard_test_corpus.rs`. Each entry carries a stable id, a
//! severity (`Block` or `Warn`), and a human-readable source label so
//! triage output points back to the heuristic that fired.
use regex::Regex;
use std::sync::OnceLock;
/// Severity of a single pattern match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
/// Hard reject — content must not be persisted.
Block,
/// Surface to caller; persistence still allowed unless caller upgrades.
Warn,
}
/// One regex-based pattern row.
pub struct RegexPattern {
pub id: &'static str,
pub re: Regex,
pub severity: Severity,
pub source: &'static str,
}
/// One substring/heuristic row evaluated on a lower-cased copy of input.
pub struct SubstringPattern {
pub id: &'static str,
/// All needles must appear (AND semantics).
pub needles: &'static [&'static str],
pub severity: Severity,
pub source: &'static str,
}
/// Invisible / bidi / zero-width unicode codepoints.
pub const INVISIBLE_CHARS: &[char] = &[
'\u{200B}', '\u{200C}', '\u{200D}', '\u{200E}', '\u{200F}',
'\u{202A}', '\u{202B}', '\u{202C}', '\u{202D}', '\u{202E}',
'\u{2060}', '\u{FEFF}',
];
/// Threshold above which a single base64-looking line is flagged.
/// Documented for callers; the regex pattern hardcodes the same value.
#[allow(dead_code)]
pub const BASE64_BLOB_BYTES: usize = 1024;
/// PEM marker dashes — built at runtime to keep `secrets-guard` quiet.
fn pem_dashes() -> String {
"-".repeat(5)
}
fn pem_marker(label: &str) -> String {
let d = pem_dashes();
format!("{d}BEGIN {label}{d}")
}
fn rx(id: &'static str, pat: &str, sev: Severity, src: &'static str) -> RegexPattern {
RegexPattern {
id,
re: Regex::new(pat).unwrap(),
severity: sev,
source: src,
}
}
fn prompt_override_patterns() -> Vec<RegexPattern> {
vec![
rx("prompt_override_ignore_previous", r"(?i)ignore\s+previous\s+instructions", Severity::Block, "promptguard:override"),
rx("prompt_override_you_are_now", r"(?i)you\s+are\s+now\b", Severity::Block, "promptguard:roleplay"),
rx("prompt_override_disregard", r"(?i)disregard\s+(all|prior|above)", Severity::Block, "promptguard:override"),
rx("system_role_prefix", r"(?im)^\s*system\s*:", Severity::Block, "promptguard:role-prefix"),
rx("chatml_im_start", r"<\|im_start\|>", Severity::Block, "chatml:tag"),
rx("chatml_endoftext", r"<\|endoftext\|>", Severity::Block, "chatml:tag"),
]
}
fn secret_patterns() -> Vec<RegexPattern> {
let openssh = regex::escape(&pem_marker("OPENSSH PRIVATE KEY"));
let rsa = regex::escape(&pem_marker("RSA PRIVATE KEY"));
vec![
rx("ssh_openssh_private", &openssh, Severity::Block, "secret:openssh"),
rx("ssh_rsa_private", &rsa, Severity::Block, "secret:rsa"),
// Long single-line base64 blobs are now Block-tier (see audit P2.1.b):
// attestation/key blobs pasted into agent transcripts represent a
// direct exfiltration vector for memory-write paths. The 1024-byte
// floor keeps benign hex hashes / short tokens unaffected.
rx("long_base64_line", r"(?m)^[A-Za-z0-9+/=]{1024,}$", Severity::Block, "heuristic:base64-blob"),
]
}
fn build_regex_table() -> Vec<RegexPattern> {
let mut out = prompt_override_patterns();
out.extend(secret_patterns());
out
}
fn build_substring_table() -> Vec<SubstringPattern> {
vec![
SubstringPattern {
id: "curl_with_bearer",
needles: &["bearer ", "://"],
severity: Severity::Block,
source: "exfil:curl-bearer",
},
SubstringPattern {
id: "aws_secret_keyword",
needles: &["aws_secret"],
severity: Severity::Block,
source: "secret:aws",
},
SubstringPattern {
id: "api_key_url",
needles: &["api_key=", "://"],
severity: Severity::Block,
source: "exfil:api-key-url",
},
]
}
static REGEX_TABLE: OnceLock<Vec<RegexPattern>> = OnceLock::new();
static SUBSTR_TABLE: OnceLock<Vec<SubstringPattern>> = OnceLock::new();
/// Lazily-built regex pattern table.
pub fn regex_patterns() -> &'static [RegexPattern] {
REGEX_TABLE.get_or_init(build_regex_table)
}
/// Lazily-built substring/heuristic table.
pub fn substring_patterns() -> &'static [SubstringPattern] {
SUBSTR_TABLE.get_or_init(build_substring_table)
}