KeiSeiKit-1.0/_primitives/_rust/kei-artifact/src/validate.rs
Parfii-bot 0be354a920 KeiSeiKit-public — clean state
Single-commit clean baseline after security scrub of niche-tells,
project codenames, internal jargon, and contributor-email leaks.

Contents:
- 100 Rust crates (_primitives/_rust/)
- 37 agent manifests (_manifests/) + generated specs (_generated/)
- 67 user-invocable skills (skills/)
- 33 hooks (hooks/)
- Composition blocks (_blocks/)
- Documentation (docs/, README.md)
- TS adapter packages (_ts_packages/)
- Assembler (_assembler/)
- Roles (_roles/)
- Templates (_templates/)
- Forgejo CI (.forgejo/)

Author: Denis Parfionovich <info@greendragon.info>

License: see LICENSE.
2026-05-01 12:09:03 +08:00

268 lines
9.3 KiB
Rust

//! Minimal JSON Schema validator — strict subset of draft 2020-12.
//!
//! Keyword support (chosen for the 5 built-in schemas):
//! - `type` (object, array, string, integer, number, boolean, null)
//! - `required` (array of property names)
//! - `properties` (object → sub-schema)
//! - `additionalProperties` (bool; default true, we set false on ours)
//! - `enum` (array of allowed scalar values)
//! - `items` (sub-schema for array elements)
//! - `minLength` (integer) / `minItems` (integer) / `minimum` (number)
//!
//! Intentionally NOT supported: $ref, oneOf/anyOf/allOf, patternProperties,
//! format validation, conditional schemas. The 5 built-in schemas are written
//! to avoid needing those — keeps the validator under 200 LOC and removes the
//! 40+ transitive-dep `jsonschema` crate.
//!
//! RULE 0.4 note: draft 2020-12 is the current JSON Schema standard
//! [VERIFIED: https://json-schema.org/draft/2020-12 — spec page].
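//! This implementation is a strict subset — any schema author sticking to
//! the keywords above gets draft-2020-12-compatible semantics, with two
//! restrictions: `type` must be a single string (an array of types is
//! reported as a schema error), and `additionalProperties` is honoured only
//! as a boolean (a sub-schema value is treated as `true`).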
use serde_json::Value;
/// Validates `content` against `schema`, starting at the document root.
///
/// # Errors
///
/// Returns the message describing the first violation encountered. Every
/// message starts with a path-style location rooted at `$`.
pub fn validate_content(schema: &Value, content: &Value) -> Result<(), String> {
    // `$` names the document root; nested checks extend the path from here.
    let root_path = "$";
    check(schema, content, root_path)
}
/// Schema keywords this validator recognises.
///
/// `warn_unsupported_keywords` consults this list to flag, without rejecting,
/// any keyword outside it: such a schema is still stored, but the operator is
/// told that the keyword enforces nothing.
const KNOWN_KEYWORDS: &[&str] = &[
    // Identification and annotations: accepted, nothing to enforce.
    "$schema", "$id", "title", "description",
    // Checked against any value.
    "type",
    // Object keywords.
    "required", "properties", "additionalProperties",
    // Checked against any value.
    "enum",
    // Array element schema.
    "items",
    // Lower bounds for strings, arrays and numbers respectively.
    "minLength", "minItems", "minimum",
];
/// Emit a stderr warning for each schema keyword this validator does not
/// enforce. Non-fatal: the schema is still accepted and stored verbatim —
/// operators can keep `pattern` / `format` / `oneOf` etc. as human-readable
/// hints without expecting runtime validation of them.
///
/// The walk is schema-aware: only the keys of schema objects are treated as
/// keywords. It descends into each sub-schema under `properties` and into the
/// `items` sub-schema, so nested unsupported keywords are caught too, but
/// property *names* and the contents of data-valued keywords (`enum`,
/// `required`, annotations) are never mistaken for keywords. The value of an
/// unsupported keyword is not inspected further, because nothing inside it is
/// enforced anyway and one warning per keyword is enough.
pub fn warn_unsupported_keywords(schema: &Value) {
    /// Inspects one schema node located at `path`.
    fn walk(node: &Value, path: &str) {
        // Only JSON objects carry keywords; anything else has nothing to flag.
        let keywords = match node {
            Value::Object(map) => map,
            _ => return,
        };
        for (keyword, sub) in keywords {
            if !KNOWN_KEYWORDS.contains(&keyword.as_str()) {
                // The trailing `\` joins the two source lines, so the
                // separator after `{path}` has to be written explicitly.
                eprintln!(
                    "[kei-artifact] schema warning: unsupported keyword '{keyword}' at {path}: \
                     stored but not enforced by the minimal validator (see validate.rs KNOWN_KEYWORDS)"
                );
                continue;
            }
            match (keyword.as_str(), sub) {
                // Keys of `properties` are property names, not keywords; only
                // their values are sub-schemas worth inspecting.
                ("properties", Value::Object(props)) => {
                    for (name, prop_schema) in props {
                        walk(prop_schema, &format!("{path}.properties.{name}"));
                    }
                }
                ("items", item_schema) => walk(item_schema, &format!("{path}.items")),
                // Every other known keyword holds plain data, not a sub-schema.
                _ => {}
            }
        }
    }
    walk(schema, "$");
}
/// Validates one `value` against one `schema` node located at `path`.
///
/// `type` and `enum` are checked first, for any kind of value. The remaining
/// keywords are dispatched on the kind of `value`, so a keyword that does not
/// apply to that kind (for example `minLength` on a number) is ignored.
/// Stops at the first violation and returns its message.
fn check(schema: &Value, value: &Value, path: &str) -> Result<(), String> {
    if let Some(declared) = schema.get("type") {
        check_type(declared, value, path)?;
    }
    if let Some(allowed) = schema.get("enum") {
        check_enum(allowed, value, path)?;
    }

    match value {
        Value::Object(_) => check_object(schema, value, path),
        Value::Array(_) => check_array(schema, value, path),
        Value::String(text) => check_min_length(schema, text, path),
        Value::Number(number) => check_minimum(schema, number, path),
        // Booleans and null have no kind-specific keywords in this subset.
        Value::Null | Value::Bool(_) => Ok(()),
    }
}
/// Enforces the `type` keyword.
///
/// `schema_type` is the keyword's value and must be a single string; any
/// other shape is reported as a schema error. A type name that is not one of
/// the seven JSON Schema primitive types never matches, so it is reported as
/// a mismatch.
///
/// `"integer"` follows draft 2020-12: it accepts any number whose fractional
/// part is zero, so `1.0` is an integer just like `1`.
///
/// # Errors
///
/// Returns `"{path}: schema 'type' must be string"` for a malformed keyword,
/// or `"{path}: expected type '…', got …"` when `value` has a different type.
fn check_type(schema_type: &Value, value: &Value, path: &str) -> Result<(), String> {
    let want = schema_type
        .as_str()
        .ok_or_else(|| format!("{path}: schema 'type' must be string"))?;
    let ok = match (want, value) {
        ("object", Value::Object(_))
        | ("array", Value::Array(_))
        | ("string", Value::String(_))
        | ("boolean", Value::Bool(_))
        | ("null", Value::Null)
        | ("number", Value::Number(_)) => true,
        ("integer", Value::Number(n)) => {
            // Numbers stored as i64/u64 are integers by construction; a float
            // also qualifies when it has no fractional part.
            n.is_i64() || n.is_u64() || matches!(n.as_f64(), Some(f) if f.fract() == 0.0)
        }
        _ => false,
    };
    if !ok {
        return Err(format!(
            "{path}: expected type '{want}', got {}",
            type_of(value)
        ));
    }
    Ok(())
}
/// Names the JSON type of `v`.
///
/// Every numeric value is reported as `"number"`; integers are not
/// distinguished from other numbers here.
fn type_of(v: &Value) -> &'static str {
    match v {
        Value::Object(_) => "object",
        Value::Array(_) => "array",
        Value::String(_) => "string",
        Value::Number(_) => "number",
        Value::Bool(_) => "boolean",
        Value::Null => "null",
    }
}
/// Enforces the `enum` keyword: `value` must equal one of the listed members.
///
/// # Errors
///
/// Returns `"{path}: 'enum' must be array"` when the keyword's value is not
/// an array, or `"{path}: value … not in enum"` when no member equals `value`.
fn check_enum(enum_schema: &Value, value: &Value, path: &str) -> Result<(), String> {
    let candidates = match enum_schema.as_array() {
        Some(list) => list,
        None => return Err(format!("{path}: 'enum' must be array")),
    };
    if candidates.contains(value) {
        Ok(())
    } else {
        Err(format!("{path}: value {value} not in enum"))
    }
}
fn check_object(schema: &Value, value: &Value, path: &str) -> Result<(), String> {
let obj = value.as_object().unwrap();
if let Some(required) = schema.get("required").and_then(|v| v.as_array()) {
for r in required {
if let Some(name) = r.as_str() {
if !obj.contains_key(name) {
return Err(format!("{path}: missing required property '{name}'"));
}
}
}
}
let props = schema.get("properties").and_then(|v| v.as_object());
let additional = schema
.get("additionalProperties")
.and_then(|v| v.as_bool())
.unwrap_or(true);
for (k, v) in obj {
match props.and_then(|p| p.get(k)) {
Some(sub) => check(sub, v, &format!("{path}.{k}"))?,
None if !additional => {
return Err(format!("{path}: unexpected property '{k}'"));
}
None => {}
}
}
Ok(())
}
fn check_array(schema: &Value, value: &Value, path: &str) -> Result<(), String> {
let arr = value.as_array().unwrap();
if let Some(min) = schema.get("minItems").and_then(|v| v.as_u64()) {
if (arr.len() as u64) < min {
return Err(format!("{path}: array has {} items, min {min}", arr.len()));
}
}
if let Some(items) = schema.get("items") {
for (i, el) in arr.iter().enumerate() {
check(items, el, &format!("{path}[{i}]"))?;
}
}
Ok(())
}
fn check_min_length(schema: &Value, s: &str, path: &str) -> Result<(), String> {
if let Some(min) = schema.get("minLength").and_then(|v| v.as_u64()) {
if (s.chars().count() as u64) < min {
return Err(format!("{path}: string shorter than minLength {min}"));
}
}
Ok(())
}
fn check_minimum(schema: &Value, n: &serde_json::Number, path: &str) -> Result<(), String> {
if let Some(min) = schema.get("minimum").and_then(|v| v.as_f64()) {
if let Some(v) = n.as_f64() {
if v < min {
return Err(format!("{path}: number {v} below minimum {min}"));
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs a validation that is expected to fail and returns its error text.
    fn rejection(schema: &Value, content: &Value) -> String {
        validate_content(schema, content).unwrap_err()
    }

    #[test]
    fn type_mismatch_rejected() {
        let message = rejection(&json!({"type": "string"}), &json!(42));
        assert!(message.contains("expected type 'string'"), "got: {message}");
    }

    #[test]
    fn missing_required_rejected() {
        let goal_schema = json!({
            "type": "object",
            "required": ["goal"],
            "properties": {"goal": {"type": "string"}}
        });
        let message = rejection(&goal_schema, &json!({}));
        assert!(message.contains("goal"));
    }

    #[test]
    fn unknown_additional_rejected() {
        let closed_schema = json!({
            "type": "object",
            "additionalProperties": false,
            "properties": {"a": {"type": "string"}}
        });
        let message = rejection(&closed_schema, &json!({"a":"x","b":"y"}));
        assert!(message.contains("unexpected property 'b'"));
    }

    #[test]
    fn enum_and_array_items_enforced() {
        let ops_schema = json!({
            "type": "array",
            "items": {"type": "string", "enum": ["add", "mod", "del"]}
        });
        // Every element is a listed member: accepted.
        assert!(validate_content(&ops_schema, &json!(["add", "mod"])).is_ok());
        // One element outside the enum: rejected with an enum error.
        let message = rejection(&ops_schema, &json!(["nope"]));
        assert!(message.contains("enum"));
    }

    #[test]
    fn warn_unsupported_keywords_does_not_panic_or_mutate() {
        // Smoke test: the warning pass writes to stderr and returns unit.
        // Capturing stderr portably would need a gag-style helper, so this
        // only asserts that the call completes and that the schema remains
        // usable by `validate_content` afterwards.
        let hinted_schema = json!({
            "type": "object",
            "required": ["k"],
            "properties": {
                "k": {"type": "string", "pattern": "^[a-z]+$", "format": "email"}
            },
            "oneOf": [{"type": "object"}],
            "patternProperties": {"^x_": {"type": "string"}}
        });
        warn_unsupported_keywords(&hinted_schema);

        // The supported subset is still enforced.
        assert!(validate_content(&hinted_schema, &json!({"k": "hi"})).is_ok());
        let message = rejection(&hinted_schema, &json!({}));
        assert!(message.contains("k"));
    }
}