diff --git a/_primitives/_rust/kei-entity-store/src/engine.rs b/_primitives/_rust/kei-entity-store/src/engine.rs index cb5d94b..cac3838 100644 --- a/_primitives/_rust/kei-entity-store/src/engine.rs +++ b/_primitives/_rust/kei-entity-store/src/engine.rs @@ -12,6 +12,16 @@ use anyhow::{Context, Result}; use rusqlite::Connection; use std::path::Path; +/// Schema-level version stamped into SQLite's `user_version` pragma on +/// first open. Future migrations bump this constant and gate their DDL +/// on the pragma's current value — idempotent `CREATE TABLE IF NOT +/// EXISTS` is not enough once column shapes diverge. +/// +/// TODO B5: expose a `version: u32` field on `EntitySchema` and add a +/// `custom_migrations: &'static [&'static str]` entry indexed by +/// target version so sibling crates can publish their own bump paths. +pub const CURRENT_USER_VERSION: u32 = 1; + pub struct Store { conn: Connection, } @@ -48,6 +58,9 @@ impl Store { /// Run: create primary table, indexes, FTS virtual table, edge table, /// then any custom DDL. Idempotent (all statements use IF NOT EXISTS). +/// +/// Also stamps `PRAGMA user_version` on fresh databases so future +/// schema bumps can detect the target migration set exactly once. pub fn run_migrations(conn: &Connection, schema: &EntitySchema) -> Result<(), VerbError> { conn.execute_batch(&ddl_primary_table(schema))?; conn.execute_batch(&ddl_indexes(schema))?; @@ -60,6 +73,22 @@ pub fn run_migrations(conn: &Connection, schema: &EntitySchema) -> Result<(), Ve for stmt in schema.custom_migrations { conn.execute_batch(stmt)?; } + apply_user_version(conn)?; + Ok(()) +} + +/// Set `PRAGMA user_version` exactly once per DB lifetime (fresh DBs +/// default to 0). If already stamped at `CURRENT_USER_VERSION` this is +/// a no-op; if stamped at an older version a future bump will gate +/// version-indexed DDL here. 
+fn apply_user_version(conn: &Connection) -> Result<(), VerbError> { + let current: u32 = conn + .pragma_query_value(None, "user_version", |r| r.get(0)) + .unwrap_or(0); + if current < CURRENT_USER_VERSION { + // PRAGMA does not accept parameter binding; value is a constant. + conn.pragma_update(None, "user_version", CURRENT_USER_VERSION)?; + } Ok(()) } @@ -81,7 +110,12 @@ fn ddl_column(f: &FieldDef) -> String { FieldKind::Text => format!("{} TEXT DEFAULT ''", f.name), FieldKind::TextDefault => { let d = f.default.unwrap_or(""); - format!("{} TEXT NOT NULL DEFAULT '{}'", f.name, d) + // SQL-escape embedded single quotes (per SQL standard: `'` + // → `''`) so `text_default("status", "don't know")` does + // not inject. Today all callers pass safe constants; this + // defence is for the first dev who doesn't. + let escaped = d.replace('\'', "''"); + format!("{} TEXT NOT NULL DEFAULT '{}'", f.name, escaped) } FieldKind::TimestampCreated => format!("{} INTEGER NOT NULL", f.name), FieldKind::TimestampUpdated => format!("{} INTEGER NOT NULL", f.name), diff --git a/_primitives/_rust/kei-entity-store/src/error.rs b/_primitives/_rust/kei-entity-store/src/error.rs index a5b9359..023f260 100644 --- a/_primitives/_rust/kei-entity-store/src/error.rs +++ b/_primitives/_rust/kei-entity-store/src/error.rs @@ -8,6 +8,16 @@ pub enum VerbError { #[error("InvalidInput: {0}")] InvalidInput(String), + /// Typed-validation failure for a declared schema field. + /// Distinct variant from free-form `InvalidInput` so callers can + /// match on `{field, expected, got}` programmatically. + #[error("InvalidInput: field `{field}` expected {expected}, got {got}")] + InvalidType { + field: String, + expected: String, + got: String, + }, + #[error("VerbDisabled: {verb} not enabled on schema {schema}")] VerbDisabled { verb: String, schema: String }, @@ -29,7 +39,10 @@ impl VerbError { /// 1 for storage / IO. 
pub fn exit_code(&self) -> u8 { match self { - Self::InvalidInput(_) | Self::VerbDisabled { .. } | Self::NotFound { .. } => 2, + Self::InvalidInput(_) + | Self::InvalidType { .. } + | Self::VerbDisabled { .. } + | Self::NotFound { .. } => 2, Self::Sqlite(_) | Self::Serde(_) | Self::Storage(_) => 1, } } diff --git a/_primitives/_rust/kei-entity-store/src/verbs/create.rs b/_primitives/_rust/kei-entity-store/src/verbs/create.rs index a2aabae..63bbfbc 100644 --- a/_primitives/_rust/kei-entity-store/src/verbs/create.rs +++ b/_primitives/_rust/kei-entity-store/src/verbs/create.rs @@ -4,9 +4,14 @@ //! declared on the EntitySchema are copied; extras are silently ignored //! (the atom layer above is responsible for rejecting them if desired). //! Output: `{ "id": , "created_at": }`. +//! +//! Type discipline: when a key is present its JSON kind MUST match the +//! field kind (string for Text*, number for Integer*). Mismatch → +//! `VerbError::InvalidType`. Missing keys default to 0 / "" as before. 
use crate::error::VerbError; use crate::schema::{EntitySchema, FieldDef, FieldKind}; +use crate::verbs::validate; use chrono::Utc; use rusqlite::{types::Value as SqlValue, Connection}; use serde_json::{json, Value}; @@ -16,29 +21,51 @@ pub fn run( schema: &EntitySchema, input: Value, ) -> Result { + guard_enabled(schema)?; + let obj = as_object(&input, "create")?; + let now = Utc::now().timestamp(); + let (cols, values) = build_insert(schema, obj, now)?; + let id = insert_tx(conn, schema, &cols, &values, obj)?; + let created_at = read_created_at(conn, schema, id).unwrap_or(now); + Ok(json!({ "id": id, "created_at": created_at })) +} + +fn guard_enabled(schema: &EntitySchema) -> Result<(), VerbError> { if !schema.verb_enabled("create") { return Err(VerbError::VerbDisabled { verb: "create".into(), schema: schema.name.into(), }); } - let obj = as_object(&input, "create")?; - let now = Utc::now().timestamp(); - let id = exec_insert(conn, schema, obj, now)?; - if let Some(cols) = schema.fts_columns { - reindex_fts(conn, schema.table, cols, id, obj)?; - } - let created_at = read_created_at(conn, schema, id).unwrap_or(now); - Ok(json!({ "id": id, "created_at": created_at })) + Ok(()) } -fn exec_insert( +/// Wrap INSERT + FTS reindex in one transaction so a rusqlite failure +/// in the FTS leg rolls back the row insert too. `unchecked_transaction` +/// is used because callers hold `&Connection` — rusqlite permits this +/// as long as only one tx is in flight. 
+fn insert_tx( conn: &Connection, schema: &EntitySchema, + cols: &[&'static str], + values: &[SqlValue], obj: &serde_json::Map, - now: i64, ) -> Result { - let (cols, values) = build_insert(schema, obj, now); + let tx = conn.unchecked_transaction()?; + let id = exec_insert_tx(&tx, schema, cols, values)?; + if let Some(fts_cols) = schema.fts_columns { + reindex_fts(&tx, schema.table, fts_cols, id, obj)?; + } + tx.commit()?; + Ok(id) +} + +fn exec_insert_tx( + tx: &rusqlite::Transaction<'_>, + schema: &EntitySchema, + cols: &[&'static str], + values: &[SqlValue], +) -> Result { let placeholders: Vec = (1..=cols.len()).map(|i| format!("?{i}")).collect(); let sql = format!( "INSERT INTO {} ({}) VALUES ({})", @@ -48,8 +75,8 @@ fn exec_insert( ); let params: Vec<&dyn rusqlite::ToSql> = values.iter().map(|v| v as &dyn rusqlite::ToSql).collect(); - conn.execute(&sql, params.as_slice())?; - Ok(conn.last_insert_rowid()) + tx.execute(&sql, params.as_slice())?; + Ok(tx.last_insert_rowid()) } fn as_object<'a>(v: &'a Value, verb: &str) -> Result<&'a serde_json::Map, VerbError> { @@ -61,47 +88,77 @@ fn build_insert( schema: &EntitySchema, input: &serde_json::Map, now: i64, -) -> (Vec<&'static str>, Vec) { +) -> Result<(Vec<&'static str>, Vec), VerbError> { let mut cols: Vec<&'static str> = Vec::new(); let mut values: Vec = Vec::new(); for f in schema.writable_fields() { cols.push(f.name); - values.push(field_value_for_insert(f, input, now)); + values.push(field_value_for_insert(f, input, now)?); } - (cols, values) + Ok((cols, values)) } -fn field_value_for_insert(f: &FieldDef, input: &serde_json::Map, now: i64) -> SqlValue { +fn field_value_for_insert( + f: &FieldDef, + input: &serde_json::Map, + now: i64, +) -> Result { match f.kind { FieldKind::TimestampCreated | FieldKind::TimestampUpdated => { - match input.get(f.name).and_then(|v| v.as_i64()) { + Ok(match input.get(f.name).and_then(|v| v.as_i64()) { Some(ts) if ts > 0 => SqlValue::Integer(ts), _ => SqlValue::Integer(now), + 
}) + } + FieldKind::TextDefault => insert_text_default(f, input), + FieldKind::IntegerPk => Ok(SqlValue::Null), + _ => match input.get(f.name) { + Some(raw) => validate::coerce(f, raw), + None => Ok(default_for_kind(f)), + }, + } +} + +fn insert_text_default( + f: &FieldDef, + input: &serde_json::Map, +) -> Result { + match input.get(f.name) { + Some(raw) => { + let coerced = validate::coerce(f, raw)?; + if let SqlValue::Text(ref s) = coerced { + if s.is_empty() { + let d = f.default.unwrap_or(""); + validate::check_text_len(f, d)?; + return Ok(SqlValue::Text(d.to_string())); + } } + Ok(coerced) } - FieldKind::IntegerNotNull | FieldKind::Integer => SqlValue::Integer( - input.get(f.name).and_then(|v| v.as_i64()).unwrap_or(0), - ), - FieldKind::TextNotNull | FieldKind::Text => SqlValue::Text( - input.get(f.name).and_then(|v| v.as_str()).unwrap_or("").to_string(), - ), - FieldKind::TextDefault => { - let raw = input.get(f.name).and_then(|v| v.as_str()).unwrap_or(""); - let final_v = if raw.is_empty() { f.default.unwrap_or("") } else { raw }; - SqlValue::Text(final_v.to_string()) + None => { + let d = f.default.unwrap_or(""); + validate::check_text_len(f, d)?; + Ok(SqlValue::Text(d.to_string())) } - FieldKind::IntegerPk => SqlValue::Null, // filtered by writable_fields + } +} + +fn default_for_kind(f: &FieldDef) -> SqlValue { + match f.kind { + FieldKind::IntegerNotNull | FieldKind::Integer => SqlValue::Integer(0), + FieldKind::TextNotNull | FieldKind::Text => SqlValue::Text(String::new()), + _ => SqlValue::Null, } } fn reindex_fts( - conn: &Connection, + tx: &rusqlite::Transaction<'_>, table: &str, cols: &[&str], id: i64, input: &serde_json::Map, ) -> Result<(), VerbError> { - conn.execute( + tx.execute( &format!("DELETE FROM fts_{table} WHERE {table}_id=?1"), rusqlite::params![id], )?; @@ -118,7 +175,7 @@ fn reindex_fts( } let params: Vec<&dyn rusqlite::ToSql> = values.iter().map(|v| v as &dyn rusqlite::ToSql).collect(); - conn.execute(&sql, params.as_slice())?; + 
tx.execute(&sql, params.as_slice())?; Ok(()) } diff --git a/_primitives/_rust/kei-entity-store/src/verbs/mod.rs b/_primitives/_rust/kei-entity-store/src/verbs/mod.rs index c5b1c94..4489bab 100644 --- a/_primitives/_rust/kei-entity-store/src/verbs/mod.rs +++ b/_primitives/_rust/kei-entity-store/src/verbs/mod.rs @@ -17,6 +17,7 @@ pub mod list; pub mod rank; pub mod search; pub mod update; +pub mod validate; /// Full list of supported verbs — SSoT for documentation + schema /// validation. `EntitySchema.enabled_verbs` entries MUST appear here. diff --git a/_primitives/_rust/kei-entity-store/src/verbs/search.rs b/_primitives/_rust/kei-entity-store/src/verbs/search.rs index 8f85ed4..c451e28 100644 --- a/_primitives/_rust/kei-entity-store/src/verbs/search.rs +++ b/_primitives/_rust/kei-entity-store/src/verbs/search.rs @@ -2,6 +2,13 @@ //! table, ORDER BY rank. //! //! Requires `EntitySchema.fts_columns` to be `Some`. +//! +//! Security: user input is wrapped in an FTS5 double-quoted phrase so +//! the FTS5 query grammar (`col:term`, `NEAR/5`, boolean ops, `*`, +//! parentheses) is treated as LITERAL TEXT. This is a pure keyword +//! search — attackers cannot address unindexed columns or craft +//! pathological scan expressions. Embedded `"` chars in the user query +//! are escaped per FTS5 grammar by doubling (`"` → `""`). 
use crate::error::VerbError; use crate::schema::EntitySchema; @@ -37,6 +44,7 @@ pub fn run( return Err(VerbError::InvalidInput("search: query must be non-empty".into())); } let limit = clamp(input.get("limit").and_then(|v| v.as_i64())); + let safe_query = fts5_quote(query); let cols: Vec = schema.fields.iter().map(|f| format!("t.{}", f.name)).collect(); let sql = format!( @@ -47,7 +55,7 @@ pub fn run( table = schema.table ); let mut stmt = conn.prepare(&sql)?; - let mut rows = stmt.query(rusqlite::params![query, limit])?; + let mut rows = stmt.query(rusqlite::params![safe_query, limit])?; let mut results: Vec = Vec::new(); while let Some(r) = rows.next()? { results.push(row_to_json(schema, r)?); @@ -55,9 +63,41 @@ pub fn run( Ok(json!({ "results": results })) } +/// Wrap a user-supplied string as an FTS5 literal phrase. Doubles any +/// embedded `"` per FTS5 grammar. Result is safe to bind as the MATCH +/// argument and will match rows containing all of the literal tokens +/// in order. +fn fts5_quote(raw: &str) -> String { + let escaped = raw.replace('"', "\"\""); + format!("\"{escaped}\"") +} + fn clamp(raw: Option) -> i64 { match raw { Some(n) if n > 0 && n <= MAX_LIMIT => n, _ => DEFAULT_LIMIT, } } + +#[cfg(test)] +mod tests { + use super::fts5_quote; + + #[test] + fn quote_basic() { + assert_eq!(fts5_quote("refactor"), "\"refactor\""); + } + + #[test] + fn quote_escapes_dq() { + assert_eq!(fts5_quote("has \"quote\""), "\"has \"\"quote\"\"\""); + } + + #[test] + fn quote_preserves_colons_and_ops() { + // Injection attempt: `title:evil` — quoted phrase neutralizes + // the column-prefix operator so the result searches for the + // literal tokens `title:evil` across the configured columns. 
+ assert_eq!(fts5_quote("title:evil"), "\"title:evil\""); + } +} diff --git a/_primitives/_rust/kei-entity-store/src/verbs/update.rs b/_primitives/_rust/kei-entity-store/src/verbs/update.rs index ab3039d..71db6c2 100644 --- a/_primitives/_rust/kei-entity-store/src/verbs/update.rs +++ b/_primitives/_rust/kei-entity-store/src/verbs/update.rs @@ -1,8 +1,14 @@ //! `update` verb — partial update by id. Only keys that appear in //! the input JSON and that are declared on the schema are written. +//! +//! Type discipline: when a key is present its JSON kind MUST match the +//! field kind. Mismatch → `VerbError::InvalidType` (no silent coercion). +//! UPDATE + FTS reindex run in a single transaction so a mid-flight +//! failure leaves neither the row nor the FTS entry in a torn state. use crate::error::VerbError; use crate::schema::{EntitySchema, FieldDef, FieldKind}; +use crate::verbs::validate; use chrono::Utc; use rusqlite::{types::Value as SqlValue, Connection}; use serde_json::{json, Value}; @@ -12,12 +18,7 @@ pub fn run( schema: &EntitySchema, input: Value, ) -> Result { - if !schema.verb_enabled("update") { - return Err(VerbError::VerbDisabled { - verb: "update".into(), - schema: schema.name.into(), - }); - } + guard_enabled(schema)?; let obj = input .as_object() .ok_or_else(|| VerbError::InvalidInput("update: expected JSON object".into()))?; @@ -26,23 +27,47 @@ pub fn run( .and_then(|v| v.as_i64()) .ok_or_else(|| VerbError::InvalidInput("update: missing `id` integer".into()))?; let now = Utc::now().timestamp(); - let (set_cols, values) = build_set(schema, obj, now); + let (set_cols, values) = build_set(schema, obj, now)?; if set_cols.is_empty() { return Err(VerbError::InvalidInput("update: no writable fields supplied".into())); } - exec_update(conn, schema, id, &set_cols, values)?; - if let Some(cols) = schema.fts_columns { - reindex_fts(conn, schema.table, cols, id, obj)?; - } + update_tx(conn, schema, id, &set_cols, values, obj)?; Ok(json!({ "ok": true, "id": id 
})) } -fn exec_update( +fn guard_enabled(schema: &EntitySchema) -> Result<(), VerbError> { + if !schema.verb_enabled("update") { + return Err(VerbError::VerbDisabled { + verb: "update".into(), + schema: schema.name.into(), + }); + } + Ok(()) +} + +fn update_tx( conn: &Connection, schema: &EntitySchema, id: i64, set_cols: &[&'static str], values: Vec, + obj: &serde_json::Map, +) -> Result<(), VerbError> { + let tx = conn.unchecked_transaction()?; + exec_update_tx(&tx, schema, id, set_cols, values)?; + if let Some(cols) = schema.fts_columns { + reindex_fts(&tx, schema.table, cols, id, obj)?; + } + tx.commit()?; + Ok(()) +} + +fn exec_update_tx( + tx: &rusqlite::Transaction<'_>, + schema: &EntitySchema, + id: i64, + set_cols: &[&'static str], + values: Vec, ) -> Result<(), VerbError> { let placeholders: Vec = (1..=set_cols.len()).map(|i| format!("{} = ?{i}", set_cols[i - 1])).collect(); @@ -57,7 +82,7 @@ fn exec_update( all.push(SqlValue::Integer(id)); let params: Vec<&dyn rusqlite::ToSql> = all.iter().map(|v| v as &dyn rusqlite::ToSql).collect(); - let rows = conn.execute(&sql, params.as_slice())?; + let rows = tx.execute(&sql, params.as_slice())?; if rows == 0 { return Err(VerbError::NotFound { entity: schema.name.into(), id }); } @@ -68,7 +93,7 @@ fn build_set( schema: &EntitySchema, input: &serde_json::Map, now: i64, -) -> (Vec<&'static str>, Vec) { +) -> Result<(Vec<&'static str>, Vec), VerbError> { let mut cols: Vec<&'static str> = Vec::new(); let mut values: Vec = Vec::new(); for f in schema.writable_fields() { @@ -77,38 +102,37 @@ fn build_set( values.push(SqlValue::Integer(now)); continue; } - if let Some(sql_val) = value_from_input(f, input) { + if let Some(sql_val) = value_from_input(f, input)? 
{ cols.push(f.name); values.push(sql_val); } } - (cols, values) + Ok((cols, values)) } -fn value_from_input(f: &FieldDef, input: &serde_json::Map) -> Option { - let raw = input.get(f.name)?; - Some(match f.kind { - FieldKind::TextNotNull | FieldKind::Text | FieldKind::TextDefault => { - SqlValue::Text(raw.as_str().unwrap_or("").to_string()) - } - FieldKind::IntegerNotNull - | FieldKind::Integer - | FieldKind::TimestampCreated - | FieldKind::TimestampUpdated => SqlValue::Integer(raw.as_i64().unwrap_or(0)), - FieldKind::IntegerPk => return None, - }) +fn value_from_input( + f: &FieldDef, + input: &serde_json::Map, +) -> Result, VerbError> { + let Some(raw) = input.get(f.name) else { + return Ok(None); + }; + if f.kind == FieldKind::IntegerPk { + return Ok(None); + } + Ok(Some(validate::coerce(f, raw)?)) } fn reindex_fts( - conn: &Connection, + tx: &rusqlite::Transaction<'_>, table: &str, cols: &[&str], id: i64, input: &serde_json::Map, ) -> Result<(), VerbError> { // Pull existing values, overlay supplied ones, re-insert. 
- let existing: serde_json::Map = read_existing_fts(conn, table, cols, id)?; - conn.execute( + let existing = read_existing_fts(tx, table, cols, id)?; + tx.execute( &format!("DELETE FROM fts_{table} WHERE {table}_id=?1"), rusqlite::params![id], )?; @@ -118,6 +142,19 @@ fn reindex_fts( cols.join(", "), placeholders.join(", "), ); + let values = fts_row_values(id, cols, input, &existing); + let params: Vec<&dyn rusqlite::ToSql> = + values.iter().map(|v| v as &dyn rusqlite::ToSql).collect(); + tx.execute(&sql, params.as_slice())?; + Ok(()) +} + +fn fts_row_values( + id: i64, + cols: &[&str], + input: &serde_json::Map, + existing: &serde_json::Map, +) -> Vec { let mut values: Vec = vec![SqlValue::Integer(id)]; for c in cols { let val = input @@ -128,21 +165,18 @@ fn reindex_fts( .to_string(); values.push(SqlValue::Text(val)); } - let params: Vec<&dyn rusqlite::ToSql> = - values.iter().map(|v| v as &dyn rusqlite::ToSql).collect(); - conn.execute(&sql, params.as_slice())?; - Ok(()) + values } fn read_existing_fts( - conn: &Connection, + tx: &rusqlite::Transaction<'_>, table: &str, cols: &[&str], id: i64, ) -> Result, VerbError> { let col_list = cols.join(","); let sql = format!("SELECT {col_list} FROM {table} WHERE id=?1"); - let mut stmt = conn.prepare(&sql)?; + let mut stmt = tx.prepare(&sql)?; let mut rows = stmt.query(rusqlite::params![id])?; let mut out = serde_json::Map::new(); if let Some(r) = rows.next()? { diff --git a/_primitives/_rust/kei-entity-store/src/verbs/validate.rs b/_primitives/_rust/kei-entity-store/src/verbs/validate.rs new file mode 100644 index 0000000..b2830c6 --- /dev/null +++ b/_primitives/_rust/kei-entity-store/src/verbs/validate.rs @@ -0,0 +1,95 @@ +//! Shared input-type validator for create / update. +//! +//! Strict typed validation: integer fields require JSON numbers that +//! fit i64; text fields require JSON strings. Wrong-type input returns +//! `VerbError::InvalidType` instead of silent coercion to `0` / `""`. +//! +//! 
TEXT size cap: any text value longer than `MAX_TEXT_BYTES` is +//! rejected to prevent OOM from hostile input. Per-field override is +//! planned (TODO B5: add `max_bytes: Option` to `FieldDef`). + +use crate::error::VerbError; +use crate::schema::{FieldDef, FieldKind}; +use rusqlite::types::Value as SqlValue; +use serde_json::Value; + +/// Default TEXT size cap — 64 KiB. Enforced for every TextNotNull / +/// Text / TextDefault field unless overridden per-field (TODO). +pub const MAX_TEXT_BYTES: usize = 64 * 1024; + +/// Convert an input JSON value to a typed `SqlValue` for `f`. +/// +/// Errors if the JSON kind does not match the field kind, or if a +/// text value exceeds `MAX_TEXT_BYTES`. +pub fn coerce(f: &FieldDef, raw: &Value) -> Result { + match f.kind { + FieldKind::IntegerPk => Err(VerbError::InvalidInput(format!( + "field `{}` is PK and cannot be set directly", + f.name + ))), + FieldKind::IntegerNotNull + | FieldKind::Integer + | FieldKind::TimestampCreated + | FieldKind::TimestampUpdated => coerce_int(f, raw), + FieldKind::TextNotNull | FieldKind::Text | FieldKind::TextDefault => coerce_text(f, raw), + } +} + +fn coerce_int(f: &FieldDef, raw: &Value) -> Result { + match raw { + Value::Null => Ok(SqlValue::Integer(0)), + Value::Number(n) => n.as_i64().map(SqlValue::Integer).ok_or_else(|| { + type_err(f, "integer (i64)", &format!("number {} out of range", n)) + }), + other => Err(type_err(f, "integer", kind_name(other))), + } +} + +fn coerce_text(f: &FieldDef, raw: &Value) -> Result { + let s = match raw { + Value::Null => String::new(), + Value::String(s) => s.clone(), + other => return Err(type_err(f, "string", kind_name(other))), + }; + if s.len() > MAX_TEXT_BYTES { + return Err(type_err( + f, + &format!("string ≤ {} bytes", MAX_TEXT_BYTES), + &format!("{} bytes", s.len()), + )); + } + Ok(SqlValue::Text(s)) +} + +fn type_err(f: &FieldDef, expected: &str, got: &str) -> VerbError { + VerbError::InvalidType { + field: f.name.to_string(), + expected: 
expected.to_string(), + got: got.to_string(), + } +} + +fn kind_name(v: &Value) -> &'static str { + match v { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Array(_) => "array", + Value::Object(_) => "object", + } +} + +/// Reject text values that exceed the configured cap. Used by create +/// for fields that flow through the old "default on missing" path +/// (where coerce is not invoked for missing keys). +pub fn check_text_len(f: &FieldDef, s: &str) -> Result<(), VerbError> { + if s.len() > MAX_TEXT_BYTES { + return Err(type_err( + f, + &format!("string ≤ {} bytes", MAX_TEXT_BYTES), + &format!("{} bytes", s.len()), + )); + } + Ok(()) +} diff --git a/_primitives/_rust/kei-entity-store/tests/bug_fixes_smoke.rs b/_primitives/_rust/kei-entity-store/tests/bug_fixes_smoke.rs new file mode 100644 index 0000000..3f6b58a --- /dev/null +++ b/_primitives/_rust/kei-entity-store/tests/bug_fixes_smoke.rs @@ -0,0 +1,190 @@ +//! Regression tests for post-convergence audit findings (C1/C2/FTS5 +//! injection/M3/TEXT-cap/M2). Each test names the finding it pins. 
+ +use kei_entity_store::error::VerbError; +use kei_entity_store::schema::{EntitySchema, FieldDef}; +use kei_entity_store::verbs::{create, search, update}; +use kei_entity_store::verbs::validate::MAX_TEXT_BYTES; +use kei_entity_store::Store; +use rusqlite::Connection; +use serde_json::json; + +static FIELDS: &[FieldDef] = &[ + FieldDef::pk("id"), + FieldDef::text_nn("title"), + FieldDef::text("description"), + FieldDef::text_default("status", "pending"), + FieldDef::integer("parent_id"), + FieldDef::created_at(), + FieldDef::updated_at(), +]; + +static SCHEMA: EntitySchema = EntitySchema { + name: "item", + table: "items", + fields: FIELDS, + enabled_verbs: &["create", "get", "list", "search", "update", "delete"], + fts_columns: Some(&["title", "description"]), + edge_table: None, + custom_migrations: &[], +}; + +fn mk() -> Store { Store::open_memory(&SCHEMA).unwrap() } + +// ---------- C1 — silent type coercion ---------- + +fn expect_invalid_type(err: VerbError, expected_field: &str) { + match err { + VerbError::InvalidType { ref field, .. 
} if field == expected_field => {} + other => panic!("expected InvalidType on `{expected_field}`, got {other:?}"), + } +} + +#[test] +fn c1_create_rejects_integer_for_text_field() { + let s = mk(); + let err = create::run(s.conn(), &SCHEMA, json!({ "title": 42 })).unwrap_err(); + assert_eq!(err.exit_code(), 2); + expect_invalid_type(err, "title"); +} + +#[test] +fn c1_create_rejects_string_for_integer_field() { + let s = mk(); + let err = create::run( + s.conn(), + &SCHEMA, + json!({ "title": "ok", "parent_id": "not-a-number" }), + ) + .unwrap_err(); + expect_invalid_type(err, "parent_id"); +} + +#[test] +fn c1_update_rejects_integer_for_text_field() { + let s = mk(); + let id = create::run(s.conn(), &SCHEMA, json!({ "title": "orig" })) + .unwrap()["id"] + .as_i64() + .unwrap(); + let err = update::run(s.conn(), &SCHEMA, json!({ "id": id, "status": 7 })).unwrap_err(); + expect_invalid_type(err, "status"); +} + +// ---------- C2 — FTS transaction ---------- + +#[test] +fn c2_update_fts_failure_rolls_back_row_update() { + // Fresh DB, then manually drop the FTS table so the next update's + // DELETE-INTO-FTS fails mid-flight. The row UPDATE that ran first + // in the SAME transaction must roll back. + let s = mk(); + let id = create::run(s.conn(), &SCHEMA, json!({ "title": "before" })) + .unwrap()["id"] + .as_i64() + .unwrap(); + + // Sabotage: drop the fts virtual table. + s.conn().execute_batch("DROP TABLE fts_items;").unwrap(); + + let result = update::run( + s.conn(), + &SCHEMA, + json!({ "id": id, "title": "after" }), + ); + assert!(result.is_err(), "update should fail when FTS is missing"); + + // Row must still read as `before` — the UPDATE was rolled back. 
+ let title: String = s + .conn() + .query_row( + "SELECT title FROM items WHERE id=?1", + rusqlite::params![id], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(title, "before", "update must have rolled back on FTS failure"); +} + +// ---------- FTS5 injection ---------- + +fn count_hits(s: &Store, q: &str) -> usize { + let v = search::run(s.conn(), &SCHEMA, json!({ "query": q })).unwrap(); + v["results"].as_array().unwrap().len() +} + +#[test] +fn fts5_injection_neutralized_by_phrase_quoting() { + // Column-prefix / NEAR / wildcard all become literal tokens when + // wrapped in the FTS5 double-quoted phrase. None should match the + // seeded rows — no doc contains the literal text `title:secret`. + let s = mk(); + create::run(s.conn(), &SCHEMA, json!({ + "title": "ordinary record", "description": "nothing special" + })).unwrap(); + create::run(s.conn(), &SCHEMA, json!({ + "title": "secret handshake", "description": "hidden" + })).unwrap(); + + assert_eq!(count_hits(&s, "title:secret"), 0, "column-prefix leaked"); + assert_eq!(count_hits(&s, "NEAR(secret hidden, 5)"), 0, "NEAR leaked"); + assert_eq!(count_hits(&s, "secr*"), 0, "wildcard leaked"); +} + +// ---------- TEXT size cap ---------- + +#[test] +fn text_cap_create_rejects_oversize() { + let s = mk(); + let oversize: String = "a".repeat(MAX_TEXT_BYTES + 1); + let err = create::run(s.conn(), &SCHEMA, json!({ "title": oversize })).unwrap_err(); + expect_invalid_type(err, "title"); +} + +#[test] +fn text_cap_update_rejects_oversize() { + let s = mk(); + let id = create::run(s.conn(), &SCHEMA, json!({ "title": "ok" })) + .unwrap()["id"] + .as_i64() + .unwrap(); + let oversize: String = "a".repeat(MAX_TEXT_BYTES + 1); + let err = update::run( + s.conn(), + &SCHEMA, + json!({ "id": id, "description": oversize }), + ) + .unwrap_err(); + expect_invalid_type(err, "description"); +} + +// ---------- M2 — migration version ---------- + +#[test] +fn m2_user_version_stamped_on_fresh_db() { + let s = mk(); + let v: u32 = s 
+ .conn() + .pragma_query_value(None, "user_version", |r| r.get(0)) + .unwrap(); + assert_eq!(v, kei_entity_store::engine::CURRENT_USER_VERSION); +} + +#[test] +fn m2_user_version_applied_once_idempotent() { + // Open twice — second open must leave user_version unchanged (not + // bumped past CURRENT). + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("store.db"); + { + let _s = Store::open(&path, &SCHEMA).unwrap(); + } + { + let _s = Store::open(&path, &SCHEMA).unwrap(); + let conn = Connection::open(&path).unwrap(); + let v: u32 = conn + .pragma_query_value(None, "user_version", |r| r.get(0)) + .unwrap(); + assert_eq!(v, kei_entity_store::engine::CURRENT_USER_VERSION); + } +}