Merge fix/a1-entity-store — C1+C2+FTS5+M3+TEXT-cap+M2

This commit is contained in:
Parfii-bot 2026-04-23 05:31:03 +08:00
commit a5c5a68627
8 changed files with 536 additions and 72 deletions

View file

@ -12,6 +12,16 @@ use anyhow::{Context, Result};
use rusqlite::Connection;
use std::path::Path;
/// Schema-level version stamped into SQLite's `user_version` pragma by
/// `apply_user_version` whenever the database reports a lower value
/// (a fresh database reports 0). Future migrations bump this constant
/// and gate their DDL on the pragma's current value — idempotent
/// `CREATE TABLE IF NOT EXISTS` is not enough once column shapes
/// diverge.
///
/// TODO B5: expose a `version: u32` field on `EntitySchema` and add a
/// `custom_migrations: &'static [&'static str]` entry indexed by
/// target version so sibling crates can publish their own bump paths.
pub const CURRENT_USER_VERSION: u32 = 1;
/// Handle that owns the SQLite connection backing one entity store.
///
/// The connection is private; NOTE(review): callers appear to reach it
/// through an accessor defined elsewhere in this file — confirm.
pub struct Store {
conn: Connection,
}
@ -48,6 +58,9 @@ impl Store {
/// Run: create primary table, indexes, FTS virtual table, edge table,
/// then any custom DDL. Idempotent (all statements use IF NOT EXISTS).
///
/// Also stamps `PRAGMA user_version` on fresh databases so future
/// schema bumps can detect the target migration set exactly once.
pub fn run_migrations(conn: &Connection, schema: &EntitySchema) -> Result<(), VerbError> {
conn.execute_batch(&ddl_primary_table(schema))?;
conn.execute_batch(&ddl_indexes(schema))?;
@ -60,6 +73,22 @@ pub fn run_migrations(conn: &Connection, schema: &EntitySchema) -> Result<(), Ve
for stmt in schema.custom_migrations {
conn.execute_batch(stmt)?;
}
apply_user_version(conn)?;
Ok(())
}
/// Stamp `PRAGMA user_version` with [`CURRENT_USER_VERSION`] when the
/// database reports an older value (fresh DBs default to 0).
///
/// If the database is already at `CURRENT_USER_VERSION` (or newer) this
/// is a no-op; if it is stamped at an older version, a future bump will
/// gate version-indexed DDL here before the stamp is advanced.
///
/// # Errors
///
/// Returns the underlying SQLite error if the pragma cannot be read or
/// written. A failed read is deliberately NOT treated as "version 0":
/// doing so would re-stamp a database whose real version is unknown and
/// hide the failure from the caller.
fn apply_user_version(conn: &Connection) -> Result<(), VerbError> {
    // Read as i64: `user_version` is a signed 32-bit value in SQLite, so
    // a (corrupt or hand-edited) negative stamp must not fail the integer
    // conversion — it simply compares as "older" and gets re-stamped.
    let current: i64 = conn.pragma_query_value(None, "user_version", |r| r.get(0))?;
    if current < i64::from(CURRENT_USER_VERSION) {
        // PRAGMA does not accept parameter binding; value is a constant.
        conn.pragma_update(None, "user_version", CURRENT_USER_VERSION)?;
    }
    Ok(())
}
@ -81,7 +110,12 @@ fn ddl_column(f: &FieldDef) -> String {
FieldKind::Text => format!("{} TEXT DEFAULT ''", f.name),
FieldKind::TextDefault => {
let d = f.default.unwrap_or("");
format!("{} TEXT NOT NULL DEFAULT '{}'", f.name, d)
// SQL-escape embedded single quotes (per SQL standard: `'`
// → `''`) so `text_default("status", "don't know")` does
// not inject. Today all callers pass safe constants; this
// defence is for the first dev who doesn't.
let escaped = d.replace('\'', "''");
format!("{} TEXT NOT NULL DEFAULT '{}'", f.name, escaped)
}
FieldKind::TimestampCreated => format!("{} INTEGER NOT NULL", f.name),
FieldKind::TimestampUpdated => format!("{} INTEGER NOT NULL", f.name),

View file

@ -8,6 +8,16 @@ pub enum VerbError {
#[error("InvalidInput: {0}")]
InvalidInput(String),
/// Typed-validation failure for a declared schema field.
/// Distinct variant from free-form `InvalidInput` so callers can
/// match on `{field, expected, got}` programmatically.
#[error("InvalidInput: field `{field}` expected {expected}, got {got}")]
InvalidType {
field: String,
expected: String,
got: String,
},
#[error("VerbDisabled: {verb} not enabled on schema {schema}")]
VerbDisabled { verb: String, schema: String },
@ -29,7 +39,10 @@ impl VerbError {
/// 1 for storage / IO.
pub fn exit_code(&self) -> u8 {
match self {
Self::InvalidInput(_) | Self::VerbDisabled { .. } | Self::NotFound { .. } => 2,
Self::InvalidInput(_)
| Self::InvalidType { .. }
| Self::VerbDisabled { .. }
| Self::NotFound { .. } => 2,
Self::Sqlite(_) | Self::Serde(_) | Self::Storage(_) => 1,
}
}

View file

@ -4,9 +4,14 @@
//! declared on the EntitySchema are copied; extras are silently ignored
//! (the atom layer above is responsible for rejecting them if desired).
//! Output: `{ "id": <rowid>, "created_at": <unix ts> }`.
//!
//! Type discipline: when a key is present its JSON kind MUST match the
//! field kind (string for Text*, number for Integer*). Mismatch →
//! `VerbError::InvalidType`. Missing keys default to 0 / "" as before.
use crate::error::VerbError;
use crate::schema::{EntitySchema, FieldDef, FieldKind};
use crate::verbs::validate;
use chrono::Utc;
use rusqlite::{types::Value as SqlValue, Connection};
use serde_json::{json, Value};
@ -16,29 +21,51 @@ pub fn run(
schema: &EntitySchema,
input: Value,
) -> Result<Value, VerbError> {
guard_enabled(schema)?;
let obj = as_object(&input, "create")?;
let now = Utc::now().timestamp();
let (cols, values) = build_insert(schema, obj, now)?;
let id = insert_tx(conn, schema, &cols, &values, obj)?;
let created_at = read_created_at(conn, schema, id).unwrap_or(now);
Ok(json!({ "id": id, "created_at": created_at }))
}
fn guard_enabled(schema: &EntitySchema) -> Result<(), VerbError> {
if !schema.verb_enabled("create") {
return Err(VerbError::VerbDisabled {
verb: "create".into(),
schema: schema.name.into(),
});
}
let obj = as_object(&input, "create")?;
let now = Utc::now().timestamp();
let id = exec_insert(conn, schema, obj, now)?;
if let Some(cols) = schema.fts_columns {
reindex_fts(conn, schema.table, cols, id, obj)?;
}
let created_at = read_created_at(conn, schema, id).unwrap_or(now);
Ok(json!({ "id": id, "created_at": created_at }))
Ok(())
}
fn exec_insert(
/// Wrap INSERT + FTS reindex in one transaction so a rusqlite failure
/// in the FTS leg rolls back the row insert too. `unchecked_transaction`
/// is used because callers hold `&Connection` — rusqlite permits this
/// as long as only one tx is in flight.
fn insert_tx(
conn: &Connection,
schema: &EntitySchema,
cols: &[&'static str],
values: &[SqlValue],
obj: &serde_json::Map<String, Value>,
now: i64,
) -> Result<i64, VerbError> {
let (cols, values) = build_insert(schema, obj, now);
let tx = conn.unchecked_transaction()?;
let id = exec_insert_tx(&tx, schema, cols, values)?;
if let Some(fts_cols) = schema.fts_columns {
reindex_fts(&tx, schema.table, fts_cols, id, obj)?;
}
tx.commit()?;
Ok(id)
}
fn exec_insert_tx(
tx: &rusqlite::Transaction<'_>,
schema: &EntitySchema,
cols: &[&'static str],
values: &[SqlValue],
) -> Result<i64, VerbError> {
let placeholders: Vec<String> = (1..=cols.len()).map(|i| format!("?{i}")).collect();
let sql = format!(
"INSERT INTO {} ({}) VALUES ({})",
@ -48,8 +75,8 @@ fn exec_insert(
);
let params: Vec<&dyn rusqlite::ToSql> =
values.iter().map(|v| v as &dyn rusqlite::ToSql).collect();
conn.execute(&sql, params.as_slice())?;
Ok(conn.last_insert_rowid())
tx.execute(&sql, params.as_slice())?;
Ok(tx.last_insert_rowid())
}
fn as_object<'a>(v: &'a Value, verb: &str) -> Result<&'a serde_json::Map<String, Value>, VerbError> {
@ -61,47 +88,77 @@ fn build_insert(
schema: &EntitySchema,
input: &serde_json::Map<String, Value>,
now: i64,
) -> (Vec<&'static str>, Vec<SqlValue>) {
) -> Result<(Vec<&'static str>, Vec<SqlValue>), VerbError> {
let mut cols: Vec<&'static str> = Vec::new();
let mut values: Vec<SqlValue> = Vec::new();
for f in schema.writable_fields() {
cols.push(f.name);
values.push(field_value_for_insert(f, input, now));
values.push(field_value_for_insert(f, input, now)?);
}
(cols, values)
Ok((cols, values))
}
fn field_value_for_insert(f: &FieldDef, input: &serde_json::Map<String, Value>, now: i64) -> SqlValue {
fn field_value_for_insert(
f: &FieldDef,
input: &serde_json::Map<String, Value>,
now: i64,
) -> Result<SqlValue, VerbError> {
match f.kind {
FieldKind::TimestampCreated | FieldKind::TimestampUpdated => {
match input.get(f.name).and_then(|v| v.as_i64()) {
Ok(match input.get(f.name).and_then(|v| v.as_i64()) {
Some(ts) if ts > 0 => SqlValue::Integer(ts),
_ => SqlValue::Integer(now),
})
}
FieldKind::TextDefault => insert_text_default(f, input),
FieldKind::IntegerPk => Ok(SqlValue::Null),
_ => match input.get(f.name) {
Some(raw) => validate::coerce(f, raw),
None => Ok(default_for_kind(f)),
},
}
}
fn insert_text_default(
f: &FieldDef,
input: &serde_json::Map<String, Value>,
) -> Result<SqlValue, VerbError> {
match input.get(f.name) {
Some(raw) => {
let coerced = validate::coerce(f, raw)?;
if let SqlValue::Text(ref s) = coerced {
if s.is_empty() {
let d = f.default.unwrap_or("");
validate::check_text_len(f, d)?;
return Ok(SqlValue::Text(d.to_string()));
}
}
Ok(coerced)
}
FieldKind::IntegerNotNull | FieldKind::Integer => SqlValue::Integer(
input.get(f.name).and_then(|v| v.as_i64()).unwrap_or(0),
),
FieldKind::TextNotNull | FieldKind::Text => SqlValue::Text(
input.get(f.name).and_then(|v| v.as_str()).unwrap_or("").to_string(),
),
FieldKind::TextDefault => {
let raw = input.get(f.name).and_then(|v| v.as_str()).unwrap_or("");
let final_v = if raw.is_empty() { f.default.unwrap_or("") } else { raw };
SqlValue::Text(final_v.to_string())
None => {
let d = f.default.unwrap_or("");
validate::check_text_len(f, d)?;
Ok(SqlValue::Text(d.to_string()))
}
FieldKind::IntegerPk => SqlValue::Null, // filtered by writable_fields
}
}
/// Value inserted for a writable field whose key is absent from the
/// create payload: `0` for the plain integer kinds, the empty string
/// for the plain text kinds, SQL `NULL` for every other kind.
fn default_for_kind(f: &FieldDef) -> SqlValue {
    if matches!(f.kind, FieldKind::IntegerNotNull | FieldKind::Integer) {
        return SqlValue::Integer(0);
    }
    if matches!(f.kind, FieldKind::TextNotNull | FieldKind::Text) {
        return SqlValue::Text(String::new());
    }
    SqlValue::Null
}
fn reindex_fts(
conn: &Connection,
tx: &rusqlite::Transaction<'_>,
table: &str,
cols: &[&str],
id: i64,
input: &serde_json::Map<String, Value>,
) -> Result<(), VerbError> {
conn.execute(
tx.execute(
&format!("DELETE FROM fts_{table} WHERE {table}_id=?1"),
rusqlite::params![id],
)?;
@ -118,7 +175,7 @@ fn reindex_fts(
}
let params: Vec<&dyn rusqlite::ToSql> =
values.iter().map(|v| v as &dyn rusqlite::ToSql).collect();
conn.execute(&sql, params.as_slice())?;
tx.execute(&sql, params.as_slice())?;
Ok(())
}

View file

@ -17,6 +17,7 @@ pub mod list;
pub mod rank;
pub mod search;
pub mod update;
pub mod validate;
/// Full list of supported verbs — SSoT for documentation + schema
/// validation. `EntitySchema.enabled_verbs` entries MUST appear here.

View file

@ -2,6 +2,13 @@
//! table, ORDER BY rank.
//!
//! Requires `EntitySchema.fts_columns` to be `Some`.
//!
//! Security: user input is wrapped in an FTS5 double-quoted phrase so
//! the FTS5 query grammar (`col:term`, `NEAR/5`, boolean ops, `*`,
//! parentheses) is treated as LITERAL TEXT. This is a pure keyword
//! search — attackers cannot address unindexed columns or craft
//! pathological scan expressions. Embedded `"` chars in the user query
//! are escaped per FTS5 grammar by doubling (`"` → `""`).
use crate::error::VerbError;
use crate::schema::EntitySchema;
@ -37,6 +44,7 @@ pub fn run(
return Err(VerbError::InvalidInput("search: query must be non-empty".into()));
}
let limit = clamp(input.get("limit").and_then(|v| v.as_i64()));
let safe_query = fts5_quote(query);
let cols: Vec<String> = schema.fields.iter().map(|f| format!("t.{}", f.name)).collect();
let sql = format!(
@ -47,7 +55,7 @@ pub fn run(
table = schema.table
);
let mut stmt = conn.prepare(&sql)?;
let mut rows = stmt.query(rusqlite::params![query, limit])?;
let mut rows = stmt.query(rusqlite::params![safe_query, limit])?;
let mut results: Vec<Value> = Vec::new();
while let Some(r) = rows.next()? {
results.push(row_to_json(schema, r)?);
@ -55,9 +63,41 @@ pub fn run(
Ok(json!({ "results": results }))
}
/// Turn a user-supplied string into a single FTS5 phrase literal.
///
/// The input is surrounded by double quotes and every embedded `"` is
/// doubled, as the FTS5 grammar requires. The result is safe to bind as
/// the MATCH argument and will match rows containing all of the literal
/// tokens in order.
fn fts5_quote(raw: &str) -> String {
    // +2 for the surrounding quotes; embedded quotes grow the buffer on demand.
    let mut phrase = String::with_capacity(raw.len() + 2);
    phrase.push('"');
    for ch in raw.chars() {
        if ch == '"' {
            phrase.push('"');
        }
        phrase.push(ch);
    }
    phrase.push('"');
    phrase
}
/// Resolve the caller-supplied `limit`: a value in `1..=MAX_LIMIT` is
/// used as-is; anything else (missing, zero, negative, too large) falls
/// back to `DEFAULT_LIMIT`.
fn clamp(raw: Option<i64>) -> i64 {
    raw.filter(|n| (1..=MAX_LIMIT).contains(n))
        .unwrap_or(DEFAULT_LIMIT)
}
#[cfg(test)]
mod tests {
    use super::fts5_quote;

    /// A plain word is only wrapped in quotes.
    #[test]
    fn quote_basic() {
        let quoted = fts5_quote("refactor");
        assert_eq!(quoted, r#""refactor""#);
    }

    /// Embedded double quotes are doubled inside the phrase.
    #[test]
    fn quote_escapes_dq() {
        let quoted = fts5_quote(r#"has "quote""#);
        assert_eq!(quoted, r#""has ""quote""""#);
    }

    /// Injection attempt: `title:evil` — the quoted phrase neutralizes
    /// the column-prefix operator, so the text is passed through
    /// verbatim and searched as literal tokens across the configured
    /// columns.
    #[test]
    fn quote_preserves_colons_and_ops() {
        let quoted = fts5_quote("title:evil");
        assert_eq!(quoted, r#""title:evil""#);
    }
}

View file

@ -1,8 +1,14 @@
//! `update` verb — partial update by id. Only keys that appear in
//! the input JSON and that are declared on the schema are written.
//!
//! Type discipline: when a key is present its JSON kind MUST match the
//! field kind. Mismatch → `VerbError::InvalidType` (no silent coercion).
//! UPDATE + FTS reindex run in a single transaction so a mid-flight
//! failure leaves neither the row nor the FTS entry in a torn state.
use crate::error::VerbError;
use crate::schema::{EntitySchema, FieldDef, FieldKind};
use crate::verbs::validate;
use chrono::Utc;
use rusqlite::{types::Value as SqlValue, Connection};
use serde_json::{json, Value};
@ -12,12 +18,7 @@ pub fn run(
schema: &EntitySchema,
input: Value,
) -> Result<Value, VerbError> {
if !schema.verb_enabled("update") {
return Err(VerbError::VerbDisabled {
verb: "update".into(),
schema: schema.name.into(),
});
}
guard_enabled(schema)?;
let obj = input
.as_object()
.ok_or_else(|| VerbError::InvalidInput("update: expected JSON object".into()))?;
@ -26,23 +27,47 @@ pub fn run(
.and_then(|v| v.as_i64())
.ok_or_else(|| VerbError::InvalidInput("update: missing `id` integer".into()))?;
let now = Utc::now().timestamp();
let (set_cols, values) = build_set(schema, obj, now);
let (set_cols, values) = build_set(schema, obj, now)?;
if set_cols.is_empty() {
return Err(VerbError::InvalidInput("update: no writable fields supplied".into()));
}
exec_update(conn, schema, id, &set_cols, values)?;
if let Some(cols) = schema.fts_columns {
reindex_fts(conn, schema.table, cols, id, obj)?;
}
update_tx(conn, schema, id, &set_cols, values, obj)?;
Ok(json!({ "ok": true, "id": id }))
}
fn exec_update(
/// Refuse to proceed unless the schema lists `update` among its enabled
/// verbs; otherwise report `VerbError::VerbDisabled` naming the verb
/// and the schema.
fn guard_enabled(schema: &EntitySchema) -> Result<(), VerbError> {
    if schema.verb_enabled("update") {
        Ok(())
    } else {
        Err(VerbError::VerbDisabled {
            verb: "update".into(),
            schema: schema.name.into(),
        })
    }
}
/// Run the row UPDATE and, when the schema declares FTS columns, the
/// FTS reindex inside a single transaction.
///
/// The transaction is opened with `unchecked_transaction` because only
/// a shared `&Connection` is available here. Either step failing
/// returns early through `?`, so `commit` is never reached.
/// NOTE(review): this relies on the transaction rolling back when it is
/// dropped uncommitted (rusqlite's documented default) — confirm no
/// caller changes the drop behaviour.
fn update_tx(
conn: &Connection,
schema: &EntitySchema,
id: i64,
set_cols: &[&'static str],
values: Vec<SqlValue>,
obj: &serde_json::Map<String, Value>,
) -> Result<(), VerbError> {
let tx = conn.unchecked_transaction()?;
// Row first: a missing id surfaces before any FTS work is attempted.
exec_update_tx(&tx, schema, id, set_cols, values)?;
if let Some(cols) = schema.fts_columns {
reindex_fts(&tx, schema.table, cols, id, obj)?;
}
tx.commit()?;
Ok(())
}
fn exec_update_tx(
tx: &rusqlite::Transaction<'_>,
schema: &EntitySchema,
id: i64,
set_cols: &[&'static str],
values: Vec<SqlValue>,
) -> Result<(), VerbError> {
let placeholders: Vec<String> =
(1..=set_cols.len()).map(|i| format!("{} = ?{i}", set_cols[i - 1])).collect();
@ -57,7 +82,7 @@ fn exec_update(
all.push(SqlValue::Integer(id));
let params: Vec<&dyn rusqlite::ToSql> =
all.iter().map(|v| v as &dyn rusqlite::ToSql).collect();
let rows = conn.execute(&sql, params.as_slice())?;
let rows = tx.execute(&sql, params.as_slice())?;
if rows == 0 {
return Err(VerbError::NotFound { entity: schema.name.into(), id });
}
@ -68,7 +93,7 @@ fn build_set(
schema: &EntitySchema,
input: &serde_json::Map<String, Value>,
now: i64,
) -> (Vec<&'static str>, Vec<SqlValue>) {
) -> Result<(Vec<&'static str>, Vec<SqlValue>), VerbError> {
let mut cols: Vec<&'static str> = Vec::new();
let mut values: Vec<SqlValue> = Vec::new();
for f in schema.writable_fields() {
@ -77,38 +102,37 @@ fn build_set(
values.push(SqlValue::Integer(now));
continue;
}
if let Some(sql_val) = value_from_input(f, input) {
if let Some(sql_val) = value_from_input(f, input)? {
cols.push(f.name);
values.push(sql_val);
}
}
(cols, values)
Ok((cols, values))
}
fn value_from_input(f: &FieldDef, input: &serde_json::Map<String, Value>) -> Option<SqlValue> {
let raw = input.get(f.name)?;
Some(match f.kind {
FieldKind::TextNotNull | FieldKind::Text | FieldKind::TextDefault => {
SqlValue::Text(raw.as_str().unwrap_or("").to_string())
}
FieldKind::IntegerNotNull
| FieldKind::Integer
| FieldKind::TimestampCreated
| FieldKind::TimestampUpdated => SqlValue::Integer(raw.as_i64().unwrap_or(0)),
FieldKind::IntegerPk => return None,
})
/// Produce the SQL value to write for field `f`, if the update payload
/// mentions it.
///
/// Returns `Ok(None)` when the key is absent or the field is the
/// primary key (never written by an update). Otherwise the raw JSON is
/// passed to `validate::coerce` and any validation error is propagated.
fn value_from_input(
    f: &FieldDef,
    input: &serde_json::Map<String, Value>,
) -> Result<Option<SqlValue>, VerbError> {
    match input.get(f.name) {
        Some(raw) if f.kind != FieldKind::IntegerPk => validate::coerce(f, raw).map(Some),
        _ => Ok(None),
    }
}
fn reindex_fts(
conn: &Connection,
tx: &rusqlite::Transaction<'_>,
table: &str,
cols: &[&str],
id: i64,
input: &serde_json::Map<String, Value>,
) -> Result<(), VerbError> {
// Pull existing values, overlay supplied ones, re-insert.
let existing: serde_json::Map<String, Value> = read_existing_fts(conn, table, cols, id)?;
conn.execute(
let existing = read_existing_fts(tx, table, cols, id)?;
tx.execute(
&format!("DELETE FROM fts_{table} WHERE {table}_id=?1"),
rusqlite::params![id],
)?;
@ -118,6 +142,19 @@ fn reindex_fts(
cols.join(", "),
placeholders.join(", "),
);
let values = fts_row_values(id, cols, input, &existing);
let params: Vec<&dyn rusqlite::ToSql> =
values.iter().map(|v| v as &dyn rusqlite::ToSql).collect();
tx.execute(&sql, params.as_slice())?;
Ok(())
}
fn fts_row_values(
id: i64,
cols: &[&str],
input: &serde_json::Map<String, Value>,
existing: &serde_json::Map<String, Value>,
) -> Vec<SqlValue> {
let mut values: Vec<SqlValue> = vec![SqlValue::Integer(id)];
for c in cols {
let val = input
@ -128,21 +165,18 @@ fn reindex_fts(
.to_string();
values.push(SqlValue::Text(val));
}
let params: Vec<&dyn rusqlite::ToSql> =
values.iter().map(|v| v as &dyn rusqlite::ToSql).collect();
conn.execute(&sql, params.as_slice())?;
Ok(())
values
}
fn read_existing_fts(
conn: &Connection,
tx: &rusqlite::Transaction<'_>,
table: &str,
cols: &[&str],
id: i64,
) -> Result<serde_json::Map<String, Value>, VerbError> {
let col_list = cols.join(",");
let sql = format!("SELECT {col_list} FROM {table} WHERE id=?1");
let mut stmt = conn.prepare(&sql)?;
let mut stmt = tx.prepare(&sql)?;
let mut rows = stmt.query(rusqlite::params![id])?;
let mut out = serde_json::Map::new();
if let Some(r) = rows.next()? {

View file

@ -0,0 +1,95 @@
//! Shared input-type validator for create / update.
//!
//! Strict typed validation: integer fields require JSON numbers that
//! fit i64; text fields require JSON strings. Wrong-type input returns
//! `VerbError::InvalidType` instead of silent coercion to `0` / `""`.
//! The one exception is JSON `null`, which is accepted for both kinds
//! and maps to `0` / `""`.
//!
//! TEXT size cap: any text value longer than `MAX_TEXT_BYTES` is
//! rejected to prevent OOM from hostile input. Per-field override is
//! planned (TODO B5: add `max_bytes: Option<usize>` to `FieldDef`).
use crate::error::VerbError;
use crate::schema::{FieldDef, FieldKind};
use rusqlite::types::Value as SqlValue;
use serde_json::Value;
/// Default TEXT size cap — 64 KiB, measured in bytes of the string's
/// UTF-8 encoding (`str::len`), not in characters. Enforced for every
/// TextNotNull / Text / TextDefault field unless overridden per-field
/// (TODO).
pub const MAX_TEXT_BYTES: usize = 64 * 1024;
/// Validate `raw` against the declared kind of `f` and turn it into the
/// `SqlValue` to store.
///
/// Text kinds go through the string validator, integer and timestamp
/// kinds through the integer validator. The primary key can never be
/// supplied and is rejected with `VerbError::InvalidInput`.
///
/// Errors if the JSON kind does not match the field kind, or if a
/// text value exceeds `MAX_TEXT_BYTES`.
pub fn coerce(f: &FieldDef, raw: &Value) -> Result<SqlValue, VerbError> {
    match f.kind {
        FieldKind::TextNotNull | FieldKind::Text | FieldKind::TextDefault => coerce_text(f, raw),
        FieldKind::IntegerNotNull
        | FieldKind::Integer
        | FieldKind::TimestampCreated
        | FieldKind::TimestampUpdated => coerce_int(f, raw),
        FieldKind::IntegerPk => {
            let message = format!("field `{}` is PK and cannot be set directly", f.name);
            Err(VerbError::InvalidInput(message))
        }
    }
}
/// Validate `raw` as an integer for field `f`.
///
/// * JSON `null` is accepted and stored as `0`.
/// * A JSON number is accepted only when it is representable as `i64`.
/// * Any other JSON kind is rejected.
///
/// # Errors
///
/// Returns `VerbError::InvalidType`. For a rejected number the `got`
/// text distinguishes a floating-point value (e.g. `1.5`) from an
/// integer that does not fit `i64`; previously both were reported as
/// "out of range", which was wrong for floats.
fn coerce_int(f: &FieldDef, raw: &Value) -> Result<SqlValue, VerbError> {
    match raw {
        Value::Null => Ok(SqlValue::Integer(0)),
        Value::Number(n) => match n.as_i64() {
            Some(i) => Ok(SqlValue::Integer(i)),
            None => {
                let got = if n.is_f64() {
                    format!("floating-point number {n}")
                } else {
                    format!("number {n} out of range")
                };
                Err(type_err(f, "integer (i64)", &got))
            }
        },
        other => Err(type_err(f, "integer", kind_name(other))),
    }
}
fn coerce_text(f: &FieldDef, raw: &Value) -> Result<SqlValue, VerbError> {
let s = match raw {
Value::Null => String::new(),
Value::String(s) => s.clone(),
other => return Err(type_err(f, "string", kind_name(other))),
};
if s.len() > MAX_TEXT_BYTES {
return Err(type_err(
f,
&format!("string ≤ {} bytes", MAX_TEXT_BYTES),
&format!("{} bytes", s.len()),
));
}
Ok(SqlValue::Text(s))
}
/// Build a `VerbError::InvalidType` for field `f` from borrowed
/// `expected` / `got` descriptions, taking owned copies of all three
/// strings.
fn type_err(f: &FieldDef, expected: &str, got: &str) -> VerbError {
    let field = String::from(f.name);
    let expected = expected.to_owned();
    let got = got.to_owned();
    VerbError::InvalidType { field, expected, got }
}
/// Lower-case label for the JSON kind of `v`, used as the `got` text in
/// type-mismatch errors.
fn kind_name(v: &Value) -> &'static str {
    let label = match v {
        Value::String(..) => "string",
        Value::Number(..) => "number",
        Value::Bool(..) => "bool",
        Value::Null => "null",
        Value::Array(..) => "array",
        Value::Object(..) => "object",
    };
    label
}
/// Enforce the text size cap on `s` for field `f`.
///
/// Succeeds when `s` is at most `MAX_TEXT_BYTES` bytes long; otherwise
/// fails with `VerbError::InvalidType` describing the cap and the
/// actual length. Create uses this for default values that never pass
/// through `coerce` (the key was missing from the input).
pub fn check_text_len(f: &FieldDef, s: &str) -> Result<(), VerbError> {
    let actual = s.len();
    if actual <= MAX_TEXT_BYTES {
        return Ok(());
    }
    let expected = format!("string ≤ {} bytes", MAX_TEXT_BYTES);
    let got = format!("{} bytes", actual);
    Err(type_err(f, &expected, &got))
}

View file

@ -0,0 +1,190 @@
//! Regression tests for post-convergence audit findings (C1/C2/FTS5
//! injection/M3/TEXT-cap/M2). Each test names the finding it pins.
use kei_entity_store::error::VerbError;
use kei_entity_store::schema::{EntitySchema, FieldDef};
use kei_entity_store::verbs::{create, search, update};
use kei_entity_store::verbs::validate::MAX_TEXT_BYTES;
use kei_entity_store::Store;
use rusqlite::Connection;
use serde_json::json;
/// Field set of the `items` entity shared by every regression test in
/// this file: a primary key, three text columns (`status` carries the
/// default `"pending"`), one integer column and the two timestamps.
static FIELDS: &[FieldDef] = &[
FieldDef::pk("id"),
FieldDef::text_nn("title"),
FieldDef::text("description"),
FieldDef::text_default("status", "pending"),
FieldDef::integer("parent_id"),
FieldDef::created_at(),
FieldDef::updated_at(),
];
/// Schema under test: entity `item` stored in table `items`, with the
/// verbs listed below enabled and full-text search declared over
/// `title` and `description`. No edge table, no custom migrations.
static SCHEMA: EntitySchema = EntitySchema {
name: "item",
table: "items",
fields: FIELDS,
enabled_verbs: &["create", "get", "list", "search", "update", "delete"],
fts_columns: Some(&["title", "description"]),
edge_table: None,
custom_migrations: &[],
};
/// Fresh in-memory store built from `SCHEMA`; panics if it cannot be
/// opened.
fn mk() -> Store {
    let opened = Store::open_memory(&SCHEMA);
    opened.unwrap()
}
// ---------- C1 — silent type coercion ----------
/// Panic unless `err` is `VerbError::InvalidType` raised for the field
/// named `expected_field`.
fn expect_invalid_type(err: VerbError, expected_field: &str) {
    let on_expected_field =
        matches!(&err, VerbError::InvalidType { field, .. } if field == expected_field);
    if !on_expected_field {
        panic!("expected InvalidType on `{expected_field}`, got {err:?}");
    }
}
/// C1: a JSON number supplied for a text field is rejected on create
/// with a typed error and exit code 2.
#[test]
fn c1_create_rejects_integer_for_text_field() {
    let store = mk();
    let payload = json!({ "title": 42 });
    let err = create::run(store.conn(), &SCHEMA, payload).unwrap_err();
    assert_eq!(err.exit_code(), 2);
    expect_invalid_type(err, "title");
}
/// C1: a JSON string supplied for an integer field is rejected on
/// create with a typed error naming that field.
#[test]
fn c1_create_rejects_string_for_integer_field() {
    let store = mk();
    let payload = json!({ "title": "ok", "parent_id": "not-a-number" });
    let err = create::run(store.conn(), &SCHEMA, payload).unwrap_err();
    expect_invalid_type(err, "parent_id");
}
/// C1: a JSON number supplied for a text field is rejected on update
/// with a typed error naming that field.
#[test]
fn c1_update_rejects_integer_for_text_field() {
    let store = mk();
    let created = create::run(store.conn(), &SCHEMA, json!({ "title": "orig" })).unwrap();
    let id = created["id"].as_i64().unwrap();
    let payload = json!({ "id": id, "status": 7 });
    let err = update::run(store.conn(), &SCHEMA, payload).unwrap_err();
    expect_invalid_type(err, "status");
}
// ---------- C2 — FTS transaction ----------
/// C2: when the FTS leg of an update fails, the row UPDATE that ran
/// earlier in the same transaction must be rolled back.
#[test]
fn c2_update_fts_failure_rolls_back_row_update() {
    let store = mk();
    let created = create::run(store.conn(), &SCHEMA, json!({ "title": "before" })).unwrap();
    let id = created["id"].as_i64().unwrap();

    // Sabotage: drop the FTS virtual table so the update's statement
    // against it fails mid-flight, after the row UPDATE has executed.
    store.conn().execute_batch("DROP TABLE fts_items;").unwrap();

    let outcome = update::run(store.conn(), &SCHEMA, json!({ "id": id, "title": "after" }));
    assert!(outcome.is_err(), "update should fail when FTS is missing");

    // The row must still read as `before` — the UPDATE was rolled back.
    let title: String = store
        .conn()
        .query_row(
            "SELECT title FROM items WHERE id=?1",
            rusqlite::params![id],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(title, "before", "update must have rolled back on FTS failure");
}
// ---------- FTS5 injection ----------
/// Run a search for `q` against `SCHEMA` and return how many rows came
/// back. Panics if the search fails or the response has no `results`
/// array.
fn count_hits(s: &Store, q: &str) -> usize {
    let response = search::run(s.conn(), &SCHEMA, json!({ "query": q })).unwrap();
    let hits = response["results"].as_array().unwrap();
    hits.len()
}
/// FTS5 injection: column-prefix / NEAR / wildcard syntax all become
/// literal tokens when wrapped in the FTS5 double-quoted phrase, so
/// none of them may match the seeded rows — no document contains the
/// literal text `title:secret`.
#[test]
fn fts5_injection_neutralized_by_phrase_quoting() {
    let s = mk();
    create::run(s.conn(), &SCHEMA, json!({
        "title": "ordinary record", "description": "nothing special"
    })).unwrap();
    create::run(s.conn(), &SCHEMA, json!({
        "title": "secret handshake", "description": "hidden"
    })).unwrap();

    // Positive control: without it every assertion below would also pass
    // against a search that never returns anything.
    assert_eq!(count_hits(&s, "secret"), 1, "plain keyword search must still match");

    assert_eq!(count_hits(&s, "title:secret"), 0, "column-prefix leaked");
    assert_eq!(count_hits(&s, "NEAR(secret hidden, 5)"), 0, "NEAR leaked");
    assert_eq!(count_hits(&s, "secr*"), 0, "wildcard leaked");
}
// ---------- TEXT size cap ----------
/// TEXT cap: create rejects a text value one byte over the cap.
#[test]
fn text_cap_create_rejects_oversize() {
    let store = mk();
    let too_long = "a".repeat(MAX_TEXT_BYTES + 1);
    let payload = json!({ "title": too_long });
    let err = create::run(store.conn(), &SCHEMA, payload).unwrap_err();
    expect_invalid_type(err, "title");
}
/// TEXT cap: update rejects a text value one byte over the cap.
#[test]
fn text_cap_update_rejects_oversize() {
    let store = mk();
    let created = create::run(store.conn(), &SCHEMA, json!({ "title": "ok" })).unwrap();
    let id = created["id"].as_i64().unwrap();
    let too_long = "a".repeat(MAX_TEXT_BYTES + 1);
    let payload = json!({ "id": id, "description": too_long });
    let err = update::run(store.conn(), &SCHEMA, payload).unwrap_err();
    expect_invalid_type(err, "description");
}
// ---------- M2 — migration version ----------
/// M2: a freshly opened database carries the current schema version in
/// `PRAGMA user_version`.
#[test]
fn m2_user_version_stamped_on_fresh_db() {
    let store = mk();
    let stamped: u32 = store
        .conn()
        .pragma_query_value(None, "user_version", |row| row.get(0))
        .unwrap();
    let expected = kei_entity_store::engine::CURRENT_USER_VERSION;
    assert_eq!(stamped, expected);
}
/// M2: opening the same database file twice leaves `user_version` at
/// the current value — the second open must not bump it past CURRENT.
#[test]
fn m2_user_version_applied_once_idempotent() {
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("store.db");

    // First open stamps the version; the store is closed right away.
    drop(Store::open(&path, &SCHEMA).unwrap());

    // Second open stays alive while an independent connection reads
    // the pragma back.
    let _second = Store::open(&path, &SCHEMA).unwrap();
    let probe = Connection::open(&path).unwrap();
    let stamped: u32 = probe
        .pragma_query_value(None, "user_version", |row| row.get(0))
        .unwrap();
    assert_eq!(stamped, kei_entity_store::engine::CURRENT_USER_VERSION);
}