Merge W10A — engine TextPairWithMetadata extras + kei-crossdomain re-migrated

This commit is contained in:
Parfii-bot 2026-04-23 13:59:18 +08:00
commit c10f17c202
8 changed files with 257 additions and 78 deletions

View file

@ -25,7 +25,7 @@ pub fn unlink(store: &Store, from: &str, to: &str, edge_type: &str) -> Result<us
pub fn query_edges(store: &Store, uri: &str) -> Result<Vec<CrossEdge>> {
let mut stmt = store.conn().prepare(
"SELECT id, from_uri, to_uri, edge_type, weight, evidence, metadata, created_at
"SELECT edge_id, from_uri, to_uri, edge_type, weight, evidence, metadata, created_at
FROM cross_edges WHERE from_uri=?1 OR to_uri=?1",
)?;
let rows = stmt.query_map(params![uri], |r| {

View file

@ -1,7 +1,7 @@
//! kei-crossdomain EntitySchema — declarative spec consumed by
//! `kei_entity_store::Store` for migrations + user_version pragma.
//!
//! **Architectural note (2026-04-23 migration to Layer-A engine):**
//! **Architectural note (2026-04-23 re-migration, Option B):**
//! kei-crossdomain is an edges-only graph store — URIs (`domain://path`)
//! are the only identifiers; there is no primary "node" entity row. The
//! engine's `EntitySchema` contract requires exactly one `IntegerPk`
@ -9,67 +9,57 @@
//! to satisfy the DDL contract. No code writes to this table; every
//! query still runs against `cross_edges`.
//!
//! **Why `edge_table: None` instead of `Some("cross_edges")` + TextPair:**
//! the engine's TextPair edge DDL is `(src_path, dst_path, edge_type)`
//! with `PRIMARY KEY(src_path, dst_path, edge_type)` — **incompatible**
//! with the existing `cross_edges` schema, which carries five extra
//! columns (`id INTEGER PRIMARY KEY`, `weight`, `evidence`, `metadata`,
//! `created_at`) and uses column names `from_uri` / `to_uri`. Adopting
//! engine's TextPair would destroy the `CrossEdge` type, the
//! `link()` rowid return, and backward compatibility with existing
//! on-disk DBs. Instead we follow the kei-task pattern: ride the engine
//! for connection lifecycle + `PRAGMA user_version` + migration
//! orchestration, keep the rich edge DDL in `custom_migrations`.
//!
//! Constructive path (not pursued here, would require destructive
//! rewrite): extend `kei-entity-store` with a richer TextPair variant
//! that preserves extra columns via schema fields, OR migrate
//! kei-crossdomain callers to drop the id/weight/evidence/metadata
//! fields. Both are multi-file changes outside this crate's scope.
//! The rich `cross_edges` DDL is now generated by `kei-entity-store` via
//! `EdgeKeyKind::TextPairWithMetadata { from_col: "from_uri", to_col:
//! "to_uri", has_id, has_weight, has_created_at, extra_columns }`. The
//! legacy hand-rolled `custom_migrations` DDL was dropped; see git
//! history for the prior version.
use kei_entity_store::schema::{EdgeKeyKind, EntitySchema, FieldDef};
use kei_entity_store::schema::{EdgeKeyKind, EntitySchema, FieldDef, FieldKind};
use rusqlite::{Connection, Result};
/// Synthetic primary table — exists solely to satisfy the engine's
/// `IntegerPk` requirement. Not used by any verb or caller.
static FIELDS: &[FieldDef] = &[FieldDef::pk("id")];
/// Byte-identical to the pre-migration `cross_edges` DDL (plus the
/// three indexes that used to live in the same `execute_batch`).
const DDL_CROSS_EDGES: &str = r#"
CREATE TABLE IF NOT EXISTS cross_edges (
id INTEGER PRIMARY KEY,
from_uri TEXT NOT NULL,
to_uri TEXT NOT NULL,
edge_type TEXT NOT NULL,
weight REAL DEFAULT 1.0,
evidence TEXT DEFAULT 'E4',
metadata TEXT DEFAULT '{}',
created_at INTEGER NOT NULL,
UNIQUE(from_uri, to_uri, edge_type)
);
CREATE INDEX IF NOT EXISTS idx_ce_from ON cross_edges(from_uri);
CREATE INDEX IF NOT EXISTS idx_ce_to ON cross_edges(to_uri);
CREATE INDEX IF NOT EXISTS idx_ce_type ON cross_edges(edge_type);
"#;
/// Extra columns on `cross_edges` beyond the standard metadata
/// (id / weight / created_at / edge_type). Defaults (`E4`, `{}`) are
/// applied by kei-crossdomain callers at INSERT time since the engine's
/// edge DDL only emits `TEXT DEFAULT ''` for `FieldKind::Text`; existing
/// databases keep their original `DEFAULT 'E4' / '{}'` column attributes
/// via SQLite's `CREATE TABLE IF NOT EXISTS` no-op.
///
/// NOTE(review): the same no-op means a pre-existing `cross_edges` table
/// also keeps its legacy key column name (`id`), whereas the engine DDL
/// for `has_id: true` names that column `edge_id`. Queries that select
/// `edge_id` by name presumably fail on such databases — confirm, and
/// consider selecting `rowid` (an alias of any `INTEGER PRIMARY KEY`
/// column) or adding a rename migration.
static EDGE_EXTRAS: &[(&str, FieldKind)] = &[
("evidence", FieldKind::Text),
("metadata", FieldKind::Text),
];
pub static CROSSDOMAIN_SCHEMA: EntitySchema = EntitySchema {
name: "crossdomain",
table: "cross_nodes",
fields: FIELDS,
// Empty verb set: every kei-crossdomain op is bespoke (TextPair with
// extra columns — engine verbs can't dispatch them). Link/unlink/
// query/BFS/auto-link/stats all live in `edges.rs`, `bfs.rs`,
// `auto_link.rs`.
// Empty verb set: every kei-crossdomain op is bespoke (rich typed
// edges with evidence/metadata — engine's `link` verb does dispatch
// extras now, but kei-crossdomain keeps its own typed wrappers in
// `edges.rs`/`bfs.rs`/`auto_link.rs` for the strongly-typed API).
enabled_verbs: &[],
fts_columns: None,
// `None`: engine skips edge DDL. `cross_edges` is created via
// `custom_migrations` with byte-identical legacy DDL.
edge_table: None,
// Documentation hint only (inert while `edge_table = None`).
edge_key_kind: EdgeKeyKind::TextPair,
edge_table: Some("cross_edges"),
edge_key_kind: EdgeKeyKind::TextPairWithMetadata {
from_col: "from_uri",
to_col: "to_uri",
has_id: true,
has_weight: true,
has_created_at: true,
extra_columns: EDGE_EXTRAS,
},
archived_field: None,
custom_migrations: &[DDL_CROSS_EDGES],
// Legacy hand-rolled DDL dropped — engine now emits it. Only the
// kei-crossdomain-specific indexes (`idx_ce_from`, `idx_ce_type`)
// live here; the engine auto-emits `idx_cross_edges_dst` on `to_uri`.
custom_migrations: &[
"CREATE INDEX IF NOT EXISTS idx_ce_from ON cross_edges(from_uri);",
"CREATE INDEX IF NOT EXISTS idx_ce_type ON cross_edges(edge_type);",
],
};
/// Kept for backward compatibility with any external caller that
@ -77,6 +67,15 @@ pub static CROSSDOMAIN_SCHEMA: EntitySchema = EntitySchema {
/// `Store::open` / `Store::open_memory`, which invokes the engine's
/// migration runner with `CROSSDOMAIN_SCHEMA`.
pub fn create_schema(conn: &Connection) -> Result<()> {
conn.execute_batch(DDL_CROSS_EDGES)?;
// Delegate to the engine's DDL generator so the one-shot path stays
// byte-identical to the engine-driven migration.
let ddl = kei_entity_store::ddl::edge_table_for(
"cross_edges",
CROSSDOMAIN_SCHEMA.edge_key_kind,
);
conn.execute_batch(&ddl)?;
for stmt in CROSSDOMAIN_SCHEMA.custom_migrations {
conn.execute_batch(stmt)?;
}
Ok(())
}

View file

@ -90,10 +90,21 @@ pub fn edge_table_for(edge: &str, kind: EdgeKeyKind) -> String {
EdgeKeyKind::IntegerPair => edge_integer(edge),
EdgeKeyKind::TextPair => edge_text(edge),
EdgeKeyKind::TextPairWithMetadata {
from_col,
to_col,
has_id,
has_weight,
has_created_at,
} => edge_text_meta(edge, has_id, has_weight, has_created_at),
extra_columns,
} => edge_text_meta(
edge,
from_col,
to_col,
has_id,
has_weight,
has_created_at,
extra_columns,
),
}
}
@ -122,36 +133,58 @@ fn edge_text(edge: &str) -> String {
)
}
/// Text-keyed edge DDL with optional metadata columns.
/// Text-keyed edge DDL with optional metadata columns + caller-chosen
/// key column names + arbitrary extra columns.
fn edge_text_meta(
edge: &str,
from_col: &str,
to_col: &str,
has_id: bool,
has_weight: bool,
has_created_at: bool,
extras: &[(&str, FieldKind)],
) -> String {
let mut cols: Vec<String> = Vec::new();
if has_id {
cols.push("edge_id INTEGER PRIMARY KEY AUTOINCREMENT".to_string());
}
cols.push("src_path TEXT NOT NULL".to_string());
cols.push("dst_path TEXT NOT NULL".to_string());
cols.push(format!("{from_col} TEXT NOT NULL"));
cols.push(format!("{to_col} TEXT NOT NULL"));
cols.push("edge_type TEXT NOT NULL DEFAULT 'links'".to_string());
if has_weight {
cols.push("weight REAL NOT NULL DEFAULT 1.0".to_string());
}
for (name, kind) in extras {
cols.push(extra_column(name, *kind));
}
if has_created_at {
cols.push("created_at INTEGER NOT NULL".to_string());
}
// Without an autoincrement PK we still want `INSERT OR IGNORE`
// idempotent over the triple; with one we emit a UNIQUE instead.
if has_id {
cols.push("UNIQUE(src_path, dst_path, edge_type)".to_string());
cols.push(format!("UNIQUE({from_col}, {to_col}, edge_type)"));
} else {
cols.push("PRIMARY KEY(src_path, dst_path, edge_type)".to_string());
cols.push(format!("PRIMARY KEY({from_col}, {to_col}, edge_type)"));
}
let body = cols.join(",\n ");
format!(
"CREATE TABLE IF NOT EXISTS {edge} (\n {body}\n);\n\
CREATE INDEX IF NOT EXISTS idx_{edge}_dst ON {edge}(dst_path);"
CREATE INDEX IF NOT EXISTS idx_{edge}_dst ON {edge}({to_col});"
)
}
/// Render the column definition for a single extra edge column.
///
/// The result is `"<name> <type clause>"`, ready to be joined into the
/// edge table's `CREATE TABLE` body. Only the plain text / integer /
/// real kinds are accepted — edge extras can't be PKs, archive enums,
/// or auto-stamped timestamps.
///
/// # Panics
///
/// Panics when `kind` is any `FieldKind` outside that subset; the
/// message names both the offending kind and the column.
fn extra_column(name: &str, kind: FieldKind) -> String {
    // Pick the SQL type clause first so the name is interpolated once.
    let type_clause = match kind {
        FieldKind::Text => "TEXT DEFAULT ''",
        FieldKind::TextNotNull => "TEXT NOT NULL",
        FieldKind::Integer => "INTEGER DEFAULT 0",
        FieldKind::IntegerNotNull => "INTEGER NOT NULL",
        FieldKind::Real => "REAL NOT NULL DEFAULT 0.0",
        other => panic!(
            "edge extra_columns: unsupported FieldKind {other:?} for column '{name}'"
        ),
    };
    format!("{name} {type_clause}")
}

View file

@ -51,15 +51,24 @@ pub enum FieldKind {
/// - `TextPair` — `(src_path TEXT, dst_path TEXT, edge_type TEXT)` —
/// required by kei-sage (composite text keys, no integer ids).
/// - `TextPairWithMetadata` — same text key but with optional
/// `id`/`weight`/`created_at` columns so edges can carry metadata
/// (kei-chat-store cross-refs, kei-content-store citations).
/// `id`/`weight`/`created_at` columns plus caller-controlled key
/// column names (`from_col`/`to_col`) and arbitrary extra columns
/// (kei-chat-store cross-refs, kei-content-store citations,
/// kei-crossdomain typed edges with evidence/metadata).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeKeyKind {
IntegerPair,
TextPair,
/// Extended text-pair edge with optional metadata columns.
/// Existing `TextPair` stays backward-compat.
/// Extended text-pair edge with optional metadata columns and
/// caller-controlled column names. Existing `TextPair` stays
/// backward-compat (uses fixed `src_path`/`dst_path`).
TextPairWithMetadata {
/// Name of the "from" TEXT key column. There is no implicit
/// default — every schema must name it. Use `"src_path"` for
/// continuity with `TextPair`, or e.g. `"from_uri"` for
/// kei-crossdomain.
from_col: &'static str,
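/// Name of the "to" TEXT key column. Must also be given
/// explicitly; use `"dst_path"` for continuity with `TextPair`.
/// The auto-emitted `idx_{edge}_dst` index is built on this column.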
to_col: &'static str,
/// Emit `edge_id INTEGER PRIMARY KEY AUTOINCREMENT` column.
has_id: bool,
/// Emit `weight REAL NOT NULL DEFAULT 1.0` column.
@ -67,6 +76,14 @@ pub enum EdgeKeyKind {
/// Emit `created_at INTEGER NOT NULL` column auto-stamped on
/// insert.
has_created_at: bool,
/// Extra typed columns, emitted after `weight` and before
/// `created_at`. Each `(name, kind)` pair produces one column.
/// Only `Text`, `TextNotNull`, `Integer`, `IntegerNotNull` and
/// `Real` are supported (e.g. `Text` → `TEXT DEFAULT ''`); any
/// other kind makes edge DDL generation panic. `TextDefault` in
/// particular is not supported here — use `Text` with a
/// caller-side default migration if a non-empty default is
/// needed. `link` verb accepts matching JSON keys and binds
/// them; `rank` ignores them.
extra_columns: &'static [(&'static str, FieldKind)],
},
}

View file

@ -11,7 +11,7 @@
//! and NEVER taken from the caller.
use crate::error::VerbError;
use crate::schema::{EdgeKeyKind, EntitySchema};
use crate::schema::{EdgeKeyKind, EntitySchema, FieldKind};
use rusqlite::{types::Value as SqlValue, Connection};
use serde_json::{json, Value};
@ -42,17 +42,22 @@ pub fn run(
EdgeKeyKind::IntegerPair => insert_integer(conn, edge, &input, &edge_type),
EdgeKeyKind::TextPair => insert_text(conn, edge, &input, &edge_type),
EdgeKeyKind::TextPairWithMetadata {
has_id,
from_col,
to_col,
has_id: _,
has_weight,
has_created_at,
extra_columns,
} => insert_text_meta(
conn,
edge,
&input,
&edge_type,
has_id,
from_col,
to_col,
has_weight,
has_created_at,
extra_columns,
),
}
}
@ -96,17 +101,20 @@ fn insert_text(
Ok(json!({ "ok": true }))
}
#[allow(clippy::too_many_arguments)]
fn insert_text_meta(
conn: &Connection,
edge: &str,
input: &Value,
edge_type: &str,
_has_id: bool,
from_col: &str,
to_col: &str,
has_weight: bool,
has_created_at: bool,
extras: &[(&str, FieldKind)],
) -> Result<Value, VerbError> {
let (from, to) = extract_text_pair(input)?;
let mut cols: Vec<&str> = vec!["src_path", "dst_path", "edge_type"];
let mut cols: Vec<String> = vec![from_col.into(), to_col.into(), "edge_type".into()];
let mut values: Vec<SqlValue> = vec![
SqlValue::Text(from),
SqlValue::Text(to),
@ -114,14 +122,50 @@ fn insert_text_meta(
];
if has_weight {
let weight = input.get("weight").and_then(|v| v.as_f64()).unwrap_or(1.0);
cols.push("weight");
cols.push("weight".into());
values.push(SqlValue::Real(weight));
}
push_extras(&mut cols, &mut values, input, extras);
if has_created_at {
let now = chrono::Utc::now().timestamp();
cols.push("created_at");
values.push(SqlValue::Integer(now));
cols.push("created_at".into());
values.push(SqlValue::Integer(chrono::Utc::now().timestamp()));
}
exec_insert(conn, edge, &cols, &values)
}
/// Append the caller-supplied extra columns to an INSERT being built.
///
/// For every `(name, kind)` in `extras` whose `name` is present as a key
/// in `input`, pushes the column name onto `cols` and the converted
/// value onto `values`, keeping the two vectors index-aligned. Extras
/// missing from `input` are skipped entirely, so the column is left out
/// of the INSERT.
fn push_extras(
    cols: &mut Vec<String>,
    values: &mut Vec<SqlValue>,
    input: &Value,
    extras: &[(&str, FieldKind)],
) {
    // Keep only the extras the caller actually provided, in schema order.
    let supplied = extras
        .iter()
        .filter_map(|&(column, kind)| input.get(column).map(|raw| (column, raw, kind)));
    for (column, raw, kind) in supplied {
        cols.push(column.to_string());
        values.push(json_to_sql(raw, kind));
    }
}
fn json_to_sql(v: &Value, kind: FieldKind) -> SqlValue {
match kind {
FieldKind::Text | FieldKind::TextNotNull => {
SqlValue::Text(v.as_str().unwrap_or("").to_string())
}
FieldKind::Integer | FieldKind::IntegerNotNull => {
SqlValue::Integer(v.as_i64().unwrap_or(0))
}
FieldKind::Real => SqlValue::Real(v.as_f64().unwrap_or(0.0)),
_ => SqlValue::Null,
}
}
fn exec_insert(
conn: &Connection,
edge: &str,
cols: &[String],
values: &[SqlValue],
) -> Result<Value, VerbError> {
let placeholders: Vec<String> = (1..=cols.len()).map(|i| format!("?{i}")).collect();
let sql = format!(
"INSERT OR IGNORE INTO {edge} ({}) VALUES ({})",

View file

@ -37,10 +37,13 @@ pub fn run(
})?;
match schema.edge_key_kind {
EdgeKeyKind::IntegerPair => rank_integer(conn, edge),
EdgeKeyKind::TextPair => rank_text(conn, edge, false),
EdgeKeyKind::TextPairWithMetadata { has_weight, .. } => {
rank_text(conn, edge, has_weight)
}
EdgeKeyKind::TextPair => rank_text(conn, edge, "src_path", "dst_path", false),
EdgeKeyKind::TextPairWithMetadata {
from_col,
to_col,
has_weight,
..
} => rank_text(conn, edge, from_col, to_col, has_weight),
}
}
@ -54,8 +57,14 @@ fn rank_integer(conn: &Connection, edge: &str) -> Result<Value, VerbError> {
Ok(json!({ "results": results }))
}
fn rank_text(conn: &Connection, edge: &str, with_weight: bool) -> Result<Value, VerbError> {
let (nodes, out_edges) = collect_text(conn, edge, with_weight)?;
fn rank_text(
conn: &Connection,
edge: &str,
from_col: &str,
to_col: &str,
with_weight: bool,
) -> Result<Value, VerbError> {
let (nodes, out_edges) = collect_text(conn, edge, from_col, to_col, with_weight)?;
let rank = pagerank(&nodes, &out_edges);
let mut out: Vec<(String, f64)> = rank.into_iter().collect();
out.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
@ -85,12 +94,14 @@ fn collect_integer(
fn collect_text(
conn: &Connection,
edge: &str,
from_col: &str,
to_col: &str,
with_weight: bool,
) -> Result<(Vec<String>, HashMap<String, Vec<(String, f64)>>), VerbError> {
let sql = if with_weight {
format!("SELECT src_path, dst_path, weight FROM {edge}")
format!("SELECT {from_col}, {to_col}, weight FROM {edge}")
} else {
format!("SELECT src_path, dst_path FROM {edge}")
format!("SELECT {from_col}, {to_col} FROM {edge}")
};
let mut stmt = conn.prepare(&sql)?;
let rows = stmt.query_map([], |r| {

View file

@ -196,9 +196,12 @@ static META_EDGE_SCHEMA: EntitySchema = EntitySchema {
fts_columns: None,
edge_table: Some("doc_edges_meta"),
edge_key_kind: EdgeKeyKind::TextPairWithMetadata {
from_col: "src_path",
to_col: "dst_path",
has_id: true,
has_weight: true,
has_created_at: true,
extra_columns: &[],
},
archived_field: None,
custom_migrations: &[],

View file

@ -4,7 +4,7 @@
//! composite edge keys. Also keeps one IntegerPair regression case to
//! prove we did not disturb the default behaviour.
use kei_entity_store::schema::{EdgeKeyKind, EntitySchema, FieldDef};
use kei_entity_store::schema::{EdgeKeyKind, EntitySchema, FieldDef, FieldKind};
use kei_entity_store::verbs::{link, rank};
use kei_entity_store::Store;
use serde_json::json;
@ -100,6 +100,78 @@ fn text_pair_rejects_integer_input() {
assert_eq!(err.exit_code(), 2);
}
// ---- Extended TextPairWithMetadata: custom col names + extra columns ----
// Fixture schema for the extended `TextPairWithMetadata` tests below:
// the edge table `xdoc_edges` uses caller-chosen key column names
// (`from_uri` / `to_uri`), carries id, weight and created_at, and adds
// two `Text` extras (`evidence`, `metadata`). Only the `link` and `rank`
// verbs are enabled.
static META_EXTRAS_SCHEMA: EntitySchema = EntitySchema {
name: "xdoc",
table: "xdocs",
fields: NODE_FIELDS,
enabled_verbs: &["link", "rank"],
fts_columns: None,
edge_table: Some("xdoc_edges"),
edge_key_kind: EdgeKeyKind::TextPairWithMetadata {
from_col: "from_uri",
to_col: "to_uri",
has_id: true,
has_weight: true,
has_created_at: true,
extra_columns: &[
("evidence", FieldKind::Text),
("metadata", FieldKind::Text),
],
},
archived_field: None,
custom_migrations: &[],
};
#[test]
fn text_pair_with_extras_roundtrip() {
    // Link one edge that sets the weight and both extra columns...
    let store = Store::open_memory(&META_EXTRAS_SCHEMA).unwrap();
    let payload = json!({
        "from": "code://a.rs",
        "to": "note://n1",
        "edge_type": "refs",
        "weight": 2.5,
        "evidence": "E2",
        "metadata": "{\"tag\":\"important\"}",
    });
    link::run(store.conn(), &META_EXTRAS_SCHEMA, payload).unwrap();

    // ...then read it back through the custom key column names.
    let row: (f64, String, String) = store
        .conn()
        .query_row(
            "SELECT weight, evidence, metadata FROM xdoc_edges \
             WHERE from_uri='code://a.rs' AND to_uri='note://n1'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )
        .unwrap();
    let (weight, evidence, metadata) = row;

    assert_eq!(weight, 2.5);
    assert_eq!(evidence, "E2");
    assert_eq!(metadata, "{\"tag\":\"important\"}");
}
#[test]
fn text_pair_with_custom_col_names_rank_uses_from_to_cols() {
    let store = Store::open_memory(&META_EXTRAS_SCHEMA).unwrap();

    // Three-node graph; both a://x and b://y point at c://z.
    let edges = [("a://x", "b://y"), ("a://x", "c://z"), ("b://y", "c://z")];
    for &(src, dst) in edges.iter() {
        let payload = json!({ "from": src, "to": dst, "edge_type": "refs" });
        link::run(store.conn(), &META_EXTRAS_SCHEMA, payload).unwrap();
    }

    let ranked = rank::run(store.conn(), &META_EXTRAS_SCHEMA, json!({})).unwrap();
    let results = ranked["results"].as_array().unwrap();
    assert_eq!(results.len(), 3);
    // c://z has 2 inbound edges → highest rank.
    assert_eq!(results[0]["id"], "c://z");
}
#[test]
fn integer_pair_still_works_after_refactor() {
// Regression guard — kei-task uses IntegerPair implicitly.