Merge M5 — kei-sage migration

This commit is contained in:
Parfii-bot 2026-04-23 05:55:35 +08:00
commit 41eec8d5b1
4 changed files with 100 additions and 39 deletions

View file

@ -2135,6 +2135,7 @@ dependencies = [
"chrono",
"clap",
"kei-atom-discovery",
"kei-entity-store",
"rusqlite",
"serde",
"serde_json",

View file

@ -21,6 +21,7 @@ serde_json = "1"
anyhow = "1"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
kei-atom-discovery = { path = "../kei-atom-discovery" }
kei-entity-store = { path = "../kei-entity-store" }
[dev-dependencies]
tempfile = "3"

View file

@ -1,36 +1,65 @@
//! SQLite schema for knowledge-vault. Port of LBM internal/sage/vault_schema.go.
//! SQLite schema — declarative via `kei_entity_store::EntitySchema`.
//!
//! Primary entity = `knowledge_units` ("unit"). Secondary tables (tags,
//! unit_tags, edges, fts_knowledge) ship as `custom_migrations` because
//! they pre-date the generic engine and carry sage-specific columns
//! (edge `id`/`weight`/`created_at`, FTS `unit_id`-named column, unique
//! partial index on `vault_path`).
//!
//! Why `edge_table: None` + `fts_columns: None`:
//! - Engine's default `TextPair` edge layout lacks `id`/`weight`/
//! `created_at` that sage's `list_outgoing` returns.
//! - Engine's FTS auto-table name is `fts_<table>` with column
//! `<table>_id` — sage uses `fts_knowledge` with column `unit_id`.
//!
//! The primary-table DDL produced by the engine matches the legacy
//! `knowledge_units` layout byte-for-byte (every column maps to an
//! engine `FieldKind`), so opening an existing sage DB stays idempotent.
use kei_entity_store::{EdgeKeyKind, EntitySchema, FieldDef};
use rusqlite::{Connection, Result};
const DDL_MAIN: &str = r#"
CREATE TABLE IF NOT EXISTS knowledge_units (
id INTEGER PRIMARY KEY,
unit_type TEXT NOT NULL,
title TEXT NOT NULL,
content TEXT DEFAULT '',
evidence_grade TEXT DEFAULT '',
source_path TEXT DEFAULT '',
vault_path TEXT DEFAULT '',
category TEXT DEFAULT '',
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL
);
/// Engine-owned primary-table fields for `knowledge_units`.
///
/// One `FieldDef` per column, in this order: `id`, `unit_type`, `title`,
/// `content`, `evidence_grade`, `source_path`, `vault_path`, `category`,
/// followed by the two timestamp fields. The slice is wired into
/// `SAGE_SCHEMA.fields`.
///
/// NOTE(review): the hand-written legacy DDL declared the optional text
/// columns as `TEXT DEFAULT ''` and both timestamps as `INTEGER NOT NULL`.
/// Confirm that `FieldDef::text`, `FieldDef::created_at` and
/// `FieldDef::updated_at` emit equivalent column definitions, otherwise
/// inserts that omit those columns will behave differently on fresh DBs.
static UNIT_FIELDS: &[FieldDef] = &[
FieldDef::pk("id"),
// Required text columns (legacy DDL: `TEXT NOT NULL`).
FieldDef::text_nn("unit_type"),
FieldDef::text_nn("title"),
// Optional text columns (legacy DDL: `TEXT DEFAULT ''`).
FieldDef::text("content"),
FieldDef::text("evidence_grade"),
FieldDef::text("source_path"),
// `vault_path` is also covered by the unique partial index `idx_ku_vault`.
FieldDef::text("vault_path"),
FieldDef::text("category"),
// Timestamp columns; `Store::add_unit` writes Unix seconds into both.
FieldDef::created_at(),
FieldDef::updated_at(),
];
/// Extra indexes on `knowledge_units` beyond the engine's per-field
/// auto-indexes. The unique partial index on `vault_path` is what makes
/// `INSERT OR REPLACE` idempotent by vault path in `Store::add_unit`.
///
/// - `idx_ku_type` and `idx_ku_grade` are plain lookup indexes on
///   `unit_type` and `evidence_grade`.
/// - `idx_ku_vault` is UNIQUE but only covers rows where
///   `vault_path != ''`, so any number of units may carry an empty
///   vault path while non-empty paths stay unique.
///
/// Every statement uses `IF NOT EXISTS`, so the batch can be re-run
/// against a database that already has these indexes.
const DDL_EXTRA_INDEXES: &str = r#"
CREATE INDEX IF NOT EXISTS idx_ku_type ON knowledge_units(unit_type);
CREATE UNIQUE INDEX IF NOT EXISTS idx_ku_vault
ON knowledge_units(vault_path) WHERE vault_path != '';
CREATE INDEX IF NOT EXISTS idx_ku_grade ON knowledge_units(evidence_grade);
"#;
/// Tags tables (currently unused by the CLI but preserved for parity
/// with the LBM port — external tooling may read them).
///
/// - `tags`: one row per tag, `name` is unique and required.
/// - `unit_tags`: many-to-many join between `knowledge_units` and `tags`,
///   keyed by the composite `(unit_id, tag_id)`.
///
/// Both statements use `IF NOT EXISTS`, so re-running the batch is safe.
///
/// NOTE(review): the `ON DELETE CASCADE` clauses only fire when SQLite
/// foreign-key enforcement is enabled on the connection
/// (`PRAGMA foreign_keys = ON`). Nothing in this module sets that pragma —
/// confirm the engine does, or `unit_tags` rows will outlive deleted units.
const DDL_TAGS: &str = r#"
CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY,
name TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS unit_tags (
unit_id INTEGER NOT NULL REFERENCES knowledge_units(id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY (unit_id, tag_id)
);
"#;
/// Typed wikilink edges between `vault_path`s — src_path/dst_path text
/// keys plus sage-specific `id`/`weight`/`created_at`.
const DDL_EDGES: &str = r#"
CREATE TABLE IF NOT EXISTS edges (
id INTEGER PRIMARY KEY,
src_path TEXT NOT NULL,
@ -44,14 +73,41 @@ const DDL_MAIN: &str = r#"
CREATE INDEX IF NOT EXISTS idx_sage_edges_dst ON edges(dst_path);
"#;
/// FTS5 virtual table — legacy column name `unit_id` kept so existing
/// search/CRUD SQL in `search.rs` and `store.rs` compiles unchanged.
///
/// Layout of `fts_knowledge`:
/// - `unit_id` is declared `UNINDEXED`: it is stored alongside each row so
///   hits can be joined back to `knowledge_units`, but it is not part of
///   the full-text index.
/// - `title` and `content` are the searchable columns.
/// - Tokenizer is `porter unicode61`.
///
/// This is a standalone FTS table declared without any `content=` option,
/// so rows must be written and removed explicitly by the store code.
const DDL_FTS: &str = r#"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_knowledge
USING fts5(unit_id UNINDEXED, title, content, tokenize='porter unicode61');
"#;
/// Declarative SSoT for sage's SQLite layout. `edge_key_kind` is
/// `TextPair` because sage's graph nodes are vault paths (strings), but
/// `edge_table: None` keeps the custom `edges` schema with extra
/// columns — engine-side `link`/`rank` verbs are not used today.
///
/// Passed to `kei_entity_store::engine::run_migrations` by
/// `create_schema`.
///
/// NOTE(review): `enabled_verbs` lists `search`, `link` and `rank` even
/// though `fts_columns` and `edge_table` are both `None`. Confirm the
/// engine tolerates verbs being enabled without their backing tables, or
/// trim the list to the verbs that are actually usable.
pub static SAGE_SCHEMA: EntitySchema = EntitySchema {
// Entity name and the primary table it maps to.
name: "unit",
table: "knowledge_units",
fields: UNIT_FIELDS,
enabled_verbs: &["create", "get", "search", "link", "rank"],
// No engine-managed FTS or edge table: sage ships its own via
// `custom_migrations` below (`fts_knowledge`, `edges`).
fts_columns: None,
edge_table: None,
edge_key_kind: EdgeKeyKind::TextPair,
archived_field: None,
// Raw DDL batches listed in this order: extra indexes, tags tables,
// edges table, FTS table.
custom_migrations: &[DDL_EXTRA_INDEXES, DDL_TAGS, DDL_EDGES, DDL_FTS],
};
/// Apply schema + FTS5 virtual table. Idempotent.
///
/// Delegates to `kei_entity_store::engine::run_migrations` against
/// `SAGE_SCHEMA`. Preserved as a named entry point so downstream
/// callers and tests can still spell out the migration explicitly.
///
/// # Errors
///
/// Returns a `rusqlite::Error`. A `VerbError::Sqlite` from the engine is
/// unwrapped and returned as-is; any other `VerbError` variant is reduced
/// to its display string and wrapped in
/// `rusqlite::Error::ToSqlConversionFailure`, because the signature can
/// only carry `rusqlite` errors.
pub fn create_schema(conn: &Connection) -> Result<()> {
conn.execute_batch(DDL_MAIN)?;
conn.execute_batch(DDL_FTS)?;
kei_entity_store::engine::run_migrations(conn, &SAGE_SCHEMA)
.map_err(|e| match e {
// Already a SQLite error: pass it through untouched.
kei_entity_store::VerbError::Sqlite(sq) => sq,
// Anything else has no rusqlite equivalent; keep only the message.
// NOTE(review): `ToSqlConversionFailure` is used purely as a carrier
// here, so callers matching on the variant will be misled.
other => rusqlite::Error::ToSqlConversionFailure(Box::new(
std::io::Error::new(std::io::ErrorKind::Other, other.to_string()),
)),
})?;
Ok(())
}

View file

@ -1,42 +1,45 @@
//! Knowledge-unit CRUD + FTS indexer.
//!
//! `Store::open` / `Store::open_memory` delegate to
//! `kei_entity_store::Store` which runs `SAGE_SCHEMA` migrations.
//! The sage-specific `add_unit` / `update_unit` / `delete_unit`
//! helpers stay here because they use `INSERT OR REPLACE` idempotency
//! by `vault_path` and maintain sage's custom FTS table (`fts_knowledge`
//! with column `unit_id`) — engine's generic `create` verb assumes a
//! different FTS shape (`fts_<table>` with column `<table>_id`).
use crate::schema::create_schema;
use crate::schema::SAGE_SCHEMA;
use crate::types::Unit;
use anyhow::{Context, Result};
use chrono::Utc;
use kei_entity_store::Store as EngineStore;
use rusqlite::{params, Connection};
use std::path::Path;
/// Handle to the sage knowledge store.
///
/// All unit CRUD and FTS maintenance methods on this type issue SQL
/// through the connection returned by `Store::conn`.
pub struct Store {
// Raw SQLite connection.
conn: Connection,
// Engine-level store; its connection is exposed via `Store::conn`.
engine: EngineStore,
}
impl Store {
/// Opens the store backed by the SQLite file at `path`.
///
/// # Errors
///
/// Fails if the database cannot be opened or the schema cannot be
/// applied; the error carries a short context string naming the step.
pub fn open(path: &Path) -> Result<Self> {
// Best-effort creation of the parent directory: the result is
// deliberately discarded, so a failure here only surfaces later when
// the database file itself cannot be opened.
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let conn = Connection::open(path).context("open sqlite")?;
// Request WAL journaling; `.ok()` drops the result, so a refusal is
// silently ignored.
conn.pragma_update(None, "journal_mode", "WAL").ok();
create_schema(&conn)?;
Ok(Self { conn })
// Engine-backed open: the engine store is handed `SAGE_SCHEMA` directly.
let engine = EngineStore::open(path, &SAGE_SCHEMA).context("engine store open")?;
Ok(Self { engine })
}
/// Opens a store backed by an in-memory SQLite database with the sage
/// schema applied.
///
/// # Errors
///
/// Fails if the in-memory database cannot be opened or the schema cannot
/// be applied.
pub fn open_memory() -> Result<Self> {
let conn = Connection::open_in_memory()?;
create_schema(&conn)?;
Ok(Self { conn })
// Engine-backed variant: the engine store is handed `SAGE_SCHEMA` directly.
let engine = EngineStore::open_memory(&SAGE_SCHEMA).context("engine store open_memory")?;
Ok(Self { engine })
}
/// Borrows the underlying `rusqlite::Connection` so callers (and the
/// other methods on this type) can run SQL directly against the store.
pub fn conn(&self) -> &Connection {
&self.conn
self.engine.conn()
}
/// Insert a new knowledge unit. Indexes title+content into FTS5. Idempotent by vault_path.
pub fn add_unit(&self, unit: &Unit) -> Result<i64> {
let now = Utc::now().timestamp();
let created = if unit.created_at == 0 { now } else { unit.created_at };
self.conn.execute(
self.conn().execute(
"INSERT OR REPLACE INTO knowledge_units
(unit_type, title, content, evidence_grade, source_path,
vault_path, category, created_at, updated_at)
@ -44,13 +47,13 @@ impl Store {
params![unit.unit_type, unit.title, unit.content, unit.evidence_grade,
unit.source_path, unit.vault_path, unit.category, created, now],
)?;
let id = self.conn.last_insert_rowid();
let id = self.conn().last_insert_rowid();
self.reindex_fts(id, &unit.title, &unit.content)?;
Ok(id)
}
pub fn get_unit(&self, id: i64) -> Result<Option<Unit>> {
let mut stmt = self.conn.prepare(
let mut stmt = self.conn().prepare(
"SELECT id, unit_type, title, content, evidence_grade, source_path,
vault_path, category, created_at, updated_at
FROM knowledge_units WHERE id=?1",
@ -64,7 +67,7 @@ impl Store {
pub fn update_unit(&self, unit: &Unit) -> Result<()> {
let now = Utc::now().timestamp();
self.conn.execute(
self.conn().execute(
"UPDATE knowledge_units SET title=?1, content=?2, evidence_grade=?3,
category=?4, updated_at=?5 WHERE id=?6",
params![unit.title, unit.content, unit.evidence_grade,
@ -75,19 +78,19 @@ impl Store {
}
/// Deletes the unit with the given `id` together with its full-text row.
///
/// The `fts_knowledge` row (matched on `unit_id`) is removed first, then
/// the `knowledge_units` row. The affected-row counts are discarded, so
/// an `id` that does not exist is not reported as an error.
///
/// NOTE(review): the two statements are issued separately with no explicit
/// transaction in this function; if the second fails, the unit stays in
/// `knowledge_units` without an FTS row.
///
/// # Errors
///
/// Propagates any SQLite error from either `DELETE`.
pub fn delete_unit(&self, id: i64) -> Result<()> {
self.conn.execute("DELETE FROM fts_knowledge WHERE unit_id=?1", params![id])?;
self.conn.execute("DELETE FROM knowledge_units WHERE id=?1", params![id])?;
self.conn().execute("DELETE FROM fts_knowledge WHERE unit_id=?1", params![id])?;
self.conn().execute("DELETE FROM knowledge_units WHERE id=?1", params![id])?;
Ok(())
}
/// Returns the total number of rows in `knowledge_units`.
///
/// # Errors
///
/// Propagates any SQLite error from the `COUNT(*)` query.
pub fn count_units(&self) -> Result<i64> {
Ok(self.conn.query_row(
Ok(self.conn().query_row(
"SELECT COUNT(*) FROM knowledge_units", [], |r| r.get(0))?)
}
fn reindex_fts(&self, id: i64, title: &str, content: &str) -> Result<()> {
self.conn.execute("DELETE FROM fts_knowledge WHERE unit_id=?1", params![id])?;
self.conn.execute(
self.conn().execute("DELETE FROM fts_knowledge WHERE unit_id=?1", params![id])?;
self.conn().execute(
"INSERT INTO fts_knowledge (unit_id, title, content) VALUES (?1,?2,?3)",
params![id, title, content],
)?;