- kei-router — keyword-dispatch meta-tool (CfC ML fallback removed) - kei-sage — Obsidian-style knowledge graph, FTS5 + BFS + PageRank - kei-task — task DAG with deps, milestones, dependency-chain queries - kei-chat-store — Claude conversation session persistence + FTS search - kei-crossdomain — typed-edge store + BFS cross-domain glue - kei-search-core — 3-wave deep research with microcent budget cap - kei-content-store — asset + prompt + campaign registry - kei-social-store — people + interactions CRM (lite) - kei-curator — edge-decay graph hygiene utility - kei-auth — multi-tenant session tokens (replaces single-bearer) Genesis-scan pre-import pass: skipped pkg/mxl1/*, pkg/inference/*, pkg/trainer/*, pkg/nc01/*, internal/ml/* (all Genesis/CfC adjacent, sensitive IP). Security: skipped tools_threat/radio/protocol/med/mlreg (offensive/banned). Domain verticals skipped: hr/legal/infra/ops/api/osint/edu/geo/hw/finance. New 'mcp' profile in MANIFEST.toml bundles all 10 for MCP server deployment. Workspace now 24 crates, cargo check --workspace clean, 94 workspace tests pass.
167 lines
4.8 KiB
Rust
167 lines
4.8 KiB
Rust
//! Param extraction — regex scans the raw query for path / limit / id / URI / KV.
|
|
//!
|
|
//! Ported from LBM pkg/keirouter/extract.go.
|
|
|
|
use regex::Regex;
|
|
use std::collections::HashMap;
|
|
use std::sync::OnceLock;
|
|
|
|
/// Structured parameters pulled out of a raw natural-language query.
///
/// Every field starts at its `Default` value (empty string, `0`, empty map)
/// and is only overwritten when the corresponding pattern matches.
#[derive(Clone, Debug, Default)]
pub struct Extracted {
    /// First absolute path in the query; otherwise the first relative path;
    /// otherwise the first `note://vault/...` URI. Case is preserved.
    pub path: String,
    /// Raw text of the first JSON-style array of double-quoted strings.
    pub paths: String,
    /// Number following `limit` / `max` / `top`, or a bare 1..=500 number
    /// when neither a limit nor an id was found.
    pub limit: i64,
    /// Number following `depth`.
    pub depth: i64,
    /// Number following `id` / `unit`.
    pub id: i64,
    /// `text` with the path and the limit / depth / id clauses removed;
    /// left untouched when that residue is empty.
    pub query: String,
    /// The trimmed, lowercased input query.
    pub text: String,
    /// `text` with the (lowercased) path replaced by a space, then trimmed.
    pub text_clean: String,
    /// First `scheme://...` URI, unless it is a `note://` URI.
    pub uri: String,
    /// All `key=value` pairs found in the query (later duplicates win).
    pub kv: HashMap<String, String>,
}
|
|
|
|
fn re(pat: &str) -> Regex {
|
|
Regex::new(pat).expect("invalid regex pattern in kei-router")
|
|
}
|
|
|
|
fn re_abs_path() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"(?:^|\s)((?:/[\w.~-]+)+(?:\.\w+)?)"))
|
|
}
|
|
fn re_rel_path() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"(?:^|\s)((?:[\w.-]+/)+[\w.-]+\.\w+)"))
|
|
}
|
|
fn re_json_arr() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r#"\[(?:\s*"[^"]*"\s*,?\s*)+\]"#))
|
|
}
|
|
fn re_number() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"\b(?:limit|max|top)\s*[=:]?\s*(\d+)"))
|
|
}
|
|
fn re_depth() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"\b(?:depth)\s*[=:]?\s*(\d+)"))
|
|
}
|
|
fn re_id_num() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"\b(?:id|unit)\s*[=:#]?\s*(\d+)"))
|
|
}
|
|
fn re_bare_num() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"\b(\d{1,4})\b"))
|
|
}
|
|
fn re_vault_uri() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"\bnote://vault/[\w/.\-]+"))
|
|
}
|
|
fn re_domain_uri() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"\b(\w+://[\w/.+\-]+)"))
|
|
}
|
|
fn re_kv() -> &'static Regex {
|
|
static R: OnceLock<Regex> = OnceLock::new();
|
|
R.get_or_init(|| re(r"\b(\w+)=([\w://._+\-]+)"))
|
|
}
|
|
|
|
/// Parses `s` as a decimal `i64`.
///
/// Returns `0` when `s` is not a valid `i64` (empty, non-numeric, or out of
/// range), so callers cannot distinguish a literal `0` from a parse failure.
fn parse_i64(s: &str) -> i64 {
    match s.parse::<i64>() {
        Ok(value) => value,
        Err(_) => 0,
    }
}
|
|
|
|
fn extract_paths(query: &str, e: &mut Extracted) {
|
|
if let Some(m) = re_json_arr().find(query) {
|
|
e.paths = m.as_str().to_string();
|
|
}
|
|
if let Some(c) = re_abs_path().captures(query) {
|
|
if let Some(m) = c.get(1) {
|
|
e.path = m.as_str().to_string();
|
|
}
|
|
}
|
|
if e.path.is_empty() {
|
|
if let Some(c) = re_rel_path().captures(query) {
|
|
if let Some(m) = c.get(1) {
|
|
e.path = m.as_str().to_string();
|
|
}
|
|
}
|
|
}
|
|
if let Some(m) = re_vault_uri().find(query) {
|
|
if e.path.is_empty() {
|
|
e.path = m.as_str().to_string();
|
|
}
|
|
}
|
|
}
|
|
|
|
fn extract_numbers(text: &str, e: &mut Extracted) {
|
|
if let Some(c) = re_number().captures(text) {
|
|
if let Some(m) = c.get(1) {
|
|
e.limit = parse_i64(m.as_str());
|
|
}
|
|
}
|
|
if let Some(c) = re_depth().captures(text) {
|
|
if let Some(m) = c.get(1) {
|
|
e.depth = parse_i64(m.as_str());
|
|
}
|
|
}
|
|
if let Some(c) = re_id_num().captures(text) {
|
|
if let Some(m) = c.get(1) {
|
|
e.id = parse_i64(m.as_str());
|
|
}
|
|
}
|
|
if e.limit == 0 && e.id == 0 {
|
|
if let Some(c) = re_bare_num().captures(text) {
|
|
if let Some(m) = c.get(1) {
|
|
let n = parse_i64(m.as_str());
|
|
if n > 0 && n <= 500 {
|
|
e.limit = n;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn extract_uri_kv(query: &str, e: &mut Extracted) {
|
|
if let Some(m) = re_domain_uri().find(query) {
|
|
let s = m.as_str();
|
|
if !s.starts_with("note://") {
|
|
e.uri = s.to_string();
|
|
}
|
|
}
|
|
for c in re_kv().captures_iter(query) {
|
|
if let (Some(k), Some(v)) = (c.get(1), c.get(2)) {
|
|
e.kv.insert(k.as_str().to_string(), v.as_str().to_string());
|
|
}
|
|
}
|
|
}
|
|
|
|
fn build_clean_query(e: &mut Extracted) {
|
|
let mut q = e.text.clone();
|
|
if !e.path.is_empty() {
|
|
q = q.replacen(&e.path.to_lowercase(), "", 1);
|
|
}
|
|
q = re_number().replace_all(&q, "").to_string();
|
|
q = re_depth().replace_all(&q, "").to_string();
|
|
q = re_id_num().replace_all(&q, "").to_string();
|
|
q = q.trim().to_string();
|
|
if !q.is_empty() {
|
|
e.query = q;
|
|
}
|
|
e.text_clean = e.text.clone();
|
|
if !e.path.is_empty() {
|
|
e.text_clean = e.text_clean.replacen(&e.path.to_lowercase(), " ", 1).trim().to_string();
|
|
}
|
|
}
|
|
|
|
/// Parse a raw NL query into structured [`Extracted`] params.
|
|
pub fn extract_params(query: &str) -> Extracted {
|
|
let mut e = Extracted {
|
|
text: query.trim().to_lowercase(),
|
|
..Default::default()
|
|
};
|
|
extract_paths(query, &mut e);
|
|
let text_copy = e.text.clone();
|
|
extract_numbers(&text_copy, &mut e);
|
|
extract_uri_kv(query, &mut e);
|
|
build_clean_query(&mut e);
|
|
e
|
|
}
|