KeiSeiKit-1.0/_primitives/_rust/frustration-matrix/src/row.rs
Parfii-bot 0be354a920 KeiSeiKit-public — clean state
Single-commit clean baseline after security scrub of niche-tells,
project codenames, internal jargon, and contributor-email leaks.

Contents:
- 100 Rust crates (_primitives/_rust/)
- 37 agent manifests (_manifests/) + generated specs (_generated/)
- 67 user-invocable skills (skills/)
- 33 hooks (hooks/)
- Composition blocks (_blocks/)
- Documentation (docs/, README.md)
- TS adapter packages (_ts_packages/)
- Assembler (_assembler/)
- Roles (_roles/)
- Templates (_templates/)
- Forgejo CI (.forgejo/)

Author: Denis Parfionovich <info@greendragon.info>

License: see LICENSE.
2026-05-01 12:09:03 +08:00

111 lines
3.6 KiB
Rust

//! Output row — one hit per (category, chatlog file, line_no).
//!
//! Constructor Pattern: one struct, two serializers. CSV is emitted by hand
//! (no `csv` crate in the dependency list); JSONL uses `serde_json`.
//!
//! Fields are public and stable — this is the wire format the `report`
//! sub-command reads back from disk.
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
/// One output hit, identified by (category, chatlog file, line number).
///
/// The same struct backs both serializers in this file: `to_csv` writes the
/// fields in declaration order as one CSV line, and `to_jsonl` relies on the
/// derived `Serialize` impl. `parse_csv` rebuilds values of this type from a
/// CSV body, so field names and order act as the on-disk format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Row {
/// Category label the hit was filed under.
pub category: String,
/// Chatlog file the hit came from.
/// NOTE(review): whether this is a full path or a bare file name is not
/// visible in this file — confirm against the producer.
pub chatlog_file: String,
/// Line number of the hit inside `chatlog_file`.
/// NOTE(review): 0- vs 1-based is not established here — confirm.
pub line_no: usize,
/// Free-form timestamp text; it is stored and round-tripped as a plain
/// string, never parsed in this file.
pub timestamp: String, // ISO-ish string or mtime seconds
/// Quoted text of the hit. The CSV export replaces `\n` / `\r` with spaces,
/// so only the JSONL form preserves multi-line quotes exactly.
pub quote: String,
/// Numeric weight attached to the hit.
/// NOTE(review): scale and meaning are defined by the producer — confirm.
pub weight: f64,
}
/// Header line of the CSV export: the six column names, in the same order
/// `to_csv` writes the fields of a `Row`.
///
/// `parse_csv` compares the (trimmed) first line of its input against this
/// exact string and rejects the body on any difference. Kept as a const so
/// tests and the report reader agree on a single definition.
pub const CSV_HEADER: &str = "category,chatlog_file,line_no,timestamp,quote,weight";
/// Escape one field for the CSV export (RFC 4180 quoting, forced single-line).
///
/// * Every `\n` and `\r` becomes a single space, so a field can never span
///   lines — `parse_csv` splits the body on line breaks before it looks at
///   quotes. This is lossy; use the JSONL output when the exact text matters.
/// * Every `"` is doubled.
/// * The result is wrapped in double quotes only when the field contains a
///   comma or a double quote; all other fields are emitted bare.
fn csv_escape(s: &str) -> String {
    // +2 leaves room for the surrounding quotes in the common case.
    let mut escaped = String::with_capacity(s.len() + 2);
    let mut must_wrap = false;

    for ch in s.chars() {
        match ch {
            // Line breaks are flattened before any quoting decision is made.
            '\n' | '\r' => escaped.push(' '),
            '"' => {
                must_wrap = true;
                escaped.push_str("\"\"");
            }
            ',' => {
                must_wrap = true;
                escaped.push(',');
            }
            other => escaped.push(other),
        }
    }

    if must_wrap {
        format!("\"{escaped}\"")
    } else {
        escaped
    }
}
/// Render one row as a single CSV line, without a trailing newline.
///
/// Cells appear in the column order of `CSV_HEADER`. The four text fields go
/// through `csv_escape`; `line_no` and `weight` are written with their
/// `Display` formatting and need no escaping.
pub fn to_csv(r: &Row) -> String {
    let cells = [
        csv_escape(&r.category),
        csv_escape(&r.chatlog_file),
        r.line_no.to_string(),
        csv_escape(&r.timestamp),
        csv_escape(&r.quote),
        r.weight.to_string(),
    ];
    cells.join(",")
}
/// Serialize one row as a single JSON object, suitable as one JSONL record.
///
/// The returned string is compact JSON produced by `serde_json::to_string`
/// and carries NO trailing newline; code that writes a JSONL file must add
/// the line terminator itself.
///
/// # Errors
/// Returns the `serde_json` failure wrapped with the context
/// `"serialize row as JSON"`.
pub fn to_jsonl(r: &Row) -> Result<String> {
    let encoded = serde_json::to_string(r);
    encoded.context("serialize row as JSON")
}
/// Parse a CSV body (header line followed by data rows) back into `Vec<Row>`.
///
/// Only a single-line subset of RFC 4180 is supported: the body is split on
/// line breaks first, so a quoted field can never span lines. That matches
/// what `to_csv` emits, because `csv_escape` replaces newlines with spaces.
///
/// Blank (empty or whitespace-only) lines after the header are skipped. A
/// valid row always contains five commas, so a blank line can never be data;
/// tolerating it keeps a stray trailing empty line from failing the parse.
///
/// An entirely empty body yields `Ok(vec![])`.
///
/// # Errors
/// * the first line, after trimming, is not exactly `CSV_HEADER`;
/// * any non-blank data line fails in `parse_row` (wrong field count, or an
///   unparsable `line_no` / `weight`). Parsing stops at the first bad line.
pub fn parse_csv(body: &str) -> Result<Vec<Row>> {
    let mut lines = body.lines();
    // No header at all: treated as "no rows" rather than as an error.
    let Some(hdr) = lines.next() else {
        return Ok(Vec::new());
    };
    if hdr.trim() != CSV_HEADER {
        anyhow::bail!("csv header mismatch; got {hdr:?}, expected {CSV_HEADER:?}");
    }
    lines
        // Enumerate BEFORE filtering so the index handed to `parse_row` still
        // reflects the physical position of the line in the file.
        .enumerate()
        .filter(|(_, line)| !line.trim().is_empty())
        .map(parse_row)
        .collect()
}
/// Parse one data line of a CSV body into a `Row`.
///
/// `idx` is the zero-based position of `line` among the lines that follow the
/// header, so the line number reported in errors is `idx + 2` (the header is
/// line 1).
///
/// # Errors
/// * the line does not split into exactly six fields;
/// * `line_no` is not a valid `usize` or `weight` is not a valid `f64`. Both
///   messages name the CSV line and the offending text so a bad row can be
///   located in the file.
fn parse_row((idx, line): (usize, &str)) -> Result<Row> {
    let file_line = idx + 2;

    // Converting into a fixed-size array checks the field count and lets the
    // cells be moved out by name, with no indexing and no cloning. On a count
    // mismatch the original Vec comes back as the error value.
    let cells = <[String; 6] as std::convert::TryFrom<Vec<String>>>::try_from(
        split_csv_line(line),
    )
    .map_err(|got| {
        anyhow::anyhow!(
            "csv line {} has {} fields, expected 6",
            file_line,
            got.len()
        )
    })?;
    let [category, chatlog_file, line_no_raw, timestamp, quote, weight_raw] = cells;

    let line_no = line_no_raw
        .parse::<usize>()
        .with_context(|| format!("csv line {file_line}: invalid line_no {line_no_raw:?}"))?;
    let weight = weight_raw
        .parse::<f64>()
        .with_context(|| format!("csv line {file_line}: invalid weight {weight_raw:?}"))?;

    Ok(Row {
        category,
        chatlog_file,
        line_no,
        timestamp,
        quote,
        weight,
    })
}
/// Split one CSV line into its fields, honouring RFC 4180 style quoting.
///
/// Rules, applied character by character:
/// * `""` while inside a quoted section yields one literal `"`;
/// * any other `"` toggles the quoted state and is not emitted;
/// * `,` outside a quoted section ends the current field;
/// * everything else is copied into the current field verbatim.
///
/// The input must already be a single line. The splitter is lenient: an
/// unterminated quote is not reported, the rest of the line simply becomes
/// part of the last field. The result always holds at least one field (an
/// empty input gives one empty string).
fn split_csv_line(s: &str) -> Vec<String> {
    let mut fields: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut quoted = false;
    let mut rest = s.chars().peekable();

    while let Some(ch) = rest.next() {
        if ch == '"' {
            // `next_if_eq` consumes the second quote of an escaped pair; it is
            // only consulted while inside a quoted section.
            if quoted && rest.next_if_eq(&'"').is_some() {
                current.push('"');
            } else {
                quoted = !quoted;
            }
        } else if ch == ',' && !quoted {
            fields.push(std::mem::take(&mut current));
        } else {
            current.push(ch);
        }
    }

    // Whatever is left is the final field (possibly empty).
    fields.push(current);
    fields
}