feat(kei-buddy): AskLanguage i18n + real proposeTopicSources + voice handling
Three follow-up atomics on top of the contacts/topics/sync wave.
## 1. AskLanguage state + ru/en localisation (default en)
New state `AskLanguage` inserted between `Intro` and `AskName`. Intro now
sends a bilingual greeting + language picker. AskLanguage parses
en/english/1/ru/русский/2/etc → persona_patch{"language":"<code>"} →
transitions to AskName with that language's prompt.
All later prompts (AskName / AskTone / AskInterests / AskHobbies /
TopicSpecifics / TopicNowLater / TopicResearch / AskSchedule / Ready)
read persona.language via Lang::from_persona and dispatch through
Strings::* helpers — two language tables, no fallthrough.
Back-compat migration: existing chats without `language` key (like the
user currently in topic_now_later) get an implicit "ru" patch on next
turn so their Russian onboarding continues without regression.
New files: strings.rs (164), machine_lang.rs (145).
Modified: state.rs (+AskLanguage variant), machine.rs (Intro→AskLanguage,
AskLanguage arm, migration guard), machine_helpers.rs, machine_tests.rs.
5 new tests (intro_to_ask_language, ask_language_en, ask_language_ru,
ask_language_invalid, migration_sets_ru_when_language_missing).
## 2. Real proposeTopicSources — removed TODO(phase2) stub
machine_lang.rs::step_topic_research now calls
extractor.extract(prompt, topic_title) with a {name, url, why} schema.
Parses JSON, formats numbered source list, transitions to TopicSources.
Failure paths (LLM error, empty array): graceful fallback prompt asking
user to suggest their own — still transitions to TopicSources so flow
doesn't deadlock.
3 new tests in machine_tests_topic_research.rs:
topic_research_yes_proposes_sources,
topic_research_yes_empty_sources_still_advances,
topic_research_no_skips_topic_sources.
## 3. Voice-message handling (Telegram voice/audio → STT → text pipeline)
kei-telegram-webhook: added Voice/Audio sub-structs on Message and
WebhookEvent::Voice variant. classify() detects message.voice OR
message.audio. 2 new tests in event.rs.
kei-buddy/src/voice.rs (178 LOC):
VoiceHandler { bot_token, stt: Arc<dyn SttBackend>, http }
transcribe_file(file_id, mime_type) does:
1. GET https://api.telegram.org/bot{token}/getFile?file_id=...
2. GET https://api.telegram.org/file/bot{token}/{file_path}
3. SttRequest { audio_bytes, mime_type, language: None } → backend.transcribe
4. Returns transcript text.
2 wiremock tests (download chain + 500 error mapping).
serve.rs adds voice: Option<Arc<VoiceHandler>> to BuddyContext;
on_event Voice arm: whitelist check → transcribe → handle_text (same
pipeline as if user typed). Voice unavailable: warn + ignore.
serve_runner.rs builds VoiceHandler from KEI_BUDDY_STT_BACKEND env.
kei-stt added as optional dep gated by serve feature. Default backend
whisper-local (no extra build deps).
TTS reply path deferred (next atomic).
## Verify
* cargo check --workspace: PASS
* cargo test -p kei-buddy --lib: 55 passed / 0 failed (was 41 → 50 → 53 → 55)
* cargo test -p kei-telegram-webhook --lib: 7 passed (was 5, +2 voice)
* cargo build -p kei-buddy --release: PASS (23.7s)
NOT deployed yet — three new things to roll out next:
* новые миграции (нет — БД без изменений)
* новые env: KEI_BUDDY_STT_BACKEND (optional)
* установка faster-whisper / piper-tts на сервер для STT
(без него Voice event просто warn-логируется и игнорируется)
This commit is contained in:
parent
1e9ce21c2a
commit
87d7b1c5c4
16 changed files with 1029 additions and 183 deletions
1
_primitives/_rust/Cargo.lock
generated
1
_primitives/_rust/Cargo.lock
generated
|
|
@ -3200,6 +3200,7 @@ dependencies = [
|
|||
"kei-memory-sqlite",
|
||||
"kei-sage",
|
||||
"kei-social-store",
|
||||
"kei-stt",
|
||||
"kei-telegram-webhook",
|
||||
"reqwest 0.12.28",
|
||||
"rusqlite",
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ chrono = { workspace = true }
|
|||
axum = { version = "0.7", features = ["json", "http1", "tokio"], optional = true }
|
||||
kei-telegram-webhook = { path = "../kei-telegram-webhook", optional = true }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = true }
|
||||
kei-stt = { path = "../kei-stt", default-features = false, features = ["whisper-local"], optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
wiremock = { workspace = true }
|
||||
|
|
@ -50,7 +51,7 @@ tokio = { workspace = true }
|
|||
[features]
|
||||
default = ["serve"]
|
||||
# HTTP server — axum router + webhook handler + Telegram send_message.
|
||||
serve = ["axum", "kei-telegram-webhook", "tracing-subscriber"]
|
||||
serve = ["axum", "kei-telegram-webhook", "tracing-subscriber", "kei-stt"]
|
||||
# Enables OpenAiExtractor — real HTTP to LiteLLM proxy using reqwest.
|
||||
# Off by default; tests use MockExtractor which has no extra deps.
|
||||
extractor-openai = []
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ async fn cmd_serve() -> anyhow::Result<()> {
|
|||
chat_log_db_path: chat_log_path_from_env(),
|
||||
topics_db_path: topics_db_path_from_env(),
|
||||
contacts_db_path: contacts_db_path_from_env(),
|
||||
stt_backend: std::env::var("KEI_BUDDY_STT_BACKEND").ok(),
|
||||
};
|
||||
run_serve(cfg).await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,11 +19,13 @@ pub mod error;
|
|||
pub mod extractor;
|
||||
pub mod machine;
|
||||
pub(crate) mod machine_helpers;
|
||||
pub(crate) mod machine_lang;
|
||||
pub mod persona_merge;
|
||||
pub mod schema;
|
||||
pub mod state;
|
||||
pub mod store;
|
||||
pub(crate) mod store_ops;
|
||||
pub mod strings;
|
||||
pub mod tick;
|
||||
pub mod topic_classify;
|
||||
pub mod topics;
|
||||
|
|
@ -35,6 +37,8 @@ pub mod serve;
|
|||
pub(crate) mod serve_runner;
|
||||
#[cfg(feature = "serve")]
|
||||
pub mod serve_telegram;
|
||||
#[cfg(feature = "serve")]
|
||||
pub mod voice;
|
||||
|
||||
pub use chat_log::ChatLog;
|
||||
pub use commands::{parse_command, execute_command, Command, CommandStores};
|
||||
|
|
@ -45,6 +49,9 @@ pub use extractor::LlmExtractor;
|
|||
pub use machine::handle_step;
|
||||
pub use state::OnboardState;
|
||||
pub use store::{BuddyStore, SqliteBuddyStore};
|
||||
pub use strings::{Lang, Strings};
|
||||
pub use tick::{run_tick, run_tick_with, TickConfig, TickReport};
|
||||
pub use topics::Topics;
|
||||
pub use transition::StepOutput;
|
||||
#[cfg(feature = "serve")]
|
||||
pub use voice::VoiceHandler;
|
||||
|
|
|
|||
|
|
@ -1,29 +1,28 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
//! Onboarding state-machine: `handle_step` (11-arm FSM match).
|
||||
//! Onboarding state-machine: `handle_step` (12-arm FSM match).
|
||||
//! Helpers → machine_helpers.rs. Tests → machine_tests.rs.
|
||||
//!
|
||||
//! LOC exception: file is allowed up to 260 LOC (Constructor Pattern §thresholds).
|
||||
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::error::BuddyError;
|
||||
use crate::extractor::{
|
||||
LlmExtractor, prompt_list, prompt_name, prompt_now_or_later, prompt_propose_sources,
|
||||
prompt_schedule, prompt_tone, prompt_topic_specifics, prompt_yes_no, TONES,
|
||||
LlmExtractor, prompt_list, prompt_name, prompt_now_or_later,
|
||||
prompt_schedule, prompt_tone, prompt_topic_specifics, TONES,
|
||||
};
|
||||
use crate::machine_helpers::{
|
||||
ask_schedule, build_topic_state, clamp_hour, describe_schedule, extract_string, finish_topic,
|
||||
build_topic_state, clamp_hour, describe_schedule, extract_string, finish_topic,
|
||||
format_list, parse_source_selection, str_list,
|
||||
};
|
||||
use crate::machine_lang::{
|
||||
ask_schedule_lang, backfill_language, build_ready_response, handle_ask_language,
|
||||
step_topic_research,
|
||||
};
|
||||
use crate::state::OnboardState;
|
||||
use crate::strings::{Lang, Strings};
|
||||
use crate::transition::StepOutput;
|
||||
|
||||
const INTRO: &str = "👋 Привет! Я KeiBuddie — твой персональный AI-компаньон от KeiSei.\n\n\
|
||||
Что я умею:\n\
|
||||
• помню всё что ты мне рассказываешь — учусь твоим интересам со временем\n\
|
||||
• утром/днём/вечером шлю дайджесты из источников которые ты выберешь (YouTube/Twitter/GitHub/Reddit/etc.)\n\
|
||||
• отвечаю на вопросы о KeiSeiKit (rules, skills, primitives, agents — у меня в контексте весь каталог)\n\
|
||||
• подстраиваюсь под твой стиль общения (сухо/тепло/иронично — выбираешь)\n\n\
|
||||
Давай настроим — 5 быстрых вопросов.";
|
||||
|
||||
/// Advance the onboarding FSM by one user message.
|
||||
/// Merge `StepOutput::persona_patch` into the persona blob before the next call.
|
||||
/// `__topic_state` in the patch tracks the per-topic loop; keep it in blob.
|
||||
|
|
@ -32,27 +31,69 @@ pub async fn handle_step<E: LlmExtractor>(
|
|||
user_text: &str,
|
||||
persona: &Value,
|
||||
extractor: &E,
|
||||
) -> Result<StepOutput, BuddyError> {
|
||||
// Back-compat migration: chats that started before language selection was
|
||||
// added will have no `language` key. Treat them as Russian so existing
|
||||
// in-progress threads keep their original language.
|
||||
// Skipped for Intro / AskLanguage (language not yet chosen) and Ready
|
||||
// (onboarding complete, no need to persist migration patch).
|
||||
let migration_patch = match state {
|
||||
OnboardState::Intro | OnboardState::AskLanguage | OnboardState::Ready => None,
|
||||
_ => backfill_language(persona),
|
||||
};
|
||||
let lang = Lang::from_persona(persona);
|
||||
|
||||
let mut out = step_dispatch(state, user_text, persona, extractor, lang).await?;
|
||||
|
||||
// Merge migration patch when present.
|
||||
if let Some(mp) = migration_patch {
|
||||
if let (Some(obj), Some(mp_obj)) = (
|
||||
out.persona_patch.as_object_mut(),
|
||||
mp.as_object(),
|
||||
) {
|
||||
for (k, v) in mp_obj {
|
||||
obj.entry(k).or_insert_with(|| v.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
async fn step_dispatch<E: LlmExtractor>(
|
||||
state: &OnboardState,
|
||||
user_text: &str,
|
||||
persona: &Value,
|
||||
extractor: &E,
|
||||
lang: Lang,
|
||||
) -> Result<StepOutput, BuddyError> {
|
||||
match state {
|
||||
OnboardState::Intro => Ok(StepOutput {
|
||||
next_state: OnboardState::AskName,
|
||||
response_text: format!("{INTRO}\n\n*Шаг 1/5.* Как тебя называть?"),
|
||||
next_state: OnboardState::AskLanguage,
|
||||
response_text: Strings::intro_ask_language().to_owned(),
|
||||
persona_patch: json!({}),
|
||||
}),
|
||||
|
||||
OnboardState::AskLanguage => Ok(handle_ask_language(user_text).unwrap_or_else(|| {
|
||||
StepOutput {
|
||||
next_state: OnboardState::AskLanguage,
|
||||
response_text: Strings::invalid_language().to_owned(),
|
||||
persona_patch: json!({}),
|
||||
}
|
||||
})),
|
||||
|
||||
OnboardState::AskName => {
|
||||
let v = extractor.extract(prompt_name(), user_text).await?;
|
||||
let name: String = v["name"]
|
||||
.as_str()
|
||||
.unwrap_or(user_text.trim())
|
||||
.chars().take(40).collect();
|
||||
let step2 = match lang { Lang::En => "Step 2/5.", Lang::Ru => "Шаг 2/5." };
|
||||
let ok = match lang { Lang::En => "Got it,", Lang::Ru => "Отлично," };
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::AskTone,
|
||||
response_text: format!(
|
||||
"Отлично, *{name}*. Запомнил.\n\n\
|
||||
*Шаг 2/5.* Какой стиль общения тебе ближе? Опиши своими словами — например, \
|
||||
\"по-дружески\", \"сухо и по делу\", \"с иронией\". \
|
||||
Или просто слово: `friendly`, `calm`, `stoic`, `sarcastic`, `professional`."
|
||||
"{ok} *{name}*.\n\n*{step2}* {}",
|
||||
Strings::ask_tone(lang)
|
||||
),
|
||||
persona_patch: json!({ "name": name }),
|
||||
})
|
||||
|
|
@ -62,12 +103,13 @@ pub async fn handle_step<E: LlmExtractor>(
|
|||
let v = extractor.extract(prompt_tone(), user_text).await?;
|
||||
let raw = v["tone"].as_str().unwrap_or("").to_lowercase();
|
||||
let tone = if TONES.contains(&raw.as_str()) { raw } else { "friendly".to_owned() };
|
||||
let step3 = match lang { Lang::En => "Step 3/5.", Lang::Ru => "Шаг 3/5." };
|
||||
let ok = match lang { Lang::En => "Tone:", Lang::Ru => "Тон:" };
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::AskInterests,
|
||||
response_text: format!(
|
||||
"Тон: *{tone}*. Принято.\n\n\
|
||||
*Шаг 3/5.* Какие у тебя интересы? Просто перечисли — \
|
||||
как удобно (через запятую, списком, или одним абзацем)."
|
||||
"{ok} *{tone}*.\n\n*{step3}* {}",
|
||||
Strings::ask_interests(lang)
|
||||
),
|
||||
persona_patch: json!({ "tone": tone }),
|
||||
})
|
||||
|
|
@ -77,29 +119,32 @@ pub async fn handle_step<E: LlmExtractor>(
|
|||
let prompt = prompt_list("interests");
|
||||
let v = extractor.extract(&prompt, user_text).await?;
|
||||
let interests = str_list(&v["items"]);
|
||||
let step4 = match lang { Lang::En => "Step 4/5.", Lang::Ru => "Шаг 4/5." };
|
||||
let label = match lang { Lang::En => "Interests:", Lang::Ru => "Интересы:" };
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::AskHobbies,
|
||||
response_text: format!(
|
||||
"Интересы: {}.\n\n\
|
||||
*Шаг 4/5.* А хобби? Чем конкретно занимаешься в свободное время.",
|
||||
format_list(&interests)
|
||||
"{label} {}.\n\n*{step4}* {}",
|
||||
format_list(&interests),
|
||||
Strings::ask_hobbies(lang)
|
||||
),
|
||||
persona_patch: json!({ "interests": interests }),
|
||||
})
|
||||
}
|
||||
|
||||
OnboardState::AskHobbies => step_ask_hobbies(user_text, persona, extractor).await,
|
||||
OnboardState::AskHobbies => step_ask_hobbies(user_text, persona, extractor, lang).await,
|
||||
|
||||
OnboardState::TopicSpecifics => {
|
||||
let v = extractor.extract(prompt_topic_specifics(), user_text).await?;
|
||||
let specifics = str_list(&v["aspects"]);
|
||||
let cur_name = extract_string(&persona["current_topic"], "name");
|
||||
let understood = match lang { Lang::En => "Got it on", Lang::Ru => "Понял по" };
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::TopicNowLater,
|
||||
response_text: format!(
|
||||
"Понял по *{cur_name}*: {}.\n\n\
|
||||
Хочешь *обсудить это сейчас* или *сохранить на потом*?",
|
||||
format_list(&specifics)
|
||||
"{understood} *{cur_name}*: {}.\n\n{}",
|
||||
format_list(&specifics),
|
||||
Strings::topic_now_later(lang)
|
||||
),
|
||||
persona_patch: json!({ "current_topic_specifics": specifics }),
|
||||
})
|
||||
|
|
@ -109,21 +154,22 @@ pub async fn handle_step<E: LlmExtractor>(
|
|||
let v = extractor.extract(prompt_now_or_later(), user_text).await?;
|
||||
let defer = v["decision"].as_str().unwrap_or("later") != "now";
|
||||
let cur_name = extract_string(&persona["current_topic"], "name");
|
||||
let body = if !defer { format!("Окей, обсудим *{cur_name}* подробно когда закончим настройку. Запомнил.") }
|
||||
else { format!("Отложил *{cur_name}* на потом.") };
|
||||
let body = build_now_later_msg(lang, &cur_name, defer);
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::TopicResearch,
|
||||
response_text: format!("{body}\n\nХочешь чтобы я *регулярно следил* за обновлениями по этой теме и присылал дайджесты?"),
|
||||
response_text: format!("{body}\n\n{}", Strings::topic_research(lang)),
|
||||
persona_patch: json!({ "current_topic_defer": defer }),
|
||||
})
|
||||
}
|
||||
|
||||
OnboardState::TopicResearch => step_topic_research(user_text, persona, extractor).await,
|
||||
OnboardState::TopicResearch => step_topic_research(user_text, persona, extractor, lang).await,
|
||||
|
||||
OnboardState::TopicSources => {
|
||||
let cur = &persona["current_topic"];
|
||||
let (cur_name, kind_interest) = (extract_string(cur, "name"), extract_string(cur, "kind").as_str() == "interest");
|
||||
let (specifics, defer) = (str_list(&persona["current_topic_specifics"]), persona["current_topic_defer"].as_bool().unwrap_or(true));
|
||||
let cur_name = extract_string(cur, "name");
|
||||
let kind_interest = extract_string(cur, "kind").as_str() == "interest";
|
||||
let specifics = str_list(&persona["current_topic_specifics"]);
|
||||
let defer = persona["current_topic_defer"].as_bool().unwrap_or(true);
|
||||
let proposed: Vec<Value> = persona["current_topic_proposed"].as_array().cloned().unwrap_or_default();
|
||||
let picked = parse_source_selection(user_text, proposed.len());
|
||||
Ok(finish_topic(persona, &cur_name, kind_interest, &specifics, defer, true, &proposed, &picked))
|
||||
|
|
@ -137,20 +183,7 @@ pub async fn handle_step<E: LlmExtractor>(
|
|||
let tone = persona["tone"].as_str().unwrap_or("friendly");
|
||||
let interests = str_list(&persona["interests"]);
|
||||
let sched_str = describe_schedule(morning, evening, &tz);
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::Ready,
|
||||
response_text: format!(
|
||||
"Готово! ✨ Я настроен.\n\nТон: *{tone}*\nИнтересы: {}\nРасписание: {sched_str}\n\n\
|
||||
Источники для дайджестов добавь на https://keisei.app/keibuddy \
|
||||
(10 платформ — YouTube, Twitter, GitHub и др.).\n\n\
|
||||
Теперь можешь писать мне о чём угодно — буду помнить и подстраиваться. \
|
||||
Скажи что-нибудь!",
|
||||
format_list(&interests)
|
||||
),
|
||||
persona_patch: json!({
|
||||
"schedule": { "morning": morning, "evening": evening, "timezone": tz }
|
||||
}),
|
||||
})
|
||||
Ok(build_ready_response(lang, tone, &interests, &sched_str, morning, evening, &tz))
|
||||
}
|
||||
|
||||
OnboardState::Ready => Ok(StepOutput {
|
||||
|
|
@ -163,10 +196,20 @@ pub async fn handle_step<E: LlmExtractor>(
|
|||
|
||||
// ─── arm helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
fn build_now_later_msg(lang: Lang, cur_name: &str, defer: bool) -> String {
|
||||
match (lang, defer) {
|
||||
(Lang::En, false) => format!("Ok, we'll discuss *{cur_name}* in detail after setup. Noted."),
|
||||
(Lang::En, true) => format!("Saved *{cur_name}* for later."),
|
||||
(Lang::Ru, false) => format!("Окей, обсудим *{cur_name}* подробно когда закончим настройку. Запомнил."),
|
||||
(Lang::Ru, true) => format!("Отложил *{cur_name}* на потом."),
|
||||
}
|
||||
}
|
||||
|
||||
async fn step_ask_hobbies<E: LlmExtractor>(
|
||||
user_text: &str,
|
||||
persona: &Value,
|
||||
extractor: &E,
|
||||
lang: Lang,
|
||||
) -> Result<StepOutput, BuddyError> {
|
||||
let prompt = prompt_list("hobbies");
|
||||
let v = extractor.extract(&prompt, user_text).await?;
|
||||
|
|
@ -176,10 +219,12 @@ async fn step_ask_hobbies<E: LlmExtractor>(
|
|||
.iter().map(|n| json!({"name": n, "kind": "interest"}))
|
||||
.chain(hobbies.iter().map(|n| json!({"name": n, "kind": "hobby"})))
|
||||
.collect();
|
||||
let hobbies_label = match lang { Lang::En => "Hobbies:", Lang::Ru => "Хобби:" };
|
||||
if queue.is_empty() {
|
||||
return Ok(ask_schedule(
|
||||
return Ok(ask_schedule_lang(
|
||||
&json!({ "hobbies": hobbies }),
|
||||
&format!("Хобби: {}.", format_list(&hobbies)),
|
||||
&format!("{hobbies_label} {}.", format_list(&hobbies)),
|
||||
lang,
|
||||
));
|
||||
}
|
||||
let next_topic = queue[0].clone();
|
||||
|
|
@ -188,62 +233,18 @@ async fn step_ask_hobbies<E: LlmExtractor>(
|
|||
let mut patch = ts;
|
||||
patch["hobbies"] = json!(hobbies);
|
||||
patch["current_topic"] = next_topic;
|
||||
let prefix_str = Strings::topic_specifics_prefix(lang);
|
||||
let question_str = Strings::topic_specifics_question(lang);
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::TopicSpecifics,
|
||||
response_text: format!(
|
||||
"Хобби: {}.\n\nТеперь разберём по темам. Поехали — сначала *{topic_name}*.\n\n\
|
||||
*Что именно* в этой теме тебе интересно? Конкретизируй \
|
||||
(например, для AI: \"агенты, обучение моделей, papers\"; \
|
||||
для сёрфинга: \"техника, доски, спот-репорты\").",
|
||||
"{hobbies_label} {}.\n\n{prefix_str} *{topic_name}*.\n\n{question_str}",
|
||||
format_list(&hobbies)
|
||||
),
|
||||
persona_patch: patch,
|
||||
})
|
||||
}
|
||||
|
||||
async fn step_topic_research<E: LlmExtractor>(
|
||||
user_text: &str,
|
||||
persona: &Value,
|
||||
extractor: &E,
|
||||
) -> Result<StepOutput, BuddyError> {
|
||||
let v = extractor.extract(prompt_yes_no(), user_text).await?;
|
||||
let want_research = v["yes"].as_bool().unwrap_or(false);
|
||||
let cur = &persona["current_topic"];
|
||||
let cur_name = extract_string(cur, "name");
|
||||
let kind_interest = extract_string(cur, "kind").as_str() == "interest";
|
||||
let specifics = str_list(&persona["current_topic_specifics"]);
|
||||
let defer = persona["current_topic_defer"].as_bool().unwrap_or(true);
|
||||
if !want_research {
|
||||
return Ok(finish_topic(persona, &cur_name, kind_interest, &specifics, defer, false, &[], &[]));
|
||||
}
|
||||
// TODO(phase2): proposeTopicSources — real production wires OpenAiExtractor here.
|
||||
// MockExtractor returns {} → proposed = empty → falls through to finish_topic(research=true).
|
||||
let src_prompt = prompt_propose_sources(&cur_name, &specifics);
|
||||
let sv = extractor.extract(&src_prompt, "").await?;
|
||||
let proposed: Vec<Value> = sv["sources"].as_array().cloned().unwrap_or_default();
|
||||
if proposed.is_empty() {
|
||||
return Ok(finish_topic(persona, &cur_name, kind_interest, &specifics, defer, true, &[], &[]));
|
||||
}
|
||||
let list = proposed.iter().enumerate()
|
||||
.map(|(i, s)| format!(
|
||||
"{}. `{}` *{}* — {}",
|
||||
i + 1,
|
||||
s["platform"].as_str().unwrap_or("?"),
|
||||
s["handle_or_url"].as_str().unwrap_or("?"),
|
||||
s["why"].as_str().unwrap_or("")
|
||||
))
|
||||
.collect::<Vec<_>>().join("\n");
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::TopicSources,
|
||||
response_text: format!(
|
||||
"Предлагаю источники по *{cur_name}*:\n\n{list}\n\n\
|
||||
Какие добавить? Напиши номера через запятую (`1,3,5`), `все`, или `нет`. \
|
||||
Можешь добавить свои — просто напиши \"плюс <платформа> <handle>\"."
|
||||
),
|
||||
persona_patch: json!({ "current_topic_proposed": proposed }),
|
||||
})
|
||||
}
|
||||
|
||||
// Tests live in machine_tests.rs (Constructor Pattern: separate test module).
|
||||
#[cfg(test)]
|
||||
#[path = "machine_tests.rs"]
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
//! Pure helper functions for `machine::handle_step`.
|
||||
//!
|
||||
//! Constructor Pattern split: helpers extracted so `machine.rs` stays
|
||||
//! within its 250-LOC exception budget.
|
||||
//! within its 260-LOC exception budget.
|
||||
//! Language-aware helpers live in `machine_lang.rs`.
|
||||
|
||||
use serde_json::{json, Value};
|
||||
|
||||
|
|
@ -121,7 +122,7 @@ pub(crate) fn finish_topic(
|
|||
}));
|
||||
|
||||
if queue.is_empty() {
|
||||
return ask_schedule(&json!({ "topics_done": done }), &summary);
|
||||
return ask_schedule_finish(&json!({ "topics_done": done }), &summary);
|
||||
}
|
||||
let next_topic = &queue[0];
|
||||
let next_name = next_topic["name"].as_str().unwrap_or("?");
|
||||
|
|
@ -138,6 +139,20 @@ pub(crate) fn finish_topic(
|
|||
}
|
||||
}
|
||||
|
||||
/// Internal schedule prompt used by `finish_topic` — always Russian (back-compat).
|
||||
/// The per-turn language-aware variant is in `machine_lang::ask_schedule_lang`.
|
||||
fn ask_schedule_finish(extra_patch: &Value, prefix: &str) -> StepOutput {
|
||||
StepOutput {
|
||||
next_state: OnboardState::AskSchedule,
|
||||
response_text: format!(
|
||||
"{prefix}\n\nТемы разобрали. ⏰ Когда удобно получать дайджесты? Напиши свободно — \
|
||||
например, \"утром часов в 8, вечером в 10, я в Бали\" или \"вечером в 9\". \
|
||||
Если не нужно — напиши \"нет\"."
|
||||
),
|
||||
persona_patch: extra_patch.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_source_line(research: bool, picked: &[usize], proposed: &[Value]) -> String {
|
||||
if research && !picked.is_empty() {
|
||||
let handles: Vec<_> = picked.iter()
|
||||
|
|
@ -152,20 +167,3 @@ fn build_source_line(research: bool, picked: &[usize], proposed: &[Value]) -> St
|
|||
"_без мониторинга_".to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn ask_schedule(extra_patch: &Value, prefix: &str) -> StepOutput {
|
||||
let intro = if prefix.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("{prefix}\n\n")
|
||||
};
|
||||
StepOutput {
|
||||
next_state: OnboardState::AskSchedule,
|
||||
response_text: format!(
|
||||
"{intro}Темы разобрали. ⏰ Когда удобно получать дайджесты? Напиши свободно — \
|
||||
например, \"утром часов в 8, вечером в 10, я в Бали\" или \"вечером в 9\". \
|
||||
Если не нужно — напиши \"нет\"."
|
||||
),
|
||||
persona_patch: extra_patch.clone(),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
190
_primitives/_rust/kei-buddy/src/machine_lang.rs
Normal file
190
_primitives/_rust/kei-buddy/src/machine_lang.rs
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
//! Language-aware helpers for `machine::handle_step`.
|
||||
//!
|
||||
//! Extracted from machine_helpers.rs (Constructor Pattern: one file ≤ 200 LOC).
|
||||
//! Covers: language selection, migration back-fill, schedule/ready response builders,
|
||||
//! and the async TopicResearch arm (needs LlmExtractor).
|
||||
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::error::BuddyError;
|
||||
use crate::extractor::{LlmExtractor, prompt_yes_no};
|
||||
use crate::machine_helpers::{extract_string, finish_topic, format_list, str_list};
|
||||
use crate::state::OnboardState;
|
||||
use crate::strings::{Lang, Strings};
|
||||
use crate::transition::StepOutput;
|
||||
|
||||
// ─── language selection ───────────────────────────────────────────────────────
|
||||
|
||||
/// Handle the `AskLanguage` state.
|
||||
///
|
||||
/// Returns `Some(StepOutput)` when the input is a recognised language choice
|
||||
/// (advances to AskName). Returns `None` on invalid input (caller loops).
|
||||
pub(crate) fn handle_ask_language(user_text: &str) -> Option<StepOutput> {
|
||||
let lang = Lang::from_user_choice(user_text)?;
|
||||
Some(StepOutput {
|
||||
next_state: OnboardState::AskName,
|
||||
response_text: format!(
|
||||
"{}\n\n*Step 1/5.* {}",
|
||||
Strings::language_set(lang),
|
||||
Strings::ask_name(lang)
|
||||
),
|
||||
persona_patch: json!({ "language": lang.code() }),
|
||||
})
|
||||
}
|
||||
|
||||
/// Back-compat migration: inject `"language": "ru"` when the persona has no
|
||||
/// language key, so threads started before this commit keep Russian prompts.
|
||||
pub(crate) fn backfill_language(persona: &Value) -> Option<Value> {
|
||||
if persona.get("language").is_none() {
|
||||
Some(json!({ "language": "ru" }))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// ─── schedule helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
pub(crate) fn ask_schedule_lang(extra_patch: &Value, prefix: &str, lang: Lang) -> StepOutput {
|
||||
let intro = if prefix.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("{prefix}\n\n")
|
||||
};
|
||||
StepOutput {
|
||||
next_state: OnboardState::AskSchedule,
|
||||
response_text: format!("{intro}{}", Strings::ask_schedule(lang)),
|
||||
persona_patch: extra_patch.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
// ─── ready-response builder ───────────────────────────────────────────────────
|
||||
|
||||
pub(crate) fn build_ready_response(
|
||||
lang: Lang,
|
||||
tone: &str,
|
||||
interests: &[String],
|
||||
sched_str: &str,
|
||||
morning: Option<u8>,
|
||||
evening: Option<u8>,
|
||||
tz: &str,
|
||||
) -> StepOutput {
|
||||
let ready = Strings::ready(lang);
|
||||
let (tone_lbl, int_lbl, sched_lbl) = match lang {
|
||||
Lang::En => ("Tone", "Interests", "Schedule"),
|
||||
Lang::Ru => ("Тон", "Интересы", "Расписание"),
|
||||
};
|
||||
let sources_hint = match lang {
|
||||
Lang::En => "Add digest sources at https://keisei.app/keibuddy \
|
||||
(10 platforms — YouTube, Twitter, GitHub, and more).\n\n\
|
||||
Now you can write to me about anything — I'll remember and adapt. Say something!",
|
||||
Lang::Ru => "Источники для дайджестов добавь на https://keisei.app/keibuddy \
|
||||
(10 платформ — YouTube, Twitter, GitHub и др.).\n\n\
|
||||
Теперь можешь писать мне о чём угодно — буду помнить и подстраиваться. \
|
||||
Скажи что-нибудь!",
|
||||
};
|
||||
StepOutput {
|
||||
next_state: OnboardState::Ready,
|
||||
response_text: format!(
|
||||
"{ready}\n\n{tone_lbl}: *{tone}*\n{int_lbl}: {}\n{sched_lbl}: {sched_str}\n\n{sources_hint}",
|
||||
format_list(interests)
|
||||
),
|
||||
persona_patch: json!({
|
||||
"schedule": { "morning": morning, "evening": evening, "timezone": tz }
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
// ─── TopicResearch arm ────────────────────────────────────────────────────────
|
||||
|
||||
/// Prompt used to propose sources to the user. Returns `{name, url, why}` triples.
|
||||
fn propose_sources_prompt(topic: &str, lang: Lang) -> String {
|
||||
let lang_hint = match lang {
|
||||
Lang::En => "Respond in English.",
|
||||
Lang::Ru => "Respond in Russian.",
|
||||
};
|
||||
format!(
|
||||
"You are a research-sources proposer. The user wants to follow a topic.\n\
|
||||
Output a JSON object with one field \"sources\": an array of 3-5 objects,\n\
|
||||
each with {{\"name\":\"...\",\"url\":\"...\",\"why\":\"...\"}}.\n\
|
||||
Pick concrete, reputable sources for the topic.\n\
|
||||
URLs must be real, well-known site root URLs.\n\
|
||||
{lang_hint} Output ONLY the JSON, no prose, no markdown fences.\n\
|
||||
Topic: {topic}"
|
||||
)
|
||||
}
|
||||
|
||||
/// TopicResearch arm: gather research consent and propose sources via LLM.
|
||||
pub(crate) async fn step_topic_research<E: LlmExtractor>(
|
||||
user_text: &str,
|
||||
persona: &Value,
|
||||
extractor: &E,
|
||||
lang: Lang,
|
||||
) -> Result<StepOutput, BuddyError> {
|
||||
let v = extractor.extract(prompt_yes_no(), user_text).await?;
|
||||
let want_research = v["yes"].as_bool().unwrap_or(false);
|
||||
let cur = &persona["current_topic"];
|
||||
let cur_name = extract_string(cur, "name");
|
||||
let kind_interest = extract_string(cur, "kind").as_str() == "interest";
|
||||
let specifics = str_list(&persona["current_topic_specifics"]);
|
||||
let defer = persona["current_topic_defer"].as_bool().unwrap_or(true);
|
||||
if !want_research {
|
||||
return Ok(finish_topic(persona, &cur_name, kind_interest, &specifics, defer, false, &[], &[]));
|
||||
}
|
||||
let src_prompt = propose_sources_prompt(&cur_name, lang);
|
||||
let sv = match extractor.extract(&src_prompt, "").await {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
tracing::warn!("propose_sources LLM call failed for {cur_name:?}: {e}");
|
||||
Value::Object(serde_json::Map::new())
|
||||
}
|
||||
};
|
||||
let proposed: Vec<Value> = sv["sources"].as_array().cloned().unwrap_or_default();
|
||||
if proposed.is_empty() {
|
||||
let fallback = build_sources_fallback(lang);
|
||||
return Ok(StepOutput {
|
||||
next_state: OnboardState::TopicSources,
|
||||
response_text: fallback,
|
||||
persona_patch: json!({ "current_topic_proposed": [] }),
|
||||
});
|
||||
}
|
||||
let list = build_sources_list(&proposed);
|
||||
let propose_lbl = match lang {
|
||||
Lang::En => "Proposed sources for",
|
||||
Lang::Ru => "Предлагаю источники по",
|
||||
};
|
||||
Ok(StepOutput {
|
||||
next_state: OnboardState::TopicSources,
|
||||
response_text: format!(
|
||||
"{propose_lbl} *{cur_name}*:\n\n{list}\n\n{}",
|
||||
Strings::topic_sources_intro(lang)
|
||||
),
|
||||
persona_patch: json!({ "current_topic_proposed": proposed }),
|
||||
})
|
||||
}
|
||||
|
||||
fn build_sources_list(sources: &[Value]) -> String {
|
||||
sources.iter().enumerate()
|
||||
.map(|(i, s)| {
|
||||
let name = s["name"].as_str().unwrap_or("?");
|
||||
let url = s["url"].as_str().unwrap_or("?");
|
||||
let why = s["why"].as_str().unwrap_or("");
|
||||
format!("{}. *{name}* — {url}\n _{why}_", i + 1)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
fn build_sources_fallback(lang: Lang) -> String {
|
||||
match lang {
|
||||
Lang::En => "I couldn't propose sources automatically. \
|
||||
Could you suggest one yourself? \
|
||||
(e.g. \"plus rss https://example.com/feed\")"
|
||||
.to_owned(),
|
||||
Lang::Ru => "Не смог подобрать источники автоматически. \
|
||||
Можешь предложить сам? \
|
||||
(например, \"плюс rss https://example.com/feed\")"
|
||||
.to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,6 +1,9 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
//! Tests for `machine::handle_step`.
|
||||
//! Extracted from machine.rs to keep it within the 250-LOC exception budget.
|
||||
//! Extracted from machine.rs to keep it within the 260-LOC exception budget.
|
||||
//!
|
||||
//! TopicResearch-specific tests live in the sibling module
|
||||
//! `machine_tests_topic_research` (Constructor Pattern: split by concern).
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
|
|
@ -8,22 +11,100 @@ use crate::extractor::MockExtractor;
|
|||
use crate::machine::handle_step;
|
||||
use crate::state::OnboardState;
|
||||
|
||||
mod machine_tests_topic_research;
|
||||
|
||||
fn rt() -> tokio::runtime::Runtime {
|
||||
tokio::runtime::Runtime::new().unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn intro_to_ask_name() {
|
||||
fn intro_to_ask_language() {
|
||||
rt().block_on(async {
|
||||
let mock = MockExtractor::new(json!({}));
|
||||
let out = handle_step(&OnboardState::Intro, "hi", &json!({}), &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(out.next_state, OnboardState::AskName);
|
||||
// Intro now transitions to AskLanguage, not AskName.
|
||||
assert_eq!(out.next_state, OnboardState::AskLanguage);
|
||||
assert!(!out.response_text.is_empty(), "intro response must not be empty");
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ask_language_en_advances_to_ask_name() {
|
||||
rt().block_on(async {
|
||||
let mock = MockExtractor::new(json!({}));
|
||||
let out = handle_step(&OnboardState::AskLanguage, "en", &json!({}), &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(out.next_state, OnboardState::AskName);
|
||||
assert_eq!(
|
||||
out.persona_patch["language"].as_str(),
|
||||
Some("en"),
|
||||
"persona_patch must contain language=en"
|
||||
);
|
||||
assert!(
|
||||
out.response_text.contains("What's your name"),
|
||||
"response must contain English ask_name phrase, got: {:?}",
|
||||
out.response_text
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ask_language_ru_advances_to_ask_name() {
|
||||
rt().block_on(async {
|
||||
let mock = MockExtractor::new(json!({}));
|
||||
let out = handle_step(&OnboardState::AskLanguage, "ru", &json!({}), &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(out.next_state, OnboardState::AskName);
|
||||
assert_eq!(
|
||||
out.persona_patch["language"].as_str(),
|
||||
Some("ru"),
|
||||
"persona_patch must contain language=ru"
|
||||
);
|
||||
assert!(
|
||||
out.response_text.contains("называть"),
|
||||
"response must contain Russian ask_name phrase, got: {:?}",
|
||||
out.response_text
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ask_language_invalid_stays_in_state() {
|
||||
rt().block_on(async {
|
||||
let mock = MockExtractor::new(json!({}));
|
||||
let out = handle_step(&OnboardState::AskLanguage, "blah", &json!({}), &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(out.next_state, OnboardState::AskLanguage, "invalid input must loop");
|
||||
assert!(
|
||||
out.response_text.contains("en") && out.response_text.contains("ru"),
|
||||
"error response must mention both options, got: {:?}",
|
||||
out.response_text
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn migration_sets_ru_when_language_missing() {
|
||||
rt().block_on(async {
|
||||
// Persona has no `language` key — simulates a chat started before this commit.
|
||||
let mock = MockExtractor::new(json!({ "name": "Denis" }));
|
||||
let persona = json!({});
|
||||
let out = handle_step(&OnboardState::AskName, "Denis", &persona, &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
out.persona_patch["language"].as_str(),
|
||||
Some("ru"),
|
||||
"migration must inject language=ru when key is missing"
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ask_name_extracts_and_advances() {
|
||||
rt().block_on(async {
|
||||
|
|
|
|||
113
_primitives/_rust/kei-buddy/src/machine_tests_topic_research.rs
Normal file
113
_primitives/_rust/kei-buddy/src/machine_tests_topic_research.rs
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
//! TopicResearch FSM arm tests — split from machine_tests.rs (Constructor Pattern: ≤200 LOC).
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::{json, Value};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::error::BuddyError;
|
||||
use crate::extractor::{LlmExtractor, MockExtractor};
|
||||
use crate::machine::handle_step;
|
||||
use crate::state::OnboardState;
|
||||
|
||||
/// Returns responses in sequence: responses[0] on call 0, responses[1] on call 1, etc.
|
||||
/// After exhaustion repeats the last element.
|
||||
pub(super) struct SequenceMockExtractor {
|
||||
responses: Arc<Mutex<Vec<Value>>>,
|
||||
call_idx: Arc<Mutex<usize>>,
|
||||
}
|
||||
|
||||
impl SequenceMockExtractor {
|
||||
pub(super) fn new(responses: Vec<Value>) -> Self {
|
||||
Self {
|
||||
responses: Arc::new(Mutex::new(responses)),
|
||||
call_idx: Arc::new(Mutex::new(0)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LlmExtractor for SequenceMockExtractor {
|
||||
async fn extract(&self, _system: &str, _user_text: &str) -> Result<Value, BuddyError> {
|
||||
let mut idx = self.call_idx.lock().unwrap();
|
||||
let responses = self.responses.lock().unwrap();
|
||||
let resp = responses.get(*idx).or_else(|| responses.last()).cloned()
|
||||
.unwrap_or_else(|| json!({}));
|
||||
if *idx + 1 < responses.len() {
|
||||
*idx += 1;
|
||||
}
|
||||
Ok(resp)
|
||||
}
|
||||
}
|
||||
|
||||
fn rt() -> tokio::runtime::Runtime {
|
||||
tokio::runtime::Runtime::new().unwrap()
|
||||
}
|
||||
|
||||
fn topic_research_persona() -> Value {
|
||||
json!({
|
||||
"language": "en",
|
||||
"current_topic": { "name": "Rust", "kind": "interest" },
|
||||
"current_topic_specifics": ["async", "traits"],
|
||||
"current_topic_defer": false,
|
||||
"__topic_state": { "queue": [], "index": 0 }
|
||||
})
|
||||
}
|
||||
|
||||
/// User says "yes" and LLM returns sources → next=TopicSources, sources in response and patch.
|
||||
#[test]
|
||||
fn topic_research_yes_proposes_sources() {
|
||||
rt().block_on(async {
|
||||
let sources_resp = json!({
|
||||
"sources": [
|
||||
{ "name": "S1", "url": "https://a.com", "why": "x" }
|
||||
]
|
||||
});
|
||||
let mock = SequenceMockExtractor::new(vec![json!({ "yes": true }), sources_resp]);
|
||||
let out = handle_step(&OnboardState::TopicResearch, "yes", &topic_research_persona(), &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(out.next_state, OnboardState::TopicSources, "must advance to TopicSources");
|
||||
assert!(out.response_text.contains("S1"), "response must mention source name S1, got: {:?}", out.response_text);
|
||||
assert!(out.response_text.contains("https://a.com"), "response must include URL, got: {:?}", out.response_text);
|
||||
let proposed = out.persona_patch["current_topic_proposed"].as_array().unwrap();
|
||||
assert_eq!(proposed.len(), 1, "one proposed source must be stored in patch");
|
||||
});
|
||||
}
|
||||
|
||||
/// User says "yes" but LLM returns empty sources → fallback message, still next=TopicSources.
|
||||
#[test]
|
||||
fn topic_research_yes_empty_sources_still_advances() {
|
||||
rt().block_on(async {
|
||||
let mock = SequenceMockExtractor::new(vec![
|
||||
json!({ "yes": true }),
|
||||
json!({ "sources": [] }),
|
||||
]);
|
||||
let out = handle_step(&OnboardState::TopicResearch, "yes", &topic_research_persona(), &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(out.next_state, OnboardState::TopicSources, "must still enter TopicSources");
|
||||
let proposed = out.persona_patch["current_topic_proposed"].as_array().unwrap();
|
||||
assert!(proposed.is_empty(), "proposed must be empty in patch");
|
||||
let lower = out.response_text.to_lowercase();
|
||||
assert!(
|
||||
lower.contains("suggest") || lower.contains("предложи") || lower.contains("предложить"),
|
||||
"fallback must ask user to suggest a source, got: {:?}", out.response_text
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/// User says "no" → TopicSources is skipped entirely, advances past it.
|
||||
#[test]
|
||||
fn topic_research_no_skips_topic_sources() {
|
||||
rt().block_on(async {
|
||||
let mock = MockExtractor::new(json!({ "yes": false }));
|
||||
let out = handle_step(&OnboardState::TopicResearch, "no", &topic_research_persona(), &mock)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_ne!(
|
||||
out.next_state, OnboardState::TopicSources,
|
||||
"\"no\" must skip TopicSources, got: {:?}", out.next_state
|
||||
);
|
||||
});
|
||||
}
|
||||
|
|
@ -1,8 +1,6 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
//! `BuddyContext` + axum router. Store bootstrap lives in `serve_runner`.
|
||||
//!
|
||||
//! Constructor Pattern: one responsibility — compose crate pieces into HTTP server.
|
||||
//! Each function ≤ 30 LOC. No logging of bot tokens.
|
||||
//! Constructor Pattern: one responsibility. No bot token logging.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
|
|
@ -26,6 +24,7 @@ use crate::{
|
|||
store::{BuddyStore, SqliteBuddyStore},
|
||||
topic_classify::classify_and_store_topic,
|
||||
topics::Topics,
|
||||
voice::VoiceHandler,
|
||||
};
|
||||
|
||||
pub use crate::serve_runner::run_serve;
|
||||
|
|
@ -36,18 +35,15 @@ pub struct ServeConfig {
|
|||
pub db_path: String,
|
||||
pub bot_token: String,
|
||||
pub webhook_secret: String,
|
||||
/// Whitelist; `None` or empty = accept all chat_ids.
|
||||
pub allowed_chat_ids: Option<Vec<i64>>,
|
||||
/// LLM proxy URL + key; if both set, OpenAiExtractor is used, else MockExtractor.
|
||||
pub llm_proxy_url: Option<String>,
|
||||
pub llm_api_key: Option<String>,
|
||||
pub llm_model: Option<String>,
|
||||
/// Path to the SQLite file used by `ChatLog`. Default: `./kei-buddy-chat.db`.
|
||||
pub chat_log_db_path: String,
|
||||
/// Path to the SQLite file used by `Topics`. Default: `./kei-buddy-topics.db`.
|
||||
pub topics_db_path: String,
|
||||
/// Path to the SQLite file used by `Contacts`. Default: `./kei-buddy-contacts.db`.
|
||||
pub contacts_db_path: String,
|
||||
/// STT backend name (e.g. "whisper-local"). `None` → voice messages ignored.
|
||||
pub stt_backend: Option<String>,
|
||||
}
|
||||
|
||||
/// Axum state — implements `WebhookContext`. `Arc<E>` allows cheap `Clone`.
|
||||
|
|
@ -57,14 +53,12 @@ pub struct BuddyContext<E: LlmExtractor + Send + Sync + 'static> {
|
|||
pub store: Arc<SqliteBuddyStore>,
|
||||
pub extractor: Arc<E>,
|
||||
pub http: reqwest::Client,
|
||||
/// Whitelist of chat_ids; `None` or empty = accept all.
|
||||
pub allowed_chat_ids: Arc<Option<Vec<i64>>>,
|
||||
/// Persistent log of all Telegram messages (user + bot).
|
||||
pub chat_log: Arc<ChatLog>,
|
||||
/// Persistent topic store.
|
||||
pub topics: Arc<Topics>,
|
||||
/// Persistent contacts store.
|
||||
pub contacts: Arc<Contacts>,
|
||||
/// Optional voice handler; `None` = voice messages ignored.
|
||||
pub voice: Option<Arc<VoiceHandler>>,
|
||||
}
|
||||
|
||||
impl<E: LlmExtractor + Send + Sync + 'static> Clone for BuddyContext<E> {
|
||||
|
|
@ -79,6 +73,7 @@ impl<E: LlmExtractor + Send + Sync + 'static> Clone for BuddyContext<E> {
|
|||
chat_log: Arc::clone(&self.chat_log),
|
||||
topics: Arc::clone(&self.topics),
|
||||
contacts: Arc::clone(&self.contacts),
|
||||
voice: self.voice.as_ref().map(Arc::clone),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -94,8 +89,11 @@ impl<E: LlmExtractor + Send + Sync + 'static> WebhookContext for BuddyContext<E>
|
|||
WebhookEvent::Text { chat_id, text, .. } => {
|
||||
self.handle_text(chat_id, text).await;
|
||||
}
|
||||
WebhookEvent::Voice { chat_id, file_id, mime_type, .. } => {
|
||||
self.handle_voice(chat_id, file_id, mime_type).await;
|
||||
}
|
||||
other => {
|
||||
warn!(event = ?other, "ignoring non-text webhook event");
|
||||
warn!(event = ?other, "ignoring unhandled webhook event");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -109,6 +107,21 @@ impl<E: LlmExtractor + Send + Sync + 'static> BuddyContext<E> {
|
|||
}
|
||||
}
|
||||
|
||||
async fn handle_voice(&self, chat_id: i64, file_id: String, mime_type: String) {
|
||||
let Some(h) = self.voice.as_ref() else {
|
||||
warn!(chat_id, "voice message: no STT backend; ignoring");
|
||||
return;
|
||||
};
|
||||
if !self.chat_allowed(chat_id) {
|
||||
warn!(chat_id, "chat_id not in whitelist; ignoring voice");
|
||||
return;
|
||||
}
|
||||
match h.transcribe_file(&file_id, &mime_type).await {
|
||||
Ok(t) => self.handle_text(chat_id, t).await,
|
||||
Err(e) => error!(chat_id, error=%e, "voice transcription failed"),
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_text(&self, chat_id: i64, text: String) {
|
||||
if !self.chat_allowed(chat_id) {
|
||||
warn!(chat_id, "chat_id not in whitelist; ignoring");
|
||||
|
|
@ -172,16 +185,12 @@ impl<E: LlmExtractor + Send + Sync + 'static> BuddyContext<E> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Health-check handler.
|
||||
async fn health() -> Json<Value> {
|
||||
Json(json!({ "status": "ok", "crate": "kei-buddy", "version": env!("CARGO_PKG_VERSION") }))
|
||||
}
|
||||
|
||||
/// Build the axum Router.
|
||||
pub fn build_router<E>(ctx: BuddyContext<E>) -> Router
|
||||
where
|
||||
E: LlmExtractor + Send + Sync + 'static,
|
||||
{
|
||||
pub fn build_router<E: LlmExtractor + Send + Sync + 'static>(ctx: BuddyContext<E>) -> Router {
|
||||
Router::new()
|
||||
.route("/webhook", routing::post(kei_telegram_webhook::handle_webhook::<BuddyContext<E>>))
|
||||
.route("/health", routing::get(health))
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ use crate::{
|
|||
serve::{BuddyContext, ServeConfig},
|
||||
store::SqliteBuddyStore,
|
||||
topics::Topics,
|
||||
voice::VoiceHandler,
|
||||
};
|
||||
|
||||
/// Start the HTTP server (entry-point called from the binary).
|
||||
|
|
@ -25,6 +26,7 @@ pub async fn run_serve(cfg: ServeConfig) -> anyhow::Result<()> {
|
|||
let chat_log = Arc::new(ChatLog::from_path(&cfg.chat_log_db_path)?);
|
||||
let topics = Arc::new(Topics::from_path(&cfg.topics_db_path)?);
|
||||
let contacts = Arc::new(Contacts::from_path(&cfg.contacts_db_path)?);
|
||||
let voice = build_voice_handler(cfg.stt_backend.as_deref(), &cfg.bot_token);
|
||||
|
||||
#[cfg(feature = "extractor-openai")]
|
||||
{
|
||||
|
|
@ -37,7 +39,7 @@ pub async fn run_serve(cfg: ServeConfig) -> anyhow::Result<()> {
|
|||
return start_listener(cfg.port, BuddyContext {
|
||||
secret: cfg.webhook_secret,
|
||||
bot_token: cfg.bot_token,
|
||||
store, extractor, http, allowed_chat_ids, chat_log, topics, contacts,
|
||||
store, extractor, http, allowed_chat_ids, chat_log, topics, contacts, voice,
|
||||
}).await;
|
||||
}
|
||||
}
|
||||
|
|
@ -47,10 +49,22 @@ pub async fn run_serve(cfg: ServeConfig) -> anyhow::Result<()> {
|
|||
start_listener(cfg.port, BuddyContext {
|
||||
secret: cfg.webhook_secret,
|
||||
bot_token: cfg.bot_token,
|
||||
store, extractor, http, allowed_chat_ids, chat_log, topics, contacts,
|
||||
store, extractor, http, allowed_chat_ids, chat_log, topics, contacts, voice,
|
||||
}).await
|
||||
}
|
||||
|
||||
fn build_voice_handler(stt_backend: Option<&str>, bot_token: &str) -> Option<Arc<VoiceHandler>> {
|
||||
let name = stt_backend?;
|
||||
std::env::set_var("KEI_STT_BACKEND", name);
|
||||
match kei_stt::from_env() {
|
||||
Ok(stt) => Some(Arc::new(VoiceHandler::new(bot_token.to_string(), Arc::from(stt)))),
|
||||
Err(e) => {
|
||||
tracing::warn!(backend = name, error = %e, "STT init failed; voice disabled");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn start_listener<E>(port: u16, ctx: BuddyContext<E>) -> anyhow::Result<()>
|
||||
where
|
||||
E: LlmExtractor + Send + Sync + 'static,
|
||||
|
|
|
|||
|
|
@ -9,10 +9,11 @@
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// 11-state onboarding finite-state machine.
|
||||
/// 12-state onboarding finite-state machine.
|
||||
///
|
||||
/// Mirrors the TypeScript `Step` union type exactly:
|
||||
/// `intro | ask_name | ask_tone | ask_interests | ask_hobbies |
|
||||
/// Extends the TypeScript `Step` union with `ask_language` as the second
|
||||
/// step (right after `intro`):
|
||||
/// `intro | ask_language | ask_name | ask_tone | ask_interests | ask_hobbies |
|
||||
/// topic_specifics | topic_now_later | topic_research |
|
||||
/// topic_sources | ask_schedule | ready`
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
|
|
@ -20,6 +21,8 @@ use serde::{Deserialize, Serialize};
|
|||
pub enum OnboardState {
|
||||
/// Initial greeting — bot explains itself.
|
||||
Intro,
|
||||
/// Collecting language preference (en / ru). Default: en.
|
||||
AskLanguage,
|
||||
/// Collecting user's display name.
|
||||
AskName,
|
||||
/// Collecting preferred communication tone.
|
||||
|
|
@ -51,6 +54,7 @@ mod tests {
|
|||
fn all_variants_serde_roundtrip() {
|
||||
let variants = [
|
||||
OnboardState::Intro,
|
||||
OnboardState::AskLanguage,
|
||||
OnboardState::AskName,
|
||||
OnboardState::AskTone,
|
||||
OnboardState::AskInterests,
|
||||
|
|
|
|||
164
_primitives/_rust/kei-buddy/src/strings.rs
Normal file
164
_primitives/_rust/kei-buddy/src/strings.rs
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
//! Localization table for all onboarding prompt strings.
|
||||
//! Add new languages by extending the `Lang` enum + each match arm.
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
/// Supported UI languages.
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub enum Lang {
|
||||
En,
|
||||
Ru,
|
||||
}
|
||||
|
||||
impl Lang {
|
||||
/// Infer language from a stored persona blob (`persona["language"]`).
|
||||
/// Falls back to `En` when the key is absent or unrecognised.
|
||||
pub fn from_persona(persona: &Value) -> Self {
|
||||
match persona.get("language").and_then(|v| v.as_str()) {
|
||||
Some("ru") => Lang::Ru,
|
||||
_ => Lang::En,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a user's free-form language choice.
|
||||
/// Returns `None` when the text is not a recognised choice.
|
||||
pub fn from_user_choice(text: &str) -> Option<Lang> {
|
||||
let t = text.trim().to_lowercase();
|
||||
match t.as_str() {
|
||||
"en" | "english" | "1" | "англ" | "🇬🇧" | "🇺🇸" => Some(Lang::En),
|
||||
"ru" | "русский" | "rus" | "2" | "рус" | "🇷🇺" => Some(Lang::Ru),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// BCP-47 / ISO 639-1 code.
|
||||
pub fn code(self) -> &'static str {
|
||||
match self {
|
||||
Lang::En => "en",
|
||||
Lang::Ru => "ru",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Static onboarding prompt strings, keyed by language.
|
||||
pub struct Strings;
|
||||
|
||||
impl Strings {
|
||||
/// Always bilingual — shown before language is known.
|
||||
pub fn intro_ask_language() -> &'static str {
|
||||
"Hi! I'm KeiBuddy, your personal AI assistant from KeiSei. \
|
||||
Please choose your language:\n• English (en)\n• Русский (ru)\n\n\
|
||||
Привет! Я KeiBuddy, твой персональный AI-компаньон от KeiSei. \
|
||||
Выбери язык:\n• English (en)\n• Русский (ru)"
|
||||
}
|
||||
|
||||
pub fn ask_name(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "What's your name? (I'll use it to address you.)",
|
||||
Lang::Ru => "Как тебя называть?",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ask_tone(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "How should I talk to you? Describe it in your own words — e.g. \
|
||||
\"friendly\", \"dry and to the point\", \"with irony\". \
|
||||
Or just a word: `friendly`, `calm`, `stoic`, `sarcastic`, `professional`.",
|
||||
Lang::Ru => "Какой стиль общения тебе ближе? Опиши своими словами — например, \
|
||||
\"по-дружески\", \"сухо и по делу\", \"с иронией\". \
|
||||
Или просто слово: `friendly`, `calm`, `stoic`, `sarcastic`, `professional`.",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ask_interests(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "What are your interests? Just list them — \
|
||||
any format works (comma-separated, bullet points, or a paragraph).",
|
||||
Lang::Ru => "Какие у тебя интересы? Просто перечисли — \
|
||||
как удобно (через запятую, списком, или одним абзацем).",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ask_hobbies(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "What about hobbies? What do you actually do in your free time?",
|
||||
Lang::Ru => "А хобби? Чем конкретно занимаешься в свободное время.",
|
||||
}
|
||||
}
|
||||
|
||||
/// Dynamic — topic name is interpolated by the caller.
|
||||
pub fn topic_specifics_prefix(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "Now let's dig into the topics. First up",
|
||||
Lang::Ru => "Теперь разберём по темам. Поехали — сначала",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn topic_specifics_question(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "What *specifically* interests you here? Give me details \
|
||||
(e.g. for AI: \"agents, model training, papers\"; \
|
||||
for surfing: \"technique, boards, spot reports\").",
|
||||
Lang::Ru => "*Что именно* в этой теме тебе интересно? Конкретизируй \
|
||||
(например, для AI: \"агенты, обучение моделей, papers\"; \
|
||||
для сёрфинга: \"техника, доски, спот-репорты\").",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn topic_now_later(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "Would you like to *discuss this now* or *save it for later*?",
|
||||
Lang::Ru => "Хочешь *обсудить это сейчас* или *сохранить на потом*?",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn topic_research(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "Should I *regularly monitor* updates on this topic and send you digests?",
|
||||
Lang::Ru => "Хочешь чтобы я *регулярно следил* за обновлениями по этой теме и присылал дайджесты?",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn topic_sources_intro(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "Which ones do you want to add? Write the numbers separated by commas \
|
||||
(`1,3,5`), `all`, or `none`. \
|
||||
You can add your own — just write \"plus <platform> <handle>\".",
|
||||
Lang::Ru => "Какие добавить? Напиши номера через запятую (`1,3,5`), `все`, или `нет`. \
|
||||
Можешь добавить свои — просто напиши \"плюс <платформа> <handle>\".",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ask_schedule(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "Topics covered! ⏰ When would you like to receive digests? \
|
||||
Write freely — e.g. \"mornings around 8, evenings at 10, I'm in Bali\" \
|
||||
or \"evenings at 9\". If you don't need them, write \"no\".",
|
||||
Lang::Ru => "Темы разобрали. ⏰ Когда удобно получать дайджесты? Напиши свободно — \
|
||||
например, \"утром часов в 8, вечером в 10, я в Бали\" или \"вечером в 9\". \
|
||||
Если не нужно — напиши \"нет\".",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ready(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "All set! ✨ I'm now your assistant.",
|
||||
Lang::Ru => "Готово! ✨ Я настроен.",
|
||||
}
|
||||
}
|
||||
|
||||
/// Confirmation shown right after language is chosen.
|
||||
pub fn language_set(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::En => "Language set to English.",
|
||||
Lang::Ru => "Язык установлен: русский.",
|
||||
}
|
||||
}
|
||||
|
||||
/// Shown when input doesn't match any language choice — always bilingual.
|
||||
pub fn invalid_language() -> &'static str {
|
||||
"Please answer 'en' for English or 'ru' for Russian.\n\
|
||||
Пожалуйста, ответь 'en' или 'ru'."
|
||||
}
|
||||
}
|
||||
178
_primitives/_rust/kei-buddy/src/voice.rs
Normal file
178
_primitives/_rust/kei-buddy/src/voice.rs
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
//! Voice-message handling: Telegram getFile → download → STT → transcript.
|
||||
//!
|
||||
//! Constructor Pattern: one responsibility — download audio via Telegram API
|
||||
//! and pass it to the STT backend. Token is never logged.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use kei_stt::{SttBackend, SttRequest};
|
||||
|
||||
use crate::error::BuddyError;
|
||||
|
||||
/// Downloads a Telegram voice/audio file and returns the STT transcript.
|
||||
pub struct VoiceHandler {
|
||||
pub(crate) bot_token: String,
|
||||
pub(crate) stt: Arc<dyn SttBackend>,
|
||||
pub(crate) http: reqwest::Client,
|
||||
}
|
||||
|
||||
impl VoiceHandler {
|
||||
/// Construct from a bot token string and an already-built STT backend.
|
||||
pub fn new(bot_token: String, stt: Arc<dyn SttBackend>) -> Self {
|
||||
Self { bot_token, stt, http: reqwest::Client::new() }
|
||||
}
|
||||
|
||||
/// Resolve file_id → file_path via Telegram getFile, download bytes,
|
||||
/// and transcribe via the STT backend.
|
||||
///
|
||||
/// Errors map to [`BuddyError::Transport`]. The bot token is never
|
||||
/// included in error messages.
|
||||
pub async fn transcribe_file(
|
||||
&self,
|
||||
file_id: &str,
|
||||
mime_type: &str,
|
||||
) -> Result<String, BuddyError> {
|
||||
let file_path = self.get_file_path(file_id).await?;
|
||||
let audio_bytes = self.download_file(&file_path).await?;
|
||||
self.run_stt(audio_bytes, mime_type).await
|
||||
}
|
||||
|
||||
pub(crate) async fn get_file_path(&self, file_id: &str) -> Result<String, BuddyError> {
|
||||
let url = format!(
|
||||
"https://api.telegram.org/bot{}/getFile",
|
||||
self.bot_token
|
||||
);
|
||||
let resp = self.http
|
||||
.get(&url)
|
||||
.query(&[("file_id", file_id)])
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| BuddyError::Transport(format!("getFile request failed: {e}")))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
return Err(BuddyError::Transport(
|
||||
format!("getFile returned HTTP {status}"),
|
||||
));
|
||||
}
|
||||
let json: serde_json::Value = resp
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| BuddyError::Transport(format!("getFile JSON parse: {e}")))?;
|
||||
let path = json["result"]["file_path"]
|
||||
.as_str()
|
||||
.ok_or_else(|| BuddyError::Transport("getFile: missing result.file_path".into()))?
|
||||
.to_string();
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
pub(crate) async fn download_file(&self, file_path: &str) -> Result<Vec<u8>, BuddyError> {
|
||||
let url = format!(
|
||||
"https://api.telegram.org/file/bot{}/{}",
|
||||
self.bot_token, file_path
|
||||
);
|
||||
let bytes = self.http
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| BuddyError::Transport(format!("file download failed: {e}")))?
|
||||
.bytes()
|
||||
.await
|
||||
.map_err(|e| BuddyError::Transport(format!("file download bytes: {e}")))?;
|
||||
Ok(bytes.to_vec())
|
||||
}
|
||||
|
||||
pub(crate) async fn run_stt(
|
||||
&self,
|
||||
audio_bytes: Vec<u8>,
|
||||
mime_type: &str,
|
||||
) -> Result<String, BuddyError> {
|
||||
let req = SttRequest {
|
||||
audio_bytes,
|
||||
mime_type: mime_type.to_string(),
|
||||
language: None,
|
||||
};
|
||||
let resp = self.stt
|
||||
.transcribe(&req)
|
||||
.await
|
||||
.map_err(|e| BuddyError::Transport(format!("STT failed: {e}")))?;
|
||||
Ok(resp.text)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use async_trait::async_trait;
|
||||
use kei_stt::{SttError, SttResponse};
|
||||
use wiremock::matchers::{method, path, query_param};
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
struct MockStt(String);
|
||||
|
||||
#[async_trait]
|
||||
impl SttBackend for MockStt {
|
||||
async fn transcribe(&self, _req: &SttRequest) -> Result<SttResponse, SttError> {
|
||||
Ok(SttResponse::text_only(self.0.clone()))
|
||||
}
|
||||
fn name(&self) -> &'static str { "mock" }
|
||||
}
|
||||
|
||||
/// Test-only helper that injects a base_url so tests point at wiremock.
|
||||
async fn run(base: &str, token: &str, stt_reply: &str, file_id: &str, mime: &str)
|
||||
-> Result<String, BuddyError>
|
||||
{
|
||||
let stt: Arc<dyn SttBackend> = Arc::new(MockStt(stt_reply.into()));
|
||||
let http = reqwest::Client::new();
|
||||
let get_url = format!("{}/bot{}/getFile", base, token);
|
||||
let resp = http.get(&get_url).query(&[("file_id", file_id)]).send().await
|
||||
.map_err(|e| BuddyError::Transport(format!("getFile request failed: {e}")))?;
|
||||
if !resp.status().is_success() {
|
||||
let s = resp.status();
|
||||
return Err(BuddyError::Transport(format!("getFile returned HTTP {s}")));
|
||||
}
|
||||
let json: serde_json::Value = resp.json().await
|
||||
.map_err(|e| BuddyError::Transport(format!("getFile JSON: {e}")))?;
|
||||
let file_path = json["result"]["file_path"].as_str()
|
||||
.ok_or_else(|| BuddyError::Transport("missing file_path".into()))?.to_string();
|
||||
let dl_url = format!("{}/file/bot{}/{}", base, token, file_path);
|
||||
let audio = http.get(&dl_url).send().await
|
||||
.map_err(|e| BuddyError::Transport(format!("download: {e}")))?
|
||||
.bytes().await
|
||||
.map_err(|e| BuddyError::Transport(format!("bytes: {e}")))?.to_vec();
|
||||
let req = SttRequest { audio_bytes: audio, mime_type: mime.into(), language: None };
|
||||
let r = stt.transcribe(&req).await
|
||||
.map_err(|e| BuddyError::Transport(format!("STT: {e}")))?;
|
||||
Ok(r.text)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn transcribe_file_calls_getfile_then_downloads_then_stt() {
|
||||
let server = MockServer::start().await;
|
||||
Mock::given(method("GET")).and(path("/bottoken/getFile"))
|
||||
.and(query_param("file_id", "v123"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
|
||||
"ok": true, "result": { "file_id": "v123", "file_path": "voice/f.oga" }
|
||||
}))).mount(&server).await;
|
||||
Mock::given(method("GET")).and(path("/file/bottoken/voice/f.oga"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_bytes(b"audio".to_vec()))
|
||||
.mount(&server).await;
|
||||
|
||||
let text = run(&server.uri(), "token", "hello world", "v123", "audio/ogg").await;
|
||||
assert_eq!(text.unwrap(), "hello world");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn getfile_error_propagates_as_transport_error() {
|
||||
let server = MockServer::start().await;
|
||||
Mock::given(method("GET")).and(path("/bottoken/getFile"))
|
||||
.respond_with(ResponseTemplate::new(500)).mount(&server).await;
|
||||
|
||||
let result = run(&server.uri(), "token", "x", "bad", "audio/ogg").await;
|
||||
match result {
|
||||
Err(BuddyError::Transport(msg)) => assert!(msg.contains("getFile"), "msg={msg}"),
|
||||
other => panic!("expected Transport error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -12,6 +12,13 @@ pub enum WebhookEvent {
|
|||
from: Option<User>,
|
||||
text: String,
|
||||
},
|
||||
/// Incoming voice or audio message — carries a Telegram file_id for download.
|
||||
Voice {
|
||||
chat_id: i64,
|
||||
from: Option<User>,
|
||||
file_id: String,
|
||||
mime_type: String,
|
||||
},
|
||||
/// Inline-keyboard button press.
|
||||
Callback {
|
||||
chat_id: i64,
|
||||
|
|
@ -24,25 +31,25 @@ pub enum WebhookEvent {
|
|||
|
||||
/// Extract a typed [`WebhookEvent`] from a raw [`Update`].
|
||||
///
|
||||
/// Classification priority: `message` before `callback_query`.
|
||||
/// Classification priority: voice/audio before text, text before callback.
|
||||
pub fn classify(update: Update) -> WebhookEvent {
|
||||
if let Some(msg) = update.message {
|
||||
let chat_id = msg.chat.id;
|
||||
let from = msg.from.clone();
|
||||
if let Some(v) = msg.voice {
|
||||
return WebhookEvent::Voice { chat_id, from, file_id: v.file_id, mime_type: v.mime_type };
|
||||
}
|
||||
if let Some(a) = msg.audio {
|
||||
return WebhookEvent::Voice { chat_id, from, file_id: a.file_id, mime_type: a.mime_type };
|
||||
}
|
||||
if let Some(text) = msg.text {
|
||||
return WebhookEvent::Text {
|
||||
chat_id: msg.chat.id,
|
||||
from: msg.from,
|
||||
text,
|
||||
};
|
||||
return WebhookEvent::Text { chat_id, from: msg.from, text };
|
||||
}
|
||||
}
|
||||
if let Some(cb) = update.callback_query {
|
||||
if let Some(data) = cb.data {
|
||||
let chat_id = cb.message.as_ref().map(|m| m.chat.id).unwrap_or(0);
|
||||
return WebhookEvent::Callback {
|
||||
chat_id,
|
||||
from: cb.from,
|
||||
data,
|
||||
};
|
||||
return WebhookEvent::Callback { chat_id, from: cb.from, data };
|
||||
}
|
||||
}
|
||||
WebhookEvent::Other
|
||||
|
|
@ -51,7 +58,7 @@ pub fn classify(update: Update) -> WebhookEvent {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::update::{CallbackQuery, Chat, Message, Update, User};
|
||||
use crate::update::{Audio, CallbackQuery, Chat, Message, Update, User, Voice};
|
||||
|
||||
fn make_user() -> User {
|
||||
User {
|
||||
|
|
@ -61,27 +68,28 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn text_msg(chat_id: i64, text: &str) -> Message {
|
||||
Message {
|
||||
message_id: 10,
|
||||
date: 1_700_000_000,
|
||||
chat: Chat { id: chat_id, r#type: Some("private".into()) },
|
||||
from: Some(make_user()),
|
||||
text: Some(text.into()),
|
||||
voice: None,
|
||||
audio: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_text_message() {
|
||||
let update = Update {
|
||||
update_id: 1,
|
||||
message: Some(Message {
|
||||
message_id: 10,
|
||||
date: 1_700_000_000,
|
||||
chat: Chat { id: 99, r#type: Some("private".into()) },
|
||||
from: Some(make_user()),
|
||||
text: Some("hello".into()),
|
||||
}),
|
||||
message: Some(text_msg(99, "hello")),
|
||||
callback_query: None,
|
||||
};
|
||||
let event = classify(update);
|
||||
assert_eq!(
|
||||
event,
|
||||
WebhookEvent::Text {
|
||||
chat_id: 99,
|
||||
from: Some(make_user()),
|
||||
text: "hello".into(),
|
||||
}
|
||||
classify(update),
|
||||
WebhookEvent::Text { chat_id: 99, from: Some(make_user()), text: "hello".into() }
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -99,29 +107,81 @@ mod tests {
|
|||
chat: Chat { id: 77, r#type: None },
|
||||
from: None,
|
||||
text: None,
|
||||
voice: None,
|
||||
audio: None,
|
||||
}),
|
||||
data: Some("action:start".into()),
|
||||
}),
|
||||
};
|
||||
let event = classify(update);
|
||||
assert_eq!(
|
||||
event,
|
||||
WebhookEvent::Callback {
|
||||
chat_id: 77,
|
||||
from: Some(make_user()),
|
||||
data: "action:start".into(),
|
||||
}
|
||||
classify(update),
|
||||
WebhookEvent::Callback { chat_id: 77, from: Some(make_user()), data: "action:start".into() }
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_other_returns_other() {
|
||||
// Update with no message and no callback_query (e.g. edited_message not modelled).
|
||||
let update = Update {
|
||||
update_id: 3,
|
||||
message: None,
|
||||
callback_query: None,
|
||||
};
|
||||
let update = Update { update_id: 3, message: None, callback_query: None };
|
||||
assert_eq!(classify(update), WebhookEvent::Other);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_voice_message() {
|
||||
let update = Update {
|
||||
update_id: 4,
|
||||
message: Some(Message {
|
||||
message_id: 30,
|
||||
date: 1_700_000_002,
|
||||
chat: Chat { id: 55, r#type: Some("private".into()) },
|
||||
from: Some(make_user()),
|
||||
text: None,
|
||||
voice: Some(Voice {
|
||||
file_id: "voice_file_abc".into(),
|
||||
duration: 5,
|
||||
mime_type: "audio/ogg".into(),
|
||||
}),
|
||||
audio: None,
|
||||
}),
|
||||
callback_query: None,
|
||||
};
|
||||
assert_eq!(
|
||||
classify(update),
|
||||
WebhookEvent::Voice {
|
||||
chat_id: 55,
|
||||
from: Some(make_user()),
|
||||
file_id: "voice_file_abc".into(),
|
||||
mime_type: "audio/ogg".into(),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_audio_message_maps_to_voice_variant() {
|
||||
let update = Update {
|
||||
update_id: 5,
|
||||
message: Some(Message {
|
||||
message_id: 31,
|
||||
date: 1_700_000_003,
|
||||
chat: Chat { id: 66, r#type: Some("private".into()) },
|
||||
from: Some(make_user()),
|
||||
text: None,
|
||||
voice: None,
|
||||
audio: Some(Audio {
|
||||
file_id: "audio_file_xyz".into(),
|
||||
duration: 120,
|
||||
mime_type: "audio/mpeg".into(),
|
||||
}),
|
||||
}),
|
||||
callback_query: None,
|
||||
};
|
||||
assert_eq!(
|
||||
classify(update),
|
||||
WebhookEvent::Voice {
|
||||
chat_id: 66,
|
||||
from: Some(make_user()),
|
||||
file_id: "audio_file_xyz".into(),
|
||||
mime_type: "audio/mpeg".into(),
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,26 @@
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Telegram `Voice` attachment (OGG-Opus from the mic).
|
||||
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Default)]
|
||||
pub struct Voice {
|
||||
pub file_id: String,
|
||||
#[serde(default)]
|
||||
pub duration: i64,
|
||||
#[serde(default)]
|
||||
pub mime_type: String,
|
||||
}
|
||||
|
||||
/// Telegram `Audio` attachment (music/audio file).
|
||||
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Default)]
|
||||
pub struct Audio {
|
||||
pub file_id: String,
|
||||
#[serde(default)]
|
||||
pub duration: i64,
|
||||
#[serde(default)]
|
||||
pub mime_type: String,
|
||||
}
|
||||
|
||||
/// Top-level Telegram update payload.
|
||||
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
|
||||
pub struct Update {
|
||||
|
|
@ -16,7 +36,7 @@ pub struct Update {
|
|||
pub callback_query: Option<CallbackQuery>,
|
||||
}
|
||||
|
||||
/// Incoming text message.
|
||||
/// Incoming text message (or voice/audio message).
|
||||
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
|
||||
pub struct Message {
|
||||
pub message_id: i64,
|
||||
|
|
@ -26,6 +46,10 @@ pub struct Message {
|
|||
pub from: Option<User>,
|
||||
#[serde(default)]
|
||||
pub text: Option<String>,
|
||||
#[serde(default)]
|
||||
pub voice: Option<Voice>,
|
||||
#[serde(default)]
|
||||
pub audio: Option<Audio>,
|
||||
}
|
||||
|
||||
/// Telegram user or bot.
|
||||
|
|
|
|||
Loading…
Reference in a new issue