//! Generate — non-streaming stdout parsing. //! //! mlx_lm prints the generated text, then `==========` separator, then a //! footer with token / tokens-per-sec stats. We pin the parser against //! a representative fixture so version drift in mlx_lm output style //! is caught here, not in production. use kei_llm_mlx::generate::{parse_response, build_argv, GenerateOpts}; const STDOUT_SAMPLE: &str = "\ Once upon a time, in a far-off land, there lived a curious cat. ========== Prompt: 12 tokens, 132.4 tokens-per-sec Generation: 64 tokens, 78.9 tokens-per-sec "; #[test] fn footer_yields_typed_response() { let r = parse_response(STDOUT_SAMPLE, "mlx-community/Llama-3.2-3B-Instruct-4bit", "Once upon") .expect("parse ok"); assert!(r.text.starts_with("Once upon a time")); assert!(!r.text.contains("==========")); assert_eq!(r.prompt_tokens, Some(12)); assert_eq!(r.generation_tokens, Some(64)); assert!(r.tokens_per_sec.is_some()); assert_eq!(r.model_id, "mlx-community/Llama-3.2-3B-Instruct-4bit"); } #[test] fn argv_carries_optional_flags() { let argv = build_argv( "mlx-community/x-4bit", "hi", &GenerateOpts { max_tokens: Some(64), temperature: Some(0.7) }, ); assert!(argv.contains(&"--model".into())); assert!(argv.contains(&"mlx-community/x-4bit".to_string())); assert!(argv.contains(&"--max-tokens".into())); assert!(argv.contains(&"64".to_string())); assert!(argv.contains(&"--temp".into())); }