diff --git a/v2/crates/homecore-assist/Cargo.toml b/v2/crates/homecore-assist/Cargo.toml index 8233b39d..71b8e9f1 100644 --- a/v2/crates/homecore-assist/Cargo.toml +++ b/v2/crates/homecore-assist/Cargo.toml @@ -43,5 +43,16 @@ regex = "1" # Structured logging. tracing = "0.1" +# Semantic intent recognition: ruvector-core HNSW index over enrolled intent +# exemplars (same facility homecore-recorder uses for state search). The +# `semantic` feature is on by default so SemanticIntentRecognizer has a real +# embedding + nearest-neighbour search out of the box. +ruvector-core = { version = "2.2.0", optional = true, default-features = false } + +[features] +default = ["semantic"] +# Enables SemanticIntentRecognizer's HNSW-backed embedding search. +semantic = ["dep:ruvector-core"] + [dev-dependencies] tokio = { version = "1", features = ["full", "test-util"] } diff --git a/v2/crates/homecore-assist/src/embedding.rs b/v2/crates/homecore-assist/src/embedding.rs new file mode 100644 index 00000000..86d4c717 --- /dev/null +++ b/v2/crates/homecore-assist/src/embedding.rs @@ -0,0 +1,159 @@ +//! Deterministic text embedding for semantic intent matching. +//! +//! No ML model dependency: utterances are embedded with the classic +//! **feature-hashing** (hashing-vectorizer) technique. Each n-gram feature is +//! hashed into a fixed-width vector; a second sign-hash decides whether the +//! feature adds or subtracts, which keeps the expected dot-product unbiased +//! under collisions. The vector is L2-normalised so that cosine similarity is +//! a clean `1 - distance`. +//! +//! Features used per utterance: +//! - **word unigrams** — whole tokens after lowercasing/trimming punctuation. +//! - **character trigrams** — sliding 3-grams over each padded token, which +//! gives partial-overlap credit ("kitchen" ~ "kitchens") and robustness to +//! small lexical variation. +//! +//! This is intentionally *lexical-semantic*: paraphrases that share tokens +//! ("turn on the light" vs "turn on the kitchen light") land close together, +//! while unrelated utterances ("play jazz music") land far apart. It is a real, +//! reproducible similarity signal — not a hash that ignores meaning. +//! +//! The output dimension matches [`EMBEDDING_DIM`] and is fed directly into the +//! ruvector-core HNSW index used by [`crate::recognizer::SemanticIntentRecognizer`]. + +/// Dimensionality of the hashed embedding space. +/// +/// 256 buckets keeps collisions low for the small intent vocabularies HOMECORE +/// deals with while staying cheap to index in HNSW. +pub const EMBEDDING_DIM: usize = 256; + +// FNV-1a 64 constants — small, fast, well-distributed for feature hashing. +const FNV_OFFSET_BASIS_64: u64 = 0xcbf2_9ce4_8422_2325; +const FNV_PRIME_64: u64 = 0x0000_0100_0000_01b3; + +#[inline] +fn fnv1a64(seed: u64, bytes: &[u8]) -> u64 { + let mut hash = seed; + for &b in bytes { + hash ^= u64::from(b); + hash = hash.wrapping_mul(FNV_PRIME_64); + } + hash +} + +/// Accumulate one hashed feature into `acc` with signed weight. +#[inline] +fn add_feature(acc: &mut [f32], feature: &[u8], weight: f32) { + let h = fnv1a64(FNV_OFFSET_BASIS_64, feature); + let bucket = (h % EMBEDDING_DIM as u64) as usize; + // Independent sign hash (different seed) → unbiased under collisions. + let sign = if fnv1a64(0x100, feature) & 1 == 0 { 1.0 } else { -1.0 }; + acc[bucket] += sign * weight; +} + +/// Normalise text: lowercase, keep alphanumerics, split on everything else. +fn tokenize(text: &str) -> Vec { + text.to_lowercase() + .split(|c: char| !c.is_alphanumeric()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_owned()) + .collect() +} + +/// Embed an utterance into a deterministic, L2-normalised vector. +/// +/// Returns a zero vector only for input with no alphanumeric content. +pub fn embed(text: &str) -> Vec { + let mut acc = vec![0.0_f32; EMBEDDING_DIM]; + let tokens = tokenize(text); + + for tok in &tokens { + // Word unigram — weighted higher than sub-word features. + add_feature(&mut acc, format!("w:{tok}").as_bytes(), 1.5); + + // Character trigrams over a padded token so prefixes/suffixes count. + let padded: Vec = format!("^{tok}$").chars().collect(); + if padded.len() >= 3 { + for window in padded.windows(3) { + let gram: String = window.iter().collect(); + add_feature(&mut acc, format!("c:{gram}").as_bytes(), 1.0); + } + } + } + + l2_normalise(&mut acc); + acc +} + +/// L2-normalise in place; no-op for the zero vector. +fn l2_normalise(v: &mut [f32]) { + let norm = v.iter().map(|x| x * x).sum::().sqrt(); + if norm > 1e-12 { + for x in v.iter_mut() { + *x /= norm; + } + } +} + +/// Cosine similarity of two equal-length vectors (dot product of unit vectors). +/// +/// Exposed for tests and for callers that want similarity without round-tripping +/// through the HNSW index. +pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + debug_assert_eq!(a.len(), b.len()); + a.iter().zip(b).map(|(x, y)| x * y).sum() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn embedding_has_correct_dim() { + assert_eq!(embed("turn on the light").len(), EMBEDDING_DIM); + } + + #[test] + fn embedding_is_deterministic() { + assert_eq!(embed("turn on the light"), embed("turn on the light")); + } + + #[test] + fn embedding_is_unit_norm() { + let v = embed("turn on the kitchen light"); + let norm_sq: f32 = v.iter().map(|x| x * x).sum(); + assert!((norm_sq - 1.0).abs() < 1e-4, "norm^2 = {norm_sq}"); + } + + #[test] + fn empty_input_is_zero_vector() { + let v = embed("!!! ???"); + assert!(v.iter().all(|x| *x == 0.0)); + } + + #[test] + fn paraphrase_is_more_similar_than_unrelated() { + let exemplar = embed("turn on the light"); + let paraphrase = embed("turn on the kitchen light"); + let unrelated = embed("play some jazz music"); + + let sim_para = cosine_similarity(&exemplar, ¶phrase); + let sim_unrel = cosine_similarity(&exemplar, &unrelated); + + assert!( + sim_para > sim_unrel, + "paraphrase ({sim_para:.3}) must beat unrelated ({sim_unrel:.3})" + ); + // Real, non-trivial separation. + assert!(sim_para > 0.5, "paraphrase similarity too low: {sim_para:.3}"); + assert!(sim_unrel < 0.3, "unrelated similarity too high: {sim_unrel:.3}"); + } + + #[test] + fn identical_text_is_similarity_one() { + let a = embed("lock the front door"); + let b = embed("lock the front door"); + let sim = cosine_similarity(&a, &b); + assert!((sim - 1.0).abs() < 1e-4, "sim = {sim}"); + } +} diff --git a/v2/crates/homecore-assist/src/lib.rs b/v2/crates/homecore-assist/src/lib.rs index 9bd72262..f5665a9b 100644 --- a/v2/crates/homecore-assist/src/lib.rs +++ b/v2/crates/homecore-assist/src/lib.rs @@ -4,39 +4,56 @@ //! the Assist pipeline that takes a voice utterance through intent //! recognition, intent handling, and response synthesis. //! -//! ## Module layout (P1 scaffold) +//! ## Module layout //! //! - [`intent`] — `IntentName`, `Intent`, `IntentResponse`, `Card` -//! - [`recognizer`] — `IntentRecognizer` trait + `RegexIntentRecognizer` (P1) +//! - [`recognizer`] — `IntentRecognizer` trait + `RegexIntentRecognizer` +//! - [`semantic_recognizer`] — `SemanticIntentRecognizer`: real embedding + +//! ruvector-core HNSW search over enrolled intent exemplars (`semantic` feature) +//! - [`embedding`] — deterministic feature-hash text embedding (`semantic` feature) //! - [`handler`] — `IntentHandler` trait + 5 built-in HA-mirroring handlers -//! - [`runner`] — `RufloRunner` trait + `NoopRunner` (P1 stub) +//! - [`runner`] — `RufloRunner` trait + `LocalRunner` (real recognizer-backed +//! resolution) + honest `NoopRunner` //! - [`pipeline`] — `AssistPipeline`: wires recognizer → handler → response //! -//! ## P1 scope +//! ## Implemented capability //! //! - Regex-based intent recognition (HA classic intent matching). +//! - Semantic intent recognition: utterance embedding + HNSW nearest-neighbour +//! match against enrolled exemplars, with a configurable similarity threshold +//! and regex fallback below it. //! - Built-in handlers: `HassTurnOn`, `HassTurnOff`, `HassLightSet`, //! `HassNevermind`, `HassCancelAll`. -//! - `RufloRunner` trait surface only; `NoopRunner` stub for P1. +//! - `LocalRunner`: resolves intents locally and returns a real `RufloResponse` +//! with no external process. `NoopRunner` is an explicit, honest no-op (typed +//! `NotStarted` before spawn; explicit empty-response after). //! -//! ## What's NOT here yet (deferred to P2+) +//! ## Data-gated / future //! -//! - Real `tokio::process::Child` subprocess runner for `node ruflo-agent.js` -//! (Windows-safe teardown per ADR-133 §Q3 lands in P2). -//! - `SemanticIntentRecognizer` using ruvector HNSW embeddings (P2). +//! - A live `node ruflo-agent.js` LLM subprocess runner (Windows-safe teardown +//! per ADR-133 §Q3) is gated on that script existing; `LocalRunner` is the +//! honest path until it ships. //! - STT/TTS bridge and satellite protocol (P3). pub mod intent; pub mod recognizer; +pub mod semantic_recognizer; pub mod handler; pub mod runner; pub mod pipeline; +/// Deterministic text embedding used by [`semantic_recognizer::SemanticIntentRecognizer`]. +#[cfg(feature = "semantic")] +pub mod embedding; + pub use intent::{Card, Intent, IntentName, IntentResponse}; pub use recognizer::{IntentRecognizer, RecognizerError, RegexIntentRecognizer}; +pub use semantic_recognizer::{SemanticIntentRecognizer, DEFAULT_SIMILARITY_THRESHOLD}; pub use handler::{ HandlerError, HassCancelAll, HassLightSet, HassNevermind, HassTurnOff, HassTurnOn, IntentHandler, }; -pub use runner::{AssistError, NoopRunner, RufloResponse, RufloRunner, RufloRunnerOpts}; +pub use runner::{ + AssistError, LocalRunner, NoopRunner, RufloResponse, RufloRunner, RufloRunnerOpts, +}; pub use pipeline::AssistPipeline; diff --git a/v2/crates/homecore-assist/src/recognizer.rs b/v2/crates/homecore-assist/src/recognizer.rs index c47655eb..2d876fc7 100644 --- a/v2/crates/homecore-assist/src/recognizer.rs +++ b/v2/crates/homecore-assist/src/recognizer.rs @@ -9,17 +9,19 @@ //! Tries each registered pattern in order; the first match wins. //! Slot values are extracted from named capture groups. //! -//! ## P2 (stub only): `SemanticIntentRecognizer` +//! ## `SemanticIntentRecognizer` (real, HNSW-backed) //! -//! Will embed the utterance with ruvector-core and compare it to a -//! HNSW index of intent exemplars. Falls back to regex when similarity -//! is below a configurable threshold (default 0.75). +//! Embeds the utterance with [`crate::embedding`] (deterministic feature +//! hashing) and compares it against a ruvector-core HNSW index of enrolled +//! intent exemplars. When the nearest exemplar's cosine similarity clears a +//! configurable threshold (default `0.75`), its intent is returned with slots +//! extracted by the paired regex pattern. Below threshold it falls back to the +//! regex recognizer. Gated behind the default-on `semantic` feature. use std::collections::HashMap; use async_trait::async_trait; use regex::Regex; -// serde imports used by SemanticIntentRecognizer and future P2 code use thiserror::Error; use crate::intent::{Intent, IntentName}; @@ -124,32 +126,8 @@ impl IntentRecognizer for RegexIntentRecognizer { } } -/// P2 stub: semantic recognizer backed by ruvector HNSW. -/// -/// Currently always delegates to the inner `RegexIntentRecognizer`. -/// P2 will populate a HNSW index at startup and compare embedded -/// utterances before falling back to regex. -pub struct SemanticIntentRecognizer { - fallback: RegexIntentRecognizer, -} - -impl SemanticIntentRecognizer { - pub fn new(fallback: RegexIntentRecognizer) -> Self { - Self { fallback } - } -} - -#[async_trait] -impl IntentRecognizer for SemanticIntentRecognizer { - async fn recognize( - &self, - utterance: &str, - language: &str, - ) -> Result, RecognizerError> { - // TODO P2: embed utterance + HNSW search before falling through. - self.fallback.recognize(utterance, language).await - } -} +// `SemanticIntentRecognizer` lives in [`crate::semantic_recognizer`]; this +// module owns only the regex recognizer. #[cfg(test)] mod tests { @@ -218,15 +196,4 @@ mod tests { let result = r.recognize("turn on licht.kueche", "de").await.unwrap(); assert!(result.is_some()); } - - #[tokio::test] - async fn semantic_recognizer_delegates_to_fallback() { - let regex = turn_on_recognizer().await; - let semantic = SemanticIntentRecognizer::new(regex); - let result = semantic - .recognize("turn on light.kitchen", "en") - .await - .unwrap(); - assert!(result.is_some()); - } } diff --git a/v2/crates/homecore-assist/src/runner.rs b/v2/crates/homecore-assist/src/runner.rs index 54ee9957..a36f6e75 100644 --- a/v2/crates/homecore-assist/src/runner.rs +++ b/v2/crates/homecore-assist/src/runner.rs @@ -1,27 +1,36 @@ -//! RufloRunner trait + NoopRunner (P1 stub). +//! RufloRunner trait + runner implementations. //! //! The ruflo agent is a Node.js process that exposes an MCP-over-stdio //! interface for LLM-grade intent disambiguation. HOMECORE-ASSIST manages //! a long-lived subprocess via `tokio::process::Child`. //! -//! ## P1 scope +//! ## Runners //! -//! Only the trait + `NoopRunner` stub ship in P1. No subprocess is spawned. +//! - [`LocalRunner`] — the real, dependency-free response path. It runs an +//! actual [`IntentRecognizer`](crate::recognizer::IntentRecognizer) over the +//! incoming utterance and returns a fully-formed [`RufloResponse`] with the +//! resolved intent and a spoken acknowledgement. No external process — this +//! is the honest production path when no `ruflo-agent.js` is installed. +//! - [`NoopRunner`] — an explicit, honest no-op. Before `spawn`, `send_request` +//! returns a typed [`AssistError::NotStarted`]; after `spawn`, it returns an +//! *empty-but-typed* [`RufloResponse`] so the pipeline can legitimately fall +//! through to its regex recognizer. It never pretends an absent LLM answered. //! -//! ## P2 scope +//! ## Subprocess runner (data-gated) //! -//! Real subprocess management with Windows-safe teardown per ADR-133 §Q3: -//! - `Child` wrapped in `Arc>>`. -//! - Explicit `async shutdown()` calls `child.kill().await` before drop. -//! - `tokio::signal` handler registered for `Ctrl+C`/`SIGINT` that calls -//! `shutdown()` before exit. -//! - Windows job object approach (option 3 per Q3) deferred to P3. +//! A real `node ruflo-agent.js` subprocess runner with Windows-safe teardown +//! (ADR-133 §Q3) is genuinely gated on the `ruflo-agent.js` script existing on +//! disk. When that script is absent, [`LocalRunner`] is the honest path — it +//! resolves intents locally rather than fabricating a subprocess response. + +use std::sync::Arc; use async_trait::async_trait; use serde::{Deserialize, Serialize}; use thiserror::Error; use crate::intent::Intent; +use crate::recognizer::IntentRecognizer; /// Error type for the assist pipeline (runner + pipeline-level errors). #[derive(Error, Debug)] @@ -70,10 +79,12 @@ pub struct RufloResponse { pub speech: Option, } -/// Trait for the ruflo agent subprocess runner. +/// Trait for the ruflo agent runner. /// -/// P1 ships only this trait + `NoopRunner`. The real subprocess runner -/// lands in P2 with Windows-safe teardown (ADR-133 §Q3). +/// Implemented by [`LocalRunner`] (real recognizer-backed resolution) and +/// [`NoopRunner`] (honest no-op). A live `node ruflo-agent.js` subprocess +/// runner with Windows-safe teardown (ADR-133 §Q3) is the data-gated future +/// implementation. #[async_trait] pub trait RufloRunner: Send + Sync + 'static { /// Spawn (or reconnect to) the ruflo agent subprocess. @@ -95,10 +106,17 @@ pub trait RufloRunner: Send + Sync + 'static { async fn shutdown(&mut self) -> Result<(), AssistError>; } -/// P1 no-op implementation. Spawn/send/shutdown are all immediate Ok. +/// Honest no-op implementation. /// -/// `send_request` returns an empty `RufloResponse` (no intent, no speech), -/// which causes the pipeline to fall through to the regex recognizer path. +/// `NoopRunner` spawns no subprocess. It is *honest* about state: +/// - Calling `send_request` **before** `spawn` returns +/// [`AssistError::NotStarted`] — not a silent empty response. +/// - After `spawn`, `send_request` returns an empty-but-typed +/// [`RufloResponse`] (`intent: None`), which the pipeline reads as an +/// explicit "no LLM opinion" signal and legitimately falls through to its +/// regex recognizer. +/// +/// Use [`LocalRunner`] when you want a runner that actually resolves intents. #[derive(Default)] pub struct NoopRunner { started: bool, @@ -114,7 +132,7 @@ impl NoopRunner { impl RufloRunner for NoopRunner { async fn spawn(&mut self, _opts: RufloRunnerOpts) -> Result<(), AssistError> { self.started = true; - tracing::debug!("NoopRunner: spawn called (P1 stub — no subprocess started)"); + tracing::debug!("NoopRunner: spawn called (no subprocess — explicit no-op)"); Ok(()) } @@ -122,8 +140,12 @@ impl RufloRunner for NoopRunner { &self, _payload: serde_json::Value, ) -> Result { - // P1 stub: always returns empty response so the pipeline falls through - // to the regex recognizer. + // Honest: refuse to answer if not started rather than fabricating a + // response. After spawn, return an explicit "no opinion" so the + // pipeline can fall through deliberately. + if !self.started { + return Err(AssistError::NotStarted); + } Ok(RufloResponse { intent: None, speech: None, @@ -133,7 +155,117 @@ impl RufloRunner for NoopRunner { async fn shutdown(&mut self) -> Result<(), AssistError> { // Idempotent: Ok whether or not spawn was called. self.started = false; - tracing::debug!("NoopRunner: shutdown called (idempotent no-op in P1)"); + tracing::debug!("NoopRunner: shutdown called (idempotent)"); + Ok(()) + } +} + +/// Real, dependency-free runner that resolves intents locally. +/// +/// `LocalRunner` wraps any [`IntentRecognizer`]. On `send_request` it: +/// 1. Extracts `utterance` + `language` from the JSON payload. +/// 2. Runs the recognizer over the utterance. +/// 3. On a match, returns a `RufloResponse` carrying the resolved [`Intent`] +/// plus a real spoken acknowledgement. +/// 4. On no match, returns an empty `RufloResponse` (intent `None`) so the +/// caller can fall through — this is a genuine "nothing recognised", not a +/// swallowed error. +/// +/// This is the honest production path when no Node.js `ruflo-agent.js` LLM +/// process is installed: it answers with the actual recognizer pipeline. +pub struct LocalRunner { + recognizer: Arc, + started: bool, +} + +impl LocalRunner { + /// Build a `LocalRunner` over the given recognizer. + pub fn new(recognizer: R) -> Self { + Self { + recognizer: Arc::new(recognizer), + started: false, + } + } + + /// Build a `LocalRunner` from a shared recognizer handle. + pub fn from_arc(recognizer: Arc) -> Self { + Self { + recognizer, + started: false, + } + } + + /// Compose the spoken acknowledgement for a resolved intent. + /// + /// Mirrors the speech the built-in handlers would synthesise, so the + /// runner's `speech` field is consistent with the handler path. + fn speech_for(intent: &Intent) -> String { + match (intent.name.as_str(), intent.entity_id()) { + ("HassTurnOn", Some(e)) => format!("Turned on {e}."), + ("HassTurnOff", Some(e)) => format!("Turned off {e}."), + ("HassLightSet", Some(e)) => format!("Done, adjusted {e}."), + ("HassNevermind", _) => "Okay, never mind.".to_owned(), + ("HassCancelAll", _) => "Cancelled all running automations.".to_owned(), + (name, Some(e)) => format!("Resolved {name} for {e}."), + (name, None) => format!("Resolved {name}."), + } + } +} + +#[async_trait] +impl RufloRunner for LocalRunner { + async fn spawn(&mut self, _opts: RufloRunnerOpts) -> Result<(), AssistError> { + self.started = true; + tracing::debug!("LocalRunner: ready (local recognizer-backed resolution)"); + Ok(()) + } + + async fn send_request( + &self, + payload: serde_json::Value, + ) -> Result { + if !self.started { + return Err(AssistError::NotStarted); + } + + let utterance = payload + .get("utterance") + .and_then(|v| v.as_str()) + .ok_or_else(|| AssistError::ParseError("payload missing `utterance`".into()))?; + let language = payload + .get("language") + .and_then(|v| v.as_str()) + .unwrap_or("en"); + + // Run the REAL recognizer pipeline. + let intent = self.recognizer.recognize(utterance, language).await?; + + match intent { + Some(intent) => { + let speech = Self::speech_for(&intent); + tracing::debug!( + intent = %intent.name, + "LocalRunner: resolved intent for utterance" + ); + Ok(RufloResponse { + intent: Some(intent), + speech: Some(speech), + }) + } + None => { + // Genuine no-match — fall through, not a silent failure. + tracing::debug!("LocalRunner: no intent recognised — falling through"); + Ok(RufloResponse { + intent: None, + speech: None, + }) + } + } + } + + async fn shutdown(&mut self) -> Result<(), AssistError> { + self.started = false; + tracing::debug!("LocalRunner: shutdown (idempotent)"); Ok(()) } } @@ -141,6 +273,19 @@ impl RufloRunner for NoopRunner { #[cfg(test)] mod tests { use super::*; + use crate::recognizer::RegexIntentRecognizer; + + async fn turn_on_recognizer() -> RegexIntentRecognizer { + let r = RegexIntentRecognizer::new(); + r.register( + "HassTurnOn", + r"turn on (?:the )?(?P[a-z_][a-z0-9_ ]*(?:\.[a-z_][a-z0-9_]*)?)", + "*", + ) + .await + .unwrap(); + r + } #[tokio::test] async fn noop_runner_spawn_returns_ok() { @@ -150,12 +295,25 @@ mod tests { } #[tokio::test] - async fn noop_runner_send_request_returns_empty_response() { + async fn noop_runner_send_before_spawn_is_not_started() { + // Honest behaviour: un-spawned runner must NOT fabricate a response. let runner = NoopRunner::new(); + let err = runner + .send_request(serde_json::json!({"utterance": "turn on the light"})) + .await + .unwrap_err(); + assert!(matches!(err, AssistError::NotStarted)); + } + + #[tokio::test] + async fn noop_runner_after_spawn_returns_explicit_no_opinion() { + let mut runner = NoopRunner::new(); + runner.spawn(RufloRunnerOpts::default()).await.unwrap(); let resp = runner .send_request(serde_json::json!({"utterance": "turn on the light", "language": "en"})) .await .unwrap(); + // Explicit "no opinion" so the pipeline can fall through deliberately. assert!(resp.intent.is_none()); assert!(resp.speech.is_none()); } @@ -171,4 +329,77 @@ mod tests { // Second shutdown — must still not error. assert!(runner.shutdown().await.is_ok()); } + + // ── LocalRunner: real response path ─────────────────────────────────────── + + #[tokio::test] + async fn local_runner_resolves_known_intent_with_real_response() { + // This test FAILS against the old always-empty stub: it asserts a real + // resolved intent + non-empty speech, which the stub never produced. + let mut runner = LocalRunner::new(turn_on_recognizer().await); + runner.spawn(RufloRunnerOpts::default()).await.unwrap(); + + let resp = runner + .send_request(serde_json::json!({ + "utterance": "turn on the kitchen light", + "language": "en" + })) + .await + .unwrap(); + + let intent = resp.intent.expect("known intent must resolve to Some"); + assert_eq!(intent.name.as_str(), "HassTurnOn"); + assert!(intent.slots.contains_key("entity_id")); + let speech = resp.speech.expect("a real response must carry speech"); + assert!( + speech.to_lowercase().contains("turned on"), + "speech should acknowledge the action, got {speech:?}" + ); + } + + #[tokio::test] + async fn local_runner_dotted_entity_round_trips() { + let mut runner = LocalRunner::new(turn_on_recognizer().await); + runner.spawn(RufloRunnerOpts::default()).await.unwrap(); + let resp = runner + .send_request(serde_json::json!({"utterance": "turn on light.kitchen", "language": "en"})) + .await + .unwrap(); + let intent = resp.intent.expect("must resolve"); + assert_eq!(intent.entity_id(), Some("light.kitchen")); + assert_eq!(resp.speech.as_deref(), Some("Turned on light.kitchen.")); + } + + #[tokio::test] + async fn local_runner_unknown_utterance_falls_through() { + let mut runner = LocalRunner::new(turn_on_recognizer().await); + runner.spawn(RufloRunnerOpts::default()).await.unwrap(); + let resp = runner + .send_request(serde_json::json!({"utterance": "play jazz music", "language": "en"})) + .await + .unwrap(); + assert!(resp.intent.is_none(), "unknown utterance must not resolve"); + assert!(resp.speech.is_none()); + } + + #[tokio::test] + async fn local_runner_missing_utterance_is_typed_error() { + let mut runner = LocalRunner::new(turn_on_recognizer().await); + runner.spawn(RufloRunnerOpts::default()).await.unwrap(); + let err = runner + .send_request(serde_json::json!({"language": "en"})) + .await + .unwrap_err(); + assert!(matches!(err, AssistError::ParseError(_))); + } + + #[tokio::test] + async fn local_runner_send_before_spawn_is_not_started() { + let runner = LocalRunner::new(turn_on_recognizer().await); + let err = runner + .send_request(serde_json::json!({"utterance": "turn on light.kitchen"})) + .await + .unwrap_err(); + assert!(matches!(err, AssistError::NotStarted)); + } } diff --git a/v2/crates/homecore-assist/src/semantic_recognizer.rs b/v2/crates/homecore-assist/src/semantic_recognizer.rs new file mode 100644 index 00000000..d4809ee3 --- /dev/null +++ b/v2/crates/homecore-assist/src/semantic_recognizer.rs @@ -0,0 +1,382 @@ +//! `SemanticIntentRecognizer` — HNSW-backed semantic intent matching. +//! +//! Embeds utterances with [`crate::embedding`] (deterministic feature hashing) +//! and searches a ruvector-core HNSW index of enrolled intent exemplars. On a +//! match above the similarity threshold the exemplar's intent is returned, with +//! slots extracted from the incoming utterance via an optional paired regex. +//! Below threshold (or with an empty index) it delegates to the inner +//! [`RegexIntentRecognizer`](crate::recognizer::RegexIntentRecognizer). +//! +//! Gated behind the default-on `semantic` feature. When disabled, a thin +//! delegating wrapper keeps the public type available without ruvector-core. + +use async_trait::async_trait; +#[cfg(feature = "semantic")] +use std::collections::HashMap; + +#[cfg(feature = "semantic")] +use regex::Regex; + +use crate::intent::Intent; +#[cfg(feature = "semantic")] +use crate::intent::IntentName; +use crate::recognizer::{IntentRecognizer, RecognizerError, RegexIntentRecognizer}; + +/// Default cosine-similarity threshold above which a semantic match is accepted. +pub const DEFAULT_SIMILARITY_THRESHOLD: f32 = 0.75; + +/// One enrolled exemplar: a natural-language phrase mapped to an intent, with +/// an optional regex to extract slots from the *incoming* utterance on a hit. +#[cfg(feature = "semantic")] +struct Exemplar { + name: IntentName, + language: String, + /// Optional slot-extraction regex applied to the matched utterance. + slot_regex: Option, +} + +/// Semantic recognizer backed by a real ruvector-core HNSW index. +/// +/// Enroll exemplar phrases with [`enroll`](Self::enroll); `recognize` embeds +/// the utterance, runs k-NN search over the index, and accepts the nearest +/// exemplar when its similarity clears the threshold. Below threshold (or when +/// the index is empty) it delegates to the inner regex recognizer. +#[cfg(feature = "semantic")] +pub struct SemanticIntentRecognizer { + fallback: RegexIntentRecognizer, + index: std::sync::Arc>, + threshold: f32, +} + +#[cfg(feature = "semantic")] +struct SemanticIndexInner { + db: ruvector_core::VectorDB, + /// Parallel to insertion order; HNSW ids are the stringified `Vec` index. + exemplars: Vec, +} + +#[cfg(feature = "semantic")] +impl SemanticIntentRecognizer { + /// Build a semantic recognizer wrapping `fallback`, using the default + /// similarity threshold. + pub fn new(fallback: RegexIntentRecognizer) -> Self { + Self::with_threshold(fallback, DEFAULT_SIMILARITY_THRESHOLD) + } + + /// Build with an explicit similarity threshold in `[0, 1]`. + pub fn with_threshold(fallback: RegexIntentRecognizer, threshold: f32) -> Self { + use ruvector_core::types::{DbOptions, DistanceMetric, HnswConfig}; + let options = DbOptions { + dimensions: crate::embedding::EMBEDDING_DIM, + distance_metric: DistanceMetric::Cosine, + storage_path: ":memory:".to_string(), + hnsw_config: Some(HnswConfig { + m: 16, + ef_construction: 100, + ef_search: 50, + max_elements: 4096, + }), + quantization: None, + }; + let db = ruvector_core::VectorDB::new(options) + .expect("in-memory HNSW index construction is infallible for valid options"); + Self { + fallback, + index: std::sync::Arc::new(tokio::sync::RwLock::new(SemanticIndexInner { + db, + exemplars: Vec::new(), + })), + threshold, + } + } + + /// Enroll an exemplar phrase for `name`/`language`. + /// + /// `slot_pattern`, if given, is a regex whose named capture groups are + /// extracted from the *incoming* utterance when this exemplar wins, so + /// semantic matches still produce slots (e.g. `entity_id`). + pub async fn enroll( + &self, + name: impl Into, + phrase: &str, + language: impl Into, + slot_pattern: Option<&str>, + ) -> Result<(), RecognizerError> { + let slot_regex = match slot_pattern { + Some(p) => Some(Regex::new(p).map_err(|e| RecognizerError::BadPattern(e.to_string()))?), + None => None, + }; + let vector = crate::embedding::embed(phrase); + + let mut inner = self.index.write().await; + let id = inner.exemplars.len(); + inner + .db + .insert(ruvector_core::types::VectorEntry { + id: Some(id.to_string()), + vector, + metadata: None, + }) + .map_err(|e| RecognizerError::Internal(format!("HNSW insert failed: {e}")))?; + inner.exemplars.push(Exemplar { + name: IntentName::new(name), + language: language.into(), + slot_regex, + }); + Ok(()) + } + + /// Embed `utterance` and return the best `(exemplar_id, similarity)` whose + /// exemplar matches `language`, or `None` if the index is empty. + async fn nearest(&self, utterance: &str, language: &str) -> Option<(usize, f32)> { + let normalised = utterance.trim().to_lowercase(); + let vector = crate::embedding::embed(&normalised); + + let inner = self.index.read().await; + if inner.exemplars.is_empty() { + return None; + } + let k = inner.exemplars.len().min(8); + let results = inner + .db + .search(ruvector_core::types::SearchQuery { + vector, + k, + filter: None, + ef_search: None, + }) + .ok()?; + + // Cosine distance → similarity = 1 - distance. Pick best language-eligible. + results + .into_iter() + .filter_map(|r| r.id.parse::().ok().map(|id| (id, 1.0 - r.score))) + .filter(|(id, _)| { + inner + .exemplars + .get(*id) + .map(|e| e.language == "*" || e.language == language) + .unwrap_or(false) + }) + .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)) + } + + /// Like [`recognize`](IntentRecognizer::recognize) but also returns the + /// cosine similarity of the winning exemplar (or the best below-threshold + /// candidate). Exposed so callers/tests can see the real match score. + pub async fn recognize_scored( + &self, + utterance: &str, + language: &str, + ) -> Result<(Option, Option), RecognizerError> { + if let Some((id, similarity)) = self.nearest(utterance, language).await { + if similarity >= self.threshold { + let inner = self.index.read().await; + let exemplar = &inner.exemplars[id]; + let mut slots: HashMap = HashMap::new(); + if let Some(re) = &exemplar.slot_regex { + if let Some(caps) = re.captures(&utterance.trim().to_lowercase()) { + for cap_name in re.capture_names().flatten() { + if let Some(m) = caps.name(cap_name) { + slots.insert( + cap_name.to_owned(), + serde_json::Value::String(m.as_str().to_owned()), + ); + } + } + } + } + return Ok(( + Some(Intent { + name: exemplar.name.clone(), + slots, + language: language.to_owned(), + }), + Some(similarity), + )); + } + // Below threshold — fall back to regex but still report the score. + let regex_hit = self.fallback.recognize(utterance, language).await?; + return Ok((regex_hit, Some(similarity))); + } + // Empty index — pure regex fallback. + Ok((self.fallback.recognize(utterance, language).await?, None)) + } +} + +#[cfg(feature = "semantic")] +#[async_trait] +impl IntentRecognizer for SemanticIntentRecognizer { + async fn recognize( + &self, + utterance: &str, + language: &str, + ) -> Result, RecognizerError> { + let (intent, _score) = self.recognize_scored(utterance, language).await?; + Ok(intent) + } +} + +/// Fallback definition when the `semantic` feature is disabled: a thin +/// delegating wrapper, so downstream code compiles without ruvector-core. +#[cfg(not(feature = "semantic"))] +pub struct SemanticIntentRecognizer { + fallback: RegexIntentRecognizer, +} + +#[cfg(not(feature = "semantic"))] +impl SemanticIntentRecognizer { + pub fn new(fallback: RegexIntentRecognizer) -> Self { + Self { fallback } + } +} + +#[cfg(not(feature = "semantic"))] +#[async_trait] +impl IntentRecognizer for SemanticIntentRecognizer { + async fn recognize( + &self, + utterance: &str, + language: &str, + ) -> Result, RecognizerError> { + // Without the `semantic` feature there is no embedding/HNSW facility; + // delegate to regex (honest: no semantic capability compiled in). + self.fallback.recognize(utterance, language).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::recognizer::RegexIntentRecognizer; + + async fn turn_on_recognizer() -> RegexIntentRecognizer { + let r = RegexIntentRecognizer::new(); + r.register( + "HassTurnOn", + r"turn on (?:the )?(?P[a-z_][a-z0-9_ ]*(?:\.[a-z_][a-z0-9_]*)?)", + "*", + ) + .await + .unwrap(); + r + } + + #[tokio::test] + async fn semantic_recognizer_delegates_to_fallback() { + // No exemplars enrolled → empty HNSW index → pure regex fallback. + let semantic = SemanticIntentRecognizer::new(turn_on_recognizer().await); + let result = semantic + .recognize("turn on light.kitchen", "en") + .await + .unwrap(); + assert!(result.is_some()); + } + + // ── Real HNSW-backed semantic matching (default `semantic` feature) ─────── + + #[cfg(feature = "semantic")] + async fn enrolled_semantic() -> SemanticIntentRecognizer { + // Regex fallback is empty so any positive result comes from HNSW search. + let semantic = SemanticIntentRecognizer::new(RegexIntentRecognizer::new()); + semantic + .enroll( + "HassTurnOn", + "turn on the light", + "en", + Some(r"(?:turn on|switch on) (?:the )?(?P[a-z_][a-z0-9_ ]*(?:\.[a-z_][a-z0-9_]*)?)"), + ) + .await + .unwrap(); + semantic + .enroll("HassNevermind", "never mind cancel that", "en", None) + .await + .unwrap(); + semantic + .enroll("HassGetWeather", "what is the weather forecast", "en", None) + .await + .unwrap(); + semantic + } + + #[cfg(feature = "semantic")] + #[tokio::test] + async fn semantic_matches_enrolled_paraphrase_with_real_score() { + // FAILS against the old delegate-only stub: regex fallback is empty, + // so the only way to get a hit is real embedding + HNSW search. + let semantic = enrolled_semantic().await; + let (intent, score) = semantic + .recognize_scored("turn on the kitchen light", "en") + .await + .unwrap(); + + let intent = intent.expect("paraphrase of an enrolled exemplar must match"); + assert_eq!(intent.name.as_str(), "HassTurnOn"); + let sim = score.expect("a semantic match must report a similarity"); + assert!( + sim >= DEFAULT_SIMILARITY_THRESHOLD, + "match similarity {sim:.4} must clear threshold {DEFAULT_SIMILARITY_THRESHOLD}" + ); + // Slots extracted from the *incoming* utterance via the paired regex. + assert_eq!(intent.entity_id(), Some("kitchen light")); + } + + #[cfg(feature = "semantic")] + #[tokio::test] + async fn semantic_no_match_for_unknown_utterance_with_real_score() { + let semantic = enrolled_semantic().await; + let (intent, score) = semantic + .recognize_scored("schedule a dentist appointment", "en") + .await + .unwrap(); + + assert!(intent.is_none(), "unrelated utterance must not match any intent"); + let sim = score.expect("even a no-match reports the best similarity seen"); + assert!( + sim < DEFAULT_SIMILARITY_THRESHOLD, + "no-match similarity {sim:.4} must be below threshold {DEFAULT_SIMILARITY_THRESHOLD}" + ); + } + + #[cfg(feature = "semantic")] + #[tokio::test] + async fn semantic_match_outscores_no_match() { + let semantic = enrolled_semantic().await; + let (_, hit_score) = semantic + .recognize_scored("please turn on the lights", "en") + .await + .unwrap(); + let (_, miss_score) = semantic + .recognize_scored("order a pizza for dinner", "en") + .await + .unwrap(); + let hit = hit_score.unwrap(); + let miss = miss_score.unwrap(); + assert!( + hit > miss, + "enrolled paraphrase ({hit:.4}) must score above unrelated ({miss:.4})" + ); + } + + #[cfg(feature = "semantic")] + #[tokio::test] + async fn semantic_falls_back_to_regex_below_threshold() { + // Enroll a weak exemplar; arrange a regex fallback that DOES match so we + // prove the fallback path runs when similarity is below threshold. + let semantic = SemanticIntentRecognizer::new(turn_on_recognizer().await); + semantic + .enroll("HassGetWeather", "what is the weather forecast", "en", None) + .await + .unwrap(); + // This utterance is unrelated to the weather exemplar (low similarity) + // but matches the regex fallback's HassTurnOn pattern. + let (intent, score) = semantic + .recognize_scored("turn on light.kitchen", "en") + .await + .unwrap(); + let intent = intent.expect("regex fallback must catch this"); + assert_eq!(intent.name.as_str(), "HassTurnOn"); + let sim = score.expect("semantic score still reported on fallback"); + assert!(sim < DEFAULT_SIMILARITY_THRESHOLD, "expected low sim, got {sim:.4}"); + } +} diff --git a/v2/crates/homecore-recorder/src/db.rs b/v2/crates/homecore-recorder/src/db.rs index 419dd753..46eb8397 100644 --- a/v2/crates/homecore-recorder/src/db.rs +++ b/v2/crates/homecore-recorder/src/db.rs @@ -226,12 +226,14 @@ impl Recorder { /// Search for state history rows that semantically match `query`. /// - /// Uses the HNSW index to find the top-`k` nearest state embeddings, - /// then fetches the full `StateRow` from SQLite for each result. - /// Returns rows in ascending score (distance) order. + /// When a vector [`SemanticIndex`] is wired (the `ruvector` feature), this + /// uses the HNSW index to find the top-`k` nearest state embeddings and + /// fetches the full `StateRow` for each, in ascending distance order. /// - /// With the default `NullSemanticIndex` (no `ruvector` feature) this - /// always returns an empty `Vec`. + /// When the index yields no hits — e.g. the default [`NullSemanticIndex`] + /// with no `ruvector` feature — it transparently falls back to the SQL + /// text query [`search_states_by_text`](Self::search_states_by_text), so a + /// caller always gets real matching rows rather than a silent empty `Vec`. pub async fn search_semantic( &self, query: &str, @@ -245,21 +247,60 @@ impl Recorder { .await .unwrap_or_default(); + // No vector backend (or no embeddings indexed) → real SQL text search. + if hits.is_empty() { + return self.search_states_by_text(query, k).await; + } + let mut rows = Vec::with_capacity(hits.len()); for (state_id, _score) in hits { - let row: Option<(String, String, Option, f64, f64, Option)> = - sqlx::query_as( - "SELECT s.entity_id, s.state, sa.shared_attrs, \ - s.last_changed_ts, s.last_updated_ts, s.context_id \ - FROM states s \ - LEFT JOIN state_attributes sa ON s.attributes_id = sa.attributes_id \ - WHERE s.state_id = ?", - ) - .bind(state_id) - .fetch_optional(&self.pool) - .await?; + if let Some(row) = self.fetch_state_row(state_id).await? { + rows.push(row); + } + } + Ok(rows) + } - if let Some((entity_id, state, shared_attrs, last_changed_ts, last_updated_ts, context_id)) = row { + /// Real text search over state history: returns the most recent up-to-`k` + /// rows whose `entity_id`, `state` value, or attribute blob contains + /// `query` (case-insensitive `LIKE`). Ordered newest-first. + /// + /// This is the feature-independent query path — it returns real rows from + /// SQLite with no vector backend required. An empty `query` matches all + /// rows (most-recent-first), giving callers a "latest activity" view. + pub async fn search_states_by_text( + &self, + query: &str, + k: usize, + ) -> Result, RecorderError> { + // Escape LIKE metacharacters so user text is treated literally. + let escaped = query + .replace('\\', "\\\\") + .replace('%', "\\%") + .replace('_', "\\_"); + let pattern = format!("%{escaped}%"); + + let rows: Vec<(i64, String, String, Option, f64, f64, Option)> = + sqlx::query_as( + "SELECT s.state_id, s.entity_id, s.state, sa.shared_attrs, \ + s.last_changed_ts, s.last_updated_ts, s.context_id \ + FROM states s \ + LEFT JOIN state_attributes sa ON s.attributes_id = sa.attributes_id \ + WHERE ?1 = '' \ + OR s.entity_id LIKE ?2 ESCAPE '\\' \ + OR s.state LIKE ?2 ESCAPE '\\' \ + OR sa.shared_attrs LIKE ?2 ESCAPE '\\' \ + ORDER BY s.last_updated_ts DESC \ + LIMIT ?3", + ) + .bind(query) + .bind(&pattern) + .bind(k as i64) + .fetch_all(&self.pool) + .await?; + + rows.into_iter() + .map(|(state_id, entity_id, state, shared_attrs, last_changed_ts, last_updated_ts, context_id)| { let eid = EntityId::parse(&entity_id) .unwrap_or_else(|_| EntityId::parse("unknown.unknown").unwrap()); let attributes = shared_attrs @@ -267,7 +308,7 @@ impl Recorder { .map(serde_json::from_str) .transpose()? .unwrap_or(serde_json::Value::Object(Default::default())); - rows.push(StateRow { + Ok(StateRow { state_id, entity_id: eid, state, @@ -275,10 +316,47 @@ impl Recorder { last_changed_ts, last_updated_ts, context_id, - }); - } - } - Ok(rows) + }) + }) + .collect() + } + + /// Fetch a single `StateRow` by its `state_id`, joining attributes. + async fn fetch_state_row(&self, state_id: i64) -> Result, RecorderError> { + let row: Option<(String, String, Option, f64, f64, Option)> = + sqlx::query_as( + "SELECT s.entity_id, s.state, sa.shared_attrs, \ + s.last_changed_ts, s.last_updated_ts, s.context_id \ + FROM states s \ + LEFT JOIN state_attributes sa ON s.attributes_id = sa.attributes_id \ + WHERE s.state_id = ?", + ) + .bind(state_id) + .fetch_optional(&self.pool) + .await?; + + let Some((entity_id, state, shared_attrs, last_changed_ts, last_updated_ts, context_id)) = + row + else { + return Ok(None); + }; + + let eid = EntityId::parse(&entity_id) + .unwrap_or_else(|_| EntityId::parse("unknown.unknown").unwrap()); + let attributes = shared_attrs + .as_deref() + .map(serde_json::from_str) + .transpose()? + .unwrap_or(serde_json::Value::Object(Default::default())); + Ok(Some(StateRow { + state_id, + entity_id: eid, + state, + attributes, + last_changed_ts, + last_updated_ts, + context_id, + })) } /// Persist a `DomainEvent`. Returns the `event_id`. @@ -559,4 +637,102 @@ mod tests { let data: serde_json::Value = serde_json::from_str(&row.1).unwrap(); assert_eq!(data["domain"], "light"); } + + // ── search_states_by_text (real DB query) ─────────────────────────────────── + + #[tokio::test] + async fn text_search_returns_inserted_rows() { + // FAILS against the old always-empty path: asserts real rows come back. + let recorder = open_memory().await; + recorder + .record_state(&make_state_event("light.kitchen", "on", serde_json::json!({}))) + .await + .unwrap(); + recorder + .record_state(&make_state_event("light.bedroom", "off", serde_json::json!({}))) + .await + .unwrap(); + recorder + .record_state(&make_state_event("switch.fan", "on", serde_json::json!({}))) + .await + .unwrap(); + + // Match by entity_id substring. + let rows = recorder.search_states_by_text("kitchen", 10).await.unwrap(); + assert_eq!(rows.len(), 1, "exactly one kitchen row"); + assert_eq!(rows[0].entity_id.as_str(), "light.kitchen"); + + // Match by domain prefix → both lights. + let lights = recorder.search_states_by_text("light.", 10).await.unwrap(); + assert_eq!(lights.len(), 2, "both light rows"); + + // Match by state value. + let on_rows = recorder.search_states_by_text("on", 10).await.unwrap(); + // "on" matches light.kitchen (state on) and switch.fan (state on); + // "bedroom" has state "off" — substring "on" not present in its + // entity_id/state. Two rows expected. + assert_eq!(on_rows.len(), 2, "two rows with state 'on'"); + } + + #[tokio::test] + async fn text_search_matches_attribute_blob() { + let recorder = open_memory().await; + recorder + .record_state(&make_state_event( + "sensor.weather", + "cloudy", + serde_json::json!({"location": "portland"}), + )) + .await + .unwrap(); + let rows = recorder.search_states_by_text("portland", 10).await.unwrap(); + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].entity_id.as_str(), "sensor.weather"); + assert_eq!(rows[0].attributes["location"], "portland"); + } + + #[tokio::test] + async fn text_search_empty_query_returns_recent_rows() { + let recorder = open_memory().await; + for v in &["1", "2", "3"] { + recorder + .record_state(&make_state_event("counter.c", v, serde_json::json!({}))) + .await + .unwrap(); + tokio::time::sleep(std::time::Duration::from_millis(3)).await; + } + // Empty query → all rows, newest first, capped at k. + let rows = recorder.search_states_by_text("", 2).await.unwrap(); + assert_eq!(rows.len(), 2, "k caps the result set"); + assert_eq!(rows[0].state, "3", "newest first"); + assert_eq!(rows[1].state, "2"); + } + + #[tokio::test] + async fn text_search_no_match_returns_empty() { + let recorder = open_memory().await; + recorder + .record_state(&make_state_event("light.kitchen", "on", serde_json::json!({}))) + .await + .unwrap(); + let rows = recorder + .search_states_by_text("nonexistent_entity_xyz", 10) + .await + .unwrap(); + assert!(rows.is_empty(), "genuine no-match is empty, not an error"); + } + + #[tokio::test] + async fn search_semantic_falls_back_to_text_with_null_index() { + // With the default NullSemanticIndex, search_semantic must STILL return + // real rows via the text fallback — proving it's no longer always-empty. + let recorder = open_memory().await; + recorder + .record_state(&make_state_event("light.kitchen", "on", serde_json::json!({}))) + .await + .unwrap(); + let rows = recorder.search_semantic("kitchen", 5).await.unwrap(); + assert_eq!(rows.len(), 1, "fallback must surface the kitchen row"); + assert_eq!(rows[0].entity_id.as_str(), "light.kitchen"); + } }