import { Injectable, Optional, Logger } from "@nestjs/common";
import { LlmService } from "../llm/llm.service";
import type {
  IntentType,
  IntentClassification,
  IntentPattern,
  ExtractedEntity,
} from "./interfaces";

/** Valid entity types for validation */
const VALID_ENTITY_TYPES: readonly string[] = [
  "date",
  "time",
  "person",
  "project",
  "priority",
  "status",
  "text",
];

/** Intents the LLM is allowed to return; anything else is mapped to "unknown". */
const VALID_INTENTS: readonly IntentType[] = [
  "query_tasks",
  "query_events",
  "query_projects",
  "create_task",
  "create_event",
  "update_task",
  "update_event",
  "briefing",
  "search",
  "unknown",
];

/** Temperature used when the environment variable is unset or not a usable number. */
const DEFAULT_INTENT_TEMPERATURE = 0.1;

/** Maximum number of (escaped) query characters embedded in the LLM prompt. */
const MAX_PROMPT_QUERY_LENGTH = 500;

/**
 * Parse the configured LLM temperature.
 *
 * @param raw - Raw environment value (may be undefined, empty or malformed)
 * @returns The parsed temperature, or the default when the value is missing,
 *          not a finite number, or negative
 */
function parseTemperature(raw: string | undefined): number {
  if (raw === undefined) {
    return DEFAULT_INTENT_TEMPERATURE;
  }
  const parsed = parseFloat(raw);
  // parseFloat("") and parseFloat("abc") yield NaN; never forward that to the LLM.
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : DEFAULT_INTENT_TEMPERATURE;
}

/**
 * Intent Classification Service
 *
 * Classifies natural language queries into structured intents using a hybrid approach:
 * 1. Rule-based classification (fast, <100ms) - regex patterns for common phrases
 * 2. LLM fallback (optional) - for ambiguous queries or when explicitly requested
 *
 * @example
 * ```typescript
 * // Rule-based classification (default)
 * const result = await service.classify("show my tasks");
 * // { intent: "query_tasks", confidence: 0.9, method: "rule", ... }
 *
 * // Force LLM classification
 * const result = await service.classify("show my tasks", true);
 * // { intent: "query_tasks", confidence: 0.95, method: "llm", ... }
 * ```
 */
@Injectable()
export class IntentClassificationService {
  private readonly logger = new Logger(IntentClassificationService.name);

  /** Intent patterns, sorted once at construction time (highest priority first). */
  private readonly patterns: IntentPattern[];

  /** Rule results below this confidence are handed to the LLM when one is available. */
  private readonly RULE_CONFIDENCE_THRESHOLD = 0.7;

  /** Configurable LLM model for intent classification */
  private readonly intentModel =
    // eslint-disable-next-line @typescript-eslint/dot-notation -- env vars use bracket notation
    process.env["INTENT_CLASSIFICATION_MODEL"] ?? "llama3.2";

  /** Configurable temperature (low for consistent results) */
  private readonly intentTemperature = parseTemperature(
    // eslint-disable-next-line @typescript-eslint/dot-notation -- env vars use bracket notation
    process.env["INTENT_CLASSIFICATION_TEMPERATURE"]
  );

  constructor(@Optional() private readonly llmService?: LlmService) {
    // buildPatterns() returns a fresh array, so sorting in place is safe. Array#sort is
    // stable, so patterns with equal priority keep their declaration order.
    this.patterns = this.buildPatterns().sort((a, b) => b.priority - a.priority);
    this.logger.log("Intent classification service initialized");
  }

  /**
   * Classify a natural language query into an intent.
   * Uses rule-based classification by default, with optional LLM fallback.
   *
   * @param query - Natural language query to classify
   * @param useLlm - Force LLM classification (default: false)
   * @returns Intent classification result
   */
  async classify(query: string, useLlm = false): Promise<IntentClassification> {
    if (!query || query.trim().length === 0) {
      return {
        intent: "unknown",
        confidence: 0,
        entities: [],
        method: "rule",
        query,
      };
    }

    // Try rule-based classification first
    const ruleResult = this.classifyWithRules(query);

    // Use LLM if:
    // 1. Explicitly requested
    // 2. Rule confidence is low and LLM is available
    const ruleIsUncertain = ruleResult.confidence < this.RULE_CONFIDENCE_THRESHOLD;
    const shouldUseLlm = useLlm || (ruleIsUncertain && Boolean(this.llmService));

    if (shouldUseLlm) {
      return this.classifyWithLlm(query);
    }

    return ruleResult;
  }

  /**
   * Classify a query using rule-based pattern matching.
   * Fast (<100ms) but limited to predefined patterns.
   *
   * Entities are only extracted when a pattern matches; an unmatched query is
   * returned as "unknown" with an empty entity list.
   *
   * @param query - Natural language query to classify
   * @returns Intent classification result
   */
  classifyWithRules(query: string): IntentClassification {
    if (!query || query.trim().length === 0) {
      return {
        intent: "unknown",
        confidence: 0,
        entities: [],
        method: "rule",
        query,
      };
    }

    const normalizedQuery = query.toLowerCase().trim();

    // Patterns are already ordered by priority, so the first hit wins.
    for (const { intent, patterns } of this.patterns) {
      if (patterns.some((pattern) => pattern.test(normalizedQuery))) {
        return {
          intent,
          confidence: 0.9, // High confidence for direct pattern match
          // Extract from the original query so offsets refer to the caller's string.
          entities: this.extractEntities(query),
          method: "rule",
          query,
        };
      }
    }

    // No pattern matched
    return {
      intent: "unknown",
      confidence: 0.2,
      entities: [],
      method: "rule",
      query,
    };
  }

  /**
   * Classify a query using LLM.
   * Slower but more flexible for ambiguous queries.
   *
   * Falls back to rule-based classification when no LLM service was injected.
   * Errors raised by the LLM call are logged and reported as an "unknown" intent
   * with zero confidence rather than thrown.
   *
   * @param query - Natural language query to classify
   * @returns Intent classification result
   */
  async classifyWithLlm(query: string): Promise<IntentClassification> {
    if (!this.llmService) {
      this.logger.warn("LLM service not available, falling back to rule-based classification");
      return this.classifyWithRules(query);
    }

    try {
      const prompt = this.buildLlmPrompt(query);
      const response = await this.llmService.chat({
        messages: [
          {
            role: "system",
            content: "You are an intent classification assistant. Respond only with valid JSON.",
          },
          {
            role: "user",
            content: prompt,
          },
        ],
        model: this.intentModel,
        temperature: this.intentTemperature,
      });

      return this.parseLlmResponse(response.message.content, query);
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.logger.error(`LLM classification failed: ${errorMessage}`);
      return {
        intent: "unknown",
        confidence: 0,
        entities: [],
        method: "llm",
        query,
      };
    }
  }

  /**
   * Extract entities from a query.
   * Identifies dates, times, priorities, statuses and @mentions.
   *
   * Offsets (`start`/`end`) are indices into `query` as passed in. Entities are
   * returned grouped by type (dates, times, priorities, statuses, people), not
   * sorted by position.
   *
   * @param query - Query to extract entities from
   * @returns Array of extracted entities
   */
  extractEntities(query: string): ExtractedEntity[] {
    const entities: ExtractedEntity[] = [];

    // Appends one entity built from a regex match.
    const record = (type: ExtractedEntity["type"], value: string, match: RegExpExecArray): void => {
      entities.push({
        type,
        value,
        raw: match[0],
        start: match.index,
        end: match.index + match[0].length,
      });
    };

    /* eslint-disable security/detect-unsafe-regex */

    // Date patterns
    const datePatterns = [
      { pattern: /\b(today|tomorrow|yesterday)\b/gi, normalize: (m: string) => m.toLowerCase() },
      {
        pattern: /\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/gi,
        normalize: (m: string) => m.toLowerCase(),
      },
      {
        pattern: /\b(next|this)\s+(week|month|year)\b/gi,
        normalize: (m: string) => m.toLowerCase(),
      },
      {
        pattern: /\b(\d{1,2})[/-](\d{1,2})([/-](\d{2,4}))?\b/g,
        normalize: (m: string) => m,
      },
    ];

    for (const { pattern, normalize } of datePatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(query)) !== null) {
        record("date", normalize(match[0]), match);
      }
    }

    // Time patterns, ordered from most to least specific.
    const timePatterns = [
      /\b(\d{1,2}):(\d{2})\s*(am|pm)?\b/gi,
      /\b(\d{1,2})\s*(am|pm)\b/gi,
      /\bat\s+(\d{1,2})\b/gi,
    ];

    for (const pattern of timePatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(query)) !== null) {
        const start = match.index;
        const end = start + match[0].length;
        // The looser patterns also match fragments of text an earlier pattern already
        // captured ("3:30pm" -> "30pm", "at 3:30" -> "at 3"); keep only the first hit.
        const overlapsRecordedTime = entities.some(
          (entity) => entity.type === "time" && start < entity.end && entity.start < end
        );
        if (!overlapsRecordedTime) {
          record("time", match[0].toLowerCase(), match);
        }
      }
    }

    // Priority patterns
    const priorityPatterns = [
      { pattern: /\b(high|urgent|critical)\s*priority\b/gi, value: "HIGH" },
      { pattern: /\b(medium|normal)\s*priority\b/gi, value: "MEDIUM" },
      { pattern: /\b(low|minor)\s*priority\b/gi, value: "LOW" },
    ];

    for (const { pattern, value } of priorityPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(query)) !== null) {
        record("priority", value, match);
      }
    }

    // Status patterns
    const statusPatterns = [
      { pattern: /\b(done|complete|finished|completed)\b/gi, value: "DONE" },
      { pattern: /\b(in\s*progress|working\s*on|ongoing)\b/gi, value: "IN_PROGRESS" },
      { pattern: /\b(pending|todo|not\s*started)\b/gi, value: "PENDING" },
      { pattern: /\b(blocked|stuck)\b/gi, value: "BLOCKED" },
      { pattern: /\b(cancelled|canceled)\b/gi, value: "CANCELLED" },
    ];

    for (const { pattern, value } of statusPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(query)) !== null) {
        record("status", value, match);
      }
    }

    // Person patterns (mentions)
    const personPattern = /@(\w+)/g;
    let mention: RegExpExecArray | null;
    while ((mention = personPattern.exec(query)) !== null) {
      const handle = mention[1];
      if (handle) {
        record("person", handle, mention);
      }
    }

    /* eslint-enable security/detect-unsafe-regex */

    return entities;
  }

  /**
   * Build regex patterns for intent matching.
   * The returned list is in declaration order; the constructor sorts it by
   * priority (higher = checked first).
   *
   * @returns Array of intent patterns
   */
  private buildPatterns(): IntentPattern[] {
    /* eslint-disable security/detect-unsafe-regex */
    return [
      // Briefing (highest priority - specific intent)
      {
        intent: "briefing",
        patterns: [
          /\b(morning|daily|today'?s?)\s+(briefing|summary|overview)\b/i,
          /\bwhat'?s?\s+(my|the)\s+day\s+look\s+like\b/i,
          /\bgive\s+me\s+(a\s+)?(rundown|summary)\b/i,
        ],
        priority: 10,
      },
      // Create operations (high priority - specific actions)
      {
        intent: "create_task",
        patterns: [
          /\b(add|create|new|make)\s+(a\s+)?(task|to-?do)\b/i,
          /\bremind\s+me\s+to\b/i,
          /\bI\s+need\s+to\b/i,
        ],
        priority: 9,
      },
      {
        intent: "create_event",
        patterns: [
          /\b(schedule|create|add|book)\s+(a\s+|an\s+)?(meeting|event|appointment|call)\b/i,
          /\bset\s+up\s+(a\s+)?(meeting|call)\b/i,
        ],
        priority: 9,
      },
      // Update operations
      {
        intent: "update_task",
        patterns: [
          /\b(mark|set|update|change)\s+(task|to-?do)\s+(as\s+)?(done|complete|status|priority)\b/i,
          /\bcomplete\s+(the\s+)?(task|to-?do)\b/i,
          /\b(finish|done\s+with)\s+(the\s+)?(task|to-?do)\b/i,
          /\bcomplete\s+\w+\s+\w+\s+(task|to-?do)\b/i, // "complete the review task"
          /\bcomplete\s+[\w\s]{1,30}(task|to-?do)\b/i, // More flexible but bounded
        ],
        priority: 8,
      },
      {
        intent: "update_event",
        patterns: [
          /\b(reschedule|move|change|cancel|update)\s+(the\s+)?(meeting|event|appointment|call|standup)\b/i,
          /\bmove\s+(event|meeting)\s+to\b/i,
          /\bcancel\s+(the\s+)?(meeting|event|standup|call)\b/i,
        ],
        priority: 8,
      },
      // Query operations
      {
        intent: "query_tasks",
        patterns: [
          /\b(show|list|get|what|display)\s+((my|all|the)\s+)?tasks?\b/i,
          /\bwhat\s+(tasks?|to-?dos?)\s+(do\s+I|have)\b/i,
          /\b(pending|overdue|upcoming|active)\s+tasks?\b/i,
        ],
        priority: 8,
      },
      {
        intent: "query_events",
        patterns: [
          /\b(show|list|get|display)\s+((my|all|the)\s+)?(calendar|events?|meetings?|schedule)\b/i,
          /\bwhat'?s?\s+(on\s+)?(my\s+)?(calendar|schedule)\b/i,
          /\b(upcoming|next|today'?s?)\s+(events?|meetings?)\b/i,
        ],
        priority: 8,
      },
      {
        intent: "query_projects",
        patterns: [
          /\b(show|list|get|display|what)\s+((my|all|the)\s+)?projects?\b/i,
          /\bwhat\s+projects?\s+(do\s+I|have)\b/i,
          /\b(active|ongoing)\s+projects?\b/i,
        ],
        priority: 8,
      },
      // Search (lower priority - more general)
      {
        intent: "search",
        patterns: [/\b(find|search|look\s*for|locate)\b/i],
        priority: 6,
      },
    ];
    /* eslint-enable security/detect-unsafe-regex */
  }

  /**
   * Sanitize user query for safe inclusion in LLM prompt.
   * Reduces prompt-injection risk by escaping backslashes and double quotes,
   * flattening line breaks to spaces, and limiting length.
   *
   * @param query - Raw user query
   * @returns Sanitized query (at most 500 characters) safe to place between
   *          double quotes in the LLM prompt
   */
  private sanitizeQueryForPrompt(query: string): string {
    // Escape quotes and backslashes to prevent prompt injection
    const sanitized = query
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"')
      .replace(/\n/g, " ")
      .replace(/\r/g, " ");

    // Limit length to prevent prompt overflow (500 chars max)
    const maxLength = MAX_PROMPT_QUERY_LENGTH;
    if (sanitized.length <= maxLength) {
      return sanitized;
    }

    this.logger.warn(
      `Query truncated from ${String(sanitized.length)} to ${String(maxLength)} chars`
    );

    let truncated = sanitized.slice(0, maxLength);

    // The cut may land between the two UTF-16 halves of an astral character;
    // drop the orphaned high surrogate instead of emitting malformed text.
    const lastCode = truncated.charCodeAt(truncated.length - 1);
    if (lastCode >= 0xd800 && lastCode <= 0xdbff) {
      truncated = truncated.slice(0, -1);
    }

    // The cut may also land inside an escape sequence. An odd number of trailing
    // backslashes means the last one is dangling and would escape the closing
    // quote that the prompt places right after the query.
    let trailingBackslashes = 0;
    while (
      trailingBackslashes < truncated.length &&
      truncated.charAt(truncated.length - 1 - trailingBackslashes) === "\\"
    ) {
      trailingBackslashes += 1;
    }
    if (trailingBackslashes % 2 === 1) {
      truncated = truncated.slice(0, -1);
    }

    return truncated;
  }

  /**
   * Build the prompt for LLM classification.
   *
   * The query is sanitized before being embedded, and the prompt ends with a
   * JSON template describing the exact response shape expected by
   * `parseLlmResponse`.
   *
   * @param query - User query to classify
   * @returns Formatted prompt
   */
  private buildLlmPrompt(query: string): string {
    const sanitizedQuery = this.sanitizeQueryForPrompt(query);

    return `Classify the following user query into one of these intents:
- query_tasks: User wants to see their tasks
- query_events: User wants to see their calendar/events
- query_projects: User wants to see their projects
- create_task: User wants to create a new task
- create_event: User wants to schedule a new event
- update_task: User wants to update an existing task
- update_event: User wants to update/reschedule an event
- briefing: User wants a daily briefing/summary
- search: User wants to search for something
- unknown: Query doesn't match any intent

Also extract any entities (dates, times, priorities, statuses, people).

Query: "${sanitizedQuery}"

Respond with ONLY this JSON format (no other text):
{
  "intent": "<intent_type>",
  "confidence": <0.0-1.0>,
  "entities": [
    {
      "type": "<date|time|person|project|priority|status|text>",
      "value": "<normalized_value>",
      "raw": "<original_text>",
      "start": <start_index>,
      "end": <end_index>
    }
  ]
}`;
  }

  /**
   * Validate and sanitize confidence score from LLM.
   * Ensures confidence is a valid number between 0.0 and 1.0.
   *
   * @param confidence - Raw confidence value from LLM
   * @returns Validated confidence (0.0 - 1.0); 0 for non-numbers, NaN and ±Infinity
   */
  private validateConfidence(confidence: unknown): number {
    if (typeof confidence !== "number" || !Number.isFinite(confidence)) {
      return 0;
    }
    return Math.max(0, Math.min(1, confidence));
  }

  /**
   * Validate an entity from LLM response.
   * Ensures entity has valid structure and safe values.
   *
   * @param entity - Raw entity from LLM
   * @returns True if entity is valid
   */
  private isValidEntity(entity: unknown): entity is ExtractedEntity {
    if (typeof entity !== "object" || entity === null) {
      return false;
    }

    const e = entity as Record<string, unknown>;

    // Validate type
    if (typeof e.type !== "string" || !VALID_ENTITY_TYPES.includes(e.type)) {
      return false;
    }

    // Validate value (string, max 200 chars)
    if (typeof e.value !== "string" || e.value.length > 200) {
      return false;
    }

    // Validate raw (string, max 200 chars)
    if (typeof e.raw !== "string" || e.raw.length > 200) {
      return false;
    }

    // Validate positions (non-negative integers, end > start)
    const { start, end } = e;
    if (typeof start !== "number" || typeof end !== "number") {
      return false;
    }
    // Number.isInteger also rejects NaN/Infinity, which would otherwise slip through
    // the range comparisons below (every comparison against NaN is false).
    if (!Number.isInteger(start) || !Number.isInteger(end)) {
      return false;
    }
    if (start < 0 || end <= start || end > 10000) {
      return false;
    }

    return true;
  }

  /**
   * Isolate the JSON object in an LLM reply.
   * Models frequently wrap their answer in markdown code fences or add a sentence
   * around it; this keeps only the text from the first "{" to the last "}".
   *
   * @param content - Raw LLM response content
   * @returns The outermost brace-delimited span, or `content` unchanged when no
   *          such span exists
   */
  private extractJsonPayload(content: string): string {
    const start = content.indexOf("{");
    const end = content.lastIndexOf("}");
    return start !== -1 && end > start ? content.slice(start, end + 1) : content;
  }

  /**
   * Parse LLM response into IntentClassification.
   *
   * Unrecognized intents become "unknown", the confidence is clamped to
   * [0, 1], and entities failing validation are dropped (with a warning).
   * Unparseable content is logged and reported as "unknown" with zero confidence.
   *
   * @param content - LLM response content
   * @param query - Original query
   * @returns Intent classification result
   */
  private parseLlmResponse(content: string, query: string): IntentClassification {
    try {
      const parsed: unknown = JSON.parse(this.extractJsonPayload(content));

      if (typeof parsed !== "object" || parsed === null) {
        throw new Error("Invalid JSON structure");
      }

      const parsedObj = parsed as Record<string, unknown>;

      // Validate intent type
      const intent =
        typeof parsedObj.intent === "string" &&
        VALID_INTENTS.includes(parsedObj.intent as IntentType)
          ? (parsedObj.intent as IntentType)
          : "unknown";

      // Validate and filter entities
      const rawEntities: unknown[] = Array.isArray(parsedObj.entities) ? parsedObj.entities : [];
      const validEntities = rawEntities.filter((e): e is ExtractedEntity => this.isValidEntity(e));

      if (rawEntities.length !== validEntities.length) {
        this.logger.warn(
          `Filtered ${String(rawEntities.length - validEntities.length)} invalid entities from LLM response`
        );
      }

      return {
        intent,
        confidence: this.validateConfidence(parsedObj.confidence),
        entities: validEntities,
        method: "llm",
        query,
      };
    } catch {
      this.logger.error(`Failed to parse LLM response: ${content}`);
      return {
        intent: "unknown",
        confidence: 0,
        entities: [],
        method: "llm",
        query,
      };
    }
  }
}