stack/apps/api/src/brain/intent-classification.service.ts

import { Injectable, Optional, Logger } from "@nestjs/common";
import { LlmService } from "../llm/llm.service";
import type {
  IntentType,
  IntentClassification,
  IntentPattern,
  ExtractedEntity,
} from "./interfaces";

/**
 * Entity type names accepted when validating entities returned by the LLM.
 * Any entity whose `type` is not listed here is rejected.
 */
const VALID_ENTITY_TYPES = [
  "date",
  "time",
  "person",
  "project",
  "priority",
  "status",
  "text",
];

/**
 * Intent Classification Service
 *
 * Classifies natural language queries into structured intents using a hybrid approach:
 * 1. Rule-based classification (fast) - regex patterns for common phrases
 * 2. LLM fallback (optional) - used when the rules produce a low-confidence result and an
 *    LLM service is injected, or when the caller explicitly requests it
 *
 * Configuration is read from the environment once, when the service is constructed:
 * - `INTENT_CLASSIFICATION_MODEL` - model name passed to the LLM (default "llama3.2")
 * - `INTENT_CLASSIFICATION_TEMPERATURE` - sampling temperature (default 0.1); blank,
 *   non-numeric or negative values fall back to the default
 *
 * @example
 * ```typescript
 * // Rule-based classification (default)
 * const result = await service.classify("show my tasks");
 * // { intent: "query_tasks", confidence: 0.9, method: "rule", ... }
 *
 * // Force LLM classification
 * const result = await service.classify("show my tasks", true);
 * // { intent: "query_tasks", confidence: 0.95, method: "llm", ... }
 * ```
 */
@Injectable()
export class IntentClassificationService {
  /** Model used when INTENT_CLASSIFICATION_MODEL is unset or blank. */
  private static readonly DEFAULT_INTENT_MODEL = "llama3.2";
  /** Temperature used when INTENT_CLASSIFICATION_TEMPERATURE is unset or invalid. */
  private static readonly DEFAULT_INTENT_TEMPERATURE = 0.1;
  /** Maximum number of (escaped) query characters embedded in the LLM prompt. */
  private static readonly MAX_PROMPT_QUERY_LENGTH = 500;
  /** Maximum number of characters of an unparseable LLM response written to the log. */
  private static readonly MAX_LOGGED_RESPONSE_LENGTH = 500;

  /** Intents the LLM may return; any other value is mapped to "unknown". */
  private static readonly VALID_INTENTS: readonly IntentType[] = [
    "query_tasks",
    "query_events",
    "query_projects",
    "create_task",
    "create_event",
    "update_task",
    "update_event",
    "briefing",
    "search",
    "unknown",
  ];

  /* eslint-disable security/detect-unsafe-regex */
  /**
   * Ordered entity extraction rules. Entities are emitted in rule order
   * (dates, times, priorities, statuses, people) and, within a rule, in order of
   * appearance. Every pattern MUST carry the `g` flag because the rules are
   * applied with a repeated `exec` loop.
   */
  private static readonly ENTITY_RULES: ReadonlyArray<{
    type: ExtractedEntity["type"];
    pattern: RegExp;
    toValue: (match: RegExpExecArray) => string | undefined;
  }> = [
    // Dates
    {
      type: "date",
      pattern: /\b(today|tomorrow|yesterday)\b/gi,
      toValue: (m) => m[0].toLowerCase(),
    },
    {
      type: "date",
      pattern: /\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/gi,
      toValue: (m) => m[0].toLowerCase(),
    },
    {
      type: "date",
      pattern: /\b(next|this)\s+(week|month|year)\b/gi,
      toValue: (m) => m[0].toLowerCase(),
    },
    {
      type: "date",
      pattern: /\b(\d{1,2})[/-](\d{1,2})([/-](\d{2,4}))?\b/g,
      toValue: (m) => m[0],
    },
    // Times (most specific first, so the overlap filter keeps the best match)
    {
      type: "time",
      pattern: /\b(\d{1,2}):(\d{2})\s*(am|pm)?\b/gi,
      toValue: (m) => m[0].toLowerCase(),
    },
    {
      type: "time",
      pattern: /\b(\d{1,2})\s*(am|pm)\b/gi,
      toValue: (m) => m[0].toLowerCase(),
    },
    {
      type: "time",
      pattern: /\bat\s+(\d{1,2})\b/gi,
      toValue: (m) => m[0].toLowerCase(),
    },
    // Priorities
    {
      type: "priority",
      pattern: /\b(high|urgent|critical)\s*priority\b/gi,
      toValue: () => "HIGH",
    },
    {
      type: "priority",
      pattern: /\b(medium|normal)\s*priority\b/gi,
      toValue: () => "MEDIUM",
    },
    {
      type: "priority",
      pattern: /\b(low|minor)\s*priority\b/gi,
      toValue: () => "LOW",
    },
    // Statuses
    {
      type: "status",
      pattern: /\b(done|complete|finished|completed)\b/gi,
      toValue: () => "DONE",
    },
    {
      type: "status",
      pattern: /\b(in\s*progress|working\s*on|ongoing)\b/gi,
      toValue: () => "IN_PROGRESS",
    },
    {
      type: "status",
      pattern: /\b(pending|todo|not\s*started)\b/gi,
      toValue: () => "PENDING",
    },
    {
      type: "status",
      pattern: /\b(blocked|stuck)\b/gi,
      toValue: () => "BLOCKED",
    },
    {
      type: "status",
      pattern: /\b(cancelled|canceled)\b/gi,
      toValue: () => "CANCELLED",
    },
    // People (@mentions): the value is the name without the leading "@"
    {
      type: "person",
      pattern: /@(\w+)/g,
      toValue: (m) => m[1],
    },
  ];
  /* eslint-enable security/detect-unsafe-regex */

  private readonly logger = new Logger(IntentClassificationService.name);
  /** Intent patterns, sorted once at construction by descending priority. */
  private readonly patterns: IntentPattern[];
  /** Rule results below this confidence are handed to the LLM when one is available. */
  private readonly RULE_CONFIDENCE_THRESHOLD = 0.7;

  /** Configurable LLM model for intent classification */
  private readonly intentModel: string = this.resolveModel(
    // eslint-disable-next-line @typescript-eslint/dot-notation -- env vars use bracket notation
    process.env["INTENT_CLASSIFICATION_MODEL"]
  );
  /** Configurable temperature (low for consistent results) */
  private readonly intentTemperature: number = this.resolveTemperature(
    // eslint-disable-next-line @typescript-eslint/dot-notation -- env vars use bracket notation
    process.env["INTENT_CLASSIFICATION_TEMPERATURE"]
  );

  constructor(@Optional() private readonly llmService?: LlmService) {
    // buildPatterns() returns a fresh array, so sorting it in place is safe.
    // Array.prototype.sort is stable: equal priorities keep their declaration order.
    this.patterns = this.buildPatterns().sort((a, b) => b.priority - a.priority);
    this.logger.log("Intent classification service initialized");
  }

  /**
   * Classify a natural language query into an intent.
   * Uses rule-based classification by default, with optional LLM fallback.
   *
   * The LLM is used when `useLlm` is true, or when the rule result is below
   * RULE_CONFIDENCE_THRESHOLD and an LLM service was injected. Blank queries
   * short-circuit to "unknown" and are never sent to the LLM.
   *
   * @param query - Natural language query to classify
   * @param useLlm - Force LLM classification (default: false)
   * @returns Intent classification result
   */
  async classify(query: string, useLlm = false): Promise<IntentClassification> {
    if (this.isBlank(query)) {
      return this.unknownResult(query, "rule");
    }

    // Try rule-based classification first
    const ruleResult = this.classifyWithRules(query);

    const ruleIsWeak = ruleResult.confidence < this.RULE_CONFIDENCE_THRESHOLD;
    const shouldUseLlm = useLlm || (ruleIsWeak && Boolean(this.llmService));

    return shouldUseLlm ? this.classifyWithLlm(query) : ruleResult;
  }

  /**
   * Classify a query using rule-based pattern matching.
   * Fast but limited to predefined patterns.
   *
   * The first pattern group (in descending priority) with any matching regex
   * wins and yields confidence 0.9; no match yields "unknown" with confidence 0.2.
   * Matching is done on the lower-cased, trimmed query, while entities are
   * extracted from the original query so offsets refer to the caller's text.
   *
   * @param query - Natural language query to classify
   * @returns Intent classification result
   */
  classifyWithRules(query: string): IntentClassification {
    if (this.isBlank(query)) {
      return this.unknownResult(query, "rule");
    }

    const normalizedQuery = query.toLowerCase().trim();

    // this.patterns is already ordered by priority (highest first).
    const matched = this.patterns.find(({ patterns }) =>
      patterns.some((pattern) => pattern.test(normalizedQuery))
    );

    if (!matched) {
      // No pattern matched
      return this.unknownResult(query, "rule", 0.2);
    }

    return {
      intent: matched.intent,
      confidence: 0.9, // High confidence for direct pattern match
      entities: this.extractEntities(query),
      method: "rule",
      query,
    };
  }

  /**
   * Classify a query using LLM.
   * Slower but more flexible for ambiguous queries.
   *
   * Falls back to rule-based classification when no LLM service is injected.
   * Never throws: a failed LLM call is logged and reported as "unknown" with
   * confidence 0 and method "llm".
   *
   * @param query - Natural language query to classify
   * @returns Intent classification result
   */
  async classifyWithLlm(query: string): Promise<IntentClassification> {
    const llm = this.llmService;
    if (!llm) {
      this.logger.warn("LLM service not available, falling back to rule-based classification");
      return this.classifyWithRules(query);
    }

    try {
      const response = await llm.chat({
        messages: [
          {
            role: "system",
            content: "You are an intent classification assistant. Respond only with valid JSON.",
          },
          {
            role: "user",
            content: this.buildLlmPrompt(query),
          },
        ],
        model: this.intentModel,
        temperature: this.intentTemperature,
      });

      return this.parseLlmResponse(response.message.content, query);
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.logger.error(`LLM classification failed: ${errorMessage}`);
      return this.unknownResult(query, "llm");
    }
  }

  /**
   * Extract entities from a query.
   * Identifies dates, times, priorities, statuses and @mentions.
   *
   * Entities are returned grouped by rule order (dates, times, priorities,
   * statuses, people), not sorted by position. A match that overlaps an
   * already-extracted entity of the same type is dropped, so "3:30 pm" yields a
   * single time entity instead of both "3:30 pm" and "30 pm".
   *
   * @param query - Query to extract entities from
   * @returns Array of extracted entities with offsets into `query`
   */
  extractEntities(query: string): ExtractedEntity[] {
    const entities: ExtractedEntity[] = [];

    for (const { type, pattern, toValue } of IntentClassificationService.ENTITY_RULES) {
      // The regex objects are shared across calls; always start from the beginning.
      pattern.lastIndex = 0;

      let match: RegExpExecArray | null;
      while ((match = pattern.exec(query)) !== null) {
        const raw = match[0];
        if (raw.length === 0) {
          // Defensive: a zero-length match would never advance lastIndex.
          pattern.lastIndex += 1;
          continue;
        }

        const value = toValue(match);
        if (!value) {
          continue;
        }

        const start = match.index;
        const end = start + raw.length;
        const overlapsSameType = entities.some(
          (existing) => existing.type === type && start < existing.end && existing.start < end
        );
        if (overlapsSameType) {
          continue;
        }

        entities.push({ type, value, raw, start, end });
      }
    }

    return entities;
  }

  /**
   * Build regex patterns for intent matching.
   * The returned list is in declaration order; the constructor sorts it by
   * priority (higher = checked first).
   *
   * @returns Array of intent patterns
   */
  private buildPatterns(): IntentPattern[] {
    /* eslint-disable security/detect-unsafe-regex */
    return [
      // Briefing (highest priority - specific intent)
      {
        intent: "briefing",
        patterns: [
          /\b(morning|daily|today'?s?)\s+(briefing|summary|overview)\b/i,
          /\bwhat'?s?\s+(my|the)\s+day\s+look\s+like\b/i,
          /\bgive\s+me\s+(a\s+)?(rundown|summary)\b/i,
        ],
        priority: 10,
      },
      // Create operations (high priority - specific actions)
      {
        intent: "create_task",
        patterns: [
          /\b(add|create|new|make)\s+(a\s+)?(task|to-?do)\b/i,
          /\bremind\s+me\s+to\b/i,
          /\bI\s+need\s+to\b/i,
        ],
        priority: 9,
      },
      {
        intent: "create_event",
        patterns: [
          /\b(schedule|create|add|book)\s+(a\s+|an\s+)?(meeting|event|appointment|call)\b/i,
          /\bset\s+up\s+(a\s+)?(meeting|call)\b/i,
        ],
        priority: 9,
      },
      // Update operations
      {
        intent: "update_task",
        patterns: [
          /\b(mark|set|update|change)\s+(task|to-?do)\s+(as\s+)?(done|complete|status|priority)\b/i,
          /\bcomplete\s+(the\s+)?(task|to-?do)\b/i,
          /\b(finish|done\s+with)\s+(the\s+)?(task|to-?do)\b/i,
          /\bcomplete\s+\w+\s+\w+\s+(task|to-?do)\b/i, // "complete the review task"
          /\bcomplete\s+[\w\s]{1,30}(task|to-?do)\b/i, // More flexible but bounded
        ],
        priority: 8,
      },
      {
        intent: "update_event",
        patterns: [
          /\b(reschedule|move|change|cancel|update)\s+(the\s+)?(meeting|event|appointment|call|standup)\b/i,
          /\bmove\s+(event|meeting)\s+to\b/i,
          /\bcancel\s+(the\s+)?(meeting|event|standup|call)\b/i,
        ],
        priority: 8,
      },
      // Query operations
      {
        intent: "query_tasks",
        patterns: [
          /\b(show|list|get|what|display)\s+((my|all|the)\s+)?tasks?\b/i,
          /\bwhat\s+(tasks?|to-?dos?)\s+(do\s+I|have)\b/i,
          /\b(pending|overdue|upcoming|active)\s+tasks?\b/i,
        ],
        priority: 8,
      },
      {
        intent: "query_events",
        patterns: [
          /\b(show|list|get|display)\s+((my|all|the)\s+)?(calendar|events?|meetings?|schedule)\b/i,
          /\bwhat'?s?\s+(on\s+)?(my\s+)?(calendar|schedule)\b/i,
          /\b(upcoming|next|today'?s?)\s+(events?|meetings?)\b/i,
        ],
        priority: 8,
      },
      {
        intent: "query_projects",
        patterns: [
          /\b(show|list|get|display|what)\s+((my|all|the)\s+)?projects?\b/i,
          /\bwhat\s+projects?\s+(do\s+I|have)\b/i,
          /\b(active|ongoing)\s+projects?\b/i,
        ],
        priority: 8,
      },
      // Search (lower priority - more general)
      {
        intent: "search",
        patterns: [/\b(find|search|look\s*for|locate)\b/i],
        priority: 6,
      },
    ];
    /* eslint-enable security/detect-unsafe-regex */
  }

  /**
   * Sanitize user query for safe inclusion in LLM prompt.
   * Escapes backslashes and double quotes, replaces line breaks with spaces and
   * limits the escaped text to MAX_PROMPT_QUERY_LENGTH characters.
   *
   * Truncation never leaves a dangling backslash (which would escape the
   * closing quote that the prompt places after the query) nor half of a
   * surrogate pair, so the result may be one character shorter than the limit.
   *
   * @param query - Raw user query
   * @returns Sanitized query safe for LLM prompt
   */
  private sanitizeQueryForPrompt(query: string): string {
    // Escape quotes and backslashes to prevent prompt injection
    const sanitized = query
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"')
      .replace(/[\r\n]/g, " ");

    const maxLength = IntentClassificationService.MAX_PROMPT_QUERY_LENGTH;
    if (sanitized.length <= maxLength) {
      return sanitized;
    }

    this.logger.warn(
      `Query truncated from ${String(sanitized.length)} to ${String(maxLength)} chars`
    );

    let truncated = sanitized.slice(0, maxLength);

    // After escaping, backslashes come in pairs or directly precede a quote.
    // An odd trailing run means the cut landed inside an escape sequence.
    let trailingBackslashes = 0;
    while (
      trailingBackslashes < truncated.length &&
      truncated.charAt(truncated.length - 1 - trailingBackslashes) === "\\"
    ) {
      trailingBackslashes += 1;
    }
    if (trailingBackslashes % 2 === 1) {
      truncated = truncated.slice(0, -1);
    }

    // Do not end on a high surrogate whose low half was cut off.
    const lastCodeUnit = truncated.charCodeAt(truncated.length - 1);
    if (lastCodeUnit >= 0xd800 && lastCodeUnit <= 0xdbff) {
      truncated = truncated.slice(0, -1);
    }

    return truncated;
  }

  /**
   * Build the prompt for LLM classification.
   *
   * @param query - User query to classify (sanitized before being embedded)
   * @returns Formatted prompt
   */
  private buildLlmPrompt(query: string): string {
    const sanitizedQuery = this.sanitizeQueryForPrompt(query);

    return `Classify the following user query into one of these intents:
- query_tasks: User wants to see their tasks
- query_events: User wants to see their calendar/events
- query_projects: User wants to see their projects
- create_task: User wants to create a new task
- create_event: User wants to schedule a new event
- update_task: User wants to update an existing task
- update_event: User wants to update/reschedule an event
- briefing: User wants a daily briefing/summary
- search: User wants to search for something
- unknown: Query doesn't match any intent

Also extract any entities (dates, times, priorities, statuses, people).

Query: "${sanitizedQuery}"

Respond with ONLY this JSON format (no other text):
{
  "intent": "<intent_type>",
  "confidence": <0.0-1.0>,
  "entities": [
    {
      "type": "<date|time|person|project|priority|status|text>",
      "value": "<normalized_value>",
      "raw": "<original_text>",
      "start": <position>,
      "end": <position>
    }
  ]
}`;
  }

  /**
   * Validate and sanitize confidence score from LLM.
   * Ensures confidence is a valid number between 0.0 and 1.0.
   *
   * @param confidence - Raw confidence value from LLM
   * @returns The value clamped to [0, 1], or 0 when it is not a finite number
   */
  private validateConfidence(confidence: unknown): number {
    if (typeof confidence !== "number" || !Number.isFinite(confidence)) {
      return 0;
    }
    return Math.max(0, Math.min(1, confidence));
  }

  /**
   * Validate an entity from LLM response.
   * Ensures entity has valid structure and safe values.
   *
   * @param entity - Raw entity from LLM
   * @returns True if entity is valid
   */
  private isValidEntity(entity: unknown): entity is ExtractedEntity {
    if (typeof entity !== "object" || entity === null) {
      return false;
    }

    const e = entity as Record<string, unknown>;

    // Validate type
    if (typeof e.type !== "string" || !VALID_ENTITY_TYPES.includes(e.type)) {
      return false;
    }

    // Validate value (string, max 200 chars)
    if (typeof e.value !== "string" || e.value.length > 200) {
      return false;
    }

    // Validate raw (string, max 200 chars)
    if (typeof e.raw !== "string" || e.raw.length > 200) {
      return false;
    }

    // Validate positions (non-negative integers, end > start, bounded)
    const { start, end } = e;
    if (
      typeof start !== "number" ||
      typeof end !== "number" ||
      !Number.isInteger(start) ||
      !Number.isInteger(end) ||
      start < 0 ||
      end <= start ||
      end > 10000
    ) {
      return false;
    }

    return true;
  }

  /**
   * Parse LLM response into IntentClassification.
   *
   * Accepts plain JSON as well as JSON wrapped in Markdown fences or surrounding
   * prose. Unknown intents map to "unknown", the confidence is clamped to
   * [0, 1] and invalid entities are dropped (with a warning). Anything that
   * cannot be parsed into a JSON object is logged and reported as "unknown"
   * with confidence 0.
   *
   * @param content - LLM response content
   * @param query - Original query
   * @returns Intent classification result
   */
  private parseLlmResponse(content: string, query: string): IntentClassification {
    try {
      const parsed = this.parseJsonLeniently(content);

      if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
        throw new Error("Invalid JSON structure");
      }

      const parsedObj = parsed as Record<string, unknown>;

      // Validate intent type
      const intent: IntentType =
        IntentClassificationService.VALID_INTENTS.find(
          (candidate) => candidate === parsedObj.intent
        ) ?? "unknown";

      // Validate and filter entities
      const rawEntities: unknown[] = Array.isArray(parsedObj.entities) ? parsedObj.entities : [];
      const validEntities = rawEntities.filter((e): e is ExtractedEntity => this.isValidEntity(e));

      if (rawEntities.length !== validEntities.length) {
        this.logger.warn(
          `Filtered ${String(rawEntities.length - validEntities.length)} invalid entities from LLM response`
        );
      }

      return {
        intent,
        confidence: this.validateConfidence(parsedObj.confidence),
        entities: validEntities,
        method: "llm",
        query,
      };
    } catch {
      // Bound the logged text: model output can be arbitrarily long.
      const text = String(content);
      const maxLogged = IntentClassificationService.MAX_LOGGED_RESPONSE_LENGTH;
      const preview =
        text.length > maxLogged
          ? `${text.slice(0, maxLogged)}... [${String(text.length - maxLogged)} more chars]`
          : text;
      this.logger.error(`Failed to parse LLM response: ${preview}`);
      return this.unknownResult(query, "llm");
    }
  }

  /**
   * Parse JSON from an LLM reply, tolerating Markdown fences or prose around it.
   * Strict parsing is tried first so well-formed replies behave exactly as
   * before; only on failure is the text between the first "{" and the last "}"
   * parsed instead.
   *
   * @param content - Raw LLM reply
   * @returns The parsed JSON value
   * @throws SyntaxError when no parseable JSON is found
   */
  private parseJsonLeniently(content: string): unknown {
    try {
      const strict: unknown = JSON.parse(content);
      return strict;
    } catch (strictError: unknown) {
      const firstBrace = content.indexOf("{");
      const lastBrace = content.lastIndexOf("}");
      if (firstBrace === -1 || lastBrace <= firstBrace) {
        throw strictError;
      }
      const lenient: unknown = JSON.parse(content.slice(firstBrace, lastBrace + 1));
      return lenient;
    }
  }

  /**
   * Build the "could not classify" result shared by every fallback path.
   *
   * @param query - Original query, echoed back unchanged
   * @param method - Classification method to report
   * @param confidence - Confidence to report (default 0)
   * @returns An "unknown" classification with no entities
   */
  private unknownResult(
    query: string,
    method: IntentClassification["method"],
    confidence = 0
  ): IntentClassification {
    return {
      intent: "unknown",
      confidence,
      entities: [],
      method,
      query,
    };
  }

  /**
   * Whether the query is missing, empty or whitespace-only.
   *
   * @param query - Query to test
   * @returns True when there is nothing to classify
   */
  private isBlank(query: string): boolean {
    return !query || query.trim().length === 0;
  }

  /**
   * Resolve the LLM model name from its environment value.
   *
   * @param raw - Raw environment value, if any
   * @returns The trimmed value, or the default model when unset or blank
   */
  private resolveModel(raw: string | undefined): string {
    const trimmed = raw?.trim() ?? "";
    return trimmed.length > 0 ? trimmed : IntentClassificationService.DEFAULT_INTENT_MODEL;
  }

  /**
   * Resolve the LLM temperature from its environment value.
   *
   * @param raw - Raw environment value, if any
   * @returns The parsed temperature, or the default when the value is unset,
   *   blank, not a number or negative (a warning is logged for invalid values)
   */
  private resolveTemperature(raw: string | undefined): number {
    const fallback = IntentClassificationService.DEFAULT_INTENT_TEMPERATURE;
    if (raw === undefined || raw.trim().length === 0) {
      return fallback;
    }

    const parsed = parseFloat(raw);
    if (!Number.isFinite(parsed) || parsed < 0) {
      this.logger.warn(
        `Invalid INTENT_CLASSIFICATION_TEMPERATURE "${raw}", using default ${String(fallback)}`
      );
      return fallback;
    }

    return parsed;
  }
}