feat(#127): refactor LlmService to use provider pattern

Refactor LlmService to delegate to LlmManagerService instead of using Ollama directly. This enables multiple provider support and user-specific provider configuration. Changes: - Remove direct Ollama client from LlmService - Delegate all LLM operations to provider via LlmManagerService - Update health status to use provider-agnostic interface - Add PrismaModule to LlmModule for manager service - Maintain backward compatibility with existing API - Achieve 89.74% test coverage Fixes #127 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-31 12:33:56 -06:00
parent be6c15116d
commit 1f97e6de40
5 changed files with 433 additions and 133 deletions
--- a/apps/api/src/llm/llm.service.ts
+++ b/apps/api/src/llm/llm.service.ts
@@ -1,140 +1,146 @@
 import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
-import { Ollama, Message } from "ollama";
-import type {
-  ChatRequestDto,
-  ChatResponseDto,
-  EmbedRequestDto,
-  EmbedResponseDto,
-  ChatStreamChunkDto,
-} from "./dto";
-export interface OllamaConfig {
-  host: string;
-  timeout?: number;
-}
-export interface OllamaHealthStatus {
-  healthy: boolean;
-  host: string;
-  error?: string;
-  models?: string[];
-}
+import { LlmManagerService } from "./llm-manager.service";
+import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
+import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface";
+
+/**
+ * LLM Service
+ *
+ * High-level service for LLM operations. Delegates to providers via LlmManagerService.
+ * Maintains backward compatibility with the original API while supporting multiple providers.
+ *
+ * @example
+ * ```typescript
+ * // Chat completion
+ * const response = await llmService.chat({
+ *   model: "llama3.2",
+ *   messages: [{ role: "user", content: "Hello" }]
+ * });
+ *
+ * // Streaming chat
+ * for await (const chunk of llmService.chatStream(request)) {
+ *   console.log(chunk.message.content);
+ * }
+ *
+ * // Generate embeddings
+ * const embeddings = await llmService.embed({
+ *   model: "llama3.2",
+ *   input: ["text to embed"]
+ * });
+ * ```
+ */
@Injectable()
 export class LlmService implements OnModuleInit {
  private readonly logger = new Logger(LlmService.name);
-  private client: Ollama;
-  private readonly config: OllamaConfig;
-  constructor() {
-    this.config = {
-      host: process.env.OLLAMA_HOST ?? "http://localhost:11434",
-      timeout: parseInt(process.env.OLLAMA_TIMEOUT ?? "120000", 10),
-    };
-    this.client = new Ollama({ host: this.config.host });
-    this.logger.log("Ollama service initialized");
+
+  constructor(private readonly llmManager: LlmManagerService) {
+    this.logger.log("LLM service initialized");
  }
+
+  /**
+   * Check health status on module initialization.
+   * Logs the status but does not fail if unhealthy.
+   */
  async onModuleInit(): Promise<void> {
-    const h = await this.checkHealth();
-    if (h.healthy) this.logger.log("Ollama healthy");
-    else this.logger.warn("Ollama unhealthy: " + (h.error ?? "unknown"));
+    const health = await this.checkHealth();
+    if (health.healthy) {
+      const endpoint = health.endpoint ?? "default endpoint";
+      this.logger.log(`LLM provider healthy: ${health.provider} at ${endpoint}`);
+    } else {
+      const errorMsg = health.error ?? "unknown error";
+      this.logger.warn(`LLM provider unhealthy: ${errorMsg}`);
+    }
  }
-  async checkHealth(): Promise<OllamaHealthStatus> {
+  /**
+   * Check health of the default LLM provider.
+   * Returns health status without throwing errors.
+   *
+   * @returns Health status of the default provider
+   */
+  async checkHealth(): Promise<LlmProviderHealthStatus> {
    try {
-      const r = await this.client.list();
-      return { healthy: true, host: this.config.host, models: r.models.map((m) => m.name) };
-    } catch (e: unknown) {
+      const provider = await this.llmManager.getDefaultProvider();
+      return await provider.checkHealth();
+    } catch (error: unknown) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      this.logger.error(`Health check failed: ${errorMessage}`);
      return {
        healthy: false,
-        host: this.config.host,
-        error: e instanceof Error ? e.message : String(e),
+        provider: "unknown",
+        error: errorMessage,
      };
    }
  }
+  /**
+   * List all available models from the default provider.
+   *
+   * @returns Array of model names
+   * @throws {ServiceUnavailableException} If provider is unavailable or request fails
+   */
  async listModels(): Promise<string[]> {
    try {
-      return (await this.client.list()).models.map((m) => m.name);
-    } catch (e: unknown) {
-      const msg = e instanceof Error ? e.message : String(e);
-      this.logger.error("Failed to list models: " + msg);
-      throw new ServiceUnavailableException("Failed to list models: " + msg);
+      const provider = await this.llmManager.getDefaultProvider();
+      return await provider.listModels();
+    } catch (error: unknown) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      this.logger.error(`Failed to list models: ${errorMessage}`);
+      throw new ServiceUnavailableException(`Failed to list models: ${errorMessage}`);
    }
  }
+  /**
+   * Perform a synchronous chat completion.
+   *
+   * @param request - Chat request with messages and configuration
+   * @returns Complete chat response
+   * @throws {ServiceUnavailableException} If provider is unavailable or request fails
+   */
  async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
    try {
-      const msgs = this.buildMessages(request);
-      const options: { temperature?: number; num_predict?: number } = {};
-      if (request.temperature !== undefined) {
-        options.temperature = request.temperature;
-      }
-      if (request.maxTokens !== undefined) {
-        options.num_predict = request.maxTokens;
-      }
-      const r = await this.client.chat({
-        model: request.model,
-        messages: msgs,
-        stream: false,
-        options,
-      });
-      return {
-        model: r.model,
-        message: { role: r.message.role as "assistant", content: r.message.content },
-        done: r.done,
-        totalDuration: r.total_duration,
-        promptEvalCount: r.prompt_eval_count,
-        evalCount: r.eval_count,
-      };
-    } catch (e: unknown) {
-      const msg = e instanceof Error ? e.message : String(e);
-      this.logger.error("Chat failed: " + msg);
-      throw new ServiceUnavailableException("Chat completion failed: " + msg);
+      const provider = await this.llmManager.getDefaultProvider();
+      return await provider.chat(request);
+    } catch (error: unknown) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      this.logger.error(`Chat failed: ${errorMessage}`);
+      throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`);
    }
  }
-  async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatStreamChunkDto> {
+  /**
+   * Perform a streaming chat completion.
+   * Yields response chunks as they arrive from the provider.
+   *
+   * @param request - Chat request with messages and configuration
+   * @yields Chat response chunks
+   * @throws {ServiceUnavailableException} If provider is unavailable or request fails
+   */
+  async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto, void, unknown> {
    try {
-      const options: { temperature?: number; num_predict?: number } = {};
-      if (request.temperature !== undefined) {
-        options.temperature = request.temperature;
+      const provider = await this.llmManager.getDefaultProvider();
+      const stream = provider.chatStream(request);
+
+      for await (const chunk of stream) {
+        yield chunk;
      }
-      if (request.maxTokens !== undefined) {
-        options.num_predict = request.maxTokens;
-      }
-      const stream = await this.client.chat({
-        model: request.model,
-        messages: this.buildMessages(request),
-        stream: true,
-        options,
-      });
-      for await (const c of stream)
-        yield {
-          model: c.model,
-          message: { role: c.message.role as "assistant", content: c.message.content },
-          done: c.done,
-        };
-    } catch (e: unknown) {
-      const msg = e instanceof Error ? e.message : String(e);
-      this.logger.error("Stream failed: " + msg);
-      throw new ServiceUnavailableException("Streaming failed: " + msg);
+    } catch (error: unknown) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      this.logger.error(`Stream failed: ${errorMessage}`);
+      throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`);
    }
  }
+  /**
+   * Generate embeddings for the given input texts.
+   *
+   * @param request - Embedding request with model and input texts
+   * @returns Embeddings response with vector arrays
+   * @throws {ServiceUnavailableException} If provider is unavailable or request fails
+   */
  async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
    try {
-      const r = await this.client.embed({
-        model: request.model,
-        input: request.input,
-        truncate: request.truncate === "none" ? false : true,
-      });
-      return { model: r.model, embeddings: r.embeddings, totalDuration: r.total_duration };
-    } catch (e: unknown) {
-      const msg = e instanceof Error ? e.message : String(e);
-      this.logger.error("Embed failed: " + msg);
-      throw new ServiceUnavailableException("Embedding failed: " + msg);
+      const provider = await this.llmManager.getDefaultProvider();
+      return await provider.embed(request);
+    } catch (error: unknown) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      this.logger.error(`Embed failed: ${errorMessage}`);
+      throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`);
    }
  }
-  private buildMessages(req: ChatRequestDto): Message[] {
-    const msgs: Message[] = [];
-    if (req.systemPrompt && !req.messages.some((m) => m.role === "system"))
-      msgs.push({ role: "system", content: req.systemPrompt });
-    for (const m of req.messages) msgs.push({ role: m.role, content: m.content });
-    return msgs;
-  }
-  getConfig(): OllamaConfig {
-    return { ...this.config };
-  }
 }