stack/apps/api/src/llm/llm.service.ts

import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
import { Ollama, Message } from "ollama";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto, ChatStreamChunkDto } from "./dto";
export interface OllamaConfig { host: string; timeout?: number; }
export interface OllamaHealthStatus { healthy: boolean; host: string; error?: string; models?: string[]; }
@Injectable()
export class LlmService implements OnModuleInit {
  private readonly logger = new Logger(LlmService.name);
  private client: Ollama;
  private readonly config: OllamaConfig;
  constructor() { this.config = { host: process.env["OLLAMA_HOST"] ?? "http://localhost:11434", timeout: parseInt(process.env["OLLAMA_TIMEOUT"] ?? "120000", 10) }; this.client = new Ollama({ host: this.config.host }); this.logger.log("Ollama service initialized"); }
  async onModuleInit(): Promise<void> { const h = await this.checkHealth(); if (h.healthy) this.logger.log("Ollama healthy"); else this.logger.warn("Ollama unhealthy: " + (h.error ?? "unknown")); }
  async checkHealth(): Promise<OllamaHealthStatus> { try { const r = await this.client.list(); return { healthy: true, host: this.config.host, models: r.models.map(m => m.name) }; } catch (e: unknown) { return { healthy: false, host: this.config.host, error: e instanceof Error ? e.message : String(e) }; } }
  async listModels(): Promise<string[]> { try { return (await this.client.list()).models.map(m => m.name); } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Failed to list models: " + msg); throw new ServiceUnavailableException("Failed to list models: " + msg); } }
  async chat(request: ChatRequestDto): Promise<ChatResponseDto> { try { const msgs = this.buildMessages(request); const r = await this.client.chat({ model: request.model, messages: msgs, stream: false, options: { temperature: request.temperature, num_predict: request.maxTokens } }); return { model: r.model, message: { role: r.message.role as "assistant", content: r.message.content }, done: r.done, totalDuration: r.total_duration, promptEvalCount: r.prompt_eval_count, evalCount: r.eval_count }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Chat failed: " + msg); throw new ServiceUnavailableException("Chat completion failed: " + msg); } }
  async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatStreamChunkDto> { try { const stream = await this.client.chat({ model: request.model, messages: this.buildMessages(request), stream: true, options: { temperature: request.temperature, num_predict: request.maxTokens } }); for await (const c of stream) yield { model: c.model, message: { role: c.message.role as "assistant", content: c.message.content }, done: c.done }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Stream failed: " + msg); throw new ServiceUnavailableException("Streaming failed: " + msg); } }
  async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> { try { const r = await this.client.embed({ model: request.model, input: request.input, truncate: request.truncate === "none" ? false : true }); return { model: r.model, embeddings: r.embeddings, totalDuration: r.total_duration }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Embed failed: " + msg); throw new ServiceUnavailableException("Embedding failed: " + msg); } }
  private buildMessages(req: ChatRequestDto): Message[] { const msgs: Message[] = []; if (req.systemPrompt && !req.messages.some(m => m.role === "system")) msgs.push({ role: "system", content: req.systemPrompt }); for (const m of req.messages) msgs.push({ role: m.role, content: m.content }); return msgs; }
  getConfig(): OllamaConfig { return { ...this.config }; }
}